import whisper

model = whisper.load_model("base")

# load audio and pad/trim it to fit 30 seconds
audio = whisper.load_audio("audio.mp3")
audio = whisper.pad_or_trim(audio)

# make log-Mel spectrogram and move to the same device as the model
mel = whisper.log_mel_spectrogram(audio).to(model.device)

# detect the spoken language
_, probs = model.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")

# decode the audio
options = whisper.DecodingOptions()
result = whisper.decode(model, mel, options)

# print the recognized text
print(result.text)
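The step-by-step decoding above only covers a single 30-second window. The script that follows relies on the higher-level transcribe() method instead, which chunks long audio internally and returns the full text, segments, and detected language. A minimal sketch of that call, assuming the same audio.mp3 file:

import whisper

# transcribe() handles loading, chunking, and decoding in one call
model = whisper.load_model("base")
result = model.transcribe("audio.mp3")
print(result["language"])
print(result["text"])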
import os

import whisper
from whisper.utils import get_writer

model = whisper.load_model('small')


def get_transcribe(audio: str, language: str = 'en'):
    """Transcribe the given audio file in the given language."""
    return model.transcribe(audio=audio, language=language, verbose=True)


def save_file(results, output_format='tsv'):
    """Write the transcription to the output/ directory in the requested format."""
    os.makedirs('output', exist_ok=True)  # the writer expects the directory to exist
    writer = get_writer(output_format, 'output/')
    writer(results, f'transcribe.{output_format}')


def get_language():
    """
    Ask for the language of the audio; pressing Enter keeps the default (English).
    :return: the language code to pass to the model
    """
    language_input = input("input the song language [default -> enter]\n"
                           "(English->en, Chinese->zh, German->de, Spanish->es, French->fr, Japanese->ja, ...): ")
    language = language_input if language_input else 'en'  # fall back to English if nothing was entered
    print(f"model language is {language}")
    return language


if __name__ == "__main__":
    result = get_transcribe(audio=input("please input your music path: "), language=get_language())
    print('-' * 50)
    print(result.get('text', ''))
    save_file(result)
    save_file(result, 'txt')
    save_file(result, 'srt')