循环处理目录中的多个音频文件（语音转文本）

debugcn 发表于 Dev

舒巴姆·林恩（Shubham Ringne）

我目前是从目录中读取单个音频文件作为输入，并把文件名和语音转文本的结果保存到 CSV 文件中；但该目录中有 100 个文件（即 001.wav、002.wav、003.wav …… 100.wav）。

我想编写一个循环或函数，自动把每个文件的语音转文本结果连同对应的文件名，逐行保存到输出的 CSV 文件中。

这是代码：

import speech_recognition as sr
import csv
import os
# Transcribe a single WAV file with two recognizers (Google and Sphinx) and
# append the file name plus both transcripts as one row of a CSV file.

# Only `os` is imported, so the path helpers must be reached through
# `os.path`.  The file lives *inside* the directory, so join onto the
# directory itself rather than onto its parent (`dirname` would strip the
# last path component).
AUDIO_FILE = os.path.join('C:/path/to/directory', "001.wav")
file_name = os.path.basename(AUDIO_FILE)
name = os.path.basename(AUDIO_FILE)

# use the audio file as the audio source
r = sr.Recognizer()
with sr.AudioFile(AUDIO_FILE) as source:
    audio = r.record(source)  # read the entire audio file

# recognize speech using Google Speech Recognition
try:
    # for testing purposes, we're just using the default API key
    # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
    # instead of `r.recognize_google(audio)`
    a = r.recognize_google(audio)
except sr.UnknownValueError:
    a = "Google Speech Recognition could not understand audio"
except sr.RequestError as e:
    a = "Could not request results from Google Speech Recognition service; {0}".format(e)

# recognize speech using Sphinx
try:
    b = r.recognize_sphinx(audio)
except sr.UnknownValueError:
    b = "Sphinx could not understand audio"
except sr.RequestError as e:
    b = "Sphinx error; {0}".format(e)

# The file is opened in append mode, so only emit the header when the file is
# new or empty; otherwise every run would add another header row.
write_header = (
    not os.path.exists('speech_output.csv')
    or os.path.getsize('speech_output.csv') == 0
)
# newline='' lets the csv module control line endings (avoids blank rows on
# Windows).
with open('speech_output.csv', 'a', newline='') as f:
    writer = csv.writer(f)
    if write_header:
        writer.writerow(['file_name', 'google', 'sphinx'])
    writer.writerow([file_name, a, b])

代码参考：https://github.com/Uberi/speech_recognition/blob/master/examples/audio_transcribe.py

埃德温·范·米洛

您可以使用 os.walk 来获取目录及其子目录中的所有文件，下面代码中的 get_file_paths() 已经包含了这一用法。示例如下：

import speech_recognition as sr
import csv
import os


# Root directory that main() scans (recursively, via get_file_paths) for audio files.
DIRNAME = r'c:\path\to\directory'
# CSV file that main() opens in append mode to store the transcription rows.
OUTPUTFILE = r'c:\path\to\outputfiledir\outputfile.csv'

def get_file_paths(dirname):
    """Return a list with the path of every file found under *dirname*.

    The directory tree is traversed with ``os.walk``, so files in nested
    subdirectories are included.  Each entry is the containing directory
    joined with the file name.
    """
    return [
        os.path.join(parent, entry)
        for parent, _subdirs, entries in os.walk(dirname)
        for entry in entries
    ]

def process_file(file):
    """Transcribe one audio file with Google Speech Recognition.

    Returns the recognized text.  If the recognizer raises
    ``sr.UnknownValueError`` or ``sr.RequestError``, a descriptive message
    string is returned instead of the transcript.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(file) as source:
        audio = recognizer.record(source)  # read the entire audio file
        try:
            return recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            return "Google Speech Recognition could not understand audio"
        except sr.RequestError as err:
            return "Could not request results from Google Speech Recognition service; {0}".format(err)

def main():
    """Transcribe every '.wav' file under DIRNAME and append the results to OUTPUTFILE.

    One CSV row ``[file_name, transcript]`` is written per audio file.  The
    header row is written only once, and only when OUTPUTFILE does not exist
    yet or is empty, so appending over several runs does not scatter header
    rows through the data.
    """
    # Keep only '.wav' files (case-insensitive) and sort them so the rows come
    # out in a stable order (001.wav, 002.wav, ...); os.walk lists directory
    # entries in arbitrary order.
    wav_files = sorted(
        file for file in get_file_paths(DIRNAME)
        if os.path.splitext(file)[1].lower() == '.wav'
    )
    if not wav_files:
        return  # nothing to transcribe; leave the output file untouched

    needs_header = (
        not os.path.exists(OUTPUTFILE) or os.path.getsize(OUTPUTFILE) == 0
    )
    # Open the output once for the whole batch; newline='' lets the csv module
    # control line endings (avoids blank rows on Windows).
    with open(OUTPUTFILE, 'a', newline='') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(['file_name', 'google'])
        for file in wav_files:
            file_name = os.path.basename(file)  # basename is what goes in the CSV
            writer.writerow([file_name, process_file(file)])


# Run the batch transcription only when executed as a script, not on import.
if __name__ == '__main__':
    main()

如果要使用多个识别器，可以采用类似下面的方法。请注意，这是一个未经测试的示例：

import speech_recognition as sr
import csv
import os


# Root directory that main() scans (recursively, via get_file_paths) for audio files.
DIRNAME = r'c:\path\to\directory'
# CSV file that main() opens in append mode to store the transcription rows.
OUTPUTFILE = r'c:\path\to\outputfiledir\outputfile.csv'

def get_file_paths(dirname):
    """Collect the paths of all files below *dirname*, including subdirectories.

    Walks the tree with ``os.walk`` and returns a list in which every file
    name is joined with the directory it was found in.
    """
    collected = []
    for parent, _subdirs, entries in os.walk(dirname):
        collected.extend(os.path.join(parent, entry) for entry in entries)
    return collected

def recog_multiple(file, r_types=('recognize_google', 'recognize_sphinx')):
    """Transcribe one audio file with several recognizers.

    Args:
        file: Path of the audio file to transcribe.
        r_types: Names of ``sr.Recognizer`` methods to call, in order.
            Defaults to Google followed by Sphinx.

    Returns:
        A list with one string per recognizer, each prefixed with the
        recognizer method name.  When a recognizer raises
        ``sr.UnknownValueError`` or ``sr.RequestError`` its entry holds a
        descriptive message instead of a transcript.
    """
    r = sr.Recognizer()
    # Read the audio once up front; every recognizer is given the same
    # recorded data, so re-opening the file per recognizer is wasted work.
    with sr.AudioFile(file) as source:
        audio = r.record(source)

    results = []
    for r_type in r_types:
        try:
            # Look the recognizer method up by name and run it on the audio.
            result = r_type + ': ' + str(getattr(r, r_type)(audio))
        except sr.UnknownValueError:
            result = r_type + ': Speech Recognition could not understand audio'
        except sr.RequestError as e:
            result = r_type + ': Could not request results from Speech Recognition service; {0}'.format(e)
        results.append(result)
    return results

def main():
    """Transcribe every '.wav' file under DIRNAME with all recognizers and append to OUTPUTFILE.

    One CSV row is written per audio file: the file name followed by one
    column per recognizer result returned by ``recog_multiple`` (Google, then
    Sphinx).  The header row is written only once, and only when OUTPUTFILE
    does not exist yet or is empty.
    """
    # Keep only '.wav' files (case-insensitive) and sort them so the rows come
    # out in a stable order (001.wav, 002.wav, ...); os.walk lists directory
    # entries in arbitrary order.
    wav_files = sorted(
        file for file in get_file_paths(DIRNAME)
        if os.path.splitext(file)[1].lower() == '.wav'
    )
    if not wav_files:
        return  # nothing to transcribe; leave the output file untouched

    needs_header = (
        not os.path.exists(OUTPUTFILE) or os.path.getsize(OUTPUTFILE) == 0
    )
    # Open the output once for the whole batch; newline='' lets the csv module
    # control line endings (avoids blank rows on Windows).
    with open(OUTPUTFILE, 'a', newline='') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(['file_name', 'google', 'sphinx'])
        for file in wav_files:
            file_name = os.path.basename(file)  # basename is what goes in the CSV
            results = recog_multiple(file)
            # Spread the results over separate columns; writing the list
            # itself would store its Python repr in a single cell.
            writer.writerow([file_name, *results])


# Run the batch transcription only when executed as a script, not on import.
if __name__ == '__main__':
    main()

本文收集自互联网，转载请注明来源。

如有侵权，请联系[email protected] 删除。