折腾:
期间,api接口试用成功后,需要把服务集成到Flask后台中。先去集成到本地Flask环境。
期间,明显还要增加后台服务,用已有的celery加上周期性task,去每隔不到10分钟,更新一次JWT的token。
参考:
The Speech Synthesis Markup Language – Microsoft Cognitive Services | Microsoft Docs
Speech service REST APIs | Microsoft Docs
Use Text to Speech using Speech services – Microsoft Cognitive Services | Microsoft Docs
【总结】
然后用代码:
<code>import requests
### TTS ###
TTS_AUDIO_SUFFIX = ".mp3"
# Use Text to Speech using Speech services - Microsoft Cognitive Services | Microsoft Docs
# https://docs.microsoft.com/zh-cn/azure/cognitive-services/speech-service/how-to-text-to-speech
MS_TTS_OUTPUT_FORMAT = "audio-16khz-128kbitrate-mono-mp3"
# MS_TTS_OUTPUT_FORMAT = "riff-24khz-16bit-mono-pcm"
# https://docs.microsoft.com/zh-cn/azure/cognitive-services/speech-service/supported-languages
# MS_TTS_SPEAKER = "Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)"
# MS_TTS_SPEAKER = "Microsoft Server Speech Text to Speech Voice (en-US, JessaRUS)"
MS_TTS_SPEAKER = "Microsoft Server Speech Text to Speech Voice (en-US, Jessa24kRUS)"
MS_TTS_RATE = "-30.00%"
MS_TTS_VOLUME = "+20.00%"
# https://docs.microsoft.com/zh-cn/azure/cognitive-services/speech-service/how-to-text-to-speech
MS_ERR_BAD_REQUEST = 400
MS_ERR_UNAUTHORIZED = 401
MS_ERR_REQUEST_ENTITY_TOO_LARGE = 413
def initAudioSynthesis():
    """
    init audio synthesis related:
        init token
    :return:
    """
    createAudioTempFolder()
    # getBaiduToken()
    getMsToken()
    refreshMsTokenPeriodic()
def getMsToken():
    """get ms azure token"""
    refreshMsToken()
def refreshMsToken():
    """refresh microsoft azure token for later call tts api"""
    global app, log, gMsToken
    log.info("refreshMsToken: gMsToken=%s", gMsToken)
    getMsTokenUrl = app.config["MS_GET_TOKEN_URL"]
    reqHeaders = {"Ocp-Apim-Subscription-Key": app.config["MS_TTS_SECRET_KEY"]}
    log.info("getMsTokenUrl=%s, reqHeaders=%s", getMsTokenUrl, reqHeaders)
    resp = requests.post(getMsTokenUrl, headers=reqHeaders)
    log.info("resp=%s", resp)
    respTokenText = resp.text  # eyxxxxiJ9.xxx.xxx
    log.info("respTokenText=%s", respTokenText)
    gMsToken = respTokenText
    # # for debug
    # gMsToken = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJ1cm46bXMuY29nbml0aXZlc2VydmljZXMiLCJleHAiOiIxNTI3MDU5MTYxIiwicmVnaW9uIjoid2VzdHVzIiwic3Vic2NyaXB0aW9uLWlkIjoiOWQ0MmQ1N2I3YTQ1NDVjOThhZDE0ZDdjOWRhYWNjNjIiLCJwcm9kdWN0LWlkIjoiU3BlZWNoU2VydmljZXMuRnJlZSIsImNvZ25pdGl2ZS1zZXJ2aWNlcy1lbmRwb2ludCI6Imh0dHBzOi8vYXBpLmNvZ25pdGl2ZS5taWNyb3NvZnQuY29tL2ludGVybmFsL3YxLjAvIiwiYXp1cmUtcmVzb3VyY2UtaWQiOiIiLCJzY29wZSI6InNwZWVjaHNlcnZpY2VzIiwiYXVkIjoidXJuOm1zLnNwZWVjaHNlcnZpY2VzLndlc3R1cyJ9.B8_QoDtUtfsQs6OlKXG6p5SC4mm8s0nISdiUyr4Fnez"
    log.info("gMsToken=%s", gMsToken)
def refreshMsTokenPeriodic():
    """periodically refresh ms token"""
    # TODO: add celery periodic task to refresh ms token
    log.info("refreshMsTokenPeriodic")
    refreshMsToken()
def msTTS(unicodeText):
    """call ms azure tts to generate audio(mp3/wav/...) from text"""
    global app, log, gMsToken
    log.info("msTTS: unicodeText=%s", unicodeText)
    isOk = False
    audioBinData = None
    errNo = 0
    errMsg = "Unknown error"
    msTtsUrl = app.config["MS_TTS_URL"]
    log.info("msTtsUrl=%s", msTtsUrl)
    reqHeaders = {
        "Content-Type": "application/ssml+xml",
        "X-Microsoft-OutputFormat": MS_TTS_OUTPUT_FORMAT,
        "Ocp-Apim-Subscription-Key": app.config["MS_TTS_SECRET_KEY"],
        "Authorization": "Bear " + gMsToken
    }
    log.info("reqHeaders=%s", reqHeaders)
    # # for debug
    # MS_TTS_SPEAKER = "zhang san"
    ssmlDataStr = """
        <speak version='1.0' xmlns="http://www.w3.org/2001/10/synthesis" xml:lang='en-US'>
            <voice name='%s'>
                <prosody rate='%s' volume='%s'>
                    %s
                </prosody>
            </voice>
        </speak>
    """ % (MS_TTS_SPEAKER, MS_TTS_RATE, MS_TTS_VOLUME, unicodeText)
    log.info("ssmlDataStr=%s", ssmlDataStr)
    resp = requests.post(msTtsUrl, headers=reqHeaders, data=ssmlDataStr)
    log.info("resp=%s", resp)
    statusCode = resp.status_code
    log.info("statusCode=%s", statusCode)
    if statusCode == 200:
        # respContentType = resp.headers["Content-Type"]  # 'audio/x-wav', 'audio/mpeg'
        # log.info("respContentType=%s", respContentType)
        # if re.match("audio/.*", respContentType):
        audioBinData = resp.content
        log.info("resp content is audio binary data, length=%d", len(audioBinData))
        isOk = True
        errMsg = ""
    else:
        isOk = False
        errNo = resp.status_code
        errMsg = resp.reason
        log.error("resp errNo=%d, errMsg=%s", errNo, errMsg)
        # errNo=400, errMsg=Voice zhang san not supported
        # errNo=401, errMsg=Unauthorized
        # errNo=413, errMsg=Content length exceeded the allowed limit of 1024 characters.
    return isOk, audioBinData, errNo, errMsg
def doAudioSynthesis(unicodeText):
    """
        do audio synthesis from unicode text
        if failed for token invalid/expired, will refresh token to do one more retry
    """
    global app, log, gCurBaiduRespDict
    isOk = False
    audioBinData = None
    errMsg = ""
    # # for debug
    # gCurBaiduRespDict["access_token"] = "99.569b3b5b470938a522ce60d2e2ea2506.2592000.1528015602.282335-11192483"
    log.info("doAudioSynthesis: unicodeText=%s", unicodeText)
    # isOk, audioBinData, errNo, errMsg = baiduText2Audio(unicodeText)
    isOk, audioBinData, errNo, errMsg = msTTS(unicodeText)
    log.info("isOk=%s, errNo=%d, errMsg=%s", isOk, errNo, errMsg)
    if isOk:
        errMsg = ""
        log.info("got synthesized audio binary data length=%d", len(audioBinData))
    else:
        # if errNo == BAIDU_ERR_TOKEN_INVALID:
        if errNo == MS_ERR_UNAUTHORIZED:
            log.warning("Token invalid -> retry one for refresh token")
            # refreshBaiduToken()
            refreshMsToken()
            # isOk, audioBinData, errNo, errMsg = baiduText2Audio(unicodeText)
            isOk, audioBinData, errNo, errMsg = msTTS(unicodeText)
            log.info("after refresh token: isOk=%ss, errNo=%s, errMsg=%s", isOk, errNo, errMsg)
        else:
            log.warning("try synthesized audio occur error: errNo=%d, errMsg=%s", errNo, errMsg)
            audioBinData = None
    log.info("return isOk=%s, errMsg=%s", isOk, errMsg)
    if audioBinData:
        log.info("audio binary bytes=%d", len(audioBinData))
    return isOk, audioBinData, errMsg
def testAudioSynthesis():
    global app, log, gTempAudioFolder
    # testInputUnicodeText = u"as a book-collector, i have the story you just want to listen!"
    testInputUnicodeText = u"but i have an funny story, as well. would you like to listen, very very funny?"
    # # for debug
    # testInputUnicodeText = u"but i have an funny story, as well. ttttttttttooooooooolllllllllllooooooooonnnnnnnnnnggggggg but i have an funny story, as well. would you like to listen, very very funny?"
    isOk, audioBinData, errMsg = doAudioSynthesis(testInputUnicodeText)
    if isOk:
        audioBinDataLen = len(audioBinData)
        log.info("Now will save audio binary data %d bytes to file", audioBinDataLen)
        # 1. save audio binary data into tmp file
        newUuid = generateUUID()
        log.info("newUuid=%s", newUuid)
        tempFilename = newUuid + TTS_AUDIO_SUFFIX
        log.info("tempFilename=%s", tempFilename)
        if not gTempAudioFolder:
            createAudioTempFolder()
        tempAudioFullname = os.path.join(gTempAudioFolder, tempFilename) #'/Users/crifan/dev/dev_root/company/naturling/projects/robotDemo/server/tmp/audio/2aba73d1-f8d0-4302-9dd3-d1dbfad44458.mp3'
        log.info("tempAudioFullname=%s", tempAudioFullname)
        with open(tempAudioFullname, 'wb') as tmpAudioFp:
            log.info("tmpAudioFp=%s", tmpAudioFp)
            tmpAudioFp.write(audioBinData)
            tmpAudioFp.close()
            log.info("Done to write audio data into file of %d bytes", audioBinDataLen)
        log.info("use celery to delay delete tmp file")
    else:
        log.warning("Fail to get synthesis audio for errMsg=%s", errMsg)
initAudioSynthesis()
testAudioSynthesis()
</code>config.py
<code># Audio Synthesis == TTS MS_TTS_SECRET_KEY = "224xxxxx2" MS_GET_TOKEN_URL = "https://westus.api.cognitive.microsoft.com/sts/v1.0/issueToken" MS_TTS_URL = "https://westus.tts.speech.microsoft.com/cognitiveservices/v1" </code>
可以获得对应的token,和返回语音文件:

且调试模拟出错也是返回预期的错误的:

然后接着去:
新增celery的periodical的task,去refresh azure的token
【已解决】Flask中新增Celery周期任务去定期更新Azure的token
转载请注明:在路上 » 【已解决】把微软Azure语言合成TTS集成到Flask本地环境