
Install the dependency:

pip install azure-cognitiveservices-speech

The full code is below (swap in your own key):
import azure.cognitiveservices.speech as speechsdk
import time
import re
from xml.sax.saxutils import escape


def start_voice_interpreter():
    speech_key = "4ahKWCGXqoob2qwwel9HPTebKD0Mk1WMg43EDoaieIPJ6V33XyWGJQQJ99BLACYeBjFXJ3w3AAAYACOGLK1w"
    service_region = "eastus"

    # === 1. Supported language setup ===
    # Azure auto-detection supports at most 10 candidate languages. Configured here:
    # the 9 most common foreign languages plus Chinese.
    # Format: (recognition language code, translation target code, TTS voice name)
    langs_setup = [
        ("zh-CN", "zh-Hans", "zh-CN-XiaoxiaoNeural"),  # Chinese
        ("en-US", "en", "en-US-AvaNeural"),            # English
        ("ja-JP", "ja", "ja-JP-NanamiNeural"),         # Japanese
        ("ko-KR", "ko", "ko-KR-SunHiNeural"),          # Korean
        ("fr-FR", "fr", "fr-FR-DeniseNeural"),         # French
        ("es-ES", "es", "es-ES-ElviraNeural"),         # Spanish
        ("de-DE", "de", "de-DE-KatjaNeural"),          # German
        ("ru-RU", "ru", "ru-RU-SvetlanaNeural"),       # Russian
        ("it-IT", "it", "it-IT-ElsaNeural"),           # Italian
        ("pt-BR", "pt", "pt-BR-FranciscaNeural"),      # Portuguese
    ]

    # Candidate list for detection (Azure caps this at 10)
    detect_candidates = [x[0] for x in langs_setup]

    # Lookup maps
    # recognition code -> translation code (e.g. 'ja-JP' -> 'ja');
    # kept for reference, the callback below loops over langs_setup instead
    src_to_target_map = {x[0]: x[1] for x in langs_setup}
    # translation code -> voice name (e.g. 'ja' -> 'ja-JP-NanamiNeural')
    voice_map = {x[1]: x[2] for x in langs_setup}

    # === 2. Configuration ===
    translation_config = speechsdk.translation.SpeechTranslationConfig(
        subscription=speech_key, region=service_region)

    # Add every language as a translation target (Chinese input may have to be
    # translated into any of the foreign languages)
    for _, target_code, _ in langs_setup:
        translation_config.add_target_language(target_code)

    # Continuous language identification, so the detected language can change
    # mid-session. This must be set on the config before the recognizer is created.
    translation_config.set_property(
        property_id=speechsdk.PropertyId.SpeechServiceConnection_LanguageIdMode,
        value='Continuous'
    )

    # Auto language detection
    auto_detect_config = speechsdk.languageconfig.AutoDetectSourceLanguageConfig(
        languages=detect_candidates
    )

    # Speech synthesis
    tts_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=tts_config)

    # Recognizer
    audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
    recognizer = speechsdk.translation.TranslationRecognizer(
        translation_config=translation_config,
        audio_config=audio_config,
        auto_detect_source_language_config=auto_detect_config
    )

    # === State ===
    # Remembers which foreign language the current conversation uses; defaults to
    # English. Once the other speaker is detected as Japanese this becomes 'ja',
    # so your next Chinese sentence is translated into 'ja'.
    state = {"last_foreign_target": "en"}

    # === Playback ===
    def play_translation(text, language_code):
        if not text:
            return
        voice_name = voice_map.get(language_code, "en-US-AvaNeural")
        # Simple XML escaping so special characters don't break the SSML
        safe_text = escape(text)
        ssml_string = f"""
        <speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis'
               xml:lang='{language_code}'>
            <voice name='{voice_name}'>{safe_text}</voice>
        </speak>
        """
        try:
            # .get() blocks until playback finishes, so synthesis errors land
            # in the except block instead of being dropped
            speech_synthesizer.speak_ssml_async(ssml_string).get()
        except Exception as e:
            print(f"[Playback error]: {e}")

    # === Result callback ===
    def result_callback(evt):
        if evt.result.reason == speechsdk.ResultReason.TranslatedSpeech:
            # Source language detected by Azure (e.g. "ja-JP")
            detected_src_lang = evt.result.properties.get(
                speechsdk.PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult,
                ""
            )
            text = evt.result.text

            # Map to the short translation code (e.g. "ja"). Azure sometimes returns
            # "ja-JP", sometimes with extra information attached, so match loosely.
            current_lang_code = None
            for src_full, target_short, _ in langs_setup:
                if src_full in detected_src_lang:
                    current_lang_code = target_short
                    break

            if not current_lang_code:
                print(f">> [Ignored]: unrecognized language code {detected_src_lang}")
                return

            print(f"\n[Detected]: {detected_src_lang} ({current_lang_code}) | [Source]: {text}")

            # --- Case A: Chinese detected (you speaking) ---
            if current_lang_code == "zh-Hans":
                # Guard against misdetection (labelled Chinese but all Latin letters)
                is_latin_text = bool(re.search(r"[a-zA-Z]", text))
                has_chinese_char = bool(re.search(r"[\u4e00-\u9fa5]", text))
                if is_latin_text and not has_chinese_char:
                    print(">> [Warning]: Chinese mode detected but the content looks "
                          "like English; Azure probably misdetected, skipping.")
                    return

                # Normal Chinese -> translate into the most recent foreign language
                target_lang = state["last_foreign_target"]
                trans_text = evt.result.translations.get(target_lang, "")
                print(f"[Chinese -> {target_lang}]: {trans_text}")
                play_translation(trans_text, target_lang)

            # --- Case B: foreign language detected (the other speaker) ---
            else:
                # Update state: remember this foreign language
                state["last_foreign_target"] = current_lang_code
                # Foreign language -> Chinese
                target_lang = "zh-Hans"
                trans_text = evt.result.translations.get(target_lang, "")
                print(f"[{current_lang_code} -> Chinese]: {trans_text}")
                play_translation(trans_text, target_lang)

    recognizer.recognized.connect(result_callback)

    # Start
    print("--------------------------------------------------")
    print("Universal interpreter started.")
    print("1. Foreign speech (EN/JA/KO/FR/DE/ES/RU/IT/PT) -> translated into Chinese")
    print("2. Chinese speech -> translated into the last foreign language heard (default: English)")
    print("--------------------------------------------------")
    recognizer.start_continuous_recognition()

    try:
        while True:
            time.sleep(0.5)
    except KeyboardInterrupt:
        recognizer.stop_continuous_recognition()


if __name__ == "__main__":
    start_voice_interpreter()
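While testing, it also helps to hook up the recognizer's canceled event: with a wrong key or region the session just stops, result_callback never fires, and the script sits silently in the while True loop. A minimal sketch, assuming the recognizer variable from the listing above (the canceled_callback name is mine, not part of the original code):

# Minimal diagnostics for silent failures: connect this right next to
# recognizer.recognized.connect(result_callback) in the code above.
def canceled_callback(evt):
    details = evt.result.cancellation_details
    print(f"[Canceled]: {details.reason}")
    if details.reason == speechsdk.CancellationReason.Error:
        # Typical causes: invalid key, wrong region, no network
        print(f"[Error details]: {details.error_details}")

recognizer.canceled.connect(canceled_callback)

The cancellation details carry the service's own error text, which usually names the exact problem (authentication failure, quota, connectivity) instead of leaving you guessing why nothing is being recognized.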