You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

154 lines
4.0 KiB

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
实时语音转文字系统主程序
基于sherpa-onnx的实时语音识别应用
"""
import sys
import argparse
import logging
from pathlib import Path
# 添加src目录到Python路径
sys.path.insert(0, str(Path(__file__).parent / "src"))
from src import RealTimeVTT, ModelDownloader, ModelConfig
def setup_logging(level: str = "INFO"):
    """Configure logging via ``logging.basicConfig``.

    Args:
        level: Name of a level constant on the ``logging`` module (for
            example ``"INFO"`` or ``"debug"``). It is upper-cased before
            being looked up as an attribute of ``logging``.

    Raises:
        AttributeError: If ``logging`` has no attribute with that name.
    """
    resolved_level = getattr(logging, level.upper())
    record_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=resolved_level, format=record_format)
def check_models() -> bool:
    """Report whether the model files required by ``ModelConfig`` exist.

    Builds a fresh ``ModelConfig`` and asks it for the files it considers
    missing. When any are reported, each one is printed together with the
    command that downloads the model.

    Returns:
        True when ``validate_model_files()`` reports nothing missing,
        False otherwise.
    """
    absent_files = ModelConfig().validate_model_files()

    # Nothing missing: no message is printed.
    if not absent_files:
        return True

    print("错误: 缺少模型文件")
    print("缺少的文件:")
    for absent in absent_files:
        print(f" - {absent}")
    print("\n请运行以下命令下载模型:")
    print(" python main.py --download-model")
    return False
def download_model_interactive():
    """Run the interactive model download.

    Creates a ``ModelDownloader`` around a default ``ModelConfig`` and
    delegates to its ``interactive_download()`` method.
    """
    ModelDownloader(ModelConfig()).interactive_download()
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the application."""
    parser = argparse.ArgumentParser(
        description="实时语音转文字系统",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # Kept as explicit "\n"-terminated pieces so the help text stays
        # byte-identical regardless of how this function is indented.
        epilog=(
            "\n示例用法:\n"
            "python main.py # 启动实时语音识别\n"
            "python main.py --download-model # 下载语音识别模型\n"
            "python main.py --list-devices # 列出音频设备\n"
            "python main.py --log-level DEBUG # 启用调试日志\n"
        ),
    )
    parser.add_argument(
        "--download-model",
        action="store_true",
        help="下载语音识别模型",
    )
    parser.add_argument(
        "--list-devices",
        action="store_true",
        help="列出可用的音频设备",
    )
    parser.add_argument(
        "--log-level",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        default="INFO",
        help="设置日志级别 (默认: INFO)",
    )
    parser.add_argument(
        "--no-save",
        action="store_true",
        help="不保存识别结果到文件",
    )
    parser.add_argument(
        "--no-partial",
        action="store_true",
        help="不显示部分识别结果",
    )
    return parser


def _print_audio_devices(app) -> int:
    """Print the audio devices reported by *app* and return an exit code.

    Returns 1 when ``app.audio_processor.initialize()`` fails, 0 after the
    devices were printed.
    """
    if not app.audio_processor.initialize():
        print("错误: 无法初始化音频设备")
        return 1
    try:
        devices = app.list_audio_devices()
        print("可用的音频设备:")
        for device in devices:
            print(f" [{device['index']}] {device['name']}")
            print(f" 通道数: {device['channels']}")
            print(f" 采样率: {device['sample_rate']} Hz")
    finally:
        # Once the audio processor initialized, always call cleanup, even
        # if listing or printing the devices raises.
        app.cleanup()
    return 0


def main() -> int:
    """Parse the command line and run the requested action.

    Actions, in order of precedence:
      * ``--download-model``: run the interactive model download and exit.
      * ``--list-devices``: print the available audio devices and exit.
      * otherwise: verify the model files, initialize the application and
        run it interactively.

    ``--no-save`` and ``--no-partial`` switch off the corresponding
    ``app.app_config`` flags before any action other than the download.

    Returns:
        0 on success or when interrupted with Ctrl+C, 1 on any failure.
        Invalid command-line arguments make ``argparse`` raise
        ``SystemExit`` before this function returns.
    """
    args = _build_arg_parser().parse_args()

    # Configure logging.
    setup_logging(args.log_level)

    try:
        if args.download_model:
            # Download the model.
            download_model_interactive()
            return 0

        # Create the application instance.
        app = RealTimeVTT()

        # Apply command-line overrides to the application config.
        if args.no_save:
            app.app_config.save_to_file = False
        if args.no_partial:
            app.app_config.show_partial_results = False

        if args.list_devices:
            # List the audio devices.
            return _print_audio_devices(app)

        # Check the model files.
        if not check_models():
            return 1

        # Initialize the application.
        if not app.initialize():
            print("错误: 应用初始化失败")
            return 1

        # Run the application.
        app.run_interactive()
    except KeyboardInterrupt:
        print("\n程序被用户中断")
    except Exception as e:
        # Top-level boundary: report the failure and turn it into an exit code.
        print(f"错误: {e}")
        if args.log_level == "DEBUG":
            import traceback
            traceback.print_exc()
        return 1
    return 0
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the process exit status.
    raise SystemExit(main())