import openpyxl from openpyxl.styles import PatternFill import time import os import re import requests from openai import AzureOpenAI
 
  AUTO_TRANSLATE_FILL = PatternFill(     start_color="FFD8E4BC",     end_color="FFD8E4BC",     fill_type="solid" )
  LANGUAGE_MAPPING = {     "中文简体": "Simplified Chinese",     "中文繁体": "Traditional Chinese",     "Japanese": "Japanese",     "Spanish": "Spanish",     "Korean": "Korean",     "Thailand": "Thai",     "Indonesia": "Indonesian",     "German": "German",     "French": "French",     "Portuguese": "Portuguese" }
 
  DEEPSEEK_API_URL = os.getenv("DEEPSEEK_API_URL") DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")
  DEEPSEEK_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B" AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT") AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY") GPT_API_VERSION = "2024-06-01" BATCH_SIZE = 8 MAX_TOKENS = 4096
 
  def parse_translation_result(raw_text, texts):     translated = []     pattern = re.compile(r'^\d+\.\s*(.+)$', re.MULTILINE)     matches = pattern.findall(raw_text)
      if len(matches) == len(texts):         translated = [m.strip() for m in matches]     else:                  translated = [line.split(". ", 1)[-1].strip()                       for line in raw_text.split("\n")                       if line.strip() and line[0].isdigit()]     return translated
 
  def batch_translate(texts, target_lang, model_choice):     if not texts or not target_lang:         return []
           numbered_texts = "\n".join([f"{i + 1}. {text}" for i, text in enumerate(texts)])          system_prompt = f"""你是一位专业App内文案翻译人员,请将以下英文文本列表准确翻译为{LANGUAGE_MAPPING[target_lang]}: {numbered_texts}
  请严格遵循: 1. 保持专业术语一致性 2. 保留数字和特殊符号,${{TT}}、${{time}}等是占位符均不翻译。 3. 使用正式书面语体 4. 按以下格式返回: 1. 翻译结果 2. 翻译结果"""
      if model_choice == "deepseek":         payload = {             "model": DEEPSEEK_MODEL_NAME,             "messages": [                 {"role": "system", "content": system_prompt},                 {"role": "user", "content": "请开始翻译"}             ],             "temperature": 0,             "max_tokens": MAX_TOKENS         }
          headers = {             "Authorization": f"Bearer {DEEPSEEK_API_KEY}",             "Content-Type": "application/json"         }
          try:                          print(system_prompt)             print("🚀🚀🚀当前DeepSeek模型是" + DEEPSEEK_MODEL_NAME)
              response = requests.post(DEEPSEEK_API_URL, json=payload, headers=headers, timeout=(10, 30))             response.raise_for_status()
                           print(f"API响应状态码: {response.status_code}")
              try:                 response_data = response.json()                 print(response_data)             except ValueError:                 raise Exception("无效的JSON响应")
              if 'choices' not in response_data or not response_data['choices']:                 raise Exception("API返回结构异常")
              raw_text = response_data['choices'][0]['message']['content']             translated = parse_translation_result(raw_text, texts)             print(translated)             return translated
          except Exception as e:             print(f"API调用失败: {str(e)}")             return []          elif model_choice == "gpt":         client = AzureOpenAI(             azure_endpoint=AZURE_OPENAI_ENDPOINT,             api_key=AZURE_OPENAI_API_KEY,             api_version=GPT_API_VERSION         )
          try:             response = client.chat.completions.create(                 model="gpt-4o-mini",                 messages=[                     {"role": "system", "content": system_prompt},                     {"role": "user", "content": "请开始翻译"}                 ]             )             raw_text = response.choices[0].message.content             translated = parse_translation_result(raw_text, texts)             print(translated)             return translated         except Exception as e:             print(f"API调用失败: {str(e)}")             return []
 
  def process_excel(input_path, output_path, model_choice):     print(f"开始读取Excel {input_path}")     print(f"当前使用的大模型是: {model_choice.upper()}")
      try:         wb = openpyxl.load_workbook(input_path)         sheet = wb.active
          source_col = None         for idx, cell in enumerate(sheet[1], 1):             if cell.value and cell.value.strip() == "English":                 source_col = idx                 break
          if not source_col:             raise ValueError("工作表中未找到'English'列,请检查第一行标题")
                   for col_idx in range(1, sheet.max_column + 1):             header_cell = sheet.cell(row=1, column=col_idx)             if not header_cell.value:                 continue
              lang_name = header_cell.value.strip()             if lang_name == "English" or lang_name not in LANGUAGE_MAPPING:                 continue
              print(f"\n=== 正在处理 {lang_name} ===")             process_language_column(sheet, source_col, col_idx, lang_name, model_choice)
                   wb.save(output_path)         print(f"\n处理完成!文件已保存至:{output_path}")
      except Exception as e:         print(f"处理过程中发生错误: {str(e)}")         raise
 
  def process_language_column(sheet, source_col, target_col, lang_name, model_choice):     """处理单个语言列"""     batch_texts = []     batch_positions = []
      for row_idx in range(2, sheet.max_row + 1):         source_cell = sheet.cell(row=row_idx, column=source_col)         target_cell = sheet.cell(row=row_idx, column=target_col)
                   if not source_cell.value or (target_cell.value and str(target_cell.value).strip()):             continue
                   batch_texts.append(str(source_cell.value).strip())         batch_positions.append((row_idx, target_col))
          if len(batch_texts) >= BATCH_SIZE:             process_batch(sheet, batch_texts, batch_positions, lang_name, model_choice)             batch_texts.clear()             batch_positions.clear()
           if batch_texts:         process_batch(sheet, batch_texts, batch_positions, lang_name, model_choice)
 
  def process_batch(sheet, texts, positions, lang_name, model_choice):     print(f"正在批量翻译 {len(texts)} 条文本到 {lang_name}")
      translated = []     for retry in range(3):         try:             translated = batch_translate(texts, lang_name, model_choice)             if len(translated) == len(texts):                 break             print(f"第{retry + 1}次重试...")             time.sleep(2 ** retry)           except Exception as e:             print(f"批处理失败: {str(e)}")
      if len(translated) != len(texts):         print(f"未能获取完整翻译,预期{len(texts)}条,实际{len(translated)}条")         translated += [f"[翻译失败] {text}" for text in texts[len(translated):]]
           for (row, col), text in zip(positions, translated):         cell = sheet.cell(row=row, column=col)         cell.value = text         cell.fill = AUTO_TRANSLATE_FILL
 
  if __name__ == '__main__':     import sys     if len(sys.argv) > 1 and sys.argv[1] not in ["deepseek", "gpt"]:         print("请指定有效的模型,可选值为 [deepseek|gpt],默认使用gpt。")         sys.exit(1)
      model_choice = sys.argv[1] if len(sys.argv) > 1 else "gpt"     input_file = os.path.expanduser('~/Desktop/test2.xlsx')     output_file = os.path.expanduser('~/Desktop/translated_output.xlsx')
      try:         process_excel(input_file, output_file, model_choice)     except Exception as e:         print(f"运行失败: {str(e)}")         exit(1)
 
  |