Implement improve mode

In improve mode, the AI is allowed to change any existing translation in order to correct mistakes made by the old Google Translate-based code.
2026-02-03 11:38:11 +00:00 · 2025-09-13 20:02:44 +02:00
parent 0dd8bd72ff
commit d1f5abfc1f
1 changed file with 64 additions and 12 deletions
--- a/translate.py
+++ b/translate.py
@@ -31,6 +31,7 @@
 import os
 import json
 import argparse
 from typing import Dict, List, Tuple
 from google import genai
@@ -135,7 +136,24 @@ def build_translation_prompt(
    english_comments: List[str],
    existing_translated_comments: List[str],
    generator_comment_en: str,
    improve_mode: bool,
 ) -> str:
    if improve_mode:
        existing_header = "Here are previous translations for this language (you may reuse them or improve them; keep unchanged if already correct):"
        items_header = "Here are the strings to review and output FINAL translations for (provide a value for every key; if keeping the existing translation, repeat it verbatim):"
        mode_rules = """
 Improve mode rules:
 - You are revising existing translations for a smartwatch UI.
 - For each string:
  - If the English source text changed in meaning, update the translation accordingly.
  - If the existing translation has grammar or style issues, or you are certain a different translation is a better fit (more natural, concise, and consistent with UI), provide an improved translation.
  - If the existing translation is already accurate, natural, and consistent, you may keep it unchanged by returning the same text.
 """.strip()
    else:
        existing_header = "Here are existing translations for this language (do not modify these; use for terminology/style consistency):"
        items_header = "Here are the ONLY strings that need new translations (translate the values):"
        mode_rules = ""
    return f"""
 You are a professional localizer for a smartwatch UI. Translate UI strings into {language_name}.
@@ -150,6 +168,7 @@ Rules:
 - Keep meaning accurate and UI-appropriate (short, natural, consistent).
 - Use consistent terminology aligned with existing translations for this language.
 - Do NOT translate the string IDs themselves.
 {("\n" + mode_rules) if mode_rules else ""}
 Comments handling:
 - You are given comments from the English XML (in order) and the current translations (same order where available).
@@ -160,10 +179,10 @@ Comments handling:
 Here are the complete English strings for context:
 {json.dumps(english_full, ensure_ascii=False, indent=2)}
-Here are existing translations for this language (do not modify these; use for terminology/style consistency):
+{existing_header}
 {json.dumps(existing_translations, ensure_ascii=False, indent=2)}
-Here are the ONLY strings that need new translations (translate the values):
+{items_header}
 {json.dumps(to_translate, ensure_ascii=False, indent=2)}
 Comments to translate (same order as in the XML):
@@ -193,6 +212,8 @@ def translate_language(
    lang_tuple: Tuple[str, str, str],
    english_soup: BeautifulSoup,
    english_strings: Dict[str, str],
    verbose: bool = False,
    improve: bool = False,
 ) -> None:
    garmin_code, _unused, language_name = lang_tuple
@@ -226,7 +247,7 @@ def translate_language(
    else:
        generator_comment_en = f"Generated by {MODEL_NAME} from English to {language_name}"
-    # Decide which strings need translation (not in corrections, not in previous)
+    # Decide which strings need translation
    to_translate_map: Dict[str, str] = {}
    final_values: Dict[str, str] = {}
@@ -234,21 +255,33 @@ def translate_language(
        sid = s.get("id")
        if not sid:
            continue
        # Always keep English as-is for exception IDs
        if sid in exceptionIds:
            # Keep English as-is for exception IDs
            final_values[sid] = s.get_text()
            continue
        # Respect corrections.xml as authoritative
        if sid in corrections_map and corrections_map[sid] is not None:
            final_values[sid] = corrections_map[sid]
-        elif sid in prev_map and prev_map[sid] is not None:
+            continue
            final_values[sid] = prev_map[sid]
        else:
            to_translate_map[sid] = s.get_text()
-    # If there are no new strings to translate, skip this language entirely
+        if improve:
            # Improve mode: reprocess all remaining strings
            to_translate_map[sid] = s.get_text()
        else:
            # Normal mode: translate only new strings
            if sid in prev_map and prev_map[sid] is not None:
                final_values[sid] = prev_map[sid]
            else:
                to_translate_map[sid] = s.get_text()
    # If there are no strings to translate (e.g., all covered by corrections), skip
    if not to_translate_map:
-        print(f"  Skipping {language_name}: no new strings to translate.")
+        reason = "no strings to translate (all covered by corrections or exceptions)"
        if not improve:
            reason = "no new strings to translate."
        print(f"  Skipping {language_name}: {reason}")
        return
    # Prepare context (always include full English strings)
@@ -266,8 +299,12 @@ def translate_language(
        english_comments=english_comments,
        existing_translated_comments=existing_translated_comments,
        generator_comment_en=generator_comment_en,
        improve_mode=improve,
    )
    if verbose:
        print(prompt)
    config = genai.types.GenerateContentConfig(
        temperature=0,
        response_mime_type="application/json",
@@ -291,6 +328,9 @@ def translate_language(
            raise RuntimeError("Empty response from model; cannot parse translations.")
        data = json.loads(txt)
    if verbose:
        print(data)
    translations = data.get("translations", {}) or {}
    for sid, translated in translations.items():
        if sid in to_translate_map:
@@ -327,6 +367,11 @@ def translate_language(
        w.write(soup.encode("utf-8") + b"\n")
 def main():
    parser = argparse.ArgumentParser(description="Translate Garmin IQ strings.xml using Gemini.")
    parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose debug output (prints prompts and responses)")
    parser.add_argument("-i", "--improve", action="store_true", help="Improve mode: re-run all strings through translation for potential improvements")
    args = parser.parse_args()
    # Init client
    client = genai.Client()
@@ -342,9 +387,16 @@ def main():
    total_langs = len(languages)
    for i, lang in enumerate(languages, start=1):
-        print(f"{i} of {total_langs}: Translating English to {lang[2]}")
+        print(f"{i} of {total_langs}: Translating English to {lang[2]}" + (" [improve]" if args.improve else ""))
        try:
-            translate_language(client, lang, english_soup, english_strings)
+            translate_language(
                client=client,
                lang_tuple=lang,
                english_soup=english_soup,
                english_strings=english_strings,
                verbose=args.verbose,
                improve=args.improve,
            )
        except Exception as e:
            print(f"  Error translating {lang[2]}: {e}")