Implement improve mode

In improve mode, the AI is allowed to change any existing translation in order
to correct mistakes made by the old Google Translate code.
This commit is contained in:
krzys-h
2025-09-13 20:02:44 +02:00
parent 0dd8bd72ff
commit d1f5abfc1f

View File

@@ -31,6 +31,7 @@
import os
import json
import argparse
from typing import Dict, List, Tuple
from google import genai
@@ -135,7 +136,24 @@ def build_translation_prompt(
english_comments: List[str],
existing_translated_comments: List[str],
generator_comment_en: str,
improve_mode: bool,
) -> str:
if improve_mode:
existing_header = "Here are previous translations for this language (you may reuse them or improve them; keep unchanged if already correct):"
items_header = "Here are the strings to review and output FINAL translations for (provide a value for every key; if keeping the existing translation, repeat it verbatim):"
mode_rules = """
Improve mode rules:
- You are revising existing translations for a smartwatch UI.
- For each string:
- If the English source text changed in meaning, update the translation accordingly.
- If the existing translation has grammar or style issues, or you are certain a different translation is a better fit (more natural, concise, and consistent with UI), provide an improved translation.
- If the existing translation is already accurate, natural, and consistent, you may keep it unchanged by returning the same text.
""".strip()
else:
existing_header = "Here are existing translations for this language (do not modify these; use for terminology/style consistency):"
items_header = "Here are the ONLY strings that need new translations (translate the values):"
mode_rules = ""
return f"""
You are a professional localizer for a smartwatch UI. Translate UI strings into {language_name}.
@@ -150,6 +168,7 @@ Rules:
- Keep meaning accurate and UI-appropriate (short, natural, consistent).
- Use consistent terminology aligned with existing translations for this language.
- Do NOT translate the string IDs themselves.
{("\n" + mode_rules) if mode_rules else ""}
Comments handling:
- You are given comments from the English XML (in order) and the current translations (same order where available).
@@ -160,10 +179,10 @@ Comments handling:
Here are the complete English strings for context:
{json.dumps(english_full, ensure_ascii=False, indent=2)}
Here are existing translations for this language (do not modify these; use for terminology/style consistency):
{existing_header}
{json.dumps(existing_translations, ensure_ascii=False, indent=2)}
Here are the ONLY strings that need new translations (translate the values):
{items_header}
{json.dumps(to_translate, ensure_ascii=False, indent=2)}
Comments to translate (same order as in the XML):
@@ -193,6 +212,8 @@ def translate_language(
lang_tuple: Tuple[str, str, str],
english_soup: BeautifulSoup,
english_strings: Dict[str, str],
verbose: bool = False,
improve: bool = False,
) -> None:
garmin_code, _unused, language_name = lang_tuple
@@ -226,7 +247,7 @@ def translate_language(
else:
generator_comment_en = f"Generated by {MODEL_NAME} from English to {language_name}"
# Decide which strings need translation (not in corrections, not in previous)
# Decide which strings need translation
to_translate_map: Dict[str, str] = {}
final_values: Dict[str, str] = {}
@@ -234,21 +255,33 @@ def translate_language(
sid = s.get("id")
if not sid:
continue
# Always keep English as-is for exception IDs
if sid in exceptionIds:
# Keep English as-is for exception IDs
final_values[sid] = s.get_text()
continue
# Respect corrections.xml as authoritative
if sid in corrections_map and corrections_map[sid] is not None:
final_values[sid] = corrections_map[sid]
elif sid in prev_map and prev_map[sid] is not None:
continue
if improve:
# Improve mode: reprocess all remaining strings
to_translate_map[sid] = s.get_text()
else:
# Normal mode: translate only new strings
if sid in prev_map and prev_map[sid] is not None:
final_values[sid] = prev_map[sid]
else:
to_translate_map[sid] = s.get_text()
# If there are no new strings to translate, skip this language entirely
# If there are no strings to translate (e.g., all covered by corrections), skip
if not to_translate_map:
print(f" Skipping {language_name}: no new strings to translate.")
reason = "no strings to translate (all covered by corrections or exceptions)"
if not improve:
reason = "no new strings to translate."
print(f" Skipping {language_name}: {reason}")
return
# Prepare context (always include full English strings)
@@ -266,8 +299,12 @@ def translate_language(
english_comments=english_comments,
existing_translated_comments=existing_translated_comments,
generator_comment_en=generator_comment_en,
improve_mode=improve,
)
if verbose:
print(prompt)
config = genai.types.GenerateContentConfig(
temperature=0,
response_mime_type="application/json",
@@ -291,6 +328,9 @@ def translate_language(
raise RuntimeError("Empty response from model; cannot parse translations.")
data = json.loads(txt)
if verbose:
print(data)
translations = data.get("translations", {}) or {}
for sid, translated in translations.items():
if sid in to_translate_map:
@@ -327,6 +367,11 @@ def translate_language(
w.write(soup.encode("utf-8") + b"\n")
def main():
parser = argparse.ArgumentParser(description="Translate Garmin IQ strings.xml using Gemini.")
parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose debug output (prints prompts and responses)")
parser.add_argument("-i", "--improve", action="store_true", help="Improve mode: re-run all strings through translation for potential improvements")
args = parser.parse_args()
# Init client
client = genai.Client()
@@ -342,9 +387,16 @@ def main():
total_langs = len(languages)
for i, lang in enumerate(languages, start=1):
print(f"{i} of {total_langs}: Translating English to {lang[2]}")
print(f"{i} of {total_langs}: Translating English to {lang[2]}" + (" [improve]" if args.improve else ""))
try:
translate_language(client, lang, english_soup, english_strings)
translate_language(
client=client,
lang_tuple=lang,
english_soup=english_soup,
english_strings=english_strings,
verbose=args.verbose,
improve=args.improve,
)
except Exception as e:
print(f" Error translating {lang[2]}: {e}")