mirror of
https://github.com/house-of-abbey/GarminHomeAssistant.git
synced 2025-09-15 05:21:33 +00:00
Update the translation script to use Gemini instead of Google Translate
This code itself was also AI generated.
This commit is contained in:
363
translate.py
363
translate.py
@@ -15,32 +15,37 @@
|
||||
# Description:
|
||||
#
|
||||
# Python script to automatically translate the strings.xml file to each supported
|
||||
# language using Google Translate.
|
||||
# language. Rewritten by krzys_h with the help of AI to use Gemini instead of
|
||||
# Google Translate for more contextual translations.
|
||||
#
|
||||
# Python installation:
|
||||
# pip install beautifulsoup4
|
||||
# pip install deep-translator
|
||||
# NB. For XML formatting:
|
||||
# pip install lxml
|
||||
# Requirements:
|
||||
# pip install google-genai beautifulsoup4 lxml
|
||||
#
|
||||
# References:
|
||||
# * https://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
# * https://realpython.com/beautiful-soup-web-scraper-python/
|
||||
# * https://www.crummy.com/software/BeautifulSoup/bs4/doc/#parsing-xml
|
||||
# * https://www.crummy.com/software/BeautifulSoup/bs4/doc/#xml
|
||||
# Env:
|
||||
# export GEMINI_API_KEY="YOUR_API_KEY"
|
||||
#
|
||||
# To get your own API key, go to:
|
||||
# https://aistudio.google.com/app/apikey
|
||||
#
|
||||
####################################################################################
|
||||
|
||||
import os
|
||||
import json
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import Comment
|
||||
from deep_translator import GoogleTranslator
|
||||
from google import genai
|
||||
from bs4 import BeautifulSoup, Comment
|
||||
|
||||
# List of tuples in the form of:
|
||||
# * Garmin IQ language three letter mnemonic,
|
||||
# * Google Translate language mnemonic,
|
||||
# * Language familiar name (mainly for reference)
|
||||
languages: list[tuple[str, str, str]] = [
|
||||
# ---------------- Configuration ----------------
|
||||
|
||||
# Gemini model name
|
||||
MODEL_NAME = "gemini-2.5-flash"
|
||||
|
||||
# Language definitions:
|
||||
# * Garmin IQ language three-letter mnemonic (used in resources-XXX folder),
|
||||
# * Unused Google mnemonic kept for reference,
|
||||
# * Human-readable language name for prompts
|
||||
languages: List[Tuple[str, str, str]] = [
|
||||
("ara", "ar", "Arabic"),
|
||||
("bul", "bg", "Bulgarian"),
|
||||
("zhs", "zh-CN", "Chinese (Simplified)"),
|
||||
@@ -79,75 +84,269 @@ languages: list[tuple[str, str, str]] = [
|
||||
("vie", "vi", "Vietnamese"),
|
||||
]
|
||||
|
||||
langLength = len(languages)
|
||||
exceptionIds: List[str] = ["AppName", "AppVersionTitle"]
|
||||
|
||||
exceptionIds: list[str] = ["AppName", "AppVersionTitle"]
|
||||
titleIds: list[str] = []
|
||||
# ---------------- Helpers ----------------
|
||||
|
||||
# def merge(curr: BeautifulSoup, prev: BeautifulSoup) -> BeautifulSoup:
|
||||
# """
|
||||
# Merge the current strings.xml with the previous one, overwriting
|
||||
# the previous strings with the current ones if they exist.
|
||||
# """
|
||||
# out = prev.__copy__()
|
||||
# for s in curr.find(name="strings").find_all(name="string"):
|
||||
# s_prev = out.find(name="string", attrs={"id": s["id"]})
|
||||
# if s_prev:
|
||||
# s_prev.string = s.string
|
||||
# else:
|
||||
# out.find(name="strings").append(s)
|
||||
# return out
|
||||
def load_xml_as_soup(path: str) -> BeautifulSoup:
    """Parse the XML file at ``path`` into a BeautifulSoup document.

    Carriage returns are removed from the text before parsing.

    Args:
        path: Location of the XML file to read (decoded as UTF-8).

    Returns:
        The parsed document, or an empty XML document when the file does
        not exist.
    """
    # Open directly instead of testing os.path.exists() first, so a file that
    # disappears between the check and the open is still treated as missing.
    try:
        with open(path, "r", encoding="utf-8") as f:
            xml_text = f.read()
    except FileNotFoundError:
        return BeautifulSoup("", features="xml")
    return BeautifulSoup(xml_text.replace("\r", ""), features="xml")
|
||||
|
||||
i = 1
|
||||
with open("./resources/strings/strings.xml", "r") as f:
|
||||
c = f.read().replace("\r", "")
|
||||
for l in languages:
|
||||
os.makedirs(f"./resources-{l[0]}/strings/", exist_ok=True)
|
||||
# Old translations will not be automatically updated/removed, use removeTranslations.py
|
||||
try:
|
||||
with open(f"./resources-{l[0]}/strings/strings.xml", "r", encoding="utf-8") as r:
|
||||
prev = BeautifulSoup(r.read().replace("\r", ""), features="xml")
|
||||
except FileNotFoundError:
|
||||
prev = BeautifulSoup("", features="xml")
|
||||
try:
|
||||
with open(f"./resources-{l[0]}/strings/corrections.xml", "r", encoding="utf-8") as r:
|
||||
curr = BeautifulSoup(r.read().replace("\r", ""), features="xml")
|
||||
except FileNotFoundError:
|
||||
curr = BeautifulSoup("", features=["xml"])
|
||||
print(f"{i} of {langLength}: Translating English to {l[2]}")
|
||||
soup = BeautifulSoup(c, features="xml")
|
||||
translator = GoogleTranslator(source="en", target=l[1])
|
||||
soup.find(name="strings").insert_before("\n\n")
|
||||
soup.find(name="strings").insert_before(
|
||||
Comment(
|
||||
f"\n Generated by Google Translate: English to {l[2]}\n " +
|
||||
translator.translate("Generated by Google Translate from English") + "\n"))
|
||||
soup.find(name="strings").insert_before("\n\n")
|
||||
def extract_strings(soup: BeautifulSoup) -> Dict[str, str]:
    """Collect the ``<string>`` entries found under the ``<strings>`` element.

    Args:
        soup: Parsed strings XML document.

    Returns:
        Mapping of each ``<string>`` element's ``id`` attribute to its text.
        Elements without an ``id`` are ignored. An empty dict is returned
        when the document has no ``<strings>`` element.
    """
    out: Dict[str, str] = {}
    strings_node = soup.find(name="strings")
    if not strings_node:
        return out
    for s in strings_node.find_all(name="string"):
        sid = s.get("id")
        if not sid:
            continue
        # ``.string`` is None when the element does not have exactly one
        # child; fall back to the concatenated text in that case.
        out[sid] = s.string if s.string is not None else s.get_text()
    return out
|
||||
|
||||
for s in soup.find(name="strings").find_all(name="string"):
|
||||
s.insert_before(" ")
|
||||
if s["id"] in exceptionIds:
|
||||
def extract_comments_in_order(soup: BeautifulSoup) -> List[str]:
    """Return the text of every XML comment inside ``<strings>``.

    Args:
        soup: Parsed strings XML document.

    Returns:
        Comment texts in the order ``find_all`` yields them, or an empty
        list when the document has no ``<strings>`` element.
    """
    strings_node = soup.find(name="strings")
    if not strings_node:
        return []
    return [
        str(c)
        for c in strings_node.find_all(string=lambda text: isinstance(text, Comment))
    ]
|
||||
|
||||
def replace_comments_in_order(soup: BeautifulSoup, translated_comments: List[str]) -> None:
    """Replace the comments inside ``<strings>`` with translated text, in order.

    The n-th comment found is replaced by ``translated_comments[n]``. When
    fewer translations than comments are supplied, the remaining comments
    are left untouched; surplus translations are ignored.

    Args:
        soup: Document to modify in place.
        translated_comments: Replacement comment texts, in document order.
    """
    strings_node = soup.find(name="strings")
    if not strings_node:
        return
    comments = strings_node.find_all(string=lambda text: isinstance(text, Comment))
    # zip() stops at the shorter sequence, which gives the "replace only while
    # translations remain" behaviour without a manual index counter.
    for c, translated in zip(comments, translated_comments):
        c.insert_before(" ")
        c.replace_with(Comment(translated))
|
||||
|
||||
def build_translation_prompt(
    language_name: str,
    english_full: Dict[str, str],
    existing_translations: Dict[str, str],
    to_translate: Dict[str, str],
    english_comments: List[str],
    existing_translated_comments: List[str],
    generator_comment_en: str,
) -> str:
    """Build the text prompt that asks the model to translate strings and comments.

    Every data argument is embedded in the prompt as JSON (non-ASCII
    characters are kept as-is rather than escaped).

    Args:
        language_name: Human-readable name of the target language.
        english_full: All English strings, keyed by string ID, for context.
        existing_translations: Translations already available for this language.
        to_translate: The strings (ID -> English text) that need translating.
        english_comments: English XML comments, in document order.
        existing_translated_comments: Previously translated comments, in order.
        generator_comment_en: English "generated by" line to be translated too.

    Returns:
        The prompt text with leading and trailing whitespace stripped.
    """
    # Doubled braces ({{ }}) are literal braces in the f-string output.
    return f"""
You are a professional localizer for a smartwatch UI. Translate UI strings into {language_name}.

Rules:
- Preserve placeholders EXACTLY and do not translate them:
- printf style: %s, %d, %f, %1$s, %2$d, etc.
- brace placeholders: {{0}}, {{1}}, {{name}}, {{value}}
- dollar placeholders: $1, $2
- Never translate app/product names; keep them unchanged, e.g., "Home Assistant".
- Do not change punctuation, spacing, or add extra punctuation unless natural in the target language.
- Keep any whitespace at the beginning or end of string unchanged.
- Keep meaning accurate and UI-appropriate (short, natural, consistent).
- Use consistent terminology aligned with existing translations for this language.
- Do NOT translate the string IDs themselves.

Comments handling:
- You are given comments from the English XML (in order) and the current translations (same order where available).
- If a given English comment has not changed since the last revision and a current translation exists at the same index, return the existing translation unchanged.
- If you believe an existing translation is already correct for the provided English, keep it unchanged; otherwise provide an improved translation.
- Also translate the generator comment line shown below. We will store both the English and translated lines inside a single XML comment.

Here are the complete English strings for context:
{json.dumps(english_full, ensure_ascii=False, indent=2)}

Here are existing translations for this language (do not modify these; use for terminology/style consistency):
{json.dumps(existing_translations, ensure_ascii=False, indent=2)}

Here are the ONLY strings that need new translations (translate the values):
{json.dumps(to_translate, ensure_ascii=False, indent=2)}

Comments to translate (same order as in the XML):
{json.dumps(english_comments, ensure_ascii=False, indent=2)}

Existing translated comments (same order; may be fewer items):
{json.dumps(existing_translated_comments, ensure_ascii=False, indent=2)}

Generator comment (English; translate this too):
{json.dumps(generator_comment_en, ensure_ascii=False)}

Return only valid JSON with this exact structure and nothing else (no markdown fences, no prose):
{{
"translations": {{ "<STRING_ID>": "<translated string>", ... }},
"translated_comments": ["<translated comment 1>", "<translated comment 2>", ...],
"generator_comment_translated": "<translated generator comment line>"
}}
- "translations" must have exactly the keys provided in "to_translate".
- "translated_comments" must have the same number of items and order as the input comments list.
- For comments that should remain unchanged based on the rules above, return the existing translation verbatim.
""".strip()
|
||||
|
||||
# ---------------- Main translation logic ----------------
|
||||
|
||||
def translate_language(
    client: genai.Client,
    lang_tuple: Tuple[str, str, str],
    english_soup: BeautifulSoup,
    english_strings: Dict[str, str],
) -> None:
    """Translate the English strings into one language and write its strings.xml.

    For each ``<string>`` in the English document the final value is chosen
    in this order: the English text for IDs in ``exceptionIds``, the value
    from ``corrections.xml``, the value from the existing ``strings.xml``,
    and otherwise a fresh translation requested from the model. When no
    string needs a fresh translation the function returns without writing.

    Args:
        client: Gemini client used for the ``generate_content`` request.
        lang_tuple: ``(garmin_code, unused_code, language_name)``.
        english_soup: Parsed English strings.xml; copied, not modified.
        english_strings: English ``id -> text`` mapping, sent as prompt context.

    Raises:
        RuntimeError: If the model response is empty or is not a JSON object.
        json.JSONDecodeError: If the response text is not valid JSON.
    """
    garmin_code, _unused, language_name = lang_tuple

    # Ensure output directory exists
    out_dir = f"./resources-{garmin_code}/strings/"
    os.makedirs(out_dir, exist_ok=True)

    # Load previous translations and corrections
    prev_soup = load_xml_as_soup(os.path.join(out_dir, "strings.xml"))
    corrections_soup = load_xml_as_soup(os.path.join(out_dir, "corrections.xml"))

    prev_map = extract_strings(prev_soup)
    corrections_map = extract_strings(corrections_soup)

    # Build a fresh soup for this language from English source
    soup = BeautifulSoup(str(english_soup), features="xml")

    # Collect comments
    english_comments = extract_comments_in_order(english_soup)
    existing_translated_comments = extract_comments_in_order(prev_soup)

    # Detect any mention of Google Translate anywhere in the previous XML
    mentions_google_translate = any(
        "google translate" in str(c).lower()
        for c in prev_soup.find_all(string=lambda t: isinstance(t, Comment))
    )

    # Build generator comment English line (the translated line will be returned by the API)
    if mentions_google_translate:
        generator_comment_en = f"Generated by Google Translate and {MODEL_NAME} from English to {language_name}"
    else:
        generator_comment_en = f"Generated by {MODEL_NAME} from English to {language_name}"

    # Decide which strings need translation (not in corrections, not in previous)
    to_translate_map: Dict[str, str] = {}
    final_values: Dict[str, str] = {}

    for s in soup.find_all(name="string"):
        sid = s.get("id")
        if not sid:
            continue
        if sid in exceptionIds:
            # Keep English as-is for exception IDs
            final_values[sid] = s.get_text()
            continue

        if sid in corrections_map and corrections_map[sid] is not None:
            final_values[sid] = corrections_map[sid]
        elif sid in prev_map and prev_map[sid] is not None:
            final_values[sid] = prev_map[sid]
        else:
            to_translate_map[sid] = s.get_text()

    # If there are no new strings to translate, skip this language entirely
    if not to_translate_map:
        print(f" Skipping {language_name}: no new strings to translate.")
        return

    # Prepare context (always include full English strings); corrections
    # take precedence over previously generated translations.
    existing_translations = {**prev_map, **corrections_map}

    # Translate all at once; force JSON output but do not enforce a schema
    prompt = build_translation_prompt(
        language_name=language_name,
        english_full=english_strings,
        existing_translations=existing_translations,
        to_translate=to_translate_map,
        english_comments=english_comments,
        existing_translated_comments=existing_translated_comments,
        generator_comment_en=generator_comment_en,
    )

    config = genai.types.GenerateContentConfig(
        temperature=0,
        response_mime_type="application/json",
    )

    resp = client.models.generate_content(
        model=MODEL_NAME,
        contents=prompt,
        config=config,
    )

    data = getattr(resp, "parsed", None)
    if data is None:
        txt = getattr(resp, "text", None)
        if not txt:
            # Fall back to the first part of the first candidate; that text
            # may itself be missing or None, so normalise to "".
            try:
                txt = resp.candidates[0].content.parts[0].text or ""
            except (AttributeError, IndexError, TypeError):
                txt = ""
        if not txt.strip():
            raise RuntimeError("Empty response from model; cannot parse translations.")
        data = json.loads(txt)

    # The code below uses dict access; reject any other JSON shape with a
    # clear message instead of an AttributeError on ``.get``.
    if not isinstance(data, dict):
        raise RuntimeError("Model response is not a JSON object; cannot read translations.")

    translations = data.get("translations", {}) or {}
    for sid, translated in translations.items():
        # Only accept IDs that were requested, and only textual values.
        if sid in to_translate_map and isinstance(translated, str):
            final_values[sid] = translated

    # Strings the model skipped stay in English in the output; report them so
    # they are not silently mistaken for translations on later runs.
    missing = [sid for sid in to_translate_map if sid not in final_values]
    if missing:
        print(f" Warning: no translation returned for: {', '.join(missing)}")

    translated_comments_all: List[str] = data.get("translated_comments", []) or []
    generator_comment_translated: str = data.get("generator_comment_translated", "") or ""

    # Apply final values to the soup
    for s in soup.find_all(name="string"):
        sid = s.get("id")
        if not sid:
            continue
        if sid in final_values:
            s.insert_before(" ")
            s.string = final_values[sid]

    # Replace comments with translated versions (order-preserving)
    if translated_comments_all:
        replace_comments_in_order(soup, translated_comments_all)

    # Insert the generator comment (English + translated) before <strings>
    strings_node = soup.find(name="strings")
    if strings_node:
        strings_node.insert_before("\n\n")
        combined = f"\n {generator_comment_en}\n {generator_comment_translated}\n"
        strings_node.insert_before(Comment(combined))
        strings_node.insert_before("\n\n")

    # Write output
    out_path = os.path.join(out_dir, "strings.xml")
    with open(out_path, "wb") as w:
        w.write(soup.encode("utf-8") + b"\n")
|
||||
i += 1
|
||||
|
||||
def main():
    """Translate the English strings.xml into every configured language.

    Languages are processed one after another; a failure in one language is
    printed and does not stop the remaining ones.

    Raises:
        FileNotFoundError: If ``./resources/strings/strings.xml`` is missing.
    """
    # Init client
    client = genai.Client()

    # Load English source
    src_path = "./resources/strings/strings.xml"
    try:
        with open(src_path, "r", encoding="utf-8") as f:
            english_xml = f.read().replace("\r", "")
    except FileNotFoundError:
        raise FileNotFoundError(f"Missing source file: {src_path}") from None
    english_soup = BeautifulSoup(english_xml, features="xml")
    english_strings = extract_strings(english_soup)

    total_langs = len(languages)
    for i, lang in enumerate(languages, start=1):
        print(f"{i} of {total_langs}: Translating English to {lang[2]}")
        # Deliberately broad: this is the top-level boundary, and one failing
        # language (API error, malformed reply) must not abort the others.
        try:
            translate_language(client, lang, english_soup, english_strings)
        except Exception as e:
            print(f" Error translating {lang[2]}: {e}")
|
||||
|
||||
# Run the translation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
|
Reference in New Issue
Block a user