mirror of
				https://github.com/house-of-abbey/GarminHomeAssistant.git
				synced 2025-10-30 23:38:15 +00:00 
			
		
		
		
	Update the translation script to use Gemini instead of Google Translate
This code itself was also AI generated.
This commit is contained in:
		
							
								
								
									
										367
									
								
								translate.py
									
									
									
									
									
								
							
							
						
						
									
										367
									
								
								translate.py
									
									
									
									
									
								
							| @@ -15,32 +15,37 @@ | |||||||
| # Description: | # Description: | ||||||
| # | # | ||||||
| # Python script to automatically translate the strings.xml file to each supported | # Python script to automatically translate the strings.xml file to each supported | ||||||
| # language using Google Translate. | # language. Rewritten by krzys_h with the help of AI to use Gemini instead of | ||||||
|  | # Google Translate for more contextual translations. | ||||||
| # | # | ||||||
| # Python installation: | # Requirements: | ||||||
| #   pip install beautifulsoup4 | #   pip install google-genai beautifulsoup4 lxml | ||||||
| #   pip install deep-translator |  | ||||||
| # NB. For XML formatting: |  | ||||||
| #   pip install lxml |  | ||||||
| # | # | ||||||
| # References: | # Env: | ||||||
| #  * https://www.crummy.com/software/BeautifulSoup/bs4/doc/ | #   export GEMINI_API_KEY="YOUR_API_KEY" | ||||||
| #  * https://realpython.com/beautiful-soup-web-scraper-python/ | # | ||||||
| #  * https://www.crummy.com/software/BeautifulSoup/bs4/doc/#parsing-xml | # To get your own API key, go to: | ||||||
| #  * https://www.crummy.com/software/BeautifulSoup/bs4/doc/#xml | # https://aistudio.google.com/app/apikey | ||||||
| # | # | ||||||
| #################################################################################### | #################################################################################### | ||||||
|  |  | ||||||
| import os | import os | ||||||
|  | import json | ||||||
|  | from typing import Dict, List, Tuple | ||||||
|  |  | ||||||
| from bs4 import BeautifulSoup | from google import genai | ||||||
| from bs4 import Comment | from bs4 import BeautifulSoup, Comment | ||||||
| from deep_translator import GoogleTranslator |  | ||||||
|  |  | ||||||
| # List of tuples in the form of: | # ---------------- Configuration ---------------- | ||||||
| #  * Garmin IQ language three letter mnemonic, |  | ||||||
| #  * Google Translate language mnemonic, | # Gemini model name | ||||||
| #  * Language familiar name (mainly for reference) | MODEL_NAME = "gemini-2.5-flash" | ||||||
| languages: list[tuple[str, str, str]] = [ |  | ||||||
|  | # Language definitions: | ||||||
|  | #  * Garmin IQ language three-letter mnemonic (used in resources-XXX folder), | ||||||
|  | #  * Google Translate language mnemonic (no longer used; kept for reference), | ||||||
|  | #  * Human-readable language name for prompts | ||||||
|  | languages: List[Tuple[str, str, str]] = [ | ||||||
|     ("ara", "ar", "Arabic"), |     ("ara", "ar", "Arabic"), | ||||||
|     ("bul", "bg", "Bulgarian"), |     ("bul", "bg", "Bulgarian"), | ||||||
|     ("zhs", "zh-CN", "Chinese (Simplified)"), |     ("zhs", "zh-CN", "Chinese (Simplified)"), | ||||||
| @@ -79,75 +84,269 @@ languages: list[tuple[str, str, str]] = [ | |||||||
|     ("vie", "vi", "Vietnamese"), |     ("vie", "vi", "Vietnamese"), | ||||||
| ] | ] | ||||||
|  |  | ||||||
| langLength = len(languages) | exceptionIds: List[str] = ["AppName", "AppVersionTitle"] | ||||||
|  |  | ||||||
| exceptionIds: list[str] = ["AppName", "AppVersionTitle"] | # ---------------- Helpers ---------------- | ||||||
| titleIds: list[str] = [] |  | ||||||
|  |  | ||||||
| # def merge(curr: BeautifulSoup, prev: BeautifulSoup) -> BeautifulSoup: | def load_xml_as_soup(path: str) -> BeautifulSoup: | ||||||
| #     """ |     if not os.path.exists(path): | ||||||
| #     Merge the current strings.xml with the previous one, overwriting |         return BeautifulSoup("", features="xml") | ||||||
| #     the previous strings with the current ones if they exist. |     with open(path, "r", encoding="utf-8") as f: | ||||||
| #     """ |         return BeautifulSoup(f.read().replace("\r", ""), features="xml") | ||||||
| #     out = prev.__copy__() |  | ||||||
| #     for s in curr.find(name="strings").find_all(name="string"): |  | ||||||
| #         s_prev = out.find(name="string", attrs={"id": s["id"]}) |  | ||||||
| #         if s_prev: |  | ||||||
| #             s_prev.string = s.string |  | ||||||
| #         else: |  | ||||||
| #             out.find(name="strings").append(s) |  | ||||||
| #     return out |  | ||||||
|  |  | ||||||
| i = 1 | def extract_strings(soup: BeautifulSoup) -> Dict[str, str]: | ||||||
| with open("./resources/strings/strings.xml", "r") as f: |     out = {} | ||||||
|     c = f.read().replace("\r", "") |     strings_node = soup.find(name="strings") | ||||||
|     for l in languages: |     if not strings_node: | ||||||
|         os.makedirs(f"./resources-{l[0]}/strings/", exist_ok=True) |         return out | ||||||
|         # Old translations will not be automatically updated/removed, use removeTranslations.py |     for s in strings_node.find_all(name="string"): | ||||||
|         try: |         sid = s.get("id") | ||||||
|             with open(f"./resources-{l[0]}/strings/strings.xml", "r", encoding="utf-8") as r: |         if not sid: | ||||||
|                 prev = BeautifulSoup(r.read().replace("\r", ""), features="xml") |             continue | ||||||
|         except FileNotFoundError: |         value = s.string if s.string is not None else s.get_text() | ||||||
|             prev = BeautifulSoup("", features="xml") |         out[sid] = value if value is not None else "" | ||||||
|         try: |     return out | ||||||
|             with open(f"./resources-{l[0]}/strings/corrections.xml", "r", encoding="utf-8") as r: |  | ||||||
|                 curr = BeautifulSoup(r.read().replace("\r", ""), features="xml") |  | ||||||
|         except FileNotFoundError: |  | ||||||
|             curr = BeautifulSoup("", features=["xml"]) |  | ||||||
|         print(f"{i} of {langLength}: Translating English to {l[2]}") |  | ||||||
|         soup = BeautifulSoup(c, features="xml") |  | ||||||
|         translator = GoogleTranslator(source="en", target=l[1]) |  | ||||||
|         soup.find(name="strings").insert_before("\n\n") |  | ||||||
|         soup.find(name="strings").insert_before( |  | ||||||
|             Comment( |  | ||||||
|                 f"\n  Generated by Google Translate: English to {l[2]}\n  " + |  | ||||||
|                     translator.translate("Generated by Google Translate from English") + "\n")) |  | ||||||
|         soup.find(name="strings").insert_before("\n\n") |  | ||||||
|  |  | ||||||
|         for s in soup.find(name="strings").find_all(name="string"): | def extract_comments_in_order(soup: BeautifulSoup) -> List[str]: | ||||||
|  |     comments = [] | ||||||
|  |     strings_node = soup.find(name="strings") | ||||||
|  |     if not strings_node: | ||||||
|  |         return comments | ||||||
|  |     for c in strings_node.find_all(string=lambda text: isinstance(text, Comment)): | ||||||
|  |         comments.append(str(c)) | ||||||
|  |     return comments | ||||||
|  |  | ||||||
|  | def replace_comments_in_order(soup: BeautifulSoup, translated_comments: List[str]) -> None: | ||||||
|  |     strings_node = soup.find(name="strings") | ||||||
|  |     if not strings_node: | ||||||
|  |         return | ||||||
|  |     idx = 0 | ||||||
|  |     for c in strings_node.find_all(string=lambda text: isinstance(text, Comment)): | ||||||
|  |         if idx < len(translated_comments): | ||||||
|  |             c.insert_before("  ") | ||||||
|  |             c.replace_with(Comment(translated_comments[idx])) | ||||||
|  |         idx += 1 | ||||||
|  |  | ||||||
|  | def build_translation_prompt( | ||||||
|  |     language_name: str, | ||||||
|  |     english_full: Dict[str, str], | ||||||
|  |     existing_translations: Dict[str, str], | ||||||
|  |     to_translate: Dict[str, str], | ||||||
|  |     english_comments: List[str], | ||||||
|  |     existing_translated_comments: List[str], | ||||||
|  |     generator_comment_en: str, | ||||||
|  | ) -> str: | ||||||
|  |     return f""" | ||||||
|  | You are a professional localizer for a smartwatch UI. Translate UI strings into {language_name}. | ||||||
|  |  | ||||||
|  | Rules: | ||||||
|  | - Preserve placeholders EXACTLY and do not translate them: | ||||||
|  |   - printf style: %s, %d, %f, %1$s, %2$d, etc. | ||||||
|  |   - brace placeholders: {{0}}, {{1}}, {{name}}, {{value}} | ||||||
|  |   - dollar placeholders: $1, $2 | ||||||
|  | - Never translate app/product names; keep them unchanged, e.g., "Home Assistant". | ||||||
|  | - Do not change punctuation, spacing, or add extra punctuation unless natural in the target language. | ||||||
|  | - Keep any whitespace at the beginning or end of string unchanged. | ||||||
|  | - Keep meaning accurate and UI-appropriate (short, natural, consistent). | ||||||
|  | - Use consistent terminology aligned with existing translations for this language. | ||||||
|  | - Do NOT translate the string IDs themselves. | ||||||
|  |  | ||||||
|  | Comments handling: | ||||||
|  | - You are given comments from the English XML (in order) and the current translations (same order where available). | ||||||
|  | - If a given English comment has not changed since the last revision and a current translation exists at the same index, return the existing translation unchanged. | ||||||
|  | - If you believe an existing translation is already correct for the provided English, keep it unchanged; otherwise provide an improved translation. | ||||||
|  | - Also translate the generator comment line shown below. We will store both the English and translated lines inside a single XML comment. | ||||||
|  |  | ||||||
|  | Here are the complete English strings for context: | ||||||
|  | {json.dumps(english_full, ensure_ascii=False, indent=2)} | ||||||
|  |  | ||||||
|  | Here are existing translations for this language (do not modify these; use for terminology/style consistency): | ||||||
|  | {json.dumps(existing_translations, ensure_ascii=False, indent=2)} | ||||||
|  |  | ||||||
|  | Here are the ONLY strings that need new translations (translate the values): | ||||||
|  | {json.dumps(to_translate, ensure_ascii=False, indent=2)} | ||||||
|  |  | ||||||
|  | Comments to translate (same order as in the XML): | ||||||
|  | {json.dumps(english_comments, ensure_ascii=False, indent=2)} | ||||||
|  |  | ||||||
|  | Existing translated comments (same order; may be fewer items): | ||||||
|  | {json.dumps(existing_translated_comments, ensure_ascii=False, indent=2)} | ||||||
|  |  | ||||||
|  | Generator comment (English; translate this too): | ||||||
|  | {json.dumps(generator_comment_en, ensure_ascii=False)} | ||||||
|  |  | ||||||
|  | Return only valid JSON with this exact structure and nothing else (no markdown fences, no prose): | ||||||
|  | {{ | ||||||
|  |   "translations": {{ "<STRING_ID>": "<translated string>", ... }}, | ||||||
|  |   "translated_comments": ["<translated comment 1>", "<translated comment 2>", ...], | ||||||
|  |   "generator_comment_translated": "<translated generator comment line>" | ||||||
|  | }} | ||||||
|  | - "translations" must have exactly the keys provided in "to_translate". | ||||||
|  | - "translated_comments" must have the same number of items and order as the input comments list. | ||||||
|  | - For comments that should remain unchanged based on the rules above, return the existing translation verbatim. | ||||||
|  | """.strip() | ||||||
|  |  | ||||||
|  | # ---------------- Main translation logic ---------------- | ||||||
|  |  | ||||||
|  | def translate_language( | ||||||
|  |     client: genai.Client, | ||||||
|  |     lang_tuple: Tuple[str, str, str], | ||||||
|  |     english_soup: BeautifulSoup, | ||||||
|  |     english_strings: Dict[str, str], | ||||||
|  | ) -> None: | ||||||
|  |     garmin_code, _unused, language_name = lang_tuple | ||||||
|  |  | ||||||
|  |     # Ensure output directory exists | ||||||
|  |     out_dir = f"./resources-{garmin_code}/strings/" | ||||||
|  |     os.makedirs(out_dir, exist_ok=True) | ||||||
|  |  | ||||||
|  |     # Load previous translations and corrections | ||||||
|  |     prev_soup = load_xml_as_soup(os.path.join(out_dir, "strings.xml")) | ||||||
|  |     corrections_soup = load_xml_as_soup(os.path.join(out_dir, "corrections.xml")) | ||||||
|  |  | ||||||
|  |     prev_map = extract_strings(prev_soup) | ||||||
|  |     corrections_map = extract_strings(corrections_soup) | ||||||
|  |  | ||||||
|  |     # Build a fresh soup for this language from English source | ||||||
|  |     soup = BeautifulSoup(str(english_soup), features="xml") | ||||||
|  |  | ||||||
|  |     # Collect comments | ||||||
|  |     english_comments = extract_comments_in_order(english_soup) | ||||||
|  |     existing_translated_comments = extract_comments_in_order(prev_soup) | ||||||
|  |  | ||||||
|  |     # Detect any mention of Google Translate anywhere in the previous XML | ||||||
|  |     all_comments_text_prev = [ | ||||||
|  |         str(c) for c in prev_soup.find_all(string=lambda t: isinstance(t, Comment)) | ||||||
|  |     ] | ||||||
|  |     mentions_google_translate = any("google translate" in c.lower() for c in all_comments_text_prev) | ||||||
|  |  | ||||||
|  |     # Build generator comment English line (the translated line will be returned by the API) | ||||||
|  |     if mentions_google_translate: | ||||||
|  |         generator_comment_en = f"Generated by Google Translate and {MODEL_NAME} from English to {language_name}" | ||||||
|  |     else: | ||||||
|  |         generator_comment_en = f"Generated by {MODEL_NAME} from English to {language_name}" | ||||||
|  |  | ||||||
|  |     # Decide which strings need translation (not in corrections, not in previous) | ||||||
|  |     to_translate_map: Dict[str, str] = {} | ||||||
|  |     final_values: Dict[str, str] = {} | ||||||
|  |  | ||||||
|  |     for s in soup.find_all(name="string"): | ||||||
|  |         sid = s.get("id") | ||||||
|  |         if not sid: | ||||||
|  |             continue | ||||||
|  |         if sid in exceptionIds: | ||||||
|  |             # Keep English as-is for exception IDs | ||||||
|  |             final_values[sid] = s.get_text() | ||||||
|  |             continue | ||||||
|  |  | ||||||
|  |         if sid in corrections_map and corrections_map[sid] is not None: | ||||||
|  |             final_values[sid] = corrections_map[sid] | ||||||
|  |         elif sid in prev_map and prev_map[sid] is not None: | ||||||
|  |             final_values[sid] = prev_map[sid] | ||||||
|  |         else: | ||||||
|  |             to_translate_map[sid] = s.get_text() | ||||||
|  |  | ||||||
|  |     # If there are no new strings to translate, skip this language entirely | ||||||
|  |     if not to_translate_map: | ||||||
|  |         print(f"  Skipping {language_name}: no new strings to translate.") | ||||||
|  |         return | ||||||
|  |  | ||||||
|  |     # Prepare context (always include full English strings) | ||||||
|  |     english_context = english_strings | ||||||
|  |     existing_translations = {k: v for k, v in prev_map.items()} | ||||||
|  |     if corrections_map: | ||||||
|  |         existing_translations.update(corrections_map) | ||||||
|  |  | ||||||
|  |     # Translate all at once; force JSON output but do not enforce a schema | ||||||
|  |     prompt = build_translation_prompt( | ||||||
|  |         language_name=language_name, | ||||||
|  |         english_full=english_context, | ||||||
|  |         existing_translations=existing_translations, | ||||||
|  |         to_translate=to_translate_map, | ||||||
|  |         english_comments=english_comments, | ||||||
|  |         existing_translated_comments=existing_translated_comments, | ||||||
|  |         generator_comment_en=generator_comment_en, | ||||||
|  |     ) | ||||||
|  |  | ||||||
|  |     config = genai.types.GenerateContentConfig( | ||||||
|  |         temperature=0, | ||||||
|  |         response_mime_type="application/json", | ||||||
|  |     ) | ||||||
|  |  | ||||||
|  |     resp = client.models.generate_content( | ||||||
|  |         model=MODEL_NAME, | ||||||
|  |         contents=prompt, | ||||||
|  |         config=config, | ||||||
|  |     ) | ||||||
|  |  | ||||||
|  |     data = getattr(resp, "parsed", None) | ||||||
|  |     if data is None: | ||||||
|  |         txt = getattr(resp, "text", None) | ||||||
|  |         if not txt: | ||||||
|  |             try: | ||||||
|  |                 txt = resp.candidates[0].content.parts[0].text | ||||||
|  |             except Exception: | ||||||
|  |                 txt = "" | ||||||
|  |         if not txt.strip(): | ||||||
|  |             raise RuntimeError("Empty response from model; cannot parse translations.") | ||||||
|  |         data = json.loads(txt) | ||||||
|  |  | ||||||
|  |     translations = data.get("translations", {}) or {} | ||||||
|  |     for sid, translated in translations.items(): | ||||||
|  |         if sid in to_translate_map: | ||||||
|  |             final_values[sid] = translated | ||||||
|  |  | ||||||
|  |     translated_comments_all: List[str] = data.get("translated_comments", []) or [] | ||||||
|  |     generator_comment_translated: str = data.get("generator_comment_translated", "") or "" | ||||||
|  |  | ||||||
|  |     # Apply final values to the soup | ||||||
|  |     for s in soup.find_all(name="string"): | ||||||
|  |         sid = s.get("id") | ||||||
|  |         if not sid: | ||||||
|  |             continue | ||||||
|  |         if sid in final_values: | ||||||
|  |             val = final_values[sid] | ||||||
|             s.insert_before("  ") |             s.insert_before("  ") | ||||||
|             if s["id"] in exceptionIds: |             s.string = val | ||||||
|                 continue |  | ||||||
|  |  | ||||||
|             s_curr = curr.find(name="string", attrs={"id": s["id"]}) |     # Replace comments with translated versions (order-preserving) | ||||||
|             if s_curr: |     if translated_comments_all: | ||||||
|                 s.string = s_curr.string |         replace_comments_in_order(soup, translated_comments_all) | ||||||
|             else: |  | ||||||
|                 s_prev = prev.find(name="string", attrs={"id": s["id"]}) |  | ||||||
|                 if s_prev: |  | ||||||
|                     s.string = s_prev.string |  | ||||||
|                 else: |  | ||||||
|                     a = translator.translate(s.string) |  | ||||||
|                     if s["id"] in titleIds: |  | ||||||
|                         s.string = a.title() |  | ||||||
|                     else: |  | ||||||
|                         s.string = a |  | ||||||
|         for s in soup.find(name="strings").find_all( |  | ||||||
|                 string=lambda text: isinstance(text, Comment)): |  | ||||||
|             s.insert_before("  ") |  | ||||||
|             s.replace_with(Comment(" " + translator.translate(s) + " ")) |  | ||||||
|  |  | ||||||
|         # print(str(soup)) |     # Insert the generator comment (English + translated) before <strings> | ||||||
|         with open(f"./resources-{l[0]}/strings/strings.xml", "wb") as w: |     strings_node = soup.find(name="strings") | ||||||
|             w.write(soup.encode("utf-8") + b"\n") |     if strings_node: | ||||||
|         i += 1 |         strings_node.insert_before("\n\n") | ||||||
|  |         combined = f"\n  {generator_comment_en}\n  {generator_comment_translated}\n" | ||||||
|  |         strings_node.insert_before(Comment(combined)) | ||||||
|  |         strings_node.insert_before("\n\n") | ||||||
|  |  | ||||||
|  |     # Write output | ||||||
|  |     out_path = os.path.join(out_dir, "strings.xml") | ||||||
|  |     with open(out_path, "wb") as w: | ||||||
|  |         w.write(soup.encode("utf-8") + b"\n") | ||||||
|  |  | ||||||
|  | def main(): | ||||||
|  |     # Init client | ||||||
|  |     client = genai.Client() | ||||||
|  |  | ||||||
|  |     # Load English source | ||||||
|  |     src_path = "./resources/strings/strings.xml" | ||||||
|  |     if not os.path.exists(src_path): | ||||||
|  |         raise FileNotFoundError(f"Missing source file: {src_path}") | ||||||
|  |  | ||||||
|  |     with open(src_path, "r", encoding="utf-8") as f: | ||||||
|  |         english_xml = f.read().replace("\r", "") | ||||||
|  |     english_soup = BeautifulSoup(english_xml, features="xml") | ||||||
|  |     english_strings = extract_strings(english_soup) | ||||||
|  |  | ||||||
|  |     total_langs = len(languages) | ||||||
|  |     for i, lang in enumerate(languages, start=1): | ||||||
|  |         print(f"{i} of {total_langs}: Translating English to {lang[2]}") | ||||||
|  |         try: | ||||||
|  |             translate_language(client, lang, english_soup, english_strings) | ||||||
|  |         except Exception as e: | ||||||
|  |             print(f"  Error translating {lang[2]}: {e}") | ||||||
|  |  | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     main() | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user