Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Translate Cookbook with Google Translate or OpenAI ChatGPT
#1
Need LA 1.1 or later.

Code:
Copy      Help
// script "Translate cookbook recipe.cs"
/// Run this script. It translates Cookbook articles as you open them.
/// At first edit the `language` value below (replace "lt").
///
/// To translate, this script can use Google Translate (Cloud or free) or Microsoft Translator or OpenAI ChatGPT.
/// By default uses a free Google Translate endpoint. Don't need an account etc. It is undocumented and may stop working in the future, therefore this script also can use Google Cloud, which has the same translation speed and quality.
/// To use Microsoft Translator:
///     Change the `translator` value below in `#region settings`.
///     Change the `language` value in the `microsoft` block.
/// To use OpenAI or Google Cloud:
///     Change the `translator` value below in `#region settings`.
///     Change the `language` value in the `google` or `openai` block.
///     Create account at https://platform.openai.com or Google Cloud. Generate an API key. For OpenAI at first need to upgrade to a paid tier ($5 one-time). For Google Cloud need a project etc. You can find more info and tutorials on the Internet.
///     Save the API key in the Registry (see `registryApiKey` below).
///     How much it costs? Both services can translate more than enough text / month for free. Entire Cookbook can be translated for free.
///
/// To run this script at startup, add its name in Options > Workspace.
/// After editing this script, run it again.
///
/// This script caches translated texts. If you want to re-translate a recipe, open it with Ctrl.
/// OpenAI is slow. If the recipe contains much text, may need to wait 30 minutes or more. During that time LA is hung. Because of the cache, next time will not need to wait for the same recipe.
/// Google is fast, usually 100-300 ms, but incorrectly translates many words.
/// Microsoft is just slightly slower than Google, and it seems its translation quality is better.

/*/ role editorExtension; testInternal Au.Editor; r Au.Editor.dll; /*/
/*/ role editorExtension; testInternal Au.Editor; r Au.Editor.dll; nuget -\WeCantSpell.Hunspell; /*/

//#define DEBUG_PRINT
//#define SPELLCHECK


using System.Text.Json.Nodes;

#region settings

//Selects the translation service. It is a compile-time constant, and _Translate branches on it.
const Translator translator = Translator.GoogleFree;

//Settings used for both Translator.GoogleFree and Translator.GoogleCloud.
var google = new {
    language = "lt", //target language code. See https://cloud.google.com/translate/docs/languages
    registryApiKey = (key: @"HKEY_CURRENT_USER\Software\Au", value: "Google Cloud API key"), //for Translator.GoogleCloud only. Don't need to edit these strings, just create the Registry key and value.
};

//Settings used for Translator.Microsoft.
var microsoft = new {
    language = "lt", //target language code; sent as the `to` parameter of the translate request
};


//Settings used for Translator.OpenAI.
var openai = new {
    language = "Lithuanian", //target language name; inserted into the prompt "Translate English to ..."
    model = "gpt-4-turbo-preview", //"gpt-3.5-turbo", "gpt-4", "gpt-4-turbo-preview", etc
    registryApiKey = (key: @"HKEY_CURRENT_USER\Software\Au", value: "OpenAI API key"), //Don't need to edit these strings, just create the Registry key and value.
};

#endregion


//Optional spell-check mode (enabled with `#define SPELLCHECK`): loads a Hunspell word list.
//_OpeningRecipe then highlights words not found in it instead of translating.
//The directive and its symbol must be on the same line; `#if` alone is an invalid preprocessor expression.
#if SPELLCHECK
var dict = @"C:\Program Files\LibreOffice\share\extensions\dict-en\en_US.dic";
//var dict = @"C:\Program Files\LibreOffice\share\wordbook\technical.dic";
var dictionary = WeCantSpell.Hunspell.WordList.CreateFromFiles(dict);
#endif

//When EditorExtension.WindowReady fires, set _OpeningRecipe as the Recipe panel's OpeningRecipe callback.
EditorExtension.WindowReady += () => Panels.Recipe.OpeningRecipe = _OpeningRecipe;


//Callback assigned to Panels.Recipe.OpeningRecipe. Replaces e.name and e.parts with translated text.
//Steps:
//  1. Builds one text: the prepared name, an empty line, then every text part (tags replaced with placeholders)
//     and a "#<index>#" marker line for every non-text part (code).
//  2. Translates the text with _Translate. If SPELLCHECK is defined, instead only highlights words that fail the dictionary check.
//  3. Splits the result at the markers, restores the tags, and assigns the new name and parts to e.
//Placeholders: "[Qn]" = tags[n] is restored unchanged; "<Qn>text</Qn>" = only the text of tags[n] is replaced with the translated text.
//If translation fails (_Translate returns null, or the result has no empty line after the name), e is left unchanged.
void _OpeningRecipe(PanelRecipe.OpeningRecipeArgs e) {
    //convert e to text:
    //Name
    //
    //e.parts[0] //text with replaced tags
    //
    //#<1># //replaced code
    //
    //e.parts[2] //text with replaced tags
    //
    //#<3># //replaced code
    //
    //...

#if DEBUG_PRINT
    print.clear();
#endif
    //For each replaced tag: s = tag string to restore, lenEndTag = length of its end tag, lenText = length of its original text.
    //lenEndTag and lenText are 0 for tags that are restored unchanged.
    List<(string s, int lenEndTag, int lenText)> tags = new();
    regexp rx = new(@"<_>(?<g1>[\s\S]*?)</_>|<image ""[^""]*"">|<([+\.]?[a-z]+)(?<g3> [^>]+)?>(?<g2>[^<]++|(?R))+<(?:/\1)?>");
    var sb = new StringBuilder(_PrepareTitle(e.name, tags) + "\r\n\r\n");
    int ipart = -1;
    foreach (var v in e.parts) {
        ipart++;
        if (v.isText) {
            //print.it(v.s); print.it("----");
            sb.AppendLine(rx.Replace(v.s, _Repl1));

            //Replaces one tag with a placeholder and remembers the tag in `tags`.
            string _Repl1(RXMatch m) {
                int i = tags.Count;
                var s = m.Value;
                var g = m[s.Starts("<_>") ? "g1" : "g2"];
                var t = g.Value;
                bool translate = false;
                if (t.Contains('<')) { /*print.it(t);*/ } //nested tags (mostly keywords, identifiers)
                else if (s.Starts("<_>")) { /*print.it(t);*/ } //don't translate
                else if (s.Starts("<image")) { /*print.it(t);*/ } //
                else if (s.Starts("<.")) { /*print.it(t);*/ } //code, keyword
                else if (s.Starts("<b>")) { /*print.it(t);*/ } //mostly identifiers and UI
                else if (s.Starts("<i>")) { /*print.it(t);*/ } //mostly parameters and terms
                else if (s.Starts("<mono>")) { /*print.it(t);*/ } //keys, hotkeys
                else if (s.Starts("<+see")) { /*print.it(t);*/ } //identifiers
                //else if (s.Starts("<+recipe")) { /*print.it(t);*/ } //
                else if (s.Starts("<+nuget")) { /*print.it(t);*/ } //
                else if (t.Starts("http")) { /*print.it(t);*/ } //URL
                else if (t.FindAny(".;") >= 0) { /*print.it(t);*/ } //avoid breaking the sentence
                else {
                    //print.it(s);
                    translate = true;
                    if (!m["g3"].Exists) { s = $"{s[..(g.Start - m.Start - 1)]} \"{t}\">{t}{s[(g.End - m.Start)..]}"; /*print.it(t, s);*/ } //<tag>text<> -> <tag "text">translatedText<>
                }
                //print.it(s, t);
                tags.Add(translate ? (s, m.End - g.End, t.Length) : (s, 0, 0));
                return translate ? $"<Q{i}>{t}</Q{i}>" : $"[Q{i}]";
            }
        }
        else {
            //non-text part: only a marker with the part index; the part itself is restored from e.parts later
            sb.AppendLine($"\r\n#<{ipart}>#\r\n");
        }
    }

    var text = sb.ToString();

#if DEBUG_PRINT
    //if (text.RxIsMatch(@"<[+\.]?[a-z_]")) {
    //    print.it($"<><lc yellow>{e.name}    <c red>not all tags replaced<><>");
    //    print.it(text);
    //    print.it("----");
    //    print.it(e.parts.Select(o => o.s));
    //    return;
    //}

    //if (text.RxIsMatch(@"(?i)(?<![a-z])'")) {
    //    print.it($"<><lc yellow>{e.name}<>");
    //    print.it(text);
    //}

    //var s1 = text.RxReplace(@""".+?""", $"<bc yellow>$0<>");
    //if (s1 != text) {
    //    print.it($"<><lc yellow>{e.name}<>\r\n{s1}");
    //}

    //print.it($"<><lc yellowgreen>{e.name}<>");
    //print.it(text);
    //if (text.Length > 1500) print.it(text.Length);
    //if (text.Length > 1900) {
    //    print.it($"<><lc yellowgreen>{e.name}<>");
    //    print.it(text);
    //}
#endif

    //The directive and its symbol must be on the same line; `#if` alone is an invalid preprocessor expression.
#if SPELLCHECK
    //spell-check mode: don't translate, just highlight words that are not in the dictionary
    text = text.RxReplace(@"\b[A-Za-z][a-z']+\b", m => {
        var s = m.Value;
        bool ok = dictionary.Check(s);
        if (!ok) {
            s = $"<bc yellow>{s}<>";
        }
        return s;
    });
#else

    //translate

    //print.it(text); print.it("-------");
    //return;

    //perf.first();

    text = _Translate(e.name, text, true);

    //perf.nw();
    //print.it(text);
    //return;

    if (text == null) return;
#endif

    regexp rxRT1 = null, rxRT2 = null; //created by _RestoreTags when first needed

    //get translated name

    if (!text.RxMatch(@"\R\R", 0, out RXGroup g1)) { print.it("Failed to translate. Result:\r\n" + text); return; }
    e.name = _RestoreTags(text[..g1.Start].Trim(), -1);
    int i = g1.End;

    //parse translated text parts and create new e.parts with translated texts (with restored tags) and restored codes

    List<(bool isText, string s)> r = new();
    regexp rx2 = new(@"\R+#<(\d+)>#\R*");
    ipart = 0;
    foreach (var m in rx2.FindAll(text)) {
        if (m.Start > i) r.Add((true, _RestoreTags(text[i..m.Start], ipart++)));
        r.Add(e.parts[m[1].Value.ToInt()]); //the marker contains the index of the original non-text part
        ipart++;
        i = m.End;
    }

    if (text.Length > i) r.Add((true, _RestoreTags(text[i..], ipart)));

#if DEBUG_PRINT
    //print.it($"<><lc yellowgreen>{e.name}<>");
    //print.it(r);
#endif

    e.parts = r;

    //Replaces placeholders in s with the tags stored in `tags`.
    //If ipart >= 0, also prepends the original text of e.parts[ipart]; the original and the translation are wrapped in different <lc ...> tags.
    string _RestoreTags(string s, int ipart) {
        rxRT1 ??= new(@"\[Q\d+\]");
        s = rxRT1.Replace(s, m => tags[s.ToInt(m.Start + 2)].s); //m.Start + 2 = offset of the tag index after "[Q"
        rxRT2 ??= new(@"<Q(\d+)>([^<]++)</Q\1>");
        s = rxRT2.Replace(s, m => {
            var translatedLinkText = m[2].Value;
            var tag = tags[s.ToInt(m.Start + 2)];
            //tag start (without the original text and end tag) + translated text + end tag
            var r = tag.s[..^(tag.lenEndTag + tag.lenText)] + translatedLinkText + tag.s[^tag.lenEndTag..];
            //print.it(r);
            return r;
        });

        if (ipart >= 0) s = $"<lc #e0ffe0>{e.parts[ipart].s}<>\r\n\r\n<lc #ffffe0>{s}<>";

        return s;
    }

    //In recipe name s replaces the listed C# keywords, and every word that starts with an uppercase letter and is not at the start of s,
    //with "[Qn]" placeholders, so that they are not translated. Adds the replaced words to `tags`.
    static string _PrepareTitle(string s, List<(string s, int i1, int i2)> tags) {
        HashSet<string> dontTranslate = new() { "bool", "null", "default", "class", "struct", "generic", "nullable", "tuple", "using", "if", "else", "switch", "for", "return", "try", "catch", "finally", "throw", "delegate", "event", };

        s = s.RxReplace(@"\b[a-zA-Z]+\b", m => {
            var k = m.Value;
            if (dontTranslate.Contains(k) || (m.Start > 0 && char.IsUpper(k[0]))) {
                int i = tags.Count;
                tags.Add((k, 0, 0));
                return $"[Q{i}]";
            }

            return k;
        });

        return s;
    }
}


#pragma warning disable CS0162 //Unreachable code detected
//Translates text from English using the service selected by the `translator` constant.
//  name - recipe name; passed to the cache together with the text.
//  text - text to translate.
//  debugPrint - if true, prints the HTTP status when the service returns a non-success response.
//Returns the translation (from the cache if _Cache.Get finds it), or null if the HTTP response status is not success.
//Throws AuException if the selected service needs an API key from the registry and the value is missing.
string _Translate(string name, string text, bool debugPrint) {
    //the target language and the cache folder prefix depend on the selected service
    var (targetLang, cacheFolderPrefix) = translator switch {
        Translator.OpenAI => (openai.language, "openai "),
        Translator.Microsoft => (microsoft.language, "microsoft "),
        _ => (google.language, "google ")
    };

    _Cache cache = new();
    if (cache.Get(cacheFolderPrefix + targetLang, name, text, out var translated)) return translated;

    //Microsoft: text returned by the edge.microsoft.com auth URL, later sent as the Bearer token.
    //Free Google endpoint: no key. Other services: key from the registry.
    string apiKey = translator switch {
        Translator.Microsoft => internet.http.Get("https://edge.microsoft.com/translate/auth").Text(),
        Translator.GoogleFree => null,
        _ => _ApiKeyFromRegistry()
    };

    //each local function sets `translated` and returns true, or returns false if the HTTP request failed
    bool translatedOk = translator switch {
        Translator.OpenAI => _ViaOpenAI(),
        Translator.Microsoft => _ViaMicrosoft(),
        Translator.GoogleCloud => _ViaGoogleCloud(),
        _ => _ViaGoogleFree()
    };
    if (!translatedOk) return null;

    cache.Save(translated);

    return translated;

    string _ApiKeyFromRegistry() {
        var (regKey, regValue) = translator == Translator.OpenAI ? openai.registryApiKey : google.registryApiKey;
        return Registry.GetValue(regKey, regValue, null) as string ?? throw new AuException("no API key in registry");
    }

    bool _ViaOpenAI() {
        var request = new JsonObject {
            ["model"] = openai.model,
            ["messages"] = new JsonArray(new JsonObject { ["role"] = "user", ["content"] = $"Translate English to {targetLang}:\r\n{text}" })
        };
        var response = internet.http.Post("https://api.openai.com/v1/chat/completions", internet.jsonContent(request.ToJsonString()), [$"Authorization: Bearer {apiKey}"]);
        if (!response.IsSuccessStatusCode) {
            if (debugPrint) print.it($"api.openai.com error: {(int)response.StatusCode} {response.ReasonPhrase}");
            return false;
        }
        var reply = response.Json();
        //print.it(reply.ToJsonString());
        translated = (string)reply["choices"][0]["message"]["content"];
        return true;
    }

    bool _ViaMicrosoft() {
        var request = new JsonArray(new JsonObject { ["Text"] = text });
        var response = internet.http.Post($"https://api.cognitive.microsofttranslator.com/translate?api-version=3.0&from=en&to={targetLang}", internet.jsonContent(request.ToJsonString()), headers: [$"Authorization: Bearer {apiKey}"]);
        if (!response.IsSuccessStatusCode) {
            if (debugPrint) print.it($"api.cognitive.microsofttranslator.com error: {(int)response.StatusCode} {response.ReasonPhrase}");
            return false;
        }
        var reply = response.Json();
        translated = (string)reply[0]["translations"][0]["text"];
        return true;
    }

    bool _ViaGoogleCloud() {
        var address = internet.urlAppend("https://translation.googleapis.com/language/translate/v2", "key=" + apiKey);
        var request = new { q = text, source = "en", target = targetLang, format = "text" };
        var response = internet.http.Post(address, internet.jsonContent(request));
        if (!response.IsSuccessStatusCode) {
            if (debugPrint) print.it($"translation.googleapis.com error: {(int)response.StatusCode} {response.ReasonPhrase}");
            return false;
        }
        var reply = response.Json();
        translated = (string)reply["data"]["translations"][0]["translatedText"];
        return true;
    }

    bool _ViaGoogleFree() {
        var address = internet.urlAppend("https://translate.googleapis.com/translate_a/single", "client=gtx", "sl=en", "tl=" + targetLang, "dt=t", "q=" + text);
        var response = internet.http.Get(address);
        if (!response.IsSuccessStatusCode) {
            if (debugPrint) print.it($"translate.googleapis.com error: {(int)response.StatusCode} {response.ReasonPhrase}");
            return false;
        }
        var reply = response.Json();
        //the first element of the reply is an array of items; join the first element of every item
        var joined = new StringBuilder();
        foreach (var item in reply[0].AsArray()) joined.Append((string)item[0]);
        translated = joined.ToString();
        return true;
    }
}

#pragma warning restore CS0162 //Unreachable code detected

//File cache of translated texts.
//A cache file contains the MD5 hash of the source text, immediately followed by the translated text.
//Usage: call Get; if it returns false, translate and call Save on the same object.
class _Cache {
    string _file, _hash; //set by Get, used by Save

    //Looks for a cached translation of text.
    //  folder - subfolder of the "translate" folder in folders.ThisAppDataRoaming.
    //  name - file name without extension.
    //  text - source text; its hash must match the hash stored at the start of the file.
    //  translated - receives the cached translation, or null if not found.
    //Returns false if Ctrl is pressed (keys.isCtrl), if the file does not exist, or if the stored hash does not match.
    public bool Get(string folder, string name, string text, out string translated) {
        _hash = Hash.MD5(text, true);
        var translateDir = folders.ThisAppDataRoaming + @"translate\";
        _file = translateDir + folder + @"\" + name + ".txt";
        //run.selectInExplorer(_file);

        translated = null;
        if (keys.isCtrl) return false;

        if (filesystem.exists(_file)) {
            var cached = filesystem.loadText(_file);
            if (cached.Starts(_hash)) translated = cached[_hash.Length..];
        }

        return translated != null;
    }

    //Saves the translated text, prefixed with the hash computed by the last Get, to the file selected by the last Get.
    public void Save(string translated) {
        var fileText = _hash + translated;
        filesystem.saveText(_file, fileText);
    }
}


//Translation services supported by _Translate:
//  GoogleFree - translate.googleapis.com/translate_a/single; no API key.
//  GoogleCloud - translation.googleapis.com/language/translate/v2; API key from the registry.
//  Microsoft - api.cognitive.microsofttranslator.com; Bearer token from edge.microsoft.com/translate/auth.
//  OpenAI - api.openai.com/v1/chat/completions; API key from the registry.
enum Translator { GoogleFree, GoogleCloud, Microsoft, OpenAI }

Changes:
2024-03-03: Added Microsoft Translator.


Messages In This Thread
Translate Cookbook with Google Translate or OpenAI ChatGPT - by Gintaras - 02-12-2024, 08:46 AM

Forum Jump:


Users browsing this thread: 1 Guest(s)