ဝိက်ရှေန်နရဳ mnwwiktionary https://mnw.wiktionary.org/wiki/%E1%80%9D%E1%80%AD%E1%80%80%E1%80%BA%E1%80%9B%E1%80%BE%E1%80%B1%E1%80%94%E1%80%BA%E1%80%94%E1%80%9B%E1%80%B3:%E1%80%99%E1%80%AF%E1%80%80%E1%80%BA%E1%80%9C%E1%80%AD%E1%80%80%E1%80%BA%E1%80%90%E1%80%99%E1%80%BA MediaWiki 1.47.0-wmf.4 case-sensitive မဳဒဳယာ တၟေင် ဓရီုကျာ ညးလွပ် ညးလွပ် ဓရီုကျာ ဝိက်ရှေန်နရဳ ဝိက်ရှေန်နရဳ ဓရီုကျာ ဝှာင် ဝှာင် ဓရီုကျာ မဳဒဳယာဝဳကဳ မဳဒဳယာဝဳကဳ ဓရီုကျာ ထာမ်ပလိက် ထာမ်ပလိက် ဓရီုကျာ ရီု ရီု ဓရီုကျာ ကဏ္ဍ ကဏ္ဍ ဓရီုကျာ အဆက်လက္ကရဴ အဆက်လက္ကရဴ ဓရီုကျာ ကာရန် ကာရန် ဓရီုကျာ အဘိဓာန် အဘိဓာန် ဓရီုကျာ ဗီုပြၚ်သိုၚ်တၟိ ဗီုပြၚ်သိုၚ်တၟိ ဓရီုကျာ TimedText TimedText talk မဝ်ဂျူ မဝ်ဂျူ ဓရီုကျာ Event Event talk ထာမ်ပလိက်:Documentation 10 95 395914 154873 2026-05-29T18:03:51Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:documentation]] ဇရေင် [[ထာမ်ပလိက်:Documentation]] 154873 wikitext text/x-wiki {{#invoke:documentation|show|hr=above}} j0s13scsii7qihzcaj44tsxynimxx68 မဝ်ဂျူ:languages/data/3/a 828 654 395876 394330 2026-05-29T15:30:23Z Intobesa.bot 1035 Bot: ပလေဝ်ဒါန် 395876 Scribunto text/plain local m_langdata = require("Module:languages/data") -- Loaded on demand, as it may not be needed (depending on the data). local function u(...) u = require("Module:string utilities").char return u(...) 
end local c = m_langdata.chars local p = m_langdata.puaChars local s = m_langdata.shared local m = {} m["aaa"] = { "ဂါဝ်တူဥူ", 35463, "alv-yek", "Latn", } m["aab"] = { "အာဠူမူ-ထေတ်သူ", 35034, "nic-alu", "Latn", } m["aac"] = { "အာရဳ", 1811224, "ngf-gsu", "Latn", } m["aad"] = { "အာမာန်", 56708, "paa-sep", "Latn", } -- "aae" is treated as "sq", see [[WT:LT]] m["aaf"] = { "အာရာနဒါန်", 3507928, "dra-mal", "Mlym", -- Mlym translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission) } m["aag"] = { "အာန်ဗရေတ်ခ်", 4741706, "paa-pal", "Latn", } m["aah"] = { "အာၜေအ်' အာရာဗေါတ်", 4670715, "paa-ara", "Latn", } m["aai"] = { "အာရဳဖှာန်မာ-မဳနဳယျာဖှဳယျာ", 4790560, "poz-ocw", "Latn", } m["aak"] = { "အာန်ခါဝေ", 3446690, "ngf-ata", "Latn", } m["aal"] = { "အာဖှာဲဒေ", 56434, "cdc-cbm", "Latn", } m["aan"] = { "အာန္နာမ်ဗေ", 3507873, "tup-gua", "Latn", } m["aap"] = { "ဘာရာ အဝ်ရာအ်ရာ", 56807, "sai-pek", "Latn", } m["aaq"] = { "ဘာနိုတ်သကေတ်", 3515185, "alg-abp", "Latn", } m["aas"] = { "အောတ်သေတ်", 56620, "cus-sou", "Latn", } -- "aat" is treated as "sq", see [[WT:LT]] m["aau"] = { "အာၜေဴ", 3073568, "paa-sep", "Latn", } m["aaw"] = { "သဝ်လံန်", 7558834, "poz-ocw", "Latn", } m["aax"] = { "မာန်ဒဝ်ဗဝ် အာတ်တာပ်", 12636156, "ngf-dum", "Latn", } m["aaz"] = { "အာန်မာရသဳ", 4740192, "poz-tim", "Latn", } m["aba"] = { "အေက်ဗေ", 34833, "alv-lag", "Latn", } m["abb"] = { "Bankon", 34860, "bnt-bsa", "Latn", } m["abc"] = { "Ambala Ayta", 3448896, "phi", "Latn", } m["abd"] = { "Camarines Norte Agta", 3399682, "phi", "Latn", } m["abe"] = { "အေတ်ဗဒ်နာကဳ", 17502788, "alg-abp", "Latn", } m["abf"] = { "Abai Sungai", 4663287, "poz-san", "Latn", } m["abg"] = { "Abaga", 3507954, "ngf-kya", "Latn", } m["abh"] = { "အာရဗဳ တဇေတ်ကဳ", 56833, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["abi"] = { "Abidji", 34781, "alv-lag", "Latn", } m["abj"] = { "အကာ-ဗဳအ်", 2356391, "qfa-ads", "Latn", } m["abl"] = { "Abung", 49215, "poz-lgx", "Latn", } m["abm"] = { "Abanyom", 7502, 
"nic-eko", "Latn", } m["abn"] = { "Abua", 34835, "nic-cde", "Latn", } m["abo"] = { "Abon", 35121, "nic-tvn", "Latn", } m["abp"] = { "အာဗေန်လာန် အာဲတာ", 3436621, "phi", "Latn", } m["abq"] = { "အဗါတ်သာ", 27567, "cau-abz", "Cyrl, Latn", translit = { Cyrl = "abq-translit" }, override_translit = true, display_text = { Cyrl = s["cau-Cyrl-displaytext"] }, strip_diacritics = { Cyrl = s["cau-Cyrl-stripdiacritics"], Latn = s["cau-Latn-stripdiacritics"], }, sort_key = { Cyrl = { from = { "гъв", "гъь", "гӏв", "джв", "джь", "къв", "къь", "кӏв", "кӏь", "хъв", "хӏв", "чӏв", -- 3 chars "гв", "гъ", "гь", "гӏ", "дж", "дз", "ё", "жв", "жь", "кв", "къ", "кь", "кӏ", "ль", "лӏ", "пӏ", "тл", "тш", "тӏ", "фӏ", "хв", "хъ", "хь", "хӏ", "цӏ", "чв", "чӏ", "шв", "шӏ" -- 2 chars }, to = { "г" .. p[3], "г" .. p[4], "г" .. p[7], "д" .. p[2], "д" .. p[3], "к" .. p[3], "к" .. p[4], "к" .. p[7], "к" .. p[8], "х" .. p[3], "х" .. p[6], "ч" .. p[3], "г" .. p[1], "г" .. p[2], "г" .. p[5], "г" .. p[6], "д" .. p[1], "д" .. p[4], "е" .. p[1], "ж" .. p[1], "ж" .. p[2], "к" .. p[1], "к" .. p[2], "к" .. p[5], "к" .. p[6], "л" .. p[1], "л" .. p[2], "п" .. p[1], "т" .. p[1], "т" .. p[2], "т" .. p[3], "ф" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[4], "х" .. p[5], "ц" .. p[1], "ч" .. p[1], "ч" .. p[2], "ш" .. p[1], "ш" .. 
p[2] } }, }, } -- "abr" Abron is treated as "ak" Akan, see [[WT:LT]] m["abs"] = { "မလေဝ် အာန်ဗဝ်နေတ်", 3124354, "crp", "Latn", ancestors = "ms", } m["abt"] = { "Ambulas", 3508015, "paa-nnd", "Latn", } m["abu"] = { "Abure", 34767, "alv-ptn", "Latn", } m["abv"] = { "အာရဗဳ ဗာဟာနာ", 56576, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["abw"] = { "Pal", 7126121, "ngf-omo", "Latn", } m["abx"] = { "Inabaknon", 2820163, "poz-sbj", "Latn", } m["aby"] = { "Aneme Wake", 3508107, "ngf-yar", "Latn", } m["abz"] = { "Abui", 2822110, "paa-alp", "Latn", } m["aca"] = { "Achagua", 2822982, "awd", "Latn", } m["acb"] = { "Áncá", 11130787, "nic-mom", "Latn", } m["acd"] = { "Gikyode", 35256, "alv-gng", "Latn", } m["ace"] = { "အာသံနဳစ်", 27683, "cmc", "Latn, ms-Arab", standard_chars = { Latn = "AaBbCcDdEeÉéÈèËëFfGgHhIiJjKkLlMmNnOoÔôÖöPpQqRrSsTtUuVvWwXxYyZz", -- current orthography (not yet add Arab) c.punc }, } m["ach"] = { "Acholi", 34926, "sdv-los", "Latn", } m["aci"] = { "အကာ-ကာရဳ", 2670418, "qfa-adn", "Latn", } m["ack"] = { "အကာ-ကိုဝ်ရာ", 3433680, "qfa-adn", "Latn", } m["acl"] = { "အာပ်-ဗေလ်", 3436825, "qfa-ads", "Latn", } m["acm"] = { "အာရဗဳ အဳရတ်", 56232, "sem-arb", "Arab, Hebr", strip_diacritics = { Arab = "ar-stripdiacritics", }, -- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] } m["acn"] = { "Achang", 56582, "tbq-brm", "Latn", } m["acp"] = { "Eastern Acipa", 5329945, "nic-kmk", "Latn", } m["acr"] = { "Achi", 34774, "myn", "Latn", } m["acs"] = { "Acroá", 2829146, "sai-cje", "Latn", } m["acu"] = { "Achuar", 2823170, "sai-jiv", "Latn", } m["acv"] = { "Achumawi", 56661, "nai-pal", "Latn", } m["acw"] = { "အာရဗဳဟဳဂျာဇြဳ", 56608, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["acx"] = { "အာရဗဳ အဝ်မာန်နဳ", 56630, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["acy"] = { "အာရဗဳ သာဲပရေက်", 56416, "sem-arb", "Latn, Grek", ancestors = "acm", strip_diacritics = { Latn = {remove_diacritics = c.grave .. c.acute .. 
c.breve}, }, -- Grek display_text, strip_diacritics, sort_key in [[Module:scripts/data]] standard_chars = { Latn = "AaBbCcDdΔδEeFfGgĠġĊċIiJjKkLlMmNnOoPpΘθRrSsTtUuVvWwXxYyZzŞş", c.punc }, } m["acz"] = { "Acheron", 34769, "alv-tal", "Latn", } m["ada"] = { "Adangme", 35141, "alv-gda", "Latn", } m["adb"] = { "Atauran", 125421255, "poz-cet", "Latn", } m["add"] = { "Dzodinka", 35266, "nic-nka", "Latn", } m["ade"] = { "Adele", 27740, "alv-ntg", "Latn", } m["adf"] = { "အာရဗဳ ဒဝ်ဖာရဳ", 56565, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["adg"] = { "Andegerebinha", 3508123, "aus-rnd", "Latn", } m["adh"] = { "Adhola", 1971400, "sdv-los", "Latn", } m["adi"] = { "အဒဳ", 56440, "sit-tan", "Latn", } m["adj"] = { "အာဒေတ်အူခရု", 34738, "alv-lag", "Latn", } m["adl"] = { "Galo", 2857892, "sit-tan", "Latn", } m["adn"] = { "Adang", 3398276, "paa-alp", "Latn", } m["ado"] = { "Abu", 56659, "paa-por", "Latn", } m["adp"] = { "အဒပ်", 3512402, "sit-tib", "Tibt", ancestors = "dz", translit = "Tibt-translit", override_translit = true, display_text = s["Tibt-displaytext"], entry_name = s["Tibt-entryname"], sort_key = "Tibt-sortkey", wikipedia_article = "Dzongkha", -- Considered a dialect of Dzongkha } m["adq"] = { "Adangbe", 34730, "alv-gda", "Latn", ancestors = "ada", } m["adr"] = { "Adonara", 4684505, "poz-cet", "Latn", } m["ads"] = { "Adamorobe Sign Language", 27709, "sgn", "Latn", -- when documented } m["adt"] = { "Adnyamathanha", 2225391, "aus-psw", "Latn", } m["adu"] = { "Aduge", 34734, "alv-nwd", "Latn", ancestors = "opa", } m["adw"] = { "Amondawa", 12626847, "tup-gua", "Latn", } m["ady"] = { "အာက်ဒေါတ်ကာယ်", 27776, "cau-cir", "Cyrl, Latn, Arab", translit = { Cyrl = "cau-cir-translit", Arab = "ar-translit", }, override_translit = true, display_text = { Cyrl = s["cau-Cyrl-displaytext"] }, strip_diacritics = { Cyrl = s["cau-Cyrl-stripdiacritics"], Latn = s["cau-Latn-stripdiacritics"], }, sort_key = { Cyrl = { from = { "кхъу", "къӏу", -- 4 chars "гъу", "джу", "дзу", "жъу", 
"къу", "кхъ", "къӏ", "кӏу", "кӏь", "лъу", "лӏу", "пӏу", "сӏу", "тӏу", "фӏу", "хъу", "цӏу", "чъу", "чӏу", "шъу", "шӏу", "щӏу", -- 3 chars "гу", "гъ", "гь", "дж", "дз", "ё", "жъ", "жь", "ку", "къ", "кь", "кӏ", "лъ", "ль", "лӏ", "пӏ", "сӏ", "тӏ", "фӏ", "ху", "хъ", "хь", "цу", "цӏ", "чу", "чъ", "чӏ", "шъ", "шӏ", "щӏ", "ӏу", "ӏь" -- 2 chars }, to = { "к" .. p[5], "к" .. p[7], "г" .. p[3], "д" .. p[2], "д" .. p[4], "ж" .. p[2], "к" .. p[3], "к" .. p[4], "к" .. p[6], "к" .. p[10], "к" .. p[11], "л" .. p[2], "л" .. p[5], "п" .. p[2], "с" .. p[2], "т" .. p[2], "ф" .. p[2], "х" .. p[3], "ц" .. p[3], "ч" .. p[3], "ч" .. p[5], "ш" .. p[2], "ш" .. p[4], "щ" .. p[2], "г" .. p[1], "г" .. p[2], "г" .. p[4], "д" .. p[1], "д" .. p[3], "е" .. p[1], "ж" .. p[1], "ж" .. p[3], "к" .. p[1], "к" .. p[2], "к" .. p[8], "к" .. p[9], "л" .. p[1], "л" .. p[3], "л" .. p[4], "п" .. p[1], "с" .. p[1], "т" .. p[1], "ф" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[4], "ц" .. p[1], "ц" .. p[2], "ч" .. p[1], "ч" .. p[2], "ч" .. p[4], "ш" .. p[1], "ш" .. p[3], "щ" .. p[1], "ӏ" .. p[1], "ӏ" .. 
p[2] } }, }, } m["adz"] = { "Adzera", 3327445, "poz-ocw", "Latn", } m["aea"] = { "Areba", 3509129, "aus-pam", "Latn", } m["aeb"] = { "အာရဗဳ တူနဳယှေန်", 56240, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["aed"] = { "Argentine Sign Language", 3322073, "sgn", "Latn", -- when documented } m["aee"] = { "ပါသျှယဳ ဒိုဟ်ဗၟံက်သၟဝ်ကျာ", 12642198, "inc-pas", "fa-Arab, Latn", } m["aek"] = { "Haeke", 5638166, "poz-cln", "Latn", } m["ael"] = { "Ambele", 34818, "nic-grf", "Latn", } m["aem"] = { "အါန်", 3507920, "mkh-vie", "Latn", } m["aen"] = { "Armenian Sign Language", 3446604, "sgn", } m["aeq"] = { "Aer", 3246741, "inc-wes", "Arab", } m["aer"] = { "အာရေန်တာယ်", 10728232, "aus-rnd", "Latn", } m["aes"] = { "Alsea", 2395641, nil, "Latn", } m["aeu"] = { "Akeu", 4700657, "tbq-sil", "Latn", } m["aew"] = { "Ambakich", 56642, "paa-eke", "Latn", } m["aey"] = { "Amele", 3508025, "ngf-gum", "Latn", } m["aez"] = { "ဨကာ", 16110528, "ngf-oro", "Latn", } m["afb"] = { "အာရဗဳအထံက်ဂၚ်", 56385, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["afd"] = { "Andai", 4753480, "paa-arf", "Latn", } m["afe"] = { "Putukwam", 3914930, "nic-ben", "Latn", } m["afg"] = { "Afghan Sign Language", 4689093, "sgn", } m["afh"] = { "Afrihili", 384707, "art", "Latn", type = "appendix-constructed", } m["afi"] = { "Akrukay", 57003, "paa-tam", "Latn", } m["afk"] = { "Nanubae", 6964416, "paa-arf", "Latn", } m["afn"] = { "Defaka", 35174, "nic", "Latn", } m["afo"] = { "Eloyi", 3914066, "nic-plt", "Latn", } m["afp"] = { "Tapei", 16887371, "paa-arf", "Latn", } m["afs"] = { "Afro-Seminole Creole", 27867, "crp", "Latn", ancestors = "en", } m["aft"] = { "Afitti", 3400829, "sdv-nyi", "Latn", } m["afu"] = { "Awutu", 34847, "alv-gng", "Latn", } m["afz"] = { "Obokuitai", 7075258, "paa-clp", "Latn", } m["aga"] = { "Aguano", 3331203, nil, "Latn", } m["agb"] = { "Legbo", 35584, "nic-uce", "Latn", } m["agc"] = { "Agatu", 34732, "alv-ido", "Latn", } m["agd"] = { "Agarabi", 3399642, "ngf-gau", "Latn", } 
m["age"] = { "Angal", 10951553, "ngf-ank", "Latn", } m["agf"] = { "Arguni", 12473346, "poz-cet", "Latn", } m["agg"] = { "Angor", 3508100, "paa-sng", "Latn", } m["agh"] = { "Ngelima", 7022266, "bnt-bta", "Latn", } m["agi"] = { "Agariya", 663586, "mun", "Deva", } m["agj"] = { "Argobba", 29292, "sem-eth", "Ethi", } m["agk"] = { "Isarog Agta", 6078982, "phi", "Latn", } m["agl"] = { "Fembe", 372927, "ngf-est", "Latn", } m["agm"] = { "Angaataha", 3508001, "ngf-ang", "Latn", } m["agn"] = { "Agutaynen", 3399717, "phi-kal", "Latn", } m["ago"] = { "Tainae", 7676186, "ngf-taa", "Latn", } m["agq"] = { "Aghem", 34737, "nic-rnw", "Latn", } m["agr"] = { "Aguaruna", 1526530, "sai-jiv", "Latn", } m["ags"] = { "Esimbi", 35260, "nic-bds", "Latn", } m["agt"] = { "ကာဂါယာန် အာက်ထာ ဗဟဵု", 5017296, "phi", "Latn", } m["agu"] = { "အာဂွာကာတေကာ", 35091, "myn", "Latn", } m["agv"] = { "ရောမါန်ဒါဒဝ် အာက်ဂါ", 3508085, "phi", "Latn", } m["agw"] = { "Kahua", 3191906, "poz-sls", "Latn", } m["agx"] = { "အာခူန်", 36498, "cau-esm", "Cyrl", translit = "cau-nec-translit", override_translit = true, display_text = s["cau-Cyrl-displaytext"], strip_diacritics = s["cau-Cyrl-stripdiacritics"], sort_key = { from = {"аь", "гъ", "гь", "гӏ", "дж", "ё", "къ", "кь", "кӏ", "оь", "пӏ", "тӏ", "уь", "хъ", "хь", "хӏ", "цӏ", "чӏ"}, to = {"а" .. p[1], "г" .. p[1], "г" .. p[2], "г" .. p[3], "д" .. p[1], "е" .. p[1], "к" .. p[1], "к" .. p[2], "к" .. p[3], "о" .. p[1], "п" .. p[1], "т" .. p[1], "у" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[3], "ц" .. p[1], "ч" .. 
p[1]} }, } m["agy"] = { "Southern Alta", 7569611, "phi", "Latn", } m["agz"] = { "Mount Iriga Agta", 6921432, "phi", "Latn", } m["aha"] = { "Ahanta", 34729, "alv-ctn", "Latn", } m["ahb"] = { "Axamb", 2874710, "poz-vnc", "Latn", } m["ahg"] = { "Qimant", 35663, "cus-cen", "Latn", } m["ahh"] = { "Aghu", 3436645, "ngf-awy", "Latn", } m["ahi"] = { "Tiagba", 3400073, "kro-aiz", "Latn", } m["ahk"] = { "အာခါ", 56643, "tbq-han", "Latn, Mymr, Thai", sort_key = { Thai = { from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"} }, }, } m["ahl"] = { "Igo", 35412, "alv-ktg", "Latn", } m["ahm"] = { "Mobu", 35967, "kro-aiz", "Latn", } m["ahn"] = { "အ'ဟာန်", 34723, "alv-aah", "Latn", } m["aho"] = { "အဟုမ်", 34778, "tai-swe", "Ahom", translit = "Ahom-translit", } m["ahp"] = { "Apro", 34810, "alv-kwa", "Latn", } m["ahr"] = { "အဟိရာန်နဳ", 15549890, "raj", "Deva", translit = "mr-translit", } m["ahs"] = { "Ashe", 34823, "nic-plc", "Latn", } m["aht"] = { "Ahtna", 21058, "ath-nor", "Latn", } m["aia"] = { "အာရဝ်သဳ", 2863483, "poz-sls", "Latn", } m["aib"] = { "Äynu", 27927, "qfa-mix", "Arab, Latn", ancestors = "ug, fa" } m["aic"] = { "Ainbai", 3332149, "paa-bew", "Latn", } m["aid"] = { "အာန်ကဝ်ရေဝ်ထေန်", 3279409, "aus-pmn", "Latn", } m["aie"] = { "Amara", 2841180, "poz-ocw", "Latn", } m["aif"] = { "Agi", 3331491, "paa-wpa", "Latn", } m["aig"] = { "အာန်တဳဂွါ ကဵု အၚ်္ဂလိက် ဗါၜူဒါ ခရဳအတ်လ်", 3244184, "crp", "Latn", ancestors = "en", } m["aih"] = { "အာဲ-ချာန်", 2827749, "qfa-kms", "Latn, Hani", sort_key = { Hani = "Hani-sortkey" }, } m["aii"] = { "အာက်သဳရိ နဳအဝ်-အာရာမေဣ", 29440, "sem-nna", "Syrc", translit = "aii-translit", strip_diacritics = "Syrc-stripdiacritics", } m["aij"] = { "Lishanid Noshan", 3436467, "sem-nna", "Hebr", -- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] } m["aik"] = { "Ake", 34808, "nic-pls", "Latn", } m["ail"] = { "Aimele", 3327418, "ngf-bos", "Latn", } m["aim"] = { "Aimol", 4697175, "tbq-kuk", "Latn, Beng", } m["ain"] = { "အာဲနု", 
27969, "qfa-ain", "Kana, Latn, Cyrl", sort_key = { Kana = "Kana-sortkey" }, } m["aio"] = { "အာဲတောန်", 3399725, "tai-swe", "Mymr", translit = "aio-phk-translit", display_text = s["aio-displaytext"], strip_diacritics = s["aio-stripdiacritics"], } m["aip"] = { "Burumakok", 5000984, "ngf-wok", "Latn", } m["air"] = { "Airoran", 3321131, "paa-saa", "Latn", } m["ait"] = { "အာရေဝ်ခေန်", 3446679, "tup", "Latn", } m["aiw"] = { "Aari", 7495, "omv-aro", "Latn", } m["aix"] = { "Aighon", 3504287, "poz-ocw", "Latn", } m["aiy"] = { "Ali", 34814, "gba-eas", "Latn", } m["aja"] = { "အာဂျာ (အေက်ဖရိက လ္ပာ်ဗၟံက်)", 3237491, "csu-bkr", "Latn", } m["ajg"] = { "Aja (West Africa)", 35035, "alv-gbe", "Latn", } m["aji"] = { "အဂျဳ", 2828867, "poz-cln", "Latn", } m["ajn"] = { "Andajin", 16111302, "aus-wor", "Latn", } m["ajp"] = { "အာရဗဳလပ်ဗေန်ထေန်သမၠုၚ်ကျာ", 55633582, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["ajw"] = { "Ajawa", 56645, "cdc-wst", "Latn", } m["ajz"] = { "Amri Karbi", 3508092, "tbq-kuk", "Latn", ancestors = "mjw", } m["akb"] = { "Angkola Batak", 2640686, "btk", "Latn, Batk", } m["akc"] = { "Mpur", 3327139, "qfa-iso", -- Papuan; based on Palmer (2018), Ethnologue and Glottolog "Latn", } m["akd"] = { "Ukpet-Ehom", 36618, "nic-ucr", "Latn", } m["ake"] = { "အကာဝယဝ်", 28059, "sai-pem", "Latn", } m["akf"] = { "Akpa", 34801, "alv-ido", "Latn", } m["akg"] = { "အနှတ်ခါလာန်ဂူ", 4750964, "poz-cet", "Latn", } m["akh"] = { "Angal Heneng", 10950354, "ngf-ank", "Latn", } m["aki"] = { "Aiome", 56735, "paa-aia", "Latn", } m["akj"] = { "ဇေရု", 2919121, "qfa-adn", "Latn, Deva", } m["akk"] = { "အခါဒဳယာန်", 35518, "sem-eas", "Xsux, Latn", } m["akl"] = { "အာက်ခလာန်", 8773, "phi", "Latn", } m["akm"] = { "Aka-Bo", 35361, "qfa-adn", "Latn", } m["ako"] = { "အာကူရဳအဝ်", 56650, "sai-tar", "Latn", } m["akp"] = { "Siwu", 36470, "alv-ntg", "Latn", } m["akq"] = { "Ak", 56654, "paa-sep", "Latn", } m["akr"] = { "အာရာကဳ", 2699882, "poz-vnn", "Latn", } m["aks"] = { "Akaselem", 34817, 
"nic-grm", "Latn", } m["akt"] = { "Akolet", 3330162, "poz-ocw", "Latn", } m["aku"] = { "Akum", 34799, "nic-ykb", "Latn", } m["akv"] = { "အာပ်ခါဝက်", 56423, "cau-and", "Cyrl", translit = "cau-nec-translit", override_translit = true, display_text = s["cau-Cyrl-displaytext"], strip_diacritics = s["cau-Cyrl-stripdiacritics"], } m["akw"] = { "Akwa", 34802, "bnt-mbo", "Latn", } m["akx"] = { "အကာ-ကေဒဵု", 3436816, "qfa-adc", "Latn", } m["aky"] = { "အကာ-ကောန်", 3436784, "qfa-adc", "Latn", } m["akz"] = { "အာလာဗာမာ", 1815020, "nai-mus", "Latn", } m["ala"] = { "Alago", 34813, "alv-ido", "Latn", } m["alc"] = { "ခါဝေတ်သကာ", 56544, "aqa", "Latn", } m["ald"] = { "Alladian", 34837, "alv-lag", "Latn", } m["ale"] = { "အာလောတ်", 27210, "esx", "Latn, Cyrl", } m["alf"] = { "Alege", 34815, "nic-ben", "Latn", } m["alh"] = { "Alawa", 2147917, "aus-gun", "Latn", } m["ali"] = { "Amaimon", 3327427, "ngf-mad", "Latn", } m["alj"] = { "အလံၚ်ဂံၚ်", 3327423, "phi", "Latn", } m["alk"] = { "Alak", 2714690, "mkh", "Latn", } m["all"] = { "Allar", 3393634, "dra-mal", "Mlym", -- Mlym translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission) } -- "aln" is treated as "sq", see [[WT:LT]] m["alm"] = { "Amblong", 11022615, "poz-vnn", "Latn", } m["alo"] = { "Larike-Wakasihu", 3217929, "poz-cma", "Latn", } m["alp"] = { "Alune", 3327367, "poz-cet", "Latn", } m["alq"] = { "အာယ်လ်ကေန်ဂွေန်", 28092, "alg", "Latn, Cans", ancestors = "oj", } m["alr"] = { "အဠူတေ", 28213, "qfa-ckn", "Cyrl", strip_diacritics = { from = {"['’]"}, to = {"ʼ"} }, sort_key = { from = {"вʼ", "гʼ", "ғ", "ә", "ё", "ӄ", "ӈ"}, to = {"в" .. p[1], "г" .. p[1], "г" .. p[2], "е" .. p[1], "е" .. p[2], "к" .. p[1], "н" .. p[1]} }, } m["alt"] = { "အာန်တာဲ ဒိုဟ်သမၠုၚ်ကျာ", 1991779, "trk-kkp", "Cyrl", translit = "Altai-translit", sort_key = { from = {"ј", "ё", "ҥ", "ӧ", "ӱ"}, to = {"д" .. p[1], "е" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. 
p[1]} }, } m["alu"] = { "'အာရေဝ်'အာရာ", 5160, "poz-sls", "Latn", } m["alw"] = { "Alaba", 56652, "cus-hec", "Latn", } m["alx"] = { "Amol", 3504260, "paa-pal", "Latn", } m["aly"] = { "Alyawarr", 3327389, "aus-rnd", "Latn", } m["alz"] = { "Alur", 56507, "sdv-los", "Latn", } m["ama"] = { "Amanayé", 3508053, "tup-gua", "Latn", } m["amb"] = { "Ambo", 3450142, "nic-tvn", "Latn", } m["amc"] = { "Amahuaca", 2669150, "sai-pan", "Latn", } m["ame"] = { "Yanesha'", 3088540, "awd", "Latn", } m["amf"] = { "ဟာမေ-ဗါန်နာ", 35764, "omv-aro", "Latn, Ethi", sort_key = "amf-utilities" } m["amg"] = { "Amurdag", 3360016, "aus-wdj", "Latn", } m["ami"] = { "ဨမေတ်", 35132, "map", "Latn", } m["amj"] = { "Amdang", 28335, "ssa-fur", "Latn", } m["amk"] = { "အီုဗါဲ", 1875885, "poz-hce", "Latn", } m["aml"] = { "War-Jaintia", 56321, "aav-khs", "Latn", } m["amm"] = { "အာမာ", 3446626, "paa-lma", "Latn", } m["amn"] = { "အာမနှာတ်", 3327399, "paa-war", "Latn", } m["amo"] = { "Amo", 34826, "nic-kne", "Latn", } m["amp"] = { "Alamblak", 56688, "paa-sep", "Latn", } m["amq"] = { "Amahai", 3327384, "poz-cma", "Latn", } m["amr"] = { "Amarakaeri", 35128, "sai-har", "Latn", } m["ams"] = { "အမာမဳ-အဝ်ဃှဳမာ လ္ပာ်ဒိုဟ်သမၠုၚ်ကျာ", 2840986, "jpx-nry", "Jpan", translit = s["jpx-translit"], display_text = s["jpx-displaytext"], strip_diacritics = s["jpx-stripdiacritics"], sort_key = s["jpx-sortkey"], } m["amt"] = { "Amto", 56517, "paa-amu", "Latn", } m["amu"] = { "ဂေရေရဝ် အာမတ်သဂဝ်", 3501942, "omq", "Latn", } m["amv"] = { "Ambelau", 2669214, "poz-cma", "Latn", } m["amw"] = { "နဳအဝ်-အာရမေအဳ လ္ပာ်ပလိုတ်", 34226, "sem-arw", "Armi, Syrc, Latn", strip_diacritics = { Syrc = "Syrc-stripdiacritics" }, } m["amx"] = { "Anmatyerre", 10412317, "aus-rnd", "Latn", } m["amy"] = { "Ami", 10408315, "aus-dal", "Latn", } m["amz"] = { "Atampaya", 3446651, "aus-pam", "Latn", } m["ana"] = { "Andaqui", 2846078, nil, "Latn", } m["anb"] = { "Andoa", 2846171, "sai-zap", "Latn", } m["anc"] = { "Ngas", 35999, "cdc-wst", "Latn", } m["and"] = { 
"အာန်သာတ်သ်", 3513300, "poz-hce", "Latn", } m["ane"] = { "သာရခူ", 3571097, "poz-cln", "Latn", } m["anf"] = { "Animere", 34783, "alv-ktg", "Latn", } m["ang"] = { "အၚ်္ဂလိက်တြေံ", 42365, "gmw-ang", "Latn, Runr", translit = { Runr = "Runr-translit" }, strip_diacritics = { Latn = { remove_diacritics = c.acute .. c.circ .. c.macron .. c.breve .. c.dotabove .. c.diaer .. c.dotbelow, from = {"[Ƿƿ]"}, to = {{ ["Ƿ"] = "W", ["ƿ"] = "w", }}, }, }, sort_key = { Latn = { remove_diacritics = c.acute .. c.circ .. c.macron .. c.breve .. c.dotabove .. c.diaer .. c.dotbelow, from = {"[æƀꝺðꝼᵹȝłœꞃꞅꞇþꝥꝧƿ]"}, to = {{ ["æ"] = "ae", ["ƀ"] = "b", ["ꝺ"] = "d", ["ð"] = "d" .. p[1], ["ꝼ"] = "f", ["ᵹ"] = "g", ["ȝ"] = "g" .. p[1], ["ł"] = "l", ["œ"] = "oe", ["ꞃ"] = "r", ["ꞅ"] = "s", ["ꞇ"] = "t", ["þ"] = "t" .. p[1], ["ꝥ"] = "t" .. p[1], ["ꝧ"] = "t" .. p[1], ["ƿ"] = "w", }}, }, }, standard_chars = { Latn = "AaÆæBbCcDdÐðEeFfGgHhIiLlMmNnOoŒœPpRrSsTtÞþUuWwXxYy", c.punc, }, } m["anh"] = { "Nend", 6991554, "ngf-wso", "Latn", } m["ani"] = { "အာန်ဒဳ", 34849, "cau-and", "Cyrl", translit = "cau-nec-translit", override_translit = true, display_text = s["cau-Cyrl-displaytext"], strip_diacritics = s["cau-Cyrl-stripdiacritics"], } m["anj"] = { "Anor", 56458, "paa-aia", "Latn", } m["ank"] = { "Goemai", 35272, "cdc-wst", "Latn", } m["anl"] = { "Anu", 4777679, "sit-mru", "Latn", } m["anm"] = { "Anāl", 56235, "tbq-kuk", "Latn", } m["ann"] = { "Obolo", 36614, "nic-lcr", "Latn", } m["ano"] = { "Andoque", 2669225, "qfa-iso", "Latn", } m["anp"] = { "အာန်ဂဳကာ", 28378, "inc-bih", "Deva, Kthi", translit = { Deva = "hi-translit", Kthi = "bho-Kthi-translit", }, } m["anq"] = { "ဂျရာဝါ", 2475526, "qfa-ong", "Latn", } m["anr"] = { "Andh", 4754314, "inc-sou", "Deva", } m["ans"] = { "Anserma", 3446613, "sai-chc", "Latn", } m["ant"] = { "Antakarinya", 921304, "aus-psw", "Latn", } m["anu"] = { "Anuak", 56677, "sdv-lon", "Latn", } m["anv"] = { "ဒါန်ညာ", 35187, "nic-mam", "Latn", } m["anw"] = { "Anaang", 2845320, "nic-ief", 
"Latn", } m["anx"] = { "Andra-Hus", 2846195, "poz-aay", "Latn", } m["any"] = { "Anyi", 28395, "alv-ctn", "Latn", } m["anz"] = { "Anem", 56512, "qfa-dis", -- Papuan; might be an isolate or in a putative West New Britain family "Latn", } m["aoa"] = { "Angolar", 34994, "crp", "Latn", ancestors = "pt", } m["aob"] = { "Abom", 3446647, "qfa-dis", -- Papuan; possibly a divergent Tirio language (Anim family), or a top-level TNG node "Latn", } m["aoc"] = { "ပေမန်", 10729616, "sai-pem", "Latn", } m["aod"] = { "Andarum", 3507888, "paa-ata", "Latn", } m["aoe"] = { "Angal Enen", 10951638, "ngf-ank", "Latn", } m["aof"] = { "Bragat", 3507977, "paa-pal", "Latn", } m["aog"] = { "Angoram", 56366, -- cf 6754745 for merged dialect "paa-lse", "Latn", } m["aoi"] = { "Anindilyakwa", 2714654, "aus-arn", "Latn", } m["aoj"] = { "Mufian", 3507881, "paa-ara", "Latn", } m["aok"] = { "Arhö", 4790086, "poz-cln", "Latn", } m["aol"] = { "Alorese", 3332062, "poz", "Latn", } m["aom"] = { "Ömie", 8078975, "ngf-koi", "Latn", } m["aon"] = { "Bumbita Arapesh", 3508044, "paa-ara", "Latn", } m["aor"] = { "Aore", 12627129, "poz-vnn", "Latn", } m["aos"] = { "Taikat", 7676018, "paa-taa", "Latn", } m["aot"] = { "အိန္ဒိ အာတုံ", 5646, "tbq-bdg", "Latn, Beng", } m["aou"] = { "အအ်ဥူ", 16109994, "gio", "Latn", -- also Hani? 
} m["aox"] = { "Atorada", 3507932, "awd", "Latn", } m["aoz"] = { "Uab Meto", 3441962, "poz-tim", "Latn", } m["apb"] = { "သာ'အ်", 36294, "poz-sls", "Latn", } m["apc"] = { "အာရဗဳလပ်ဗေန်ထေန်သၟဝ်ကျာ", 22809485, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["apd"] = { "အာရဗဳ သုဒါန်နဳ", 56573, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["ape"] = { "Bukiyip", 3507895, "paa-ara", "Latn", } m["apf"] = { "Pahanan Agta", 7135432, "phi", "Latn", } m["apg"] = { "Ampanang", 4748035, "poz", "Latn", } m["aph"] = { "Athpare", 3449126, "sit-kie", "Deva, Latn", } m["api"] = { "Apiaká", 3507941, "tup-gua", "Latn", } m["apj"] = { "Jicarilla", 28277, "apa", "Latn", } m["apk"] = { "Plains Apache", 27861, "apa", "Latn", } m["apl"] = { "Lipan", 28269, "apa", "Latn", } m["apm"] = { "Chiricahua", 13368, "apa", "Latn", } m["apn"] = { "အဖဳနာရဲ", 2858311, "sai-nje", "Latn", } m["apo"] = { "Ambul", 12627135, "poz-ocw", "Latn", } m["app"] = { "Apma", 2669188, "poz-vnn", "Latn", } m["apq"] = { "အ-ၜေအ်သိခွါ", 28466, "qfa-adc", "Latn", } m["apr"] = { "Arop-Lokep", 2863482, "poz-ocw", "Latn", } m["aps"] = { "Arop-Sissano", 12627242, "poz-ocw", "Latn", } m["apt"] = { "Apatani", 56306, "sit-tan", "Latn", } m["apu"] = { "Apurinã", 2859081, "awd", "Latn", } m["apv"] = { "Alapmunte", 16110782, "sai-nmk", "Latn", } m["apw"] = { "အာဖေန်ချဳ လ္ပာ်ပလိုတ်", 28060, "apa", "Latn", } m["apx"] = { "Aputai", 12473343, "poz-tim", "Latn", } m["apy"] = { "အာက်ပါလာဲန်", 2736980, "sai-gui", "Latn", } m["apz"] = { "သာပဵုယဝ်ကာ", 7398693, "ngf-woj", "Latn", } m["aqc"] = { "အာဆိ", 34915, "cau-lzg", "Cyrl", translit = "cau-nec-translit", override_translit = true, display_text = s["cau-Cyrl-displaytext"], strip_diacritics = s["cau-Cyrl-stripdiacritics"], sort_key = { from = { "ккъӏв", "ххьӏв", -- 5 chars "гъӏв", "ёоӏ", "ккъӏ", "ккъв", "къӏв", "ллъв", "ххьӏ", "хъӏв", "хьӏв", "ццӏв", "ччӏв", -- 4 chars "ааӏ", "гӏв", "гъӏ", "гъв", "гьв", "ееӏ", "ёӏ", "ёо", "ииӏ", "кӏв", "ккв", "ккъ", "къӏ", 
"къв", "кьв", "лӏв", "ллъ", "лъв", "льв", "ооӏ", "пӏв", "ппв", "ссв", "тӏв", "ттв", "ууӏ", "хӏв", "ххв", "хъӏ", "хъв", "хьӏ", "цӏв", "ццӏ", "ццв", "чӏв", "ччӏ", "ээӏ", "юуӏ", "яаӏ", -- 3 chars "аӏ", "аа", "гӏ", "гв", "гъ", "гь", "дв", "еӏ", "ее", "ё", "жв", "зв", "иӏ", "ии", "кӏ", "кв", "кк", "къ", "кь", "лӏ", "лв", "лъ", "ль", "оӏ", "оо", "пӏ", "пв", "пп", "св", "сс", "тӏ", "тв", "тт", "уӏ", "уу", "фв", "хӏ", "хв", "хх", "хъ", "цӏ", "цв", "цц", "чӏ", "чв", "шв", "щв", "эӏ", "ээ", "юӏ", "юу", "яӏ", "яа" -- 2 chars }, to = { "к" .. p[8], "х" .. p[7], "г" .. p[6], "е" .. p[7], "к" .. p[7], "к" .. p[9], "к" .. p[12], "л" .. p[5], "х" .. p[6], "х" .. p[10], "х" .. p[13], "ц" .. p[6], "ч" .. p[5], "а" .. p[3], "г" .. p[2], "г" .. p[5], "г" .. p[7], "г" .. p[9], "е" .. p[3], "е" .. p[5], "е" .. p[6], "и" .. p[3], "к" .. p[2], "к" .. p[5], "к" .. p[6], "к" .. p[11], "к" .. p[13], "к" .. p[15], "л" .. p[2], "л" .. p[4], "л" .. p[7], "л" .. p[9], "о" .. p[3], "п" .. p[2], "п" .. p[5], "с" .. p[3], "т" .. p[2], "т" .. p[5], "у" .. p[3], "х" .. p[2], "х" .. p[5], "х" .. p[9], "х" .. p[11], "х" .. p[12], "ц" .. p[2], "ц" .. p[5], "ц" .. p[7], "ч" .. p[2], "ч" .. p[4], "э" .. p[3], "ю" .. p[3], "я" .. p[3], "а" .. p[1], "а" .. p[2], "г" .. p[1], "г" .. p[3], "г" .. p[4], "г" .. p[8], "д" .. p[1], "е" .. p[1], "е" .. p[2], "е" .. p[4], "ж" .. p[1], "з" .. p[1], "и" .. p[1], "и" .. p[2], "к" .. p[1], "к" .. p[3], "к" .. p[4], "к" .. p[10], "к" .. p[14], "л" .. p[1], "л" .. p[3], "л" .. p[6], "л" .. p[8], "о" .. p[1], "о" .. p[2], "п" .. p[1], "п" .. p[3], "п" .. p[4], "с" .. p[1], "с" .. p[2], "т" .. p[1], "т" .. p[3], "т" .. p[4], "у" .. p[1], "у" .. p[2], "ф" .. p[1], "х" .. p[1], "х" .. p[3], "х" .. p[4], "х" .. p[8], "ц" .. p[1], "ц" .. p[3], "ц" .. p[4], "ч" .. p[1], "ч" .. p[3], "ш" .. p[1], "щ" .. p[1], "э" .. p[1], "э" .. p[2], "ю" .. p[1], "ю" .. p[2], "я" .. p[1], "я" .. 
p[2] } }, } m["aqd"] = { "Ampari Dogon", 4748057, "nic-dgw", "Latn", } m["aqg"] = { "Arigidi", 34829, "alv-von", "Latn", } m["aqm"] = { "Atohwaim", 11732297, "paa-kay", "Latn", } m["aqn"] = { "Northern Alta", 7058116, "phi", "Latn", } m["aqp"] = { "Atakapa", 10975683, "qfa-iso", "Latn", } m["aqr"] = { "Arhâ", 4790085, "poz-cln", "Latn", } m["aqt"] = { "Angaité", 15736037, "sai-mas", "Latn", } m["aqz"] = { "Akuntsu", 4701960, "tup", "Latn", } m["arc"] = { "အာရမေအဳ", 28602, "sem-ara", "Hebr, Armi, Syrc, Palm, Nbat, Phnx, Mand, Samr, Hatr, Elym", translit = { Armi = "Armi-translit", Palm = "Palm-translit", }, strip_diacritics = { -- The first three were added by [[User:Wikitiki89]] in 2015 for use with Syriac, which has diacritics that look -- like a diaeresis (syāmē) and macrons above and below (mṭalqānā); see Wikipedia [[w:Syriac alphabet]]. But -- I don't know if they are actually represented using these diacritics. Syrc = {remove_diacritics = c.macron .. c.diaer .. c.macronbelow .. u(0x0730) .. "-" .. 
u(0x0748)}, }, -- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] -- Samr strip_diacritics, sort_key in [[Module:scripts/data]]; previously no sort_key for Samr, presumably a mistake -- Phnx translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission) } m["ard"] = { "Arabana", 3507959, "aus-kar", "Latn", } m["are"] = { "Western Arrernte", 12645549, "aus-rnd", "Latn", } m["arh"] = { "Arhuaco", 2640621, "cba", "Latn", } m["ari"] = { "Arikara", 56539, "cdd", "Latn", strip_diacritics = {remove_diacritics = c.acute}, } m["arj"] = { "Arapaso", 9627356, "sai-tuc", "Latn", } m["ark"] = { "Arikapú", 3446640, "sai-mje", "Latn", } m["arl"] = { "Arabela", 2591221, "sai-zap", "Latn", } m["arn"] = { "မာၜေအ်ဓုန်ကာန်", 33730, "sai-ara", "Latn", } m["aro"] = { "Araona", 958414, "sai-tac", "Latn", } m["arp"] = { "အာရာပါဟဝ်", 56417, "alg-ara", "Latn", } m["arq"] = { "အာရဗဳ အာန်လ်ဂျဳရဳယျာ", 56499, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["arr"] = { "Arara-Karo", 35539, "tup", "Latn", } m["ars"] = { "အာရဗဳ နေတ်ဒဳ", 56574, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["aru"] = { "Arua", 2746221, "auf", "Latn", } m["arv"] = { "Arbore", 56883, "cus-eas", "Latn", } m["arw"] = { "အာရတ်ဝါတ်", 2655664, "awd-taa", "Latn", } m["arx"] = { "Aruá", 3507907, "tup", "Latn", } m["ary"] = { "အာရဗဳ မဝ်ရဝ်ကာန်", 56426, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["arz"] = { "အာရဗဳ အဳဂျေပ်", 29919, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["asa"] = { "Pare", 36403, "bnt-par", "Latn", } m["asb"] = { "Assiniboine", 2591288, "sio-dkt", "Latn", } m["asc"] = { "Casuarina Coast Asmat", 11732046, "ngf-asm", "Latn", } m["ase"] = { "အရေဝ်ဘာသာကွတ်တဲအမေရိကာန်", 14759, "sgn", "Sgnw", } m["asf"] = { "Auslan", 29525, "sgn", "Latn", -- when documented } m["asg"] = { "Cishingini", 35199, "nic-kam", "Latn", } m["ash"] = { "Abishira", 2871740, "qfa-dis", -- extinct, poorly 
documented; isolate or in a proposed Tequiraca-Canichana family by Kaufman (1994) "Latn", } m["asi"] = { "Buruwai", 5001031, "ngf-sab", "Latn", } m["asj"] = { "Nsari", 36418, "nic-bbe", "Latn", } m["ask"] = { "အာပ်သကေန်", 29379, "nur-sou", "Arab, Latn", } m["asl"] = { "Asilulu", 12473347, "poz-cma", "Latn", } m["asn"] = { "ဃှေန်ဂူ အေက်သဝေနဳ", 8044571, "tup-gua", "Latn", } m["aso"] = { "Dano", 5220979, "ngf-gah", "Latn", } m["asp"] = { "Algerian Sign Language", 3135421, "sgn", } m["asq"] = { "Austrian Sign Language", 36668, "sgn", "Latn", -- when documented } m["asr"] = { "Asuri", 3504321, "mun", "Latn", -- when documented } m["ass"] = { "Ipulo", 35408, "nic-tvc", "Latn", } m["ast"] = { "အေက်သတဝ်ရေန်", 29507, "roa-asl", "Latn", } m["asu"] = { "Tocantins Asurini", 32041490, "tup-gua", "Latn", } m["asv"] = { "Asoa", 56296, "csu-maa", "Latn", } m["asw"] = { "Australian Aboriginal Sign Language", 955216, "sgn", "Latn", -- when documented } m["asx"] = { "Muratayak", 11732766, "ngf-war", "Latn", } m["asy"] = { "Yaosakor Asmat", 16113158, "ngf-asm", "Latn", } m["asz"] = { "As", 2866218, "poz-hce", "Latn", } m["ata"] = { "Pele-Ata", 56511, "qfa-dis", -- Papuan; possibly in a putative West New Britain family, or an isolate "Latn", } m["atb"] = { "ဇြာဲဝါ", 56594, "tbq-brm", "Latn, Lisu", -- also Hani? 
translit = {Lisu = "Lisu-translit"}, sort_key = {Lisu = s["Lisu-sortkey"]}, } m["atc"] = { "Atsahuaca", 4817730, "sai-pan", "Latn", } m["atd"] = { "Ata Manobo", 12627315, "mno", "Latn", } m["ate"] = { "အေက်တာမ်ဗါဝ်လ်", 4813055, "ngf-wso", "Latn", } m["atg"] = { "Okpela", 7082551, "alv-yek", "Latn", } m["ati"] = { "Attié", 34844, "alv-lag", "Latn", } m["atj"] = { "အထိကာမိတ်", 56590, "alg", "Latn", ancestors = "cr", } m["atk"] = { "Ati", 3217458, "phi", "Latn", } m["atl"] = { "Mount Iraya Agta", 6921430, "phi", "Latn", } m["atm"] = { "Ata", 4812603, "phi", "Latn", } m["ato"] = { "Atong (Cameroon)", 34824, "nic-grs", "Latn", } m["atp"] = { "Pudtol Atta", 12640726, "phi", "Latn", } m["atq"] = { "Aralle-Tabulahan", 4783889, "poz-ssw", "Latn", } m["atr"] = { "ဝါဲမဳရဳ-အာထရဝ်ရဳ", 56865, "sai-car", "Latn", } m["ats"] = { "ဂရတ် ဗါန်တေ", 56628, "alg-ara", "Latn", } m["att"] = { "ပါန်ပလဝ်နာ အာတ်တာ", 12639245, "phi", "Latn", } m["atu"] = { "Reel", 7306882, "sdv-dnu", "Latn", } m["atv"] = { "အာန်တာယ် လ္ပာ်သၟဝ်ကျာ", 2640863, "trk-ssb", "Cyrl", translit = "Altai-translit", } m["atw"] = { "Atsugewi", 56718, "nai-pal", "Latn", } m["atx"] = { "Arutani", 56609, nil, "Latn", } m["aty"] = { "Aneityum", 2379113, "poz-vns", "Latn", } m["atz"] = { "Arta", 3508067, "phi", "Latn", } m["aua"] = { "Asumboa", 4811870, "poz-tem", "Latn", } m["aub"] = { "Alugu", 12626798, "tbq-urp", "Latn", -- also Hani? 
} m["auc"] = { "Huaorani", 758570, "qfa-iso", "Latn", } m["aud"] = { "Anuta", 35326, "poz-pnp", "Latn", } m["aug"] = { "Aguna", 34733, "alv-gbe", "Latn", } m["auh"] = { "Aushi", 2872082, "bnt-sbi", "Latn", } m["aui"] = { "Anuki", 3508132, "poz-ocw", "Latn", } m["auj"] = { "အာဂျဳလာ", 56398, "ber", "Latn, Arab, Tfng", } m["auk"] = { "Heyo", 3504295, "paa-hya", "Latn", } m["aul"] = { "Aulua", 427300, "poz-vnc", "Latn", } m["aum"] = { "အာသူ", 34798, "alv-ngb", "Latn", } m["aun"] = { "Molmo One", 12637224, "paa-trr", "Latn", } m["auo"] = { "Auyokawa", 56247, "cdc-wst", "Latn", } m["aup"] = { "Makayam", 6738863, "paa-tir", "Latn", } m["auq"] = { "Anus", 23855, "poz-ocw", "Latn", } m["aur"] = { "Aruek", 3504279, "paa-kom", "Latn", } m["aut"] = { "Austral", 2669261, "poz-pep", "Latn", } m["auu"] = { "Auye", 4827334, "ngf-pan", "Latn", } m["auw"] = { "Awyi", 3513326, "paa-taa", "Latn", } m["aux"] = { "အာဝ်ရာက်", 3507995, "tup-gua", "Latn", } m["auy"] = { "Auyana", 2873211, "ngf-gau", "Latn", } m["auz"] = { "အာရဗဳ ဥူသဗက်ကဳ", 3399507, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["avb"] = { "Avau", 12627412, "poz-ocw", "Latn", } m["avd"] = { "အာယ်ဝဳရဳ-ဝဳဒါရဳ", 3327357, "xme", "fa-Arab", ancestors = "xme-mid", } m["avi"] = { "Avikam", 34840, "alv-lag", "Latn", } m["avk"] = { "ခါဝ်တာဝါယ်", 1377116, "art", "Latn", type = "appendix-constructed", } m["avm"] = { "Angkamuthi", 62603022, "aus-pmn", "Latn", } m["avn"] = { "Avatime", 34796, "alv-ktg", "Latn", } m["avo"] = { "Agavotaguerra", 3508007, "awd", "Latn", } m["avs"] = { "Aushiri", 3409318, "sai-zap", "Latn", } m["avt"] = { "Au", 3446608, "paa-wap", "Latn", } m["avu"] = { "အာတ်ဝါဝ်ခါယျ", 56685, "csu-mma", "Latn", } m["avv"] = { "Avá-Canoeiro", 4829584, "tup-gua", "Latn", } m["awa"] = { "အဝါဒဳ", 29579, "inc-hie", "Deva, Kthi, fa-Arab", ancestors = "inc-oaw", translit = { Deva = "hi-translit" }, } m["awb"] = { "Awa (New Guinea)", 2874650, "ngf-gau", "Latn", } m["awc"] = { "Cicipu", 35193, "nic-kam", "Latn", } 
m["awe"] = { "အာဝပ်တဳ", 4830038, "tup", "Latn", } m["awg"] = { "အာန်ဂူတဳမဳရဳ", 4764288, "aus-pam", "Latn", } m["awh"] = { "Awbono", 3446684, "paa-baa", "Latn", } m["awi"] = { "Aekyom", 3399691, "paa-kae", "Latn", } m["awk"] = { "အဝါဗာကဴ", 3449138, "aus-pam", "Latn", } m["awm"] = { "Arawum", 4784537, "ngf-rai", "Latn", } m["awn"] = { "Awngi", 34934, "cus-cen", "Ethi", } m["awo"] = { "Awak", 3446643, "alv-wjk", "Latn", } m["awr"] = { "Awera", 56379, "paa-flp", "Latn", } m["aws"] = { "South Awyu", 12633986, "ngf-awy", "Latn", } m["awt"] = { "Araweté", 4784535, "tup-gua", "Latn", } m["awu"] = { "Central Awyu", 12628801, "ngf-awy", "Latn", } m["awv"] = { "Jair Awyu", 16110177, "ngf-awy", "Latn", } m["aww"] = { "Awun", 56369, "paa-sep", "Latn", } m["awx"] = { "Awara", 2874670, "ngf-waa", "Latn", } m["awy"] = { "Edera Awyu", 12630425, "ngf-awy", "Latn", } m["axb"] = { "Abipón", 11252539, "sai-guc", "Latn", } m["axe"] = { "Ayerrerenge", 16112737, "aus-pam", "Latn", } m["axg"] = { "Arára (Mato Grosso)", 3446660, nil, "Latn", } m["axk"] = { "Aka (Central Africa)", 11010149, "bnt-ngn", "Latn", } m["axl"] = { "Lower Southern Aranda", 6693295, "aus-rnd", "Latn", } m["axm"] = { "အာမေနဳယျာအဒေါဝ်", 4438498, "hyx", "Armn", ancestors = "xcl", translit = "Armn-translit", override_translit = true, strip_diacritics = { remove_diacritics = "՞՜՛՟", from = {"եւ", "ՙ", "՚"}, to = {"և", "ʻ", "’"} } } m["axx"] = { "Xârâgurè", 8045635, "poz-cln", "Latn", } m["aya"] = { "Awar", 56876, "paa-baw", "Latn", } m["ayb"] = { "Ayizo", 34841, "alv-pph", "Latn", } m["ayd"] = { "Ayabadhu", 3509164, "aus-pmn", "Latn", } m["aye"] = { "Ayere", 34788, "alv-aah", "Latn", } m["ayg"] = { "Nyanga (Togo)", 35446, "alv-gng", "Latn", } m["ayi"] = { "Leyigha", 3914492, "nic-uce", "Latn", } m["ayk"] = { "Akuku", 3450179, "alv-nwd", "Latn", } m["ayl"] = { "အာရဗဳလေတ်ဗျာ", 56503, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["ayn"] = { "အာရဗဳ ယာက်မနဳ", 1686766, "sem-arb", "Arab, Hebr", strip_diacritics 
= { Arab = "ar-stripdiacritics", }, -- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] } m["ayo"] = { "Ayoreo", 56634, "sai-zam", "Latn", } m["ayp"] = { "အာရဗဳ မာက်သဝ်ပဝ်တေမဳယာန် သၟဝ်ကျာ", 56577, "sem-arb", "Arab", ancestors = "acm", strip_diacritics = "ar-stripdiacritics", } m["ayq"] = { "Ayi", 56449, "paa-sep", "Latn", } m["ays"] = { "Sorsogon Ayta", 7563752, "phi", "Latn", } m["ayt"] = { "Bataan Ayta", 4921648, "phi", "Latn", } m["ayu"] = { "Ayu", 34786, "alv", "Latn", } -- ayy deleted and removed from ISO; per the removal request, "no linguistic data exists for any [Ayta] language that the -- ancestors of this group might have once spoken. And thus, there is no evidence that this group ever had a language -- distinct from any other Philippine language." [Lobel] m["ayz"] = { "Maybrat", 4830892, "paa-may", -- either an isolate; grouped with Abun and the West Bird's Head family; or in the putative West Papuan family "Latn", } m["aza"] = { "Azha", 4832486, "tbq-axi", "Latn", } m["azd"] = { "ဒူရာန်ဂဝ် နာဟွာတာယ်လ်လ္ပာ်ဖာဗၟံက်", 16115449, "azc-dur", "Latn", } m["azg"] = { "San Pedro Amuzgos Amuzgo", 35092, "omq", "Latn", } m["azm"] = { "Ipalapa Amuzgo", 12633013, "omq", "Latn", } m["azn"] = { "Western Durango နာဟွာတာယ်လ်", 12645553, "azc-dur", "Latn", } m["azo"] = { "Awing", 34856, "nic-nge", "Latn", } m["azt"] = { "Faire Atta", 12630884, "phi", "Latn", } m["azz"] = { "ဟာဲလာန် ပွာယ်ဗလာ နာဟွာတာယ်လ်", 12953754, "azc-nah", "Latn", } return require("Module:languages").finalizeData(m, "language") b3pj7g4zpr457pzqai4ac8r1vla0z1n 395877 395876 2026-05-29T15:33:19Z Intobesa.bot 1035 Bot: ပလေဝ်ဒါန် 395877 Scribunto text/plain local m_langdata = require("Module:languages/data") -- Loaded on demand, as it may not be needed (depending on the data). local function u(...) u = require("Module:string utilities").char return u(...) 
end local c = m_langdata.chars local p = m_langdata.puaChars local s = m_langdata.shared local m = {} m["aaa"] = { "ဂါဝ်တူဥူ", 35463, "alv-yek", "Latn", } m["aab"] = { "အာဠူမူ-ထေတ်သူ", 35034, "nic-alu", "Latn", } m["aac"] = { "အာရဳ", 1811224, "ngf-gsu", "Latn", } m["aad"] = { "အာမာန်", 56708, "paa-sep", "Latn", } -- "aae" is treated as "sq", see [[WT:LT]] m["aaf"] = { "အာရာနဒါန်", 3507928, "dra-mal", "Mlym", -- Mlym translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission) } m["aag"] = { "အာန်ဗရေတ်ခ်", 4741706, "paa-pal", "Latn", } m["aah"] = { "အာၜေအ်' အာရာဗေါတ်", 4670715, "paa-ara", "Latn", } m["aai"] = { "အာရဳဖှာန်မာ-မဳနဳယျာဖှဳယျာ", 4790560, "poz-ocw", "Latn", } m["aak"] = { "အာန်ခါဝေ", 3446690, "ngf-ata", "Latn", } m["aal"] = { "အာဖှာဲဒေ", 56434, "cdc-cbm", "Latn", } m["aan"] = { "အာန္နာမ်ဗေ", 3507873, "tup-gua", "Latn", } m["aap"] = { "ဘာရာ အဝ်ရာအ်ရာ", 56807, "sai-pek", "Latn", } m["aaq"] = { "ဘာနိုတ်သကေတ်", 3515185, "alg-abp", "Latn", } m["aas"] = { "အောတ်သေတ်", 56620, "cus-sou", "Latn", } -- "aat" is treated as "sq", see [[WT:LT]] m["aau"] = { "အာၜေဴ", 3073568, "paa-sep", "Latn", } m["aaw"] = { "သဝ်လံန်", 7558834, "poz-ocw", "Latn", } m["aax"] = { "မာန်ဒဝ်ဗဝ် အာတ်တာပ်", 12636156, "ngf-dum", "Latn", } m["aaz"] = { "အာန်မာရသဳ", 4740192, "poz-tim", "Latn", } m["aba"] = { "အေက်ဗေ", 34833, "alv-lag", "Latn", } m["abb"] = { "Bankon", 34860, "bnt-bsa", "Latn", } m["abc"] = { "Ambala Ayta", 3448896, "phi", "Latn", } m["abd"] = { "Camarines Norte Agta", 3399682, "phi", "Latn", } m["abe"] = { "အေတ်ဗဒ်နာကဳ", 17502788, "alg-abp", "Latn", } m["abf"] = { "Abai Sungai", 4663287, "poz-san", "Latn", } m["abg"] = { "Abaga", 3507954, "ngf-kya", "Latn", } m["abh"] = { "အာရဗဳ တဇေတ်ကဳ", 56833, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["abi"] = { "Abidji", 34781, "alv-lag", "Latn", } m["abj"] = { "အကာ-ဗဳအ်", 2356391, "qfa-ads", "Latn", } m["abl"] = { "Abung", 49215, "poz-lgx", "Latn", } m["abm"] = { "Abanyom", 7502, 
"nic-eko", "Latn", } m["abn"] = { "Abua", 34835, "nic-cde", "Latn", } m["abo"] = { "Abon", 35121, "nic-tvn", "Latn", } m["abp"] = { "အာဗေန်လာန် အာဲတာ", 3436621, "phi", "Latn", } m["abq"] = { "အဗါတ်သာ", 27567, "cau-abz", "Cyrl, Latn", translit = { Cyrl = "abq-translit" }, override_translit = true, display_text = { Cyrl = s["cau-Cyrl-displaytext"] }, strip_diacritics = { Cyrl = s["cau-Cyrl-stripdiacritics"], Latn = s["cau-Latn-stripdiacritics"], }, sort_key = { Cyrl = { from = { "гъв", "гъь", "гӏв", "джв", "джь", "къв", "къь", "кӏв", "кӏь", "хъв", "хӏв", "чӏв", -- 3 chars "гв", "гъ", "гь", "гӏ", "дж", "дз", "ё", "жв", "жь", "кв", "къ", "кь", "кӏ", "ль", "лӏ", "пӏ", "тл", "тш", "тӏ", "фӏ", "хв", "хъ", "хь", "хӏ", "цӏ", "чв", "чӏ", "шв", "шӏ" -- 2 chars }, to = { "г" .. p[3], "г" .. p[4], "г" .. p[7], "д" .. p[2], "д" .. p[3], "к" .. p[3], "к" .. p[4], "к" .. p[7], "к" .. p[8], "х" .. p[3], "х" .. p[6], "ч" .. p[3], "г" .. p[1], "г" .. p[2], "г" .. p[5], "г" .. p[6], "д" .. p[1], "д" .. p[4], "е" .. p[1], "ж" .. p[1], "ж" .. p[2], "к" .. p[1], "к" .. p[2], "к" .. p[5], "к" .. p[6], "л" .. p[1], "л" .. p[2], "п" .. p[1], "т" .. p[1], "т" .. p[2], "т" .. p[3], "ф" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[4], "х" .. p[5], "ц" .. p[1], "ч" .. p[1], "ч" .. p[2], "ш" .. p[1], "ш" .. 
p[2] } }, }, } -- "abr" Abron is treated as "ak" Akan, see [[WT:LT]] m["abs"] = { "မလေဝ် အာန်ဗဝ်နေတ်", 3124354, "crp", "Latn", ancestors = "ms", } m["abt"] = { "Ambulas", 3508015, "paa-nnd", "Latn", } m["abu"] = { "Abure", 34767, "alv-ptn", "Latn", } m["abv"] = { "အာရဗဳ ဗာဟာနာ", 56576, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["abw"] = { "Pal", 7126121, "ngf-omo", "Latn", } m["abx"] = { "Inabaknon", 2820163, "poz-sbj", "Latn", } m["aby"] = { "Aneme Wake", 3508107, "ngf-yar", "Latn", } m["abz"] = { "Abui", 2822110, "paa-alp", "Latn", } m["aca"] = { "Achagua", 2822982, "awd", "Latn", } m["acb"] = { "Áncá", 11130787, "nic-mom", "Latn", } m["acd"] = { "Gikyode", 35256, "alv-gng", "Latn", } m["ace"] = { "အာသံနဳစ်", 27683, "cmc", "Latn, ms-Arab", standard_chars = { Latn = "AaBbCcDdEeÉéÈèËëFfGgHhIiJjKkLlMmNnOoÔôÖöPpQqRrSsTtUuVvWwXxYyZz", -- current orthography (not yet add Arab) c.punc }, } m["ach"] = { "Acholi", 34926, "sdv-los", "Latn", } m["aci"] = { "အကာ-ကာရဳ", 2670418, "qfa-adn", "Latn", } m["ack"] = { "အကာ-ကိုဝ်ရာ", 3433680, "qfa-adn", "Latn", } m["acl"] = { "အာပ်-ဗေလ်", 3436825, "qfa-ads", "Latn", } m["acm"] = { "အာရဗဳ အဳရတ်", 56232, "sem-arb", "Arab, Hebr", strip_diacritics = { Arab = "ar-stripdiacritics", }, -- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] } m["acn"] = { "Achang", 56582, "tbq-brm", "Latn", } m["acp"] = { "Eastern Acipa", 5329945, "nic-kmk", "Latn", } m["acr"] = { "Achi", 34774, "myn", "Latn", } m["acs"] = { "Acroá", 2829146, "sai-cje", "Latn", } m["acu"] = { "Achuar", 2823170, "sai-jiv", "Latn", } m["acv"] = { "Achumawi", 56661, "nai-pal", "Latn", } m["acw"] = { "အာရဗဳဟဳဂျာဇြဳ", 56608, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["acx"] = { "အာရဗဳ အဝ်မာန်နဳ", 56630, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["acy"] = { "အာရဗဳ သာဲပရေက်", 56416, "sem-arb", "Latn, Grek", ancestors = "acm", strip_diacritics = { Latn = {remove_diacritics = c.grave .. c.acute .. 
c.breve}, }, -- Grek display_text, strip_diacritics, sort_key in [[Module:scripts/data]] standard_chars = { Latn = "AaBbCcDdΔδEeFfGgĠġĊċIiJjKkLlMmNnOoPpΘθRrSsTtUuVvWwXxYyZzŞş", c.punc }, } m["acz"] = { "Acheron", 34769, "alv-tal", "Latn", } m["ada"] = { "Adangme", 35141, "alv-gda", "Latn", } m["adb"] = { "Atauran", 125421255, "poz-cet", "Latn", } m["add"] = { "Dzodinka", 35266, "nic-nka", "Latn", } m["ade"] = { "Adele", 27740, "alv-ntg", "Latn", } m["adf"] = { "အာရဗဳ ဒဝ်ဖာရဳ", 56565, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["adg"] = { "Andegerebinha", 3508123, "aus-rnd", "Latn", } m["adh"] = { "Adhola", 1971400, "sdv-los", "Latn", } m["adi"] = { "အဒဳ", 56440, "sit-tan", "Latn", } m["adj"] = { "အာဒေတ်အူခရု", 34738, "alv-lag", "Latn", } m["adl"] = { "Galo", 2857892, "sit-tan", "Latn", } m["adn"] = { "Adang", 3398276, "paa-alp", "Latn", } m["ado"] = { "Abu", 56659, "paa-por", "Latn", } m["adp"] = { "အဒပ်", 3512402, "sit-tib", "Tibt", ancestors = "dz", translit = "Tibt-translit", override_translit = true, display_text = s["Tibt-displaytext"], entry_name = s["Tibt-entryname"], sort_key = "Tibt-sortkey", wikipedia_article = "Dzongkha", -- Considered a dialect of Dzongkha } m["adq"] = { "Adangbe", 34730, "alv-gda", "Latn", ancestors = "ada", } m["adr"] = { "Adonara", 4684505, "poz-cet", "Latn", } m["ads"] = { "Adamorobe Sign Language", 27709, "sgn", "Latn", -- when documented } m["adt"] = { "Adnyamathanha", 2225391, "aus-psw", "Latn", } m["adu"] = { "Aduge", 34734, "alv-nwd", "Latn", ancestors = "opa", } m["adw"] = { "Amondawa", 12626847, "tup-gua", "Latn", } m["ady"] = { "အာက်ဒေါတ်ကာယ်", 27776, "cau-cir", "Cyrl, Latn, Arab", translit = { Cyrl = "cau-cir-translit", Arab = "ar-translit", }, override_translit = true, display_text = { Cyrl = s["cau-Cyrl-displaytext"] }, strip_diacritics = { Cyrl = s["cau-Cyrl-stripdiacritics"], Latn = s["cau-Latn-stripdiacritics"], }, sort_key = { Cyrl = { from = { "кхъу", "къӏу", -- 4 chars "гъу", "джу", "дзу", "жъу", 
"къу", "кхъ", "къӏ", "кӏу", "кӏь", "лъу", "лӏу", "пӏу", "сӏу", "тӏу", "фӏу", "хъу", "цӏу", "чъу", "чӏу", "шъу", "шӏу", "щӏу", -- 3 chars "гу", "гъ", "гь", "дж", "дз", "ё", "жъ", "жь", "ку", "къ", "кь", "кӏ", "лъ", "ль", "лӏ", "пӏ", "сӏ", "тӏ", "фӏ", "ху", "хъ", "хь", "цу", "цӏ", "чу", "чъ", "чӏ", "шъ", "шӏ", "щӏ", "ӏу", "ӏь" -- 2 chars }, to = { "к" .. p[5], "к" .. p[7], "г" .. p[3], "д" .. p[2], "д" .. p[4], "ж" .. p[2], "к" .. p[3], "к" .. p[4], "к" .. p[6], "к" .. p[10], "к" .. p[11], "л" .. p[2], "л" .. p[5], "п" .. p[2], "с" .. p[2], "т" .. p[2], "ф" .. p[2], "х" .. p[3], "ц" .. p[3], "ч" .. p[3], "ч" .. p[5], "ш" .. p[2], "ш" .. p[4], "щ" .. p[2], "г" .. p[1], "г" .. p[2], "г" .. p[4], "д" .. p[1], "д" .. p[3], "е" .. p[1], "ж" .. p[1], "ж" .. p[3], "к" .. p[1], "к" .. p[2], "к" .. p[8], "к" .. p[9], "л" .. p[1], "л" .. p[3], "л" .. p[4], "п" .. p[1], "с" .. p[1], "т" .. p[1], "ф" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[4], "ц" .. p[1], "ц" .. p[2], "ч" .. p[1], "ч" .. p[2], "ч" .. p[4], "ш" .. p[1], "ш" .. p[3], "щ" .. p[1], "ӏ" .. p[1], "ӏ" .. 
p[2] } }, }, } m["adz"] = { "Adzera", 3327445, "poz-ocw", "Latn", } m["aea"] = { "Areba", 3509129, "aus-pam", "Latn", } m["aeb"] = { "အာရဗဳ တူနဳယှေန်", 56240, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["aed"] = { "Argentine Sign Language", 3322073, "sgn", "Latn", -- when documented } m["aee"] = { "ပါသျှယဳ ဒိုဟ်ဗၟံက်သၟဝ်ကျာ", 12642198, "inc-pas", "fa-Arab, Latn", } m["aek"] = { "Haeke", 5638166, "poz-cln", "Latn", } m["ael"] = { "Ambele", 34818, "nic-grf", "Latn", } m["aem"] = { "အါန်", 3507920, "mkh-vie", "Latn", } m["aen"] = { "Armenian Sign Language", 3446604, "sgn", } m["aeq"] = { "Aer", 3246741, "inc-wes", "Arab", } m["aer"] = { "အာရေန်တာယ်", 10728232, "aus-rnd", "Latn", } m["aes"] = { "Alsea", 2395641, nil, "Latn", } m["aeu"] = { "Akeu", 4700657, "tbq-sil", "Latn", } m["aew"] = { "Ambakich", 56642, "paa-eke", "Latn", } m["aey"] = { "Amele", 3508025, "ngf-gum", "Latn", } m["aez"] = { "ဨကာ", 16110528, "ngf-oro", "Latn", } m["afb"] = { "အာရဗဳအထံက်ဂၚ်", 56385, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["afd"] = { "Andai", 4753480, "paa-arf", "Latn", } m["afe"] = { "Putukwam", 3914930, "nic-ben", "Latn", } m["afg"] = { "Afghan Sign Language", 4689093, "sgn", } m["afh"] = { "Afrihili", 384707, "art", "Latn", type = "appendix-constructed", } m["afi"] = { "Akrukay", 57003, "paa-tam", "Latn", } m["afk"] = { "Nanubae", 6964416, "paa-arf", "Latn", } m["afn"] = { "Defaka", 35174, "nic", "Latn", } m["afo"] = { "Eloyi", 3914066, "nic-plt", "Latn", } m["afp"] = { "Tapei", 16887371, "paa-arf", "Latn", } m["afs"] = { "Afro-Seminole Creole", 27867, "crp", "Latn", ancestors = "en", } m["aft"] = { "Afitti", 3400829, "sdv-nyi", "Latn", } m["afu"] = { "Awutu", 34847, "alv-gng", "Latn", } m["afz"] = { "Obokuitai", 7075258, "paa-clp", "Latn", } m["aga"] = { "Aguano", 3331203, nil, "Latn", } m["agb"] = { "Legbo", 35584, "nic-uce", "Latn", } m["agc"] = { "Agatu", 34732, "alv-ido", "Latn", } m["agd"] = { "Agarabi", 3399642, "ngf-gau", "Latn", } 
m["age"] = { "Angal", 10951553, "ngf-ank", "Latn", } m["agf"] = { "Arguni", 12473346, "poz-cet", "Latn", } m["agg"] = { "Angor", 3508100, "paa-sng", "Latn", } m["agh"] = { "Ngelima", 7022266, "bnt-bta", "Latn", } m["agi"] = { "Agariya", 663586, "mun", "Deva", } m["agj"] = { "Argobba", 29292, "sem-eth", "Ethi", } m["agk"] = { "Isarog Agta", 6078982, "phi", "Latn", } m["agl"] = { "Fembe", 372927, "ngf-est", "Latn", } m["agm"] = { "Angaataha", 3508001, "ngf-ang", "Latn", } m["agn"] = { "Agutaynen", 3399717, "phi-kal", "Latn", } m["ago"] = { "Tainae", 7676186, "ngf-taa", "Latn", } m["agq"] = { "Aghem", 34737, "nic-rnw", "Latn", } m["agr"] = { "Aguaruna", 1526530, "sai-jiv", "Latn", } m["ags"] = { "Esimbi", 35260, "nic-bds", "Latn", } m["agt"] = { "ကာဂါယာန် အာက်ထာ ဗဟဵု", 5017296, "phi", "Latn", } m["agu"] = { "အာဂွာကာတေကာ", 35091, "myn", "Latn", } m["agv"] = { "ရောမါန်ဒါဒဝ် အာက်ဂါ", 3508085, "phi", "Latn", } m["agw"] = { "Kahua", 3191906, "poz-sls", "Latn", } m["agx"] = { "အာခူန်", 36498, "cau-esm", "Cyrl", translit = "cau-nec-translit", override_translit = true, display_text = s["cau-Cyrl-displaytext"], strip_diacritics = s["cau-Cyrl-stripdiacritics"], sort_key = { from = {"аь", "гъ", "гь", "гӏ", "дж", "ё", "къ", "кь", "кӏ", "оь", "пӏ", "тӏ", "уь", "хъ", "хь", "хӏ", "цӏ", "чӏ"}, to = {"а" .. p[1], "г" .. p[1], "г" .. p[2], "г" .. p[3], "д" .. p[1], "е" .. p[1], "к" .. p[1], "к" .. p[2], "к" .. p[3], "о" .. p[1], "п" .. p[1], "т" .. p[1], "у" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[3], "ц" .. p[1], "ч" .. 
p[1]} }, } m["agy"] = { "Southern Alta", 7569611, "phi", "Latn", } m["agz"] = { "Mount Iriga Agta", 6921432, "phi", "Latn", } m["aha"] = { "Ahanta", 34729, "alv-ctn", "Latn", } m["ahb"] = { "Axamb", 2874710, "poz-vnc", "Latn", } m["ahg"] = { "Qimant", 35663, "cus-cen", "Latn", } m["ahh"] = { "Aghu", 3436645, "ngf-awy", "Latn", } m["ahi"] = { "Tiagba", 3400073, "kro-aiz", "Latn", } m["ahk"] = { "အာခါ", 56643, "tbq-han", "Latn, Mymr, Thai", sort_key = { Thai = { from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"} }, }, } m["ahl"] = { "Igo", 35412, "alv-ktg", "Latn", } m["ahm"] = { "Mobu", 35967, "kro-aiz", "Latn", } m["ahn"] = { "အ'ဟာန်", 34723, "alv-aah", "Latn", } m["aho"] = { "အဟုမ်", 34778, "tai-swe", "Ahom", translit = "Ahom-translit", } m["ahp"] = { "Apro", 34810, "alv-kwa", "Latn", } m["ahr"] = { "အဟိရာန်နဳ", 15549890, "raj", "Deva", translit = "mr-translit", } m["ahs"] = { "အာက်သှ်", 34823, "nic-plc", "Latn", } m["aht"] = { "Ahtna", 21058, "ath-nor", "Latn", } m["aia"] = { "အာရဝ်သဳ", 2863483, "poz-sls", "Latn", } m["aib"] = { "Äynu", 27927, "qfa-mix", "Arab, Latn", ancestors = "ug, fa" } m["aic"] = { "Ainbai", 3332149, "paa-bew", "Latn", } m["aid"] = { "အာန်ကဝ်ရေဝ်ထေန်", 3279409, "aus-pmn", "Latn", } m["aie"] = { "Amara", 2841180, "poz-ocw", "Latn", } m["aif"] = { "Agi", 3331491, "paa-wpa", "Latn", } m["aig"] = { "အာန်တဳဂွါ ကဵု အၚ်္ဂလိက် ဗါၜူဒါ ခရဳအတ်လ်", 3244184, "crp", "Latn", ancestors = "en", } m["aih"] = { "အာဲ-ချာန်", 2827749, "qfa-kms", "Latn, Hani", sort_key = { Hani = "Hani-sortkey" }, } m["aii"] = { "အာက်သဳရိ နဳအဝ်-အာရာမေဣ", 29440, "sem-nna", "Syrc", translit = "aii-translit", strip_diacritics = "Syrc-stripdiacritics", } m["aij"] = { "Lishanid Noshan", 3436467, "sem-nna", "Hebr", -- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] } m["aik"] = { "Ake", 34808, "nic-pls", "Latn", } m["ail"] = { "Aimele", 3327418, "ngf-bos", "Latn", } m["aim"] = { "Aimol", 4697175, "tbq-kuk", "Latn, Beng", } m["ain"] = { 
"အာဲနု", 27969, "qfa-ain", "Kana, Latn, Cyrl", sort_key = { Kana = "Kana-sortkey" }, } m["aio"] = { "အာဲတောန်", 3399725, "tai-swe", "Mymr", translit = "aio-phk-translit", display_text = s["aio-displaytext"], strip_diacritics = s["aio-stripdiacritics"], } m["aip"] = { "Burumakok", 5000984, "ngf-wok", "Latn", } m["air"] = { "Airoran", 3321131, "paa-saa", "Latn", } m["ait"] = { "အာရေဝ်ခေန်", 3446679, "tup", "Latn", } m["aiw"] = { "Aari", 7495, "omv-aro", "Latn", } m["aix"] = { "Aighon", 3504287, "poz-ocw", "Latn", } m["aiy"] = { "Ali", 34814, "gba-eas", "Latn", } m["aja"] = { "အာဂျာ (အေက်ဖရိက လ္ပာ်ဗၟံက်)", 3237491, "csu-bkr", "Latn", } m["ajg"] = { "Aja (West Africa)", 35035, "alv-gbe", "Latn", } m["aji"] = { "အဂျဳ", 2828867, "poz-cln", "Latn", } m["ajn"] = { "Andajin", 16111302, "aus-wor", "Latn", } m["ajp"] = { "အာရဗဳလပ်ဗေန်ထေန်သမၠုၚ်ကျာ", 55633582, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["ajw"] = { "Ajawa", 56645, "cdc-wst", "Latn", } m["ajz"] = { "Amri Karbi", 3508092, "tbq-kuk", "Latn", ancestors = "mjw", } m["akb"] = { "Angkola Batak", 2640686, "btk", "Latn, Batk", } m["akc"] = { "Mpur", 3327139, "qfa-iso", -- Papuan; based on Palmer (2018), Ethnologue and Glottolog "Latn", } m["akd"] = { "Ukpet-Ehom", 36618, "nic-ucr", "Latn", } m["ake"] = { "အကာဝယဝ်", 28059, "sai-pem", "Latn", } m["akf"] = { "Akpa", 34801, "alv-ido", "Latn", } m["akg"] = { "အနှတ်ခါလာန်ဂူ", 4750964, "poz-cet", "Latn", } m["akh"] = { "Angal Heneng", 10950354, "ngf-ank", "Latn", } m["aki"] = { "Aiome", 56735, "paa-aia", "Latn", } m["akj"] = { "ဇေရု", 2919121, "qfa-adn", "Latn, Deva", } m["akk"] = { "အခါဒဳယာန်", 35518, "sem-eas", "Xsux, Latn", } m["akl"] = { "အာက်ခလာန်", 8773, "phi", "Latn", } m["akm"] = { "Aka-Bo", 35361, "qfa-adn", "Latn", } m["ako"] = { "အာကူရဳအဝ်", 56650, "sai-tar", "Latn", } m["akp"] = { "Siwu", 36470, "alv-ntg", "Latn", } m["akq"] = { "Ak", 56654, "paa-sep", "Latn", } m["akr"] = { "အာရာကဳ", 2699882, "poz-vnn", "Latn", } m["aks"] = { "Akaselem", 34817, 
"nic-grm", "Latn", } m["akt"] = { "Akolet", 3330162, "poz-ocw", "Latn", } m["aku"] = { "Akum", 34799, "nic-ykb", "Latn", } m["akv"] = { "အာပ်ခါဝက်", 56423, "cau-and", "Cyrl", translit = "cau-nec-translit", override_translit = true, display_text = s["cau-Cyrl-displaytext"], strip_diacritics = s["cau-Cyrl-stripdiacritics"], } m["akw"] = { "Akwa", 34802, "bnt-mbo", "Latn", } m["akx"] = { "အကာ-ကေဒဵု", 3436816, "qfa-adc", "Latn", } m["aky"] = { "အကာ-ကောန်", 3436784, "qfa-adc", "Latn", } m["akz"] = { "အာလာဗာမာ", 1815020, "nai-mus", "Latn", } m["ala"] = { "Alago", 34813, "alv-ido", "Latn", } m["alc"] = { "ခါဝေတ်သကာ", 56544, "aqa", "Latn", } m["ald"] = { "Alladian", 34837, "alv-lag", "Latn", } m["ale"] = { "အာလောတ်", 27210, "esx", "Latn, Cyrl", } m["alf"] = { "Alege", 34815, "nic-ben", "Latn", } m["alh"] = { "Alawa", 2147917, "aus-gun", "Latn", } m["ali"] = { "Amaimon", 3327427, "ngf-mad", "Latn", } m["alj"] = { "အလံၚ်ဂံၚ်", 3327423, "phi", "Latn", } m["alk"] = { "Alak", 2714690, "mkh", "Latn", } m["all"] = { "Allar", 3393634, "dra-mal", "Mlym", -- Mlym translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission) } -- "aln" is treated as "sq", see [[WT:LT]] m["alm"] = { "Amblong", 11022615, "poz-vnn", "Latn", } m["alo"] = { "Larike-Wakasihu", 3217929, "poz-cma", "Latn", } m["alp"] = { "Alune", 3327367, "poz-cet", "Latn", } m["alq"] = { "အာယ်လ်ကေန်ဂွေန်", 28092, "alg", "Latn, Cans", ancestors = "oj", } m["alr"] = { "အဠူတေ", 28213, "qfa-ckn", "Cyrl", strip_diacritics = { from = {"['’]"}, to = {"ʼ"} }, sort_key = { from = {"вʼ", "гʼ", "ғ", "ә", "ё", "ӄ", "ӈ"}, to = {"в" .. p[1], "г" .. p[1], "г" .. p[2], "е" .. p[1], "е" .. p[2], "к" .. p[1], "н" .. p[1]} }, } m["alt"] = { "အာန်တာဲ ဒိုဟ်သမၠုၚ်ကျာ", 1991779, "trk-kkp", "Cyrl", translit = "Altai-translit", sort_key = { from = {"ј", "ё", "ҥ", "ӧ", "ӱ"}, to = {"д" .. p[1], "е" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. 
p[1]} }, } m["alu"] = { "'အာရေဝ်'အာရာ", 5160, "poz-sls", "Latn", } m["alw"] = { "Alaba", 56652, "cus-hec", "Latn", } m["alx"] = { "Amol", 3504260, "paa-pal", "Latn", } m["aly"] = { "Alyawarr", 3327389, "aus-rnd", "Latn", } m["alz"] = { "Alur", 56507, "sdv-los", "Latn", } m["ama"] = { "Amanayé", 3508053, "tup-gua", "Latn", } m["amb"] = { "Ambo", 3450142, "nic-tvn", "Latn", } m["amc"] = { "Amahuaca", 2669150, "sai-pan", "Latn", } m["ame"] = { "Yanesha'", 3088540, "awd", "Latn", } m["amf"] = { "ဟာမေ-ဗါန်နာ", 35764, "omv-aro", "Latn, Ethi", sort_key = "amf-utilities" } m["amg"] = { "Amurdag", 3360016, "aus-wdj", "Latn", } m["ami"] = { "ဨမေတ်", 35132, "map", "Latn", } m["amj"] = { "Amdang", 28335, "ssa-fur", "Latn", } m["amk"] = { "အီုဗါဲ", 1875885, "poz-hce", "Latn", } m["aml"] = { "War-Jaintia", 56321, "aav-khs", "Latn", } m["amm"] = { "အာမာ", 3446626, "paa-lma", "Latn", } m["amn"] = { "အာမနှာတ်", 3327399, "paa-war", "Latn", } m["amo"] = { "Amo", 34826, "nic-kne", "Latn", } m["amp"] = { "Alamblak", 56688, "paa-sep", "Latn", } m["amq"] = { "Amahai", 3327384, "poz-cma", "Latn", } m["amr"] = { "Amarakaeri", 35128, "sai-har", "Latn", } m["ams"] = { "အမာမဳ-အဝ်ဃှဳမာ လ္ပာ်ဒိုဟ်သမၠုၚ်ကျာ", 2840986, "jpx-nry", "Jpan", translit = s["jpx-translit"], display_text = s["jpx-displaytext"], strip_diacritics = s["jpx-stripdiacritics"], sort_key = s["jpx-sortkey"], } m["amt"] = { "Amto", 56517, "paa-amu", "Latn", } m["amu"] = { "ဂေရေရဝ် အာမတ်သဂဝ်", 3501942, "omq", "Latn", } m["amv"] = { "Ambelau", 2669214, "poz-cma", "Latn", } m["amw"] = { "နဳအဝ်-အာရမေအဳ လ္ပာ်ပလိုတ်", 34226, "sem-arw", "Armi, Syrc, Latn", strip_diacritics = { Syrc = "Syrc-stripdiacritics" }, } m["amx"] = { "Anmatyerre", 10412317, "aus-rnd", "Latn", } m["amy"] = { "Ami", 10408315, "aus-dal", "Latn", } m["amz"] = { "Atampaya", 3446651, "aus-pam", "Latn", } m["ana"] = { "Andaqui", 2846078, nil, "Latn", } m["anb"] = { "Andoa", 2846171, "sai-zap", "Latn", } m["anc"] = { "Ngas", 35999, "cdc-wst", "Latn", } m["and"] = { 
"အာန်သာတ်သ်", 3513300, "poz-hce", "Latn", } m["ane"] = { "သာရခူ", 3571097, "poz-cln", "Latn", } m["anf"] = { "Animere", 34783, "alv-ktg", "Latn", } m["ang"] = { "အၚ်္ဂလိက်တြေံ", 42365, "gmw-ang", "Latn, Runr", translit = { Runr = "Runr-translit" }, strip_diacritics = { Latn = { remove_diacritics = c.acute .. c.circ .. c.macron .. c.breve .. c.dotabove .. c.diaer .. c.dotbelow, from = {"[Ƿƿ]"}, to = {{ ["Ƿ"] = "W", ["ƿ"] = "w", }}, }, }, sort_key = { Latn = { remove_diacritics = c.acute .. c.circ .. c.macron .. c.breve .. c.dotabove .. c.diaer .. c.dotbelow, from = {"[æƀꝺðꝼᵹȝłœꞃꞅꞇþꝥꝧƿ]"}, to = {{ ["æ"] = "ae", ["ƀ"] = "b", ["ꝺ"] = "d", ["ð"] = "d" .. p[1], ["ꝼ"] = "f", ["ᵹ"] = "g", ["ȝ"] = "g" .. p[1], ["ł"] = "l", ["œ"] = "oe", ["ꞃ"] = "r", ["ꞅ"] = "s", ["ꞇ"] = "t", ["þ"] = "t" .. p[1], ["ꝥ"] = "t" .. p[1], ["ꝧ"] = "t" .. p[1], ["ƿ"] = "w", }}, }, }, standard_chars = { Latn = "AaÆæBbCcDdÐðEeFfGgHhIiLlMmNnOoŒœPpRrSsTtÞþUuWwXxYy", c.punc, }, } m["anh"] = { "Nend", 6991554, "ngf-wso", "Latn", } m["ani"] = { "အာန်ဒဳ", 34849, "cau-and", "Cyrl", translit = "cau-nec-translit", override_translit = true, display_text = s["cau-Cyrl-displaytext"], strip_diacritics = s["cau-Cyrl-stripdiacritics"], } m["anj"] = { "Anor", 56458, "paa-aia", "Latn", } m["ank"] = { "Goemai", 35272, "cdc-wst", "Latn", } m["anl"] = { "Anu", 4777679, "sit-mru", "Latn", } m["anm"] = { "Anāl", 56235, "tbq-kuk", "Latn", } m["ann"] = { "Obolo", 36614, "nic-lcr", "Latn", } m["ano"] = { "Andoque", 2669225, "qfa-iso", "Latn", } m["anp"] = { "အာန်ဂဳကာ", 28378, "inc-bih", "Deva, Kthi", translit = { Deva = "hi-translit", Kthi = "bho-Kthi-translit", }, } m["anq"] = { "ဂျရာဝါ", 2475526, "qfa-ong", "Latn", } m["anr"] = { "Andh", 4754314, "inc-sou", "Deva", } m["ans"] = { "Anserma", 3446613, "sai-chc", "Latn", } m["ant"] = { "Antakarinya", 921304, "aus-psw", "Latn", } m["anu"] = { "Anuak", 56677, "sdv-lon", "Latn", } m["anv"] = { "ဒါန်ညာ", 35187, "nic-mam", "Latn", } m["anw"] = { "Anaang", 2845320, "nic-ief", 
"Latn", } m["anx"] = { "Andra-Hus", 2846195, "poz-aay", "Latn", } m["any"] = { "Anyi", 28395, "alv-ctn", "Latn", } m["anz"] = { "Anem", 56512, "qfa-dis", -- Papuan; might be an isolate or in a putative West New Britain family "Latn", } m["aoa"] = { "Angolar", 34994, "crp", "Latn", ancestors = "pt", } m["aob"] = { "Abom", 3446647, "qfa-dis", -- Papuan; possibly a divergent Tirio language (Anim family), or a top-level TNG node "Latn", } m["aoc"] = { "ပေမန်", 10729616, "sai-pem", "Latn", } m["aod"] = { "Andarum", 3507888, "paa-ata", "Latn", } m["aoe"] = { "Angal Enen", 10951638, "ngf-ank", "Latn", } m["aof"] = { "Bragat", 3507977, "paa-pal", "Latn", } m["aog"] = { "Angoram", 56366, -- cf 6754745 for merged dialect "paa-lse", "Latn", } m["aoi"] = { "Anindilyakwa", 2714654, "aus-arn", "Latn", } m["aoj"] = { "Mufian", 3507881, "paa-ara", "Latn", } m["aok"] = { "Arhö", 4790086, "poz-cln", "Latn", } m["aol"] = { "Alorese", 3332062, "poz", "Latn", } m["aom"] = { "Ömie", 8078975, "ngf-koi", "Latn", } m["aon"] = { "Bumbita Arapesh", 3508044, "paa-ara", "Latn", } m["aor"] = { "Aore", 12627129, "poz-vnn", "Latn", } m["aos"] = { "Taikat", 7676018, "paa-taa", "Latn", } m["aot"] = { "အိန္ဒိ အာတုံ", 5646, "tbq-bdg", "Latn, Beng", } m["aou"] = { "အအ်ဥူ", 16109994, "gio", "Latn", -- also Hani? 
} m["aox"] = { "Atorada", 3507932, "awd", "Latn", } m["aoz"] = { "Uab Meto", 3441962, "poz-tim", "Latn", } m["apb"] = { "သာ'အ်", 36294, "poz-sls", "Latn", } m["apc"] = { "အာရဗဳလပ်ဗေန်ထေန်သၟဝ်ကျာ", 22809485, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["apd"] = { "အာရဗဳ သုဒါန်နဳ", 56573, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["ape"] = { "Bukiyip", 3507895, "paa-ara", "Latn", } m["apf"] = { "Pahanan Agta", 7135432, "phi", "Latn", } m["apg"] = { "Ampanang", 4748035, "poz", "Latn", } m["aph"] = { "Athpare", 3449126, "sit-kie", "Deva, Latn", } m["api"] = { "Apiaká", 3507941, "tup-gua", "Latn", } m["apj"] = { "Jicarilla", 28277, "apa", "Latn", } m["apk"] = { "Plains Apache", 27861, "apa", "Latn", } m["apl"] = { "Lipan", 28269, "apa", "Latn", } m["apm"] = { "Chiricahua", 13368, "apa", "Latn", } m["apn"] = { "အဖဳနာရဲ", 2858311, "sai-nje", "Latn", } m["apo"] = { "Ambul", 12627135, "poz-ocw", "Latn", } m["app"] = { "Apma", 2669188, "poz-vnn", "Latn", } m["apq"] = { "အ-ၜေအ်သိခွါ", 28466, "qfa-adc", "Latn", } m["apr"] = { "Arop-Lokep", 2863482, "poz-ocw", "Latn", } m["aps"] = { "Arop-Sissano", 12627242, "poz-ocw", "Latn", } m["apt"] = { "Apatani", 56306, "sit-tan", "Latn", } m["apu"] = { "Apurinã", 2859081, "awd", "Latn", } m["apv"] = { "Alapmunte", 16110782, "sai-nmk", "Latn", } m["apw"] = { "အာဖေန်ချဳ လ္ပာ်ပလိုတ်", 28060, "apa", "Latn", } m["apx"] = { "Aputai", 12473343, "poz-tim", "Latn", } m["apy"] = { "အာက်ပါလာဲန်", 2736980, "sai-gui", "Latn", } m["apz"] = { "သာပဵုယဝ်ကာ", 7398693, "ngf-woj", "Latn", } m["aqc"] = { "အာဆိ", 34915, "cau-lzg", "Cyrl", translit = "cau-nec-translit", override_translit = true, display_text = s["cau-Cyrl-displaytext"], strip_diacritics = s["cau-Cyrl-stripdiacritics"], sort_key = { from = { "ккъӏв", "ххьӏв", -- 5 chars "гъӏв", "ёоӏ", "ккъӏ", "ккъв", "къӏв", "ллъв", "ххьӏ", "хъӏв", "хьӏв", "ццӏв", "ччӏв", -- 4 chars "ааӏ", "гӏв", "гъӏ", "гъв", "гьв", "ееӏ", "ёӏ", "ёо", "ииӏ", "кӏв", "ккв", "ккъ", "къӏ", 
"къв", "кьв", "лӏв", "ллъ", "лъв", "льв", "ооӏ", "пӏв", "ппв", "ссв", "тӏв", "ттв", "ууӏ", "хӏв", "ххв", "хъӏ", "хъв", "хьӏ", "цӏв", "ццӏ", "ццв", "чӏв", "ччӏ", "ээӏ", "юуӏ", "яаӏ", -- 3 chars "аӏ", "аа", "гӏ", "гв", "гъ", "гь", "дв", "еӏ", "ее", "ё", "жв", "зв", "иӏ", "ии", "кӏ", "кв", "кк", "къ", "кь", "лӏ", "лв", "лъ", "ль", "оӏ", "оо", "пӏ", "пв", "пп", "св", "сс", "тӏ", "тв", "тт", "уӏ", "уу", "фв", "хӏ", "хв", "хх", "хъ", "цӏ", "цв", "цц", "чӏ", "чв", "шв", "щв", "эӏ", "ээ", "юӏ", "юу", "яӏ", "яа" -- 2 chars }, to = { "к" .. p[8], "х" .. p[7], "г" .. p[6], "е" .. p[7], "к" .. p[7], "к" .. p[9], "к" .. p[12], "л" .. p[5], "х" .. p[6], "х" .. p[10], "х" .. p[13], "ц" .. p[6], "ч" .. p[5], "а" .. p[3], "г" .. p[2], "г" .. p[5], "г" .. p[7], "г" .. p[9], "е" .. p[3], "е" .. p[5], "е" .. p[6], "и" .. p[3], "к" .. p[2], "к" .. p[5], "к" .. p[6], "к" .. p[11], "к" .. p[13], "к" .. p[15], "л" .. p[2], "л" .. p[4], "л" .. p[7], "л" .. p[9], "о" .. p[3], "п" .. p[2], "п" .. p[5], "с" .. p[3], "т" .. p[2], "т" .. p[5], "у" .. p[3], "х" .. p[2], "х" .. p[5], "х" .. p[9], "х" .. p[11], "х" .. p[12], "ц" .. p[2], "ц" .. p[5], "ц" .. p[7], "ч" .. p[2], "ч" .. p[4], "э" .. p[3], "ю" .. p[3], "я" .. p[3], "а" .. p[1], "а" .. p[2], "г" .. p[1], "г" .. p[3], "г" .. p[4], "г" .. p[8], "д" .. p[1], "е" .. p[1], "е" .. p[2], "е" .. p[4], "ж" .. p[1], "з" .. p[1], "и" .. p[1], "и" .. p[2], "к" .. p[1], "к" .. p[3], "к" .. p[4], "к" .. p[10], "к" .. p[14], "л" .. p[1], "л" .. p[3], "л" .. p[6], "л" .. p[8], "о" .. p[1], "о" .. p[2], "п" .. p[1], "п" .. p[3], "п" .. p[4], "с" .. p[1], "с" .. p[2], "т" .. p[1], "т" .. p[3], "т" .. p[4], "у" .. p[1], "у" .. p[2], "ф" .. p[1], "х" .. p[1], "х" .. p[3], "х" .. p[4], "х" .. p[8], "ц" .. p[1], "ц" .. p[3], "ц" .. p[4], "ч" .. p[1], "ч" .. p[3], "ш" .. p[1], "щ" .. p[1], "э" .. p[1], "э" .. p[2], "ю" .. p[1], "ю" .. p[2], "я" .. p[1], "я" .. 
p[2] } }, } m["aqd"] = { "Ampari Dogon", 4748057, "nic-dgw", "Latn", } m["aqg"] = { "Arigidi", 34829, "alv-von", "Latn", } m["aqm"] = { "Atohwaim", 11732297, "paa-kay", "Latn", } m["aqn"] = { "Northern Alta", 7058116, "phi", "Latn", } m["aqp"] = { "Atakapa", 10975683, "qfa-iso", "Latn", } m["aqr"] = { "Arhâ", 4790085, "poz-cln", "Latn", } m["aqt"] = { "Angaité", 15736037, "sai-mas", "Latn", } m["aqz"] = { "Akuntsu", 4701960, "tup", "Latn", } m["arc"] = { "အာရမေအဳ", 28602, "sem-ara", "Hebr, Armi, Syrc, Palm, Nbat, Phnx, Mand, Samr, Hatr, Elym", translit = { Armi = "Armi-translit", Palm = "Palm-translit", }, strip_diacritics = { -- The first three were added by [[User:Wikitiki89]] in 2015 for use with Syriac, which has diacritics that look -- like a diaeresis (syāmē) and macrons above and below (mṭalqānā); see Wikipedia [[w:Syriac alphabet]]. But -- I don't know if they are actually represented using these diacritics. Syrc = {remove_diacritics = c.macron .. c.diaer .. c.macronbelow .. u(0x0730) .. "-" .. 
u(0x0748)}, }, -- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] -- Samr strip_diacritics, sort_key in [[Module:scripts/data]]; previously no sort_key for Samr, presumably a mistake -- Phnx translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission) } m["ard"] = { "Arabana", 3507959, "aus-kar", "Latn", } m["are"] = { "Western Arrernte", 12645549, "aus-rnd", "Latn", } m["arh"] = { "Arhuaco", 2640621, "cba", "Latn", } m["ari"] = { "Arikara", 56539, "cdd", "Latn", strip_diacritics = {remove_diacritics = c.acute}, } m["arj"] = { "Arapaso", 9627356, "sai-tuc", "Latn", } m["ark"] = { "Arikapú", 3446640, "sai-mje", "Latn", } m["arl"] = { "Arabela", 2591221, "sai-zap", "Latn", } m["arn"] = { "မာၜေအ်ဓုန်ကာန်", 33730, "sai-ara", "Latn", } m["aro"] = { "Araona", 958414, "sai-tac", "Latn", } m["arp"] = { "အာရာပါဟဝ်", 56417, "alg-ara", "Latn", } m["arq"] = { "အာရဗဳ အာန်လ်ဂျဳရဳယျာ", 56499, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["arr"] = { "Arara-Karo", 35539, "tup", "Latn", } m["ars"] = { "အာရဗဳ နေတ်ဒဳ", 56574, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["aru"] = { "Arua", 2746221, "auf", "Latn", } m["arv"] = { "Arbore", 56883, "cus-eas", "Latn", } m["arw"] = { "အာရတ်ဝါတ်", 2655664, "awd-taa", "Latn", } m["arx"] = { "Aruá", 3507907, "tup", "Latn", } m["ary"] = { "အာရဗဳ မဝ်ရဝ်ကာန်", 56426, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["arz"] = { "အာရဗဳ အဳဂျေပ်", 29919, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["asa"] = { "Pare", 36403, "bnt-par", "Latn", } m["asb"] = { "Assiniboine", 2591288, "sio-dkt", "Latn", } m["asc"] = { "Casuarina Coast Asmat", 11732046, "ngf-asm", "Latn", } m["ase"] = { "အရေဝ်ဘာသာကွတ်တဲအမေရိကာန်", 14759, "sgn", "Sgnw", } m["asf"] = { "Auslan", 29525, "sgn", "Latn", -- when documented } m["asg"] = { "Cishingini", 35199, "nic-kam", "Latn", } m["ash"] = { "Abishira", 2871740, "qfa-dis", -- extinct, poorly 
documented; isolate or in a proposed Tequiraca-Canichana family by Kaufman (1994) "Latn", } m["asi"] = { "Buruwai", 5001031, "ngf-sab", "Latn", } m["asj"] = { "Nsari", 36418, "nic-bbe", "Latn", } m["ask"] = { "အာပ်သကေန်", 29379, "nur-sou", "Arab, Latn", } m["asl"] = { "Asilulu", 12473347, "poz-cma", "Latn", } m["asn"] = { "ဃှေန်ဂူ အေက်သဝေနဳ", 8044571, "tup-gua", "Latn", } m["aso"] = { "Dano", 5220979, "ngf-gah", "Latn", } m["asp"] = { "Algerian Sign Language", 3135421, "sgn", } m["asq"] = { "Austrian Sign Language", 36668, "sgn", "Latn", -- when documented } m["asr"] = { "Asuri", 3504321, "mun", "Latn", -- when documented } m["ass"] = { "Ipulo", 35408, "nic-tvc", "Latn", } m["ast"] = { "အေက်သတဝ်ရေန်", 29507, "roa-asl", "Latn", } m["asu"] = { "Tocantins Asurini", 32041490, "tup-gua", "Latn", } m["asv"] = { "Asoa", 56296, "csu-maa", "Latn", } m["asw"] = { "Australian Aboriginal Sign Language", 955216, "sgn", "Latn", -- when documented } m["asx"] = { "Muratayak", 11732766, "ngf-war", "Latn", } m["asy"] = { "Yaosakor Asmat", 16113158, "ngf-asm", "Latn", } m["asz"] = { "As", 2866218, "poz-hce", "Latn", } m["ata"] = { "Pele-Ata", 56511, "qfa-dis", -- Papuan; possibly in a putative West New Britain family, or an isolate "Latn", } m["atb"] = { "ဇြာဲဝါ", 56594, "tbq-brm", "Latn, Lisu", -- also Hani? 
translit = {Lisu = "Lisu-translit"}, sort_key = {Lisu = s["Lisu-sortkey"]}, } m["atc"] = { "Atsahuaca", 4817730, "sai-pan", "Latn", } m["atd"] = { "Ata Manobo", 12627315, "mno", "Latn", } m["ate"] = { "အေက်တာမ်ဗါဝ်လ်", 4813055, "ngf-wso", "Latn", } m["atg"] = { "Okpela", 7082551, "alv-yek", "Latn", } m["ati"] = { "Attié", 34844, "alv-lag", "Latn", } m["atj"] = { "အထိကာမိတ်", 56590, "alg", "Latn", ancestors = "cr", } m["atk"] = { "Ati", 3217458, "phi", "Latn", } m["atl"] = { "Mount Iraya Agta", 6921430, "phi", "Latn", } m["atm"] = { "Ata", 4812603, "phi", "Latn", } m["ato"] = { "Atong (Cameroon)", 34824, "nic-grs", "Latn", } m["atp"] = { "Pudtol Atta", 12640726, "phi", "Latn", } m["atq"] = { "Aralle-Tabulahan", 4783889, "poz-ssw", "Latn", } m["atr"] = { "ဝါဲမဳရဳ-အာထရဝ်ရဳ", 56865, "sai-car", "Latn", } m["ats"] = { "ဂရတ် ဗါန်တေ", 56628, "alg-ara", "Latn", } m["att"] = { "ပါန်ပလဝ်နာ အာတ်တာ", 12639245, "phi", "Latn", } m["atu"] = { "Reel", 7306882, "sdv-dnu", "Latn", } m["atv"] = { "အာန်တာယ် လ္ပာ်သၟဝ်ကျာ", 2640863, "trk-ssb", "Cyrl", translit = "Altai-translit", } m["atw"] = { "Atsugewi", 56718, "nai-pal", "Latn", } m["atx"] = { "Arutani", 56609, nil, "Latn", } m["aty"] = { "Aneityum", 2379113, "poz-vns", "Latn", } m["atz"] = { "Arta", 3508067, "phi", "Latn", } m["aua"] = { "Asumboa", 4811870, "poz-tem", "Latn", } m["aub"] = { "Alugu", 12626798, "tbq-urp", "Latn", -- also Hani? 
} m["auc"] = { "Huaorani", 758570, "qfa-iso", "Latn", } m["aud"] = { "Anuta", 35326, "poz-pnp", "Latn", } m["aug"] = { "Aguna", 34733, "alv-gbe", "Latn", } m["auh"] = { "Aushi", 2872082, "bnt-sbi", "Latn", } m["aui"] = { "Anuki", 3508132, "poz-ocw", "Latn", } m["auj"] = { "အာဂျဳလာ", 56398, "ber", "Latn, Arab, Tfng", } m["auk"] = { "Heyo", 3504295, "paa-hya", "Latn", } m["aul"] = { "Aulua", 427300, "poz-vnc", "Latn", } m["aum"] = { "အာသူ", 34798, "alv-ngb", "Latn", } m["aun"] = { "Molmo One", 12637224, "paa-trr", "Latn", } m["auo"] = { "Auyokawa", 56247, "cdc-wst", "Latn", } m["aup"] = { "Makayam", 6738863, "paa-tir", "Latn", } m["auq"] = { "Anus", 23855, "poz-ocw", "Latn", } m["aur"] = { "Aruek", 3504279, "paa-kom", "Latn", } m["aut"] = { "Austral", 2669261, "poz-pep", "Latn", } m["auu"] = { "Auye", 4827334, "ngf-pan", "Latn", } m["auw"] = { "Awyi", 3513326, "paa-taa", "Latn", } m["aux"] = { "အာဝ်ရာက်", 3507995, "tup-gua", "Latn", } m["auy"] = { "Auyana", 2873211, "ngf-gau", "Latn", } m["auz"] = { "အာရဗဳ ဥူသဗက်ကဳ", 3399507, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["avb"] = { "Avau", 12627412, "poz-ocw", "Latn", } m["avd"] = { "အာယ်ဝဳရဳ-ဝဳဒါရဳ", 3327357, "xme", "fa-Arab", ancestors = "xme-mid", } m["avi"] = { "Avikam", 34840, "alv-lag", "Latn", } m["avk"] = { "ခါဝ်တာဝါယ်", 1377116, "art", "Latn", type = "appendix-constructed", } m["avm"] = { "Angkamuthi", 62603022, "aus-pmn", "Latn", } m["avn"] = { "Avatime", 34796, "alv-ktg", "Latn", } m["avo"] = { "Agavotaguerra", 3508007, "awd", "Latn", } m["avs"] = { "Aushiri", 3409318, "sai-zap", "Latn", } m["avt"] = { "Au", 3446608, "paa-wap", "Latn", } m["avu"] = { "အာတ်ဝါဝ်ခါယျ", 56685, "csu-mma", "Latn", } m["avv"] = { "Avá-Canoeiro", 4829584, "tup-gua", "Latn", } m["awa"] = { "အဝါဒဳ", 29579, "inc-hie", "Deva, Kthi, fa-Arab", ancestors = "inc-oaw", translit = { Deva = "hi-translit" }, } m["awb"] = { "Awa (New Guinea)", 2874650, "ngf-gau", "Latn", } m["awc"] = { "Cicipu", 35193, "nic-kam", "Latn", } 
m["awe"] = { "အာဝပ်တဳ", 4830038, "tup", "Latn", } m["awg"] = { "အာန်ဂူတဳမဳရဳ", 4764288, "aus-pam", "Latn", } m["awh"] = { "Awbono", 3446684, "paa-baa", "Latn", } m["awi"] = { "Aekyom", 3399691, "paa-kae", "Latn", } m["awk"] = { "အဝါဗာကဴ", 3449138, "aus-pam", "Latn", } m["awm"] = { "Arawum", 4784537, "ngf-rai", "Latn", } m["awn"] = { "Awngi", 34934, "cus-cen", "Ethi", } m["awo"] = { "Awak", 3446643, "alv-wjk", "Latn", } m["awr"] = { "Awera", 56379, "paa-flp", "Latn", } m["aws"] = { "South Awyu", 12633986, "ngf-awy", "Latn", } m["awt"] = { "Araweté", 4784535, "tup-gua", "Latn", } m["awu"] = { "Central Awyu", 12628801, "ngf-awy", "Latn", } m["awv"] = { "Jair Awyu", 16110177, "ngf-awy", "Latn", } m["aww"] = { "Awun", 56369, "paa-sep", "Latn", } m["awx"] = { "Awara", 2874670, "ngf-waa", "Latn", } m["awy"] = { "Edera Awyu", 12630425, "ngf-awy", "Latn", } m["axb"] = { "Abipón", 11252539, "sai-guc", "Latn", } m["axe"] = { "Ayerrerenge", 16112737, "aus-pam", "Latn", } m["axg"] = { "Arára (Mato Grosso)", 3446660, nil, "Latn", } m["axk"] = { "Aka (Central Africa)", 11010149, "bnt-ngn", "Latn", } m["axl"] = { "Lower Southern Aranda", 6693295, "aus-rnd", "Latn", } m["axm"] = { "အာမေနဳယျာအဒေါဝ်", 4438498, "hyx", "Armn", ancestors = "xcl", translit = "Armn-translit", override_translit = true, strip_diacritics = { remove_diacritics = "՞՜՛՟", from = {"եւ", "ՙ", "՚"}, to = {"և", "ʻ", "’"} } } m["axx"] = { "Xârâgurè", 8045635, "poz-cln", "Latn", } m["aya"] = { "Awar", 56876, "paa-baw", "Latn", } m["ayb"] = { "Ayizo", 34841, "alv-pph", "Latn", } m["ayd"] = { "Ayabadhu", 3509164, "aus-pmn", "Latn", } m["aye"] = { "Ayere", 34788, "alv-aah", "Latn", } m["ayg"] = { "Nyanga (Togo)", 35446, "alv-gng", "Latn", } m["ayi"] = { "Leyigha", 3914492, "nic-uce", "Latn", } m["ayk"] = { "Akuku", 3450179, "alv-nwd", "Latn", } m["ayl"] = { "အာရဗဳလေတ်ဗျာ", 56503, "sem-arb", "Arab", strip_diacritics = "ar-stripdiacritics", } m["ayn"] = { "အာရဗဳ ယာက်မနဳ", 1686766, "sem-arb", "Arab, Hebr", strip_diacritics 
= { Arab = "ar-stripdiacritics", }, -- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] } m["ayo"] = { "Ayoreo", 56634, "sai-zam", "Latn", } m["ayp"] = { "အာရဗဳ မာက်သဝ်ပဝ်တေမဳယာန် သၟဝ်ကျာ", 56577, "sem-arb", "Arab", ancestors = "acm", strip_diacritics = "ar-stripdiacritics", } m["ayq"] = { "Ayi", 56449, "paa-sep", "Latn", } m["ays"] = { "Sorsogon Ayta", 7563752, "phi", "Latn", } m["ayt"] = { "Bataan Ayta", 4921648, "phi", "Latn", } m["ayu"] = { "Ayu", 34786, "alv", "Latn", } -- ayy deleted and removed from ISO; per the removal request, "no linguistic data exists for any [Ayta] language that the -- ancestors of this group might have once spoken. And thus, there is no evidence that this group ever had a language -- distinct from any other Philippine language." [Lobel] m["ayz"] = { "Maybrat", 4830892, "paa-may", -- either an isolate; grouped with Abun and the West Bird's Head family; or in the putative West Papuan family "Latn", } m["aza"] = { "Azha", 4832486, "tbq-axi", "Latn", } m["azd"] = { "ဒူရာန်ဂဝ် နာဟွာတာယ်လ်လ္ပာ်ဖာဗၟံက်", 16115449, "azc-dur", "Latn", } m["azg"] = { "San Pedro Amuzgos Amuzgo", 35092, "omq", "Latn", } m["azm"] = { "Ipalapa Amuzgo", 12633013, "omq", "Latn", } m["azn"] = { "Western Durango နာဟွာတာယ်လ်", 12645553, "azc-dur", "Latn", } m["azo"] = { "Awing", 34856, "nic-nge", "Latn", } m["azt"] = { "Faire Atta", 12630884, "phi", "Latn", } m["azz"] = { "ဟာဲလာန် ပွာယ်ဗလာ နာဟွာတာယ်လ်", 12953754, "azc-nah", "Latn", } return require("Module:languages").finalizeData(m, "language") ofxnqqiv8iy4gm884yq49wlidw5gqlc မဝ်ဂျူ:languages/data/3/n 828 657 395878 394329 2026-05-29T15:37:06Z Intobesa.bot 1035 Bot: ပလေဝ်ဒါန် 395878 Scribunto text/plain local m_langdata = require("Module:languages/data") -- Loaded on demand, as it may not be needed (depending on the data). local function u(...) u = require("Module:string utilities").char return u(...) 
end local c = m_langdata.chars local p = m_langdata.puaChars local s = m_langdata.shared local m = {} m["naa"] = { "Namla", 3508760, "paa-pau", "Latn", } m["nab"] = { "Nambikwara", 2068190, "sai-nmk", "Latn", } m["nac"] = { "Narak", 6965295, "ngf", "Latn", } m["nae"] = { "Naka'ela", 6960073, "poz", "Latn", } m["naf"] = { "Nabak", 11732491, "ngf", "Latn", } m["nag"] = { "Naga Pidgin", 3503454, "crp", "Latn", ancestors = "as", } m["nah"] = { "နာဟွာတာယ်လ်", 13300, "azc-nah", "Latn", } m["naj"] = { "Nalu", 36026, "alv-nal", "Latn", } m["nak"] = { "နာကာနာဲ", 6528669, "poz-ocw", "Latn", } m["nal"] = { "Nalik", 3335387, "poz-ocw", "Latn", } m["nam"] = { "Ngan'gityemerri", 3298041, "aus-dal", "Latn", } -- Being converted into the family "Southern Min" ("zhx-nan", which will take the code "nan" once the language can be removed). Retain the name "Min Nan" here to avoid having to move things that are scheduled for deletion anyway. m["nan"] = { "မိန်နာန်", 36495, "zhx-com", "Hants, Latn, Bopo, Kana", wikimedia_codes = "zh-min-nan", generate_forms = "zh-generateforms", sort_key = { Hani = "Hani-sortkey", Kana = "Kana-sortkey" }, } m["nao"] = { "Naaba", 11883865, "sit-tib", ancestors = "xct", } m["nap"] = { "နဳပဝ်လဳတေန်", 33845, "roa-itd", "Latn", } m["naq"] = { "ခိုဝ်ခိုဝ်", 13301, "khi-khk", "Latn", } m["nar"] = { "Iguta", 5621686, "nic-jer", "Latn", } m["nas"] = { "Nasioi", 56772, "paa-sbo", "Latn", } m["nat"] = { "Hungworo", 3914395, "nic-kmk", "Latn", } m["naw"] = { "Nawuri", 35906, "alv-gng", "Latn", } m["nax"] = { "Nakwi", 3504178, "qfa-mal", "Latn", } m["nay"] = { "Ngarrindjeri", 7022091, "aus-pam", "Latn", } m["naz"] = { "Coatepec နာဟွာတာယ်လ်", 5138605, "azc-nah", "Latn", } m["nba"] = { "Nyemba", 3346655, "bnt-clu", "Latn", ancestors = "lch", } m["nbb"] = { "Ndoe", 36134, "nic-eko", "Latn", } m["nbc"] = { "Chang", 5071694, "sit-kch", "Latn", } m["nbd"] = { "Ngbinda", 11132859, "bnt-boa", "Latn", } m["nbe"] = { "Konyak Naga", 6430448, "sit-kch", "Latn", } m["nbg"] = { 
"Nagarchal", 13299, "dra-gon", } m["nbh"] = { "Ngamo", 3438705, "cdc-wst", "Latn", } m["nbi"] = { "Mao Naga", 12952905, "tbq-anp", "Latn", } m["nbj"] = { "Ngarinman", 10600380, nil, "Latn", } m["nbk"] = { "Nake", 11732496, "ngf-mad", "Latn", } m["nbm"] = { "Ngbaka Ma'bo", 3915331, "nic-nkm", "Latn", } m["nbn"] = { "Kuri", 3200540, "poz", "Latn", } m["nbo"] = { "Nkukoli", 3914482, "nic-uce", "Latn", } m["nbp"] = { "Nnam", 36138, "nic-eko", "Latn", } m["nbq"] = { "Nggem", 12952956, "ngf", "Latn", } m["nbr"] = { "Numana", 5529310, "nic-nin", "Latn", } m["nbs"] = { "Namibian Sign Language", 6961792, "sgn", "Latn", -- when documented } m["nbt"] = { "Na", 12952895, "sit-tan", "Deva, Latn", } m["nbu"] = { "Rongmei Naga", 12952912, "sit-zem", "Latn", } m["nbv"] = { "Ngamambo", 11129694, "nic-mom", "Latn", } m["nbw"] = { "Southern Ngbandi", 17522635, "nic-ngd", "Latn", } m["nby"] = { "Ningera", 11732524, "paa-brd", "Latn", } m["nca"] = { "Iyo", 6101336, "ngf-fin", "Latn", } m["ncb"] = { "Central Nicobarese", 3335553, "aav-nic", "Deva, Latn", } m["ncc"] = { "Ponam", 3396122, "poz-aay", "Latn", } m["ncd"] = { "Nachering", 6957144, "sit-kic", "Deva", } m["nce"] = { "Yale", 2992915, "paa", --kwomtari or isolate "Latn", } m["ncf"] = { "Notsi", 3344784, "poz-ocw", "Latn", } m["ncg"] = { "နေတ်သကာ", 3342138, "nai-tsi", "Latn", } m["nch"] = { "Central Huasteca နာဟွာတာယ်လ်", 2194290, "azc-nah", "Latn", } m["nci"] = { "နာဝါတ်ဒဝ်ဝၚ်ဂန္ထ", 559242, "azc-nah", "Latn", entry_name = {remove_diacritics = c.macron}, } m["ncj"] = { "Northern Puebla နာဟွာတာယ်လ်", 15705671, "azc-nah", "Latn", } m["nck"] = { "Nakara", 6960662, "aus-arn", "Latn", } m["ncl"] = { "Michoacán နာဟွာတာယ်လ်", 2896217, "azc-nah", "Latn", } m["ncm"] = { "Nambo", 42173731, nil, "Latn", } m["ncn"] = { "Nauna", 3337158, "poz-aay", "Latn", } m["nco"] = { "Sibe", 56806, "paa-sbo", "Latn", } m["ncr"] = { "Ncane", 11297920, "nic-bbe", "Latn", ancestors = "nhu", } m["ncs"] = { "Nicaraguan Sign Language", 33765, "sgn", "Sgnw", } 
m["nct"] = { "Chothe Naga", 5105385, "tbq-kuk", "Beng, Latn", } m["ncu"] = { "Chumburung", 35198, "alv-gng", "Latn", } m["ncx"] = { "ပွယ်ဗလာ နာဟွာတာယ်လ် ဗဟဵု", 5061727, "azc-nah", "Latn", } m["ncz"] = { "Natchez", 3111838, nil, "Latn", } m["nda"] = { "Ndasa", 35904, "bnt-kel", "Latn", } m["ndb"] = { "Kenswei Nsei", 7067553, "nic-rnn", "Latn", } m["ndc"] = { "Ndau", 13311, "bnt-sho", "Latn", } m["ndd"] = { "Nde-Nsele-Nta", 36131, "nic-eko", "Latn", } m["ndf"] = { "Nadruvian", 6957967, nil, "Latn", } m["ndg"] = { "Ndengereko", 6983726, "bnt-mbi", "Latn", } m["ndh"] = { "Ndali", 6983678, "bnt-run", "Latn", } m["ndi"] = { "Chamba Leko", 36381, "alv-lek", "Latn", } m["ndj"] = { "Ndamba", 6983684, "bnt-kil", "Latn", } m["ndk"] = { "Ndaka", 11164947, "bnt-nya", "Latn", } m["ndl"] = { "Ndolo", 6983788, "bnt-zbi", "Latn", ancestors = "lse", } m["ndm"] = { "Ndam", 56283, "cdc-est", "Latn", } m["ndn"] = { "Ngundi", 35916, "bnt-ngn", "Latn", } m["ndp"] = { "Ndo", 6983774, "csu-mle", "Latn", } m["ndq"] = { "Ndombe", 6983792, "bnt-swb", "Latn", } m["ndr"] = { "Ndoola", 35837, "nic-mmb", "Latn", } m["nds"] = { "ဂျာမာန်မသဝ်", 25433, "gmw-lgm", "Latn", ancestors = "gml", } m["ndt"] = { "Ndunga", 6983857, "nic-mbc", "Latn", } m["ndu"] = { "Dugun", 11015189, "alv-dur", "Latn", } m["ndv"] = { "Ndut", 36028, "alv-cng", "Latn", } m["ndw"] = { "Ndobo", 11008568, "bnt-ngn", "Latn", } m["ndx"] = { "Nduga", 6983833, nil, "Latn", } m["ndy"] = { "Lutos", 6705910, "csu-val", "Latn", } m["ndz"] = { "Ndogo", 35983, "nic-ser", "Latn", } m["nea"] = { "Eastern Ngad'a", 12473454, "poz-cet", } m["neb"] = { "ထါန်ရာ", 7853636, "dmn-mda", "Latn", } m["nec"] = { "Nedebang", 4925378, "ngf", } m["ned"] = { "Nde-Gbite", 11010279, "nic-grf", } m["nee"] = { "Kumak", 3347266, "poz-cln", "Latn", } m["nef"] = { "Nefamese", 6987002, "crp", } m["neg"] = { "နေတ်ဂေါတ်ဒါန်", 33676, "tuw-ewe", "Cyrl", } m["neh"] = { "Nyenkha", 3695185, "sit-ebo", "Tibt, Latn", translit = {Tibt = "Tibt-translit"}, override_translit = 
true, display_text = {Tibt = s["Tibt-displaytext"]}, entry_name = {Tibt = s["Tibt-entryname"]}, sort_key = {Tibt = "Tibt-sortkey"}, } m["nej"] = { "နေကိုဝ်", 6989840, "ngf-fin", "Latn", } m["nek"] = { "Neku", 14916900, "poz-cln", } m["nem"] = { "Nemi", 3338008, "poz-cln", "Latn", } m["nen"] = { "Nengone", 3338052, "poz-cln", "Latn", } m["neo"] = { "Ná-Meo", 15977293, "hmn", } m["neq"] = { "North Central Mixe", 25559729, nil, "Latn", } m["ner"] = { "Yahadian", 8046778, nil, "Latn", } m["nes"] = { "Bhoti Kinnauri", 21179921, "sit-las", } m["net"] = { "Nete", 6998869, "paa-eng", } m["neu"] = { "Neo", 606917, "art", "Latn", type = "appendix-constructed", } m["nev"] = { "Nyaheun", 7070801, "mkh-ban", } m["new"] = { "နူဝါ", 33979, "sit-new", "Deva, Newa, Ranj", ancestors = "nwx", translit = { Deva = "new-translit", Newa = "new-Newa-translit", }, } m["nex"] = { "Neme", 12952941, } m["ney"] = { "Neyo", 36410, "kro", } m["nez"] = { "Nez Perce", 3339226, "nai-shp", "Latn", } m["nfa"] = { "Dhao", 2053828, "poz", } m["nfd"] = { "Ahwai", 3913957, "nic-plt", "Latn", } m["nfl"] = { "အာဲဝူ", 56742, "poz-tem", "Latn", } m["nfr"] = { "Nafaanra", 13297, "alv-snf", "Latn", } m["nfu"] = { "Mfumte", 6826794, "nic-nka", "Latn", } m["nga"] = { "Ngbaka", 36022, "gba-eas", "Latn", } m["ngb"] = { "Northern Ngbandi", 17522631, "nic-ngd", "Latn", } m["ngc"] = { "Ngombe (Congo)", 3123524, "bnt-bun", } m["ngd"] = { "Ngando (Central African Republic)", 35910, "bnt-ngn", } m["nge"] = { "Ngemba", 6750551, "nic-nge", "Latn", } m["ngg"] = { -- compare 'aiy' "Ngbaka Manza", 11033316, "gba-eas", "Latn", } m["ngh"] = { "Nǀuu", 2618974, "khi-tuu", "Latn", } m["ngi"] = { "Ngizim", 3914924, "cdc-wst", "Latn", } m["ngj"] = { "Ngie", 36361, "nic-mom", "Latn", } m["ngk"] = { "Ngalkbun", 3913790, "aus-gun", "Latn", } m["ngl"] = { "Lomwe", 35824, "bnt-mak", "Latn", } m["ngm"] = { "Ngatik Men's Creole", 36400, "crp", ancestors = "en, pon", } m["ngn"] = { "Ngwo", 36051, "nic-mom", "Latn", } m["ngo"] = { "Ngoni", 
7022547, "bnt-ngu", "Latn", } m["ngp"] = { "Ngulu", 7193332, "bnt-seu", "Latn", } m["ngq"] = { "Ngoreme", 7022573, "bnt-lok", "Latn", } m["ngr"] = { "Nagu", 3063524, "poz-tem", "Latn", } m["ngs"] = { "Gvoko", 3441188, "cdc-cbm", "Latn", } m["ngt"] = { "Ngeq", 25559548, "mkh-kat", } m["ngu"] = { "Guerrero နာဟွာတာယ်လ်", 5614980, "azc-nah", "Latn", } m["ngv"] = { "Nagumi", 35842, "nic-jrn", } m["ngw"] = { "Ngwaba", 3440480, "cdc-cbm", "Latn", } m["ngx"] = { "Nggwahyi", 56265, "cdc-cbm", "Latn", } m["ngy"] = { "Tibea", 36598, "bnt-baf", "Latn", } m["ngz"] = { "Ngungwel", 35920, "bnt-tkc", "Latn", } m["nha"] = { "Nhanda", 3339380, "aus-psw", "Latn", } m["nhb"] = { "ဗါန်", 3913311, "dmn-nbe", "Latn", } m["nhc"] = { "Tabasco နာဟွာတာယ်လ်", 6047326, "azc-nah", "Latn", } m["nhd"] = { "Chiripá", 2873230, "tup-gua", "Latn", ancestors = "gn", } m["nhe"] = { "ဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်", 4358289, "azc-nah", "Latn", } m["nhf"] = { "Nhuwala", 10600396, "aus-nga", "Latn", } m["nhg"] = { "ထာန်ထာန်သေန်ဂဝ် နာဝါတော", 3450252, "azc-nah", "Latn", } m["nhh"] = { "Nahari", 6583560, "inc-hal", } m["nhi"] = { "Zacatlán-Ahuacatlán-Tepetzintla နာဟွာတာယ်လ်", 12953764, "azc-nah", "Latn", } m["nhk"] = { "Cosoleacaque နာဟွာတာယ်လ်", 12953757, "azc-nah", "Latn", } m["nhm"] = { "Morelos နာဟွာတာယ်လ်", 4800819, "azc-nah", "Latn", } m["nhn"] = { "နာဟောတ် မဇ္ဇျိမ", 6047309, "azc-nah", "Latn", } m["nho"] = { "Takuu", 3409818, "poz-pnp", "Latn", } m["nhp"] = { "Pajapan နာဟွာတာယ်လ်", 12953760, "azc-nah", "Latn", } m["nhq"] = { "Huaxcaleca နာဟွာတာယ်လ်", 12953758, "azc-nah", "Latn", } m["nhr"] = { "Naro", 2164778, "khi-kal", "Latn", } m["nht"] = { "Ometepec နာဟွာတာယ်လ်", 7090132, "azc-nah", "Latn", } m["nhu"] = { "နဝ်ဝါန်", 36072, "nic-bbe", "Latn", } m["nhv"] = { "Temascaltepec နာဟွာတာယ်လ်", 2379405, "azc-nah", "Latn", } m["nhw"] = { "ဝုတ်သတေကာ နာဟာဒ်တာဲ လ္ပာ်ပလိုတ်", 2678840, "azc-nah", "Latn", } m["nhx"] = { "မကာယျာပါံ နာဝါတဝ်", 12953756, "azc-nah", "Latn", } m["nhy"] = { "Northern Oaxaca နာဟွာတာယ်လ်", 
12953763, "azc-nah", "Latn", } m["nhz"] = { "Santa María La Alta နာဟွာတာယ်လ်", 15705753, "azc-nah", "Latn", } m["nia"] = { "နဳယျာ", 2407831, "poz-nws", "Latn", } m["nib"] = { "Nakame", 11732495, "ngf-fin", "Latn", } m["nid"] = { "Ngandi", 7021977, "aus-arn", "Latn", } m["nie"] = { "Niellim", 33662, "alv-bua", } m["nif"] = { "Nek", 6989781, "ngf-fin", "Latn", } m["nig"] = { "Ngalakan", 3913796, "aus-gun", "Latn", } m["nih"] = { "Nyiha", 11128374, "bnt-mby", "Latn", } m["nii"] = { "Nii", 35237, "ngf", "Latn", } m["nij"] = { "ၚဂျူ", 2992872, "poz-brw", "Latn", } m["nik"] = { "Southern Nicobarese", 7570194, "aav-nic", } m["nil"] = { "Nila", 7036821, } m["nim"] = { "Nilamba", 4121200, "bnt-tkm", "Latn", } m["nin"] = { "Ninzo", 3914021, "nic-nin", } m["nio"] = { "နၞဴနေတ်သာန်", 36743, "syd", "Cyrl", translit = "nio-translit", } m["niq"] = { "Nandi", 6956591, "sdv-nma", } m["nir"] = { "Nimboran", 301116, "paa-nim", } m["nis"] = { "Nimi", 11732523, "ngf-fin", "Latn", } m["nit"] = { "ကိုဝ်လာမဳ လ္ပာ်ဒိုဟ်ပလိုတ်သမၠုၚ်ကျာ", 56767, "dra-knk", "Deva, Telu", translit = { Telu = "te-translit" }, } m["niu"] = { "နဳဥုအာယ်", 33790, "poz-ton", "Latn", } m["niv"] = { "နိဖှေတ်", 36464, "qfa-iso", "Cyrl", translit = "niv-translit", entry_name = { from = {"['’]"}, to = {"ʼ"} }, sort_key = "niv-sortkey", } m["niw"] = { "Nimo", 3504126, "paa-asa", } m["nix"] = { "Hema", 5710904, "bnt-nyg", "Latn", } m["niy"] = { "Ngiti", 7022396, "csu-lnd", } m["niz"] = { "Ningil", 11732527, "qfa-tor", } m["nja"] = { "Nzanyi", 3441299, "cdc-cbm", "Latn", } m["njb"] = { "Nocte Naga", 7046410, "sit-tno", } m["njh"] = { "Lotha Naga", 33590, "sit-aao", } m["nji"] = { "Gudanji", 3915692, "aus-mir", } m["njj"] = { "Njen", 36112, "nic-mom", "Latn", } m["njl"] = { "Njalgulgule", 7071229, "sdv-daj", } m["njm"] = { "Angami", 56761, "tbq-anp", "Latn", } m["njn"] = { "Liangmai Naga", 14194500, "sit-zem", } m["njo"] = { "အာအဝ်", 28433, "sit-aao", "Latn", } m["njr"] = { "Njerep", 35844, "nic-mmb", "Latn", } m["njs"] = { 
"Nisa", 13593518, "paa-egb", } m["njt"] = { "Ndyuka-Trio Pidgin", 13591205, "crp", ancestors = "djk, tri", } m["nju"] = { "Ngadjunmaya", 7021846, "aus-pam", } m["njx"] = { "Kunyi", 3196559, "bnt-kng", "Latn", } m["njy"] = { "Njyem", 35898, "bnt-ndb", "Latn", } m["njz"] = { "နာ်ယဳဃှဳ", 56870, "sit-tan", "Latn", } m["nka"] = { "Nkoya", 7042633, "bnt-lbn", "Latn", } m["nkb"] = { "Khoibu Naga", 21481876, "sit-mar", } m["nkc"] = { "Nkongho", 35863, "bnt-saw", "Latn", } m["nkd"] = { "Koireng", 6426342, "sit-zem", } m["nke"] = { "Duke", 3041075, "poz-ocw", } m["nkf"] = { "Inpui Naga", 21481817, "sit-zem", } m["nkg"] = { "Nekgini", 11732509, "ngf-fin", "Latn", } m["nkh"] = { "Khezha Naga", 6401519, "tbq-anp", } m["nki"] = { "Thangal Naga", 56374, "sit-zem", } m["nkj"] = { "Nakai", 14916897, "ngf-okk", "Latn", } m["nkk"] = { "Nokuku", 7048122, "poz-vnn", "Latn", } m["nkm"] = { "Namat", 15634505, } m["nkn"] = { "Nkangala", 10962292, "bnt-clu", "Latn", ancestors = "mck", } m["nko"] = { "အၚ်္ခါဝ်နိယျာ", 35867, "alv-gng", "Latn", } m["nkp"] = { "Niuatoputapu", 3399095, "poz-pnp", } m["nkq"] = { "Nkami", 7042522, "alv-gng", "Latn", } m["nkr"] = { "Nukuoro", 2635961, "poz-pnp", "Latn", } m["nks"] = { "North Asmat", 11732049, } m["nkt"] = { "Nyika", 16917497, "bnt-mwi", "Latn", } m["nku"] = { "Bouna Kulango", 20668241, "alv-kul", } -- nkv is treated as nkt, see WT:LT m["nkw"] = { "Nkutu", 7193313, "bnt-tet", "Latn", } m["nkx"] = { "Nkoroo", 36000, "ijo", } m["nkz"] = { "Nkari", 11130307, "nic-ief", ancestors = "ibr", } m["nla"] = { "Ngombale", 36292, "bai", "Latn", } m["nlc"] = { "Nalca", 6960839, "ngf", "Latn", } m["nle"] = { "East Nyala", 25559347, "bnt-msl", "Latn", ancestors = "luy", } m["nlg"] = { "Gela", 3063531, "poz-sls", "Latn", } m["nli"] = { "Grangali", 3444203, "inc-kun", } m["nlj"] = { "Nyali", 7070830, "bnt-nya", "Latn", } m["nlk"] = { "Ninia Yali", 12953310, } m["nll"] = { "နဳဟာလဳ", 33904, "qfa-iso", "Deva, Latn", } m["nlm"] = { "Mankiyali", 47522426, "inc-koh", } 
m["nlo"] = { "Ngul", 35894, "bnt-bdz", "Latn", } m["nlq"] = { "Lao Naga", 63283609, "sit-tno", } m["nlu"] = { "Nchumbulu", 36143, "alv-gng", "Latn", } m["nlv"] = { "Orizaba နာဟွာတာယ်လ်", 3086050, "azc-nah", "Latn", } m["nlw"] = { "Walangama", 7961277, } m["nlx"] = { "Nahali", 33361, "inc-bhi", } m["nly"] = { "Nyamal", 7070837, "aus-nga", "Latn", } m["nlz"] = { "Nalögo", 20527138, "poz-tem", "Latn", } m["nma"] = { "Maram Naga", 56378, "sit-zem", } m["nmb"] = { "ဗေတ် နာန်ဗာတ်သ်", 2902304, "poz-vnc", "Latn", } m["nmc"] = { "ၚါမ်", 3915446, "csu-sar", "Latn", } m["nmd"] = { "Ndumu", 35901, "bnt-mbt", "Latn", } m["nme"] = { "Mzieme Naga", 6949473, "sit-zem", } m["nmf"] = { "Tangkhul Naga", 7682992, "sit-tng", } m["nmg"] = { "Kwasio", 34098, "bnt-mnj", "Latn", } m["nmh"] = { "Monsang Naga", 6902496, } m["nmi"] = { "Nyam", 3438738, "cdc-wst", "Latn", } m["nmj"] = { "Ngombe (Central African Republic)", 3913949, "gba-sou", } m["nmk"] = { "Namakura", 3335410, "poz-vnc", "Latn", } m["nml"] = { "Ndemli", 36089, "nic-grf", "Latn", } m["nmm"] = { "Manangba", 6746900, "sit-tam", "Tibt, Deva", translit = {Tibt = "Tibt-translit"}, override_translit = true, display_text = {Tibt = s["Tibt-displaytext"]}, entry_name = {Tibt = s["Tibt-entryname"]}, sort_key = {Tibt = "Tibt-sortkey"}, } m["nmn"] = { "သူ", 13229, "khi-tuu", "Latn", } m["nmo"] = { "Moyon Naga", 6927748, "tbq-kuk", } m["nmp"] = { "Nimanbur", 16891606, } m["nmq"] = { "Nambya", 11008869, "bnt-sho", "Latn", } m["nmr"] = { "Nimbari", 36069, "alv-lni", } m["nms"] = { "Letemboi", 3236886, "poz-vnc", "Latn", } m["nmt"] = { "Namonuito", 12908815, "poz-mic", } m["nmu"] = { "Northeast Maidu", 3278074, "nai-mdu", "Latn", } m["nmv"] = { "Ngamini", 7021944, "aus-kar", "Latn", } m["nmw"] = { "Nimoa", 7037729, "poz-ocw", } m["nmy"] = { "နန်မူယဳ", 56844, "sit-nax", "Latn", } m["nmz"] = { "Nawdm", 36085, "nic-yon", "Latn", } m["nna"] = { "Nyangumarta", 33653, } m["nnb"] = { "Nande", 3196953, "bnt-glb", "Latn", } m["nnc"] = { "Nancere", 
3140491, "cdc-est", "Latn", }
m["nnd"] = { "West Ambae", 2841479, "poz-vnn", "Latn", }
m["nne"] = { "Ngandyera", 10961003, "bnt-ova", "Latn", }
m["nnf"] = { "Ngaing", 11732510, "ngf-fin", "Latn", }
m["nng"] = { "Maring Naga", 12952908, "sit-mar", }
m["nnh"] = { "Ngiemboon", 36286, "bai", "Latn", }
m["nni"] = { "North Nuaulu", 12952968, "poz-cma", }
m["nnj"] = { "Nyangatom", 4662604, "sdv-ttu", }
m["nnk"] = { "Nankina", 11732502, "ngf-fin", "Latn", }
m["nnl"] = { "Northern Rengma Naga", 7067615, "tbq-anp", }
m["nnm"] = { "Namia", 56363, "paa-spk", "Latn", }
m["nnn"] = { "Ngete", 56625, "cdc-mas", "Latn", }
m["nnp"] = { "ဝါန်ဆေဝ်", 7967085, "sit-kch", "Wcho, Deva, Latn", }
m["nnq"] = { "Ngindo", 7022366, "bnt-mbi", "Latn", }
m["nnr"] = { "Narungga", 13591127, "aus-pam", "Latn", }
m["nnt"] = { "Nanticoke", 3915517, "alg-eas", "Latn", }
m["nnu"] = { "Dwang", 35258, "alv-gng", "Latn", }
m["nnv"] = { "Nukunu", 10604066, }
m["nnw"] = { "Southern Nuni", 11152248, "nic-gnn", "Latn", }
m["nnx"] = { "Ngong", 12952915, }
m["nny"] = { -- contrast aus-ynk
	"Nyangga", 10604331, "aus-tnk", "Latn", }
m["nnz"] = { "Nda'nda'", 36016, "bai", "Latn", }
m["noa"] = { "Woun Meu", 3111873, "sai-chc", "Latn", }
m["noc"] = { "Nuk", 11732534, "ngf-fin", "Latn", }
m["nod"] = {
	"သေံသၟဝ်ကျာ",
	565110,
	"tai-swe",
	"Lana, Thai",
	translit = {
		Lana = "Lana-translit",
		Thai = "Thai alphabet-translit",
	},
	-- entry_name and sort_key are each assigned exactly once. This record
	-- previously repeated both field names inside the same constructor, so an
	-- inline from/to sort_key and a from/to entry_name were silently
	-- overwritten by the definitions below (and still evaluated u(0x200C)
	-- for nothing). Only the definitions that took effect are kept.
	entry_name = {remove_diacritics = c.ZWNJ},
	sort_key = {
		Lana = "Lana-sortkey",
		Thai = "Thai-sortkey"
	},
}
m["noe"] = { "နဳမာဒဳ", 3502294, "raj", "Deva", translit = "hi-translit", }
m["nof"] = { "Nomane", 11732531, }
m["nog"] = { "နဝ်ကာယ်", 33871, "trk-kno", "Cyrl, Arab, Latn", translit = "nog-translit", override_translit = true, }
m["noh"] = { "Nomu", 11732532, }
m["noi"] = { "Noiri", 12953774, "inc-bhi", } m["noj"] = { "Nonuya", 5372139, "sai-wit", "Latn", } m["nok"] = { "Nooksack", 3343396, } m["nol"] = { "Nomlaki", 3343229, "nai-wtq", "Latn", } m["nom"] = { "Nocamán", 7046289, "sai-pan", "Latn", } m["non"] = { "နဳနိုတ်တြေံ", 35505, "gmq", "Latn, Runr", translit = {Runr = "Runr-translit"}, } m["nop"] = { "Numanggang", 7069052, "ngf-fin", "Latn", } m["noq"] = { "Ngongo", 11057478, "bnt-yak", "Latn", } m["nos"] = { "Eastern Nisu", 25559419, "tbq-nis", } m["not"] = { "Nomatsiguenga", 3342992, "awd", "Latn", } m["nou"] = { "Ewage-Notu", 5418860, } m["nov"] = { "Novial", 36738, "art", "Latn", type = "appendix-constructed" } m["now"] = { "Nyambo", 4967930, "bnt-haj", "Latn", } m["noy"] = { "Noy", 36321, "alv-bua", } m["noz"] = { "Nayi", 3183349, "omv-diz", } m["npa"] = { "Nar Phu", 4926353, "sit-tam", } m["npb"] = { "Nupbikha", 3695201, "sit-ebo", } m["npg"] = { "Ponyo", 7228475, "sit-kch", } m["nph"] = { "Phom", 7187109, "sit-kch", } m["npl"] = { "Southeastern Puebla နာဟွာတာယ်လ်", 4632950, "azc-nah", "Latn", } m["npn"] = { "Mondropolon", 3320594, "poz-aay", } m["npo"] = { "Pochuri Naga", 7206342, "tbq-anp", } m["nps"] = { "Nipsan", 11732528, } m["npu"] = { "Puimei Naga", 7259044, "sit-zem", } m["npy"] = { "Napu", 12953768, } m["nqg"] = { "Ede Nago", 12952408, "alv-ede", } m["nqk"] = { "Kura Ede Nago", 12952409, "alv-ede", } m["nql"] = { "Ngendelengo", 63283693, "bnt-swb", "Latn", } m["nqm"] = { "Ndom", 6983791, "ngf", "Latn", } m["nqn"] = { "Nen", 20816352, "paa-yam", } m["nqo"] = { "အိန်'ဂဝ်", 18546266, "dmn-man", "Nkoo", } m["nqq"] = { "Kyan-Karyaw Naga", 63283784, "sit-tno", } m["nqy"] = { "Akyaung Ari", 4702035, "sit-tng", } m["nra"] = { "Ngom", 36087, "bnt-kel", "Latn", } m["nrb"] = { "Nara", 36179, "sdv-nes", } m["nrc"] = { "Noric", 37023, "cel", "Ital", } m["nre"] = { "Southern Rengma Naga", 7313205, "tbq-anp", } m["nrf"] = { "နဝ်မေံ", 33850, "roa-oil", "Latn", wikimedia_codes = "nrm", ancestors = "fro-nor", sort_key = 
s["roa-oil-sortkey"], } m["nrg"] = { "Narango", 12952929, "poz-vnn", "Latn", } m["nri"] = { "Chokri Naga", 5104247, "tbq-anp", } m["nrk"] = { "Ngarla", 3915860, "aus-nga", "Latn", } m["nrl"] = { "Ngarluma", 7022078, "aus-nga", "Latn", } m["nrm"] = { "Narom", 3336135, "poz-swa", "Latn", } m["nrn"] = { "နန်", 36708, "gmq-ins", "Latn", } m["nrp"] = { "ပဳသေန် သၟဝ်ကျာ", 430138, nil, "Ital", translit = "Ital-translit", } m["nrr"] = { "Norra", 12952967, "tai", } m["nrt"] = { "Northern Kalapuya", 3192121, "nai-klp", } m["nru"] = { "Narua", 21658869, "sit-nas", "Latn", } m["nrx"] = { "Ngurmbur", 2591251, } m["nrz"] = { "Lala (New Guinea)", 6480151, "poz-ocw", } m["nsa"] = { "Sangtam Naga", 7418144, "sit-aao", } m["nsb"] = { "Lower Nossob", 6693681, "khi-tuu", "Latn", } m["nsc"] = { "Nshi", 11129508, "nic-rnn", "Latn", } m["nsd"] = { "Southern Nisu", 63284284, "tbq-nis", } m["nse"] = { "Nsenga", 3081996, "bnt-sna", "Latn", } m["nsg"] = { "Ngasa", 56345, "sdv-lma", } m["nsh"] = { "Ngoshie", 7022582, "nic-mom", "Latn", } m["nsi"] = { "Nigerian Sign Language", 7033021, "sgn", } m["nsk"] = { "နေတ်သကာပဳ", 1704302, "alg", "Cans", ancestors = "cr", translit = "nsk-translit", } m["nsl"] = { "Norwegian Sign Language", 1781613, "sgn", } m["nsm"] = { "Sema", 3478238, "tbq-anp", } m["nsn"] = { "Nehan", 3337774, "poz-ocw", } m["nso"] = { "သူထူ လ္ပာ်သၟဝ်ကျာ", 33890, "bnt-sts", "Latn", } m["nsp"] = { "Nepalese Sign Language", 3915492, "sgn", } m["nsq"] = { "Northern Sierra Miwok", 3344226, "nai-utn", "Latn", } m["nsr"] = { "Maritime Sign Language", 3915483, "sgn", } m["nss"] = { "Nali", 3335385, "poz-aay", } m["nst"] = { "ထေန်သ", 56350, "sit-tno", "Latn, Tnsa", } m["nsu"] = { "Sierra Negra နာဟွာတာယ်လ်", 63284326, "azc-nah", "Latn", } m["nsv"] = { "Southwestern Nisu", 63308004, "tbq-nis", } m["nsw"] = { "Navut", 3337327, "poz-vnn", "Latn", } m["nsx"] = { "Nsongo", 7067577, "bnt-tmb", "Latn", } m["nsy"] = { "Nasal", 6966574, } m["nsz"] = { "Nisenan", 33665, "nai-mdu", "Latn", } m["ntd"] = { 
"Northern Tidung", 24938325, "poz-san", } m["nte"] = { "Nathembo", 11030947, "bnt-mak", } m["ntg"] = { "Ngantangarra", 33060509, } m["nti"] = { "Natioro", 36140, "alv-wan", } m["ntj"] = { "Ngaanyatjarra", 3915409, "aus-pam", "Latn", } m["ntk"] = { "Ikoma", 5996114, "bnt-lok", "Latn", } m["ntm"] = { "Nateni", 3070731, "nic-grm", "Latn", } m["nto"] = { "Ntomba", 11130292, "bnt-mon", "Latn", } m["ntp"] = { "ထေပ်ပုဝ်ဝါန် လ္ပာ်သၟဝ်ကျာ", 15615651, "azc", "Latn", sort_key = {remove_diacritics = c.acute}, } m["ntr"] = { "Delo", 35195, "nic-gne", "Latn", } m["nts"] = { "Natagaimas", 6967931, } m["ntu"] = { "Natügu", 63308082, "poz-tem", "Latn", } m["ntw"] = { "Nottoway", 3344791, "iro-nor", } m["ntx"] = { "Somra", 7560536, "sit-tng", } m["nty"] = { "Mantsi", 56878, "sit-mnz", } m["nua"] = { "Yuanga", 3573088, "poz-cln", "Latn", } m["nuc"] = { "Nukuini", 3346231, } m["nud"] = { "Ngala", 7021893, "paa-spk", "Latn", } m["nue"] = { "Ngundu", 12952953, "bad-cnt", "Latn", } m["nuf"] = { "Nusu", 56413, "tbq-nus", } m["nug"] = { "Nungali", 7069826, "aus-mir", } m["nuh"] = { "Ndunda", 3913968, "nic-mmb", "Latn", } m["nui"] = { "Ngumbi", 36459, "bnt-yko", } m["nuj"] = { "Nyole (Uganda)", 3739448, "bnt-msl", "Latn", } m["nuk"] = { "နှတ်တက", 2992876, "wak", "Latn", } m["nul"] = { "Nusa Laut", 7070332, "poz-cma", } m["num"] = { "Niuafo'ou", 36173, "poz-ton", "Latn", } m["nun"] = { "Anong", 2748232, "sit-nng", } m["nuo"] = { "ၚောန်", 3915785, "mkh-vie", "Latn", sort_key = "vi-sortkey", } m["nup"] = { "နူပဳ", 36720, "alv-ngb", "Latn", entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. 
c.caron}, sort_key = "nup-sortkey", } m["nuq"] = { "Nukumanu", 12909019, "poz-pnp", } m["nur"] = { "Nuguria", 7068910, "poz-pnp", } m["nus"] = { "နူအေဝ်", 33675, "sdv-dnu", "Latn", } m["nut"] = { "နောန်", 72695, "tai", "Latn, Hani", sort_key = {Hani = "Hani-sortkey"}, } m["nuu"] = { "Ngbundu", 11126081, "bad", "Latn", } m["nuv"] = { "Northern Nuni", 11016572, "nic-gnn", "Latn", } m["nuw"] = { "Nguluwan", 6528643, } m["nux"] = { "Mehek", 6809452, "paa-spk", "Latn", } m["nuy"] = { "Nunggubuyu", 1747811, "aus-arn", } m["nuz"] = { "Tlamacazapa နာဟွာတာယ်လ်", 2073277, "azc-nah", "Latn", } m["nvh"] = { "Nasarian", 6966614, "poz-vnc", "Latn", } m["nvm"] = { "Namiae", 12952922, } m["nvo"] = { "Nyokon", 19573407, "nic-mbw", "Latn", } m["nwa"] = { "နဝထဳဟေနအ်", 6982892, "alg-ara", "Latn", } m["nwb"] = { "Nyabwa", 33664, "kro-wee", } m["nwc"] = { "Classical Newar", 5128301, "sit-new", } m["nwe"] = { "Ngwe", 36181, "bai", "Latn", } m["nwi"] = { "Southwest Tanna", 3504488, "poz-vns", "Latn", } m["nwm"] = { "Nyamusa-Molo", 12747951, "csu-bbk", } m["nwo"] = { "Nauo", 6981305, "aus-pam", "Latn", } m["nwr"] = { "Nawaru", 12638166, "ngf", } m["nwx"] = { "Middle Newar", 65455877, "sit-new", "Deva, Newa, Ranj", ancestors = "nwc", } m["nwy"] = { "Nottoway-Meherrin", 65455878, "iro-nor", } m["nxa"] = { "နောတ်အေတ်", 6981095, "poz-tim", } m["nxd"] = { "Ngando (Congo)", 3913277, "bnt-ske", } m["nxe"] = { "Nage", 2295569, "poz-cet", } m["nxg"] = { "Ngadha", 1516651, "poz-cet", "Latn", } m["nxi"] = { "Nindi", 7038230, "bnt-mbi", "Latn", } m["nxl"] = { "South Nuaulu", 18544857, "poz-cma", } m["nxm"] = { "Numidian", 35761, "afa", "Tfng, Latn", --Tfng may not support all the needed characters } m["nxn"] = { "Ngawun", 3915711, "aus-pam", "Latn", } m["nxo"] = { "Ndambomo", 6983681, "bnt-kel", "Latn", } m["nxq"] = { "နာဃှဳ", 2478711, "sit-nas", "Nkdb, Nkgb, Latn, Lisu", translit = {Lisu = "Lisu-translit"}, sort_key = {Lisu = s["Lisu-sortkey"]}, } m["nxr"] = { "Ninggerum", 11732526, "ngf-okk", 
"Latn", } m["nxu"] = { "Narau", 6965452, "ngf", "Latn", } m["nxx"] = { "Nafri", 6958211, "paa-sen", "Latn", } m["nyb"] = { "Nyangbo", 36256, "alv-ktg", "Latn", } m["nyc"] = { "Nyanga-li", 7070876, "bnt-boa", "Latn", } m["nyd"] = { "Nyole (Kenya)", 7071227, "bnt-msl", "Latn", } m["nye"] = { "Nyengo", 7071068, "bnt-clu", "Latn", } m["nyf"] = { "Giryama", 3107606, "bnt-mij", "Latn", } m["nyg"] = { "Nyindu", 11030685, "bnt-shh", "Latn", } m["nyh"] = { "Nyigina", 3913780, "aus-nyu", "Latn", } m["nyi"] = { "Nyimang", 34846, "sdv-nyi", "Latn", } m["nyj"] = { "Nyanga (Congo)", 7070879, "bnt-nyb", "Latn", } m["nyk"] = { "Nyaneka", 10962298, "bnt-swb", "Latn", } m["nyl"] = { "Nyeu", 3033578, "mkh-kat", } m["nym"] = { "Nyamwezi", 4121131, "bnt-tkm", "Latn", } m["nyn"] = { "နယျာန်ကိုဝ်လေဝ်", 13207, "bnt-nyg", "Latn", } m["nyo"] = { "နယျဝ်ရုဝ်", 33794, "bnt-nyg", "Latn", } m["nyp"] = { "Nyang'i", 7070894, "ssa-klk", } m["nys"] = { "Nyunga", 7049771, "aus-pam", "Latn", } m["nyt"] = { "Nyawaygi", 3915783, "aus-dyb", } m["nyu"] = { "Nyungwe", 7071318, "bnt-sna", "Latn", } m["nyv"] = { "Nyulnyul", 3442732, "aus-nyu", "Latn", } m["nyw"] = { "နေဴ", 26425602, "tai", "Thai, Latn, Tayo", -- Vietnamese alphabet sort_key = "Thai-sortkey", -- no effect on Latn } m["nyx"] = { "Nganyaywana", 3913800, "aus-cww", "Latn", } m["nyy"] = { "Nyakyusa", 3272620, "bnt-run", "Latn", } m["nza"] = { "ထိုၚ်ဂန် အာန်ဗာန်ဗေ", 36518, "nic-jkn", "Latn", } m["nzb"] = { "Njebi", 35923, "bnt-nze", "Latn", } m["nzd"] = { "နဇာဒဳ", 17152586, "bnt-bdz", "Latn", entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. 
c.caron}, } m["nzi"] = { "Nzima", 36337, "alv-ctn", } m["nzk"] = { "Nzakara", 3913339, "znd", "Latn", } m["nzm"] = { "Zeme Naga", 21491053, "sit-zem", } m["nzs"] = { "New Zealand Sign Language", 36239, "sgn", } m["nzu"] = { "Central Teke", 36473, "bnt-tkc", } m["nzy"] = { "Nzakambay", 36374, "alv-mbm", "Latn", } m["nzz"] = { "Nanga Dama Dogon", 6963443, "nic-nwa", } return require("Module:languages").finalizeData(m, "language") thzmma9u0ovguwyie085vnsc5p9juy2 မဝ်ဂျူ:languages/data/3/k 828 710 395884 394420 2026-05-29T15:52:21Z Intobesa.bot 1035 Bot: ပလေဝ်ဒါန် 395884 Scribunto text/plain local m_langdata = require("Module:languages/data") -- Loaded on demand, as it may not be needed (depending on the data). local function u(...) u = require("Module:string utilities").char return u(...) end local c = m_langdata.chars local p = m_langdata.puaChars local s = m_langdata.shared local m = {} m["kaa"] = { "ကာရာကာလပက်", 33541, "trk-kno", "Latn, Cyrl, fa-Arab", dotted_dotless_i = true, entry_name = { from = {"['’]"}, to = {"ʼ"} }, sort_key = { Latn = { from = { -- Sort the old orthography (using the apostrophe) after the new orthography (using the acute accent). "í", "iʼ", "i", -- Ensure "i" comes after "í", "iʼ", "ı". "sh", "ch", "á", "aʼ", "ǵ", "gʼ", "x", p[4], p[5], "ı", "q", "ń", "nʼ", "ó", "oʼ", "ú", "uʼ", "c" }, to = { p[4], p[5], "i" .. p[3], "z" .. p[1], "z" .. p[3], "a" .. p[1], "a" .. p[2], "g" .. p[1], "g" .. p[2], "h" .. p[1], "i", "i" .. p[1], "i" .. p[2], "k" .. p[1], "n" .. p[1], "n" .. p[2], "o" .. p[1], "o" .. p[2], "u" .. p[1], "u" .. p[2], "z" .. p[2] } }, Cyrl = { from = {"ә", "ғ", "ё", "қ", "ң", "ө", "ү", "ў", "ҳ"}, to = {"а" .. p[1], "г" .. p[1], "е" .. p[1], "к" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1], "у" .. p[2], "х" .. 
p[1]} }, }, } m["kab"] = { "ကာဗေန်အဝ်", 35853, "ber", "Latn", } m["kac"] = { "ကချေၚ်", 33332, "sit-jnp", "Latn, Mymr", } m["kad"] = { "Kadara", 3914011, "nic-plc", "Latn", } m["kae"] = { "Ketangalan", 2779411, "map", } m["kaf"] = { "Katso", 246122, "tbq-kzh", } m["kag"] = { "Kajaman", 6348863, "poz", "Latn", } m["kah"] = { "Fer", 5443742, "csu-bgr", "Latn", } m["kai"] = { "ခါရေဝ်ခါရေဝ်", 3438770, "cdc-wst", "Latn", } m["kaj"] = { "Jju", 35401, "nic-plc", "Latn", } m["kak"] = { "ကယျာပါ ကာလဟာန်", 3192220, "phi", "Latn", } m["kam"] = { "ကေန်ဗာ", 2574767, "bnt-kka", "Latn", } m["kao"] = { "Kassonke", 36905, "dmn-wmn", "Latn", } m["kap"] = { "ဗဳသဝ်တာ", 33054, "cau-ets", "Cyrl", translit = "cau-nec-translit", override_translit = true, display_text = {Cyrl = s["cau-Cyrl-displaytext"]}, entry_name = {Cyrl = s["cau-Cyrl-entryname"]}, } m["kaq"] = { "Capanahua", 2937196, "sai-pan", "Latn", } m["kaw"] = { "ဂျာဗာတြေံ", 49341, "poz", "Latn, Java, Kawi", translit = "jv-translit", --same as jv } m["kax"] = { "Kao", 3192799, "paa-nha" } m["kay"] = { "Kamayurá", 3192336, "tup-gua", "Latn", } m["kba"] = { "Kalarko", 5517764, "aus-pam", "Latn", } m["kbb"] = { "ကာသူယျာနာ", 12953626, "sai-prk", "Latn", } m["kbc"] = { "Kadiwéu", 18168288, "sai-guc", "Latn", } m["kbd"] = { "ခါပါဒဳယာန်", 33522, "cau-cir", "Cyrl, Latn, Arab", translit = { Cyrl = "cau-cir-translit", Arab = "ar-translit", }, override_translit = true, display_text = {Cyrl = s["cau-Cyrl-displaytext"]}, entry_name = { Cyrl = s["cau-Cyrl-entryname"], Latn = s["cau-Latn-entryname"], }, sort_key = { Cyrl = { from = { "кхъу", "къӏу", -- 4 chars "гъу", "джу", "дзу", "жъу", "къу", "кхъ", "къӏ", "кӏу", "кӏь", "лъу", "лӏу", "пӏу", "сӏу", "тӏу", "фӏу", "хъу", "цӏу", "чъу", "чӏу", "шъу", "шӏу", "щӏу", -- 3 chars "гу", "гъ", "гь", "дж", "дз", "ё", "жъ", "жь", "ку", "къ", "кь", "кӏ", "лъ", "ль", "лӏ", "пӏ", "сӏ", "тӏ", "фӏ", "ху", "хъ", "хь", "цу", "цӏ", "чу", "чъ", "чӏ", "шъ", "шӏ", "щӏ", "ӏу", "ӏь", -- 2 chars "э" -- 1 char }, to = { "к" 
.. p[5], "к" .. p[7], "г" .. p[3], "д" .. p[2], "д" .. p[4], "ж" .. p[2], "к" .. p[3], "к" .. p[4], "к" .. p[6], "к" .. p[10], "к" .. p[11], "л" .. p[2], "л" .. p[5], "п" .. p[2], "с" .. p[2], "т" .. p[2], "ф" .. p[2], "х" .. p[3], "ц" .. p[3], "ч" .. p[3], "ч" .. p[5], "ш" .. p[2], "ш" .. p[4], "щ" .. p[2], "г" .. p[1], "г" .. p[2], "г" .. p[4], "д" .. p[1], "д" .. p[3], "е" .. p[1], "ж" .. p[1], "ж" .. p[3], "к" .. p[1], "к" .. p[2], "к" .. p[8], "к" .. p[9], "л" .. p[1], "л" .. p[3], "л" .. p[4], "п" .. p[1], "с" .. p[1], "т" .. p[1], "ф" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[4], "ц" .. p[1], "ц" .. p[2], "ч" .. p[1], "ч" .. p[2], "ч" .. p[4], "ш" .. p[1], "ш" .. p[3], "щ" .. p[1], "ӏ" .. p[1], "ӏ" .. p[2], "а" .. p[1] } }, }, } m["kbe"] = { "Kanju", 10543322, "aus-pam", "Latn", } m["kbh"] = { "Camsá", 2842667, "qfa-iso", "Latn", } m["kbi"] = { "Kaptiau", 6367294, "poz-oce", "Latn", } m["kbj"] = { "Kari", 6370438, "bnt-boa", "Latn", } m["kbk"] = { "Grass Koiari", 12952642, "ngf", "Latn", } m["kbm"] = { "Iwal", 3156391, "poz-ocw", "Latn", } m["kbn"] = { "Kare (Africa)", 35554, "alv-mbm", "Latn", } m["kbo"] = { "ခါလဳကာဝ်", 11275553, "csu-mma", } m["kbp"] = { "Kabiyé", 35475, "nic-gne", "Latn", } m["kbq"] = { "Kamano", 11732272, "paa-kag", "Latn", } m["kbr"] = { "Kafa", 35481, "omv-gon", "Ethi, Latn", } m["kbs"] = { "Kande", 35556, "bnt-tso", "Latn", } m["kbt"] = { "Gabadi", 3291159, "poz-ocw", "Latn", } m["kbu"] = { "Kabutra", 10966761, "raj", } m["kbv"] = { "Kamberataro", 5261289, "paa", "Latn", } m["kbw"] = { "Kaiep", 6347632, "poz-ocw", "Latn", } m["kbx"] = { "Ap Ma", 56298, "paa-ram", } m["kbz"] = { "Duhwa", 56295, "cdc-wst", "Latn", } m["kcb"] = { "Kawacha", 11732302, "ngf", } m["kcc"] = { "Lubila", 3914381, "nic-uce", "Latn", } m["kcd"] = { "Ngkâlmpw Kanum", 12952566, "paa-yam", } m["kce"] = { "Kaivi", 6348685, "nic-kau", } m["kcf"] = { "Ukaan", 36651, "nic-bco", } m["kcg"] = { "ထိုၚ်အာက်", 3912765, "nic-plc", } m["kch"] = { "Vono", 3913920, "nic-kau", 
} m["kci"] = { "Kamantan", 3914019, "nic-plc", } m["kcj"] = { "Kobiana", 35609, "alv-nyn", } m["kck"] = { "Kalanga", 33672, "bnt-sho", "Latn", } m["kcl"] = { "Kala", 6349982, "poz-ocw", "Latn", } m["kcm"] = { "Tar Gula", 277963, "csu-bba", } m["kcn"] = { "နူဗဳ", 36388, "crp", "Latn, Arab", ancestors = "apd", entry_name = {remove_diacritics = c.acute}, } m["kco"] = { "Kinalakna", 11732320, "ngf", } m["kcp"] = { "Kanga", 6362384, "qfa-kad", "Latn", } m["kcq"] = { "Kamo", 3914879, "alv-wjk", } m["kcr"] = { "Katla", 35688, "nic-ktl", } m["kcs"] = { "Koenoem", 3438755, "cdc-wst", } m["kct"] = { "Kaian", 6347538, "paa-ram", } m["kcu"] = { "Kikami", 3915212, "bnt-ruv", "Latn", } m["kcv"] = { "Kete", 3195598, "bnt-lub", } m["kcw"] = { "Kabwari", 6344539, "bnt-glb", } m["kcx"] = { "Kachama-Ganjule", 12634070, "omv-eom", } m["kcy"] = { "Korandje", 33427, "son", } m["kcz"] = { "Konongo", 11732345, "bnt-tkm", "Latn", } m["kda"] = { "Worimi", 3914062, "aus-pam", "Latn", } m["kdc"] = { "Kutu", 6448634, "bnt-ruv", } m["kdd"] = { "Yankunytjatjara", 34207, "aus-pam", "Latn", } m["kde"] = { "Makonde", 35172, "bnt-rvm", "Latn", } m["kdf"] = { "Mamusi", 6746036, "poz-ocw", "Latn", } m["kdg"] = { "Seba", 7442316, "bnt-sbi", "Latn", } m["kdh"] = { "Tem", 36531, "nic-gne", } m["kdi"] = { "Kumam", 6443410, "sdv-los", } m["kdj"] = { "Karamojong", 56326, "sdv-ttu", "Latn", } m["kdk"] = { "Numèè", 3346774, "poz-cln", "Latn", } m["kdl"] = { "Tsikimba", 3914404, "nic-kam", } m["kdm"] = { "Kagoma", 3914420, "nic-plc", } m["kdn"] = { "Kunda", 4121130, "bnt-sna", } m["kdp"] = { "Kaningdon-Nindem", 3914956, "nic-nin", } m["kdq"] = { "Koch", 56431, "tbq-bdg", } m["kdr"] = { "ကာရေန်", 33725, "trk-kcu", "Cyrl, Latn, Hebr", } m["kdt"] = { "Kuy", 56310, "mkh-kat", "Thai, Khmr, Laoo", } m["kdu"] = { "Kadaru", 35441, "nub-hil", "Latn", } m["kdv"] = { "Kado", 7402721, "sit-luu", } m["kdw"] = { "Koneraw", 11732341, "ngf", } m["kdx"] = { "Kam", 36753, "alv-wjk", } m["kdy"] = { "Keder", 6383641, "paa-tkw", } 
m["kdz"] = { "Kwaja", 11128866, "nic-nka", "Latn", } m["kea"] = { "ခါၜေါအ်အဝ်ဒဳယဴနူ", 35963, "crp", "Latn", ancestors = "pt", } m["keb"] = { "Kélé", 35559, "bnt-kel", } m["kec"] = { "Keiga", 3409311, "qfa-kad", "Latn", } m["ked"] = { "Kerewe", 6393846, "bnt-haj", } m["kee"] = { "Eastern Keres", 15649021, "nai-ker", "Latn", } m["kef"] = { "Kpessi", 35748, "alv-gbe", } m["keg"] = { "Tese", 16887296, "sdv", } m["keh"] = { "Keak", 6382110, "paa-spk", } m["kei"] = { "Kei", 2410352, } m["kej"] = { "Kadar", 6345179, "dra-mal", } m["kek"] = { "ခဳ'ချဳ", 35536, "myn", "Latn", } m["kel"] = { "Kela-Yela", 6385426, "bnt-mon", "Latn", } m["kem"] = { "Kemak", 35549, "poz-tim", } m["ken"] = { "Kenyang", 35650, "nic-mam", "Latn", } m["keo"] = { "Kakwa", 3033547, "sdv-bri", } m["kep"] = { "Kaikadi", 6347757, "dra-tam", } m["keq"] = { "Kamar", 14916877, "inc-hal", } m["ker"] = { "Kera", 56251, "cdc-est", "Latn", } m["kes"] = { "Kugbo", 3813394, "nic-cde", "Latn", } m["ket"] = { "ခေပ်", 33485, "qfa-yno", "Cyrl", entry_name = { from = {"['’]"}, to = {"ʼ"} }, sort_key = { from = {"ӷ", "ё", "ӄ", "ӈ", "ө", "ә", "ʼ"}, to = {"г" .. p[1], "е" .. p[1], "к" .. p[1], "н" .. p[1], "о" .. p[1], "ъ" .. p[1], "ь" .. 
p[1]} }, } m["keu"] = { "Akebu", 35026, "alv-ktg", "Latn", } m["kev"] = { "ကာနေတ်ကာရာန်", 6363201, "dra-mal", } m["kew"] = { "Kewa", 12952619, "paa-eng", "Latn", } m["kex"] = { "Kukna", 5031131, "inc-eas", ancestors = "bh", } m["key"] = { "Kupia", 6445354, "inc-eas", } m["kez"] = { "Kukele", 3915391, "nic-ucn", "Latn", } m["kfa"] = { "ကိုဝ်ဒါဝါ", 33531, "dra-kod", "Knda, Mlym", translit = { Knda = "kn-translit", Mlym = "ml-translit" }, } m["kfb"] = { "ကိုဝ်လာမဳ", 33479, "dra-knk", "Deva, Telu", translit = { Telu = "te-translit", }, } m["kfc"] = { "ခေါန်ဒါ-ဒါဝ်ရာ", 35679, "dra-kki", "Orya, Telu", translit = { Orya = "gon-Orya-translit", Telu = "te-translit", }, } m["kfd"] = { "ခါဝ်ရာတ် ခါဝ်ရာဂါ", 12952655, "dra-kor", "Knda", translit = "kn-translit", } m["kfe"] = { "ကိုဝ်တာ (အိန္ဒိယ)", 33483, "dra-tkt", "Taml", translit = "ta-translit", } m["kff"] = { "ခါဝ်ယျာ", 33471, "dra-gon", } m["kfg"] = { "Kudiya", 12952667, "dra-tlk", } m["kfh"] = { "ခူရဳချဳယျာ", 12952676, "dra-mal", "Mlym", translit = "ml-translit", } m["kfi"] = { "Kannada Kurumba", 56589, "dra-sdo", } m["kfj"] = { "Kemiehua", 27144776, "mkh-pal", } m["kfk"] = { "Kinnauri", 2383208, "sit-kin", "Takr, Deva, Latn", } m["kfl"] = { "Kung", 6444510, "nic-rnc", "Latn", } m["kfn"] = { "Kuk", 6442398, "nic-rnc", "Latn", } m["kfo"] = { "Koro (West Africa)", 11160588, "dmn-mnk", "Latn, Nkoo", } m["kfp"] = { "Korwa", 6432786, "mun", } m["kfq"] = { "Korku", 33715, "mun", } m["kfr"] = { "ကာတ်ချဳ", 56487, "inc-snd", "Gujr, sd-Arab, Sind, Khoj", translit = { Gujr = "gu-translit", Sind = "Sind-translit", }, entry_name = { remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. 
c.superalef, from = {u(0x0671)}, to = {u(0x0627)} }, } m["kfs"] = { "ဖဳလာသၜေါအ်ရဳ", 12953397, "him", "Deva, Takr", translit = "hi-translit", } m["kft"] = { "Kanjari", 12953610, "inc-pan", ancestors = "pa", } m["kfu"] = { "Katkari", 6377671, "inc-sou", } m["kfv"] = { "Kurmukar", 6446193, "inc-eas", } m["kfw"] = { "Kharam Naga", 12952906, "tbq-kuk", } m["kfx"] = { "ခူဠူ ပါဟာရေဝ်", 6443148, "him", "Deva", translit = "hi-translit", } m["kfy"] = { "ခူမာအဝ်နဳ", 33529, "inc-pah", "Deva, Shrd, Takr", translit = "hi-translit", } m["kfz"] = { "Koromfé", 35701, "nic-gur", "Latn", } m["kga"] = { "Koyaga", 11155632, "dmn-mnk", } m["kgb"] = { "Kawe", 12952750, "poz-hce", } m["kgd"] = { "Kataang", 12953622, "mkh", } m["kge"] = { "Komering", 49224, "poz-lgx", } m["kgf"] = { "Kube", 11732359, "ngf", } m["kgg"] = { "Kusunda", 33630, "qfa-iso", "Latn", } m["kgi"] = { "Selangor Sign Language", 33731, "sgn", } m["kgj"] = { "Gamale Kham", 22236996, "sit-kha", "Deva", } m["kgk"] = { "Kaiwá", 3111883, "tup-gua", "Latn", } m["kgl"] = { "Kunggari", 10550184, "aus-pam", } m["kgm"] = { "Karipúna", 6371069, } m["kgn"] = { "ကာရေန်ကာနဳ", 6371041, "xme-ttc", ancestors = "xme-ttc-nor", } m["kgo"] = { "Krongo", 6438927, "qfa-kad", "Latn", } m["kgp"] = { "ခါဲဂါန်", 2665734, "sai-sje", "Latn", } m["kgq"] = { "Kamoro", 6359001, "ngf", } m["kgr"] = { "Abun", 56657, "paa", "Latn", } m["kgs"] = { "Kumbainggar", 3915412, "aus-pam", } m["kgt"] = { "Somyev", 3913354, "nic-mmb", "Latn", } m["kgu"] = { "Kobol", 11732325, "ngf-mad", } m["kgv"] = { "Karas", 6368621, "ngf", } m["kgw"] = { "Karon Dori", 56817, } m["kgx"] = { "Kamaru", 12953604, "poz", } m["kgy"] = { "Kyerung", 12952691, "sit-kyk", } m["kha"] = { "ခါသဳ", 33584, "aav-pkl", "Latn, as-Beng", } m["khb"] = { "သေံလု", 36948, "tai-swe", "Talu, Lana", translit = { Talu = "Talu-translit", Lana = "Lana-translit", }, entry_name = {remove_diacritics = c.ZWNJ}, sort_key = { Talu = "Talu-sortkey", Lana = "Lana-sortkey", }, } m["khc"] = { "Tukang Besi North", 
18611555, "poz", } m["khd"] = { "Bädi Kanum", 20888004, "paa-yam", } m["khe"] = { "Korowai", 6432598, "ngf", } m["khf"] = { "Khuen", 27144893, "mkh", } m["khh"] = { "Kehu", 10994953, } m["khj"] = { "Kuturmi", 3914490, "nic-plc", "Latn", } m["khl"] = { "Lusi", 3267788, "poz-ocw", "Latn", } m["khn"] = { "Khandeshi", 33726, "inc-sou", } m["kho"] = { "ခဝ်တေန်နေတ်", 6583551, "xsc-sak", "Brah, Khar", translit = "Brah-translit", } m["khp"] = { "Kapauri", 3502575, "paa-tkw", } m["khq"] = { "Koyra Chiini", 33600, "son", } m["khr"] = { "Kharia", 3915562, "mun", } m["khs"] = { "Kasua", 6374863, "ngf", } m["kht"] = { "သေံခဂၞဳ", 3915502, "tai-swe", "Mymr", translit = "kht-translit", entry_name = {remove_diacritics = c.VS01}, } m["khu"] = { "Nkhumbi", 11019169, "bnt-swb", } m["khv"] = { "ခပါဃှဳ", 56425, "cau-wts", "Cyrl", translit = "khv-translit", display_text = {Cyrl = s["cau-Cyrl-displaytext"]}, entry_name = {Cyrl = s["cau-Cyrl-entryname"]}, } m["khw"] = { "ခါဝ်ဝါ", 938216, "inc-chi", "Arab", entry_name = { -- character "ۂ" code U+06C2 to "ه" and "هٔ"‎ (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif from = {"هٔ", "ۂ", "ٱ"}, to = {"ہ", "ہ", "ا"}, remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. 
c.superalef }, } m["khx"] = { "Kanu", 12952571, "bnt-lgb", } m["khy"] = { "Ekele", 6385549, "bnt-ske", "Latn", } m["khz"] = { "Keapara", 12952603, "poz-ocw", "Latn", } m["kia"] = { "Kim", 35685, "alv-kim", } m["kib"] = { "Koalib", 35859, "alv-hei", } m["kic"] = { "ခေတ်ခါၜေအ်", 20162127, "alg-sfk", "Latn", } m["kid"] = { "Koshin", 35632, "nic-beb", "Latn", } m["kie"] = { "Kibet", 56893, } m["kif"] = { "Eastern Parbate Kham", 12953022, "sit-kha", "Deva", } m["kig"] = { "Kimaama", 11732321, "ngf", } m["kih"] = { "Kilmeri", 6408020, "paa-brd", } m["kii"] = { "Kitsai", 56627, "cdd", "Latn", } m["kij"] = { "Kilivila", 3196601, "poz-ocw", "Latn", } m["kil"] = { "Kariya", 3438708, "cdc-wst", } m["kim"] = { "တဝ်ဖှာ", 36848, "trk-ssb", "Cyrl", } m["kio"] = { "Kiowa", 56631, "nai-kta", "Latn", } m["kip"] = { "Sheshi Kham", 12952622, "sit-kha", "Deva", } m["kiq"] = { "Kosadle", 6432994, } m["kis"] = { "ခေတ်", 6416362, "poz-ocw", "Latn", } m["kit"] = { "Agob", 3332143, nil, "Latn", } m["kiv"] = { "Kimbu", 10997740, "bnt-tkm", } m["kiw"] = { "Northeast Kiwai", 11732324, "paa-kiw", } m["kix"] = { "ခဳယျာနဳယျာဂါမ် နာဂ", 6401546, "sit-kch", "Latn", } m["kiy"] = { "Kirikiri", 6415159, "paa-lkp", } m["kiz"] = { "Kisi", 3912772, "bnt-bki", } m["kja"] = { "Mlap", 6885683, "paa-nim", } m["kjb"] = { "ခွါန်ဂျေပ်ဗါဝ်", 35551, "myn", "Latn", } m["kjc"] = { "Coastal Konjo", 3198689, "poz", } m["kjd"] = { "Southern Kiwai", 11732322, "paa-kiw", } m["kje"] = { "Kisar", 3197441, "poz", } m["kjg"] = { "ခမူ", 33335, "mkh", "Laoo", sort_key = "Laoo-sortkey", } m["kjh"] = { "ခါခေတ်", 33575, "trk-ssb", "Cyrl", translit = "kjh-translit", override_translit = true, } m["kji"] = { "Zabana", 379130, "poz-ocw", "Latn", } m["kjj"] = { "ဟဳနာလေတ်", 35278, "cau-nec", "Cyrl, Latn", translit = "kjj-translit", override_translit = true, display_text = {Cyrl = s["cau-Cyrl-displaytext"]}, entry_name = { Cyrl = s["cau-Cyrl-entryname"], Latn = s["cau-Latn-entryname"], }, } m["kjk"] = { "Highland Konjo", 3198688, "poz", 
} m["kjl"] = { "Western Parbate Kham", 22237017, "sit-kha", "Deva", } m["kjm"] = { "Kháng", 6403501, "mkh-pal", } m["kjn"] = { "Kunjen", 3200468, "aus-pmn", "Latn", } m["kjo"] = { "Harijan Kinnauri", 5657463, "him", } m["kjp"] = { "ကရေၚ်ပဝ်လ္ပာ်ဗၟံက်", 5330390, "kar", "Mymr, Leke, Thai", translit = "kjp-translit", override_translit = true, } m["kjq"] = { "Western Keres", 12645568, "nai-ker", "Latn", } m["kjr"] = { "Kurudu", 12952678, "poz-hce", "Latn", } m["kjs"] = { "East Kewa", 20050949, "paa-eng", } m["kjt"] = { "Phrae Pwo", 7187991, "kar", "Thai", } m["kju"] = { "Kashaya", 3193689, "nai-pom", "Latn", } m["kjx"] = { "Ramopa", 56830, "paa-nbo", } m["kjy"] = { "Erave", 12952416, "paa-eng", } m["kjz"] = { "Bumthangkha", 2786408, "sit-ebo", "Tibt", translit = "Tibt-translit", override_translit = true, display_text = s["Tibt-displaytext"], entry_name = s["Tibt-entryname"], sort_key = "Tibt-sortkey", } m["kka"] = { "ကာကာန်ဒါ", 3915342, "alv-ngb", } m["kkb"] = { "Kwerisa", 56881, "paa-lkp", } m["kkc"] = { "Odoodee", 12952987, } m["kkd"] = { "Kinuku", 6414422, "nic-kau", } m["kke"] = { "Kakabe", 3913966, "dmn-mok", "Latn", } m["kkf"] = { "Kalaktang Monpa", 63257089, "sit-tsk", "Tibt, Latn, Deva", translit = {Tibt = "Tibt-translit"}, override_translit = true, display_text = {Tibt = s["Tibt-displaytext"]}, entry_name = {Tibt = s["Tibt-entryname"]}, sort_key = {Tibt = "Tibt-sortkey"}, } m["kkg"] = { "Mabaka Valley Kalinga", 18753304, "phi", } m["kkh"] = { "သေံဃိန်", 3545044, "tai-swe", "Lana, Thai", translit = { Lana = "Lana-translit", Thai = "Thai alphabet-translit", }, sort_key = { Lana = "Lana-sortkey", Thai = "Thai-sortkey" }, } m["kki"] = { "Kagulu", 12952537, "bnt-ruv", "Latn", } m["kkj"] = { "Kako", 35755, "bnt-kak", } m["kkk"] = { "Kokota", 3198399, "poz-ocw", "Latn", } m["kkl"] = { "Kosarek Yale", 6432995, "ngf", } m["kkm"] = { "Kiong", 6414512, "nic-ucr", "Latn", } m["kkn"] = { "Kon Keu", 6428686, "mkh-pal", } m["kko"] = { "Karko", 35529, "nub-hil", } m["kkp"] = 
{ "Koko-Bera", 6426699, "aus-pmn", "Latn", } m["kkq"] = { "Kaiku", 6347840, "bnt-kbi", "Latn", } m["kkr"] = { "Kir-Balar", 3440527, "cdc-wst", "Latn", } m["kks"] = { "Kirfi", 56242, "cdc-wst", "Latn", } m["kkt"] = { "Koi", 6426194, "sit-kiw", } m["kku"] = { "Tumi", 3913934, "nic-kau", } m["kkv"] = { "Kangean", 2071325, "poz-msa", "Latn", } m["kkw"] = { "Teke-Kukuya", 36560, "bnt-tek", } m["kkx"] = { "Kohin", 6425997, "poz-brw", } m["kky"] = { "Guugu Yimidhirr", 56543, "aus-pam", "Latn", } m["kkz"] = { "Kaska", 20823, "ath-nor", "Latn", } m["kla"] = { "Klamath-Modoc", 2669248, "nai-plp", "Latn", } m["klb"] = { "Kiliwa", 3182593, "nai-yuc", "Latn", } m["klc"] = { "Kolbila", 6427122, "alv-lek", } m["kld"] = { "ကာမိလာရာဲ", 3111818, "aus-cww", "Latn", } m["kle"] = { "Kulung", 6443304, "sit-kic", } m["klf"] = { "Kendeje", 56895, } m["klg"] = { "Tagakaulu Kalagan", 18756514, "phi", } m["klh"] = { "Weliki", 7981017, "ngf-fin", "Latn", } m["kli"] = { "Kalumpang", 13561407, "poz", } m["klj"] = { "ခါပ်လေတ်", 33455, "trk", "fa-Arab, Latn", ancestors = "klj-arg", entry_name = { remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. 
c.sukun, } } m["klk"] = { "Kono (Nigeria)", 6429589, "nic-kau", "Latn", } m["kll"] = { "Kagan Kalagan", 18748913, "phi", } m["klm"] = { "Kolom", 6844970, "ngf-mad", "Latn", } m["kln"] = { "Kalenjin", 637228, "sdv-nma", "Latn", } m["klo"] = { "Kapya", 6367410, "nic-ykb", } m["klp"] = { "Kamasa", 6356107, "ngf", } m["klq"] = { "Rumu", 7379420, "ngf", } m["klr"] = { "ခါလေန်", 56381, "sit-kiw", "Deva", } m["kls"] = { "ကလာချာ", 33416, "inc-chi", "Latn, ks-Arab", } m["klt"] = { "Nukna", 7068874, "ngf-fin", "Latn", } m["klu"] = { "Klao", 3914866, "kro-wkr", } m["klv"] = { "မာတ်သခေလေါန်", 3297282, "poz-vnc", "Latn", } m["klw"] = { "လေန်ဒူ", 18390055, "poz-kal", "Latn", } m["klx"] = { "Koluwawa", 6427954, "poz-ocw", "Latn", } m["kly"] = { "Kalao", 6350643, "poz", } m["klz"] = { "Kabola", 11732258, "qfa-tap", } m["kma"] = { "Konni", 35680, "nic-buk", } m["kmb"] = { "ခေၚ်ဗွိုန်ဒူန်", 35891, "bnt-kmb", "Latn", } m["kmc"] = { "ကါမ် လ္ပာ်ဒိုဟ်သမၠုၚ်ကျာ", 35379, "qfa-kms", "Latn", } m["kmd"] = { "Madukayang Kalinga", 18753305, "phi", } m["kme"] = { "Bakole", 35068, "bnt-kpw", } m["kmf"] = { "Kare (New Guinea)", 11732286, "ngf-mad", "Latn", } m["kmg"] = { "Kâte", 3201059, "ngf", } m["kmh"] = { "Kalam", 12952550, "ngf-mad", } m["kmi"] = { "ခမ်မဳ", 3915372, "alv-ngb", "Latn", } m["kmj"] = { "ကူမာရာတ်ဗါတ် ပါဟာရဳယျာ", 3130374, "dra-mlo", "Beng, Deva", } m["kmk"] = { "လေန်မဝ်သေန် ကလေန်ဂါ", 18753303, "phi", } m["kml"] = { "တနုဒါန် ကလဳၚ်္ဂါ", 18753307, "phi", "Latn", } m["kmm"] = { "Kom (India)", 12952647, "tbq-kuk", } m["kmn"] = { "Awtuw", 3504217, "paa-spk", } m["kmo"] = { "Kwoma", 11732376, "paa-spk", } m["kmp"] = { "Gimme", 11152236, "alv-dur", } m["kmq"] = { "Kwama", 2591184, "ssa-kom", } m["kmr"] = { "ကာဒ် လ္ပာ်သၟဝ်ကျာ", 36163, "ku", "Latn, Cyrl, Armn, ku-Arab, Yezi", translit = { Cyrl = "kmr-translit", Armn = "Armn-translit", ["ku-Arab"] = "ckb-translit", }, entry_name = { remove_diacritics = "'’", from = {"r̄", "R̄", "ẍ", "Ẍ"}, to = {"rr", "Rr", "x", "X"} }, wikimedia_codes = 
"ku", } m["kms"] = { "Kamasau", 6356117, "qfa-tor", "Latn", } m["kmt"] = { "Kemtuik", 6387179, "paa-nim", } m["kmu"] = { "Kanite", 12952567, "paa-kag", } m["kmv"] = { "Karipúna Creole French", 2523999, "crp", "Latn", ancestors = "fr", sort_key = s["roa-oil-sortkey"], } m["kmw"] = { "Kumu", 6428450, "bnt-kbi", "Latn", } m["kmx"] = { "Waboda", 7958705, "paa-kiw", } m["kmy"] = { "Koma", 35634, "alv-dur", } m["kmz"] = { "Khorasani Turkish", 35373, "trk-ogz", ancestors = "trk-oat", } m["kna"] = { "Kanakuru", 56811, "cdc-wst", "Latn", } m["knb"] = { "ဠူၜေအ်ဂါန် ကလဳၚ်္ဂါ", 12953602, "phi", } m["knd"] = { "Konda", 11732340, "ngf-sbh", "Latn", } m["kne"] = { "ကာန်ခါနာအဳ", 18753329, "phi", "Latn", entry_name = { Latn = { remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer, } }, sort_key = { Latn = "tl-sortkey", }, standardChars = { Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy" .. c.punc, }, } m["knf"] = { "မေန်ခမ်ယျာ", 35789, "alv-pap", } m["kni"] = { "Kanufi", 3913297, "nic-nin", "Latn", } m["knj"] = { "အာကာတေက်", 34923, "myn", "Latn", } m["knk"] = { "ခူရာန်ခဝ်", 3198896, "dmn-mok", "Latn", } m["knl"] = { "Keninjal", 6389309, "poz-mly", } m["knm"] = { -- two unrelated lects have this name; this is the Katukinian one "Kanamari", 3438373, "sai-ktk", "Latn", } m["kno"] = { "ခဝ်နဝ် (သဲယျာရာ လဳယျေန်နဳ)", 35675, "dmn-vak", } m["knp"] = { "Kwanja", 35641, "nic-mmb", "Latn", } m["knq"] = { "Kintaq", 6414335, "mkh-asl", } m["knr"] = { "Kaningra", 6363253, "paa-spk", } m["kns"] = { "Kensiu", 6391529, "mkh-asl", } m["knt"] = { "Katukina", 3194265, "sai-pan", "Latn", } m["knu"] = { -- a dialect of 'kpe' "Kono (Guinea)", 3198703, "dmn-msw", "Latn, Kpel", ancestors = "kpe", } m["knv"] = { "Tabo", 7959888, "aav", } m["knx"] = { "Kendayan", 6388963, "poz-mly", "Latn", } m["kny"] = { "Kanyok", 11110766, "bnt-lub", } m["knz"] = { "Kalamsé", 3914000, "nic-gnn", } m["koa"] = { "Konomala", 3198732, "poz-ocw", "Latn", } m["koc"] = { "Kpati", 3913279, "nic-nge", "Latn", } m["kod"] = { 
"Kodi", 4577633, } m["koe"] = { "Kacipo-Balesi", 5364424, "sdv", } m["kof"] = { "Kubi", 3438718, "cdc-wst", "Latn", } m["kog"] = { "ခါဝ်ဂွဳ", 3198286, "cba", } m["koh"] = { "Koyo", 35649, "bnt-mbo", "Latn", } m["koi"] = { "ခဝ်မဳ-ဖေန်ယျိတ်", 56318, "urj-prm", "Cyrl", translit = "kv-translit", entry_name = {remove_diacritics = c.acute}, override_translit = true, } m["kok"] = { "ခေန်ကနဳ", 34239, "inc-sou", "Deva, Knda, Mlym, fa-Arab, Latn", translit = { Deva = "mr-translit", Knda = "kn-translit", Mlym = "ml-translit", }, entry_name = { from = {"च़", "ज़", "झ़", "ಚ಼", "ಜ಼", "ಝ಼"}, to = {"च", "ज", "झ", "ಚ", "ಜ", "ಝ"} } , } m["kol"] = { "Kol (New Guinea)", 4227542, } m["koo"] = { "Konzo", 2361829, "bnt-glb", } m["kop"] = { "Waube", 11732373, "ngf-mad", } m["koq"] = { "Kota (Gabon)", 35607, "bnt-kel", "Latn", } m["kos"] = { "Kosraean", 33464, "poz-mic", "Latn", } m["kot"] = { "Lagwan", 3502264, "cdc-cbm", "Latn", } m["kou"] = { "Koke", 797249, "alv-bua", } m["kov"] = { "Kudu-Camo", 3915850, "nic-jer", } m["kow"] = { "Kugama", 3913307, "alv-mye", } m["koy"] = { "Koyukon", 28304, "ath-nor", "Latn", } m["koz"] = { "Korak", 6431365, "ngf-mad", } m["kpa"] = { "Kutto", 3437656, "cdc-wst", } m["kpb"] = { "မူဠူ ကူရုန်ဗါ", 19573111, "dra-mal", } m["kpc"] = { "Curripaco", 2882543, "awd-nwk", "Latn", } m["kpd"] = { "Koba", 6424249, "poz", } m["kpe"] = { "Kpelle", 35673, "dmn-msw", "Latn, Kpel", } m["kpf"] = { "Komba", 6428239, "ngf", } m["kpg"] = { "ကပေန်ဂါမာရာန်ဂဳ", 35771, "poz-pnp", "Latn", } m["kph"] = { "Kplang", 35628, "alv-gng", } m["kpi"] = { "Kofei", 6425665, "paa-egb", "Latn", } m["kpj"] = { "Karajá", 10322066, "sai-mje", "Latn", } m["kpk"] = { "Kpan", 3915380, "nic-jkn", "Latn", } m["kpl"] = { "Kpala", 11154769, "nic-nkk", "Latn", } m["kpm"] = { "ကိုဝ်ဟဝ်", 3511919, "mkh-ban", "Latn", } m["kpn"] = { "Kepkiriwát", 3195366, "tup", "Latn", } m["kpo"] = { "Ikposo", 35029, "alv-ktg", "Latn", } m["kpq"] = { "Korupun-Sela", 6432769, "ngf", } m["kpr"] = { "Korafe-Yegha", 11732347, 
"ngf", } m["kps"] = { "Tehit", 7694851, } m["kpt"] = { "ခါရာတ", 56636, "cau-and", "Cyrl", display_text = {Cyrl = s["cau-Cyrl-displaytext"]}, entry_name = {Cyrl = s["cau-Cyrl-entryname"]}, } m["kpu"] = { "Kafoa", 6346151, "qfa-tap", } m["kpv"] = { "ခဝ်မဳ-သဳရေဝ်ယာန်", 34114, "urj-prm", "Cyrl", translit = "kv-translit", override_translit = true, wikimedia_codes = "kv", } m["kpw"] = { "ကဝ်ဗေန်", 11732326, "ngf-mad", } m["kpx"] = { "Mountain Koiari", 6925030, "ngf", } m["kpy"] = { "Koryak", 36199, "qfa-ckn", "Cyrl", entry_name = { from = {"['’]"}, to = {"ʼ"} }, sort_key = { from = {"вʼ", "гʼ", "ё", "ӄ", "ӈ"}, to = {"в" .. p[1], "г" .. p[1], "е" .. p[1], "к" .. p[1], "н" .. p[1]} }, translit = "kpy-translit", } m["kpz"] = { "Kupsabiny", 56445, "sdv-kln", } m["kqa"] = { "Mum", 6935252, "ngf-mad", } m["kqb"] = { "Kovai", 6434822, "ngf", } m["kqc"] = { "Doromu-Koki", 5298175, "ngf", } m["kqd"] = { "Koy Sanjaq Surat", 33463, "sem-nna", } m["kqe"] = { "Kalagan", 18748906, "phi", } m["kqf"] = { "Kakabai", 6349119, "poz-ocw", "Latn", } m["kqg"] = { "Khe", 3914015, "nic-gur", } m["kqh"] = { "Kisankasa", 6416409, "sdv", } m["kqi"] = { "Koitabu", 6426363, "ngf", } m["kqj"] = { "Koromira", 6432520, "paa-sbo", } m["kqk"] = { "Kotafon Gbe", 12952447, "alv-pph", } m["kql"] = { "Kyenele", 11732453, "paa-yua", } m["kqm"] = { "Khisa", 3913955, "nic-gur", } m["kqn"] = { "Kaonde", 33601, "bnt-lub", "Latn", } m["kqo"] = { "Eastern Krahn", 3915374, "kro-wee", } m["kqp"] = { "Kimré", 3441210, "cdc-est", } m["kqq"] = { "Krenak", 6436747, "sai-cer", } m["kqr"] = { "Kimaragang", 3196845, "poz-san", "Latn", } m["kqs"] = { "Northern Kissi", 19921576, "alv-kis", } m["kqt"] = { "Klias River Kadazan", 12953594, "poz-san", } m["kqu"] = { "Seroa", 33127766, "khi-tuu", } m["kqv"] = { "Okolod", 7082487, "poz-san", } m["kqw"] = { "ခါန်ဒါတ်သ်", 3192590, "poz-ocw", "Latn", } m["kqx"] = { "Mser", 3502347, "cdc-cbm", } m["kqy"] = { "Koorete", 6430753, "omv-eom", } m["kqz"] = { "Korana", 2756709, "khi-khk", 
"Latn", } m["kra"] = { "Kumhali", 13580783, "inc-eas", ancestors = "bh", } m["krb"] = { "Karkin", 3193345, "nai-you", "Latn", } m["krc"] = { "ကရာချဲ-ဗါဝ်ကာ", 33714, "trk-kcu", "Cyrl", translit = "krc-translit", sort_key = { from = {"гъ", "дж", "ё", "къ", "нг"}, to = {"г" .. p[1], "д" .. p[1], "е" .. p[1], "к" .. p[1], "н" .. p[1]} }, } m["krd"] = { "Kairui-Midiki", 12953277, "poz-tim", } m["kre"] = { "Panará", 3361895, "sai-cer", } m["krf"] = { "Koro (Vanuatu)", 3198995, "poz-vnn", "Latn", } m["krh"] = { "Kurama", 35593, "nic-kau", } m["kri"] = { "ခရိအဝ်", 35744, "crp", "Latn", ancestors = "en", } m["krj"] = { "ခဳနာရော-အာ", 33720, "phi", "Latn", } m["krk"] = { "Kerek", 332792, "qfa-ckn", "Cyrl", } m["krl"] = { "ခါရေဝ်လဳယာန်", 33557, "urj-fin", "Latn", sort_key = { from = { "č", "š", "ž", "ü", "ä", "ö", -- 2 chars "z", "'" -- 1 char }, to = { "c" .. p[1], "s" .. p[1], "s" .. p[3], "y" .. p[1], "y" .. p[2], "y" .. p[3], "s" .. p[2], "y" .. p[4], } }, } m["krm"] = { "Krim", 35713, "alv", } m["krn"] = { "Sapo", 3915386, "kro-wee", } m["krp"] = { "Korop", 35626, "nic-ucr", "Latn", } m["krr"] = { "Kru'ng", 12953650, "mkh-ban", } m["krs"] = { "Kresh", 56674, "csu-bkr", } m["kru"] = { "ကူရု", 33492, "dra-kml", "Deva, Tols", translit = { Deva = "hi-translit", }, } m["krv"] = { "Kavet", 12953649, "sai-ktk", "Latn", } m["krw"] = { "Western Krahn", 10975611, "kro-wee", } m["krx"] = { "Karon", 35704, "alv-jol", } m["kry"] = { "Kryts", 35861, "cau-ssm", "Latn, Cyrl", display_text = {Cyrl = s["cau-Cyrl-displaytext"]}, entry_name = { Latn = s["cau-Latn-entryname"], Cyrl = s["cau-Cyrl-entryname"], }, } m["krz"] = { "Sota Kanum", 12952568, "paa-yam", } m["ksa"] = { "Shuwa-Zamani", 3913929, "nic-kau", } m["ksb"] = { "Shambala", 3788739, "bnt-seu", "Latn", } m["ksc"] = { "Southern Kalinga", 18753301, "phi", } m["ksd"] = { "တဝ်လာၚ်", 35870, "poz-ocw", "Latn", } m["kse"] = { "Kuni", 6444619, "poz-ocw", "Latn", } m["ksf"] = { "Bafia", 34930, "bnt-baf", } m["ksg"] = { "Kusaghe", 3200638, 
"poz-ocw", "Latn", } m["ksi"] = { "ခရေတ်သ", 841704, "paa-msk", "Latn", } m["ksj"] = { "Uare", 6450052, "ngf", } m["ksk"] = { "Kansa", 3192772, "sio-dhe", } m["ksl"] = { "Kumalu", 17584381, "poz-ocw", "Latn", } m["ksm"] = { "Kumba", 3913972, "alv-mye", } m["ksn"] = { "Kasiguranin", 6374525, "phi", } m["kso"] = { "Kofa", 56278, "cdc-cbm", } m["ksp"] = { "Kaba", 3915316, "csu-sar", } m["ksq"] = { "Kwaami", 3440525, "cdc-wst", } m["ksr"] = { "Borong", 4946263, "ngf", } m["kss"] = { "Southern Kissi", 11028974, "alv-kis", } m["kst"] = { "Winyé", 3913360, "nic-gnw", } m["ksu"] = { "Khamyang", 6583541, "tai-swe", } m["ksv"] = { "Kusu", 6448199, "bnt-tet", } m["ksw"] = { "ကရေၚ်သကုဝ်", 56410, "kar", "Mymr", translit = "ksw-translit", } m["ksx"] = { "Kedang", 6382520, "poz", "Latn", } m["ksy"] = { "Kharia Thar", 6400661, "inc-eas", } m["ksz"] = { "Kodaku", 21179986, "mun", } m["kta"] = { "Katua", 6378404, "mkh-ban", } m["ktb"] = { "Kambaata", 35664, "cus-hec", "Latn", } m["ktc"] = { "Kholok", 3440464, "cdc-wst", } m["ktd"] = { "Kokata", 10547021, "aus-pam", } m["ktf"] = { "Kwami", 12952687, "bnt-lgb", } m["ktg"] = { "Kalkatungu", 3914057, "aus-pam", "Latn", } m["kth"] = { "Karanga", 713643, } m["kti"] = { "North Muyu", 20857698, "ngf", "Latn", } m["ktj"] = { "Plapo Krumen", 10975356, "kro-grb", } m["ktk"] = { "Kaniet", 3399050, "poz-aay", "Latn", } m["ktl"] = { "Koroshi", 3775265, "ira-nwi", ancestors = "bal", } m["ktm"] = { "Kurti", 3200615, "poz-aay", "Latn", } m["ktn"] = { "Karitiâna", 3112184, "tup", "Latn", } m["kto"] = { "Kuot", 56537, } m["ktp"] = { "Kaduo", 769809, "tbq-bka", } m["ktq"] = { "Katabaga", 3193895, } m["ktr"] = { "Kota Marudu Tinagas", 18642280, } m["kts"] = { "South Muyu", 42308820, "ngf", "Latn", } m["ktt"] = { "Ketum", 12952616, "ngf", } m["ktu"] = { "Kituba", 35746, "crp", "Latn", ancestors = "kg", } m["ktv"] = { "ကဒူ လ္ပာ်ဖာဗၟံက်", 22808951, "mkh-kat", "Latn", } m["ktw"] = { "Kato", 20831, "ath-pco", "Latn", } m["ktx"] = { "Kaxararí", 6380124, 
"sai-pan", "Latn", } m["kty"] = { "Kango", 6362818, "bnt-bta", "Latn", } m["ktz"] = { "Juǀ'hoan", 1192295, "khi-kxa", "Latn", } m["kub"] = { "Kutep", 35645, "nic-jkn", } m["kuc"] = { "Kwinsu", 6450460, "paa-tkw", } m["kud"] = { "Auhelawa", 5166, "poz-ocw", "Latn", } m["kue"] = { "Kuman", 137525, "ngf", "Latn", } m["kuf"] = { "ကတူ လ္ပာ်ပလိုတ်", 6378400, "mkh-kat", "Laoo, Tale", } m["kug"] = { "Kupa", 3915336, "alv-ngb", } m["kuh"] = { "Kushi", 3438747, "cdc-wst", } m["kui"] = { "ခူဣိခူရဝ်", 3915522, "sai-kui", "Latn", } m["kuj"] = { "Kuria", 6445968, "bnt-lok", "Latn", } m["kuk"] = { "Kepo'", 6393217, "poz", } m["kul"] = { "Kulere", 3440506, "cdc-wst", } m["kum"] = { "ခူမာတ်", 36209, "trk-kcu", "Cyrl", translit = "kum-translit", sort_key = { from = {"гъ", "гь", "ё", "къ", "нг", "оь", "уь"}, to = {"г" .. p[1], "г" .. p[2], "е" .. p[1], "к" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1]} }, } m["kun"] = { "Kunama", 36041, } m["kuo"] = { "Kumukio", 11732362, "ngf", } m["kup"] = { "Kunimaipa", 6444696, } m["kuq"] = { "Karipuna", 6371071, "tup-gua", "Latn", } m["kus"] = { "ကူသာလ်", 35708, "nic-dag", "Latn", } m["kut"] = { "Kutenai", 33434, "qfa-iso", } m["kuu"] = { "Upper Kuskokwim", 28062, "ath-nor", } m["kuv"] = { "Kur", 12635082, "poz-cma", "Latn", } m["kuw"] = { "Kpagua", 11137573, "bad-cnt", } m["kux"] = { "Kukatja", 10549839, "aus-pam", } m["kuy"] = { "Kuuku-Ya'u", 10550697, "aus-pmn", } m["kuz"] = { "Kunza", 2669181, "qfa-iso", } m["kva"] = { "ဗတ်ဝါဠူ", 56638, "cau-and", "Cyrl", translit = "cau-nec-translit", override_translit = true, display_text = {Cyrl = s["cau-Cyrl-displaytext"]}, entry_name = {Cyrl = s["cau-Cyrl-entryname"]}, } m["kvb"] = { "Kubu", 6441341, "poz-mly", } m["kvc"] = { "Kove", 3199402, "poz-ocw", "Latn", } m["kvd"] = { "Kui (Indonesia)", 6442230, "ngf", } m["kve"] = { "Kalabakan", 6350003, "poz-san", } m["kvf"] = { "Kabalai", 3440427, "cdc-est", } m["kvg"] = { "Kuni-Boazi", 2907551, "ngf", } m["kvh"] = { "Komodo", 3198565, "poz-cet", } m["kvi"] 
= { "Kwang", 3440398, "cdc-est", "Latn", } m["kvj"] = { "Psikye", 56304, "cdc-cbm", } m["kvk"] = { "အရေဝ်ဘာသာကွတ်တဲကိုဝ်ရဳယျာ", 3073428, "sgn-jsl", } m["kvl"] = { "Brek Karen", 12952577, "kar", } m["kvm"] = { "Kendem", 35751, "nic-mam", "Latn", } m["kvn"] = { "Border Kuna", 31777873, "cba", } m["kvo"] = { "Dobel", 5286559, "poz", } m["kvp"] = { "Kompane", 18343041, "poz", } m["kvq"] = { "Geba Karen", 12952581, "kar", } m["kvr"] = { "Kerinci", 3195442, "poz-mly", } m["kvt"] = { "Lahta Karen", 12952582, "kar", } m["kvu"] = { "Yinbaw Karen", 14426328, "kar", } m["kvv"] = { "Kola", 6426967, "poz", } m["kvw"] = { "Wersing", 7983599, "qfa-tap", } m["kvx"] = { "Parkari Koli", 3244176, "inc-wes", } m["kvy"] = { "Yintale Karen", 14426329, "kar", } m["kvz"] = { "Tsakwambo", 7849438, "ngf", } m["kwa"] = { "Dâw", 3042278, "sai-nad", } m["kwb"] = { "Baa", 34842, "alv-ada", } m["kwc"] = { "Likwala", 35597, "bnt-mbo", } m["kwd"] = { "Kwaio", 3200796, "poz-sls", "Latn", } m["kwe"] = { "Kwerba", 6450328, "paa-tkw", } m["kwf"] = { "Kwara'ae", 3200829, "poz-sls", "Latn", } m["kwg"] = { "Sara Kaba Deme", 3915384, "csu-kab", } m["kwh"] = { "Kowiai", 6435028, "poz", } m["kwi"] = { "Awa-Cuaiquer", 2603103, "sai-bar", "Latn", } m["kwj"] = { "Kwanga", 3438383, "paa-spk", } m["kwk"] = { "ခွါကွာ'ဝလာ", 2640628, "wak", "Latn", } m["kwl"] = { "Kofyar", 3441382, "cdc-wst", "Latn", } m["kwm"] = { "Kwambi", 3487165, "bnt-ova", } m["kwn"] = { "Kwangali", 36334, "bnt-kav", "Latn", } m["kwo"] = { "Kwomtari", 3508116, } m["kwp"] = { "Kodia", 3914867, "kro-ekr", } m["kwq"] = { "Kwak", 11014183, "nic-nka", ancestors = "yam", } m["kwr"] = { "Kwer", 12635137, "ngf-okk", } m["kws"] = { "Kwese", 3200846, "bnt-pen", } m["kwt"] = { "Kwesten", 6450354, "paa-tkw", } m["kwu"] = { "Kwakum", 35624, "bnt-kak", } m["kwv"] = { "Sara Kaba Náà", 3915361, "csu-kab", } m["kww"] = { "ကဝေန်တဳ", 721182, "crp", "Latn", ancestors = "en" } m["kwx"] = { "Khirwar", 12976968, "dra", } m["kwz"] = { "Kwadi", 2364661, "khi-kkw", 
"Latn", } m["kxa"] = { "Kairiru", 3398785, "poz-ocw", "Latn", } m["kxb"] = { "Krobu", 35586, "alv-ptn", "Latn", } m["kxc"] = { "Khonso", 56624, "cus-eas", } m["kxd"] = { "မလေဝ် ဗရုနာဲ", 3182878, "poz-mly", "Latn, ms-Arab", } m["kxe"] = { "Kakihum", 3914433, "nic-kam", ancestors = "tvd", } m["kxf"] = { "ကရေၚ်မနုမနဝ်", 12952592, "kar", "Mymr, Latn", } m["kxh"] = { "Karo", 3447116, "omv-aro", } m["kxi"] = { "Keningau Murut", 6389308, "poz-san", "Latn", } m["kxj"] = { "Kulfa", 713654, "csu-kab", } m["kxk"] = { "ကရေၚ်သယာန်", 14352960, "kar", } m["kxl"] = { "Nepali Kurux", 3200624, "dra-kml", "Deva", ancestors = "kru", } m["kxm"] = { "ခမေန်သၟဝ်ကျာ", 3502234, "mkh-kmr", "Thai, Khmr", ancestors = "xhm", translit = { Khmr = "km-translit", Thai = "Thai alphabet-translit", }, sort_key = { from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"} }, } m["kxn"] = { "ကနဝ်ဝေတ်", 6364300, "poz-bnn", "Latn", } m["kxo"] = { "Kanoé", 4356223, "qfa-iso", } m["kxp"] = { "Wadiyara Koli", 12953645, "inc-wes", } m["kxq"] = { "Smärky Kanum", 12952569, "paa-yam", } m["kxr"] = { "Manus Koro", 3198994, "poz-aay", "Latn", } m["kxs"] = { "Kangjia", 3182570, "xgn-shr", "Latn", } m["kxt"] = { "Koiwat", 6426388, "paa-spk", } m["kxu"] = { "ကူအဳ (အိန္ဒိယ)", 33919, "dra-kki", "Orya", translit = "kxv-translit", entry_name = { remove_diacritics = "୕", from = {"ଆଆ", "ଇଇ", "ଉଉ", "ଏଏ", "ଓଓ", "ିଇ", "ୁଉ", "େଏ", "ୋଓ"}, to = {"ଆ", "ଈ", "ଊ", "ଏ", "ଓ", "ୀ", "ୂ", "େ", "ୋ"}, }, } m["kxv"] = { "ကူဝဳ", 3200721, "dra-kki", "Orya", translit = "kxv-translit", entry_name = { remove_diacritics = "୕", from = {"ଆଆ", "ଇଇ", "ଉଉ", "ଏଏ", "ଓଓ", "([କ-ହ])ଆ", "ିଇ", "ୁଉ", "େଏ", "ୋଓ"}, to = {"ଆ", "ଈ", "ଊ", "ଏ", "ଓ", "%1ା", "ୀ", "ୂ", "େ", "ୋ"}, }, } m["kxw"] = { "Konai", 11732339, } m["kxx"] = { "Likuba", 35646, "bnt-bmo", } m["kxy"] = { "Kayong", 6380673, "mkh", } m["kxz"] = { "Kerewo", 6393847, "paa-kiw", } m["kya"] = { "Kwaya", 6450276, "bnt-haj", "Latn", } m["kyb"] = { "Butbut Kalinga", 18753300, "phi", } m["kyc"] = { 
"Kyaka", 12952690, "paa-eng", } m["kyd"] = { "Karey", 6370196, "poz", } m["kye"] = { "Krache", 35658, "alv-gng", } m["kyf"] = { "Kouya", 35595, "kro-bet", } m["kyg"] = { "Keyagana", 6398208, "paa-kag", } m["kyh"] = { "Karok", 1288440, "qfa-iso", "Latn", } m["kyi"] = { "ခဳပွေန်", 3038653, "poz-swa", "Latn", } m["kyj"] = { "ကာဒ်ရာအဝ်", 3192950, "phi", "Latn", } m["kyk"] = { "Kamayo", 3192339, "phi", } m["kyl"] = { "Kalapuya", 3192120, "nai-klp", } m["kym"] = { "Kpatili", 3913982, "znd", } m["kyn"] = { "Karolanos", 6373093, "phi", } m["kyo"] = { "Kelon", 6386414, "ngf", } m["kyp"] = { "Kang", 25559558, "tai", } m["kyq"] = { "Kenga", 35707, "csu-bgr", } m["kyr"] = { "Kuruáya", 3200633, "tup", "Latn", } m["kys"] = { "Baram Kayan", 2883794, "poz", } m["kyt"] = { "Kayagar", 6380394, "ngf", } m["kyu"] = { "ကယျာလပါက်ပၠိုတ်", 12952596, "kar", "Kali, Mymr, Latn", translit = {Kali = "Kali-translit"}, } m["kyv"] = { "Kayort", 6380675, "inc-eas", "Deva", } m["kyw"] = { "ကုဒ်မာလဳ", 6446173, "inc-bih", "Deva, as-Beng, Orya, Chis", } m["kyx"] = { "Rapoisi", 7294279, "paa-nbo", } m["kyy"] = { "Kambaira", 6356254, "paa-kag", } m["kyz"] = { "Kayabí", 6380372, "tup-gua", "Latn", } m["kza"] = { "Western Karaboro", 36601, "alv-krb", } m["kzb"] = { "Kaibobo", 6347565, "poz-cma", } m["kzc"] = { "Bondoukou Kulango", 11031321, "alv-kul", } m["kzd"] = { "Kadai", 7679471, "poz-cma", "Latn", } m["kze"] = { "Kosena", 12952663, "ngf", "Latn", } m["kzf"] = { "Da'a Kaili", 33103997, "poz-kal", "Latn", } m["kzg"] = { "ခဳခါဲ", 3196527, "jpx-nry", "Jpan", translit = s["jpx-translit"], display_text = s["jpx-displaytext"], entry_name = s["jpx-entryname"], sort_key = s["jpx-sortkey"], } m["kzh"] = { "ဒံၚ်ဂါဝ်လာဝဳ", 5295991, "nub", "Latn", } m["kzi"] = { "ခေလာဗေတ်", 6385445, "poz-swa", "Latn", } m["kzj"] = { "Coastal Kadazan", 3307195, "poz-san", "Latn", } m["kzk"] = { "Kazukuru", 1089069, "poz-ocw", } m["kzl"] = { "Kayeli", 4207444, "poz-cma", "Latn", } m["kzm"] = { "Kais", 6348319, "paa", "Latn", } 
m["kzn"] = { "Kokola", 11128329, "bnt-mak", "Latn", ancestors = "vmw", } m["kzo"] = { "Kaningi", 35683, "bnt-mbt", } m["kzp"] = { "Kaidipang", 6347611, "phi", } m["kzq"] = { "Kaike", 10951226, "sit-tam", } m["kzr"] = { "Karang", 35681, "alv-mbm", "Latn", } m["kzs"] = { "Sugut Dusun", 12953510, "poz-san", "Latn", } m["kzt"] = { "Tambunan Dusun", 12953514, "poz-san", "Latn", } m["kzu"] = { "Kayupulau", 6380723, "poz-ocw", } m["kzv"] = { "Komyandaret", 6428671, "ngf-okk", "Latn", } m["kzw"] = { -- contrast xoo, sai-kat, sai-xoc, the last of which the ISO conflated into this code "Kariri", 12953620, "sai-mje", "Latn", } m["kzx"] = { "Kamarian", 6356040, "poz-cma", "Latn", } m["kzy"] = { "Kango-Sua", 11008360, "bnt-kbi", "Latn", ancestors = "bip", } m["kzz"] = { "Kalabra", 6350038, "paa", "Latn", } return require("Module:languages").finalizeData(m, "language") 2rdppc10eur8p1283mbm14fu8wq9vjh ညးလွပ်:咽頭べさ/Notepad 2 9186 395939 395779 2026-05-30T01:37:23Z 咽頭べさ 33 395939 wikitext text/x-wiki [[🝴]] [[🝵]] [[🝶]] [[🝻]] [[🝼]] [[🝽]] [[🝾]] [[🝿]] [[🟙]] [[🛜]] [[🩵]] [[🩶]] [[🩷]] [[🪇]] [[🪈]] [[🪭]] [[🪮]] [[🪯]] [[🪻]] [[🪼]] [[🪽]] [[🪿]] [[🫎]] [[🫏]] [[🫚]] [[🫛]] [[🫨]] [[🫷]] [[🫸]] [[File:Omx-san̊krān.png|50x50px]] # {{l|shn|ၶိူဝ်း}} {{shn-pron|ၽူၼ်-တူၵ်း}} * {{kjp-IPA|လီ}} #: {{ux|mnw|သ္ၚာ |t=bh}} #: {{ux|ksw|ဃိၣ်သၢရှ်ဖျၢၣ်တၢ်ဘါတရိၣ် |t=bh}} | #: {{ux|shn|ပိတ်းမၢၵ်ႇၼမ်ႉတဝ်ႈ |t=bh}} #: {{ux|my|အဲ |t=bh}} {{alt sp|th|บ้านมอญ นครสวรรค์}} * {{ur-IPA|grī}} {{der3|shn |တေ မိူဝ်း ယဝ့် ႁုး | | }} {{pi-alt|Mymr=ယဒိ လောကသန္တိံ ဣစ္ဆေယျ၊ ဗုဒ္ဓမဂ္ဂံ ဝိနာ အညော မဂ္ဂေါ နတ္ထိ။}} {{sa-alt|Deva=किं अहं सम्यक् अस्मि}} {{langtrack|mnw|mkh-mmn|omx|mkh-pro|mkh-mnc-pro}} {{langtrack|mn|en|enm|ang|ine-pro|gem-pro|gmw-pro}} {{langtrack|th|shn|tai-pro|tai-swe-pro|qfa-bet-pro|aho|ar|ja|}} {{langtrack|sa|la|hi|ru|ur}} {{langtrack|so|as|it|hu|pt}} {{langtrack|zh|vi|km|lo|ko}} {{langtrack|es|sh|gmw-pro|fr|ine-pro}} {{langtrack|my|za|wa|ka|mn}} lchzpqj90j9f66etxkl2z8am7cekxo4 395940 395939 2026-05-30T02:05:06Z 咽頭べさ 33 
395940 wikitext text/x-wiki [[🝴]] [[🝵]] [[🝶]] [[🝻]] [[🝼]] [[🝽]] [[🝾]] [[🝿]] [[🟙]] [[🛜]] [[🩵]] [[🩶]] [[🩷]] [[🪇]] [[🪈]] [[🪭]] [[🪮]] [[🪯]] [[🪻]] [[🪼]] [[🪽]] [[🪿]] [[🫎]] [[🫏]] [[🫚]] [[🫛]] [[🫨]] [[🫷]] [[🫸]] [[File:Omx-san̊krān.png|50x50px]] # {{l|shn|ၶိူဝ်း}} {{shn-pron|ၽူၼ်-တူၵ်း}} * {{kjp-IPA|လီ}} #: {{ux|mnw|ၐြဳ ဒၞာ မိက် |t=bh}} #: {{ux|ksw|ဃိၣ်သၢရှ်ဖျၢၣ်တၢ်ဘါတရိၣ် |t=bh}} | #: {{ux|shn|ပိတ်းမၢၵ်ႇၼမ်ႉတဝ်ႈ |t=bh}} #: {{ux|my|အဲ |t=bh}} {{alt sp|th|บ้านมอญ นครสวรรค์}} * {{ur-IPA|grī}} {{der3|shn |တေ မိူဝ်း ယဝ့် ႁုး | | }} {{pi-alt|Mymr=ယဒိ လောကသန္တိံ ဣစ္ဆေယျ၊ ဗုဒ္ဓမဂ္ဂံ ဝိနာ အညော မဂ္ဂေါ နတ္ထိ။}} {{sa-alt|Deva=किं अहं सम्यक् अस्मि}} {{langtrack|mnw|mkh-mmn|omx|mkh-pro|mkh-mnc-pro}} {{langtrack|mn|en|enm|ang|ine-pro|gem-pro|gmw-pro}} {{langtrack|th|shn|tai-pro|tai-swe-pro|qfa-bet-pro|aho|ar|ja|}} {{langtrack|sa|la|hi|ru|ur}} {{langtrack|so|as|it|hu|pt}} {{langtrack|zh|vi|km|lo|ko}} {{langtrack|es|sh|gmw-pro|fr|ine-pro}} {{langtrack|my|za|wa|ka|mn}} fsaszwytf9yvh3knlzs1eu5h2pquo8f 395941 395940 2026-05-30T11:10:36Z 咽頭べさ 33 395941 wikitext text/x-wiki [[🝴]] [[🝵]] [[🝶]] [[🝻]] [[🝼]] [[🝽]] [[🝾]] [[🝿]] [[🟙]] [[🛜]] [[🩵]] [[🩶]] [[🩷]] [[🪇]] [[🪈]] [[🪭]] [[🪮]] [[🪯]] [[🪻]] [[🪼]] [[🪽]] [[🪿]] [[🫎]] [[🫏]] [[🫚]] [[🫛]] [[🫨]] [[🫷]] [[🫸]] [[File:Omx-san̊krān.png|50x50px]] # {{l|shn|ၶိူဝ်း}} {{shn-pron|ပဵင်း-ၽဵင်ႇ}} * {{kjp-IPA|လီ}} #: {{ux|mnw|ၐြဳ ဒၞာ မိက် |t=bh}} #: {{ux|ksw|ဃိၣ်သၢရှ်ဖျၢၣ်တၢ်ဘါတရိၣ် |t=bh}} | #: {{ux|shn|ပိတ်းမၢၵ်ႇၼမ်ႉတဝ်ႈ |t=bh}} #: {{ux|my|အဲ |t=bh}} {{alt sp|th|บ้านมอญ นครสวรรค์}} * {{ur-IPA|grī}} {{der3|shn |တေ မိူဝ်း ယဝ့် ႁုး | | }} {{pi-alt|Mymr=ယဒိ လောကသန္တိံ ဣစ္ဆေယျ၊ ဗုဒ္ဓမဂ္ဂံ ဝိနာ အညော မဂ္ဂေါ နတ္ထိ။}} {{sa-alt|Deva=किं अहं सम्यक् अस्मि}} {{langtrack|mnw|mkh-mmn|omx|mkh-pro|mkh-mnc-pro}} {{langtrack|mn|en|enm|ang|ine-pro|gem-pro|gmw-pro}} {{langtrack|th|shn|tai-pro|tai-swe-pro|qfa-bet-pro|aho|ar|ja|}} {{langtrack|sa|la|hi|ru|ur}} {{langtrack|so|as|it|hu|pt}} {{langtrack|zh|vi|km|lo|ko}} {{langtrack|es|sh|gmw-pro|fr|ine-pro}} {{langtrack|my|za|wa|ka|mn}} 
hxc43tt3ujkbhwe1dp3s21ruhrdrsdx ကဏ္ဍ:နာမ်တဳရူရာန်ဂမၠိုၚ် 14 21334 395886 30632 2026-05-29T15:56:57Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ကဏ္ဍ:နာမ် တဳရူရာန်]] ဇရေင် [[ကဏ္ဍ:နာမ်တဳရူရာန်ဂမၠိုၚ်]] သီုကဵု ဟွံဂွံ ဂိုင်စွံလဝ် မကလေင်ပညုင် 30632 wikitext text/x-wiki [[ကဏ္ဍ:ဘာသာတဳရူရာန်]] 51nhwl3eqwxusbpqd9wlk1cfgds3hyp ကဏ္ဍ:ဝေါဟာအဓိကတဳရူရာန်ဂမၠိုၚ် 14 21336 395885 171150 2026-05-29T15:56:35Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ကဏ္ဍ:ဝေါဟာတဳရူရာန်နွံပ္ဍဲအဘိဓာန်ဂမၠိုၚ်]] ဇရေင် [[ကဏ္ဍ:ဝေါဟာအဓိကတဳရူရာန်ဂမၠိုၚ်]] သီုကဵု ဟွံဂွံ ဂိုင်စွံလဝ် မကလေင်ပညုင် 30634 wikitext text/x-wiki [[ကဏ္ဍ:ဘာသာတဳရူရာန်]] 51nhwl3eqwxusbpqd9wlk1cfgds3hyp ကဏ္ဍ:နာမ်နာဝါတ်ဒဝ်ဝၚ်ဂန္ထဂမၠိုၚ် 14 28170 395880 166509 2026-05-29T15:39:34Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ကဏ္ဍ:နာမ် နာဝါတ်ဒဝ်ဝၚ်ဂန္ထ]] ဇရေင် [[ကဏ္ဍ:နာမ်နာဝါတ်ဒဝ်ဝၚ်ဂန္ထဂမၠိုၚ်]] သီုကဵု ဟွံဂွံ ဂိုင်စွံလဝ် မကလေင်ပညုင် 163448 wikitext text/x-wiki [[ကဏ္ဍ:ဘာသာနာဝါတ်ဒဝ်ဝၚ်ဂန္ထ]] roafxwijt67qilxix1vqbfzebxma7ox ကဏ္ဍ:ဝေါဟာအဓိကနာဝါတ်ဒဝ်ဝၚ်ဂန္ထဂမၠိုၚ် 14 28172 395879 274636 2026-05-29T15:38:44Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ကဏ္ဍ:ဝေါဟာနာဝါတ်ဒဝ်ဝၚ်ဂန္ထနွံပ္ဍဲအဘိဓာန်ဂမၠိုၚ်]] ဇရေင် [[ကဏ္ဍ:ဝေါဟာအဓိကနာဝါတ်ဒဝ်ဝၚ်ဂန္ထဂမၠိုၚ်]] သီုကဵု ဟွံဂွံ ဂိုင်စွံလဝ် မကလေင်ပညုင် 274636 wikitext text/x-wiki [[ကဏ္ဍ:ဘာသာနာဝါတ်ဒဝ်ဝၚ်ဂန္ထ]] roafxwijt67qilxix1vqbfzebxma7ox ကဏ္ဍ:ကြိယာဝိသေသနဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်ဂမၠိုၚ် 14 76211 395882 290821 2026-05-29T15:42:52Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ကဏ္ဍ:ကြိယာဝိသေသန ဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]] ဇရေင် [[ကဏ္ဍ:ကြိယာဝိသေသနဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်ဂမၠိုၚ်]] သီုကဵု ဟွံဂွံ ဂိုင်စွံလဝ် မကလေင်ပညုင် 290821 wikitext text/x-wiki [[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်|ဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]] » [[:ကဏ္ဍ:ဝေါဟာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်နွံပ္ဍဲအဘိဓာန်ဂမၠိုၚ်|ဝေါဟာတံသ္ဇိုၚ်]] » '''ကြိယာဝိသေသနဂမၠိုၚ်''' :ဝေါဟာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်မပြုပြေၚ်ပြံၚ်လှာဲလဝ်ပိုဒ်လိက်ဂမၠိုၚ်၊ ပိုတ်ဂမၠိုၚ် ကဵု 
ဇၟန်လိက်တပ်ပ်ဂမၠိုၚ်။ [[ကဏ္ဍ:ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]][[ကဏ္ဍ:ကြိယာဝိသေသနဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဝ]] ffwu2mqmq42zendcma7rsi8pf9flkzl 395883 395882 2026-05-29T15:43:36Z 咽頭べさ 33 395883 wikitext text/x-wiki [[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်|ဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]] » [[:ကဏ္ဍ:ဝေါဟာအဓိကဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်ဂမၠိုၚ်|ဝေါဟာတံသ္ဇိုၚ်]] » '''ကြိယာဝိသေသနဂမၠိုၚ်''' :ဝေါဟာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်မပြုပြေၚ်ပြံၚ်လှာဲလဝ်ပိုဒ်လိက်ဂမၠိုၚ်၊ ပိုတ်ဂမၠိုၚ် ကဵု ဇၟန်လိက်တပ်ပ်ဂမၠိုၚ်။ [[ကဏ္ဍ:ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]][[ကဏ္ဍ:ကြိယာဝိသေသနဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဝ]] o8euf8gg85oxxl40nr28pdcmqlo0wls ကဏ္ဍ:ဝေါဟာအဓိကဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်ဂမၠိုၚ် 14 76213 395881 275129 2026-05-29T15:42:23Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ကဏ္ဍ:ဝေါဟာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်နွံပ္ဍဲအဘိဓာန်ဂမၠိုၚ်]] ဇရေင် [[ကဏ္ဍ:ဝေါဟာအဓိကဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်ဂမၠိုၚ်]] သီုကဵု ဟွံဂွံ ဂိုင်စွံလဝ် မကလေင်ပညုင် 275129 wikitext text/x-wiki [[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်|ဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]] » '''ဝေါဟာတံသ္ဇိုၚ်ဂမၠိုၚ်''' :ဝေါဟာတံသ္ဇိုၚ်ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်၊ ကဏ္ဍနူကဵုမပါ်ပရံဒကုတ်မဆေၚ်စပ်ကဵုမအရေဝ်ဝေါဟာ။ [[ကဏ္ဍ:ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]][[ကဏ္ဍ:ဝေါဟာအဓိကဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဝ]] hkfh0w5bbj3pqgeaq52zj31razczamv မဝ်ဂျူ:lt-common 828 119841 395936 154896 2026-05-29T18:27:36Z 咽頭べさ 33 395936 Scribunto text/plain local export = {} local m_str_utils = require("Module:string utilities") local u = m_str_utils.char local ugsub = m_str_utils.gsub local ulower = m_str_utils.lower local uupper = m_str_utils.upper local ufind = m_str_utils.find local ulen = m_str_utils.len local ucodepoint = m_str_utils.codepoint -- Keep native Unicode normalization functions (no replacement available) local toNFC = mw.ustring.toNFC local 
toNFD = mw.ustring.toNFD

-- =============================================================================
-- Unicode constants
-- =============================================================================
local GRAVE = u(0x0300) -- combining grave accent
local ACUTE = u(0x0301) -- combining acute accent
local TILDE = u(0x0303) -- combining tilde
local MACRON = u(0x0304) -- combining macron
local DOTABOVE = u(0x0307) -- combining dot above
local CARON = u(0x030C) -- combining caron
local OGONEK = u(0x0328) -- combining ogonek
-- Character class matching any one of the three accent marks.
local ANY_ACCENT = "[" .. GRAVE .. ACUTE .. TILDE .. "]"

-- Legacy aliases for backward compatibility
local grave = GRAVE
local acute = ACUTE
local tilde = TILDE
local macron = MACRON
local dotabove = DOTABOVE
local caron = CARON
local ogonek = OGONEK
local accents = ANY_ACCENT

-- Export accent constants for use by other Lithuanian modules
-- (lt-pron, lt-verb, lt-noun, etc.) to avoid duplicating these values.
export.GRAVE = GRAVE
export.ACUTE = ACUTE
export.TILDE = TILDE
export.MACRON = MACRON
export.DOTABOVE = DOTABOVE
export.CARON = CARON
export.OGONEK = OGONEK
export.ANY_ACCENT = ANY_ACCENT

-- =============================================================================
-- Private Use Area (PUA) replacement hints
-- =============================================================================
-- Maps PUA codepoints (U+E000–U+E022) to their suggested standard Unicode
-- replacements. Used by reject_pua to give actionable error messages when
-- editors paste in pre-composed glyphs from old Lithuanian-specific encodings.
local INVALID_CHARS = {
	[0xE000] = "Ą́",
	[0xE001] = "ą́",
	[0xE002] = "Ą̃",
	[0xE003] = "ą̃",
	[0xE004] = "Ę́",
	[0xE005] = "ę́",
	[0xE006] = "Ę̃",
	[0xE007] = "ę̃",
	[0xE008] = "Ė́",
	[0xE009] = "ė́",
	[0xE00A] = "Ė̃",
	[0xE00B] = "ė̃",
	[0xE00C] = "i̇̀",
	[0xE00D] = "i̇́",
	[0xE00E] = "i̇̃",
	[0xE00F] = "Į̇́",
	[0xE010] = "į̇́",
	[0xE011] = "Į̇̃",
	[0xE012] = "į̇̃",
	[0xE013] = "J̃",
	[0xE014] = "j̇̃",
	[0xE015] = "L̃",
	[0xE016] = "l̃",
	[0xE017] = "M̃",
	[0xE018] = "m̃",
	[0xE019] = "R̃",
	[0xE01A] = "r̃",
	[0xE01B] = "Ų́",
	[0xE01C] = "ų́",
	[0xE01D] = "Ų̃",
	[0xE01E] = "ų̃",
	[0xE01F] = "Ū́",
	[0xE020] = "ū́",
	[0xE021] = "Ū̃",
	[0xE022] = "ū̃",
}
export.INVALID_CHARS = INVALID_CHARS

-- =============================================================================
-- Internal helper functions
-- =============================================================================
local dotless_to_dotted = {
	["ı"] = "i",
	["ȷ"] = "j",
}

-- Replacement callback: maps a dotless base letter to its dotted form (other
-- bases pass through) and re-attaches the captured `below` mark, if any.
local function char_to_dotted_form(base, below)
	return (dotless_to_dotted[base] or base) .. below
end

local function normalize_dotted_chars(text)
	-- Remove any dots above, and convert dotless forms to dotted.
	-- On entry, text must be in NFD form.
	return ugsub(text, "([iıjȷ])(" .. ogonek .. "?)" .. dotabove, char_to_dotted_form)
end

local function char_to_accent_form(base, below)
	-- Add a 'dot above' after the base.
	if base == "i" or base == "j" then
		return base .. below .. dotabove
	end
	-- Convert any dotless chars combining with accents to the dotted form,
	-- so that they normalize properly. This shouldn't happen, but just in case.
	return char_to_dotted_form(base, below)
end

local function stripped_text_form(text)
	-- Remove accents.
	text = ugsub(toNFD(text), accents .. "+", "")
	-- Normalize dotless characters and dot-above diacritics.
	return normalize_dotted_chars(text)
end

-- =============================================================================
-- Input validation
-- =============================================================================
-- Reject Private Use Area characters (U+E000–U+F8FF). When the character is
-- a known non-standard Lithuanian glyph, the error message includes the
-- recommended standard Unicode replacement (see INVALID_CHARS above).
-- Returns nothing; raises an error on the first PUA character found.
function export.reject_pua(s)
	if not s then
		return
	end
	for i = 1, ulen(s) do
		local cp = ucodepoint(s, i)
		if cp >= 0xE000 and cp <= 0xF8FF then
			local replacement = INVALID_CHARS[cp]
			if replacement then
				error(string.format(
					"lt-common: private use area character U+%04X \"%s\" detected. " ..
					"Please use \"%s\" instead.",
					cp, u(cp), replacement))
			else
				error(string.format(
					"lt-common: private use area character U+%04X detected in \"%s\". " ..
					"Please use a standard Unicode character instead.",
					cp, s))
			end
		end
	end
end

-- =============================================================================
-- Input normalization
-- =============================================================================
-- Character class for the precomposed letters í (U+00ED), ì (U+00EC) and
-- ĩ (U+0129). It is built from code points so that the pattern is guaranteed
-- to hold the precomposed forms regardless of how this source text is
-- normalized.
local PRECOMPOSED_ACCENTED_I = "[" .. u(0x00ED) .. u(0x00EC) .. u(0x0129) .. "]"

-- Detect nonstandard encoding patterns in the input.
-- Returns: dotless_flag (found ı/ȷ), precomp_i_flag (found precomposed í/ì/ĩ)
function export.detect_nonstandard(s)
	if not s then
		return false, false
	end
	-- ı and ȷ have no canonical decomposition, so they survive NFD unchanged.
	local dotless_flag = ufind(toNFD(s), "[ıȷ]") ~= nil
	-- The precomposed letters must be looked for in the composed (NFC) form:
	-- NFD splits them into "i" plus a combining accent, so a search of the
	-- NFD text for the precomposed code points can never succeed.
	local precomp_i_flag = ufind(toNFC(s), PRECOMPOSED_ACCENTED_I) ~= nil
	return dotless_flag, precomp_i_flag
end

-- Normalize input to clean canonical NFC.
-- Handles dotless i/j (ı, ȷ) and stray dot-above combinations.
function export.canonicalize_input(s)
	if not s then
		return s
	end
	s = toNFD(s)
	-- Remove stray dot-above after i/j (with or without ogonek)
	s = ugsub(s, "([iıjȷ])(" .. OGONEK .. "?)" .. DOTABOVE, function(base, below)
		base = (base == "ı") and "i" or (base == "ȷ") and "j" or base
		return base .. below
	end)
	-- Convert any remaining dotless i/j to standard forms
	s = ugsub(s, "ı", "i")
	s = ugsub(s, "ȷ", "j")
	return toNFC(s)
end

-- =============================================================================
-- Partial NFD conversion (stem_ac representation)
-- =============================================================================
-- Convert canonical NFC to partial NFD (stem_ac).
-- Applies full NFD, then recomposes non-accent diacritics.
-- Only grave/acute/tilde remain as combining characters.
-- Only the lowercase letters listed below are recomposed.
function export.to_stem_ac(s)
	if not s then
		return s
	end
	s = toNFD(s)
	-- Recompose ogonek vowels
	s = ugsub(s, "a" .. OGONEK, "ą")
	s = ugsub(s, "e" .. OGONEK, "ę")
	s = ugsub(s, "i" .. OGONEK, "į")
	s = ugsub(s, "u" .. OGONEK, "ų")
	-- Recompose macron vowel
	s = ugsub(s, "u" .. MACRON, "ū")
	-- Recompose dot-above e
	s = ugsub(s, "e" .. DOTABOVE, "ė")
	-- Recompose caron consonants
	s = ugsub(s, "c" .. CARON, "č")
	s = ugsub(s, "s" .. CARON, "š")
	s = ugsub(s, "z" .. CARON, "ž")
	return s
end

-- =============================================================================
-- Accent manipulation
-- =============================================================================
-- Strip all accent marks (grave/acute/tilde) from partial NFD text.
function export.to_stem_bare(stem_ac)
	if not stem_ac then
		return stem_ac
	end
	return ugsub(stem_ac, ANY_ACCENT, "")
end

-- Check if partial NFD text contains any accent marks.
function export.has_accent(stem_ac)
	return ufind(stem_ac, ANY_ACCENT) ~= nil
end

-- =============================================================================
-- Complete input pipeline
-- =============================================================================
-- Process raw user input through the complete normalization pipeline.
-- Returns: stem_bare, stem_ac, dotless_flag, precomp_flag
function export.process_input(raw)
	if not raw then
		return raw, raw, false, false
	end
	export.reject_pua(raw)
	local dotless_flag, precomp_flag = export.detect_nonstandard(raw)
	local canon = export.canonicalize_input(raw)
	local stem_ac = export.to_stem_ac(canon)
	local stem_bare = export.to_stem_bare(stem_ac)
	return stem_bare, stem_ac, dotless_flag, precomp_flag
end

-- =============================================================================
-- Display and text processing
-- =============================================================================
function export.makeDisplayText(text, lang, sc)
	if not text then
		return text
	end
	-- Normalize dotless characters and dot-above diacritics (while retaining accents).
	text = normalize_dotted_chars(toNFD(text))
	-- Add a 'dot above' between "i" or "j" and an accent.
	text = ugsub(text, "([iıjȷ])(" .. ogonek .. "?)%f" .. accents, char_to_accent_form)
	return toNFC(text)
end

-- Called from [[Module:languages]] since [[Module:lt-common]] is set as the stripDiacritics handler in
-- [[Module:languages/data/2]].
function export.stripDiacritics(text, lang, sc)
	if not text then
		return text
	end
	return toNFC(stripped_text_form(text))
end

-- Replacement table used by makeSortKey: the remaining combining diacritics
-- become Private Use Area characters, and "y" becomes "i" plus a PUA marker.
local sortkey_substitutes = {
	[ogonek] = u(0xF000),
	[caron] = u(0xF001),
	[macron] = u(0xF002),
	[dotabove] = u(0xF003),
	["y"] = "i" .. u(0xF004),
}

function export.makeSortKey(text, lang, sc)
	if not text then
		return text
	end
	-- Normalize to the stripped-text form and convert diacritics to Private Use
	-- Area characters so they sort after all other characters.
text = stripped_text_form(ulower(text)) :gsub(".[\128-\191]*", sortkey_substitutes) return toNFC(uupper(text)) end return export 6n0bptzg7w3qgh8ov3knajxqwibwckv ထာမ်ပလိက်:Documentation/documentation 10 119999 395916 155140 2026-05-29T18:03:52Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:documentation/documentation]] ဇရေင် [[ထာမ်ပလိက်:Documentation/documentation]] 155140 wikitext text/x-wiki {{documentation subpage}} {{uses lua|Module:documentation}} __NOTOC__ This template automatically displays a documentation section like you are seeing now. The content of this section comes from a subpage named {{cd|<var>page</var>/documentation}}; e.g. the documentation for [[Template:affix]] is located on the page named {{cd|Template:affix/documentation}}. In order for this documentation to be displayed, templates must manually invoke {{tl|documentation}} inside of a {{cd|<nowiki><noinclude>...</noinclude></nowiki>}} section; see [[#Usage]] below. (However, {{tl|documentation}} is automatically invoked on module and JavaScript pages. The mechanism implementing this is described in more detail in the documentation for [[Module:documentation]], which implements the {{tl|documentation}} template.) ==Usage== <var>template code</var> <var>...</var>{{wt|noinclude}}{{temp|documentation}}{{wt|/noinclude}} or {{wt|onlyinclude}}template code . . .{{wt|/onlyinclude}} {{temp|documentation}} The most common practice is to place the {{temp|documentation}} template in {{wt|noinclude}} tags. Alternatively, the template code itself can be wrapped in {{wt|onlyinclude}} tags and the {{temp|documentation}} template should be outside of those tags. Both of these methods ensure that the documentation is not transcluded onto other pages as part of the template. In order to place the template itself in a category, or supply interwiki links, put those categories and interwiki links on the documentation page, inside {{wt|includeonly}} tags. 
If the documentation page contains {{wt|includeonly}} or {{wt|noinclude}} tags as part of the documentation, use {{tl|wikitag}} or replace {{cd|<}} with {{cd|&amp;lt;}}. == Functions == If the documentation page doesn't exist, the "edit" link includes a [[mw:Manual:Creating pages with preloaded text|preload]] parameter so that clicking it will pre-fill the edit form with a stub documentation page. == Rationale == Use of this template allows templates to be protected where necessary, while allowing anyone to edit the documentation, categories, and interwiki links. In addition, [[ဝိက်ရှေန်နရဳ:မဝ်ဂျူဂမၠိုၚ်|modules]] strictly require documentation pages as they cannot be categorized or documented any other way, so it makes sense to handle templates likewise. == See also == *[[Help:Documenting templates]] <includeonly> [[ကဏ္ဍ:ထာမ်ပလိက်စရၚ်မချူသမ္တီလဝ်ဂမၠိုၚ်]] </includeonly> lx6ccrohs466b2m5359fzc370vz9won ထာမ်ပလိက်:R:la:du Cange 10 147629 395923 211126 2026-05-29T18:12:02Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:R:du Cange]] ဇရေင် [[ထာမ်ပလိက်:R:la:du Cange]] 211126 wikitext text/x-wiki "[http://ducange.enc.sorbonne.fr/{{urlencode:{{{1|{{PAGENAME}}}}}|PATH}} {{{2|{{{1|{{PAGENAME}}}}}}}}]", in Charles du Fresne du Cange’s {{w|Charles du Fresne, sieur du Cange#Works|''Glossarium Mediæ et Infimæ Latinitatis''|lang=en}} (augmented edition with additions by D. P. Carpenterius, Adelungius and others, edited by Léopold Favre, 1883–1887)<noinclude>{{documentation}}</noinclude> jdtpexpafs28ef7kc345qq5qb2mo1id ထာမ်ပလိက်:R:la:du Cange/documentation 10 147639 395925 211136 2026-05-29T18:12:02Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:R:du Cange/documentation]] ဇရေင် [[ထာမ်ပလိက်:R:la:du Cange/documentation]] 211136 wikitext text/x-wiki {{documentation subpage}} {{documentation needed}}<!-- Replace this with a short description of the purpose of the template, and how to use it. 
--> <includeonly> [[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲလပ်တေန်ဂမၠိုၚ်|du Cange]] </includeonly> gl90hxj3bmdrek5fom4tjoqq874ki37 မဝ်ဂျူ:kl-pron 828 219785 395895 300356 2026-05-29T16:13:58Z 咽頭べさ 33 395895 Scribunto text/plain
local export = {}

local lang = require("Module:languages").getByCode("kl")
local ipa = require("Module:IPA")
local acc = require("Module:accent qualifier")

-- Unicode-aware string helpers. NOTE(review): match, len and sub are not
-- referenced anywhere in the code visible in this revision.
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local len = mw.ustring.len
local lower = mw.ustring.lower
local sub = mw.ustring.sub

-- Letter groups
-- These strings are spliced into "[...]" character classes below.
local consGroup = "mnptkqvsgrljfbd"
local vowelGroup = "aeiou"
local uvular = "rq"
local labial = "mp"
local alveolar = "ntsl"
-- Pattern fragment: optional length mark followed by an optional syllable dot.
local vowelBound = "ː?%.?"

-- Phonemic transcription
-- Lowercases `word` and rewrites it through mapPL; characters without an
-- entry are left unchanged. Returns the rewritten string.
function export.phonemic(word)
	-- Make text lowercase
	word = lower(word)

	-- Phonemic changes
	local mapPL = {
		["nng"] = "ŋŋ",
		["ng"] = "ŋ",
		["g"] = "ɡ",
		["d"] = "t",
		["b"] = "p",
		["e"] = "i",
		["o"] = "u"
	}
	-- First pass: a run of n's plus the next character, so that the
	-- multi-letter keys "nng" and "ng" can be looked up as a unit.
	word = gsub(word, "n*.", mapPL)
	word = gsub(word, ".", mapPL) -- Repeat to capture all remaining characters

	return word
end

-- Syllabification rules
-- Wraps every space-separated word in "#" markers and inserts "." syllable
-- breaks. When `hide_borders` is truthy the "#" markers are removed again
-- before returning.
function export.syllabify(word, hide_borders)
	-- Mark all word borders with #
	word = gsub(word, "([^ ]+)", "#%1#")
	-- Put a "." before each (optional n)(optional consonant)(vowel) group.
	word = gsub(word, "([^" .. consGroup .. "]-)(n?[" .. consGroup .. "]?[" .. vowelGroup .. "])", "%1.%2")
	-- Rejoin a doubled vowel that the previous rule split.
	word = gsub(word, "([" .. vowelGroup .. "])%.%1", "%1%1")
	-- Move the break of ".nn" between the two n's.
	word = gsub(word, "%.nn", "n.n")
	-- Drop a break directly after the word-start marker (and optional hyphen).
	word = gsub(word, "(#%-?)%.", "%1")
	return hide_borders and gsub(word, "#", "") or word
end

-- Phonetic transcription
-- Lowercases and syllabifies `word`, then applies the substitution passes
-- below in order; later passes match on the output of earlier ones, so the
-- statement order is significant. The final gsub is returned directly, so
-- its substitution count is passed through as a second return value.
function export.phonetic(word)
	-- Make text lowercase
	word = lower(word)

	-- Syllabify the word
	word = export.syllabify(word, false)

	-- NG
	word = gsub(word, "ng", "ŋ")

	-- long vowels
	word = gsub(word, "([" .. vowelGroup .. "])%1", "%1ː")

	-- /ɡ/-allophony
	word = gsub(word, "ig%.g", "iç.ç")
	word = gsub(word, "ag%.g", "ax̟.x̟")

	-- /u/-labialisation
	word = gsub(word, "u(ː?)%.v?([" .. vowelGroup .. "])", "u%1.ʷ%2")

	-- /t/-affrication
	word = gsub(word, "ti", "t͡si")
	word = gsub(word, "t%.s", "t.t͡s")

	-- word-initial G is voiceless
	word = gsub(word, "#g", "#k")

	-- Vowel uvularisation
	word = gsub(word, "ːr%.([" .. consGroup .. "])", "ʶːr.%1")

	-- Vowel allophone changes
	-- U
	-- "O" is a temporary placeholder; mapML turns it into "o" at the end.
	word = gsub(word, "u(ʶ?" .. vowelBound .. "[" .. uvular .. "])", "O%1")
	word = gsub(word, "([" .. alveolar .. "])u(" .. vowelBound .. "[" .. alveolar .. "])", "%1ʉ%2")
	-- NOTE(review): this replacement equals its match, and the next rule
	-- rewrites every remaining "u", so this line appears to have no effect.
	-- Confirm whether "u" before a labial was meant to be protected.
	word = gsub(word, "u(" .. vowelBound .. "[" .. labial .. "])", "u%1")
	word = gsub(word, "u(" .. vowelBound .. ")", "ʊ%1")
	word = gsub(word, "#ʊ(" .. vowelBound .. "[^" .. uvular .. "])", "#u%1")
	-- A
	word = gsub(word, "a(ʶ?" .. vowelBound .. "[" .. uvular .. "])", "ɑ%1")
	word = gsub(word, "a(" .. vowelBound .. "[^#])", "ə%1")
	word = gsub(word, "#ə(" .. vowelBound .. "[^" .. uvular .. "])", "#a%1")
	-- I
	word = gsub(word, "i(ʶ?" .. vowelBound .. "[" .. uvular .. "])", "ɐ%1")
	word = gsub(word, "i(" .. vowelBound .. "[" .. labial .. "])", "y%1")
	word = gsub(word, "i(" .. vowelBound .. ")", "ɪ%1")
	word = gsub(word, "#ɪ(" .. vowelBound .. "[^" .. uvular .. "])", "#i%1")

	-- Geminates
	local mapGL = {
		["g"] = "x",
		["l"] = "ɬ",
		["r"] = "χ",
		["v"] = "f"
	}
	-- "r" before a syllable-initial consonant is replaced by a copy of it.
	word = gsub(word, "r%.([lfsnmptk])", "%1.%1")
	word = gsub(word, "n%.ŋ", "ŋ.ŋ")
	-- Doubled g/l/r/v across a syllable break become the mapGL value on both sides.
	word = gsub(word, "([glrv])%.%1", function(c) return mapGL[c] .. "." .. mapGL[c] end)

	-- Substitute monographs
	local mapML = {
		["g"] = "ɣ",
		["e"] = "ɜ",
		["o"] = "ɔ", -- FIXME: don't substitute twice
		["O"] = "o",
		["r"] = "ʁ",
		["d"] = "t",
		["b"] = "p",
		["'"] = "ˈ"
	}
	word = gsub(word, ".", mapML)

	-- Remove word boundaries
	return gsub(word, "#", "")
end

-- Display pronunciation
-- Template entry point. Takes the words from the parent frame's positional
-- arguments, or the page name when there are none, and returns the accent
-- qualifier followed by one phonemic (/.../) and one phonetic ([...]) IPA
-- item per word.
function export.show(frame)
	local args = frame:getParent().args
	local pagetitle = mw.loadData("Module:headword/data").pagename

	local p, results = {}, {}

	if args[1] then
		for _, v in ipairs(args) do
			-- An empty argument is passed on as nil, so nothing is appended for it.
			table.insert(p, (v ~= "") and v or nil)
		end
	else
		p = { pagetitle }
	end

	for _, word in ipairs(p) do
		-- On the page named "kl-IPA" a fixed sample word is transcribed instead.
		word = (word == "kl-IPA") and "avinngaq" or word
		local phonemic = export.phonemic(word)
		local phonetic = export.phonetic(word)
		table.insert(results, { pron = "/" .. phonemic .. "/" })
		table.insert(results, { pron = "[" .. phonetic .. "]" })
	end

	return acc.format_qualifiers(lang, {"[[w:West Greenlandic|Nuuk]]"}) .. ' ' .. ipa.format_IPA_full { lang = lang, items = results }
end

return export
cf3gd9qg256t4c6ugkjpdq57p5g113r 395898 395895 2026-05-29T16:18:54Z 咽頭べさ 33 395898 Scribunto text/plain
local export = {}

local lang = require("Module:languages").getByCode("kl")
local ipa = require("Module:IPA")
local acc = require("Module:accent qualifier")

-- Unicode-aware string helpers. NOTE(review): match, len and sub are not
-- referenced anywhere in the code visible in this revision.
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local len = mw.ustring.len
local lower = mw.ustring.lower
local sub = mw.ustring.sub

-- Letter groups
-- These strings are spliced into "[...]" character classes below.
local consGroup = "mnptkqvsgrljfbd"
local vowelGroup = "aeiou"
local uvular = "rq"
local labial = "mp"
local alveolar = "ntsl"
-- Pattern fragment: optional length mark followed by an optional syllable dot.
local vowelBound = "ː?%.?"
-- Phonemic transcription
-- Lowercases `word` and rewrites it through mapPL; characters without an
-- entry are left unchanged. Returns the rewritten string.
function export.phonemic(word)
	-- Make text lowercase
	word = lower(word)

	-- Phonemic changes
	local mapPL = {
		["nng"] = "ŋŋ",
		["ng"] = "ŋ",
		["g"] = "ɡ",
		["d"] = "t",
		["b"] = "p",
		["e"] = "i",
		["o"] = "u"
	}
	-- First pass: a run of n's plus the next character, so that the
	-- multi-letter keys "nng" and "ng" can be looked up as a unit.
	word = gsub(word, "n*.", mapPL)
	word = gsub(word, ".", mapPL) -- Repeat to capture all remaining characters

	return word
end

-- Syllabification rules
-- Wraps every space-separated word in "#" markers and inserts "." syllable
-- breaks. When `hide_borders` is truthy the "#" markers are removed again
-- before returning.
function export.syllabify(word, hide_borders)
	-- Mark all word borders with #
	word = gsub(word, "([^ ]+)", "#%1#")
	-- Put a "." before each (optional n)(optional consonant)(vowel) group.
	word = gsub(word, "([^" .. consGroup .. "]-)(n?[" .. consGroup .. "]?[" .. vowelGroup .. "])", "%1.%2")
	-- Rejoin a doubled vowel that the previous rule split.
	word = gsub(word, "([" .. vowelGroup .. "])%.%1", "%1%1")
	-- Move the break of ".nn" between the two n's.
	word = gsub(word, "%.nn", "n.n")
	-- Drop a break directly after the word-start marker (and optional hyphen).
	word = gsub(word, "(#%-?)%.", "%1")
	return hide_borders and gsub(word, "#", "") or word
end

-- Phonetic transcription
-- Lowercases and syllabifies `word`, then applies the substitution passes
-- below in order; later passes match on the output of earlier ones, so the
-- statement order is significant. The final gsub is returned directly, so
-- its substitution count is passed through as a second return value.
function export.phonetic(word)
	-- Make text lowercase
	word = lower(word)

	-- Syllabify the word
	word = export.syllabify(word, false)

	-- NG
	word = gsub(word, "ng", "ŋ")

	-- long vowels
	word = gsub(word, "([" .. vowelGroup .. "])%1", "%1ː")

	-- /ɡ/-allophony
	word = gsub(word, "ig%.g", "iç.ç")
	word = gsub(word, "ag%.g", "ax̟.x̟")

	-- /u/-labialisation
	word = gsub(word, "u(ː?)%.v?([" .. vowelGroup .. "])", "u%1.ʷ%2")

	-- /t/-affrication
	word = gsub(word, "ti", "t͡si")
	word = gsub(word, "t%.s", "t.t͡s")

	-- word-initial G is voiceless
	word = gsub(word, "#g", "#k")

	-- Vowel uvularisation
	word = gsub(word, "ːr%.([" .. consGroup .. "])", "ʶːr.%1")

	-- Vowel allophone changes
	-- U
	-- "O" is a temporary placeholder; mapML turns it into "o" at the end.
	word = gsub(word, "u(ʶ?" .. vowelBound .. "[" .. uvular .. "])", "O%1")
	word = gsub(word, "([" .. alveolar .. "])u(" .. vowelBound .. "[" .. alveolar .. "])", "%1ʉ%2")
	-- NOTE(review): this replacement equals its match, and the next rule
	-- rewrites every remaining "u", so this line appears to have no effect.
	-- Confirm whether "u" before a labial was meant to be protected.
	word = gsub(word, "u(" .. vowelBound .. "[" .. labial .. "])", "u%1")
	word = gsub(word, "u(" .. vowelBound .. ")", "ʊ%1")
	word = gsub(word, "#ʊ(" .. vowelBound .. "[^" .. uvular .. "])", "#u%1")
	-- A
	word = gsub(word, "a(ʶ?" .. vowelBound .. "[" .. uvular .. "])", "ɑ%1")
	word = gsub(word, "a(" .. vowelBound .. "[^#])", "ə%1")
	word = gsub(word, "#ə(" .. vowelBound .. "[^" .. uvular .. "])", "#a%1")
	-- I
	word = gsub(word, "i(ʶ?" .. vowelBound .. "[" .. uvular .. "])", "ɐ%1")
	word = gsub(word, "i(" .. vowelBound .. "[" .. labial .. "])", "y%1")
	word = gsub(word, "i(" .. vowelBound .. ")", "ɪ%1")
	word = gsub(word, "#ɪ(" .. vowelBound .. "[^" .. uvular .. "])", "#i%1")

	-- Geminates
	local mapGL = {
		["g"] = "x",
		["l"] = "ɬ",
		["r"] = "χ",
		["v"] = "f"
	}
	-- "r" before a syllable-initial consonant is replaced by a copy of it.
	word = gsub(word, "r%.([lfsnmptk])", "%1.%1")
	word = gsub(word, "n%.ŋ", "ŋ.ŋ")
	-- Doubled g/l/r/v across a syllable break become the mapGL value on both sides.
	word = gsub(word, "([glrv])%.%1", function(c) return mapGL[c] .. "." .. mapGL[c] end)

	-- Substitute monographs
	local mapML = {
		["g"] = "ɣ",
		["e"] = "ɜ",
		["o"] = "ɔ", -- FIXME: don't substitute twice
		["O"] = "o",
		["r"] = "ʁ",
		["d"] = "t",
		["b"] = "p",
		["'"] = "ˈ"
	}
	word = gsub(word, ".", mapML)

	-- Remove word boundaries
	return gsub(word, "#", "")
end

-- Display pronunciation
-- Template entry point. Takes the words from the parent frame's positional
-- arguments, or the page name when there are none, and returns the accent
-- qualifier followed by one phonemic (/.../) and one phonetic ([...]) IPA
-- item per word.
function export.show(frame)
	local args = frame:getParent().args
	local pagetitle = mw.loadData("Module:headword/data").pagename

	local p, results = {}, {}

	if args[1] then
		for _, v in ipairs(args) do
			-- An empty argument is passed on as nil, so nothing is appended for it.
			table.insert(p, (v ~= "") and v or nil)
		end
	else
		p = { pagetitle }
	end

	for _, word in ipairs(p) do
		-- On the page named "kl-IPA" a fixed sample word is transcribed instead.
		word = (word == "kl-IPA") and "avinngaq" or word
		local phonemic = export.phonemic(word)
		local phonetic = export.phonetic(word)
		table.insert(results, { pron = "/" .. phonemic .. "/" })
		table.insert(results, { pron = "[" .. phonetic .. "]" })
	end

	return acc.format_qualifiers(lang, {"[[w:en:West Greenlandic|နူခ်]]"}) .. ' ' .. ipa.format_IPA_full { lang = lang, items = results }
end

return export
h0qp42wqkw4i4jsf0of970czuzy925i မဝ်ဂျူ:lt-pron 828 219829 395928 300400 2026-05-29T18:15:17Z 咽頭べさ 33 395928 Scribunto text/plain --[==[ Backend for {{lt-pr}}: IPA, hyphenation, and rhyme generation.
Author: TongcyDai ]==]

local export = {}

local m_debug = require("Module:debug")
local m_str = require("Module:string utilities")
local m_lt_common = require("Module:lt-common")

local u = m_str.char
local ulower = m_str.lower
local uupper = m_str.upper
local usub = m_str.sub
local ulen = m_str.len
local ugsub = m_str.gsub
local ufind = m_str.find
local umatch = m_str.match
local rsplit = m_str.split

-- Accent mark constants (re-exported from Module:lt-common to keep all
-- Lithuanian modules in sync).
local GRAVE = m_lt_common.GRAVE       -- U+0300
local ACUTE = m_lt_common.ACUTE       -- U+0301
local TILDE = m_lt_common.TILDE       -- U+0303
local DOTABOVE = m_lt_common.DOTABOVE -- U+0307
local OGONEK = m_lt_common.OGONEK     -- U+0328

-- M4: Reuse Module:lt-common's display formatter directly instead of
-- maintaining a parallel local copy.
local makeDisplayText = m_lt_common.makeDisplayText

-- Liaison marker: U+203F UNDERTIE — separates clitics from their stressed host
-- in input. The phonological grammar treats it like a "soft" word boundary that
-- is transparent to several cross-word processes (palatalization spread,
-- geminate / fricative simplification, place assimilation), per VLKK §19–§23.
local LIAISON = u(0x203F)

-- Lazy-loaded external modules
local m_IPA
local audio_module = "Module:audio"
local homophones_module = "Module:homophones"
local hyphenation_module = "Module:hyphenation"
local rhymes_module = "Module:rhymes"
local parameters_module = "Module:parameters"
local parse_util_module = "Module:parse utilities"

local concat = table.concat
local insert = table.insert

-- Cached language object; filled in on the first get_lang() call.
local lang_obj
-- Returns the language object for code "lt", loading it on first use.
local function get_lang()
	if not lang_obj then
		lang_obj = require("Module:languages").getByCode("lt")
	end
	return lang_obj
end

-- Records a tracking entry under the "lt-pron/" prefix.
local function track(reason)
	m_debug.track("lt-pron/" .. reason)
end

-- ============================================================================
-- SECTION 1: Orthography & Phonology Definitions
-- ============================================================================

-- Suffix table for automatic phonetic adjustments (currently disabled)
-- Exact matching lists (must include precomposed normalized tone markers)
--[[
local SUFFIX_LOAN = {
	["fòbas"]=true, ["fòbė"]=true, ["fòbija"]=true, ["fònas"]=true, ["fònė"]=true, ["lògas"]=true, ["lògija"]=true, ["skòpas"]=true
}
--]]

-- Consonant classes for syllabification (Sonority Hierarchy)
local CLASS = {
	R = {["l"]=true, ["m"]=true, ["n"]=true, ["r"]=true, ["v"]=true, ["j"]=true},
	S = {["s"]=true, ["z"]=true, ["š"]=true, ["ž"]=true, ["f"]=true, ["x"]=true, ["h"]=true, ["ch"]=true},
	T = {["p"]=true, ["b"]=true, ["t"]=true, ["d"]=true, ["k"]=true, ["g"]=true, ["c"]=true, ["dz"]=true, ["č"]=true, ["dž"]=true}
}

-- Front vowels trigger palatalization
local FRONT_V = {
	["e"]=true, ["ę"]=true, ["ė"]=true, ["i"]=true, ["į"]=true, ["y"]=true, ["ie"]=true, ["ei"]=true, ["eu"]=true
}

-- Vowel -> Base IPA mapping (Unstressed short/inherent)
local V_IPA = {
	["a"] = "ɐ", ["ą"] = "ɑː", ["e"] = "ɛ", ["ę"] = "æː", ["ė"] = "eː",
	["i"] = "ɪ", ["į"] = "iː", ["y"] = "iː", ["u"] = "ʊ", ["ų"] = "uː", ["ū"] = "uː", ["o"] = "oː",
	["ie"] = "iɛ", ["uo"] = "uɔ",
	-- Unstressed simple diphthongs
	["ai"] = "ɐɪ", ["au"] = "ɒʊ", ["ei"] = "ɛɪ", ["eu"] = "ɛʊ", ["ui"] = "ʊɪ", ["oi"] = "ɔɪ", ["ou"] = "ɔʊ"
}

-- Consonant -> Base IPA mapping (Unpalatalized)
local CONS_IPA = {
	["b"] = "b", ["c"] = "t͡s", ["č"] = "t͡ʃ", ["d"] = "d", ["dz"] = "d͡z", ["dž"] = "d͡ʒ",
	["ch"] = "x", ["f"] = "f", ["g"] = "ɡ", ["h"] = "ɣ", ["j"] = "j", ["k"] = "k",
	["l"] = "l", ["m"] = "m", ["n"] = "n", ["p"] = "p", ["r"] = "r", ["s"] = "s",
	["š"] = "ʃ", ["t"] = "t", ["v"] = "ʋ", ["z"] = "z", ["ž"] = "ʒ",
}

-- Voicing pairs for Voicing Assimilation
local VOICING_PAIRS = {
	["p"]="b", ["b"]="p", ["t"]="d", ["d"]="t", ["k"]="g", ["g"]="k",
	["c"]="dz", ["dz"]="c", ["č"]="dž", ["dž"]="č",
	["s"]="z", ["z"]="s", ["š"]="ž", ["ž"]="š", ["x"]="ɣ", ["ɣ"]="x"
}

-- Returns true when `c` is one of the voiced obstruents listed below,
-- false for anything else (including nil).
-- The lookup set is built once and kept as an upvalue instead of being
-- rebuilt on every call; the `do` block keeps it out of the chunk's
-- top-level locals.
local is_voiced
do
	local voiced_set = {["b"]=true, ["d"]=true, ["g"]=true, ["dz"]=true, ["dž"]=true, ["z"]=true, ["ž"]=true, ["ɣ"]=true}
	function is_voiced(c)
		return voiced_set[c] == true
	end
end

-- Accent pairs for conjugation module support
-- Maps base vowel/diphthong to accented forms (falling/rising)
local ACCENT_PAIRS = {
	-- Long vowels (acute or tilde)
	["ą"] = {acute="ą"..ACUTE, tilde="ą"..TILDE},
	["ę"] = {acute="ę"..ACUTE, tilde="ę"..TILDE},
	["ė"] = {acute="ė"..ACUTE, tilde="ė"..TILDE},
	["y"] = {acute="y"..ACUTE, tilde="y"..TILDE},
	["į"] = {acute="į"..ACUTE, tilde="į"..TILDE},
	["ū"] = {acute="ū"..ACUTE, tilde="ū"..TILDE},
	["ų"] = {acute="ų"..ACUTE, tilde="ų"..TILDE},
	-- a/e can be short (grave) or long (tilde)
	["a"] = {grave="a"..GRAVE, tilde="a"..TILDE},
	["e"] = {grave="e"..GRAVE, tilde="e"..TILDE},
	-- o: ó/õ/o are long, ò is short
	["o"] = {acute="o"..ACUTE, grave="o"..GRAVE, tilde="o"..TILDE},
	-- Short vowels i/u (only grave)
	["i"] = {grave="i"..GRAVE},
	["u"] = {grave="u"..GRAVE},
	-- Simple diphthongs
	["ai"] = {acute="a"..ACUTE.."i", tilde="a".."i"..TILDE},
	["au"] = {acute="a"..ACUTE.."u", tilde="a".."u"..TILDE},
	["ei"] = {acute="e"..ACUTE.."i", tilde="e".."i"..TILDE},
	["ui"] = {grave="u"..GRAVE.."i", tilde="u".."i"..TILDE},
	-- Complex diphthongs
	["ie"] = {acute="i"..ACUTE.."e", tilde="i".."e"..TILDE},
	["uo"] = {acute="u"..ACUTE.."o", tilde="u".."o"..TILDE},
	-- Mixed diphthongs (a series - acute/tilde)
	["al"] = {acute="a"..ACUTE.."l", tilde="a".."l"..TILDE},
	["am"] = {acute="a"..ACUTE.."m", tilde="a".."m"..TILDE},
	["an"] = {acute="a"..ACUTE.."n", tilde="a".."n"..TILDE},
	["ar"] = {acute="a"..ACUTE.."r", tilde="a".."r"..TILDE},
	-- Mixed diphthongs (e series - acute/grave/tilde, grave for foreign)
	["el"] = {acute="e"..ACUTE.."l", grave="e"..GRAVE.."l", tilde="e".."l"..TILDE},
	["em"] = {acute="e"..ACUTE.."m", grave="e"..GRAVE.."m", tilde="e".."m"..TILDE},
	["en"] = {acute="e"..ACUTE.."n", grave="e"..GRAVE.."n", tilde="e".."n"..TILDE},
	["er"] = {acute="e"..ACUTE.."r", grave="e"..GRAVE.."r", tilde="e".."r"..TILDE},
	-- Mixed diphthongs (i series - grave/tilde)
	["il"] = {grave="i"..GRAVE.."l", tilde="i".."l"..TILDE},
	["im"] = {grave="i"..GRAVE.."m", tilde="i".."m"..TILDE},
	["in"] = {grave="i"..GRAVE.."n", tilde="i".."n"..TILDE},
	["ir"] = {grave="i"..GRAVE.."r", tilde="i".."r"..TILDE},
	-- Mixed diphthongs (u series - grave/tilde)
	["ul"] = {grave="u"..GRAVE.."l", tilde="u".."l"..TILDE},
	["um"] = {grave="u"..GRAVE.."m", tilde="u".."m"..TILDE},
	["un"] = {grave="u"..GRAVE.."n", tilde="u".."n"..TILDE},
	["ur"] = {grave="u"..GRAVE.."r", tilde="u".."r"..TILDE},
	-- Foreign diphthongs (grave only)
	["eu"] = {grave="e"..GRAVE.."u"},
	["oi"] = {grave="o"..GRAVE.."i"},
	["ou"] = {grave="o"..GRAVE.."u"},
	["ol"] = {grave="o"..GRAVE.."l"},
	["om"] = {grave="o"..GRAVE.."m"},
	["on"] = {grave="o"..GRAVE.."n"},
	["or"] = {grave="o"..GRAVE.."r"},
}

-- ============================================================================
-- SECTION 2: Lexical Normalization (Avoid NFD destruction)
-- ============================================================================

-- Helper: Remove all accent marks from text (moved here for early use).
-- Delegates to Module:lt-common to keep the de-accenting logic shared.
local function remove_all_accents(text)
	return m_lt_common.to_stem_bare(mw.ustring.toNFD(text))
end

-- Extract pagename from input or load from headword data.
-- Returns two values on every path: the pagename (or nil when none could be
-- determined) and the input string, with any <base:...> override removed.
local function get_pagename(input)
	-- Check for manual override: <base:xxx>
	local manual = input:match("<base:([^>]+)>")
	if manual then
		-- Parenthesized so that gsub's substitution count is not returned as
		-- an extra third value; the other paths return exactly two values.
		return manual, (input:gsub("<base:[^>]+>", ""))
	end

	-- Load from headword data; a failure here is treated as "no pagename".
	local success, data = pcall(function()
		return mw.loadData("Module:headword/data").pagename
	end)

	if success and data then
		return data, input
	end

	return nil, input
end

-- Identify respelling j and (j) positions.
-- Returns a list of 1-based character positions, within the cleaned input
-- (accents, markers and spaces removed, "(j)" counted as a single "j"), of
-- each "j" that has no "j" at the aligned position in the pagename.
-- Returns an empty list when `pagename` is nil/false.
local function identify_respelling_glides(input_with_accents, pagename)
	if not pagename then
		return {} -- No pagename, no respelling detection
	end

	-- Remove all accents from input
	local input_clean = remove_all_accents(input_with_accents)
	-- Remove special markers (^, .)
	input_clean = ugsub(input_clean, "[%^%.]", "")
	-- Remove <base:...> if present
	input_clean = ugsub(input_clean, "<base:[^>]+>", "")
	-- Remove literal ˌ
	input_clean = ugsub(input_clean, "ˌ", "")
	-- Remove softening mark ʼ
	input_clean = ugsub(input_clean, "ʼ", "")
	-- Remove (j) markers - replace with j for comparison
	input_clean = ugsub(input_clean, "%(j%)", "j")
	-- Remove spaces for comparison
	input_clean = ugsub(input_clean, " ", "")
	-- Convert to NFC for comparison
	input_clean = mw.ustring.toNFC(input_clean)

	-- Normalize pagename (lowercase, remove spaces)
	local pagename_clean = ulower(pagename)
	pagename_clean = ugsub(pagename_clean, " ", "")

	-- Neither string changes inside the loop, so measure them once.
	local input_len = ulen(input_clean)
	local page_len = ulen(pagename_clean)

	-- Find respelling j positions (j in input but not in pagename)
	local respelling_positions = {}
	local input_idx = 1
	local page_idx = 1

	while input_idx <= input_len do
		local input_char = usub(input_clean, input_idx, input_idx)
		local is_respelling = false

		if input_char == "j" then
			-- Check if this j exists in pagename at corresponding position
			local page_char = page_idx <= page_len and usub(pagename_clean, page_idx, page_idx)
			is_respelling = page_char ~= "j"
		end

		if is_respelling then
			-- Respelling j: record it and leave the pagename cursor in place.
			insert(respelling_positions, input_idx)
		else
			-- Original j or any other character: both cursors advance.
			page_idx = page_idx + 1
		end
		input_idx = input_idx + 1
	end

	return respelling_positions
end

-- Safe mapping to extract tones without destroying precomposed characters
-- Each value is "<base character>,<tone name>".
local TONE_MAP = {
	["á"]="a,acute", ["à"]="a,grave", ["ã"]="a,tilde",
	["é"]="e,acute", ["è"]="e,grave", ["ẽ"]="e,tilde",
	["í"]="i,acute", ["ì"]="i,grave", ["ĩ"]="i,tilde",
	["ý"]="y,acute", ["ỳ"]="y,grave", ["ỹ"]="y,tilde",
	["ú"]="u,acute", ["ù"]="u,grave", ["ũ"]="u,tilde",
	["ó"]="o,acute", ["ò"]="o,grave", ["õ"]="o,tilde",
	-- Precomposed vowels with macrons/ogoneks + tones (represented here via standard combinations)
	["ą́"]="ą,acute", ["ą̃"]="ą,tilde",
	["ę́"]="ę,acute", ["ę̃"]="ę,tilde",
	["ė́"]="ė,acute", ["ė̃"]="ė,tilde",
	["į́"]="į,acute", ["į̃"]="į,tilde",
	["ų́"]="ų,acute", ["ų̃"]="ų,tilde",
	["ū́"]="ū,acute", ["ū̃"]="ū,tilde",
	-- Tilde on liquids (for semi-diphthongs)
	["l̃"]="l,tilde", ["m̃"]="m,tilde", ["ñ"]="n,tilde", ["r̃"]="r,tilde", ["j̃"]="j,tilde"
}

-- Resolves NFD back to safe representation if input was somehow NFD.
-- The PUA rejection (with replacement hints), non-standard format tracking,
-- and i/j-with-dotabove canonicalization are all delegated to Module:lt-common
-- so that all Lithuanian modules share one implementation. Only the
-- TONE_MAP-based codepoint parser remains local since it produces the
-- token list specifically consumed by lt-pron's tokenizer.
--
-- Returns a list of {char = <base character>, tone = <tone name or nil>}
-- entries, one per parsed character of the canonicalized text.
local function safe_normalize(text)
	-- Reject PUA characters with helpful "use X instead" hints.
	m_lt_common.reject_pua(text)

	-- Track non-standard input encodings for analytics. Detection runs on
	-- the raw input (in NFD internally) before any canonicalization, so the
	-- counts reflect what editors actually typed.
	local has_dotless, has_precomp_i = m_lt_common.detect_nonstandard(text)
	if has_dotless then
		track('dotless-ij')
	end
	if has_precomp_i then
		track('precomposed-i-accent')
	end

	-- "Explicit dotabove" (i.e., i/j + U+0307 + accent) is the *correct*
	-- input form for accented i/j and is tracked separately to monitor
	-- editor adoption. This check stays local since lt-common's
	-- detect_nonstandard intentionally only flags the wrong forms.
	if ufind(mw.ustring.toNFD(text), "[ij]" .. DOTABOVE) then
		track('explicit-dotabove')
	end

	-- Normalize: drops stray dot-aboves between i/j and accents,
	-- converts dotless ı/ȷ to standard i/j, returns clean NFC.
	text = m_lt_common.canonicalize_input(text)

	-- Parse the canonicalized NFC string into {char, tone} tokens.
	-- TONE_MAP entries are 1- or 2-codepoint precomposed sequences
	-- (e.g. "á" is one codepoint; "ą́" is "ą" + U+0301). The lookup tries
	-- the 2-codepoint match first, then falls back to the 1-codepoint match.
	local result = {}
	local text_len = ulen(text) -- text is not modified inside the loop
	local i = 1
	while i <= text_len do
		local c = usub(text, i, i)
		local c_lower = ulower(c) -- Convert to lowercase for TONE_MAP lookup

		-- Look ahead for combining marks if any slipped through
		local next_c = usub(text, i+1, i+1)
		local next_c_lower = ulower(next_c)

		local entry = TONE_MAP[c_lower .. next_c_lower]
		local consumed = 2
		if not entry then
			entry = TONE_MAP[c_lower]
			consumed = 1
		end

		if entry then
			local parts = rsplit(entry, ",")
			-- Preserve original case of base character
			local base_char = parts[1]
			if c ~= c_lower then
				base_char = uupper(base_char)
			end
			insert(result, {char = base_char, tone = parts[2]})
		else
			insert(result, {char = c, tone = nil})
		end
		i = i + consumed
	end
	return result
end

-- ============================================================================
-- SECTION 3: Tokenization & Diphthong/Digraph resolution
-- ============================================================================

-- Classifies a character (case-insensitively) as "V" when it is a V_IPA key,
-- "R"/"S"/"T" when it belongs to the matching CLASS set, else "UNKNOWN".
local function get_type(c)
	local lc = ulower(c)
	if V_IPA[lc] then return "V" end
	if CLASS.R[lc] then return "R" end
	if CLASS.S[lc] then return "S" end
	if CLASS.T[lc] then return "T" end
	return "UNKNOWN"
end

-- Helper: Convert token array back to NFC string for suffix/prefix matching
-- Each token contributes its `char`, followed by the combining mark for its
-- tone when the tone is grave, acute or tilde.
local function tokens_to_string(tok_list)
	local parts = {}
	for _, t in ipairs(tok_list) do
		local c = t.char
		if t.tone == "grave" then
			c = c .. GRAVE
		elseif t.tone == "acute" then
			c = c .. ACUTE
		elseif t.tone == "tilde" then
			c = c .. TILDE
		end
		insert(parts, c)
	end
	return mw.ustring.toNFC(concat(parts))
end

-- Apply automatic properties based on word structure (e.g.
loanwords) local function apply_auto_properties(tokens) local word_str = ulower(tokens_to_string(tokens)) --[[ Suffix detection for loanword quality (currently disabled) local matched_loan_suff = nil for suff, _ in pairs(SUFFIX_LOAN) do if usub(word_str, -ulen(suff)) == suff then matched_loan_suff = suff; break end end if matched_loan_suff then local suff_len = ulen(matched_loan_suff) local acc_len = 0 for i = #tokens, 1, -1 do local t = tokens[i] acc_len = acc_len + ulen(tokens_to_string({t})) if t.type == "V" and ulower(t.char) == "o" then t.auto_targeted = true if not t.force_default then t.loan_quality = true end end if acc_len >= suff_len then break end end end -- Check for redundant asterisks globally for _, t in ipairs(tokens) do if t.force_default and not t.auto_targeted then track('redundant-asterisk') end end --]] return tokens end -- Strict whitelist for valid diphthong and tone combinations local function is_strict_diphthong(c1, t1, c2, t2) local combo = c1 .. c2 -- Unstressed: neither element has a tone if not t1 and not t2 then return (combo == "ie" or combo == "uo" or combo == "ai" or combo == "au" or combo == "ei" or combo == "ui" or combo == "oi" or combo == "ou" or combo == "eu") end -- Tone on the first element (acute or grave) if t1 and not t2 then if t1 == "acute" then return (combo == "ai" or combo == "au" or combo == "ei" or combo == "ie" or combo == "uo" or combo == "oi") elseif t1 == "grave" then return (combo == "ui" or combo == "oi" or combo == "ou" or combo == "eu") end end -- Tone on the second element (tilde) if not t1 and t2 then if t2 == "tilde" then return (combo == "ai" or combo == "au" or combo == "ei" or combo == "ui" or combo == "ie" or combo == "uo" or combo == "eu") end end return false end local function tokenize(text_str, pagename) local raw_chars = safe_normalize(text_str) -- Identify respelling glides local respelling_j_positions = identify_respelling_glides(text_str, pagename) local tokens = {} local i = 1 while i <= 
#raw_chars do local curr = raw_chars[i] local nxt = raw_chars[i+1] local lc_curr = ulower(curr.char) local lc_nxt = nxt and ulower(nxt.char) -- Explicit Modifiers if curr.char == "^" then local last_v = nil for j = #tokens, 1, -1 do if tokens[j].type == "V" then last_v = tokens[j]; break end end if last_v then local lc_v = ulower(last_v.char) local base_v = usub(lc_v, -1) -- Last char for silent i combinations local is_e_base = (lc_v == "e") or (base_v == "e") local is_o_base = (lc_v == "o") or (base_v == "o") -- Check for valid e: no tone or grave only if is_e_base then if not last_v.tone or last_v.tone == "grave" then last_v.loan_quality = true elseif last_v.tone == "acute" then error("lt-pron: '^' cannot be used with acute 'é' (use only with plain 'e' or grave 'è')") elseif last_v.tone == "tilde" then error("lt-pron: '^' cannot be used with tilde 'ẽ' (use only with plain 'e' or grave 'è')") end -- Check for valid o: no tone only elseif is_o_base then if not last_v.tone then last_v.loan_quality = true elseif last_v.tone == "grave" then error("lt-pron: '^' is redundant for 'ò' (already pronounced [ɔ])") elseif last_v.tone == "acute" or last_v.tone == "tilde" then error("lt-pron: '^' cannot be used with 'ó' or 'õ' (native long vowels)") end else error("lt-pron: '^' can only be used with 'e' (plain/grave) or 'o' (plain). Found: '" .. lc_v .. 
"'") end end i = i + 1 -- Check for (j) marker elseif curr.char == "(" and i + 2 <= #raw_chars then local char2 = raw_chars[i+1] local char3 = raw_chars[i+2] if char2.char == "j" and char3.char == ")" then insert(tokens, {char = "j", type = "R", tone = nil, is_respelling = true, is_optional = true, original_char = "-"}) i = i + 3 else error("lt-pron: '(' must be followed by 'j)' to form the (j) glide marker") end --[[ Asterisk modifier (currently disabled) elseif curr.char == "*" then local last_v = nil for j = #tokens, 1, -1 do if tokens[j].type == "V" then last_v = tokens[j]; break end end if last_v then last_v.force_default = true end i = i + 1 --]] elseif curr.char == "." then insert(tokens, {char = ".", type = "BOUNDARY"}) i = i + 1 elseif curr.char == "ˌ" then insert(tokens, {char = "ˌ", type = "SECONDARY_STRESS_BOUNDARY"}) i = i + 1 elseif curr.char == "ʼ" then -- Softening mark: palatalize the preceding consonant for j = #tokens, 1, -1 do local tok = tokens[j] if tok.type == "T" or tok.type == "S" or tok.type == "R" then tok.softening_mark = true break end end i = i + 1 elseif lc_curr == "d" and nxt and (lc_nxt == "z" or lc_nxt == "ž") then insert(tokens, {char = curr.char .. nxt.char, type = "T", tone = nil}) i = i + 2 elseif lc_curr == "c" and nxt and lc_nxt == "h" then insert(tokens, {char = curr.char .. nxt.char, type = "S", tone = nil}) i = i + 2 -- Special handling for V + j̃ (final j with tilde): treat as V + ĩ diphthong elseif curr.type ~= "BOUNDARY" and get_type(curr.char) == "V" and nxt and ulower(nxt.char) == "j" and nxt.tone == "tilde" then -- Check if this is word-final (no more non-boundary tokens after j̃) local is_final = true for k = i + 2, #raw_chars do if raw_chars[k].char ~= " " then is_final = false break end end if is_final then -- Combine V + j̃ as a diphthong V + ĩ (e.g., uj̃ → ui̇̃) local v_char = ulower(curr.char) local combined_char = v_char .. 
"i" -- e.g., "u" + "i" = "ui" local tone = "tilde" -- j̃'s tilde local tone_position = 2 -- Tilde is on the second vowel (i) insert(tokens, { char = combined_char, type = "V", tone = tone, tone_position = tone_position, original_char = v_char .. "j" -- For hyphenation: display as "uj̃" }) i = i + 2 else -- Not final, treat as regular V + j insert(tokens, {char = curr.char, type = "V", tone = curr.tone}) i = i + 1 end elseif curr.type ~= "BOUNDARY" and get_type(curr.char) == "V" and nxt and get_type(nxt.char) == "V" then local nxt_nxt = raw_chars[i+2] local lc_nxt_nxt = nxt_nxt and ulower(nxt_nxt.char) local back_diph = lc_nxt and lc_nxt_nxt and (lc_nxt .. lc_nxt_nxt) local is_silent_i_diph = (lc_curr == "i" and not curr.tone) and (back_diph == "au" or back_diph == "ai" or back_diph == "ou" or back_diph == "oi" or back_diph == "uo") local is_silent_i_mono = (lc_curr == "i" and not curr.tone) and (lc_nxt == "a" or lc_nxt == "ą" or lc_nxt == "o" or lc_nxt == "u" or lc_nxt == "ų" or lc_nxt == "ū" or lc_nxt == "ɔ") local is_valid_diph = is_strict_diphthong(lc_curr, curr.tone, lc_nxt, nxt and nxt.tone) if is_valid_diph and nxt_nxt and get_type(nxt_nxt.char) == "V" then local lc_nxt_nxt = ulower(nxt_nxt.char) local is_next_valid_diph = is_strict_diphthong(lc_nxt, nxt.tone, lc_nxt_nxt, nxt_nxt.tone) if is_next_valid_diph then -- Resolve ambiguous triplets (e.g., auo -> a.uo is standard) -- Break the first valid diphthong unless explicitly stressed if not curr.tone then is_valid_diph = false end end end if is_silent_i_diph then local tone = nxt.tone or nxt_nxt.tone local tone_position = nil if tone then -- For silent i diphthongs (e.g., iau), position is relative to the full string -- Position 2 = middle vowel, Position 3 = last vowel if nxt.tone then tone_position = 2 elseif nxt_nxt.tone then tone_position = 3 end end insert(tokens, {char = curr.char .. nxt.char .. 
nxt_nxt.char, type = "V", tone = tone, has_silent_i = true, tone_position = tone_position}) i = i + 3 elseif is_silent_i_mono then local tone = nxt.tone local tone_position = nil if tone then -- For silent i monosyllables (e.g., ia), position 2 = second character tone_position = 2 end insert(tokens, {char = curr.char .. nxt.char, type = "V", tone = tone, has_silent_i = true, tone_position = tone_position}) i = i + 2 elseif is_valid_diph then local tone = curr.tone or nxt.tone local tone_position = nil if tone then -- Record which vowel carries the tone (1 = first, 2 = second) if curr.tone then tone_position = 1 elseif nxt.tone then tone_position = 2 end end insert(tokens, {char = curr.char .. nxt.char, type = "V", tone = tone, tone_position = tone_position}) i = i + 2 else insert(tokens, {char = curr.char, type = "V", tone = curr.tone}) i = i + 1 end else if curr.char ~= " " then local tok_type = get_type(curr.char) local is_respelling_j = false -- Check if this is a respelling j if ulower(curr.char) == "j" and #respelling_j_positions > 0 then -- Build cleaned string up to current position to find clean position local cleaned_so_far = "" for k = 1, i do local c = raw_chars[k] if c.char ~= "^" and c.char ~= "." and c.char ~= " " and c.char ~= "ʼ" and c.char ~= "ˌ" then local char_clean = c.char -- Don't add tone marks to cleaned string if not c.tone then cleaned_so_far = cleaned_so_far .. char_clean else -- Add base character without tone cleaned_so_far = cleaned_so_far .. 
char_clean end end end local clean_pos = ulen(cleaned_so_far) -- Check if this position is in respelling list for _, pos in ipairs(respelling_j_positions) do if pos == clean_pos then is_respelling_j = true break end end end if ulower(curr.char) == "j" then local tok_data = {char = curr.char, type = tok_type, tone = curr.tone, is_respelling = is_respelling_j, is_optional = false} if is_respelling_j then tok_data.original_char = "-" -- Respelling: use "-" to indicate not in orthography end insert(tokens, tok_data) else insert(tokens, {char = curr.char, type = tok_type, tone = curr.tone}) end end i = i + 1 end end -- Validate respelling glides are between vowels for i, tok in ipairs(tokens) do if tok.is_respelling then local prev_is_vowel = false local next_is_vowel = false -- Check previous non-boundary token for j = i - 1, 1, -1 do if tokens[j].type ~= "BOUNDARY" and tokens[j].type ~= "SECONDARY_STRESS_BOUNDARY" then prev_is_vowel = (tokens[j].type == "V") break end end -- Check next non-boundary token for j = i + 1, #tokens do if tokens[j].type ~= "BOUNDARY" and tokens[j].type ~= "SECONDARY_STRESS_BOUNDARY" then next_is_vowel = (tokens[j].type == "V") break end end if not (prev_is_vowel and next_is_vowel) then error("lt-pron: Respelling glide 'j' or '(j)' must be between two vowels") end end end return apply_auto_properties(tokens) end -- ============================================================================ -- SECTION 4: Syllabification -- ============================================================================ -- Pre-syllabification: Merge geminate (doubled) consonants -- This must happen BEFORE syllabification to prevent false mixed diphthongs -- For example: pérrašo → pér-ra-šo would incorrectly treat ér as a mixed diphthong -- By merging rr→r first, we get pé-ra-šo, correctly keeping é as a pure vowel local function merge_geminate_consonants(tokens) local SIBILANTS = {["s"]=true, ["z"]=true, ["š"]=true, ["ž"]=true} local i = 1 while i < #tokens do 
local tok = tokens[i] local nxt = tokens[i+1] local tok_is_cons = (tok.type == "T" or tok.type == "S" or tok.type == "R") local nxt_is_cons = (nxt.type == "T" or nxt.type == "S" or nxt.type == "R") if tok_is_cons and nxt_is_cons then local tok_char = ulower(tok.char) local nxt_char = ulower(nxt.char) local tok_is_sib = SIBILANTS[tok_char] local nxt_is_sib = SIBILANTS[nxt_char] -- Merge if: (1) both are sibilants, or (2) identical consonants if (tok_is_sib and nxt_is_sib) or (tok_char == nxt_char) then -- Remove the first token (keep the second) table.remove(tokens, i) -- Don't increment i, check the same position again else i = i + 1 end else i = i + 1 end end end local function syllabify(tokens) local syllables = {} local current_syl = {} -- Check for leading secondary stress marker local has_initial_secondary_stress = false if #tokens > 0 and tokens[1].type == "SECONDARY_STRESS_BOUNDARY" then has_initial_secondary_stress = true end local v_indices = {} for i, tok in ipairs(tokens) do if tok.type == "V" then insert(v_indices, i) end end if #v_indices == 0 then return {tokens} end -- Edge case: no vowels local boundaries = {} -- Sonority Sequencing Algorithm combined with Morphophonological Maximum Onset for idx = 1, #v_indices - 1 do local v1_idx = v_indices[idx] local v2_idx = v_indices[idx + 1] local raw_c_tokens = {} local forced_boundary_idx = nil for i = v1_idx + 1, v2_idx - 1 do if tokens[i].type == "BOUNDARY" or tokens[i].type == "SECONDARY_STRESS_BOUNDARY" then forced_boundary_idx = i else insert(raw_c_tokens, {t=tokens[i], orig_idx=i}) end end if forced_boundary_idx then boundaries[forced_boundary_idx] = true elseif #raw_c_tokens == 0 then -- Hiatus boundaries[v2_idx] = true else -- Macro-Token Grouping: Treat consecutive sibilants (S) as a single phonological unit local macro_c = {} local i = 1 while i <= #raw_c_tokens do local current = raw_c_tokens[i] if current.t.type == "S" then local absorbed = {current} local j = i + 1 -- Absorb any subsequent S 
tokens into this macro unit, keeping track of them while j <= #raw_c_tokens and raw_c_tokens[j].t.type == "S" do insert(absorbed, raw_c_tokens[j]) j = j + 1 end insert(macro_c, {type = "S", orig_idx = current.orig_idx, tokens = absorbed}) i = j else insert(macro_c, {type = current.t.type, orig_idx = current.orig_idx, tokens = {current}}) i = i + 1 end end local m_count = #macro_c if m_count == 1 then -- V.CV (or V.SSV, e.g., sausšala -> sau.sšala) boundaries[macro_c[1].orig_idx] = true elseif m_count == 2 then local t1, t2 = macro_c[1].type, macro_c[2].type -- ST, SR, TR -> V.CCV if (t1=="S" and t2=="T") or (t1=="S" and t2=="R") or (t1=="T" and t2=="R") then boundaries[macro_c[1].orig_idx] = true else -- Handle TSS and RSS sequences properly (e.g., Oksfordas, transformavo). -- If the macro cluster is T+S or R+S, and the S unit absorbed multiple sibilants, -- split between the first and second sibilant (TS.S, RS.S). if (t1=="T" or t1=="R") and t2=="S" and #macro_c[2].tokens > 1 then boundaries[macro_c[2].tokens[2].orig_idx] = true else -- Default VC.CV boundaries[macro_c[2].orig_idx] = true end end elseif m_count == 3 then local t1, t2, t3 = macro_c[1].type, macro_c[2].type, macro_c[3].type if t1=="S" and t2=="T" and t3=="R" then boundaries[macro_c[1].orig_idx] = true -- V.CCCV elseif (t1=="T" and t2=="S" and t3=="T") or (t1=="R" and t2=="S" and t3=="T") or (t1=="T" and t2=="S" and t3=="R") or (t1=="R" and t2=="T" and t3=="R") or (t1=="T" and t2=="T" and t3=="R") or (t1=="R" and t2=="S" and t3=="R") then boundaries[macro_c[2].orig_idx] = true -- VC.CCV (includes RSR, e.g., konfliktas) else boundaries[macro_c[3].orig_idx] = true -- VCC.CV end elseif m_count == 4 then -- Identify the 4-consonant pattern according to the 8 documented combinations local pattern = macro_c[1].type .. macro_c[2].type .. macro_c[3].type .. 
macro_c[4].type if pattern == "RSTR" or pattern == "TSTR" then -- R.STR, T.STR -> VC.CCCV boundaries[macro_c[2].orig_idx] = true elseif pattern == "RTRR" or pattern == "TSTS" then -- RTR.R, TST.S -> VCCC.CV boundaries[macro_c[4].orig_idx] = true else -- RT.ST, RT.SR, RT.TR, ST.TR -> VCC.CCV boundaries[macro_c[3].orig_idx] = true end elseif m_count >= 5 then -- Fallback for >=5 logical consonant units track('complex-consonant-cluster') boundaries[macro_c[3].orig_idx] = true end end end -- Construct syllables local secondary_stress_syllables = {} for i, tok in ipairs(tokens) do if boundaries[i] and #current_syl > 0 then insert(syllables, current_syl) -- If this is a secondary stress boundary, mark the NEXT syllable if tok.type == "SECONDARY_STRESS_BOUNDARY" then secondary_stress_syllables[#syllables + 1] = true end current_syl = {} end if tok.type ~= "BOUNDARY" and tok.type ~= "SECONDARY_STRESS_BOUNDARY" then insert(current_syl, tok) end end if #current_syl > 0 then insert(syllables, current_syl) end -- Apply secondary stress marks for idx, _ in pairs(secondary_stress_syllables) do if syllables[idx] then syllables[idx].secondary_stress = true end end -- Apply initial secondary stress if present if has_initial_secondary_stress and #syllables > 0 then syllables[1].secondary_stress = true end return syllables end -- ============================================================================ -- SECTION 5: Base IPA Mapping & Stress Assignment -- ============================================================================ local function is_mixed_diphthong(syl, v_idx) local v_tok = syl[v_idx] local lc_v = ulower(v_tok.char) -- Strip silent 'i' for accurate length calculation if v_tok.has_silent_i then lc_v = usub(lc_v, 2) end -- Digraphs (ie, uo, ai, au, etc.) or natively long vowels (ą, ę, ė, į, y, ų, ū) -- do NOT form mixed diphthongs with subsequent resonants. -- Only short a, e, i, u, o can form true mixed diphthongs. 
if ulen(lc_v) > 1 then return false end local LONG_V = {["ą"]=true, ["ę"]=true, ["ė"]=true, ["į"]=true, ["y"]=true, ["ų"]=true, ["ū"]=true} if LONG_V[lc_v] then return false end -- V + R in the SAME syllable (coda) if v_idx < #syl and syl[v_idx+1].type == "R" then -- j and v are not considered for typical liquid semi-diphthongs length rules if syl[v_idx+1].char ~= "j" and syl[v_idx+1].char ~= "v" then return true end end return false end local function map_base_phonetics(syllables) local stress_prefix = nil for _, syl in ipairs(syllables) do local v_idx = nil for i, tok in ipairs(syl) do if tok.type == "V" then v_idx = i; break end end if v_idx then local v_tok = syl[v_idx] local v_char = v_tok.char local lc_v_char = ulower(v_char) local tone = v_tok.tone local is_mixed = is_mixed_diphthong(syl, v_idx) if is_mixed and not tone then local r_tok = syl[v_idx+1] if r_tok.tone then tone = r_tok.tone end end if v_tok.has_silent_i then v_tok.silent_i = true local actual_vowel = usub(lc_v_char, 2) v_tok.ipa = V_IPA[actual_vowel] or "ɐ" lc_v_char = actual_vowel else -- Apply loan quality base default for 'o' and 'e' v_tok.ipa = V_IPA[lc_v_char] or "ɐ" if lc_v_char == "o" and v_tok.loan_quality then v_tok.ipa = "ɔ" end if lc_v_char == "e" and v_tok.loan_quality then v_tok.ipa = "e" end -- Automatically prepend glide 'j' to syllable-initial 'ie' if lc_v_char == "ie" and v_idx == 1 then v_tok.ipa = "jiɛ" end end if tone then local s_mark = "" local v_base = usub(lc_v_char, 1, 1) if tone == "acute" then s_mark = "¹ˈ" if lc_v_char == "a" then if is_mixed then v_tok.ipa = "ɑˑ" else v_tok.ipa = "ɑː" end elseif lc_v_char == "e" then -- Note: '^' modifier not allowed with acute 'é' as of current rules -- This code path preserved for consistency if is_mixed and v_tok.loan_quality then v_tok.ipa = "ɛ" -- Loanword é in mixed diphthongs is short /ɛ/ without length elseif is_mixed then v_tok.ipa = "æˑ" -- Mixed diphthong: half-long else v_tok.ipa = "æː" -- Pure vowel: full-long end 
elseif lc_v_char == "ai" then v_tok.ipa = "ɑˑɪ" elseif lc_v_char == "au" then v_tok.ipa = "ɑˑʊ" elseif lc_v_char == "ei" then v_tok.ipa = "æˑɪ" elseif lc_v_char == "eu" then v_tok.ipa = "æˑʊ" elseif lc_v_char == "oi" then v_tok.ipa = "oˑɪ" end elseif tone == "grave" then if is_mixed and (v_base == "i" or v_base == "u" or v_base == "e" or v_base == "o") then s_mark = "¹ˈ" elseif lc_v_char == "ui" or lc_v_char == "oi" or lc_v_char == "ou" or lc_v_char == "eu" then s_mark = "¹ˈ" else s_mark = "ˈ" end -- Handle loanword variants for grave if lc_v_char == "e" and v_tok.loan_quality then v_tok.ipa = "e" -- è^ (loanword) → [e] end if lc_v_char == "o" then v_tok.ipa = "ɔ" -- ò (always loanword) → [ɔ] end elseif tone == "tilde" then s_mark = "²ˈ" if lc_v_char == "ai" then v_tok.ipa = "ɐɪˑ" elseif lc_v_char == "au" then v_tok.ipa = "ɒʊˑ" elseif lc_v_char == "ei" then v_tok.ipa = "ɛɪˑ" elseif lc_v_char == "eu" then v_tok.ipa = "ɛʊˑ" elseif lc_v_char == "ui" then v_tok.ipa = "ʊɪˑ" elseif lc_v_char == "a" then if is_mixed then v_tok.ipa = "ɐ" else v_tok.ipa = "ɑː" end elseif lc_v_char == "e" then if is_mixed then v_tok.ipa = "ɛ" else v_tok.ipa = "æː" end end end syl.stress = s_mark end -- Set base IPA for consonants (case-insensitive mapping) for i, tok in ipairs(syl) do if tok.type ~= "V" then local lc_c = ulower(tok.char) tok.ipa = CONS_IPA[lc_c] or lc_c end end -- Handle tilde half-length on mixed diphthong coda if tone == "tilde" and is_mixed then local r_tok = syl[v_idx+1] r_tok.half_long = true end else -- Syllable with no vowel (e.g. 
leftover consonant) for i, tok in ipairs(syl) do local lc_c = ulower(tok.char) tok.ipa = CONS_IPA[lc_c] or lc_c end end end end -- ============================================================================ -- SECTION 6: Phonetic Polish Passes -- ============================================================================ -- Pass 1: Palatalization Spreading (Right-to-Left) -- -- right_context_palatalizing (optional, used by the cross-word pipeline): -- When the current word is followed by a liaisoned word whose first effective -- phoneme is "soft" (front V, j, or a palatalized consonant), pass `true` so -- that: -- 1. spread_active starts true (allowing the word's last consonant to -- receive ʲ even though it has no in-word right neighbour); -- 2. is_direct evaluates to true for that last consonant when it is k/g, -- so VLKK §19's "lyk‿jója → [lʲiːkʲ‿…]" pattern is produced. -- When omitted or false, the function behaves exactly like the within-word -- palatalization that this module has always done. local function apply_palatalization(syllables, right_context_palatalizing) -- Flatten tokens for cross-syllable spreading local flat_tokens = {} for _, syl in ipairs(syllables) do for _, tok in ipairs(syl) do insert(flat_tokens, tok) end end -- First pass: Apply softening marks (no spreading) for i = 1, #flat_tokens do local tok = flat_tokens[i] if tok.softening_mark and tok.ipa ~= "" then tok.ipa = tok.ipa .. 
"ʲ" tok.is_palatalized = true end end local spread_active = right_context_palatalizing and true or false for i = #flat_tokens, 1, -1 do local tok = flat_tokens[i] local lc_char = ulower(tok.char) if tok.type == "V" then if tok.silent_i or FRONT_V[lc_char] then spread_active = true else spread_active = false end elseif lc_char == "j" then -- Preserve special IPA for final j (ɪ̯), don't override it if tok.ipa ~= "ɪ̯" and tok.ipa ~= "" then tok.ipa = "j" end tok.is_palatalized = true spread_active = true else if spread_active then -- Check if the palatalization is DIRECT (immediate contact with front V or j) local is_direct = false local nxt = flat_tokens[i+1] if nxt then local nxt_lc = ulower(nxt.char) if (nxt.type == "V" and (nxt.silent_i or FRONT_V[nxt_lc])) or nxt_lc == "j" then is_direct = true end elseif right_context_palatalizing then -- No in-word neighbour, but a liaisoned soft phoneme follows. is_direct = true end tok.is_palatalized = true if lc_char == "k" or lc_char == "g" then if is_direct and tok.ipa ~= "" then tok.ipa = tok.ipa .. "ʲ" end -- DO NOT set spread_active to false! k/g are transparent to spreading. else if tok.ipa ~= "" then tok.ipa = tok.ipa .. 
"ʲ" end end end end end end -- Pass 2: Voicing Assimilation (Right-to-Left) local function apply_voicing_assimilation(syllables) local flat_tokens = {} for _, syl in ipairs(syllables) do for _, tok in ipairs(syl) do insert(flat_tokens, tok) end end local target_voice = nil for i = #flat_tokens, 1, -1 do local tok = flat_tokens[i] if tok.type == "V" or tok.type == "R" then target_voice = nil -- Blocked by vowels and resonants elseif tok.type == "S" or tok.type == "T" then local lc_char = ulower(tok.char) local is_uppercase = (tok.char ~= lc_char) if target_voice == nil then -- Establish new assimilation target target_voice = is_voiced(lc_char) and "voiced" or "voiceless" else -- Assimilate local current_is_voiced = is_voiced(lc_char) if target_voice == "voiced" and not current_is_voiced then local new_char = VOICING_PAIRS[lc_char] or lc_char tok.char = is_uppercase and uupper(new_char) or new_char elseif target_voice == "voiceless" and current_is_voiced then local new_char = VOICING_PAIRS[lc_char] or lc_char tok.char = is_uppercase and uupper(new_char) or new_char end -- Update IPA based on new character, preserving palatalization local lc_new_char = ulower(tok.char) local new_ipa = CONS_IPA[lc_new_char] or lc_new_char if tok.is_palatalized and lc_new_char ~= "k" and lc_new_char ~= "g" then new_ipa = new_ipa .. "ʲ" end tok.ipa = new_ipa end end end end -- Pass 3: Nasal Assimilation (n -> ŋ before velars k/g and post-velar ch/h) -- Per VLKK §6.3, n assimilates to the place of articulation of any following -- velar/uvular consonant, including the fricatives ch [x] and h [ɣ]. 
local function apply_nasal_assimilation(syllables)
	-- Rewrites the IPA of every "n" that stands immediately before a velar or
	-- post-velar consonant (k, g, ch, h) to ŋ, or to ŋʲ when that consonant's
	-- IPA already carries ʲ. Mutates the tokens in place; returns nothing.
	--
	-- Only the directly adjacent token is inspected. A vowel token flagged
	-- `silent_i` (e.g. "ia", "iū") still contains a real nucleus -- the vowel
	-- is everything after the leading i -- so it separates n from a later
	-- velar and must not be skipped over.
	local flat = {}
	for _, syl in ipairs(syllables) do
		for _, tok in ipairs(syl) do
			insert(flat, tok)
		end
	end

	for i = 1, #flat - 1 do
		local tok = flat[i]
		local nxt = flat[i + 1]
		if ulower(tok.char) == "n" then
			local lc_nxt = ulower(nxt.char)
			if lc_nxt == "k" or lc_nxt == "g" or lc_nxt == "ch" or lc_nxt == "h" then
				-- Check the actual IPA string for direct palatalization.
				if ufind(nxt.ipa, "ʲ") then
					tok.ipa = "ŋʲ"
				else
					tok.ipa = "ŋ"
				end
			end
		end
	end
end

-- Pass 4: Vowel Quality Adjustments after Palatalization
-- Within each syllable, a vowel that follows a palatalized consonant (or j)
-- has its a-type quality shifted to the e-type counterpart, and an initial
-- u/ʊ/o/ɔ receives the "advanced" diacritic (U+031F).
local function adjust_vowel_quality(syllables)
	-- a/e quality shifts, including the diphthong shifts for iau / iai.
	local shifted = {
		["ɐ"] = "ɛ",
		["ɑː"] = "æː",
		["ɒʊ"] = "ɛʊ",
		["ɑˑʊ"] = "æˑʊ",
		["ɒʊˑ"] = "ɛʊˑ",
		["ɐɪ"] = "ɛɪ",
		["ɑˑɪ"] = "æˑɪ",
		["ɐɪˑ"] = "ɛɪˑ",
	}
	for _, syl in ipairs(syllables) do
		local has_palatal_onset = false
		for _, tok in ipairs(syl) do
			if tok.type ~= "V" and (tok.is_palatalized or ulower(tok.char) == "j") then
				has_palatal_onset = true
			elseif tok.type == "V" and has_palatal_onset then
				local ipa = tok.ipa
				ipa = shifted[ipa] or ipa
				-- Dynamic fronting for u/o/ɔ (adds U+031F).
				-- Matches ONLY the first character (^) to avoid double fronting in uɔ.
				ipa = ugsub(ipa, "^([uʊoɔ])", "%1̟")
				tok.ipa = ipa
				has_palatal_onset = false
			end
		end
	end
end

-- Pass 5: Terminal Devoicing (Word-final obstruent devoicing)
-- If the last token of the last syllable is a voiced S/T consonant that has
-- an entry in VOICING_PAIRS, replace its char (preserving case) and its IPA
-- with the paired value, re-adding ʲ when the token is marked palatalized.
local function apply_terminal_devoicing(syllables)
	if #syllables == 0 then return end
	local last_syl = syllables[#syllables]
	local last_tok = last_syl[#last_syl]
	if not last_tok then return end
	if last_tok.type ~= "S" and last_tok.type ~= "T" then return end

	local lc_char = ulower(last_tok.char)
	local is_uppercase = (last_tok.char ~= lc_char)
	if not is_voiced(lc_char) then return end

	local devoiced = VOICING_PAIRS[lc_char]
	if not devoiced then return end

	last_tok.char = is_uppercase and uupper(devoiced) or devoiced
	local new_ipa = CONS_IPA[devoiced] or devoiced
	if last_tok.is_palatalized then
		new_ipa = new_ipa .. "ʲ"
	end
	last_tok.ipa = new_ipa
end

-- Pass 6: Place Assimilation (Sibilant + Affricate)
-- When a sibilant meets an affricate, the sibilant adjusts its place of articulation:
-- s+č→š, z+dž→ž, š+c→s, ž+dz→z
local function apply_place_assimilation(syllables)
	local flat = {}
	for _, syl in ipairs(syllables) do
		for _, tok in ipairs(syl) do
			insert(flat, tok)
		end
	end

	-- Keyed by the sibilant's current IPA, then by the following token's IPA.
	local PLACE_ASSIM = {
		["s"] = {["t͡ʃ"] = "ʃ"},
		["z"] = {["d͡ʒ"] = "ʒ"},
		["ʃ"] = {["t͡s"] = "s"},
		["ʒ"] = {["d͡z"] = "z"},
	}

	for i = 1, #flat - 1 do
		local tok = flat[i]
		local nxt = flat[i + 1]
		if tok.type == "S" and nxt.type == "T" then
			local rule = PLACE_ASSIM[tok.ipa]
			if rule and rule[nxt.ipa] then
				tok.ipa = rule[nxt.ipa]
			end
		end
	end
end

-- Pass 7: Geminate Simplification (Double consonant reduction)
-- NOTE: Original geminates (rr, ll, etc.) are already merged in merge_geminate_consonants.
-- This pass handles geminates created by phonetic rules (e.g., voicing assimilation: td→dd).
-- Sibilants: any two sibilants merge into one (keeping the second)
-- Other consonants: only identical pairs merge
local function apply_geminate_simplification(syllables)
	-- Walks all tokens in order, across syllable boundaries, comparing each
	-- token with the one before it. When both are consonants (T/S/R) and either
	-- both are sibilants or both have the same non-empty IPA, the EARLIER
	-- token's IPA is blanked so that only the later one is rendered.
	local SIBILANT_IPA = {["s"] = true, ["z"] = true, ["ʃ"] = true, ["ʒ"] = true}
	local CONSONANT_TYPE = {T = true, S = true, R = true}

	local prev = nil
	for _, syl in ipairs(syllables) do
		for _, tok in ipairs(syl) do
			if prev and CONSONANT_TYPE[prev.type] and CONSONANT_TYPE[tok.type] then
				local both_sibilant = SIBILANT_IPA[prev.ipa] and SIBILANT_IPA[tok.ipa]
				local identical = (prev.ipa == tok.ipa) and prev.ipa ~= ""
				if both_sibilant or identical then
					prev.ipa = ""
				end
			end
			prev = tok
		end
	end
end

-- Pass 8: Final Consonant Vocalization (v → ʊ̯, j → ɪ̯)
-- Word-final v and j (without tilde) become non-syllabic vowels
-- Note: j with tilde is already converted to i with tilde in tokenization
local function apply_final_consonant_vocalization(syllables)
	if #syllables == 0 then return end
	local final_syl = syllables[#syllables]
	local final_tok = final_syl[#final_syl]
	if not final_tok then return end

	local lc = ulower(final_tok.char)
	-- Only the IPA changes; the token's type is left untouched.
	if lc == "v" then
		final_tok.ipa = "ʊ̯"
	elseif lc == "j" and final_tok.tone ~= "tilde" then
		final_tok.ipa = "ɪ̯"
	end
end

-- ============================================================================
-- SECTION 7: Output Assembly
-- ============================================================================

-- ---------------------------------------------------------------------------
-- Cross-word phonology helpers
-- ---------------------------------------------------------------------------

-- Break a term into its words, splitting on the space character only, and
-- return them as a plain list with empty pieces removed (a nil term yields an
-- empty list). The liaison marker ‿ belongs to the IPA output and is not
-- accepted in input: if it is present an error is raised telling the editor
-- to use a regular space instead.
local function split_into_segments(term)
	if term and ufind(term, LIAISON) then
		error("lt-pron: the liaison marker \"" .. LIAISON ..
			"\" (U+203F) must not appear in the input. Use a regular " ..
			"space between words; the module decides where to insert ‿ " ..
			"in the IPA output based on stress.")
	end

	local words = {}
	for _, piece in ipairs(rsplit(term or "", " ")) do
		if piece ~= "" then
			words[#words + 1] = piece
		end
	end
	return words
end

-- True if any syllable in the word has primary or secondary stress.
local function word_has_stress(syllables)
	local idx = 1
	while syllables[idx] ~= nil do
		local syl = syllables[idx]
		if syl.stress or syl.secondary_stress then
			return true
		end
		idx = idx + 1
	end
	return false
end

-- Compute the clitic group anchor for each word in a phrase.
--
-- A "clitic group" is a stressed word together with all unstressed words that
-- prosodically attach to it. Two adjacent words share a liaison ‿ iff they
-- belong to the same group. The algorithm:
--
--   1. Every stressed word is its own anchor.
--   2. Each unstressed word looks FORWARD for the nearest stressed word
--      (proclitic case, e.g., "iš namų̃" — iš leans on namų̃).
--   3. If no stressed word follows, look BACKWARD instead (enclitic case,
--      e.g., "sakaũ gi" — gi leans on sakaũ).
--   4. If the entire phrase has no stressed word (rare edge case), all
--      unstressed words share a single pseudo-group with anchor 0.
--
-- This matches VLKK §4.7 examples like "iš namų̃ [ɪʃ‿nɐ²ˈmuː]" (proclitic)
-- and "sakaũ gi [sɐ²ˈkɒʊˑ‿ɡʲɪ]" (enclitic), and produces the correct
-- behavior for VLKK §4.9's "išėjaũ į kiẽmą", where the unstressed į is
-- proclitic to kiẽmą while išėjaũ stands alone.
local function compute_clitic_anchors(word_data)
	-- Returns a list parallel to word_data: entry i is the index of the
	-- stressed word that word i attaches to, or 0 for every word when the
	-- phrase contains no stressed word at all.
	local count = #word_data
	local anchors = {}
	local any_stressed = false

	-- Stressed words anchor themselves.
	for idx = 1, count do
		if word_has_stress(word_data[idx].syllables) then
			anchors[idx] = idx
			any_stressed = true
		end
	end

	-- No stress anywhere: bundle everything into pseudo-group 0 so the words
	-- at least share liaison with each other.
	if not any_stressed then
		for idx = 1, count do
			anchors[idx] = 0
		end
		return anchors
	end

	-- Right-to-left: an unstressed word copies its right neighbour's anchor,
	-- which by then already names the nearest stressed word to the right
	-- (or is still nil when there is none).
	for idx = count - 1, 1, -1 do
		anchors[idx] = anchors[idx] or anchors[idx + 1]
	end

	-- Left-to-right: whatever is still unanchored has no stressed word to its
	-- right, so it falls back to the nearest stressed word on the left.
	for idx = 2, count do
		anchors[idx] = anchors[idx] or anchors[idx - 1]
	end

	return anchors
end

-- Find the first token, scanning syllables front to back, whose IPA is not
-- the empty string (tokens blanked by earlier passes realize no phoneme).
-- Returns nil when there is none.
local function first_effective_token(syllables)
	for _, syl in ipairs(syllables) do
		for _, candidate in ipairs(syl) do
			if candidate.ipa ~= "" then
				return candidate
			end
		end
	end
	return nil
end

-- Find the last token, scanning syllables back to front, whose IPA is not the
-- empty string. Returns nil when there is none.
local function last_effective_token(syllables)
	for syl_idx = #syllables, 1, -1 do
		local syl = syllables[syl_idx]
		for tok_idx = #syl, 1, -1 do
			local candidate = syl[tok_idx]
			if candidate.ipa ~= "" then
				return candidate
			end
		end
	end
	return nil
end

-- Return true if the next word's first effective phoneme triggers
-- palatalization across the liaison boundary (front V, j, or an already
-- palatalized consonant). Must be called AFTER the next word's palatalization
-- pass has run, so `is_palatalized` is reliable.
local function first_token_palatalizes(syllables)
	-- Looks only at the first token with non-empty IPA; false when the word
	-- has no such token.
	local lead = first_effective_token(syllables)
	if lead == nil then
		return false
	end

	local lc = ulower(lead.char)
	if lead.type == "V" then
		-- Front vowels and silent-i vowels soften what precedes them.
		return lead.silent_i or FRONT_V[lc] or false
	elseif lc == "j" then
		return true
	else
		return lead.is_palatalized == true
	end
end

-- VLKK §20: the preposition "už" keeps its [ʒ] (i.e. terminal devoicing is
-- skipped) when the next liaisoned word starts with a vowel or sonorant
-- consonant (n, m, l, r, j, v). Other words always undergo terminal devoicing.
local UZ_SKIP_SONORANTS = {
	["n"] = true,
	["m"] = true,
	["l"] = true,
	["r"] = true,
	["j"] = true,
	["v"] = true,
}

local function should_skip_devoicing_for_uz(word_text, next_syllables)
	-- remove_all_accents returns NFD; we have to fold it back to NFC before
	-- comparing against the literal "už" because ž (U+017E) decomposes to
	-- z + COMBINING CARON (U+030C) in NFD form.
	local bare = remove_all_accents(word_text or "")
	local normalized = ulower(mw.ustring.toNFC(bare))
	if normalized ~= "už" or not next_syllables then
		return false
	end

	local follower = first_effective_token(next_syllables)
	if not follower then
		return false
	end
	if follower.type == "V" then
		return true
	end
	return UZ_SKIP_SONORANTS[ulower(follower.char)] == true
end

-- VLKK §21b / §22b / §23b: at a liaison boundary, the last consonant of W1
-- and the first consonant of W2 may interact. We mirror within-word place
-- assimilation and geminate / sibilant simplification, applied just before
-- W1's palatalization pass so that any new ipa (e.g. s → ʃ) gets palatalized
-- correctly when needed.
-- Place assimilation across a liaison: keyed by W1's final sibilant (bare
-- IPA), then by W2's initial affricate (bare IPA).
local CROSSWORD_PLACE_ASSIM = {
	["s"] = {["t͡ʃ"] = "ʃ"},
	["z"] = {["d͡ʒ"] = "ʒ"},
	["ʃ"] = {["t͡s"] = "s"},
	["ʒ"] = {["d͡z"] = "z"},
}

local CROSSWORD_SIBILANTS = {["s"] = true, ["z"] = true, ["ʃ"] = true, ["ʒ"] = true}

-- Return `ipa` without a final ʲ; a nil input yields "".
local function strip_trailing_palatal(ipa)
	if not ipa then return "" end
	local stripped = ugsub(ipa, "ʲ$", "")
	return stripped
end

-- Apply geminate / sibilant simplification and place assimilation between the
-- last effective token of W1 and the first effective token of W2. Only W1's
-- token is modified (its ipa is blanked or rewritten); returns nothing.
local function apply_crossword_polish_at_junction(w1_syllables, w2_syllables)
	local w1_last = last_effective_token(w1_syllables)
	local w2_first = first_effective_token(w2_syllables)
	if not (w1_last and w2_first) then return end

	-- These rules describe consonant-to-consonant contact only. Without this
	-- guard two identical vowels meeting at the boundary (W1 ending in, and W2
	-- starting with, the same vowel IPA) would be treated as a "geminate" and
	-- W1's vowel would be deleted.
	if w1_last.type == "V" or w2_first.type == "V" then return end

	-- W1's last token has not yet been palatalized at this point in the
	-- pipeline, so its ipa is the bare base form. W2 has already been fully
	-- polished, so we must strip a trailing ʲ before using it as a key.
	local w1_base = w1_last.ipa or ""
	local w2_base = strip_trailing_palatal(w2_first.ipa)
	if w1_base == "" or w2_base == "" then return end

	-- Geminate / sibilant cluster: drop W1's last consonant entirely.
	if w1_base == w2_base
		or (CROSSWORD_SIBILANTS[w1_base] and CROSSWORD_SIBILANTS[w2_base]) then
		w1_last.ipa = ""
		return
	end

	-- Place assimilation: rewrite W1's last consonant base. Palatalization,
	-- if any, will be re-applied by the palatalization pass.
	local rule = CROSSWORD_PLACE_ASSIM[w1_base]
	if rule and rule[w2_base] then
		w1_last.ipa = rule[w2_base]
	end
end

-- ---------------------------------------------------------------------------
-- Word-level rendering
-- ---------------------------------------------------------------------------

-- Tokenize, syllabify, and assign base phonetics for one word. Returns a
-- table { text, syllables } with the word's mutable phonological state.
local function prepare_word_state(word, pagename)
	local tokens = tokenize(word, pagename)
	merge_geminate_consonants(tokens)
	local syllables = syllabify(tokens)
	map_base_phonetics(syllables)
	return {text = word, syllables = syllables}
end

-- Render a fully polished syllables list into an IPA string.
-- Identical to the original tail of process_single_word_ipa.
--
-- For each syllable, in order, this emits:
--   * "." when the previous syllable ends in a vowel and this one starts with
--     a vowel (neither flagged silent_i), unless this one starts with "ie",
--     whose IPA already begins with the glide added in map_base_phonetics;
--   * the stress mark: "ˌ" for secondary stress, otherwise syl.stress if set;
--   * every token's ipa ("(j)" for an optional respelling glide), followed by
--     "ˑ" when the token is flagged half_long.
-- Raises an error if a syllable carries both primary and secondary stress.
local function render_word_ipa(syllables)
	local parts = {}
	for s_idx, syl in ipairs(syllables) do
		local syl_str = ""

		if s_idx > 1 then
			local prev_syl = syllables[s_idx - 1]
			local prev_last_tok = prev_syl[#prev_syl]
			local curr_first_tok = syl[1]
			local prev_ends_with_v = prev_last_tok
				and prev_last_tok.type == "V"
				and not prev_last_tok.silent_i
			local curr_starts_with_v = curr_first_tok
				and curr_first_tok.type == "V"
				and not curr_first_tok.silent_i
			-- Standard hiatus gets a dot and no inserted glide; "ie" is the
			-- only special case and gets no dot.
			if prev_ends_with_v and curr_starts_with_v
				and ulower(curr_first_tok.char) ~= "ie" then
				syl_str = "."
			end
		end

		-- Check for conflict between primary and secondary stress
		if syl.secondary_stress and syl.stress then
			error("lt-pron: A syllable cannot have both primary stress (tone mark) and secondary stress (ˌ)")
		end

		-- Add stress markers (primary or secondary, mutually exclusive)
		if syl.secondary_stress then
			syl_str = syl_str .. "ˌ"
		elseif syl.stress then
			syl_str = syl_str .. syl.stress
		end

		for _, tok in ipairs(syl) do
			if tok.is_respelling and tok.is_optional then
				syl_str = syl_str .. "(j)"
			else
				syl_str = syl_str .. tok.ipa
			end
			if tok.half_long then
				syl_str = syl_str .. "ˑ"
			end
		end

		insert(parts, syl_str)
	end
	return concat(parts, "")
end

-- ---------------------------------------------------------------------------
-- Multi-word IPA assembler with cross-word phonology
-- ---------------------------------------------------------------------------
-- Pipeline for a phrase made of space-separated words.
The liaison marker ‿ -- never appears in input; it is inserted into the rendered IPA according to -- clitic-group anchors computed from per-word stress (see -- compute_clitic_anchors above). -- -- Per-word polish order (unchanged from VLKK §17–§19): -- terminal devoicing → voicing assim → place assim → geminate simp → -- final-cons vocalization → cross-word polish at junction → -- palatalization (with cross-word right context) → nasal assim → -- vowel quality. -- -- Cross-word polish at the junction (§21b/§22b/§23b) runs *before* W1's -- palatalization pass so that any rewritten ipa still receives ʲ correctly. -- Words are processed RTL so that each W_i sees the already-polished state -- of W_{i+1} when computing its cross-word context. local function to_ipa(term, provided_pagename) -- Use provided pagename if available, otherwise try to extract from term local pagename, clean_term if provided_pagename then pagename = provided_pagename clean_term = term else pagename, clean_term = get_pagename(term) end term = clean_term -- Split input and pagename on whitespace; bail out if input contains ‿. local input_segs = split_into_segments(term) local pagename_segs = nil if pagename then pagename_segs = split_into_segments(pagename) -- Verify word count matches (only when both are multi-word) if #input_segs > 1 and #pagename_segs > 1 and #input_segs ~= #pagename_segs then error("lt-pron: Input has " .. #input_segs .. " words but pagename has " .. #pagename_segs .. " words. They must match.") end -- If pagename is a single word but input is multi-word, drop pagename -- alignment (respelling detection only makes sense for exact matches). if #pagename_segs == 1 and #input_segs > 1 then pagename_segs = nil end end -- Stage 1: tokenize / syllabify / map base phonetics for every word. 
local word_data = {} for i, seg_text in ipairs(input_segs) do local seg_pagename = (pagename_segs and pagename_segs[i]) or nil insert(word_data, prepare_word_state(seg_text, seg_pagename)) end if #word_data == 0 then return "" end -- Stage 2: compute clitic anchors and decide liaison per junction. -- Two adjacent words share a ‿ iff they belong to the same clitic group. local anchors = compute_clitic_anchors(word_data) for i = 1, #word_data - 1 do word_data[i].is_liaison = (anchors[i] == anchors[i+1]) end if word_data[#word_data] then word_data[#word_data].is_liaison = false -- no successor end -- Stage 3: flag the už §20 exception. už keeps its [ʒ] (i.e. terminal -- devoicing is skipped) only when it is in a liaison junction with the -- following word AND that word starts with a vowel or sonorant. Whether -- už is proclitic or enclitic in the group doesn't matter — what matters -- is that ‿ sits between už and the next phoneme. for i = 1, #word_data do local wd = word_data[i] wd.skip_terminal_devoicing = false if wd.is_liaison and word_data[i+1] then if should_skip_devoicing_for_uz(wd.text, word_data[i+1].syllables) then wd.skip_terminal_devoicing = true end end end -- Stage 4: run the polish pipeline RTL across word_data so each W_i sees -- W_{i+1}'s polished state when computing cross-word context. for i = #word_data, 1, -1 do local wd = word_data[i] local next_wd = wd.is_liaison and word_data[i+1] or nil -- Within-word polish (passes 1-5) if not wd.skip_terminal_devoicing then apply_terminal_devoicing(wd.syllables) end apply_voicing_assimilation(wd.syllables) apply_place_assimilation(wd.syllables) apply_geminate_simplification(wd.syllables) apply_final_consonant_vocalization(wd.syllables) -- Cross-word fricative simplification / place assimilation at the -- liaison boundary, before W1 palatalizes (so a freshly assimilated -- s → ʃ can still pick up ʲ). 
if next_wd then apply_crossword_polish_at_junction(wd.syllables, next_wd.syllables) end -- Determine right palatalization context for the cross-word case. local right_palatalizing = false if next_wd then right_palatalizing = first_token_palatalizes(next_wd.syllables) end -- Pass 6: palatalization (with cross-word context). apply_palatalization(wd.syllables, right_palatalizing) -- Remaining within-word passes. apply_nasal_assimilation(wd.syllables) adjust_vowel_quality(wd.syllables) end -- Stage 5: render each word and join with ‿ (same clitic group) or " ". local result_parts = {} for i, wd in ipairs(word_data) do insert(result_parts, render_word_ipa(wd.syllables)) end local result = result_parts[1] or "" for i = 2, #word_data do local sep = word_data[i-1].is_liaison and LIAISON or " " result = result .. sep .. result_parts[i] end return result end -- Rhyme Extractor local IPA_VOWELS = "aɐɑæɛeəɪiɔoʊuɒɜ" local function get_rhyme(ipa) -- Remove liaison and spaces before rhyme calculation local clean_ipa = ugsub(ipa, "[‿ ]", "") -- Search for the last stress mark from right to left local last_stress_pos = nil for i = ulen(clean_ipa), 1, -1 do local char = usub(clean_ipa, i, i) if char == "ˈ" then -- Skip superscript if present if i > 1 then local prev = usub(clean_ipa, i - 1, i - 1) if prev == "¹" or prev == "²" then last_stress_pos = i -- Point to ˈ, skip superscript else last_stress_pos = i end else last_stress_pos = i end break end end if not last_stress_pos then return nil end -- Extract content after the stress mark local after = usub(clean_ipa, last_stress_pos + 1) -- Remove hiatus dots for rhyme grouping after = ugsub(after, "%.", "") -- Find first vowel position (skip onset consonants) local vstart = umatch(after, "()[" .. IPA_VOWELS .. 
"]") if vstart then return usub(after, vstart) end return after end -- Hyphenation generator (supports multi-word phrases) local function get_hyphenation(term, provided_pagename) -- Use provided pagename if available, otherwise try to extract from term local pagename, clean_term if provided_pagename then pagename = provided_pagename clean_term = term else pagename, clean_term = get_pagename(term) end term = clean_term -- Split input on both ‿ and space, tracking the separator type so the -- final hyphenation string can preserve liaison markers from the input. local input_segs = split_into_segments(term) local pagename_segs = nil if pagename then pagename_segs = split_into_segments(pagename) -- Verify segment count matches (only when both are multi-segment) if #input_segs > 1 and #pagename_segs > 1 and #input_segs ~= #pagename_segs then error("lt-pron: Input has " .. #input_segs .. " words but pagename has " .. #pagename_segs .. " words. They must match.") end -- If pagename is a single word but input is multi-word, drop alignment if #pagename_segs == 1 and #input_segs > 1 then pagename_segs = nil end end local all_word_parts = {} for i, seg_text in ipairs(input_segs) do -- Get the corresponding pagename word, or nil if not available local seg_pagename = (pagename_segs and pagename_segs[i]) or nil local tokens = tokenize(seg_text, seg_pagename) local syllables = syllabify(tokens) local parts = {} for _, syl in ipairs(syllables) do local text = "" for _, tok in ipairs(syl) do -- Skip respelling characters (original_char == "-" means not in orthography) if tok.original_char ~= "-" then -- Re-attach original tone visually for display local t_mark = "" if tok.tone == "acute" then t_mark = ACUTE elseif tok.tone == "grave" then t_mark = GRAVE elseif tok.tone == "tilde" then t_mark = TILDE end -- Use original_char if set (e.g., j̃ → i internally but j in display) -- Otherwise use tok.char local disp_char = tok.original_char or tok.char -- Restore original orthography for ɔ 
if disp_char == "ɔ" then disp_char = "o" end if disp_char == "Ɔ" then disp_char = "O" end -- Handle tone placement for diphthongs if tok.tone_position then -- Diphthong: place tone on the specified vowel local char_len = ulen(disp_char) if char_len == 2 then -- Two-character diphthong local v1 = usub(disp_char, 1, 1) local v2 = usub(disp_char, 2, 2) if tok.tone_position == 1 then text = text .. v1 .. t_mark .. v2 else text = text .. v1 .. v2 .. t_mark end elseif char_len == 3 then -- Three-character (silent i) diphthong local v1 = usub(disp_char, 1, 1) local v2 = usub(disp_char, 2, 2) local v3 = usub(disp_char, 3, 3) if tok.tone_position == 1 then text = text .. v1 .. t_mark .. v2 .. v3 elseif tok.tone_position == 2 then text = text .. v1 .. v2 .. t_mark .. v3 else text = text .. v1 .. v2 .. v3 .. t_mark end end else -- Single vowel or consonant: tone goes after the character text = text .. disp_char .. t_mark end end end parts[#parts + 1] = makeDisplayText(text) end insert(all_word_parts, concat(parts, "‧")) -- Use ‧ instead of standard - internally to preserve word boundaries end -- Hyphenation always joins multi-word phrases with a plain space — the -- liaison marker ‿ is purely an IPA-output device and never appears here. 
local combined_string = concat(all_word_parts, " ") local final_parts = {} for _, piece in ipairs(rsplit(combined_string, "‧")) do insert(final_parts, piece) end -- Calculate actual syllable count (for correct num_syl) local syllable_count = 0 for _, word_part in ipairs(all_word_parts) do local word_syls = rsplit(word_part, "‧") syllable_count = syllable_count + #word_syls end -- Store actual syllable count as a field (since __len doesn't work on tables in Lua 5.1) final_parts.syllable_count = syllable_count return final_parts end -- ============================================================================ -- SECTION 7.5: Conjugation Module Support Functions -- ============================================================================ -- Export: Get syllables as string array with accents function export.get_syllables(term) local pagename, clean_term = get_pagename(term) term = clean_term local tokens = tokenize(term, pagename) local syllables = syllabify(tokens) local result = {} for _, syl in ipairs(syllables) do local text = "" for _, tok in ipairs(syl) do -- Skip respelling glides in syllable output if not tok.is_respelling then -- Re-attach original tone local t_mark = "" if tok.tone == "acute" then t_mark = ACUTE elseif tok.tone == "grave" then t_mark = GRAVE elseif tok.tone == "tilde" then t_mark = TILDE end -- Restore original orthography for ɔ local disp_char = tok.char if disp_char == "ɔ" then disp_char = "o" end if disp_char == "Ɔ" then disp_char = "O" end -- Handle tone placement for diphthongs if tok.tone_position then -- Diphthong: place tone on the specified vowel local char_len = ulen(disp_char) if char_len == 2 then -- Two-character diphthong local v1 = usub(disp_char, 1, 1) local v2 = usub(disp_char, 2, 2) if tok.tone_position == 1 then text = text .. v1 .. t_mark .. v2 else text = text .. v1 .. v2 .. 
t_mark end elseif char_len == 3 then -- Three-character (silent i) diphthong local v1 = usub(disp_char, 1, 1) local v2 = usub(disp_char, 2, 2) local v3 = usub(disp_char, 3, 3) if tok.tone_position == 1 then text = text .. v1 .. t_mark .. v2 .. v3 elseif tok.tone_position == 2 then text = text .. v1 .. v2 .. t_mark .. v3 else text = text .. v1 .. v2 .. v3 .. t_mark end end else -- Single vowel or consonant: tone goes after the character text = text .. disp_char .. t_mark end end end -- Return NFC format with proper dotabove insertion insert(result, makeDisplayText(text)) end return result end -- Export: Check if a syllable is heavy or light function export.is_heavy_syllable(syllable) local pagename, clean_syllable = get_pagename(syllable) syllable = clean_syllable -- Validate single syllable local tokens = tokenize(syllable, pagename) local syllables = syllabify(tokens) if #syllables ~= 1 then error("is_heavy_syllable: input must be a single syllable, got " .. #syllables .. " syllables") end local syl = syllables[1] -- Find the vowel token local v_idx = nil for i, tok in ipairs(syl) do if tok.type == "V" then v_idx = i break end end if not v_idx then error("is_heavy_syllable: no vowel found in syllable") end local v_tok = syl[v_idx] local lc_v = ulower(v_tok.char) -- Handle silent i (e.g., "iau" where i is silent) if v_tok.has_silent_i then lc_v = usub(lc_v, 2) end -- Long vowels (inherently long, regardless of accent) local LONG_VOWELS = { ["ą"]=true, ["ę"]=true, ["ė"]=true, ["į"]=true, ["y"]=true, ["ų"]=true, ["ū"]=true, ["o"]=true, ["ɔ"]=true -- o/ɔ are always long (except ò, but we treat all o as long) } if LONG_VOWELS[lc_v] then return true end -- Diphthongs (length > 1) if ulen(lc_v) > 1 then return true end -- Mixed diphthongs: short vowel + liquid in coda position local SHORT_VOWELS = {["a"]=true, ["e"]=true, ["i"]=true, ["u"]=true} if SHORT_VOWELS[lc_v] and v_idx < #syl then local next_tok = syl[v_idx + 1] -- Liquid in coda (not j or v, which don't form 
mixed diphthongs) if next_tok.type == "R" and next_tok.char ~= "j" and next_tok.char ~= "v" then return true end end -- Otherwise, it's a light syllable return false end -- Export: Change accent of a syllable function export.change_accent(syllable, target_accent) local pagename, clean_syllable = get_pagename(syllable) syllable = clean_syllable -- Validate target_accent parameter local VALID_ACCENTS = {acute=true, tilde=true, grave=true, none=true} if not VALID_ACCENTS[target_accent] then error("change_accent: invalid target_accent '" .. tostring(target_accent) .. "', must be 'acute', 'tilde', 'grave', or 'none'") end -- Validate single syllable local tokens = tokenize(syllable, pagename) local syllables = syllabify(tokens) if #syllables ~= 1 then error("change_accent: input must be a single syllable, got " .. #syllables .. " syllables") end -- Remove all existing accents local clean = remove_all_accents(syllable) -- If target is 'none', return clean syllable if target_accent == "none" then return clean end -- Re-tokenize the clean syllable to analyze structure local clean_tokens = tokenize(clean, pagename) local clean_syllables = syllabify(clean_tokens) local syl = clean_syllables[1] -- Find vowel position and extract vowel part local onset = "" local vowel_part = "" local coda = "" local v_idx = nil for i, tok in ipairs(syl) do if tok.type == "V" then v_idx = i break else onset = onset .. tok.char end end if not v_idx then error("change_accent: no vowel found in syllable") end local v_tok = syl[v_idx] vowel_part = ulower(v_tok.char) -- Handle silent i if v_tok.has_silent_i then vowel_part = usub(vowel_part, 2) end -- Check if this is a mixed diphthong local is_mixed = false if v_idx < #syl then local next_tok = syl[v_idx + 1] if next_tok.type == "R" and next_tok.char ~= "j" and next_tok.char ~= "v" then -- Mixed diphthong: vowel + liquid vowel_part = vowel_part .. 
ulower(next_tok.char) is_mixed = true -- Collect remaining coda after the liquid for i = v_idx + 2, #syl do coda = coda .. syl[i].char end else -- Regular syllable: collect all coda for i = v_idx + 1, #syl do coda = coda .. syl[i].char end end end -- Look up the accented form in ACCENT_PAIRS if not ACCENT_PAIRS[vowel_part] then error("change_accent: vowel/diphthong '" .. vowel_part .. "' not found in accent pairs table") end local accented_vowel = ACCENT_PAIRS[vowel_part][target_accent] if not accented_vowel then error("change_accent: accent type '" .. target_accent .. "' is not allowed for vowel/diphthong '" .. vowel_part .. "'") end -- Reconstruct the syllable with new accent (in NFD format) local result = onset .. accented_vowel .. coda return result end -- ============================================================================ -- SECTION 8: Module Exports & Template Formatting (Preserved) -- ============================================================================ local q_spec = {store = "insert-flattened", type = "qualifier"} local a_spec = {store = "insert-flattened", type = "labels"} local ref_spec = {store = "insert-flattened", item_dest = "refs", type = "references"} -- Generate audio object, supporting file#caption syntax local function generate_audio_obj(arg) local file, caption = arg:match("^(.-)%s*#%s*(.*)$") file = file or arg return {file = file, caption = caption} end -- Parse rhyme specification with optional syllable count local function parse_rhyme(arg, parse_err) local function generate_obj(term) return {rhyme = term} end local param_mods = { s = { item_dest = "num_syl", type = "number", sublist = true, }, } -- Add q/qq/a/aa/ref support if inline modifiers are present if arg:find("<") then param_mods.q = q_spec param_mods.qq = q_spec param_mods.a = a_spec param_mods.aa = a_spec param_mods.ref = ref_spec end return require(parse_util_module).parse_inline_modifiers(arg, { param_mods = param_mods, generate_obj = generate_obj, parse_err = 
parse_err, splitchar = ",", }) end -- Parse hyphenation specification (dot-separated syllables) local function parse_hyph(arg, parse_err) local function generate_obj(term) local parts = rsplit(term, "%.") return {hyph = parts, syllabification = term} end local param_mods = {} -- Add q/qq/a/aa/ref support if inline modifiers are present if arg:find("<") then param_mods.q = q_spec param_mods.qq = q_spec param_mods.a = a_spec param_mods.aa = a_spec param_mods.ref = ref_spec end return require(parse_util_module).parse_inline_modifiers(arg, { param_mods = param_mods, generate_obj = generate_obj, parse_err = parse_err, splitchar = ",", }) end -- Parse homophone specification local function parse_homophone(arg, parse_err) local function generate_obj(term) return {term = term} end local param_mods = { t = {item_dest = "gloss"}, gloss = {}, pos = {}, alt = {}, lit = {}, id = {}, g = { item_dest = "genders", sublist = true, }, } -- Add q/qq/a/aa/ref support if inline modifiers are present if arg:find("<") then param_mods.q = q_spec param_mods.qq = q_spec param_mods.a = a_spec param_mods.aa = a_spec param_mods.ref = ref_spec end return require(parse_util_module).parse_inline_modifiers(arg, { param_mods = param_mods, generate_obj = generate_obj, parse_err = parse_err, splitchar = ",", }) end local audio_nested_mods = { ["a"] = a_spec, ["aa"] = a_spec, ["q"] = q_spec, ["qq"] = q_spec, ["text"] = {}, ["IPA"] = {sublist = true}, ["t"] = {item_dest = "gloss"}, ["gloss"] = {}, ["pos"] = {}, ["lit"] = {}, ["g"] = { item_dest = "genders", sublist = true, }, } local function parse_one_term(raw, parse_err) if not raw:find("<") then return {term = raw, audio_list = {}, rhyme_list = {}, hyph_list = {}, pagename = nil} end -- Extract base spelling before parse_inline_modifiers local pagename = nil if raw:find("<base:") then pagename = raw:match("<base:([^>]+)>") raw = raw:gsub("<base:[^>]+>", "") end local parsed = require(parse_util_module).parse_inline_modifiers(raw, { param_mods = { 
["q"] = q_spec, ["qq"] = q_spec, ["a"] = a_spec, ["aa"] = a_spec, ["ref"] = ref_spec, ["audio"] = { store = "insert", item_dest = "audio_list", convert = function(arg, perr) if arg:find("<") then local parsed_audio = require(parse_util_module).parse_inline_modifiers(arg, { param_mods = audio_nested_mods, generate_obj = generate_audio_obj, parse_err = perr, }) parsed_audio.lang = get_lang() local textobj = require(audio_module).construct_audio_textobj(parsed_audio) parsed_audio.text = textobj parsed_audio.gloss = nil parsed_audio.pos = nil parsed_audio.lit = nil parsed_audio.genders = nil return parsed_audio end local audio_obj = generate_audio_obj(arg) audio_obj.lang = get_lang() local textobj = require(audio_module).construct_audio_textobj(audio_obj) audio_obj.text = textobj return audio_obj end, }, ["rhyme"] = { store = "insert-flattened", item_dest = "rhyme_list", convert = parse_rhyme, }, ["hyph"] = { store = "insert-flattened", item_dest = "hyph_list", convert = parse_hyph, }, ["hmp"] = { store = "insert-flattened", item_dest = "hmp_list", convert = parse_homophone, }, }, generate_obj = function(t) return {term = t, audio_list = {}, rhyme_list = {}, hyph_list = {}, hmp_list = {}} end, parse_err = parse_err, }) parsed.audio_list = parsed.audio_list or {} parsed.rhyme_list = parsed.rhyme_list or {} parsed.hyph_list = parsed.hyph_list or {} parsed.hmp_list = parsed.hmp_list or {} parsed.pagename = pagename return parsed end -- Format rhyme objects with qualifiers local function fmt_rhyme(rhyme_objs, bullet) if not rhyme_objs or #rhyme_objs == 0 then return nil end local rhyme_data = {} for _, robj in ipairs(rhyme_objs) do insert(rhyme_data, { rhyme = robj.rhyme, num_syl = robj.num_syl, q = robj.q, qq = robj.qq, a = robj.a, aa = robj.aa, }) end return bullet .. 
require(rhymes_module).format_rhymes({ lang = get_lang(), rhymes = rhyme_data }) end -- Format hyphenation objects with qualifiers local function fmt_hyph(hyph_objs, bullet) if not hyph_objs or #hyph_objs == 0 then return nil end local hyph_data = {} for _, hobj in ipairs(hyph_objs) do insert(hyph_data, { hyph = hobj.hyph, q = hobj.q, qq = hobj.qq, a = hobj.a, aa = hobj.aa, }) end return bullet .. require(hyphenation_module).format_hyphenations({ lang = get_lang(), hyphs = hyph_data, caption = "Syllabification" }) end -- Format audio object local function fmt_audio(audio_obj, bullet) return bullet .. require(audio_module).format_audio(audio_obj) end -- Format homophone objects with qualifiers local function fmt_hmp(hmp_objs, bullet) if not hmp_objs or #hmp_objs == 0 then return nil end return bullet .. require(homophones_module).format_homophones({ lang = get_lang(), homophones = hmp_objs, }) end local function is_multiword_term(term) -- split_into_segments returns a list of non-empty word strings. return #split_into_segments(term) > 1 end function export.show(frame) local parargs = frame:getParent().args local args = require(parameters_module).process(parargs, { [1] = {default = "nãmas"}, ["bullets"] = {type = "number", default = 1}, }) local input = args[1] local nb = args.bullets local b1 = string.rep("*", nb) .. " " local b2 = string.rep("*", nb + 1) .. " " local raw_terms = require(parse_util_module).split_escaping(input, ",") local parsed_terms = {} for i, raw in ipairs(raw_terms) do raw = raw:match("^%s*(.-)%s*$") local pt = parse_one_term(raw, function(msg) error("lt-pron: " .. msg .. " (term " .. i .. 
")") end) parsed_terms[#parsed_terms + 1] = pt end m_IPA = m_IPA or require("Module:IPA") local text_parts = {} for _, pt in ipairs(parsed_terms) do -- Determine bullet level: same level as IPA for single pronunciation, indented for multiple local content_bullet = (#parsed_terms == 1) and b1 or b2 -- Generate IPA local ipa_str = to_ipa(pt.term, pt.pagename) -- Handle rhyme: manual override, suppression, or auto-generation local rhyme_objs = nil local suppress_rhyme = false if #pt.rhyme_list > 0 then for _, robj in ipairs(pt.rhyme_list) do if robj.rhyme == "-" then suppress_rhyme = true break end end if not suppress_rhyme then rhyme_objs = {} for _, robj in ipairs(pt.rhyme_list) do -- If num_syl not specified, try to get from auto-generated hyphenation if not robj.num_syl then local auto_hyph = get_hyphenation(pt.term, pt.pagename) if auto_hyph and #auto_hyph > 0 then -- Use syllable_count field if available (for multi-word phrases), otherwise use array length local syl_count = auto_hyph.syllable_count or #auto_hyph robj.num_syl = {syl_count} end end insert(rhyme_objs, robj) end end else -- Auto-generate rhyme (skip if term ends with - or is a multiword term) if not pt.term:match("%-$") and not is_multiword_term(pt.term) then local rhyme_str = get_rhyme(ipa_str) if rhyme_str then local auto_hyph = get_hyphenation(pt.term, pt.pagename) -- Use syllable_count field if available (for multi-word phrases), otherwise use array length local num_syl = (auto_hyph and #auto_hyph > 0) and {auto_hyph.syllable_count or #auto_hyph} or nil rhyme_objs = {{rhyme = rhyme_str, num_syl = num_syl}} end end end -- Handle hyphenation: manual override, suppression, or auto-generation local hyph_objs = nil local suppress_hyph = false if #pt.hyph_list > 0 then for _, hobj in ipairs(pt.hyph_list) do if hobj.syllabification == "-" then suppress_hyph = true break end end if not suppress_hyph then hyph_objs = pt.hyph_list end else -- Auto-generate hyphenation local auto_hyph = 
get_hyphenation(pt.term, pt.pagename) if auto_hyph and #auto_hyph > 0 then hyph_objs = {{hyph = auto_hyph, syllabification = concat(auto_hyph, ".")}} end end -- Format IPA with qualifiers and references local ipa_item = {pron = "[" .. ipa_str .. "]"} if pt.q then ipa_item.q = pt.q end if pt.qq then ipa_item.qq = pt.qq end if pt.a then ipa_item.a = pt.a end if pt.aa then ipa_item.aa = pt.aa end if pt.refs then ipa_item.refs = pt.refs end text_parts[#text_parts + 1] = b1 .. m_IPA.format_IPA_full({ lang = get_lang(), items = {ipa_item} }) -- Audio for _, aud in ipairs(pt.audio_list or {}) do text_parts[#text_parts + 1] = fmt_audio(aud, content_bullet) end -- Rhyme if rhyme_objs then local r = fmt_rhyme(rhyme_objs, content_bullet) if r then text_parts[#text_parts + 1] = r end end -- Hyphenation if hyph_objs then local h = fmt_hyph(hyph_objs, content_bullet) if h then text_parts[#text_parts + 1] = h end end -- Homophones if pt.hmp_list and #pt.hmp_list > 0 then local hmp = fmt_hmp(pt.hmp_list, content_bullet) if hmp then text_parts[#text_parts + 1] = hmp end end end return concat(text_parts, "\n") end export.toIPA = to_ipa export.hyphenate = get_hyphenation export.rhyme = get_rhyme return export pqyyto9ntkmwafrrq481zamx4xplqhv မဝ်ဂျူ:lt-pron/testcases 828 219830 395930 300401 2026-05-29T18:17:59Z 咽頭べさ 33 395930 Scribunto text/plain local tests = require("Module:UnitTests") local m_pron = require("Module:User:TongcyDai/lt-pron") local unpack = unpack or table.unpack -- ── helpers ────────────────────────────────────────────────────────────────── local function tag_IPA(s) return '<span class="IPA">' .. s .. 
'</span>' end -- Compare toIPA output function tests:check_IPA(term, expected, comment) self:equals( term, tag_IPA(mw.ustring.toNFC(m_pron.toIPA(term))), tag_IPA(expected), { comment = comment, show_difference = true } ) end -- Compare hyphenate output function tests:check_hyph(term, expected, comment) local parts = m_pron.hyphenate(term) self:equals( term, table.concat(parts, "‧"), expected, { comment = comment, show_difference = true } ) end -- Compare rhyme output function tests:check_rhyme(term, expected, comment) local ipa = m_pron.toIPA(term) self:equals( term, mw.ustring.toNFC(m_pron.rhyme(ipa) or ""), expected, { comment = comment, show_difference = true } ) end -- ════════════════════════════════════════════════════════════════════════════ -- IPA TESTS -- ════════════════════════════════════════════════════════════════════════════ -- A: Consonants and basic palatalization (one example per consonant, in two -- versions: hard variant + soft variant before front vowel). function tests:test_IPA_A_consonants_and_palatalization() local examples = { -- B, b { "bai̇̃gti", "²ˈbɐɪˑktʲɪ", "b" }, { "bi̇̀rbt", "¹ˈbʲɪrpt", "bʲ" }, -- C, c { "cùkrus", "ˈt͡sʊkrʊs", "c" }, { "ci̇̀bė", "ˈt͡sʲɪbʲeː", "cʲ" }, -- Č, č { "čaižùs", "t͡ʃɐɪˈʒʊs", "č" }, { "Kãčinas", "²ˈkɑːt͡ʃʲɪnɐs", "čʲ" }, -- D, d { "daũg", "²ˈdɒʊˑk", "d" }, { "di̇̀delis", "ˈdʲɪdʲɛlʲɪs", "dʲ" }, -- F, f { "fãzė", "²ˈfɑːzʲeː", "f" }, { "filė̃", "fʲɪ²ˈlʲeː", "fʲ" }, -- G, g { "gãlas", "²ˈɡɑːlɐs", "g" }, { "girià", "ɡʲɪˈrʲɛ", "gʲ" }, -- H, h { "hãlė", "²ˈɣɑːlʲeː", "h" }, { "hi̇̀mnas", "¹ˈɣʲɪmnɐs", "hʲ" }, -- J, j { "jáunas<base:jaunas>", "¹ˈjæˑʊnɐs", "j" }, { "vajè", "ʋɐˈjɛ", "j" }, -- K, k { "kãras", "²ˈkɑːrɐs", "k" }, { "kitóks", "kʲɪ¹ˈtoːks", "kʲ" }, -- L, l { "lãbas", "²ˈlɑːbɐs", "l" }, { "lė̃kti", "²ˈlʲeːktʲɪ", "lʲ" }, -- M, m { "mamà", "mɐˈmɐ", "m" }, { "méilė", "¹ˈmʲæˑɪlʲeː", "mʲ" }, -- N, n { "nósis", "¹ˈnoːsʲɪs", "n" }, { "knygà", "knʲiːˈɡɐ", "nʲ" }, -- P, p { "pãdas", "²ˈpɑːdɐs", "p" }, { "pẽčius", 
"²ˈpʲæːt͡ʃʲʊ̟s", "pʲ" }, -- R, r { "rai̇̃dė", "²ˈrɐɪˑdʲeː", "r" }, { "kairė̃", "kɐɪ²ˈrʲeː", "rʲ" }, -- S, s { "sõdas", "²ˈsoːdɐs", "s" }, { "si̇́ela", "¹ˈsʲiɛlɐ", "sʲ" }, -- Š, š { "šókti", "¹ˈʃoːktʲɪ", "š" }, { "šỹpsena", "²ˈʃʲiːpʲsʲɛnɐ", "šʲ" }, -- T, t { "tetà", "tʲɛˈtɐ", "tʲ, t" }, -- V, v { "svajõnė", "sʋɐ²ˈjo̟ːnʲeː", "v" }, { "vi̇̀ltis", "¹ˈʋʲɪlʲtʲɪs", "vʲ" }, -- Z, z { "zui̇̃kis", "²ˈzʊɪˑkʲɪs", "z" }, { "zi̇̀r̃zinti", "¹ˈzʲɪrʲzʲɪnʲtʲɪ", "zʲ" }, -- Ž, ž { "žolė̃", "ʒoː²ˈlʲeː", "ž" }, { "žẽmė", "²ˈʒʲæːmʲeː", "žʲ" }, -- Ch, ch { "chalãtas", "xɐ²ˈlɑːtɐs", "ch" }, { "cherèsas", "xʲɛˈrʲɛsɐs", "chʲ" }, -- Dz, dz { "dzū̃kai", "²ˈd͡zuːkɐɪ", "dz" }, { "dzi̇̀ngt", "¹ˈd͡zʲɪŋkt", "dzʲ" }, -- Dž, dž { "džáulis", "¹ˈd͡ʒɑˑʊlʲɪs", "dž" }, { "džiãzas", "²ˈd͡ʒʲæːzɐs", "džʲ" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- B: Monophthongs (short vs. long, native vs. loanword variants). function tests:test_IPA_B_monophthongs() local examples = { -- A, a { "tàvo", "ˈtɐʋoː", "à" }, { "vãkaras", "²ˈʋɑːkɐrɐs", "ã, a" }, -- Ą, ą { "rýtą", "¹ˈrʲiːtɑː", "ą" }, { "ą́žuolas", "¹ˈɑːʒuɔlɐs", "ą́" }, { "šą̃la", "²ˈʃɑːlɐ", "ą̃" }, -- E, e { "Pelesà", "pʲɛlʲɛˈsɐ", "e" }, { "nèšti", "ˈnʲɛʃʲtʲɪ", "è" }, { "mètras", "ˈmʲɛtrɐs", "è" }, { "mẽnas", "²ˈmʲæːnɐs", "ẽ" }, -- E with `^`: closed short e in loanwords (VLKK §6.7.3 — written `ẹ`) { "se^ktà", "sʲekˈtɐ", "e^ (closed short e in loanwords)" }, { "re^ži̇̀mas", "rʲeˈʒʲɪmɐs", "e^ (closed short e in loanwords)" }, -- Ę, ę { "tęsinỹs", "tʲæːsʲɪ²ˈnʲiːs", "ę" }, { "tavę̃s", "tɐ²ˈʋʲæːs", "ę̃" }, { "tę́vas", "¹ˈtʲæːʋɐs", "ę́" }, -- Ė, ė { "ėdė́jas", "eː¹ˈdʲeːjɛs", "ė, ė́" }, { "gėlė̃", "ɡʲeː²ˈlʲeː", "ė, ė̃" }, -- I, i { "liki̇̀mas", "lʲɪˈkʲɪmɐs", "i, i̇̀" }, -- Į, į { "įlį̃sti", "iː²ˈlʲiːsʲtʲɪ", "į, į̃" }, { "į́spūdis", "¹ˈiːspuːdʲɪs", "į́" }, -- Y, y { "mylė́ti", "mʲiː¹ˈlʲeːtʲɪ", "y" }, { "ýda", "¹ˈiːdɐ", "ý" }, { "knỹgė", "²ˈknʲiːɡʲeː", "ỹ" }, -- O, o (long native [oː] vs. 
short loanword [ɔ]) { "norė́ti", "noː¹ˈrʲeːtʲɪ", "o (native, long)" }, { "óras", "¹ˈoːrɐs", "ó" }, { "keliõnė", "kʲɛ²ˈlʲo̟ːnʲeː", "õ" }, { "òmas", "ˈɔmɐs", "ò (loanword, short)" }, -- FIXME: ò /oː/ exists? { "stòksas", "ˈstɔksɐs", "ò (loanword, grave)" }, { "Zo^jà", "zɔˈjɛ", "o^ (loanword [ɔ], unstressed)" }, { "žo^ngliẽrius", "ʒɔŋ²ˈɡlʲiɛrʲʊ̟s", "o^ (loanword [ɔ], unstressed)" }, { "fo^to^parodà", "fɔtɔpɐroːˈdɐ", "o^ (loanword foto- prefix)" }, { "ho^mo^ni̇̀mas", "ɣɔmɔˈnʲɪmɐs", "o^ (loanword homo- prefix)" }, -- U, u { "ugni̇̀s", "ʊˈɡnʲɪs", "u" }, { "pùsė", "ˈpʊsʲeː", "ù" }, -- Ų, ų { "siųstùvas", "sʲu̟ːˈstʊʋɐs", "ų" }, { "įskų́sti", "iː¹ˈskuːsʲtʲɪ", "ų́" }, { "sių̃sti", "²ˈsʲu̟ːsʲtʲɪ", "ų̃" }, -- Ū, ū { "sū́nūs", "¹ˈsuːnuːs", "ū́, ū" }, { "rū̃gštis", "²ˈruːkʃʲtʲɪs", "ū̃" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- C1: Diphthongs ai / au — three accent positions each. function tests:test_IPA_C1_diphthongs_ai_au() local examples = { -- ai { "táikino", "¹ˈtɑˑɪkʲɪnoː", "ái (acute)" }, { "tai̇̃ką", "²ˈtɐɪˑkɑː", "ai̇̃ (tilde)" }, { "taiki̇̀klis", "tɐɪˈkʲɪklʲɪs", "ai (unstressed)" }, -- au { "tráukia", "¹ˈtrɑˑʊkʲɛ", "áu (acute)" }, { "patraũklų", "pɐ²ˈtrɒʊˑkluː", "aũ (tilde)" }, { "pértrauka", "¹ˈpʲæˑrtrɒʊkɐ", "au (unstressed)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- C2: Diphthongs ei / ui — three accent positions each. 
function tests:test_IPA_C2_diphthongs_ei_ui()
	-- Rows are { input term, expected IPA, label }, passed positionally to
	-- check_IPA(term, expected, comment).
	local cases = {
		-- ei
		{ "méilė", "¹ˈmʲæˑɪlʲeː", "éi (acute)" },
		{ "mei̇̃liai", "²ˈmʲɛɪˑlʲɛɪ", "ei̇̃ (tilde)" },
		{ "meilikáuti", "mʲɛɪlʲɪ¹ˈkɑˑʊtʲɪ", "ei (unstressed)" },
		-- ui
		{ "kùisytis", "¹ˈkʊɪsʲiːtʲɪs", "ùi (grave; first element short)" },
		{ "kui̇̃sti", "²ˈkʊɪˑsʲtʲɪ", "ui̇̃ (tilde)" },
		{ "kuitinė́tis", "kʊɪtʲɪ¹ˈnʲeːtʲɪs", "ui (unstressed)" },
		{ "bùivo^las", "¹ˈbʊɪʋɔlɐs", "ùi (grave; with loanword o^)" },
		-- úi (acute with a half-long first element) is a free-style variant
		-- and is deliberately not tested separately.
	}
	for idx = 1, #cases do
		local row = cases[idx]
		self:check_IPA(row[1], row[2], row[3])
	end
end

-- C3: Variable diphthongs ie / uo (treated phonemically as monophthongs by VLKK,
-- but written as digraphs and patterning with diphthongs in accent placement).
function tests:test_IPA_C3_diphthongs_ie_uo()
	-- Rows are { input term, expected IPA, label }; a "<base:...>" suffix on
	-- the term is part of the input string handed to the module.
	local cases = {
		-- ie
		{ "si̇́ena", "¹ˈsʲiɛnɐ", "íe (acute)" },
		{ "jiẽ<base:jie>", "²ˈjiɛ", "iẽ (tilde)" },
		{ "Diẽvas", "²ˈdʲiɛʋɐs", "iẽ (tilde)" },
		{ "Ki̇̀msienė", "¹ˈkʲɪmʲsʲiɛnʲeː", "ie (unstressed)" },
		-- uo
		{ "úodas", "¹ˈuɔdɐs", "úo (acute)" },
		{ "ruduõ", "rʊ²ˈduɔ", "uõ (tilde)" },
		{ "Aluojà<base:Aluoja>", "ɐluɔˈjɛ", "uo (unstressed)" },
		{ "vaizduõtė", "ʋɐɪ²ˈzduɔtʲeː", "uõ (tilde, after consonant cluster)" },
	}
	for idx = 1, #cases do
		local row = cases[idx]
		self:check_IPA(row[1], row[2], row[3])
	end
end

-- C4: Mixed diphthongs of the a-series — al, am, an, ar.
function tests:test_IPA_C4_mixed_a()
	-- Rows are { input term, expected IPA, label }, passed positionally to
	-- check_IPA(term, expected, comment).
	local cases = {
		-- al
		{ "álkanas", "¹ˈɑˑlkɐnɐs", "ál (acute)" },
		{ "al̃kis", "²ˈɐlʲˑkʲɪs", "al̃ (tilde)" },
		{ "alkanáuti", "ɐlkɐ¹ˈnɑˑʊtʲɪ", "al (unstressed)" },
		-- am
		{ "skámbčioti", "¹ˈskɑˑmʲpʲt͡ʃʲo̟ːtʲɪ", "ám (acute)" },
		{ "skam̃biai", "²ˈskɐmʲˑbʲɛɪ", "am̃ (tilde)" },
		{ "skambùmas", "skɐmˈbʊmɐs", "am (unstressed)" },
		-- an
		{ "ránkioja<base:rankioja>", "¹ˈrɑˑŋʲkʲo̟ːjɛ", "án (acute)" },
		{ "rañkdarbis", "²ˈrɐŋˑɡdɐrʲbʲɪs", "añ (tilde)" },
		{ "rankinùkas", "rɐŋʲkʲɪˈnʊkɐs", "an (unstressed)" },
		-- ar
		{ "sárgas", "¹ˈsɑˑrɡɐs", "ár (acute)" },
		{ "sar̃giai", "²ˈsɐrʲˑɡʲɛɪ", "ar̃ (tilde)" },
		{ "sargýba", "sɐrʲ¹ˈɡʲiːbɐ", "ar (unstressed)" },
	}
	for idx = 1, #cases do
		local row = cases[idx]
		self:check_IPA(row[1], row[2], row[3])
	end
end

-- C5: Mixed diphthongs of the e-series — el, em, en, er.
-- Foreign-word variants with grave (èl, èm, èn, èr) read tvirtapradiškai
-- are tested as alternates per VLKK §9.21, §9.23.
function tests:test_IPA_C5_mixed_e()
	-- Same row layout as the other IPA tables: { term, expected IPA, label }.
	local cases = {
		-- el
		{ "kélmas", "¹ˈkʲæˑlmɐs", "él (acute)" },
		{ "Kel̃mė", "²ˈkʲɛlʲˑmʲeː", "el̃ (tilde)" },
		{ "kelmùtis", "kʲɛlˈmʊtʲɪs", "el (unstressed)" },
		{ "èlfas", "¹ˈɛlfɐs", "èl (loanword, grave; tvirtapradiškai)" },
		{ "el̃fas", "²ˈɛlˑfɐs", "el̃ (loanword, tilde)" },
		-- em
		{ "drémžti", "¹ˈdʲrʲæˑmʲʃʲtʲɪ", "ém (acute; ž → š before t)" },
		{ "drem̃bti", "²ˈdʲrʲɛmʲˑpʲtʲɪ", "em̃ (tilde)" },
		{ "Trempai̇̃", "tʲrʲɛm²ˈpɐɪˑ", "em (unstressed)" },
		{ "Jaržèmskis", "jɛrʲ¹ˈʒʲɛmʲsʲkʲɪs", "èm (loanword, grave)" },
		{ "kem̃pingas", "²ˈkʲɛmʲˑpʲɪŋɡɐs", "em̃ (loanword, tilde)" },
		-- en
		{ "véngia", "¹ˈʋʲæˑŋʲɡʲɛ", "én (acute)" },
		{ "žeñgsena", "²ˈʒʲɛŋˑksʲɛnɐ", "eñ (tilde)" },
		{ "vengi̇̀mas", "ʋʲɛŋʲˈɡʲɪmɐs", "en (unstressed)" },
		{ "ménkė", "¹ˈmʲæˑŋʲkʲeː", "én (acute, before nk)" },
		{ "meñkinti", "²ˈmʲɛŋʲˑkʲɪnʲtʲɪ", "eñ (tilde, before nk)" },
		{ "menkystà", "mʲɛŋʲkʲiːˈstɐ", "en (unstressed, before nk)" },
		{ "hènris", "¹ˈɣʲɛnʲrʲɪs", "èn (loanword, grave)" },
		{ "ceñtas", "²ˈt͡sʲɛnˑtɐs", "eñ (loanword, tilde)" },
		{ "ãmen", "²ˈɑːmʲɛn", "en (loanword, unstressed final)" },
		-- er
		{ "nérti", "¹ˈnʲæˑrʲtʲɪ", "ér (acute)" },
		{ "ner̃šti", "²ˈnʲɛrʲˑʃʲtʲɪ", "er̃ (tilde)" },
		{ "nerštãvietė", "nʲɛr²ˈʃtɑːʋʲiɛtʲeː", "er (unstressed)" },
		{ "ko^ncèrtas", "kɔnʲ¹ˈt͡sʲɛrtɐs", "èr (loanword, grave)" },
		{ "ko^ncer̃tas", "kɔnʲ²ˈt͡sʲɛrˑtɐs", "er̃ (loanword, tilde)" },
		{ "ter̃minas", "²ˈtʲɛrʲˑmʲɪnɐs", "er̃ (loanword, tilde)" },
	}
	for idx = 1, #cases do
		local row = cases[idx]
		self:check_IPA(row[1], row[2], row[3])
	end
end

-- C6: Mixed diphthongs of the i-series — il, im, in, ir.
function tests:test_IPA_C6_mixed_i()
	-- Same row layout as the other IPA tables: { term, expected IPA, label }.
	local cases = {
		-- il
		{ "pi̇̀lti", "¹ˈpʲɪlʲtʲɪ", "ìl (grave)" },
		{ "pil̃vas", "²ˈpʲɪlˑʋɐs", "il̃ (tilde)" },
		{ "pilti̇̀nis", "pʲɪlʲˈtʲɪnʲɪs", "il (unstressed)" },
		-- im
		{ "ti̇̀mptelėjimas<base:timptelėjimas>", "¹ˈtʲɪmʲpʲtʲɛlʲeːjɪmɐs", "ìm (grave)" },
		{ "tim̃pinti", "²ˈtʲɪmʲˑpʲɪnʲtʲɪ", "im̃ (tilde)" },
		{ "timpinė́ti", "tʲɪmʲpʲɪ¹ˈnʲeːtʲɪ", "im (unstressed)" },
		-- in
		{ "gi̇̀nti", "¹ˈɡʲɪnʲtʲɪ", "ìn (grave)" },
		{ "giñklas", "²ˈɡʲɪŋˑklɐs", "iñ (tilde)" },
		{ "ginkluõtė", "ɡʲɪŋ²ˈkluɔtʲeː", "in (unstressed)" },
		-- ir
		{ "di̇̀rti", "¹ˈdʲɪrʲtʲɪ", "ìr (grave)" },
		{ "dir̃žas", "²ˈdʲɪrˑʒɐs", "ir̃ (tilde)" },
		{ "dirži̇̀nis", "dʲɪrʲˈʒʲɪnʲɪs", "ir (unstressed)" },
	}
	for idx = 1, #cases do
		local row = cases[idx]
		self:check_IPA(row[1], row[2], row[3])
	end
end

-- C7: Mixed diphthongs of the u-series — ul, um, un, ur.
function tests:test_IPA_C7_mixed_u()
	-- Rows are { input term, expected IPA, label }, passed positionally to
	-- check_IPA(term, expected, comment).
	local cases = {
		-- ul
		{ "dùlkė", "¹ˈdʊlʲkʲeː", "ùl (grave)" },
		{ "dul̃kti", "²ˈdʊlʲˑktʲɪ", "ul̃ (tilde)" },
		{ "dulkė́tas", "dʊlʲ¹ˈkʲeːtɐs", "ul (unstressed)" },
		-- um
		{ "grùmtis", "¹ˈɡrʊmʲtʲɪs", "ùm (grave)" },
		{ "grum̃ba", "²ˈɡrʊmˑbɐ", "um̃ (tilde)" },
		{ "grumtỹnės", "ɡrʊmʲ²ˈtʲiːnʲeːs", "um (unstressed)" },
		-- un
		{ "skùndė", "¹ˈskʊnʲdʲeː", "ùn (grave)" },
		{ "skuñdas", "²ˈskʊnˑdɐs", "uñ (tilde)" },
		{ "skundi̇̀kas", "skʊnʲˈdʲɪkɐs", "un (unstressed)" },
		-- ur
		{ "gùrkšnis", "¹ˈɡʊrʲkʃʲnʲɪs", "ùr (grave)" },
		{ "gur̃gti", "²ˈɡʊrʲˑktʲɪ", "ur̃ (tilde)" },
		{ "gurkšnóti", "ɡʊrk¹ˈʃnoːtʲɪ", "ur (unstressed)" },
	}
	for idx = 1, #cases do
		local row = cases[idx]
		self:check_IPA(row[1], row[2], row[3])
	end
end

-- C8: Mixed diphthongs of the foreign o-series — ol, om, on, or
-- (per VLKK §9.22, default reading is tvirtapradiškai with grave).
function tests:test_IPA_C8_mixed_o_foreign()
	-- Same row layout as the other IPA tables: { term, expected IPA, label }.
	local cases = {
		-- ol
		{ "kòlba", "¹ˈkɔlbɐ", "òl (grave; default reading)" },
		{ "hòldingas", "¹ˈɣɔlʲdʲɪŋɡɐs", "òl (grave)" },
		-- om
		{ "do^mkrãtas", "dɔm²ˈkrɑːtɐs", "om (unstressed)" },
		{ "pòmpa", "¹ˈpɔmpɐ", "òm (grave)" },
		-- on
		{ "po^ntònas", "pɔnˈtɔnɐs", "on (unstressed) / òn (grave)" },
		{ "fòndas", "¹ˈfɔndɐs", "òn (grave)" },
		-- or
		{ "po^rtrètas", "pɔrʲˈtʲrʲɛtɐs", "or (unstressed)" },
		{ "fòrma", "¹ˈfɔrmɐ", "òr (grave)" },
	}
	for idx = 1, #cases do
		local row = cases[idx]
		self:check_IPA(row[1], row[2], row[3])
	end
end

-- C9: Foreign diphthongs eu, oi, ou — three accent positions where attested.
function tests:test_IPA_C9_diphthongs_foreign_eu_oi_ou() local examples = { -- eu { "plèura", "¹ˈpʲlʲɛʊrɐ", "èu (grave; tvirtapradiškai)" }, { "eũras", "²ˈɛʊˑrɐs", "eũ (tilde)" }, { "Euro^pà", "ɛʊrɔˈpɐ", "eu (unstressed)" }, -- éu does not exist (é is long; éu would be long+long) -- oi { "Kóiva", "¹ˈkoˑɪʋɐ", "ói (acute; rare)" }, { "mòira", "¹ˈmɔɪrɐ", "òi (grave)" }, { "sinusòidė", "sʲɪnʊ¹ˈsɔɪdʲeː", "òi (grave)" }, { "bròileris", "¹ˈbrɔɪlʲɛrʲɪs", "òi (grave)" }, { "oikumenà", "ɔɪkʊmʲɛˈnɐ", "oi (unstressed)" }, -- oi̇̃ does not exist (per VLKK: oi reads tvirtapradiškai only) -- ou { "šòu", "¹ˈʃɔʊ", "òu (grave)" }, { "klòunas", "¹ˈklɔʊnɐs", "òu (grave)" }, { "klounadà", "klɔʊnɐˈdɐ", "ou (unstressed)" }, -- óu does not exist (ó is long) -- oũ does not exist (per VLKK: ou reads tvirtapradiškai only) } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- D: Fake diphthongs — vowel sequences that LOOK like diphthongs but are -- actually two separate vowels in adjacent syllables (hiatus). The module -- must NOT collapse these into a single diphthong nucleus. 
function tests:test_IPA_D_fake_diphthongs()
	-- Every row is unpacked straight into self:check_IPA(...). Rows read as
	-- { input respelling, expected IPA, label }; check_IPA is defined outside
	-- this chunk, so confirm the argument order there before relying on it.
	-- The label names the vowel pair (with its accent mark) the row exercises.
	local examples = {
		-- ai (fake)
		{ "nebepàima", "nʲɛbʲɛˈpɐ.ɪmɐ", "ài" },
		{ "archãika", "ɐr²ˈxɑː.ɪkɐ", "ãi" },
		{ "pai̇́eško", "pɐ¹ˈjiɛʃkoː", "ai̇́" },
		{ "betai̇̀nas", "bʲɛtɐ.ˈɪnɐs", "ai̇̀" },
		-- au (fake)
		{ "pàurzgė", "ˈpɐ.ʊrʲzʲɡʲeː", "àu" },
		{ "šilãuogė", "ʃʲɪ²ˈlɑː.uɔɡʲeː", "ãu" },
		{ "Naùmo^vas", "nɐ.ˈʊmɔʋɐs", "aù" },
		{ "nepaúosto", "nʲɛpɐ.¹ˈuɔstoː", "aú" },
		-- ei (fake)
		{ "nebèima", "nʲɛˈbʲɛ.ɪmɐ", "èi" },
		{ "nebei̇̀rti", "nʲɛbʲɛ.¹ˈɪrʲtʲɪ", "ei̇̀" },
		{ "neji̇́eško<base:neieško>", "nʲɛ¹ˈjiɛʃkoː", "ei̇́ (no-j-insertion not found yet)" },
		-- ẽi not found
		-- ui (fake)
		{ "sui̇̀ro", "sʊ.ˈɪroː", "ui̇̀" },
		{ "sui̇́eško", "sʊ¹ˈjiɛʃkoː", "ui̇́" },
		-- úi, ũi not found
		-- ie (fake)
		{ "besi̇̀elgė", "bʲɛˈsʲɪ.ɛlʲɡʲeː", "i̇̀e" },
		-- The grave sits on the e here (expected IPA: ɪ.ˈɛ), so the label is
		-- "iè", not the "i̇̀e" of the row above.
		{ "ˌpo^lièsteris", "ˌpɔlʲɪ.ˈɛsʲtʲɛrʲɪs", "iè" },
		{ "įsiérzina", "iːsʲɪ.¹ˈæˑrʲzʲɪnɐ", "ié" },
		-- i̇̃e not found
		-- uo (fake)
		{ "sùošė", "ˈsʊ.oːʃʲeː", "ùo" },
		-- ũo, uó not found; for uò only /ʊˈɔ/ examples found
		-- foreign: ao
		{ "mao^ji̇̀zmas<base:maojizmas>", "mɐ.ɔˈjɪzmɐs", "ao (no-j-insertion variant)" },
		-- foreign: oi (fake)
		-- õi, oi̇́ not found
		-- foreign: ou (fake)
		{ "fino^ùgrai", "fʲɪnɔ.ˈʊɡrɐɪ", "où" },
		-- õu, oú do not exist
		-- foreign: eu (fake)
		{ "neúosti", "nʲɛ.¹ˈuɔsʲtʲɪ", "eú" },
		{ "teùrginis", "tʲɛ.¹ˈʊrʲɡʲɪnʲɪs", "eù" },
		-- ẽu does not exist
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- E: Fronting of o/u after a palatalized consonant or j (VLKK IPA rec §4.4):
-- [oː → o̟ː], [ʊ → ʊ̟], [uː → u̟ː], [uɔ → u̟ɔ].
function tests:test_IPA_E_o_u_fronting_after_palatal() local examples = { -- After palatalized consonant + o/u { "sagióti", "sɐ¹ˈɡʲo̟ːtʲɪ", "Cʲ + o → o̟ː" }, { "angijo^mà", "ɐŋʲɡʲɪjɔ̟ˈmɐ", "Cʲ + o^ → ɔ̟" }, { "siuñčia", "²ˈsʲʊ̟nʲˑt͡ʃʲɛ", "Cʲ + u → ʊ̟" }, { "sių̃sti", "²ˈsʲu̟ːsʲtʲɪ", "Cʲ + ų → u̟ː" }, { "ãčiū", "²ˈɑːt͡ʃʲu̟ː", "Cʲ + ū → u̟ː" }, { "liuobà", "lʲu̟ɔˈbɐ", "Cʲ + uo → u̟ɔ" }, -- After j + o/u (j inherently palatal, triggers fronting) { "at.jójo<base:atjojo>", "ɐtʲ¹ˈjo̟ːjo̟ː", "j + o → o̟ː" }, { "Lo^jo^là<base:Lojola>", "lɔjɔ̟ˈlɐ", "j + o^ → ɔ̟" }, { "Jùlė<base:Julė>", "ˈjʊ̟lʲeː", "j + u → ʊ̟" }, { "ãkcijų<base:akcijų>", "²ˈɑːkt͡sʲɪju̟ː", "j + ų → u̟ː" }, { "jū́ra<base:jūra>", "¹ˈju̟ːrɐ", "j + ū → u̟ː" }, { "júodas<base:juodas>", "¹ˈju̟ɔdɐs", "j + uo → u̟ɔ" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- F: Palatalization spreading — palatalization of a front vowel propagates -- leftward through preceding consonants, including through k/g (which do -- not directly palatalize but transmit the feature; VLKK §13). 
function tests:test_IPA_F_palatalization_spreading()
	-- Rows are unpacked into self:check_IPA(...) and read as
	-- { input respelling, expected IPA, label } (check_IPA is defined outside
	-- this chunk; confirm the argument order there).
	local examples = {
		-- Spreading through obstruent clusters
		{ "skri̇́eti", "¹ˈsʲkrʲiɛtʲɪ", "Spreading left through r and k" },
		{ "displė̃jus<base:displėjus>", "dʲɪ²ˈsʲpʲlʲeːjʊ̟s", "Spreading left through cluster spl" },
		-- Spreading to next syllable's onset
		{ "pùlti", "¹ˈpʊlʲtʲɪ", "Palatalized l before t (softened by following i)" },
		{ "méilė", "¹ˈmʲæˑɪlʲeː", "Palatalized resonant cluster" },
		-- Secondary stress should not block spreading
		{ "išˌverstaãkis", "ɪʃʲˌʋʲɛrstɐ.²ˈɑːkʲɪs", "Secondary stress should not stop palatalization" },
		-- VLKK §13: l palatalizes through k/g before another soft consonant
		{ "al̃ksnis", "²ˈɐlʲˑksʲnʲɪs", "lʲ through k before sʲnʲ (alksnis)" },
		{ "álgebra", "¹ˈɑˑlʲɡʲɛbrɐ", "lʲ through gʲ before front vowel (álgebra)" },
		{ "buhálteris", "bʊ¹ˈɣɑˑlʲtʲɛrʲɪs", "lʲ before tʲ (buhálteris)" },
		{ "Báltija", "¹ˈbɑˑlʲtʲɪjɛ", "lʲ before tʲ (Báltija)" },
		{ "fakultètas", "fɐkʊlʲˈtʲɛtɐs", "lʲ before tʲ (fakultètas)" },
		{ "fi̇̀lme", "¹ˈfʲɪlʲmʲɛ", "lʲ before mʲ (fi̇̀lme)" },
		-- The expected IPA keeps k plain (…lʲkmʲ…), like the al̃ksnis row, so
		-- the label says "through k" rather than "through kʲ".
		{ "smùlkmena", "¹ˈsmʊlʲkmʲɛnɐ", "lʲ through k before mʲ (smùlkmena)" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- G: ng / nk reverse palatalization rule.
-- n + k/g normally velarizes to ŋ. The ŋ palatalizes only when the FOLLOWING
-- k/g itself palatalizes (i.e., when the cluster is directly followed by
-- a front vowel). When the k/g stays hard (because next is a consonant),
-- ŋ also stays hard.
function tests:test_IPA_G_ng_nk_reverse_palatalization()
	-- "indirect" rows: k/g is followed by a consonant, expected ŋ is plain.
	-- "direct" rows: k/g is followed by a front vowel, expected ŋʲ.
	local examples = {
		{ "žiñgsnis", "²ˈʒʲɪŋˑksʲnʲɪs", "indirect: ng + s → ŋ stays hard" },
		{ "plunksnẽlė", "plʊŋk²ˈsʲnʲæːlʲeː", "indirect: nk + s → ŋ stays hard" },
		{ "anketà", "ɐŋʲkʲɛˈtɐ", "direct: nk + e → ŋʲkʲ" },
		{ "Bangỹs", "bɐŋʲ²ˈɡʲiːs", "direct: ng + y → ŋʲɡʲ" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- H1: Voicing assimilation (regressive: a stop/fricative agrees in voicing
-- with the next obstruent). VLKK §16, §17.
function tests:test_IPA_H1_voicing_assimilation() local examples = { { "di̇̀rbti", "¹ˈdʲɪrʲpʲtʲɪ", "b → p before t (devoicing)" }, { "apgáuti", "ɐb¹ˈɡɑˑʊtʲɪ", "p → b before g (voicing)" }, { "už.trùkti", "ʊʃˈtrʊktʲɪ", "ž → š before t (devoicing)" }, { "li̇̀pdo", "ˈlʲɪbdoː", "p → b before d (voicing)" }, { "kàsdavo", "ˈkɐzdɐʋoː", "s → z before d (voicing)" }, { "iš.gir̃do", "ɪʒʲ²ˈɡʲɪrˑdoː", "š → ž before g (voicing)" }, { "iš.džiū́ti", "ɪʒʲ¹ˈd͡ʒʲu̟ːtʲɪ", "š → ž before dž (voicing)" }, { "degtùkas", "dʲɛkˈtʊkɐs", "g → k before t (devoicing)" }, { "žiebtùvas", "ʒʲiɛpˈtʊʋɐs", "b → p before t (devoicing)" }, { "grį̇̃žti", "²ˈɡrʲiːʃʲtʲɪ", "ž → š before t (devoicing)" }, { "už.púola", "ʊʃ¹ˈpuɔlɐ", "ž → š before p (devoicing)" }, { "už.króvė", "ʊʃ¹ˈkroːʋʲeː", "ž → š before k (devoicing)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- H2: Word-final devoicing (VLKK §18). function tests:test_IPA_H2_word_final_devoicing() local examples = { { "juolàb<base:juolab>", "ju̟ɔˈlɐp", "b → p word-finally" }, { "visàd", "ʋʲɪˈsɐt", "d → t word-finally" }, { "jóg<base:jog>", "¹ˈjo̟ːk", "g → k word-finally" }, { "ùž", "ˈʊʃ", "ž → š word-finally" }, { "daũg", "²ˈdɒʊˑk", "final g → k" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- H3: Place assimilation between sibilants and affricates (VLKK §23): -- s + č → š; z + dž → ž; š + c → s; ž + dz → z. 
function tests:test_IPA_H3_place_assimilation()
	-- Rows are unpacked into self:check_IPA(...) and read as
	-- { input respelling, expected IPA, label } (check_IPA is defined outside
	-- this chunk; confirm the argument order there).
	local examples = {
		{ "mókesčiai", "¹ˈmoːkʲɛʃʲt͡ʃʲɛɪ", "s + č → š (place assim.)" },
		{ "kàsčiau", "ˈkɐʃʲt͡ʃʲɛʊ", "s + č → š (place assim.)" },
		{ "vabzdžiai̇̃", "ʋɐbʲ²ˈʒʲd͡ʒʲɛɪˑ", "z + dž → ž (place assim.)" },
		{ "išcukrúoti", "ɪst͡sʊ¹ˈkruɔtʲɪ", "š + c → s (place assim.)" },
		-- Devoicing turns ž into š (not z); place assimilation then gives s.
		{ "už.cỹpti", "ʊsʲ²ˈt͡sʲiːpʲtʲɪ", "ž + c → š (devoicing) → s (place assim.)" },
		-- ž + dz not found
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- H4: Geminate simplification — two identical consonants reduce to one
-- (VLKK §21, plus the same effect on stops once they have been levelled
-- by voicing assimilation, e.g. d + t → t + t → t).
function tests:test_IPA_H4_geminate_simplification()
	-- Same row layout as the other IPA tests: { input, expected IPA, label }.
	local examples = {
		-- Sibilants (identical pairs)
		{ "pùsseserė", "ˈpʊsʲɛsʲɛrʲeː", "ss → s" },
		{ "iššóko", "ɪ¹ˈʃoːkoː", "šš → š" },
		{ "užžiẽbti", "ʊ²ˈʒʲiɛpʲtʲɪ", "žž → ž (also b → p before t)" },
		-- zz not found
		-- Sonorants (Liquids and Nasals)
		-- NOTE(review): this <base:…> carries a tilde accent (ã) while every
		-- other base in these tests is unaccented — confirm it is intentional.
		{ "so^ciˌjalliberãlas<base:socialliberãlas>", "sɔt͡sʲɪˌjɛlʲɪbʲɛ²ˈrɑːlɐs", "ll → l" },
		{ "šė́mmargas", "¹ˈʃʲeːmɐrɡɐs", "mm → m" },
		{ "viennỹtis", "ʋʲiɛ²ˈnʲiːtʲɪs", "nn → n" },
		{ "pérrašo", "¹ˈpʲæːrɐʃoː", "rr → r" },
		-- Bilabial stops (after voicing assimilation)
		-- bb not found
		{ "tar̃ppievis", "²ˈtɐrʲˑpʲiɛʋʲɪs", "pp → p" },
		{ "bóbpalaikė", "¹ˈboːpɐlɐɪkʲeː", "bp → pp → p (devoicing + degemination)" },
		-- pb not found
		-- Alveolar stops (after voicing assimilation)
		-- dd not found
		{ "añttrobis", "²ˈɐnˑtroːbʲɪs", "tt → t" },
		{ "Šmi̇̀dtas", "ˈʃʲmʲɪtɐs", "dt → tt → t (devoicing + degemination)" },
		{ "atdarà", "ɐdɐˈrɐ", "td → dd → d (voicing + degemination)" },
		-- Velar stops (after voicing assimilation)
		-- gg not found (needs g + g)
		{ "kiekkar̃t", "kʲiɛ²ˈkɐrˑt", "kk → k" },
		{ "daugkar̃t", "dɒʊ²ˈkɐrˑt", "gk → kk → k (devoicing + degemination)" },
		{ "ki̇́ekgi", "¹ˈkʲiɛɡʲɪ", "kg → gg → g (voicing + degemination)" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- H5: Sibilant
-- simplification — when two DIFFERENT sibilants meet at a
-- morpheme boundary, only the second is pronounced (VLKK §22).
function tests:test_IPA_H5_sibilant_simplification()
	-- Rows are unpacked into self:check_IPA(...) and read as
	-- { input respelling, expected IPA, label } (check_IPA is defined outside
	-- this chunk; confirm the argument order there).
	local examples = {
		{ "išsprę́sti", "ɪ¹ˈsʲpʲrʲæːsʲtʲɪ", "šs → s" },
		{ "ùžsienis", "ˈʊsʲiɛnʲɪs", "žs → s (via šs)" },
		{ "pùsšimtis", "ˈpʊʃʲɪmʲtʲɪs", "sš → š" },
		{ "pùszuikis", "ˈpʊzʊɪkʲɪs", "sz → z" },
		{ "pùsžalis", "ˈpʊʒɐlʲɪs", "sž → ž" },
		{ "išžarà", "ɪʒɐˈrɐ", "šž → ž" },
		{ "ùžšovas", "ˈʊʃoːʋɐs", "žš → š" },
		-- zš not found
		-- šz not found
		-- zs not found
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- I: Word-final j and v become non-syllabic [ɪ̯], [ʊ̯] (VLKK IPA rec §7.5).
function tests:test_IPA_I_final_j_v_nonsyllabic()
	-- The <base:…> part gives the unaccented spelling of the respelled word
	-- (compare rytój/rytoj, tuõj/tuoj, viduj̃/viduj below).
	local examples = {
		{ "rytój<base:rytoj>", "rʲiː¹ˈtoːɪ̯", "final j after long o → ɪ̯" },
		{ "tuõj<base:tuoj>", "²ˈtuɔɪ̯", "final j after uo → ɪ̯" },
		{ "viduj̃<base:viduj>", "ʋʲɪ²ˈdʊɪˑ", "final j with tilde after short u" },
		-- Base spelled with š to match the respelling (it read "virsuj").
		{ "viršuj̃<base:viršuj>", "ʋʲɪr²ˈʃʊɪˑ", "final j with tilde after short u" },
		{ "sudiẽv", "sʊ²ˈdʲiɛʊ̯", "final v after ie → ʊ̯" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- J1: Hiatus — vowel sequences pronounced as two separate syllables, marked
-- either by morpheme boundary (native: prefix `.`) or explicitly preserved
-- (foreign: user-marked `.`). VLKK §24, §25, §27.2 (i-second variant).
function tests:test_IPA_J1_hiatus() local examples = { -- Native prefix boundaries { "pa.upỹs", "pɐ.ʊ²ˈpʲiːs", "prefix pa- + u" }, { "priim̃ti", "pʲrʲɪ.²ˈɪmʲˑtʲɪ", "prefix pri- + i" }, { "pri̇̀ima", "ˈpʲrʲɪ.ɪmɐ", "prefix pri- + i" }, { "pri̇̀ėmė", "ˈpʲrʲɪ.eːmʲeː", "prefix pri- + ė" }, { "priei̇̃ti", "pʲrʲɪ.²ˈɛɪˑtʲɪ", "prefix pri- + ei" }, { "priė̃jo", "pʲrʲɪ.²ˈeːjo̟ː", "prefix pri- + ė" }, { "nù.imtas", "ˈnʊ.ɪmtɐs", "prefix nu- + i (user-marked)" }, -- Foreign words: hiatus preserved between non-i vowels (VLKK §25) { "di.acetãtas", "dʲɪ.ɐt͡sʲɛ²ˈtɑːtɐs", "foreign i.a (user-marked)" }, { "di.akrilãtas", "dʲɪ.ɐkrʲɪ²ˈlɑːtɐs", "foreign i.a (user-marked)" }, { "fino^ùgrai", "fʲɪnɔ.ˈʊɡrɐɪ", "foreign o.u" }, { "paleo^nto^lògas", "pɐlʲɛ.ɔntɔˈlɔɡɐs", "paleo- + onto-: only e.o is hiatus, last ɔ is in coda" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- J2: J-insertion — in foreign words, an epenthetic [j] is inserted between -- vowel sequences containing i (VLKK §27). Input is a respelling that -- spells out the inserted j, optionally with `(j)` for the variable -- forms in §27.2. 
function tests:test_IPA_J2_j_insertion() local examples = { -- §27.1: i first → j obligatorily inserted { "dijãkonas", "dʲɪ²ˈjæːkoːnɐs", "ia → ija (i first, accented vowel)" }, { "dijakonỹstė", "dʲɪjɛkoː²ˈnʲiːsʲtʲeː", "ia → ija (i first, unaccented)" }, { "dijalèktas", "dʲɪjɛˈlʲɛktɐs", "ia → ija (i first, unaccented)" }, { "pijani̇̀nas<base:pianinas>", "pʲɪjɛˈnʲɪnɐs", "ia → ija (respell j)" }, { "dijèzas<base:diezas>", "dʲɪˈjɛzɐs", "ie → ije (respell j)" }, { "audijo^fònas<base:audiofonas>", "ɒʊdʲɪjɔ̟ˈfɔnɐs", "io → ijo (respell j)" }, -- §27.2: i second → j optional, written as `(j)` in respelling { "teji̇̀stas<base:teistas>", "tʲɛˈjɪstɐs", "ei → eji (respell j)" }, { "stò(j)ikas<base:stoikas>", "ˈstɔ(j)ɪkɐs", "oi: variant with (j)" }, { "babu(j)i̇̀nai<base:babuinai>", "bɐbʊˈ(j)ɪnɐɪ", "ui: variant with (j)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- K: Word juncture — clitic liaison `‿` is inserted between an unstressed -- word and a following stressed word; word-final voiced obstruents devoice. -- Input uses a regular space; the module inserts `‿` automatically. function tests:test_IPA_K_word_juncture() local examples = { -- Basic liaison { "be ãbejo", "bʲɛ‿²ˈɑːbʲɛjo̟ː", "clitic be + main word" }, { "kaip kàd", "kɐɪp‿ˈkɐt", "clitic kaip + main word" }, { "kadà ne kadà", "kɐˈdɐ nʲɛ‿kɐˈdɐ", "stressed + clitic + stressed" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- L: Secondary stress (`ˌ`) — different positions and interactions with -- primary stress. 
function tests:test_IPA_L_secondary_stress()
	-- Rows are unpacked into self:check_IPA(...) and read as
	-- { input respelling, expected IPA, label } (check_IPA is defined outside
	-- this chunk; confirm the argument order there).
	-- The input carries the secondary-stress mark `ˌ` itself; the expected IPA
	-- shows it in the same position.
	local examples = {
		{ "ˌho^mo^fòbė", "ˌɣɔmɔˈfɔbʲeː", "initial secondary stress" },
		{ "saˌvanoriáuti", "sɐˌʋɐnoː¹ˈrʲæˑʊtʲɪ", "medial secondary stress" },
		{ "nebekõneˌveikti", "nʲɛbʲɛ²ˈkoːnʲɛˌʋʲɛɪktʲɪ", "secondary AFTER primary stress" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- M: Optional soft l in loanwords (VLKK §15) — by default the module reads
-- l as hard before a hard consonant; the user marks softening explicitly
-- with U+02BC (modifier letter apostrophe, ʼ) after l. This is the character
-- used in the inputs below and named in their labels; it is not U+2019.
function tests:test_IPA_M_l_dual_reading()
	-- Each word appears twice: unmarked (expected plain l) and with ʼ after l
	-- (expected lʲ).
	local examples = {
		{ "pòlka", "¹ˈpɔlkɐ", "default: hard l" },
		{ "pòlʼka", "¹ˈpɔlʲkɐ", "with U+02BC: soft lʲ" },
		{ "válsas", "¹ˈʋɑˑlsɐs", "default: hard l" },
		{ "válʼsas", "¹ˈʋɑˑlʲsɐs", "with U+02BC: soft lʲ" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- ════════════════════════════════════════════════════════════════════════════
-- SYLLABIFICATION TESTS (Phonotactic models)
-- ════════════════════════════════════════════════════════════════════════════

-- A: 2-consonant cluster models.
function tests:test_hyphen_A_models_2C()
	-- Rows are unpacked into self:check_hyph(...) and read as
	-- { input, expected syllabification with `‧` separators, label }
	-- (check_hyph is defined outside this chunk; confirm the argument order).
	-- NOTE(review): the S/T/R letters in the labels look like consonant-class
	-- codes; their definition is not visible here.
	local examples = {
		-- Onset patterns (V-CCV)
		{ "vèsti", "vè‧sti", "ST onset" },
		{ "dažnai̇̃", "da‧žnai̇̃", "SR onset" },
		{ "veiklõs", "vei‧klõs", "TR onset" },
		-- Split patterns (VC-CV)
		{ "kalbõs", "kal‧bõs", "RT split" },
		{ "ámžiaus", "ám‧žiaus", "RS split" },
	}
	for _, ex in ipairs(examples) do
		self:check_hyph(unpack(ex))
	end
end

-- B: 3-consonant cluster models.
function tests:test_hyphen_B_models_3C() local examples = { -- Onset { "displė̃jus<base:displėjus>", "di‧splė̃‧jus", "STR onset (V-CCCV)" }, -- Splits { "pýksta", "pýk‧sta", "T+ST split" }, { "mir̃šta", "mir̃‧šta", "R+ST split" }, { "mókslo", "mók‧slo", "T+SR split" }, { "lengvai̇̃", "len‧gvai̇̃", "R+TR split" }, { "atkrei̇̃pia", "at‧krei̇̃‧pia", "T+TR split" }, { "di̇̀rbti", "di̇̀rb‧ti", "RT+T split" }, { "elgsenõs", "elg‧se‧nõs", "RT+S split" }, { "piktžolė̃s", "pikt‧žo‧lė̃s", "TT+S split" }, { "Oksfòrdas", "Oks‧fòr‧das", "TS+S split (foreign)" }, { "transfòrmavo", "trans‧fòr‧ma‧vo", "RS+S split" }, } for _, ex in ipairs(examples) do self:check_hyph(unpack(ex)) end end -- C: 4-consonant cluster models. function tests:test_hyphen_C_models_4C() local examples = { { "konstrùkcija<base:konstrukcija>", "kon‧strùk‧ci‧ja", "R+STR split" }, { "apskritai̇̃", "ap‧skri‧tai̇̃", "T+STR split" }, { "ankstà", "ank‧stà", "RT+ST split" }, { "ži̇̀ngsnis", "ži̇̀ng‧snis", "RT+SR split" }, { "ántplūdžio", "ánt‧plū‧džio", "RT+TR split" }, { "postprodùkcija<base:postprodukcija>", "post‧pro‧dùk‧ci‧ja", "ST+TR split" }, { "kontrmotỹvas", "kontr‧mo‧tỹ‧vas", "RTR+R split" }, { "Obstfelderis", "Obst‧fel‧de‧ris", "TST+S split" }, -- FIXME: need accentuation -- Hyphenation for theoretically-existing consonant clusters, -- per Bendrinės lietuvių kalbos skiemuo monografija: -- S-STR -- RS-SR, RR-ST, ST-SR, RR-TR -- RTR-T, RST-T, RTT-S, TST-T } for _, ex in ipairs(examples) do self:check_hyph(unpack(ex)) end end -- D: Morphology-driven hyphenation — native prefixes vs. pseudo-prefixes, -- and prefix boundaries that introduce hiatus. 
function tests:test_hyphen_D_morphology() local examples = { -- Native prefixes (user-marked with `.`) { "ap.rašýti", "ap‧ra‧šý‧ti", "Native prefix ap-" }, { "at.nèšti", "at‧nè‧šti", "Native prefix at-" }, { "iš.mókyti", "iš‧mó‧ky‧ti", "Native prefix iš-" }, -- Pseudo-prefixes (Internationalisms): no morphological boundary { "atòmas", "a‧tò‧mas", "Pseudo-prefix" }, { "apãratas", "a‧pã‧ra‧tas", "Pseudo-prefix" }, -- Prefix boundary with hiatus { "pa.upỹs", "pa‧u‧pỹs", "Prefix boundary with hiatus" }, { "priim̃ti", "pri‧im̃‧ti", "Prefix boundary with hiatus" }, -- User-marked hiatus and secondary-stress boundary { "Kiurasã.o", "Kiu‧ra‧sã‧o", "User-marked hiatus" }, { "išˌverstaãkis", "iš‧ver‧sta‧ã‧kis", "Secondary stress also marks syllable boundary" }, } for _, ex in ipairs(examples) do self:check_hyph(unpack(ex)) end end -- ════════════════════════════════════════════════════════════════════════════ -- RHYME TESTS -- ════════════════════════════════════════════════════════════════════════════ -- A: Basic rhyme extraction across vowel/diphthong types and stress patterns. function tests:test_rhyme_A_basic() local examples = { { "nakti̇̀s", "ɪs", "Short i rhyme" }, { "kalbà", "ɐ", "Short a rhyme" }, { "homològas", "ɔɡɐs", "Loanword o rhyme" }, { "naũjas", "ɒʊˑjɛs", "Diphthong rhyme" }, { "var̃das", "ɐrˑdɐs", "Mixed diphthong rhyme" }, { "mótina", "oːtʲɪnɐ", "Long o rhyme" }, { "vil̃kas", "ɪlˑkɐs", "Mixed diphthong rhyme" }, { "nebekõneˌveikti", "oːnʲɛˌʋʲɛɪktʲɪ", "Secondary stress should be stripped" }, } for _, ex in ipairs(examples) do self:check_rhyme(unpack(ex)) end end return tests o9r9dsdnddxow5l5n7phfg6fsgmn4j7 ima 0 295361 395887 2026-05-29T15:58:58Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{also|Appendix:ဗီုပြၚ်နာနာသာ်မဆေၚ်စပ်ကဵု "ima"}} ==မအရေဝ်ပံၚ်ကောံ== ===သၚ်္ကေတ=== {{mul-symbol}} # {{ISO 639|3}} ==အၚ်္ဂလိက်== ===သမ္ဗန္ဓ=== {{en-head|contr}} # {{alternative form of|en|Imma}} ==အာရာကဳ== ===နိရုတ်=== ဝေါဟာကၠုၚ်နူ {{inh|akr|p..." 
395887 wikitext text/x-wiki {{also|Appendix:ဗီုပြၚ်နာနာသာ်မဆေၚ်စပ်ကဵု "ima"}} ==မအရေဝ်ပံၚ်ကောံ== ===သၚ်္ကေတ=== {{mul-symbol}} # {{ISO 639|3}} ==အၚ်္ဂလိက်== ===သမ္ဗန္ဓ=== {{en-head|contr}} # {{alternative form of|en|Imma}} ==အာရာကဳ== ===နိရုတ်=== ဝေါဟာကၠုၚ်နူ {{inh|akr|poz-oce-pro|*ʀumaq}} ===ဗွဟ်ရမ္သာၚ်=== * {{IPA|akr|/ima/}} ===နာမ်=== {{head|akr|noun}} # သ္ၚိ။ ===နိဿဲ=== * {{R:akr:lex}} ‒ [https://marama.huma-num.fr/Lex/Araki/i.htm#%E2%93%94ima entry ''ima'']. ==အာက်သှ်== ===နာမ်=== {{head|ahs|noun}} # လ္ၚဴ။ ==ဗေတ် နာန်ဗာတ်သ်== ===ဗွဟ်ရမ္သာၚ်=== * {{IPA|nmb|/ima/}} ===ကြိယာ=== {{head|nmb|verbs}} # သကဵုကၠုၚ်။ ===နိဿဲ=== * ''[https://core.ac.uk/download/pdf/159465135.pdf Big Nambas Grammar] Pacific Linguistics - G.J. Fox'' ==နာဝါတ်ဒဝ်ဝၚ်ဂန္ထ== ===နာမ်=== {{head|nci|noun form|head=īmā}} # {{inflection of|nci|maitl||3|s|possessed|form}} ==ဒါဝ်== ===နိရုတ်=== ဝေါဟာကၠုၚ်နူ {{inh|dta|xgn-pro|*ïmaxan}} ===နာမ်=== {{head|dta|noun}} # ဗ္ၜေံ။ ==ဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်== ===နာမ်=== {{head|nhe|noun form}} # {{inflection of|nhe|mayitl||3|s|possessed|form}} ==ဂရိန်လာန်== ===နိရုတ်=== ဝေါဟာကၠုၚ်နူ {{inh|kl|esx-inu-pro|*ima}}၊ နူကဵုဝေါဟာ {{inh|kl|esx-esk-pro|*imV}} ===ဗွဟ်ရမ္သာၚ်=== * {{kl-IPA}} ===လုပ်ကၠောန်စွံလဝ်=== {{head|kl|particle}} # သာ်၊ ဏံ (ညံၚ်ရဴဗက်အလိုက်ဂမၠိုၚ်)၊ ၜိုတ်ဏံဂှ်။ ==ဟာန်ဂါရေဝ်== ===ဗွဟ်ရမ္သာၚ်=== * {{hu-IPA}} * {{hyphenation|hu|ima}} * {{rhymes|hu|mɒ|s=2}} ===နာမ်=== {{hu-noun|pl=imák}} # ပရေၚ်ရာဒၞာမိက်။ #: {{syn|hu|imádság|fohász}} ====လဟုတ်စှ်ေ==== {{hu-infl-nom|imá|o}}{{hu-pos-tok|imá}} ==အဳလဝ်ကာနဝ်== ===နိရုတ်=== ဝေါဟာကၠုၚ်နူ {{inh|ilo|poz-pro|*(qa)lima}}၊ နူကဵုဝေါဟာ {{inh|ilo|map-pro|*(qa)lima}} ===နာမ်=== {{ilo-noun}} # တဲ။ ==အဳတလဳ== ====နာမဝိသေသန==== {{head|it|adjective form}} # {{inflection of|it|imo|imo|f|s}} ==အဳတာဝေတ်== ===နာမ်=== {{head|itv|noun}} # တဲ။ ==ဂျပါန်== ===ဗီုအက္ခရ်ရောမ=== {{ja-romaji}} # {{ja-romanization of|いま}} ==ကာန်ခါနာအဳ== ===ဗွဟ်ရမ္သာၚ်=== {{kne-pr|íma}} ===နာမ်=== {{kne-noun|íma}} # တဲ။ ===နိဿဲ=== * {{R:kne:Vanoverbergh 
1933|íma|page=186}} ==လပ်တေန်== ===ဗွဟ်ရမ္သာၚ်=== * {{la-IPA|eccl=yes|īma}} ====နာမဝိသေသန==== {{head|la|adjective form|head=īma}} # {{inflection of|la|īmus||nom//voc|f|s|;|nom//acc//voc|n|p}} ====နာမဝိသေသန ၂ ==== {{head|la|adjective form|head=īmā}} # {{inflection of|la|īmus||abl|f|s}} ===နိဿဲ=== * {{R:la:du Cange}} ==လေန်မဝ်သေန် ကလေန်ဂါ== ===နိရုတ်=== ဝေါဟာကၠုၚ်နူ {{inh|kmk|poz-pro|*lima,*qalima}}၊ နူကဵုဝေါဟာ {{inh|kmk|map-pro|*(qa)lima}} ===နာမ်=== {{head|kmk|noun}} # တဲ။ ==လေတ်တူယဵုနဳယျာ== ===ဗွဟ်ရမ္သာၚ်=== {{lt-pr|i̇̀ma}} ===ကြိယာ=== {{head|lt|verb form|head=i̇̀ma}} # {{infl of|lt|imti||3|s//p|pres}} ==ဠူၜေအ်ဂါန် ကလဳၚ်္ဂါ== ===နိရုတ်=== ဝေါဟာကၠုၚ်နူ {{inh|knb|poz-pro|*lima,*qalima}}၊ နူကဵုဝေါဟာ {{inh|knb|map-pro|*(qa)lima}} ===နာမ်=== {{head|knb|noun}} # တဲ။ ==ပါဠိ== ===ဗီုပြၚ်ခ္ဍံက်လိက်ပါဠိမန်=== *'''{{#invoke:pi-Latn-translit|tr|[[{{PAGENAME}}]]|Mymr|variation=1}}''' ===ပွံၚ်အက္ခရ်နဲတၞဟ်=== {{pi-alt}} ====နာမဝိသေသန==== {{pi-adj}} # ဏံ (ဗွဲမကြပ်နကဵု)။ ====လဟုတ်စှ်ေ==== {{pi-decl-noun|ima|g=m|novoc=1 |noms_mod=replace|noms=ayaṃ |nomp_mod=replace|nomp=ime |inss_mod=replace|inss=iminā|inss2=anena <!-- anena should be verified; PTS denies it. Others also. 
--> |gens=assa |dats_mod=replace|dats=imassa|dats2=assa |abls_mod=replace|abls=imasmā|abls2=imamhā|abls3=asmā |locs_mod=replace|locs=imasmiṃ|locs2=asmiṃ |genp_mod=replace|genp=imesaṃ|genp2=esaṃ|genp3=imesānaṃ|genp4=esānaṃ |datp_mod=replace|datp=imesaṃ|datp2=esaṃ|datp3=imesānaṃ|datp4=esānaṃ |insp=ehi|insp2=ebhi|ablp=ehi|ablp2=ebhi }} {{pi-decl-noun|imā|g=f|novoc=true |noms_mod=replace|noms=ayaṃ |gens2=imissā|gens3=assā|gens4=imissāya|gens5=assāya |dats2=imissā|dats3=assā|dats4=imissāya|dats5=assāya |locs=imissā|locs2=imissaṃ|locs3=assaṃ |genp_mod=replace|genp=imāsaṃ|genp3=imāsānaṃ|genp2=āsaṃ |datp_mod=replace|datp=imāsaṃ|datp3=imāsānaṃ|datp2=āsaṃ }} {{pi-decl-noun|ima|g=n|novoc=true |noms_mod=replace|noms=idaṃ |accs_mod=before|accs=idaṃ |inss_mod=replace|inss=iminā|inss2=anena |gens=assa |dats_mod=replace|dats=imassa|dats2=assa |abls_mod=replace|abls=imasmā|abls2=imamhā|abls3=asmā |locs_mod=replace|locs=imasmiṃ|locs2=asmiṃ |genp_mod=replace|genp=imesaṃ|genp2=esaṃ|genp3=imesānaṃ|genp4=esānaṃ |datp_mod=replace|datp=imesaṃ|datp2=esaṃ|datp3=imesānaṃ|datp4=esānaṃ |insp=ehi|insp2=ebhi|ablp=ehi|ablp2=ebhi }} ===သဗ္ဗနာမ်=== {{head|pi|g=m|pronoun}} # ဣဏံ၊ ၝဏံ။ ===သဗ္ဗနာမ် ၂ === {{head|pi|g=f|pronoun}} # ဣဝွံ၊ ဣၝဏံဝွံ။ ===သဗ္ဗနာမ် ၃ === {{head|pi|g=n|pronoun}} # သာ်ဏံ။ ===နိဿဲ=== {{R:pi:PTS|ayaŋ}} ==ခေန်ချူဝါ== ====နာမဝိသေသန==== {{head|qu|adjective}} # လ္ၚဵု။ ===ကြိယာဝိသေသန=== {{head|qu|adverb}} # လေဝ်၊ ကဵု။ ===နာမ်=== {{head|qu|noun}} # အရာ၊ အရာမွဲမွဲ။ ====လဟုတ်စှ်ေ==== {{qu-noun-v}}{{qu-poss-v}} ===သဗ္ဗနာမ်=== {{head|qu|pronoun}} # မု။ # အရာဏံ။ ==ရဝ်မေနဳယျာ== ===နိရုတ်=== ဝေါဟာကၠုၚ်နူ {{inh|ro|la|līmō|līmāre}} ===ကြိယာ=== {{ro-verb|imă|imat|ime}} # သကဵုကၠိ၊ ကၠိပပ်၊ မသ္ပကၠိမၞုံကဵုခရီု ဝါ အိက်သတ်တိရစ္ဆာန်။ # သကဵုဟွံရှ်ေသှ်ေရဴဂဴအရာဝတ္ထု ဝါ မအရေဝ်နကဵုအရီုအဗၚ်မချိုတ်ပၠိုတ်ဂမၠိုၚ် ဝါ လက်ချဴဂမၠိုၚ်။ ==သာဗ်ခြဝ်ဨရှဳယာန်== ===ကြိယာ=== {{sh-verb form}} #: {{inflection of|sh|imati||3|s|pres}} # ညးမသ္ဒးဒုၚ်စသိုၚ်၊ ဍေံမသ္ဒးဒုၚ်စသိုၚ်။ # အတေံဂှ်၊ ဣတေံဂှ်။ ==သၠဝ်ဝေနဳ== ===ကြိယာ=== 
{{head|sl|verb form}} # {{infl of|sl|imeti||3|s|pres}} ==တာဂါလံက်== ===နိရုတ်=== ဝေါဟာကၠုၚ်နူ {{bor|tl|nan-hbl|引媽|tr=ín-má}} ===ဗွဟ်ရမ္သာၚ်=== {{tl-pr|imâ}} ===နာမ်=== {{tl-noun|imâ|b=+}} # မိ။ #: {{syn|tl|mama|inay|ina|nanay|inang|nanang|mami}} ===နိဿဲ=== * {{R:KWF Diksiyonaryo}} * {{R:Pambansang Diksiyonaryo}} * {{R:CEDOF|4|178}} * {{R:Vicassan's Pilipino-English Dictionary 1978|page=720}} * {{R:Diksyunaryo tesauro Pilipino-Ingles 1973|page=546}} * {{R:tl:Manuel 1948|page=27}} ==တဳရူရာန်== ===နာမ်=== {{head|tiy|noun}} # ပါၚ်ကၞက်။ ==တူရကဳ== ===နိရုတ်=== {{inh+|tr|ota|ايما}}၊ နူကဵုဝေါဟာ {{der|tr|ar|إِيْمَاء}} ===ဗွဟ်ရမ္သာၚ်=== * {{tr-IPA|i:ma:}} ===နာမ်=== {{tr-noun|imayı|imalar}} # ပရေၚ်ဒမြိပ်မြော်။ # ပရေၚ်လုပ်ဆေၚ်စပ်။ mdfpaultb5iga8fnb9yf1r8pq7lpvhn ထာမ်ပလိက်:R:akr:lex 10 295362 395888 2026-05-29T16:02:37Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[w:en:Alexandre François|François, Alexandre]]. 2008. [https://marama.huma-num.fr/AF-Araki_e.htm ''An online lexicon of Araki (Santo, Vanuatu)'']. Electronic files. Paris: CNRS. <small>[https://marama.huma-num.fr/data/AlexFrancois_Araki_trilingual-lexicon-2008.pdf (Pdf version)]</small> <noinclude>ကဏ္ဍ:ထာမ်ပလိက်နိဿဲအာရာကဳဂမၠိုၚ်|Araki reference templates..." 395888 wikitext text/x-wiki [[w:en:Alexandre François|François, Alexandre]]. 2008. [https://marama.huma-num.fr/AF-Araki_e.htm ''An online lexicon of Araki (Santo, Vanuatu)'']. Electronic files. Paris: CNRS. 
<small>[https://marama.huma-num.fr/data/AlexFrancois_Araki_trilingual-lexicon-2008.pdf (Pdf version)]</small> <noinclude>[[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲအာရာကဳဂမၠိုၚ်|Araki reference templates]]</noinclude> 59xitabtc6whiz933eb5tmu4qd8jwov ကဏ္ဍ:ထာမ်ပလိက်နိဿဲအာရာကဳဂမၠိုၚ် 14 295363 395889 2026-05-29T16:04:00Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်လေန်အာရာကဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]]" 395889 wikitext text/x-wiki [[ကဏ္ဍ:ထာမ်ပလိက်လေန်အာရာကဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]] t8ym3v0ky82pcjyimqlse6zfbz2t44i ကဏ္ဍ:ထာမ်ပလိက်လေန်အာရာကဳဂမၠိုၚ် 14 295364 395890 2026-05-29T16:05:43Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်အာရာကဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်လေန်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]]" 395890 wikitext text/x-wiki [[ကဏ္ဍ:ထာမ်ပလိက်အာရာကဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်လေန်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]] 16g5clr1jt63yxourl855nbuwanf281 ကဏ္ဍ:ထာမ်ပလိက်အာရာကဳဂမၠိုၚ် 14 295365 395891 2026-05-29T16:07:44Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ဘာသာအာရာကဳ][[ကဏ္ဍ:ထာမ်ပလိက်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]]" 395891 wikitext text/x-wiki [[ကဏ္ဍ:ဘာသာအာရာကဳ][[ကဏ္ဍ:ထာမ်ပလိက်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]] e1j3shyr5wpamoc3zz1tg1bftqlqamc 395892 395891 2026-05-29T16:08:37Z 咽頭べさ 33 395892 wikitext text/x-wiki [[ကဏ္ဍ:ဘာသာအာရာကဳ]][[ကဏ္ဍ:ထာမ်ပလိက်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]] 9kd0bwi0d85iwmhcalfrj1jgsy1drro ကဏ္ဍ:ဘာသာအာရာကဳ 14 295366 395893 2026-05-29T16:09:28Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:အရေဝ်ဘာသာ|အ]][[ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အ]]" 395893 wikitext text/x-wiki [[ကဏ္ဍ:အရေဝ်ဘာသာ|အ]][[ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အ]] sz2twsl0oy4c22ynqtyz2948a9dbmpr ထာမ်ပလိက်:kl-IPA 10 295367 395894 2026-05-29T16:11:56Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "<includeonly>{{#invoke:kl-pron|show}}</includeonly><noinclude>{{#invoke:kl-pron|show}}{{documentation}}</noinclude>" 395894 wikitext text/x-wiki <includeonly>{{#invoke:kl-pron|show}}</includeonly><noinclude>{{#invoke:kl-pron|show}}{{documentation}}</noinclude> 8hoded3o4fv5ca517d12kfpnlgjepdv 
မဝ်ဂျူ:kl-pron/doc 828 295368 395896 2026-05-29T16:14:57Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "This module implements the {{temp|kl-IPA}} template. <includeonly> {{module cat|kl}} </includeonly>" 395896 wikitext text/x-wiki This module implements the {{temp|kl-IPA}} template. <includeonly> {{module cat|kl}} </includeonly> 3rdcct6jz5t5hvs0qpogbqvfa1xqyi0 ကဏ္ဍ:မဝ်ဂျူဂရိန်လာန်ဂမၠိုၚ် 14 295369 395897 2026-05-29T16:16:54Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဂရိန်လာန်|ဂရိန်လာန်]] » '''မဝ..." 395897 wikitext text/x-wiki [[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဂရိန်လာန်|ဂရိန်လာန်]] » '''မဝ်ဂျူဂမၠိုၚ်''' :[[:ကဏ္ဍ:မဝ်ဂျူဂမၠိုၚ်|မဝ်ဂျူ]]ဘာသာဂရိန်လာန်၊ မနွံကဵုလုပ်အဝေါၚ်ကုဒ် Lua နကဵုမကၠောန်ဗဒှ် ကဵု မစဳရေၚ်ယဵုဒုၚ်သ္ပမာန်ဂမၠိုၚ်။ [[ကဏ္ဍ:ဘာသာဂရိန်လာန်]][[ကဏ္ဍ:မဝ်ဂျူဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဂ]] f1vccp40fwmc1jri2vi49m3a7yvgohr ထာမ်ပလိက်:kl-IPA/documentation 10 295370 395899 2026-05-29T16:21:17Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} This template creates automatic phonetic transcriptions for '''native''' Greenlandic words. It is powered by [[Module:kl-pron]]. ==Parameters== ; <code>1=</code> : The word that should be converted. It is defaulted to <code><nowiki>{{PAGENAME}}</nowiki></code>. ==References== * {{cite-book<!-- -->|last=Fortescue<!-- -->|first=Michael<!-- -->|title=West Greenlandic<!-- -->|p..." 395899 wikitext text/x-wiki {{documentation subpage}} This template creates automatic phonetic transcriptions for '''native''' Greenlandic words. It is powered by [[Module:kl-pron]]. ==Parameters== ; <code>1=</code> : The word that should be converted. It is defaulted to <code><nowiki>{{PAGENAME}}</nowiki></code>. 
==References== * {{cite-book<!-- -->|last=Fortescue<!-- -->|first=Michael<!-- -->|title=West Greenlandic<!-- -->|publisher=Routledge<!-- -->|year=1984<!-- -->|isbn=978-0-7099-1069-5<!-- -->}} <includeonly> [[ကဏ္ဍ:ထာမ်ပလိက်မပ္တိတ်ရမျာၚ်ဂရိန်လာန်ဂမၠိုၚ်|IPA]] </includeonly> gxrsddpe6u1o5h6x0j2hy4ibvyypjs9 ကဏ္ဍ:ထာမ်ပလိက်မပ္တိတ်ရမျာၚ်ဂရိန်လာန်ဂမၠိုၚ် 14 295371 395900 2026-05-29T16:22:31Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်ဂရိန်လာန်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်ပ္တိတ်ရမျာၚ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဂ]]" 395900 wikitext text/x-wiki [[ကဏ္ဍ:ထာမ်ပလိက်ဂရိန်လာန်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်ပ္တိတ်ရမျာၚ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဂ]] slpi9gam7shwujx5y7j0sa2wcwcravu ထာမ်ပလိက်:hu-infl-nom 10 295372 395901 2026-05-29T16:25:28Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:hu-nominals|show|regular}}<!-- -->{{#if:{{{stem2|}}}|{{#if:{{{nocat|}}}|| }}}}<!-- --><noinclude>{{documentation}}</noinclude>" 395901 wikitext text/x-wiki {{#invoke:hu-nominals|show|regular}}<!-- -->{{#if:{{{stem2|}}}|{{#if:{{{nocat|}}}|| }}}}<!-- --><noinclude>{{documentation}}</noinclude> l5fcrfmgeav6a5ls7ud1bawdpsbzamz မဝ်ဂျူ:hu-nominals 828 295373 395902 2026-05-29T17:17:00Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "local m_utilities = require("Module:utilities") local m_links = require("Module:links") local lang = require("Module:languages").getByCode("hu") local export = {} -- Functions that do the actual inflecting by creating the forms of a basic term. local inflections = {} -- The main entry point. -- This is the only function that can be invoked from a template. function export.show(frame) local infl_type = frame.args[..." 395902 Scribunto text/plain local m_utilities = require("Module:utilities") local m_links = require("Module:links") local lang = require("Module:languages").getByCode("hu") local export = {} -- Functions that do the actual inflecting by creating the forms of a basic term. local inflections = {} -- The main entry point. -- This is the only function that can be invoked from a template. 
function export.show(frame)
	-- Entry point for {{#invoke:hu-nominals|show|<type>}}.
	-- frame.args[1] selects the inflection function; the template's own
	-- parameters (frame:getParent().args) are passed on to that function.
	-- Returns the rendered inflection table followed by formatted categories.
	-- Raises an error when the type is missing or unknown.
	local infl_type = frame.args[1] or error("Inflection type has not been specified. Please pass parameter 1 to the module invocation")
	local args = frame:getParent().args

	if not inflections[infl_type] then
		error("Unknown inflection type '" .. infl_type .. "'")
	end

	local data = {forms = {}, title = nil, categories = {}}

	-- Generate the forms
	inflections[infl_type](args, data)

	-- Postprocess
	-- NOTE(review): postprocess and make_table are globals defined further
	-- down; they are resolved when show() runs, not when it is defined.
	postprocess(args, data)

	if args["form"] then
		-- table.insert(data.categories, "hu-decl with form")
	end

	return make_table(data) .. m_utilities.format_categories(data.categories, lang)
end

-- Inflection functions

-- Tracks (via Module:debug) stems whose supplied accusative vowel differs from
-- the one predicted from the stem ending. Not called anywhere in this module.
local function check_acc(stem, acc_sg_vowel, acc_sg_vowel2)
	if (mw.ustring.find(stem, "[nsz]$") or
		mw.ustring.find(stem, "[aáeéiíoóöőuúüű][lr]$") or
		mw.ustring.find(stem, "ny$") or
		mw.ustring.find(stem, "[aáeéiíoóöőuúüű]ly$")) and
		not mw.ustring.find(stem, "cs$") then
		acc_sg_vowel2 = ""
	end

	if acc_sg_vowel ~= acc_sg_vowel2 then
		require("Module:debug").track("hu-nominals/acc")
	end
end

-- Builds the stem variants used before consonant-initial endings.
-- Returns a table with:
--   normal       the stem unchanged
--   b, k, n, r, t  the stem plus that consonant, with an existing double
--                consonant reduced first so it is never tripled
--   v            the stem plus "v", with the "v" dropped after a long
--                consonant or assimilated to a preceding short consonant
-- The order of the substitutions below matters; do not reorder them.
local function make_stems(stem)
	local stems = {normal = stem}

	stems.b = mw.ustring.gsub(stem, "bb$", "b") .. "b"
	stems.k = mw.ustring.gsub(stem, "kk$", "k") .. "k"
	stems.n = mw.ustring.gsub(stem, "nn$", "n") .. "n"
	stems.r = mw.ustring.gsub(stem, "rr$", "r") .. "r"
	stems.t = mw.ustring.gsub(stem, "tt$", "t") .. "t"
	stems.v = stem .. "v"

	-- Remove v after a long consonant
	stems.v = mw.ustring.gsub(stems.v, "(ccs)v$", "%1")
	stems.v = mw.ustring.gsub(stems.v, "(ddz)v$", "%1")
	stems.v = mw.ustring.gsub(stems.v, "(ddzs)v$", "%1")
	stems.v = mw.ustring.gsub(stems.v, "(ggy)v$", "%1")
	stems.v = mw.ustring.gsub(stems.v, "(lly)v$", "%1")
	stems.v = mw.ustring.gsub(stems.v, "(nny)v$", "%1")
	stems.v = mw.ustring.gsub(stems.v, "(ssz)v$", "%1")
	stems.v = mw.ustring.gsub(stems.v, "(tty)v$", "%1")
	stems.v = mw.ustring.gsub(stems.v, "(zzs)v$", "%1")
	stems.v = mw.ustring.gsub(stems.v, "([bcdfghjklmnpqrstvwz])%1v$", "%1%1")

	-- Assimilate v to preceding short consonant
	stems.v = mw.ustring.gsub(stems.v, "csv$", "ccs")
	stems.v = mw.ustring.gsub(stems.v, "dzv$", "ddz")
	stems.v = mw.ustring.gsub(stems.v, "dzsv$", "ddzs")
	stems.v = mw.ustring.gsub(stems.v, "gyv$", "ggy")
	stems.v = mw.ustring.gsub(stems.v, "lyv$", "lly")
	stems.v = mw.ustring.gsub(stems.v, "nyv$", "nny")
	stems.v = mw.ustring.gsub(stems.v, "szv$", "ssz")
	stems.v = mw.ustring.gsub(stems.v, "thv$", "tht")
	stems.v = mw.ustring.gsub(stems.v, "tyv$", "tty")
	stems.v = mw.ustring.gsub(stems.v, "xv$", "xsz")
	stems.v = mw.ustring.gsub(stems.v, "zsv$", "zzs")
	stems.v = mw.ustring.gsub(stems.v, "([bcdfghjklmnpqrstvwz])v$", "%1%1")

	return stems
end

-- Ending vowels for each vowel-harmony code. These tables are shared and
-- only read, never modified.
local harmony_sets = {
	["o"] = {a = "a", aa = "á", o = "o", oo = "ó", u = "u"},
	["ö"] = {a = "e", aa = "é", o = "ö", oo = "ő", u = "ü"},
	["e"] = {a = "e", aa = "é", o = "e", oo = "ő", u = "ü"},
}

-- Maps a harmony code ("o", "ö" or "e") to its vowel table. Any other value
-- is returned unchanged, exactly as the previous inline if/elseif chains did.
local function harmony_vowels(vh)
	return harmony_sets[vh] or vh
end

-- Turns an empty-string template parameter into nil.
local function blank_to_nil(value)
	if value == "" then
		return nil
	end
	return value
end

-- Reads parameter 1 (the base stem). On a page in the template namespace a
-- missing parameter falls back to the literal "{{{1}}}" so the template page
-- itself still renders. Raises an error when the result is empty.
local function get_base(args)
	local base = args[1] or (mw.title.getCurrentTitle().nsText == "ထာမ်ပလိက်" and "{{{1}}}") or ""
	if base == "" then
		error("Parameter 1 (base stem) may not be empty.")
	end
	return base
end

-- Fills every "*_pl" entry of data.forms from the plural stem (without the
-- final -k) and the harmony code vh.
local function make_plural(data, stem, vh)
	vh = harmony_vowels(vh)

	data.forms["nom_pl"] = {stem .. "k"}
	data.forms["acc_pl"] = {stem .. "k" .. vh.a .. "t"}
	data.forms["dat_pl"] = {stem .. "kn" .. vh.a .. "k"}
	data.forms["ins_pl"] = {stem .. "kk" .. vh.a .. "l"}
	data.forms["cfi_pl"] = {stem .. "kért"}
	data.forms["tra_pl"] = {stem .. "kk" .. vh.aa}
	data.forms["ter_pl"] = {stem .. "kig"}
	data.forms["esf_pl"] = {stem .. "kként"}
	data.forms["esm_pl"] = {stem .. "k" .. vh.u .. "l"}
	data.forms["ine_pl"] = {stem .. "kb" .. vh.a .. "n"}
	data.forms["spe_pl"] = {stem .. "k" .. vh.o .. "n"}
	data.forms["ade_pl"] = {stem .. "kn" .. vh.aa .. "l"}
	data.forms["ill_pl"] = {stem .. "kb" .. vh.a}
	data.forms["sbl_pl"] = {stem .. "kr" .. vh.a}
	data.forms["all_pl"] = {stem .. "kh" .. vh.o .. "z"}
	data.forms["ela_pl"] = {stem .. "kb" .. vh.oo .. "l"}
	data.forms["del_pl"] = {stem .. "kr" .. vh.oo .. "l"}
	data.forms["abl_pl"] = {stem .. "kt" .. vh.oo .. "l"}
	data.forms["np1_pl"] = {stem .. "ké"}
	data.forms["np2_pl"] = {stem .. "kéi"}
end

-- Fills every "*_sg" entry of data.forms for a stem ending in a short vowel
-- (a, e, o, ö).
--   stem          full stem including the final short vowel
--   stem2         optional second stem; defaults to stem minus its final vowel
--   spe_sg_stem   optional superessive stem; defaults to stem2
--   acc_sg_vowel  nil keeps the stem vowel, "-" uses stem2, any other value
--                 replaces the final vowel before -t
--   v             optional consonant replacing the "v" of the -val/-vá endings
--   vh            harmony code
local function make_singular_short(data, stem, stem2, spe_sg_stem, acc_sg_vowel, v, vh)
	vh = harmony_vowels(vh)

	stem2 = make_stems(stem2 or mw.ustring.gsub(stem, "[aeoö]$", ""))

	if v then
		stem2.v = stem2.normal .. v
	end

	local stem_no_vowel = mw.ustring.gsub(stem, "[aeoö]$", "")

	-- The superessive needs a linking vowel unless its stem already ends in one.
	spe_sg_stem = spe_sg_stem or stem2.normal
	spe_sg_stem = spe_sg_stem .. (mw.ustring.find(spe_sg_stem, "[aáeéiíoóöőuúüű]$") and "" or vh.o)

	local acc_sg_stem = stem

	if acc_sg_vowel == "-" then
		acc_sg_stem = stem2.normal
	elseif acc_sg_vowel then
		acc_sg_stem = stem_no_vowel .. acc_sg_vowel
	end

	data.forms["nom_sg"] = {mw.loadData("Module:headword/data").pagename}
	data.forms["acc_sg"] = {acc_sg_stem .. "t"}
	data.forms["dat_sg"] = {stem2.n .. vh.a .. "k"}
	data.forms["ins_sg"] = {stem2.v .. vh.a .. "l"}
	data.forms["cfi_sg"] = {stem2.normal .. "ért"}
	data.forms["tra_sg"] = {stem2.v .. vh.aa}
	data.forms["ter_sg"] = {stem2.normal .. "ig"}
	data.forms["esf_sg"] = {stem2.k .. "ént"}
	data.forms["esm_sg"] = {stem2.normal .. vh.u .. "l"}
	data.forms["ine_sg"] = {stem2.b .. vh.a .. "n"}
	data.forms["spe_sg"] = {spe_sg_stem .. "n"}
	data.forms["ade_sg"] = {stem2.n .. vh.aa .. "l"}
	data.forms["ill_sg"] = {stem2.b .. vh.a}
	data.forms["sbl_sg"] = {stem2.r .. vh.a}
	data.forms["all_sg"] = {stem2.normal .. "h" .. vh.o .. "z"}
	data.forms["ela_sg"] = {stem2.b .. vh.oo .. "l"}
	data.forms["del_sg"] = {stem2.r .. vh.oo .. "l"}
	data.forms["abl_sg"] = {stem2.t .. vh.oo .. "l"}
	data.forms["np1_sg"] = {stem2.normal .. "é"}
	data.forms["np2_sg"] = {stem2.normal .. "éi"}
end

-- Fills every "*_sg" entry of data.forms for a stem ending in a long or high
-- vowel, or in a hyphen. Unless the stem ends in "-", a tracking call is made
-- when the stem does not end in one of i u ü á é í ó ő ú ű, or when it differs
-- from the page name with its final short vowel lengthened.
local function make_singular_long(data, stem, vh)
	if not mw.ustring.find(stem, "%-$") then
		if not mw.ustring.find(stem, "[iuüáéíóőúű]$") then
			require("Module:debug").track("hu-nominals/vowel")
		elseif stem ~= mw.ustring.gsub(mw.loadData("Module:headword/data").pagename, "([aeoö])$", {["a"] = "á", ["e"] = "é", ["o"] = "ó", ["ö"] = "ő"}) then
			require("Module:debug").track("hu-nominals/pagename")
		end
	end

	vh = harmony_vowels(vh)

	local stems = make_stems(stem)

	data.forms["nom_sg"] = {mw.loadData("Module:headword/data").pagename}
	data.forms["acc_sg"] = {stems.normal .. "t"}
	data.forms["dat_sg"] = {stems.n .. vh.a .. "k"}
	data.forms["ins_sg"] = {stems.v .. vh.a .. "l"}
	data.forms["cfi_sg"] = {stems.normal .. "ért"}
	data.forms["tra_sg"] = {stems.v .. vh.aa}
	data.forms["ter_sg"] = {stems.normal .. "ig"}
	-- Built from the page name rather than the stem; a hyphen is inserted
	-- when the stem itself ends in one.
	data.forms["esf_sg"] = {mw.loadData("Module:headword/data").pagename .. (mw.ustring.find(stems.normal, "%-$") and "-" or "") .. "ként"}
	data.forms["esm_sg"] = {stems.normal .. vh.u .. "l"}
	data.forms["ine_sg"] = {stems.b .. vh.a .. "n"}
	data.forms["spe_sg"] = {stems.normal .. "n"}
	data.forms["ade_sg"] = {stems.n .. vh.aa .. "l"}
	data.forms["ill_sg"] = {stems.b .. vh.a}
	data.forms["sbl_sg"] = {stems.r .. vh.a}
	data.forms["all_sg"] = {stems.normal .. "h" .. vh.o .. "z"}
	data.forms["ela_sg"] = {stems.b .. vh.oo .. "l"}
	data.forms["del_sg"] = {stems.r .. vh.oo .. "l"}
	data.forms["abl_sg"] = {stems.t .. vh.oo .. "l"}
	data.forms["np1_sg"] = {stems.normal .. "é"}
	data.forms["np2_sg"] = {stems.normal .. "éi"}
end

-- Fills every "*_sg" entry of data.forms for the "ak"/"ek"/"ok"/"ök" types,
-- where the stem is given without a linking vowel.
--   stem         the singular stem
--   stem2        optional accusative stem; defaults to stem
--   spe_sg_stem  optional superessive stem; defaults to stem2
--   acc_t        accusative ending; defaults to "t"
--   v            optional consonant replacing the "v" of the -val/-vá endings
--   vh           harmony code
local function make_singular_Vk(data, stem, stem2, spe_sg_stem, acc_t, v, vh)
	vh = harmony_vowels(vh)

	local stems = make_stems(stem)

	if v then
		stems.v = stems.normal .. v
	end

	stem2 = stem2 or stem
	spe_sg_stem = spe_sg_stem or stem2
	acc_t = acc_t or "t"

	local fill_vowel = mw.ustring.find(spe_sg_stem, "[aáeéiíoóöőuúüű]$") and "" or vh.o

	data.forms["nom_sg"] = {mw.loadData("Module:headword/data").pagename}
	data.forms["acc_sg"] = {stem2 .. acc_t}
	data.forms["dat_sg"] = {stems.n .. vh.a .. "k"}
	data.forms["ins_sg"] = {stems.v .. vh.a .. "l"}
	data.forms["cfi_sg"] = {stems.normal .. "ért"}
	data.forms["tra_sg"] = {stems.v .. vh.aa}
	data.forms["ter_sg"] = {stems.normal .. "ig"}
	data.forms["esf_sg"] = {stems.k .. "ént"}
	data.forms["esm_sg"] = {stems.normal .. vh.u .. "l"}
	data.forms["ine_sg"] = {stems.b .. vh.a .. "n"}
	data.forms["spe_sg"] = {spe_sg_stem .. fill_vowel .. "n"}
	data.forms["ade_sg"] = {stems.n .. vh.aa .. "l"}
	data.forms["ill_sg"] = {stems.b .. vh.a}
	data.forms["sbl_sg"] = {stems.r .. vh.a}
	data.forms["all_sg"] = {stems.normal .. "h" .. vh.o .. "z"}
	data.forms["ela_sg"] = {stems.b .. vh.oo .. "l"}
	data.forms["del_sg"] = {stems.r .. vh.oo .. "l"}
	data.forms["abl_sg"] = {stems.t .. vh.oo .. "l"}
	data.forms["np1_sg"] = {stems.normal .. "é"}
	data.forms["np2_sg"] = {stems.normal .. "éi"}
end

-- "regular": parameter 1 is the plural stem (plural minus -k), parameter 2 the
-- harmony code, parameter 3 the optional accusative vowel. Stems ending in a
-- short vowel use make_singular_short; stems ending in a long/high vowel or a
-- hyphen use make_singular_long.
inflections["regular"] = function(args, data)
	local stem = get_base(args)
	-- On template pages the harmony code defaults to "o".
	local vh = args[2] or (mw.title.getCurrentTitle().nsText == "ထာမ်ပလိက်" and "o")
	local acc_sg_vowel = blank_to_nil(args[3])
	local stem2 = blank_to_nil(args["stem2"])
	local spe_sg_stem = blank_to_nil(args["spe_sg_stem"])
	local v = blank_to_nil(args["v"])

	if not (vh == "o" or vh == "ö" or vh == "e") then
		error("Vowel harmony type must be \"o\", \"ö\" or \"e\".")
	end

	local vh_pl = vh

	if mw.ustring.find(stem, "[aeoö]$") then
		data.title = "stem in " .. m_links.full_link({lang = lang, alt = "-" .. mw.ustring.match(stem, "([aeoö])$") .. "-"}, "term")

		-- A front-rounded word whose stem ends in -e takes unrounded
		-- endings in the plural.
		if vh == "ö" and mw.ustring.find(stem, "e$") then
			vh_pl = "e"
		end

		make_singular_short(data, stem, stem2, spe_sg_stem, acc_sg_vowel, v, vh)
	elseif mw.ustring.find(stem, "[iuüáéíóőúű%-]$") or mw.title.getCurrentTitle().nsText == "ထာမ်ပလိက်" then
		data.title = "stem in long/high vowel"
		make_singular_long(data, stem, vh)
	else
		error("The stem must end in vowel or \"-\".")
	end

	if vh == "o" then
		data.title = data.title .. ", back&nbsp;harmony"
	elseif vh == "ö" then
		data.title = data.title .. ", front&nbsp;rounded&nbsp;harmony"
	elseif vh == "e" then
		data.title = data.title .. ", front&nbsp;unrounded&nbsp;harmony"
	end

	make_plural(data, stem, vh_pl)
end

-- Registers one of the "ak"/"ek"/"ek2"/"ok"/"ök" inflection types. These were
-- previously five copy-pasted functions differing only in the values below.
--   name           key in the inflections table
--   alt            plural suffix shown in the table title
--   harmony_label  text appended to the title
--   sg_vh          harmony code for the singular
--   pl_vowel       linking vowel appended to the stem before the plural -k
--   pl_vh          harmony code for the plural
local function define_Vk(name, alt, harmony_label, sg_vh, pl_vowel, pl_vh)
	inflections[name] = function(args, data)
		data.title = "plural in " .. m_links.full_link({lang = lang, alt = alt}, "term") .. harmony_label

		local base = get_base(args)
		local final = args[2] or ""
		local acc_t = blank_to_nil(args[3])
		local stem2 = blank_to_nil(args["stem"])
		local spe_sg_stem = blank_to_nil(args["sup"])
		local v = blank_to_nil(args["v"])

		if acc_t and not mw.ustring.find(acc_t, "^[aeoö]t$") then
			error("Invalid accusative singular")
		end

		make_singular_Vk(data, base .. final, stem2, spe_sg_stem, acc_t, v, sg_vh)
		make_plural(data, (stem2 or base .. final) .. pl_vowel, pl_vh)
	end
end

define_Vk("ak", "-ak", ", back&nbsp;harmony", "o", "a", "o")
define_Vk("ek", "-ek", ", front&nbsp;unrounded&nbsp;harmony", "e", "e", "e")
-- "ek2": front-rounded singular endings combined with an unrounded -ek plural.
define_Vk("ek2", "-ek", ", front&nbsp;rounded&nbsp;harmony", "ö", "e", "e")
define_Vk("ok", "-ok", ", back&nbsp;harmony", "o", "o", "o")
define_Vk("ök", "-ök", ", front&nbsp;rounded&nbsp;harmony", "ö", "ö", "ö")

-- Registers one of the "k-back"/"k-front1"/"k-front2" inflection types, where
-- the plural -k attaches directly to the stem. The "stem" and "sup"
-- parameters are not used by these types; passing them is only tracked.
local function define_k(name, harmony_label, vh)
	inflections[name] = function(args, data)
		if args["stem"] or args["sup"] then
			require("Module:debug").track("hu-nominals/stem2")
		end

		data.title = "plural in " .. m_links.full_link({lang = lang, alt = "-k"}, "term") .. harmony_label

		local base = get_base(args)
		local final = args[2] or ""

		make_singular_long(data, base .. final, vh)
		make_plural(data, base .. final, vh)
	end
end

define_k("k-back", ", back&nbsp;harmony", "o")
define_k("k-front1", ", front&nbsp;unrounded&nbsp;harmony", "e")
define_k("k-front2", ", front&nbsp;rounded&nbsp;harmony", "ö")

-- Removes or moves forms according to the display parameters.
--   n / form         "sg" or "sing" drops plural forms, "pl" drops singular
--                    forms, "isg" moves the singular forms into the plural
--                    slots
--   esm_sg, ul, ül   keep the essive-modal singular (dropped by default)
--   esm_pl, akul, ekül, okul, ökül, kul, kül
--                    keep the essive-modal plural (dropped by default)
--   noposs, é, éi    blank the non-attributive possessive rows
-- Raises an error for any other value of n.
-- NOTE(review): declared as a global (no "local") because export.show calls
-- it before this definition appears.
function postprocess(args, data)
	local n = args["n"] or args["form"]; if n == "" then n = nil end

	if n and not (n == "sg" or n == "sing" or n == "pl" or n == "isg") then
		-- isg: "i-type singular" like képeim, képeid, képei, which need to be
		-- inflected like singular but displayed as plural
		error("The parameter \"n\" must be \"sg\", \"pl\", \"isg\" or empty.")
	end

	if n == "isg" then
		data.forms["esm_pl"] = data.forms["esm_sg"]
	end

	-- sortedPairs saves a list of keys so that we can modify the table
	-- while iterating over it.
	for key, form in require("Module:table").sortedPairs(data.forms) do
		-- Do not show singular or plural forms for nominals that don't have them
		if (n == "pl" and key:find("_sg$")) or ((n == "sg" or n == "sing") and key:find("_pl$")) then
			data.forms[key] = nil
		end

		-- if "isg" is given for "n", singular forms are copied into the plural,
		-- then the singular forms are set to nil
		if (n == "isg" and key:find("_sg$")) then
			-- string.gsub returns two values; inside the index brackets only
			-- the first (the new key) is used.
			data.forms[string.gsub(key, "sg", "pl")] = form
			data.forms[key] = nil
		end
	end

	-- True when at least one of the named parameters is present and non-empty.
	local function any_arg(...)
		for _, key in ipairs {...} do
			if args[key] and args[key] ~= "" then
				return true
			end
		end
		return false
	end

	local has_esm_sg = any_arg("esm_sg", "ul", "ül")
	local has_esm_pl = any_arg("esm_pl", "akul", "ekül", "okul", "ökül", "kul", "kül")
	local has_noposs = any_arg("noposs", "é", "éi")

	if not has_esm_sg then
		data.forms["esm_sg"] = nil
	end

	if not ((n == "isg" and has_esm_sg) or has_esm_pl) then
		data.forms["esm_pl"] = nil
	end

	-- Blank the non-attributive possessive rows if noposs is specified in the call
	if has_noposs then
		data.forms["np1_sg"] = nil
		data.forms["np2_sg"] = nil
		data.forms["np1_pl"] = nil
		data.forms["np2_pl"] = nil
	end
end

-- Make the table
-- Renders data.forms as a wikitext inflection table. Each {{{code}}}
-- placeholder in the skeleton is replaced by the linked forms for that code,
-- or by an em dash when the form is absent. {{{info}}} becomes the
-- parenthesised data.title and {{{lemma}}} the linked page name.
-- NOTE(review): global for the same reason as postprocess.
function make_table(data)
	local function show_form(forms, code)
		local form = forms[code]

		if not form then
			return "&mdash;"
		elseif type(form) ~= "table" then
			error("a non-table value was given in the list of inflected forms.")
		end

		local ret = {}

		-- Accelerator tag: "acc_sg" becomes "acc|s", "nom_pl" becomes "nom|p".
		local accel = {
			form = code:gsub("%f[^_](%a%a)$", {sg = "s", pl = "p"}):gsub("_", "|"),
		}

		for key, subform in ipairs(form) do
			table.insert(ret, m_links.full_link({
				lang = lang,
				term = subform,
				accel = accel,
			}))
		end

		return table.concat(ret, "<br/>")
	end

	local function repl(param)
		if param == "lemma" then
			return m_links.full_link({lang = lang, alt = mw.loadData("Module:headword/data").pagename}, "term")
		elseif param == "info" then
			return data.title and " (" .. data.title .. ")" or ""
		else
			return show_form(data.forms, param)
		end
	end

	-- Wikitable markup ("!", "|", "|-") is only recognised at the start of a
	-- line, so each cell and row separator gets its own line.
	-- NOTE(review): the line breaks were reconstructed from a flattened copy
	-- of this string; confirm against the live module.
	local wikicode = mw.getCurrentFrame():expandTemplate{
		title = 'inflection-table-top',
		args = { title = 'Inflection{{{info}}}', tall = 'yes', palette = 'green' }
	} .. [=[

! class="outer" |
! class="outer" | ကိုန်ဨကဝုစ်
! class="outer" | ကိုန်ဗဟုဝစ်
|-
! [[nominative case|မဒုၚ်ယၟု]]
| {{{nom_sg}}}
| {{{nom_pl}}}
|-
! [[accusative case|ကမ္မကာရက]]
| {{{acc_sg}}}
| {{{acc_pl}}}
|-
! [[dative case|ပြကမ္မကာရက]]
| {{{dat_sg}}}
| {{{dat_pl}}}
|-
! [[instrumental case|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်]]
| {{{ins_sg}}}
| {{{ins_pl}}}
|-
! တၞုၚ်တၞောတ်-ပရေၚ်ပရောဟိုတ်
| {{{cfi_sg}}}
| {{{cfi_pl}}}
|-
! [[translative case|ပရေၚ်ကၠာဲပ္တိတ်]]
| {{{tra_sg}}}
| {{{tra_pl}}}
|-
! [[terminative case|ပရေၚ်တုဲဒှ်]]
| {{{ter_sg}}}
| {{{ter_pl}}}
|-
! ဓဝ်ဍာံ-အဓိက
| {{{esf_sg}}}
| {{{esf_pl}}}
|-
! မဝ်ဒါယ်လ်-အဓိက
| {{{esm_sg}}}
| {{{esm_pl}}}
|-
! [[inessive case|ပရေၚ်ဟွံဆေၚ်စပ်]]
| {{{ine_sg}}}
| {{{ine_pl}}}
|-
! [[superessive case|တၞုၚ်တၞောတ်]]
| {{{spe_sg}}}
| {{{spe_pl}}}
|-
! [[adessive case|ပရေၚ်စောဲလာံ]]
| {{{ade_sg}}}
| {{{ade_pl}}}
|-
! [[illative case|ဟၟဲကဵုအဓိပ္ပါဲ]]
| {{{ill_sg}}}
| {{{ill_pl}}}
|-
! ဒုၚ်အသၟဝ်တန်ဍောတ်
| {{{sbl_sg}}}
| {{{sbl_pl}}}
|-
! [[allative case|တသိုက်ပိုန်ဂြပ်]]
| {{{all_sg}}}
| {{{all_pl}}}
|-
! [[elative case|မစၞောန်ထ္ၜးအဆက်နာမ်]]
| {{{ela_sg}}}
| {{{ela_pl}}}
|-
! [[delative case|ပြကမ္မကာရက]]
| {{{del_sg}}}
| {{{del_pl}}}
|-
! [[ablative case|ပရေၚ်မလၚ်]]
| {{{abl_sg}}}
| {{{abl_pl}}}
|-
! ကိုန်ဨကဝုစ် &ndash; မစောဲစုတ်-ဟွံသေၚ်<br>ပၟိက်သၟိက်မိက်ဂွံပိုၚ်ပြဳ
| {{{np1_sg}}}
| {{{np1_pl}}}
|-
! ကိုန်ဗဟုဝစ် &ndash; မစောဲစုတ်-ဟွံသေၚ်<br>ပၟိက်သၟိက်မိက်ဂွံပိုၚ်ပြဳ
| {{{np2_sg}}}
| {{{np2_pl}}}
]=] .. mw.getCurrentFrame():expandTemplate{ title = 'inflection-table-bottom' }

	return mw.ustring.gsub(wikicode, "{{{([a-z0-9_]+)}}}", repl)
end

return export
 nhntc9bonrnvix3xb5370hairjwfgzh မဝ်ဂျူ:hu-nominals/doc 828 295374 395903 2026-05-29T17:19:01Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "Used by {{temp|hu-infl-nom}}. <includeonly> {{module cat|hu}} </includeonly>" 395903 wikitext text/x-wiki Used by {{temp|hu-infl-nom}}. <includeonly> {{module cat|hu}} </includeonly> qaoh54e43yviih96cq10jutnnizn4ta ကဏ္ဍ:မဝ်ဂျူဟာန်ဂါရေဝ်ဂမၠိုၚ် 14 295375 395904 2026-05-29T17:22:29Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဟာန်ဂါရေဝ်|ဟာန်ဂါရေဝ်]] » '''..."
395904 wikitext text/x-wiki [[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဟာန်ဂါရေဝ်|ဟာန်ဂါရေဝ်]] » '''မဝ်ဂျူဂမၠိုၚ်''' :[[:ကဏ္ဍ:မဝ်ဂျူဂမၠိုၚ်|မဝ်ဂျူ]]ဘာသာဟာန်ဂါရေဝ်၊ မနွံကဵုလုပ်အဝေါၚ်ကုဒ် Lua နကဵုမကၠောန်ဗဒှ် ကဵု မစဳရေၚ်ယဵုဒုၚ်သ္ပမာန်ဂမၠိုၚ်။ [[ကဏ္ဍ:ဘာသာဟာန်ဂါရေဝ်]][[ကဏ္ဍ:မဝ်ဂျူဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]] b4ccoudzqws7hl3e3krjv7u90pc0zx3 ကဏ္ဍ:မဝ်ဂျူပွမပြံၚ်လှာဲဟာန်ဂါရေဝ်ဂမၠိုၚ် 14 295376 395905 2026-05-29T17:24:46Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:မဝ်ဂျူဟာန်ဂါရေဝ်ဂမၠိုၚ်]][[ကဏ္ဍ:မဝ်ဂျူဗီုအပြံၚ်အလှာဲဝေါဟာဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]]" 395905 wikitext text/x-wiki [[ကဏ္ဍ:မဝ်ဂျူဟာန်ဂါရေဝ်ဂမၠိုၚ်]][[ကဏ္ဍ:မဝ်ဂျူဗီုအပြံၚ်အလှာဲဝေါဟာဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]] q3ns4hqjhfaufqq1zk7hdn38wdu0c0a ထာမ်ပလိက်:hu-infl-nom/documentation 10 295377 395906 2026-05-29T17:30:24Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} {{uses lua|Module:hu-nominals}} This template is for use with Hungarian regular nominals (nouns, adjectives, numerals and pronouns). It can also be used for abbreviations of nominals where suffixes are attached with a hyphen (-). ==Parameters== ; <code>1=</code> : The stem of the word, which must end in a vowel or a hyphen (for some abbreviations). : To construct the stem, take the plural of..." 395906 wikitext text/x-wiki {{documentation subpage}} {{uses lua|Module:hu-nominals}} This template is for use with Hungarian regular nominals (nouns, adjectives, numerals and pronouns). It can also be used for abbreviations of nominals where suffixes are attached with a hyphen (-). ==Parameters== ; <code>1=</code> : The stem of the word, which must end in a vowel or a hyphen (for some abbreviations). : To construct the stem, take the plural of the word and remove the final {{m|hu|-k}}. 
Examples: :: ''Lemma'': {{m|hu|ház||house}} :: ''Plural'': {{m|hu|házak||houses}} :: ''Stem (parameter 1)'': ''háza'' :: ''Lemma'': {{m|hu|CD}} :: ''Plural'': {{m|hu|CD-k}} :: ''Stem (parameter 1)'': ''CD-'' : To construct the stem for the inflection of a '''possessive form''', take the accusative and remove the final {{m|hu|-t}}: :: ''Lemma'': {{m|hu|ablak||window}} :: ''3rd person possessive'': {{m|hu|ablaka||his/her/its window}} :: ''Accusative of 3rd person possessive'': {{m|hu|ablakát}} :: ''Stem (parameter 1)'': ''ablaká'' ; <code>2=</code> : The vowel in the ending of the allative singular ({{m|hu|-hoz}}/{{m|hu|-hez}}/{{m|hu|-höz}}). Values can be: :: <code>o</code> (for back a/á/o/ó/u/ú harmony) :: <code>e</code> (for front unrounded e/é/i/í harmony) :: <code>ö</code> (for front rounded ö/ő/ü/ű harmony) ; <code>3=</code> : The vowel before the ending in the accusative singular. By default, this is the final vowel of the stem in parameter 1. Specifying this parameter replaces that vowel with another. If the word forms its accusative singular using the second stem (without the final vowel), use "<code>-</code>". ; <code>stem2=</code> : Specifies the second stem: the stem that is used to form all of the singular cases except the accusative. By default, this is the regular stem with a final short low or mid vowel (''a'', ''e'', ''o'', ''ö'') removed. Most of the time, stem2 will be the lemma. ; <code>pos=</code> : The part of speech of the word in the ''plural,'' IF <code>stem2</code> was supplied AND the word is not a noun. (All other words with <code>stem2</code> are categorized under "<code>Hungarian ''nouns'' with alternating stems</code>" because most of these words are nouns.) ; <code>spe_sg_stem=</code> : This should only be used together with <code>stem2=</code>, and specifies the stem of the superessive singular, if it is different from the <code>stem2=</code> value. 
; <code>v=</code> : Specifies which consonant replaces the "v" of the instrumental and translative singular endings. By default, this is the same as the final consonant of the second stem, or ''v'' if it ends in a vowel. For abbreviations or words with a foreign spelling, it's not possible to determine this from the spelling, so this parameter provides it. For proper nouns ending in two identical consonants, the parameter's value must be preceded by a hyphen: :: ''Lemma'': {{m|hu|Baross}} :: ''Parameter'': v=-s :: ''Instrumental singular case'': Baross-sal :: ''Translative singular case'': Baross-sá :If the final double consonants are -tt, -nn, -kk, -bb, -rr, -hh, use the old templates that start with {{temp|hu-decl}}. They will add the hyphen not just to the instrumental and translative cases, but to all appropriate cases. ; <code>n=</code> : Can be <code>sg</code>, <code>pl</code>, or <code>isg</code>. The first two specify that the word has only singular or only plural forms, respectively (if left empty, the default is to show both). The value <code>isg</code> (for <span style="white-space:nowrap;">“{{m|hu|-i|id=possessive}}-type</span> singular”) is used when the singular inflection is needed, although the meaning of the word is plural, in the case of [[Appendix:Hungarian possessive suffixes|multiple-possession forms]], which should be displayed in the plural column. ; <code>esm_sg=1</code> : Specify this if the word has an essive-modal singular form. By default, this form is omitted. ; <code>esm_pl=1</code> : Specify this if the word has an essive-modal plural form. By default, this form is omitted. ; <code>noposs=y</code> : Specify this if the two rows for the non-attributive possessive forms should be blanked. By default, these forms are included. ==Examples== ===Basic usage=== For most words, the first two parameters are enough. 
For {{m|hu|ház}}: : <code><nowiki>{{hu-infl-nom|háza|o}}</nowiki></code> For {{m|hu|ablak}}: : <code><nowiki>{{hu-infl-nom|ablako|o}}</nowiki></code> For {{m|hu|ábra}}: : <code><nowiki>{{hu-infl-nom|ábrá|o}}</nowiki></code> For {{m|hu|öv}}: : <code><nowiki>{{hu-infl-nom|öve|ö}}</nowiki></code> For {{m|hu|összefüggő}}: : <code><nowiki>{{hu-infl-nom|összefüggőe|ö}}</nowiki></code> For {{m|hu|kedd}}: : <code><nowiki>{{hu-infl-nom|kedde|e}}</nowiki></code> For {{m|hu|kefe}}: : <code><nowiki>{{hu-infl-nom|kefé|e}}</nowiki></code> If the word has essive-modal forms, the parameters for those are added too. For {{m|hu|díj}}: : <code><nowiki>{{hu-infl-nom|díja|o|esm_sg=1|esm_pl=1}}</nowiki></code> For {{m|hu|eszperantó}}: : <code><nowiki>{{hu-infl-nom|eszperantó|o|esm_sg=1}}</nowiki></code> For {{m|hu|török}}: : <code><nowiki>{{hu-infl-nom|törökö|ö|esm_sg=1}}</nowiki></code> For {{m|hu|nő}}: : <code><nowiki>{{hu-infl-nom|nő|ö|esm_sg=1}}</nowiki></code> For {{m|hu|kert}}: : <code><nowiki>{{hu-infl-nom|kerte|e|esm_sg=1|esm_pl=1}}</nowiki></code> Singular-only and plural-only nouns must specify the <code>n=</code> parameter. For {{m|hu|Románia}}: : <code><nowiki>{{hu-infl-nom|Romániá|o|n=sg}}</nowiki></code> For {{m|hu|Budapest}}: : <code><nowiki>{{hu-infl-nom|Budapeste|e|n=sg}}</nowiki></code> For {{m|hu|Falkland-szigetek}}: : <code><nowiki>{{hu-infl-nom|Falkland-szigete|e|n=pl}}</nowiki></code> ===Vowel dropped in accusative singular=== The third parameter must be specified if the final vowel of the stem is dropped before the accusative singular ending {{m|hu|-t}}. 
For {{m|hu|asztal}}: : <code><nowiki>{{hu-infl-nom|asztalo|o|-}}</nowiki></code> For {{m|hu|Tokaj}}: : <code><nowiki>{{hu-infl-nom|Tokajo|o|-|n=sg}}</nowiki></code> For {{m|hu|kör}}: : <code><nowiki>{{hu-infl-nom|körö|ö|-}}</nowiki></code> For {{m|hu|Köln}}: : <code><nowiki>{{hu-infl-nom|Kölnö|ö|-|n=sg}}</nowiki></code> For {{m|hu|ösvény}}: : <code><nowiki>{{hu-infl-nom|ösvénye|e|-}}</nowiki></code> ===Alternating stems=== Some words will alternate between two stems. The simplest (when it comes to parameters) are those where short vowels alternate with long ones. For {{m|hu|szamár}}: : <code><nowiki>{{hu-infl-nom|szamara|o|stem2=szamár}}</nowiki></code> For {{m|hu|úr}}: : <code><nowiki>{{hu-infl-nom|ura|o|stem2=úr}}</nowiki></code> For {{m|hu|tűz}}: : <code><nowiki>{{hu-infl-nom|tüze|ö|stem2=tűz}}</nowiki></code> For {{m|hu|levél}}: : <code><nowiki>{{hu-infl-nom|levele|e|stem2=levél}}</nowiki></code> For {{m|hu|név}}: : <code><nowiki>{{hu-infl-nom|neve|e|stem2=név}}</nowiki></code> Some words have a fill vowel before the final consonant, which is lost whenever the final vowel of the stem is present (in the plural, accusative singular and superessive singular). In these cases, both stem parameters must be specified. For {{m|hu|hatalom}} (plural {{m|hu|hatalmak}}): : <code><nowiki>{{hu-infl-nom|hatalma|o|stem2=hatalom|spe_sg_stem=hatalm}}</nowiki></code> For {{m|hu|álom}} (plural {{m|hu|álmok}}): : <code><nowiki>{{hu-infl-nom|álmo|o|stem2=álom|spe_sg_stem=álm}}</nowiki></code> For {{m|hu|tükör}} (plural {{m|hu|tükrök}}): : <code><nowiki>{{hu-infl-nom|tükrö|ö|stem2=tükör|spe_sg_stem=tükr}}</nowiki></code> For {{m|hu|fejedelem}} (plural {{m|hu|fejedelmek}}): : <code><nowiki>{{hu-infl-nom|fejedelme|e|stem2=fejedelem|spe_sg_stem=fejedelm|esm_sg=1|esm_pl=1}}</nowiki></code> There are 10 nouns with a v-stem ({{m|hu|cső}}, {{m|hu|fű}}, {{m|hu|hó}}, {{m|hu|kő}}, {{m|hu|lé}}, {{m|hu|ló}}, {{m|hu|mű}}, {{m|hu|szó}}, {{m|hu|tó}}, {{m|hu|tő}}). 
They will need one or both stem parameters, depending on how the superessive singular is formed. For {{m|hu|tó}}: : <code><nowiki>{{hu-infl-nom|tava|o|stem2=tó|spe_sg_stem=tav}}</nowiki></code> For {{m|hu|szó}}: : <code><nowiki>{{hu-infl-nom|szava|o|-|stem2=szó}}</nowiki></code> For {{m|hu|lé}}: : <code><nowiki>{{hu-infl-nom|leve|e|stem2=lé}}</nowiki></code> For {{m|hu|mű}}: : <code><nowiki>{{hu-infl-nom|műve|ö|stem2=mű|spe_sg_stem=műv}}</nowiki></code> ===Abbreviations and foreign spellings=== Abbreviations and words with foreign spellings must use the <code>v=</code> parameter, to indicate which final consonant is actually pronounced. For {{m|hu|ápr.}} (abbreviation of {{m|hu|április}}): : <code><nowiki>{{hu-infl-nom|ápr.-o|o|-|v=s}}</nowiki></code> For {{m|hu|FÁK}}: : <code><nowiki>{{hu-infl-nom|FÁK-a|o|v=k|n=sg}}</nowiki></code> For {{m|hu|szept.}} (abbreviation of {{m|hu|szeptember}}): : <code><nowiki>{{hu-infl-nom|szept.-e|e|-|v=r}}</nowiki></code> For {{m|hu|OS}}: : <code><nowiki>{{hu-infl-nom|OS-e|e|-|v=s}}</nowiki></code> ===Possessive forms=== The possessive forms of nominals can further be inflected by appending case suffixes. Final -a and -e change to -á and -é, respectively. 
For {{m|hu|ablaka||his/her/its window}}: : <code><nowiki>{{hu-infl-nom|ablaká|o|n=sg|esm_sg=1}}</nowiki></code> For {{m|hu|ablakunk||our window}}: : <code><nowiki>{{hu-infl-nom|ablakunka|o|n=sg|esm_sg=1}}</nowiki></code> For {{m|hu|kertje||his/her/its garden}}: : <code><nowiki>{{hu-infl-nom|kertjé|e|n=sg|esm_sg=1}}</nowiki></code> <includeonly> [[ကဏ္ဍ:ထာမ်ပလိက်ဆၜိုတ်ဒုၚ်ယၟုအပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်|*]] </includeonly> ivjds6cw08s1ujada471aq8p4xpc96v ကဏ္ဍ:ထာမ်ပလိက်ဆၜိုတ်ဒုၚ်ယၟုအပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ် 14 295378 395907 2026-05-29T17:31:36Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်ဆၜိုတ်ဒုၚ်ယၟုအပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]]" 395907 wikitext text/x-wiki [[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်ဆၜိုတ်ဒုၚ်ယၟုအပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]] oxbqz3zbe55nceozpd83ifpwv7td5ov ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ် 14 295379 395908 2026-05-29T17:33:41Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်ဟာန်ဂါရေဝ်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]]" 395908 wikitext text/x-wiki [[ကဏ္ဍ:ထာမ်ပလိက်ဟာန်ဂါရေဝ်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]] nba0hyv5ehbcreo072skheuy4cohah4 ထာမ်ပလိက်:hu-pos-tok 10 295380 395909 2026-05-29T17:35:51Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{hu-infl-pos-table<!-- -->|n={{#switch:{{{n|{{{form|}}}}}}|sg|sing=sg|pl=pl}}<!-- -->|1sg_sg={{{1}}}m<!-- -->|2sg_sg={{{1}}}d<!-- -->|3sg_sg=[[{{{1}}}ja]]{{#if:{{{3sg_sg2|}}}|, [[{{{3sg_sg2}}}]]}}<!-- NB: the first form needs linking whenever there is a second variant -->|1pl_sg={{{1}}}nk<!-- -->|2pl_sg={{{1}}}tok<!-- -->|3pl_sg=[[{{{1}}}juk]]{{#if:{{{3pl_sg2|}}}|, [[{{{3pl_sg2}}}]]}}<!-- NB: the first form needs li..." 
395909 wikitext text/x-wiki {{hu-infl-pos-table<!-- -->|n={{#switch:{{{n|{{{form|}}}}}}|sg|sing=sg|pl=pl}}<!-- -->|1sg_sg={{{1}}}m<!-- -->|2sg_sg={{{1}}}d<!-- -->|3sg_sg=[[{{{1}}}ja]]{{#if:{{{3sg_sg2|}}}|, [[{{{3sg_sg2}}}]]}}<!-- NB: the first form needs linking whenever there is a second variant -->|1pl_sg={{{1}}}nk<!-- -->|2pl_sg={{{1}}}tok<!-- -->|3pl_sg=[[{{{1}}}juk]]{{#if:{{{3pl_sg2|}}}|, [[{{{3pl_sg2}}}]]}}<!-- NB: the first form needs linking whenever there is a second variant -->|1sg_pl=[[{{{1}}}{{{2|}}}im]]{{#if:{{{2|}}}|&emsp;<small>(''or'' [[{{{1}}}im]])</small>}}<!-- -->|2sg_pl=[[{{{1}}}{{{2|}}}id]]{{#if:{{{2|}}}|&emsp;<small>(''or'' [[{{{1}}}id]])</small>}}<!-- -->|3sg_pl=[[{{{1}}}{{{2|}}}i]]{{#if:{{{2|}}}|&emsp;<small>(''or'' [[{{{1}}}i]])</small>}}<!-- -->|1pl_pl=[[{{{1}}}{{{2|}}}ink]]{{#if:{{{2|}}}|&emsp;<small>(''or'' [[{{{1}}}ink]])</small>}}<!-- -->|2pl_pl=[[{{{1}}}{{{2|}}}itok]]{{#if:{{{2|}}}|&emsp;<small>(''or'' [[{{{1}}}itok]])</small>}}<!-- -->|3pl_pl=[[{{{1}}}{{{2|}}}ik]]{{#if:{{{2|}}}|&emsp;<small>(''or'' [[{{{1}}}ik]])</small>}}<!-- -->|perspron={{{perspron|}}}<!-- -->}}<!-- -->{{#if:{{{form|}}}| }}<!-- --><noinclude>{{documentation}}</noinclude> 3hkzdkl57rp8glydup0600cd9ckhyjd ထာမ်ပလိက်:hu-pos-tok/documentation 10 295381 395910 2026-05-29T17:38:30Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} == Usage == This template generates the possessive forms of Hungarian nouns with the following characteristics: * Their final letter is a vowel: {{m|hu|hordó|t=barrel}}. * They form their second-person possessive plural by adding '''{{l|hu|-tok}}''': {{m|hu|hordótok|t=your barrel}}. The possessive template should be placed after the ====Declension==== header, under the regular declension..." 395910 wikitext text/x-wiki {{documentation subpage}} == Usage == This template generates the possessive forms of Hungarian nouns with the following characteristics: * Their final letter is a vowel: {{m|hu|hordó|t=barrel}}. 
* They form their second-person possessive plural by adding '''{{l|hu|-tok}}''': {{m|hu|hordótok|t=your barrel}}. The possessive template should be placed after the ====Declension==== header, under the regular declension template. ==Unnamed parameters== ; <code>1=</code> : stem, remains unchanged in all forms. If the noun lengthens the final vowel, provide the modified stem. ; <code>2=</code> : the third-person singular possessive ending for a plural noun, the default is '''-i'''; provide only if the ending requires an additional -ja, mostly for nouns ending in '''-i'''. ==Named parameters== ; <code>n=sg</code> :add this parameter if the noun has only singular forms. The plural column will contain dashes. ; <code>n=pl</code> :add this parameter if the noun has only plural forms. The singular column will contain dashes. ; <code>3sg_sg2=</code> : the full variant for third-person singular single-possession form, placed below the regular form separated by a new line ; <code>3pl_sg2=</code> : the full variant for third-person plural single-possession form, placed below the regular form separated by a new line ==Examples== For {{m|hu|hordó|t=barrel}}: :<code><nowiki>{{hu-pos-tok|hordó}}</nowiki></code> For {{m|hu|éjszaka|t=night}} (it lengthens the final vowel): :<code><nowiki>{{hu-pos-tok|éjszaká}}</nowiki></code> For {{m|hu|kocsi|t=car}} (it requires an additional -ja): :<code><nowiki>{{hu-pos-tok|kocsi|ja}}</nowiki></code> For {{m|hu|csikó|t=foal}} (it has variant forms in third-person): :<code><nowiki>{{hu-pos-tok|csikó|3sg_sg2=csikaja|3pl_sg2=csikajuk}}</nowiki></code> <includeonly> [[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်|pos-tok]] </includeonly> lbp3ji2v2th6tmoapbrv3xrh9243cqq ထာမ်ပလိက်:hu-infl-pos-table 10 295382 395911 2026-05-29T17:59:27Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{inflection-table-top|title={{#if:{{{perspron|}}}|သဗ္ဗနာမ်ဗီုပြၚ်အဆက်လက္ကရဴဆေၚ်စပ်ကဵုပူဂဵုနကဵုဝေါဟာ {{m|hu||{{pagename}}}}|ဗီုပြၚ်ပၟိက်သၟိက်မိက်ဂွံပိုၚ်ပြဳနကဵုဝေါဟာ 
{{m|hu||{{pagename}}}}}}|palette=green|tall=yes}} |- ! class="outer" | တ..." 395911 wikitext text/x-wiki {{inflection-table-top|title={{#if:{{{perspron|}}}|သဗ္ဗနာမ်ဗီုပြၚ်အဆက်လက္ကရဴဆေၚ်စပ်ကဵုပူဂဵုနကဵုဝေါဟာ {{m|hu||{{pagename}}}}|ဗီုပြၚ်ပၟိက်သၟိက်မိက်ဂွံပိုၚ်ပြဳနကဵုဝေါဟာ {{m|hu||{{pagename}}}}}}|palette=green|tall=yes}} |- ! class="outer" | တၠဒြပ် ! class="outer" | ပိုန်ဘိုက်မွဲတၠ ! class="outer" | ပိုန်ဘိုက်သ္ကုတ်ကရေက်ဗွဲမဂၠိုၚ် |- ! ပူဂဵုဒယှ်ေမရနုက်ကဵု၁ | {{#switch:{{{n|}}}|sg|={{#if:{{{1sg_sg|}}}|{{l-self|hu|{{{1sg_sg}}}}}|&mdash;}}|#default=&mdash;}} | {{#switch:{{{n|}}}|pl|={{#if:{{{1sg_pl|}}}|{{l-self|hu|{{{1sg_pl}}}}}|&mdash;}}|#default=&mdash;}} |- ! ပူဂဵုဒယှ်ေမရနုက်ကဵု၂ | {{#switch:{{{n|}}}|sg|={{#if:{{{2sg_sg|}}}|{{l-self|hu|{{{2sg_sg}}}}}|&mdash;}}|#default=&mdash;}} | {{#switch:{{{n|}}}|pl|={{#if:{{{2sg_pl|}}}|{{l-self|hu|{{{2sg_pl}}}}}|&mdash;}}|#default=&mdash;}} |- ! ပူဂဵုဒယှ်ေမရနုက်ကဵု၃ | {{#switch:{{{n|}}}|sg|={{#if:{{{3sg_sg|}}}|{{l-self|hu|{{{3sg_sg}}}}}|&mdash;}}|#default=&mdash;}} | {{#switch:{{{n|}}}|pl|={{#if:{{{3sg_pl|}}}|{{l-self|hu|{{{3sg_pl}}}}}|&mdash;}}|#default=&mdash;}} |- ! ကိုန်ဗဟုဝစ်ပူဂဵုမရနုက်ကဵု၁ | {{#switch:{{{n|}}}|sg|={{#if:{{{1pl_sg|}}}|{{l-self|hu|{{{1pl_sg}}}}}|&mdash;}}|#default=&mdash;}} | {{#switch:{{{n|}}}|pl|={{#if:{{{1pl_pl|}}}|{{l-self|hu|{{{1pl_pl}}}}}|&mdash;}}|#default=&mdash;}} |- ! ကိုန်ဗဟုဝစ်ပူဂဵုမရနုက်ကဵု၂ | {{#switch:{{{n|}}}|sg|={{#if:{{{2pl_sg|}}}|{{l-self|hu|{{{2pl_sg}}}}}|&mdash;}}|#default=&mdash;}} | {{#switch:{{{n|}}}|pl|={{#if:{{{2pl_pl|}}}|{{l-self|hu|{{{2pl_pl}}}}}|&mdash;}}|#default=&mdash;}} |- ! 
ကိုန်ဗဟုဝစ်ပူဂဵုမရနုက်ကဵု၃ | {{#switch:{{{n|}}}|sg|={{#if:{{{3pl_sg|}}}|{{l-self|hu|{{{3pl_sg}}}}}|&mdash;}}|#default=&mdash;}} | {{#switch:{{{n|}}}|pl|={{#if:{{{3pl_pl|}}}|{{l-self|hu|{{{3pl_pl}}}}}|&mdash;}}|#default=&mdash;}} {{inflection-table-bottom}}<noinclude>{{documentation}}</noinclude> 8yrvrsj6we6iubqx7hyhd3hthmojvmv ထာမ်ပလိက်:hu-infl-pos-table/documentation 10 295383 395912 2026-05-29T18:01:05Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} This template is used internally by other Hungarian possessive templates. It should not be used directly in entries. <includeonly> [[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်|pos]] </includeonly>" 395912 wikitext text/x-wiki {{documentation subpage}} This template is used internally by other Hungarian possessive templates. It should not be used directly in entries. <includeonly> [[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်|pos]] </includeonly> qwycy3wvo1aamxb1yi0rkhrmphqdkka ထာမ်ပလိက်:R:kne:Vanoverbergh 1933 10 295384 395913 2026-05-29T18:02:41Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:quote|call_template |template=cite-book |en,kne |year=1933 |author=Morice Vanoverbergh |title=A Dictionary of Lepanto Igorot or Kankanay. As it is spoken at Bauco |publisher=Verlag der Internationalen Zeitschrift „Anthropos“ |location=Mödling bei Wien, St. Gabriel, Österreich |series=Linguistische Anthropos-Bibliothek |seriesvolume=XII |oclc=1110007 |url=https://nlpdl.nlp.gov.ph/499/1933/83-11234/home...." 395913 wikitext text/x-wiki {{#invoke:quote|call_template |template=cite-book |en,kne |year=1933 |author=Morice Vanoverbergh |title=A Dictionary of Lepanto Igorot or Kankanay. As it is spoken at Bauco |publisher=Verlag der Internationalen Zeitschrift „Anthropos“ |location=Mödling bei Wien, St. 
Gabriel, Österreich |series=Linguistische Anthropos-Bibliothek |seriesvolume=XII |oclc=1110007 |url=https://nlpdl.nlp.gov.ph/499/1933/83-11234/home.htm |entry={{#if: {{{2|}}}| {{{1}}}” & “{{{2}}} | {{{entry|{{{1|{{pagename}}}}}}}} }} |pageparam=page |propagateparams=t |allowparams=entry,1,2 }}<noinclude> {{Documentation}} </noinclude> 4cg8ixadsdp17nn37pma63762yanqle ထာမ်ပလိက်:documentation 10 295385 395915 2026-05-29T18:03:51Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:documentation]] ဇရေင် [[ထာမ်ပလိက်:Documentation]] 395915 wikitext text/x-wiki #REDIRECT [[ထာမ်ပလိက်:Documentation]] f14wl7hp80hta0be113l18fo0r050tu ထာမ်ပလိက်:documentation/documentation 10 295386 395917 2026-05-29T18:03:52Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:documentation/documentation]] ဇရေင် [[ထာမ်ပလိက်:Documentation/documentation]] 395917 wikitext text/x-wiki #REDIRECT [[ထာမ်ပလိက်:Documentation/documentation]] ndatdhxysyw58k43c7f9goth0gc7na3 ထာမ်ပလိက်:R:kne:Vanoverbergh 1933/documentation 10 295387 395918 2026-05-29T18:04:45Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} ==Template data== <templatedata> { "params": { "1": { "aliases": [ "entry" ], "label": "Entry name", "description": "Defaults to the page name", "type": "string" }, "page": { "label": "Page number", "description": "The page number of the entry", "type": "number" }, "text": { "label": "Quoted text, if applicable", "type": "string" }, "t": {..." 
395918 wikitext text/x-wiki {{documentation subpage}} ==Template data== <templatedata> { "params": { "1": { "aliases": [ "entry" ], "label": "Entry name", "description": "Defaults to the page name", "type": "string" }, "page": { "label": "Page number", "description": "The page number of the entry", "type": "number" }, "text": { "label": "Quoted text, if applicable", "type": "string" }, "t": { "label": "Quoted text English translation, if applicable", "type": "string" } }, "format": "inline", "description": "This template is for referencing the comprehensive Kankanaey dictionary authored by Morice Vanoverbergh." } </templatedata> ==Considerations== ===Southern or Northern?=== It is unclear whether this dictionary is focused on Southern terminologies or Northern. This dictionary is targeted at speech in Bauko, which is known to partly have both Southern and Northern Kankanaeys. It is known that "{{m|kne|aw}}" is Southern while "{{m|kne|owen}}" is Northern, and this dictionary lists "{{m|kne|aw}}" and not "{{m|kne|owen}}". However, it is also known that "{{m|kne|maga}}" is Southern while "{{m|kne|maid}}" is Northern, yet this dictionary lists "{{m|kne|maid}}" and not "{{m|kne|maga}}". ===Lack of part of speech=== This dictionary does not list by default each entry's part of speech; only certain entries have them. Extra care is needed to decide what an entry's actual part of speech is. ===Verbs=== Vanoverbergh lists verbs as their roots. This is in contrast to Wiktionary. Additionally, he lists verbs as having tenses. This is incorrect as Kankanaey verbs have ''aspect'', not ''tense''. ===Affixes=== Affixes are not marked with a dash "-". This makes it hard to identify whether an entry is an affix or a regular word. ===e versus a=== Vanoverbergh sometimes uses "e" instead of "a" due to dialectal differences. This is most evident in "{{m|kne|man-}}", as he uses "{{m|kne|men-}}". ===Sort order of "ng"=== This dictionary's sorting of "ng" is inconsistent.
Sometimes it is listed after "n" as if it was a separate letter, while sometimes it is listed as if "ng" are two separate "n" and "g" letters. <includeonly> {{refcat|kne}} </includeonly> tqrjex3n7e9nxz475t4a3p1k0j2p2f3 ကဏ္ဍ:ထာမ်ပလိက်နိဿဲကာန်ခါနာအဳဂမၠိုၚ် 14 295388 395919 2026-05-29T18:07:05Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်ကာန်ခါနာအဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]]" 395919 wikitext text/x-wiki [[ကဏ္ဍ:ထာမ်ပလိက်ကာန်ခါနာအဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]] cee4tssjue4jwu4fu8rqzlqsoeg9h49 395920 395919 2026-05-29T18:08:16Z 咽頭べさ 33 395920 wikitext text/x-wiki [[ကဏ္ဍ:ထာမ်ပလိက်လေန်ကာန်ခါနာအဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]] rmj31uo5xx4c2s0iqc0d6huaj5btgfx ကဏ္ဍ:ထာမ်ပလိက်လေန်ကာန်ခါနာအဳဂမၠိုၚ် 14 295389 395921 2026-05-29T18:09:04Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်ကာန်ခါနာအဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်လေန်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]]" 395921 wikitext text/x-wiki [[ကဏ္ဍ:ထာမ်ပလိက်ကာန်ခါနာအဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်လေန်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]] szp9rg3hsd6euw0zbyajadlkw7383y4 ကဏ္ဍ:ထာမ်ပလိက်ကာန်ခါနာအဳဂမၠိုၚ် 14 295390 395922 2026-05-29T18:10:10Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ဘာသာကာန်ခါနာအဳ]][[ကဏ္ဍ:ထာမ်ပလိက်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]]" 395922 wikitext text/x-wiki [[ကဏ္ဍ:ဘာသာကာန်ခါနာအဳ]][[ကဏ္ဍ:ထာမ်ပလိက်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]] gq7byk3je0s1hp3gc28efozdjb74vch ထာမ်ပလိက်:R:du Cange 10 295391 395924 2026-05-29T18:12:02Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:R:du Cange]] ဇရေင် [[ထာမ်ပလိက်:R:la:du Cange]] 395924 wikitext text/x-wiki #REDIRECT [[ထာမ်ပလိက်:R:la:du Cange]] 7ltcolshs7lfdswy6qxx2oo5mu02fn5 ထာမ်ပလိက်:R:du Cange/documentation 10 295392 395926 2026-05-29T18:12:02Z 咽頭べさ 33 咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:R:du Cange/documentation]] ဇရေင် [[ထာမ်ပလိက်:R:la:du Cange/documentation]] 395926 wikitext text/x-wiki #REDIRECT [[ထာမ်ပလိက်:R:la:du Cange/documentation]] 9c3iz9rfn1c3m47dtsc0ubfdah1sb12 ထာမ်ပလိက်:lt-pr 10 295393 395927 
2026-05-29T18:13:11Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:lt-pron|show}}<noinclude>{{documentation}}</noinclude>" 395927 wikitext text/x-wiki {{#invoke:lt-pron|show}}<noinclude>{{documentation}}</noinclude> p6h7t8o8qyxdia67brluph5p1k5ncri မဝ်ဂျူ:lt-pron/doc 828 295394 395929 2026-05-29T18:16:29Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{status|beta}} This module is not to be directly used. It is used by {{tl|lt-pr}}, see there for usage. ===Testcases=== {{#invoke:lt-pron/testcases|run_tests}} <includeonly> {{module cat|lt}} </includeonly>" 395929 wikitext text/x-wiki {{status|beta}} This module is not to be directly used. It is used by {{tl|lt-pr}}, see there for usage. ===Testcases=== {{#invoke:lt-pron/testcases|run_tests}} <includeonly> {{module cat|lt}} </includeonly> p0t1w8xp43giai1v597sd21xaf3cjo2 မဝ်ဂျူ:lt-pron/testcases/doc 828 295395 395931 2026-05-29T18:19:15Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:lt-pron/testcases|run_tests|comments=1}} <includeonly> {{module cat|lt}} </includeonly>" 395931 wikitext text/x-wiki {{#invoke:lt-pron/testcases|run_tests|comments=1}} <includeonly> {{module cat|lt}} </includeonly> ri2w4m31wo5ypnah5pl6hduwchdvcb1 မဝ်ဂျူ:User:TongcyDai/lt-pron 828 295396 395932 2026-05-29T18:20:12Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "--[==[ Backend for {{lt-pr}}: IPA, hyphenation, and rhyme generation. Author: TongcyDai ]==] local export = {} local m_debug = require("Module:debug") local m_str = require("Module:string utilities") local m_lt_common = require("Module:lt-common") local u = m_str.char local ulower = m_str.lower local uupper = m_str.upper local usub = m_str.sub local ulen = m_str.len local ugsub = m_str.gsub local ufind =..." 395932 Scribunto text/plain --[==[ Backend for {{lt-pr}}: IPA, hyphenation, and rhyme generation. 
Author: TongcyDai ]==] local export = {} local m_debug = require("Module:debug") local m_str = require("Module:string utilities") local m_lt_common = require("Module:lt-common") local u = m_str.char local ulower = m_str.lower local uupper = m_str.upper local usub = m_str.sub local ulen = m_str.len local ugsub = m_str.gsub local ufind = m_str.find local umatch = m_str.match local rsplit = m_str.split -- Accent mark constants (re-exported from Module:lt-common to keep all -- Lithuanian modules in sync). local GRAVE = m_lt_common.GRAVE -- U+0300 local ACUTE = m_lt_common.ACUTE -- U+0301 local TILDE = m_lt_common.TILDE -- U+0303 local DOTABOVE = m_lt_common.DOTABOVE -- U+0307 local OGONEK = m_lt_common.OGONEK -- U+0328 -- M4: Reuse Module:lt-common's display formatter directly instead of -- maintaining a parallel local copy. local makeDisplayText = m_lt_common.makeDisplayText -- Liaison marker: U+203F UNDERTIE — separates clitics from their stressed host -- in input. The phonological grammar treats it like a "soft" word boundary that -- is transparent to several cross-word processes (palatalization spread, -- geminate / fricative simplification, place assimilation), per VLKK §19–§23. local LIAISON = u(0x203F) -- Lazy-loaded external modules local m_IPA local audio_module = "Module:audio" local homophones_module = "Module:homophones" local hyphenation_module = "Module:hyphenation" local rhymes_module = "Module:rhymes" local parameters_module = "Module:parameters" local parse_util_module = "Module:parse utilities" local concat = table.concat local insert = table.insert local lang_obj local function get_lang() if not lang_obj then lang_obj = require("Module:languages").getByCode("lt") end return lang_obj end local function track(reason) m_debug.track("lt-pron/" .. 
reason) end -- ============================================================================ -- SECTION 1: Orthography & Phonology Definitions -- ============================================================================ -- Suffix table for automatic phonetic adjustments (currently disabled) -- Exact matching lists (must include precomposed normalized tone markers) --[[ local SUFFIX_LOAN = { ["fòbas"]=true, ["fòbė"]=true, ["fòbija"]=true, ["fònas"]=true, ["fònė"]=true, ["lògas"]=true, ["lògija"]=true, ["skòpas"]=true } --]] -- Consonant classes for syllabification (Sonority Hierarchy) local CLASS = { R = {["l"]=true, ["m"]=true, ["n"]=true, ["r"]=true, ["v"]=true, ["j"]=true}, S = {["s"]=true, ["z"]=true, ["š"]=true, ["ž"]=true, ["f"]=true, ["x"]=true, ["h"]=true, ["ch"]=true}, T = {["p"]=true, ["b"]=true, ["t"]=true, ["d"]=true, ["k"]=true, ["g"]=true, ["c"]=true, ["dz"]=true, ["č"]=true, ["dž"]=true} } -- Front vowels trigger palatalization local FRONT_V = { ["e"]=true, ["ę"]=true, ["ė"]=true, ["i"]=true, ["į"]=true, ["y"]=true, ["ie"]=true, ["ei"]=true, ["eu"]=true } -- Vowel -> Base IPA mapping (Unstressed short/inherent) local V_IPA = { ["a"] = "ɐ", ["ą"] = "ɑː", ["e"] = "ɛ", ["ę"] = "æː", ["ė"] = "eː", ["i"] = "ɪ", ["į"] = "iː", ["y"] = "iː", ["u"] = "ʊ", ["ų"] = "uː", ["ū"] = "uː", ["o"] = "oː", ["ie"] = "iɛ", ["uo"] = "uɔ", -- Unstressed simple diphthongs ["ai"] = "ɐɪ", ["au"] = "ɒʊ", ["ei"] = "ɛɪ", ["eu"] = "ɛʊ", ["ui"] = "ʊɪ", ["oi"] = "ɔɪ", ["ou"] = "ɔʊ" } -- Consonant -> Base IPA mapping (Unpalatalized) local CONS_IPA = { ["b"] = "b", ["c"] = "t͡s", ["č"] = "t͡ʃ", ["d"] = "d", ["dz"] = "d͡z", ["dž"] = "d͡ʒ", ["ch"] = "x", ["f"] = "f", ["g"] = "ɡ", ["h"] = "ɣ", ["j"] = "j", ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", ["p"] = "p", ["r"] = "r", ["s"] = "s", ["š"] = "ʃ", ["t"] = "t", ["v"] = "ʋ", ["z"] = "z", ["ž"] = "ʒ", } -- Voicing pairs for Voicing Assimilation local VOICING_PAIRS = { ["p"]="b", ["b"]="p", ["t"]="d", ["d"]="t", ["k"]="g", 
["g"]="k", ["c"]="dz", ["dz"]="c", ["č"]="dž", ["dž"]="č", ["s"]="z", ["z"]="s", ["š"]="ž", ["ž"]="š", ["x"]="ɣ", ["ɣ"]="x" } local function is_voiced(c) local voiced_set = {["b"]=true, ["d"]=true, ["g"]=true, ["dz"]=true, ["dž"]=true, ["z"]=true, ["ž"]=true, ["ɣ"]=true} return voiced_set[c] == true end -- Accent pairs for conjugation module support -- Maps base vowel/diphthong to accented forms (falling/rising) local ACCENT_PAIRS = { -- Long vowels (acute or tilde) ["ą"] = {acute="ą"..ACUTE, tilde="ą"..TILDE}, ["ę"] = {acute="ę"..ACUTE, tilde="ę"..TILDE}, ["ė"] = {acute="ė"..ACUTE, tilde="ė"..TILDE}, ["y"] = {acute="y"..ACUTE, tilde="y"..TILDE}, ["į"] = {acute="į"..ACUTE, tilde="į"..TILDE}, ["ū"] = {acute="ū"..ACUTE, tilde="ū"..TILDE}, ["ų"] = {acute="ų"..ACUTE, tilde="ų"..TILDE}, -- a/e can be short (grave) or long (tilde) ["a"] = {grave="a"..GRAVE, tilde="a"..TILDE}, ["e"] = {grave="e"..GRAVE, tilde="e"..TILDE}, -- o: ó/õ/o are long, ò is short ["o"] = {acute="o"..ACUTE, grave="o"..GRAVE, tilde="o"..TILDE}, -- Short vowels i/u (only grave) ["i"] = {grave="i"..GRAVE}, ["u"] = {grave="u"..GRAVE}, -- Simple diphthongs ["ai"] = {acute="a"..ACUTE.."i", tilde="a".."i"..TILDE}, ["au"] = {acute="a"..ACUTE.."u", tilde="a".."u"..TILDE}, ["ei"] = {acute="e"..ACUTE.."i", tilde="e".."i"..TILDE}, ["ui"] = {grave="u"..GRAVE.."i", tilde="u".."i"..TILDE}, -- Complex diphthongs ["ie"] = {acute="i"..ACUTE.."e", tilde="i".."e"..TILDE}, ["uo"] = {acute="u"..ACUTE.."o", tilde="u".."o"..TILDE}, -- Mixed diphthongs (a series - acute/tilde) ["al"] = {acute="a"..ACUTE.."l", tilde="a".."l"..TILDE}, ["am"] = {acute="a"..ACUTE.."m", tilde="a".."m"..TILDE}, ["an"] = {acute="a"..ACUTE.."n", tilde="a".."n"..TILDE}, ["ar"] = {acute="a"..ACUTE.."r", tilde="a".."r"..TILDE}, -- Mixed diphthongs (e series - acute/grave/tilde, grave for foreign) ["el"] = {acute="e"..ACUTE.."l", grave="e"..GRAVE.."l", tilde="e".."l"..TILDE}, ["em"] = {acute="e"..ACUTE.."m", grave="e"..GRAVE.."m", 
tilde="e".."m"..TILDE}, ["en"] = {acute="e"..ACUTE.."n", grave="e"..GRAVE.."n", tilde="e".."n"..TILDE}, ["er"] = {acute="e"..ACUTE.."r", grave="e"..GRAVE.."r", tilde="e".."r"..TILDE}, -- Mixed diphthongs (i series - grave/tilde) ["il"] = {grave="i"..GRAVE.."l", tilde="i".."l"..TILDE}, ["im"] = {grave="i"..GRAVE.."m", tilde="i".."m"..TILDE}, ["in"] = {grave="i"..GRAVE.."n", tilde="i".."n"..TILDE}, ["ir"] = {grave="i"..GRAVE.."r", tilde="i".."r"..TILDE}, -- Mixed diphthongs (u series - grave/tilde) ["ul"] = {grave="u"..GRAVE.."l", tilde="u".."l"..TILDE}, ["um"] = {grave="u"..GRAVE.."m", tilde="u".."m"..TILDE}, ["un"] = {grave="u"..GRAVE.."n", tilde="u".."n"..TILDE}, ["ur"] = {grave="u"..GRAVE.."r", tilde="u".."r"..TILDE}, -- Foreign diphthongs (grave only) ["eu"] = {grave="e"..GRAVE.."u"}, ["oi"] = {grave="o"..GRAVE.."i"}, ["ou"] = {grave="o"..GRAVE.."u"}, ["ol"] = {grave="o"..GRAVE.."l"}, ["om"] = {grave="o"..GRAVE.."m"}, ["on"] = {grave="o"..GRAVE.."n"}, ["or"] = {grave="o"..GRAVE.."r"}, } -- ============================================================================ -- SECTION 2: Lexical Normalization (Avoid NFD destruction) -- ============================================================================ -- Helper: Remove all accent marks from text (moved here for early use). -- Delegates to Module:lt-common to keep the de-accenting logic shared. 
local function remove_all_accents(text)
	-- Decompose to NFD first, then let Module:lt-common strip the marks so the
	-- de-accenting rules live in one shared place.
	return m_lt_common.to_stem_bare(mw.ustring.toNFD(text))
end

-- Extract pagename from input or load from headword data.
--
-- Returns exactly two values on every path:
--   1. the page name (the manual <base:xxx> override, else the pagename
--      exposed by Module:headword/data, else nil), and
--   2. the input string, with any <base:...> override stripped.
local function get_pagename(input)
	-- Check for manual override: <base:xxx>
	local manual = input:match("<base:([^>]+)>")
	if manual then
		-- gsub returns (string, count); the parentheses truncate the result to
		-- the string so this path returns two values like the others instead
		-- of leaking the substitution count as a third return value.
		return manual, (input:gsub("<base:[^>]+>", ""))
	end

	-- Load from headword data. pcall keeps a failing mw.loadData from
	-- aborting pronunciation generation; we then fall through to nil.
	local success, data = pcall(function()
		return mw.loadData("Module:headword/data").pagename
	end)
	if success and data then
		return data, input
	end

	return nil, input
end

-- Identify respelling j and (j) positions.
--
-- Compares the accent-stripped, marker-stripped input against the normalized
-- pagename and returns a list of 1-based character indices (into the CLEANED
-- input string, not the raw input) of every "j" that has no matching "j" at
-- the aligned position of the pagename. Returns an empty list when no
-- pagename is available.
local function identify_respelling_glides(input_with_accents, pagename)
	if not pagename then
		return {} -- No pagename, no respelling detection
	end

	-- Remove all accents from input
	local input_clean = remove_all_accents(input_with_accents)
	-- Remove special markers (^, .)
	input_clean = ugsub(input_clean, "[%^%.]", "")
	-- Remove <base:...> if present
	input_clean = ugsub(input_clean, "<base:[^>]+>", "")
	-- Remove literal ˌ
	input_clean = ugsub(input_clean, "ˌ", "")
	-- Remove softening mark ʼ
	input_clean = ugsub(input_clean, "ʼ", "")
	-- Remove (j) markers - replace with j for comparison
	input_clean = ugsub(input_clean, "%(j%)", "j")
	-- Remove spaces for comparison
	input_clean = ugsub(input_clean, " ", "")
	-- Convert to NFC for comparison
	input_clean = mw.ustring.toNFC(input_clean)

	-- Normalize pagename (lowercase, remove spaces)
	local pagename_clean = ugsub(ulower(pagename), " ", "")

	-- Neither string changes inside the loop, so measure each one once
	-- instead of on every iteration.
	local input_len = ulen(input_clean)
	local page_len = ulen(pagename_clean)

	-- Find respelling j positions (j in input but not in pagename).
	-- The input cursor always advances by one; the pagename cursor advances
	-- unless the current input character is a respelling j.
	local respelling_positions = {}
	local page_idx = 1
	for input_idx = 1, input_len do
		local input_char = usub(input_clean, input_idx, input_idx)
		local page_has_j = page_idx <= page_len
			and usub(pagename_clean, page_idx, page_idx) == "j"
		if input_char == "j" and not page_has_j then
			-- This is a respelling j: record it, don't advance page_idx
			insert(respelling_positions, input_idx)
		else
			-- Original j or any other character: consume one pagename char
			page_idx = page_idx + 1
		end
	end

	return respelling_positions
end

-- Safe mapping to extract tones without destroying precomposed characters.
-- Each value is "<base character>,<tone name>".
local TONE_MAP = {
	["á"]="a,acute", ["à"]="a,grave", ["ã"]="a,tilde",
	["é"]="e,acute", ["è"]="e,grave", ["ẽ"]="e,tilde",
	["í"]="i,acute", ["ì"]="i,grave", ["ĩ"]="i,tilde",
	["ý"]="y,acute", ["ỳ"]="y,grave", ["ỹ"]="y,tilde",
	["ú"]="u,acute", ["ù"]="u,grave", ["ũ"]="u,tilde",
	["ó"]="o,acute", ["ò"]="o,grave", ["õ"]="o,tilde",
	-- Precomposed vowels with macrons/ogoneks + tones (represented here via standard combinations)
	["ą́"]="ą,acute", ["ą̃"]="ą,tilde",
	["ę́"]="ę,acute", ["ę̃"]="ę,tilde",
	["ė́"]="ė,acute", ["ė̃"]="ė,tilde",
	["į́"]="į,acute", ["į̃"]="į,tilde",
	["ų́"]="ų,acute", ["ų̃"]="ų,tilde",
	["ū́"]="ū,acute", ["ū̃"]="ū,tilde",
	-- Tilde on liquids (for semi-diphthongs)
	["l̃"]="l,tilde", ["m̃"]="m,tilde", ["ñ"]="n,tilde", ["r̃"]="r,tilde", ["j̃"]="j,tilde"
}

-- Resolves NFD back to safe representation if input was somehow NFD.
-- The PUA rejection (with replacement hints), non-standard format tracking,
-- and i/j-with-dotabove canonicalization are all delegated to Module:lt-common
-- so that all Lithuanian modules share one implementation. Only the
-- TONE_MAP-based codepoint parser remains local since it produces the
-- token list specifically consumed by lt-pron's tokenizer.
--
-- Returns an array of {char = <base character>, tone = <tone name or nil>}.
local function safe_normalize(text)
	-- Reject PUA characters with helpful "use X instead" hints.
	m_lt_common.reject_pua(text)

	-- Track non-standard input encodings for analytics. Detection runs on
	-- the raw input (in NFD internally) before any canonicalization, so the
	-- counts reflect what editors actually typed.
	local has_dotless, has_precomp_i = m_lt_common.detect_nonstandard(text)
	if has_dotless then
		track('dotless-ij')
	end
	if has_precomp_i then
		track('precomposed-i-accent')
	end

	-- "Explicit dotabove" (i.e., i/j + U+0307 + accent) is the *correct*
	-- input form for accented i/j and is tracked separately to monitor
	-- editor adoption. This check stays local since lt-common's
	-- detect_nonstandard intentionally only flags the wrong forms.
	if ufind(mw.ustring.toNFD(text), "[ij]" .. DOTABOVE) then
		track('explicit-dotabove')
	end

	-- Normalize: drops stray dot-aboves between i/j and accents,
	-- converts dotless ı/ȷ to standard i/j, returns clean NFC.
	text = m_lt_common.canonicalize_input(text)

	local result = {}

	-- Append the token described by a TONE_MAP entry ("base,tone"). The base
	-- character is upper-cased again when the original character was not
	-- already lowercase, so the input's case survives the lookup.
	local function push_toned(entry, original, lowered)
		local parts = rsplit(entry, ",")
		local base_char = parts[1]
		if original ~= lowered then
			base_char = uupper(base_char)
		end
		insert(result, {char = base_char, tone = parts[2]})
	end

	-- Parse the canonicalized NFC string into {char, tone} tokens.
	-- TONE_MAP entries are 1- or 2-codepoint precomposed sequences
	-- (e.g. "á" is one codepoint; "ą́" is "ą" + U+0301). The lookup tries
	-- the 2-codepoint match first, then falls back to the 1-codepoint match.
	local text_len = ulen(text) -- text is not modified below; measure once
	local i = 1
	while i <= text_len do
		local c = usub(text, i, i)
		local c_lower = ulower(c) -- TONE_MAP keys are lowercase
		-- Look ahead for a combining mark following the base character
		local next_c_lower = ulower(usub(text, i + 1, i + 1))
		local pair_entry = TONE_MAP[c_lower .. next_c_lower]
		if pair_entry then
			push_toned(pair_entry, c, c_lower)
			i = i + 2
		elseif TONE_MAP[c_lower] then
			push_toned(TONE_MAP[c_lower], c, c_lower)
			i = i + 1
		else
			insert(result, {char = c, tone = nil})
			i = i + 1
		end
	end

	return result
end

-- ============================================================================
-- SECTION 3: Tokenization & Diphthong/Digraph resolution
-- ============================================================================

-- Classify a character (or digraph) as vowel "V", one of the consonant
-- classes "R"/"S"/"T" from CLASS, or "UNKNOWN". Lookup is case-insensitive.
local function get_type(c)
	local lc = ulower(c)
	if V_IPA[lc] then return "V" end
	if CLASS.R[lc] then return "R" end
	if CLASS.S[lc] then return "S" end
	if CLASS.T[lc] then return "T" end
	return "UNKNOWN"
end

-- Helper: Convert token array back to NFC string for suffix/prefix matching.
-- Each token contributes its char followed by the combining mark for its
-- tone (if any); pieces are collected and joined once rather than grown by
-- repeated string concatenation.
local function tokens_to_string(tok_list)
	local pieces = {}
	for _, t in ipairs(tok_list) do
		local c = t.char
		if t.tone == "grave" then
			c = c .. GRAVE
		elseif t.tone == "acute" then
			c = c .. ACUTE
		elseif t.tone == "tilde" then
			c = c .. TILDE
		end
		pieces[#pieces + 1] = c
	end
	return mw.ustring.toNFC(concat(pieces))
end

-- Apply automatic properties based on word structure (e.g.
loanwords) local function apply_auto_properties(tokens) local word_str = ulower(tokens_to_string(tokens)) --[[ Suffix detection for loanword quality (currently disabled) local matched_loan_suff = nil for suff, _ in pairs(SUFFIX_LOAN) do if usub(word_str, -ulen(suff)) == suff then matched_loan_suff = suff; break end end if matched_loan_suff then local suff_len = ulen(matched_loan_suff) local acc_len = 0 for i = #tokens, 1, -1 do local t = tokens[i] acc_len = acc_len + ulen(tokens_to_string({t})) if t.type == "V" and ulower(t.char) == "o" then t.auto_targeted = true if not t.force_default then t.loan_quality = true end end if acc_len >= suff_len then break end end end -- Check for redundant asterisks globally for _, t in ipairs(tokens) do if t.force_default and not t.auto_targeted then track('redundant-asterisk') end end --]] return tokens end -- Strict whitelist for valid diphthong and tone combinations local function is_strict_diphthong(c1, t1, c2, t2) local combo = c1 .. c2 -- Unstressed: neither element has a tone if not t1 and not t2 then return (combo == "ie" or combo == "uo" or combo == "ai" or combo == "au" or combo == "ei" or combo == "ui" or combo == "oi" or combo == "ou" or combo == "eu") end -- Tone on the first element (acute or grave) if t1 and not t2 then if t1 == "acute" then return (combo == "ai" or combo == "au" or combo == "ei" or combo == "ie" or combo == "uo" or combo == "oi") elseif t1 == "grave" then return (combo == "ui" or combo == "oi" or combo == "ou" or combo == "eu") end end -- Tone on the second element (tilde) if not t1 and t2 then if t2 == "tilde" then return (combo == "ai" or combo == "au" or combo == "ei" or combo == "ui" or combo == "ie" or combo == "uo" or combo == "eu") end end return false end local function tokenize(text_str, pagename) local raw_chars = safe_normalize(text_str) -- Identify respelling glides local respelling_j_positions = identify_respelling_glides(text_str, pagename) local tokens = {} local i = 1 while i <= 
#raw_chars do local curr = raw_chars[i] local nxt = raw_chars[i+1] local lc_curr = ulower(curr.char) local lc_nxt = nxt and ulower(nxt.char) -- Explicit Modifiers if curr.char == "^" then local last_v = nil for j = #tokens, 1, -1 do if tokens[j].type == "V" then last_v = tokens[j]; break end end if last_v then local lc_v = ulower(last_v.char) local base_v = usub(lc_v, -1) -- Last char for silent i combinations local is_e_base = (lc_v == "e") or (base_v == "e") local is_o_base = (lc_v == "o") or (base_v == "o") -- Check for valid e: no tone or grave only if is_e_base then if not last_v.tone or last_v.tone == "grave" then last_v.loan_quality = true elseif last_v.tone == "acute" then error("lt-pron: '^' cannot be used with acute 'é' (use only with plain 'e' or grave 'è')") elseif last_v.tone == "tilde" then error("lt-pron: '^' cannot be used with tilde 'ẽ' (use only with plain 'e' or grave 'è')") end -- Check for valid o: no tone only elseif is_o_base then if not last_v.tone then last_v.loan_quality = true elseif last_v.tone == "grave" then error("lt-pron: '^' is redundant for 'ò' (already pronounced [ɔ])") elseif last_v.tone == "acute" or last_v.tone == "tilde" then error("lt-pron: '^' cannot be used with 'ó' or 'õ' (native long vowels)") end else error("lt-pron: '^' can only be used with 'e' (plain/grave) or 'o' (plain). Found: '" .. lc_v .. 
"'") end end i = i + 1 -- Check for (j) marker elseif curr.char == "(" and i + 2 <= #raw_chars then local char2 = raw_chars[i+1] local char3 = raw_chars[i+2] if char2.char == "j" and char3.char == ")" then insert(tokens, {char = "j", type = "R", tone = nil, is_respelling = true, is_optional = true, original_char = "-"}) i = i + 3 else error("lt-pron: '(' must be followed by 'j)' to form the (j) glide marker") end --[[ Asterisk modifier (currently disabled) elseif curr.char == "*" then local last_v = nil for j = #tokens, 1, -1 do if tokens[j].type == "V" then last_v = tokens[j]; break end end if last_v then last_v.force_default = true end i = i + 1 --]] elseif curr.char == "." then insert(tokens, {char = ".", type = "BOUNDARY"}) i = i + 1 elseif curr.char == "ˌ" then insert(tokens, {char = "ˌ", type = "SECONDARY_STRESS_BOUNDARY"}) i = i + 1 elseif curr.char == "ʼ" then -- Softening mark: palatalize the preceding consonant for j = #tokens, 1, -1 do local tok = tokens[j] if tok.type == "T" or tok.type == "S" or tok.type == "R" then tok.softening_mark = true break end end i = i + 1 elseif lc_curr == "d" and nxt and (lc_nxt == "z" or lc_nxt == "ž") then insert(tokens, {char = curr.char .. nxt.char, type = "T", tone = nil}) i = i + 2 elseif lc_curr == "c" and nxt and lc_nxt == "h" then insert(tokens, {char = curr.char .. nxt.char, type = "S", tone = nil}) i = i + 2 -- Special handling for V + j̃ (final j with tilde): treat as V + ĩ diphthong elseif curr.type ~= "BOUNDARY" and get_type(curr.char) == "V" and nxt and ulower(nxt.char) == "j" and nxt.tone == "tilde" then -- Check if this is word-final (no more non-boundary tokens after j̃) local is_final = true for k = i + 2, #raw_chars do if raw_chars[k].char ~= " " then is_final = false break end end if is_final then -- Combine V + j̃ as a diphthong V + ĩ (e.g., uj̃ → ui̇̃) local v_char = ulower(curr.char) local combined_char = v_char .. 
"i" -- e.g., "u" + "i" = "ui" local tone = "tilde" -- j̃'s tilde local tone_position = 2 -- Tilde is on the second vowel (i) insert(tokens, { char = combined_char, type = "V", tone = tone, tone_position = tone_position, original_char = v_char .. "j" -- For hyphenation: display as "uj̃" }) i = i + 2 else -- Not final, treat as regular V + j insert(tokens, {char = curr.char, type = "V", tone = curr.tone}) i = i + 1 end elseif curr.type ~= "BOUNDARY" and get_type(curr.char) == "V" and nxt and get_type(nxt.char) == "V" then local nxt_nxt = raw_chars[i+2] local lc_nxt_nxt = nxt_nxt and ulower(nxt_nxt.char) local back_diph = lc_nxt and lc_nxt_nxt and (lc_nxt .. lc_nxt_nxt) local is_silent_i_diph = (lc_curr == "i" and not curr.tone) and (back_diph == "au" or back_diph == "ai" or back_diph == "ou" or back_diph == "oi" or back_diph == "uo") local is_silent_i_mono = (lc_curr == "i" and not curr.tone) and (lc_nxt == "a" or lc_nxt == "ą" or lc_nxt == "o" or lc_nxt == "u" or lc_nxt == "ų" or lc_nxt == "ū" or lc_nxt == "ɔ") local is_valid_diph = is_strict_diphthong(lc_curr, curr.tone, lc_nxt, nxt and nxt.tone) if is_valid_diph and nxt_nxt and get_type(nxt_nxt.char) == "V" then local lc_nxt_nxt = ulower(nxt_nxt.char) local is_next_valid_diph = is_strict_diphthong(lc_nxt, nxt.tone, lc_nxt_nxt, nxt_nxt.tone) if is_next_valid_diph then -- Resolve ambiguous triplets (e.g., auo -> a.uo is standard) -- Break the first valid diphthong unless explicitly stressed if not curr.tone then is_valid_diph = false end end end if is_silent_i_diph then local tone = nxt.tone or nxt_nxt.tone local tone_position = nil if tone then -- For silent i diphthongs (e.g., iau), position is relative to the full string -- Position 2 = middle vowel, Position 3 = last vowel if nxt.tone then tone_position = 2 elseif nxt_nxt.tone then tone_position = 3 end end insert(tokens, {char = curr.char .. nxt.char .. 
nxt_nxt.char, type = "V", tone = tone, has_silent_i = true, tone_position = tone_position}) i = i + 3 elseif is_silent_i_mono then local tone = nxt.tone local tone_position = nil if tone then -- For silent i monosyllables (e.g., ia), position 2 = second character tone_position = 2 end insert(tokens, {char = curr.char .. nxt.char, type = "V", tone = tone, has_silent_i = true, tone_position = tone_position}) i = i + 2 elseif is_valid_diph then local tone = curr.tone or nxt.tone local tone_position = nil if tone then -- Record which vowel carries the tone (1 = first, 2 = second) if curr.tone then tone_position = 1 elseif nxt.tone then tone_position = 2 end end insert(tokens, {char = curr.char .. nxt.char, type = "V", tone = tone, tone_position = tone_position}) i = i + 2 else insert(tokens, {char = curr.char, type = "V", tone = curr.tone}) i = i + 1 end else if curr.char ~= " " then local tok_type = get_type(curr.char) local is_respelling_j = false -- Check if this is a respelling j if ulower(curr.char) == "j" and #respelling_j_positions > 0 then -- Build cleaned string up to current position to find clean position local cleaned_so_far = "" for k = 1, i do local c = raw_chars[k] if c.char ~= "^" and c.char ~= "." and c.char ~= " " and c.char ~= "ʼ" and c.char ~= "ˌ" then local char_clean = c.char -- Don't add tone marks to cleaned string if not c.tone then cleaned_so_far = cleaned_so_far .. char_clean else -- Add base character without tone cleaned_so_far = cleaned_so_far .. 
char_clean end end end local clean_pos = ulen(cleaned_so_far) -- Check if this position is in respelling list for _, pos in ipairs(respelling_j_positions) do if pos == clean_pos then is_respelling_j = true break end end end if ulower(curr.char) == "j" then local tok_data = {char = curr.char, type = tok_type, tone = curr.tone, is_respelling = is_respelling_j, is_optional = false} if is_respelling_j then tok_data.original_char = "-" -- Respelling: use "-" to indicate not in orthography end insert(tokens, tok_data) else insert(tokens, {char = curr.char, type = tok_type, tone = curr.tone}) end end i = i + 1 end end -- Validate respelling glides are between vowels for i, tok in ipairs(tokens) do if tok.is_respelling then local prev_is_vowel = false local next_is_vowel = false -- Check previous non-boundary token for j = i - 1, 1, -1 do if tokens[j].type ~= "BOUNDARY" and tokens[j].type ~= "SECONDARY_STRESS_BOUNDARY" then prev_is_vowel = (tokens[j].type == "V") break end end -- Check next non-boundary token for j = i + 1, #tokens do if tokens[j].type ~= "BOUNDARY" and tokens[j].type ~= "SECONDARY_STRESS_BOUNDARY" then next_is_vowel = (tokens[j].type == "V") break end end if not (prev_is_vowel and next_is_vowel) then error("lt-pron: Respelling glide 'j' or '(j)' must be between two vowels") end end end return apply_auto_properties(tokens) end -- ============================================================================ -- SECTION 4: Syllabification -- ============================================================================ -- Pre-syllabification: Merge geminate (doubled) consonants -- This must happen BEFORE syllabification to prevent false mixed diphthongs -- For example: pérrašo → pér-ra-šo would incorrectly treat ér as a mixed diphthong -- By merging rr→r first, we get pé-ra-šo, correctly keeping é as a pure vowel local function merge_geminate_consonants(tokens) local SIBILANTS = {["s"]=true, ["z"]=true, ["š"]=true, ["ž"]=true} local i = 1 while i < #tokens do 
local tok = tokens[i] local nxt = tokens[i+1] local tok_is_cons = (tok.type == "T" or tok.type == "S" or tok.type == "R") local nxt_is_cons = (nxt.type == "T" or nxt.type == "S" or nxt.type == "R") if tok_is_cons and nxt_is_cons then local tok_char = ulower(tok.char) local nxt_char = ulower(nxt.char) local tok_is_sib = SIBILANTS[tok_char] local nxt_is_sib = SIBILANTS[nxt_char] -- Merge if: (1) both are sibilants, or (2) identical consonants if (tok_is_sib and nxt_is_sib) or (tok_char == nxt_char) then -- Remove the first token (keep the second) table.remove(tokens, i) -- Don't increment i, check the same position again else i = i + 1 end else i = i + 1 end end end local function syllabify(tokens) local syllables = {} local current_syl = {} -- Check for leading secondary stress marker local has_initial_secondary_stress = false if #tokens > 0 and tokens[1].type == "SECONDARY_STRESS_BOUNDARY" then has_initial_secondary_stress = true end local v_indices = {} for i, tok in ipairs(tokens) do if tok.type == "V" then insert(v_indices, i) end end if #v_indices == 0 then return {tokens} end -- Edge case: no vowels local boundaries = {} -- Sonority Sequencing Algorithm combined with Morphophonological Maximum Onset for idx = 1, #v_indices - 1 do local v1_idx = v_indices[idx] local v2_idx = v_indices[idx + 1] local raw_c_tokens = {} local forced_boundary_idx = nil for i = v1_idx + 1, v2_idx - 1 do if tokens[i].type == "BOUNDARY" or tokens[i].type == "SECONDARY_STRESS_BOUNDARY" then forced_boundary_idx = i else insert(raw_c_tokens, {t=tokens[i], orig_idx=i}) end end if forced_boundary_idx then boundaries[forced_boundary_idx] = true elseif #raw_c_tokens == 0 then -- Hiatus boundaries[v2_idx] = true else -- Macro-Token Grouping: Treat consecutive sibilants (S) as a single phonological unit local macro_c = {} local i = 1 while i <= #raw_c_tokens do local current = raw_c_tokens[i] if current.t.type == "S" then local absorbed = {current} local j = i + 1 -- Absorb any subsequent S 
tokens into this macro unit, keeping track of them while j <= #raw_c_tokens and raw_c_tokens[j].t.type == "S" do insert(absorbed, raw_c_tokens[j]) j = j + 1 end insert(macro_c, {type = "S", orig_idx = current.orig_idx, tokens = absorbed}) i = j else insert(macro_c, {type = current.t.type, orig_idx = current.orig_idx, tokens = {current}}) i = i + 1 end end local m_count = #macro_c if m_count == 1 then -- V.CV (or V.SSV, e.g., sausšala -> sau.sšala) boundaries[macro_c[1].orig_idx] = true elseif m_count == 2 then local t1, t2 = macro_c[1].type, macro_c[2].type -- ST, SR, TR -> V.CCV if (t1=="S" and t2=="T") or (t1=="S" and t2=="R") or (t1=="T" and t2=="R") then boundaries[macro_c[1].orig_idx] = true else -- Handle TSS and RSS sequences properly (e.g., Oksfordas, transformavo). -- If the macro cluster is T+S or R+S, and the S unit absorbed multiple sibilants, -- split between the first and second sibilant (TS.S, RS.S). if (t1=="T" or t1=="R") and t2=="S" and #macro_c[2].tokens > 1 then boundaries[macro_c[2].tokens[2].orig_idx] = true else -- Default VC.CV boundaries[macro_c[2].orig_idx] = true end end elseif m_count == 3 then local t1, t2, t3 = macro_c[1].type, macro_c[2].type, macro_c[3].type if t1=="S" and t2=="T" and t3=="R" then boundaries[macro_c[1].orig_idx] = true -- V.CCCV elseif (t1=="T" and t2=="S" and t3=="T") or (t1=="R" and t2=="S" and t3=="T") or (t1=="T" and t2=="S" and t3=="R") or (t1=="R" and t2=="T" and t3=="R") or (t1=="T" and t2=="T" and t3=="R") or (t1=="R" and t2=="S" and t3=="R") then boundaries[macro_c[2].orig_idx] = true -- VC.CCV (includes RSR, e.g., konfliktas) else boundaries[macro_c[3].orig_idx] = true -- VCC.CV end elseif m_count == 4 then -- Identify the 4-consonant pattern according to the 8 documented combinations local pattern = macro_c[1].type .. macro_c[2].type .. macro_c[3].type .. 
macro_c[4].type if pattern == "RSTR" or pattern == "TSTR" then -- R.STR, T.STR -> VC.CCCV boundaries[macro_c[2].orig_idx] = true elseif pattern == "RTRR" or pattern == "TSTS" then -- RTR.R, TST.S -> VCCC.CV boundaries[macro_c[4].orig_idx] = true else -- RT.ST, RT.SR, RT.TR, ST.TR -> VCC.CCV boundaries[macro_c[3].orig_idx] = true end elseif m_count >= 5 then -- Fallback for >=5 logical consonant units track('complex-consonant-cluster') boundaries[macro_c[3].orig_idx] = true end end end -- Construct syllables local secondary_stress_syllables = {} for i, tok in ipairs(tokens) do if boundaries[i] and #current_syl > 0 then insert(syllables, current_syl) -- If this is a secondary stress boundary, mark the NEXT syllable if tok.type == "SECONDARY_STRESS_BOUNDARY" then secondary_stress_syllables[#syllables + 1] = true end current_syl = {} end if tok.type ~= "BOUNDARY" and tok.type ~= "SECONDARY_STRESS_BOUNDARY" then insert(current_syl, tok) end end if #current_syl > 0 then insert(syllables, current_syl) end -- Apply secondary stress marks for idx, _ in pairs(secondary_stress_syllables) do if syllables[idx] then syllables[idx].secondary_stress = true end end -- Apply initial secondary stress if present if has_initial_secondary_stress and #syllables > 0 then syllables[1].secondary_stress = true end return syllables end -- ============================================================================ -- SECTION 5: Base IPA Mapping & Stress Assignment -- ============================================================================ local function is_mixed_diphthong(syl, v_idx) local v_tok = syl[v_idx] local lc_v = ulower(v_tok.char) -- Strip silent 'i' for accurate length calculation if v_tok.has_silent_i then lc_v = usub(lc_v, 2) end -- Digraphs (ie, uo, ai, au, etc.) or natively long vowels (ą, ę, ė, į, y, ų, ū) -- do NOT form mixed diphthongs with subsequent resonants. -- Only short a, e, i, u, o can form true mixed diphthongs. 
if ulen(lc_v) > 1 then return false end local LONG_V = {["ą"]=true, ["ę"]=true, ["ė"]=true, ["į"]=true, ["y"]=true, ["ų"]=true, ["ū"]=true} if LONG_V[lc_v] then return false end -- V + R in the SAME syllable (coda) if v_idx < #syl and syl[v_idx+1].type == "R" then -- j and v are not considered for typical liquid semi-diphthongs length rules if syl[v_idx+1].char ~= "j" and syl[v_idx+1].char ~= "v" then return true end end return false end local function map_base_phonetics(syllables) local stress_prefix = nil for _, syl in ipairs(syllables) do local v_idx = nil for i, tok in ipairs(syl) do if tok.type == "V" then v_idx = i; break end end if v_idx then local v_tok = syl[v_idx] local v_char = v_tok.char local lc_v_char = ulower(v_char) local tone = v_tok.tone local is_mixed = is_mixed_diphthong(syl, v_idx) if is_mixed and not tone then local r_tok = syl[v_idx+1] if r_tok.tone then tone = r_tok.tone end end if v_tok.has_silent_i then v_tok.silent_i = true local actual_vowel = usub(lc_v_char, 2) v_tok.ipa = V_IPA[actual_vowel] or "ɐ" lc_v_char = actual_vowel else -- Apply loan quality base default for 'o' and 'e' v_tok.ipa = V_IPA[lc_v_char] or "ɐ" if lc_v_char == "o" and v_tok.loan_quality then v_tok.ipa = "ɔ" end if lc_v_char == "e" and v_tok.loan_quality then v_tok.ipa = "e" end -- Automatically prepend glide 'j' to syllable-initial 'ie' if lc_v_char == "ie" and v_idx == 1 then v_tok.ipa = "jiɛ" end end if tone then local s_mark = "" local v_base = usub(lc_v_char, 1, 1) if tone == "acute" then s_mark = "¹ˈ" if lc_v_char == "a" then if is_mixed then v_tok.ipa = "ɑˑ" else v_tok.ipa = "ɑː" end elseif lc_v_char == "e" then -- Note: '^' modifier not allowed with acute 'é' as of current rules -- This code path preserved for consistency if is_mixed and v_tok.loan_quality then v_tok.ipa = "ɛ" -- Loanword é in mixed diphthongs is short /ɛ/ without length elseif is_mixed then v_tok.ipa = "æˑ" -- Mixed diphthong: half-long else v_tok.ipa = "æː" -- Pure vowel: full-long end 
elseif lc_v_char == "ai" then v_tok.ipa = "ɑˑɪ" elseif lc_v_char == "au" then v_tok.ipa = "ɑˑʊ" elseif lc_v_char == "ei" then v_tok.ipa = "æˑɪ" elseif lc_v_char == "eu" then v_tok.ipa = "æˑʊ" elseif lc_v_char == "oi" then v_tok.ipa = "oˑɪ" end elseif tone == "grave" then if is_mixed and (v_base == "i" or v_base == "u" or v_base == "e" or v_base == "o") then s_mark = "¹ˈ" elseif lc_v_char == "ui" or lc_v_char == "oi" or lc_v_char == "ou" or lc_v_char == "eu" then s_mark = "¹ˈ" else s_mark = "ˈ" end -- Handle loanword variants for grave if lc_v_char == "e" and v_tok.loan_quality then v_tok.ipa = "e" -- è^ (loanword) → [e] end if lc_v_char == "o" then v_tok.ipa = "ɔ" -- ò (always loanword) → [ɔ] end elseif tone == "tilde" then s_mark = "²ˈ" if lc_v_char == "ai" then v_tok.ipa = "ɐɪˑ" elseif lc_v_char == "au" then v_tok.ipa = "ɒʊˑ" elseif lc_v_char == "ei" then v_tok.ipa = "ɛɪˑ" elseif lc_v_char == "eu" then v_tok.ipa = "ɛʊˑ" elseif lc_v_char == "ui" then v_tok.ipa = "ʊɪˑ" elseif lc_v_char == "a" then if is_mixed then v_tok.ipa = "ɐ" else v_tok.ipa = "ɑː" end elseif lc_v_char == "e" then if is_mixed then v_tok.ipa = "ɛ" else v_tok.ipa = "æː" end end end syl.stress = s_mark end -- Set base IPA for consonants (case-insensitive mapping) for i, tok in ipairs(syl) do if tok.type ~= "V" then local lc_c = ulower(tok.char) tok.ipa = CONS_IPA[lc_c] or lc_c end end -- Handle tilde half-length on mixed diphthong coda if tone == "tilde" and is_mixed then local r_tok = syl[v_idx+1] r_tok.half_long = true end else -- Syllable with no vowel (e.g. 
leftover consonant) for i, tok in ipairs(syl) do local lc_c = ulower(tok.char) tok.ipa = CONS_IPA[lc_c] or lc_c end end end end -- ============================================================================ -- SECTION 6: Phonetic Polish Passes -- ============================================================================ -- Pass 1: Palatalization Spreading (Right-to-Left) -- -- right_context_palatalizing (optional, used by the cross-word pipeline): -- When the current word is followed by a liaisoned word whose first effective -- phoneme is "soft" (front V, j, or a palatalized consonant), pass `true` so -- that: -- 1. spread_active starts true (allowing the word's last consonant to -- receive ʲ even though it has no in-word right neighbour); -- 2. is_direct evaluates to true for that last consonant when it is k/g, -- so VLKK §19's "lyk‿jója → [lʲiːkʲ‿…]" pattern is produced. -- When omitted or false, the function behaves exactly like the within-word -- palatalization that this module has always done. local function apply_palatalization(syllables, right_context_palatalizing) -- Flatten tokens for cross-syllable spreading local flat_tokens = {} for _, syl in ipairs(syllables) do for _, tok in ipairs(syl) do insert(flat_tokens, tok) end end -- First pass: Apply softening marks (no spreading) for i = 1, #flat_tokens do local tok = flat_tokens[i] if tok.softening_mark and tok.ipa ~= "" then tok.ipa = tok.ipa .. 
"ʲ" tok.is_palatalized = true end end local spread_active = right_context_palatalizing and true or false for i = #flat_tokens, 1, -1 do local tok = flat_tokens[i] local lc_char = ulower(tok.char) if tok.type == "V" then if tok.silent_i or FRONT_V[lc_char] then spread_active = true else spread_active = false end elseif lc_char == "j" then -- Preserve special IPA for final j (ɪ̯), don't override it if tok.ipa ~= "ɪ̯" and tok.ipa ~= "" then tok.ipa = "j" end tok.is_palatalized = true spread_active = true else if spread_active then -- Check if the palatalization is DIRECT (immediate contact with front V or j) local is_direct = false local nxt = flat_tokens[i+1] if nxt then local nxt_lc = ulower(nxt.char) if (nxt.type == "V" and (nxt.silent_i or FRONT_V[nxt_lc])) or nxt_lc == "j" then is_direct = true end elseif right_context_palatalizing then -- No in-word neighbour, but a liaisoned soft phoneme follows. is_direct = true end tok.is_palatalized = true if lc_char == "k" or lc_char == "g" then if is_direct and tok.ipa ~= "" then tok.ipa = tok.ipa .. "ʲ" end -- DO NOT set spread_active to false! k/g are transparent to spreading. else if tok.ipa ~= "" then tok.ipa = tok.ipa .. 
"ʲ" end end end end end end -- Pass 2: Voicing Assimilation (Right-to-Left) local function apply_voicing_assimilation(syllables) local flat_tokens = {} for _, syl in ipairs(syllables) do for _, tok in ipairs(syl) do insert(flat_tokens, tok) end end local target_voice = nil for i = #flat_tokens, 1, -1 do local tok = flat_tokens[i] if tok.type == "V" or tok.type == "R" then target_voice = nil -- Blocked by vowels and resonants elseif tok.type == "S" or tok.type == "T" then local lc_char = ulower(tok.char) local is_uppercase = (tok.char ~= lc_char) if target_voice == nil then -- Establish new assimilation target target_voice = is_voiced(lc_char) and "voiced" or "voiceless" else -- Assimilate local current_is_voiced = is_voiced(lc_char) if target_voice == "voiced" and not current_is_voiced then local new_char = VOICING_PAIRS[lc_char] or lc_char tok.char = is_uppercase and uupper(new_char) or new_char elseif target_voice == "voiceless" and current_is_voiced then local new_char = VOICING_PAIRS[lc_char] or lc_char tok.char = is_uppercase and uupper(new_char) or new_char end -- Update IPA based on new character, preserving palatalization local lc_new_char = ulower(tok.char) local new_ipa = CONS_IPA[lc_new_char] or lc_new_char if tok.is_palatalized and lc_new_char ~= "k" and lc_new_char ~= "g" then new_ipa = new_ipa .. "ʲ" end tok.ipa = new_ipa end end end end -- Pass 3: Nasal Assimilation (n -> ŋ before velars k/g and post-velar ch/h) -- Per VLKK §6.3, n assimilates to the place of articulation of any following -- velar/uvular consonant, including the fricatives ch [x] and h [ɣ]. 
-- Implementation notes for Pass 3:
--   * Only the token IMMEDIATELY after n is inspected. Vowel tokens flagged
--     silent_i are fused "i + vowel" nuclei (e.g. "ia", "iū"), i.e. real
--     vowels; skipping over them would velarize n across a vowel.
--   * The replacement is "ŋʲ" when the following consonant's IPA already
--     carries ʲ, otherwise "ŋ".
-- Mutates the tokens in place and returns nothing.
local function apply_nasal_assimilation(syllables)
	-- Flatten so that clusters spanning a syllable boundary are seen as adjacent.
	local flat_tokens = {}
	for _, syl in ipairs(syllables) do
		for _, tok in ipairs(syl) do
			insert(flat_tokens, tok)
		end
	end

	for i = 1, #flat_tokens - 1 do
		local tok = flat_tokens[i]
		local nxt = flat_tokens[i + 1]
		if ulower(tok.char) == "n" then
			local lc_nxt = ulower(nxt.char)
			if lc_nxt == "k" or lc_nxt == "g" or lc_nxt == "ch" or lc_nxt == "h" then
				-- Check actual IPA string for direct palatalization
				if nxt.ipa and ufind(nxt.ipa, "ʲ") then
					tok.ipa = "ŋʲ"
				else
					tok.ipa = "ŋ"
				end
			end
		end
	end
end

-- Pass 4: Vowel Quality Adjustments after Palatalization
-- Within each syllable, the first vowel that follows a palatalized consonant
-- (or j) has its IPA fronted: a/e-type qualities are shifted through the table
-- below and a leading u/ʊ/o/ɔ receives the "advanced" diacritic (U+031F).
-- Mutates tok.ipa in place and returns nothing.
local function adjust_vowel_quality(syllables)
	for _, syl in ipairs(syllables) do
		local has_palatal_onset = false
		for _, tok in ipairs(syl) do
			if tok.type ~= "V" and (tok.is_palatalized or ulower(tok.char) == "j") then
				has_palatal_onset = true
			elseif tok.type == "V" and has_palatal_onset then
				local ipa = tok.ipa

				-- Shift a/e quality
				if ipa == "ɐ" then ipa = "ɛ"
				elseif ipa == "ɑː" then ipa = "æː"
				-- Diphthong shifts for iau / iai
				elseif ipa == "ɒʊ" then ipa = "ɛʊ"
				elseif ipa == "ɑˑʊ" then ipa = "æˑʊ"
				elseif ipa == "ɒʊˑ" then ipa = "ɛʊˑ"
				elseif ipa == "ɐɪ" then ipa = "ɛɪ"
				elseif ipa == "ɑˑɪ" then ipa = "æˑɪ"
				elseif ipa == "ɐɪˑ" then ipa = "ɛɪˑ"
				end

				-- Dynamic fronting for u/o/ɔ (adds U+031F)
				-- Matches ONLY the first character (^) to avoid double fronting in uɔ
				ipa = ugsub(ipa, "^([uʊoɔ])", "%1̟")

				tok.ipa = ipa
				-- The flag is consumed by the first vowel after the palatal onset.
				has_palatal_onset = false
			end
		end
	end
end

-- Pass 5: Terminal Devoicing (Word-final obstruent devoicing)
-- If the last token of the last syllable is a voiced obstruent (type "S" or
-- "T") with an entry in VOICING_PAIRS, its char is replaced by the voiceless
-- partner (case preserved) and its IPA is rebuilt from CONS_IPA, re-appending
-- ʲ when the token is marked is_palatalized. Returns nothing.
local function apply_terminal_devoicing(syllables)
	if #syllables == 0 then return end

	local last_syl = syllables[#syllables]
	local last_tok = last_syl[#last_syl]

	if last_tok and (last_tok.type == "S" or last_tok.type == "T") then
		local lc_char = ulower(last_tok.char)
		local is_uppercase = (last_tok.char ~= lc_char)
		if is_voiced(lc_char) then
			local devoiced = VOICING_PAIRS[lc_char]
			if devoiced then
				last_tok.char = is_uppercase and uupper(devoiced) or devoiced
				local new_ipa = CONS_IPA[devoiced] or devoiced
				if last_tok.is_palatalized then
					new_ipa = new_ipa .. "ʲ"
				end
				last_tok.ipa = new_ipa
			end
		end
	end
end

-- Pass 6: Place Assimilation (Sibilant + Affricate)
-- When a sibilant meets an affricate, the sibilant adjusts its place of articulation:
-- s+č→š, z+dž→ž, š+c→s, ž+dz→z
--
-- The rule table is keyed on bare IPA. Token IPA may already end in ʲ
-- (apply_palatalization appends it), so a trailing ʲ is stripped from both
-- tokens before the lookup and put back on the rewritten sibilant; otherwise
-- palatalized clusters such as sʲ + t͡ʃʲ would never match.
local function apply_place_assimilation(syllables)
	local flat_tokens = {}
	for _, syl in ipairs(syllables) do
		for _, tok in ipairs(syl) do
			insert(flat_tokens, tok)
		end
	end

	local PLACE_ASSIM = {
		["s"] = {["t͡ʃ"] = "ʃ"},
		["z"] = {["d͡ʒ"] = "ʒ"},
		["ʃ"] = {["t͡s"] = "s"},
		["ʒ"] = {["d͡z"] = "z"},
	}

	for i = 1, #flat_tokens - 1 do
		local tok = flat_tokens[i]
		local nxt = flat_tokens[i + 1]

		if tok.type == "S" and nxt.type == "T" then
			-- ugsub returns (string, count); count > 0 means a ʲ was removed.
			local tok_base, tok_pal = ugsub(tok.ipa or "", "ʲ$", "")
			local nxt_base = (ugsub(nxt.ipa or "", "ʲ$", ""))
			local rule = PLACE_ASSIM[tok_base]
			if rule and rule[nxt_base] then
				tok.ipa = rule[nxt_base] .. (tok_pal > 0 and "ʲ" or "")
			end
		end
	end
end

-- Pass 7: Geminate Simplification (Double consonant reduction)
-- NOTE: Original geminates (rr, ll, etc.) are already merged in merge_geminate_consonants.
-- This pass handles geminates created by phonetic rules (e.g., voicing assimilation: td→dd).
-- Sibilants: any two sibilants merge into one (keeping the second)
-- Other consonants: only identical pairs merge
-- Comparison is done on the IPA with a trailing ʲ removed, so that pairs
-- differing only in palatalization marking (e.g. "k" + "kʲ", "sʲ" + "ʃʲ")
-- are still recognised. The dropped consonant is silenced by setting its
-- ipa to ""; the token itself stays in its syllable. Returns nothing.
local function apply_geminate_simplification(syllables)
	local flat_tokens = {}
	for _, syl in ipairs(syllables) do
		for _, tok in ipairs(syl) do
			insert(flat_tokens, tok)
		end
	end

	local SIBILANTS = {["s"]=true, ["z"]=true, ["ʃ"]=true, ["ʒ"]=true}

	for i = 1, #flat_tokens - 1 do
		local tok = flat_tokens[i]
		local nxt = flat_tokens[i + 1]

		local tok_is_cons = (tok.type == "T" or tok.type == "S" or tok.type == "R")
		local nxt_is_cons = (nxt.type == "T" or nxt.type == "S" or nxt.type == "R")

		if tok_is_cons and nxt_is_cons then
			-- Parentheses keep only the first return value of ugsub (the string).
			local tok_base = (ugsub(tok.ipa or "", "ʲ$", ""))
			local nxt_base = (ugsub(nxt.ipa or "", "ʲ$", ""))

			if SIBILANTS[tok_base] and SIBILANTS[nxt_base] then
				tok.ipa = ""
			elseif tok_base ~= "" and tok_base == nxt_base then
				tok.ipa = ""
			end
		end
	end
end

-- Pass 8: Final Consonant Vocalization (v → ʊ̯, j → ɪ̯)
-- Word-final v and j (without tilde) become non-syllabic vowels
-- Note: j with tilde is already converted to i with tilde in tokenization
-- Only the IPA of the last token of the last syllable is rewritten; its type
-- is left untouched. Returns nothing.
local function apply_final_consonant_vocalization(syllables)
	if #syllables == 0 then return end

	local last_syl = syllables[#syllables]
	local last_tok = last_syl[#last_syl]

	if last_tok then
		local lc_char = ulower(last_tok.char)

		-- Final v → ʊ̯
		if lc_char == "v" then
			last_tok.ipa = "ʊ̯"
			-- Keep type as "R" (resonant) for now - it's treated as non-syllabic
		end

		-- Final j (without tilde) → ɪ̯
		-- Note: j with tilde is already converted to i in tokenization, so won't reach here
		if lc_char == "j" and last_tok.tone ~= "tilde" then
			last_tok.ipa = "ɪ̯"
			-- Keep type as "R" (resonant) for now - it's treated as non-syllabic
		end
	end
end

-- ============================================================================
-- SECTION 7: Output Assembly
-- ============================================================================

-- ---------------------------------------------------------------------------
-- Cross-word phonology helpers
--
-- ---------------------------------------------------------------------------

-- Break a phrase into its words. The only separator is the plain space, and
-- empty pieces (from leading, trailing or doubled spaces) are discarded, so
-- the result is a plain list of non-empty strings. A nil term gives {}.
-- The tie bar ‿ belongs to the generated IPA alone: finding it in the input
-- raises an error that tells the editor to type a regular space instead.
local function split_into_segments(term)
	local tie_found = term and ufind(term, LIAISON)
	if tie_found then
		error("lt-pron: the liaison marker \"" .. LIAISON ..
			"\" (U+203F) must not appear in the input. Use a regular " ..
			"space between words; the module decides where to insert ‿ " ..
			"in the IPA output based on stress.")
	end

	local words = {}
	local pieces = rsplit(term or "", " ")
	for _, piece in ipairs(pieces) do
		if piece ~= "" then
			insert(words, piece)
		end
	end
	return words
end

-- Report whether a word carries stress anywhere: true as soon as one of its
-- syllables has either a primary (`stress`) or a `secondary_stress` mark.
local function word_has_stress(syllables)
	for _, syl in ipairs(syllables) do
		local marked = syl.stress or syl.secondary_stress
		if marked then
			return true
		end
	end
	return false
end

-- Compute the clitic group anchor for each word in a phrase.
--
-- A "clitic group" is a stressed word together with all unstressed words that
-- prosodically attach to it. Two adjacent words share a liaison ‿ iff they
-- belong to the same group. The algorithm:
--
-- 1. Every stressed word is its own anchor.
-- 2. Each unstressed word looks FORWARD for the nearest stressed word
--    (proclitic case, e.g., "iš namų̃" — iš leans on namų̃).
-- 3. If no stressed word follows, look BACKWARD instead (enclitic case,
--    e.g., "sakaũ gi" — gi leans on sakaũ).
-- 4. If the entire phrase has no stressed word (rare edge case), all
--    unstressed words share a single pseudo-group with anchor 0.
--
-- This matches VLKK §4.7 examples like "iš namų̃ [ɪʃ‿nɐ²ˈmuː]" (proclitic)
-- and "sakaũ gi [sɐ²ˈkɒʊˑ‿ɡʲɪ]" (enclitic), and produces the correct
-- behavior for VLKK §4.9's "išėjaũ į kiẽmą" where the unstressed į proclitic
-- to kiẽmą while išėjaũ stands alone.
local function compute_clitic_anchors(word_data)
	-- anchors[i] = index of the stressed word that word i attaches to
	-- (its own index when it is stressed, 0 when the phrase has no stress).
	local anchors = {}
	local total = #word_data

	-- Step 1: a stressed word is the head of its own group.
	for pos, entry in ipairs(word_data) do
		if word_has_stress(entry.syllables) then
			anchors[pos] = pos
		end
	end

	-- Step 2: walk right-to-left so that every unanchored word can inherit
	-- its right neighbour's anchor, which by then is already the nearest
	-- stressed word further right (or still nil if there is none).
	local pos = total - 1
	while pos >= 1 do
		if anchors[pos] == nil then
			anchors[pos] = anchors[pos + 1]
		end
		pos = pos - 1
	end

	-- Step 3: walk left-to-right; whatever is still unanchored has no
	-- stressed word after it and leans on the closest one before it.
	for idx = 2, total do
		if anchors[idx] == nil then
			anchors[idx] = anchors[idx - 1]
		end
	end

	-- Step 4: nothing in the phrase is stressed. Put every word into the
	-- pseudo-group 0 so that neighbours still compare as "same group".
	if total > 0 and anchors[1] == nil then
		for idx = 1, total do
			anchors[idx] = 0
		end
	end

	return anchors
end

-- Scan syllables front to back and hand back the first token whose ipa is
-- not the empty string (tokens silenced by earlier passes are skipped).
-- Returns nil when no such token exists.
local function first_effective_token(syllables)
	for _, syl in ipairs(syllables) do
		for _, candidate in ipairs(syl) do
			if candidate.ipa ~= "" then
				return candidate
			end
		end
	end
	return nil
end

-- Mirror image of first_effective_token: scan back to front and return the
-- last token whose ipa is not the empty string, or nil if there is none.
local function last_effective_token(syllables)
	for syl_pos = #syllables, 1, -1 do
		local syl = syllables[syl_pos]
		for tok_pos = #syl, 1, -1 do
			local candidate = syl[tok_pos]
			if candidate.ipa ~= "" then
				return candidate
			end
		end
	end
	return nil
end

-- Return true if the next word's first effective phoneme triggers
-- palatalization across the liaison boundary (front V, j, or an already
-- palatalized consonant). Must be called AFTER the next word's palatalization
-- pass has run, so `is_palatalized` is reliable.
local function first_token_palatalizes(syllables)
	local lead = first_effective_token(syllables)
	if not lead then
		return false
	end

	local lowered = ulower(lead.char)
	if lead.type == "V" then
		-- A vowel is "soft" when it carries a silent i or is a front vowel.
		return lead.silent_i or FRONT_V[lowered] or false
	elseif lowered == "j" then
		return true
	end
	-- Any other consonant counts only if it was marked as palatalized.
	return lead.is_palatalized == true
end

-- VLKK §20: the preposition "už" keeps its [ʒ] (i.e. terminal devoicing is
-- skipped) when the next liaisoned word starts with a vowel or sonorant
-- consonant (n, m, l, r, j, v). Other words always undergo terminal devoicing.
local UZ_SKIP_SONORANTS = {
	n = true, m = true, l = true,
	r = true, j = true, v = true,
}

-- Decide whether terminal devoicing must be skipped for `word_text`.
-- True only when the word, with accents removed and lower-cased, is exactly
-- "už" AND the first effective token of `next_syllables` is a vowel or one
-- of the sonorants listed in UZ_SKIP_SONORANTS. False in every other case,
-- including a missing or empty `next_syllables`.
local function should_skip_devoicing_for_uz(word_text, next_syllables)
	-- remove_all_accents returns NFD; ž (U+017E) decomposes to z + U+030C
	-- there, so the text is folded back to NFC before comparing with "už".
	local bare = remove_all_accents(word_text or "")
	local folded = ulower(mw.ustring.toNFC(bare))
	if folded ~= "už" or not next_syllables then
		return false
	end

	local lead = first_effective_token(next_syllables)
	if not lead then
		return false
	end
	return lead.type == "V" or UZ_SKIP_SONORANTS[ulower(lead.char)] == true
end

-- VLKK §21b / §22b / §23b: at a liaison boundary, the last consonant of W1
-- and the first consonant of W2 may interact. We mirror within-word place
-- assimilation and geminate / sibilant simplification, applied just before
-- W1's palatalization pass so that any new ipa (e.g. s → ʃ) gets palatalized
-- correctly when needed.
-- Place assimilation across a liaison boundary: CROSSWORD_PLACE_ASSIM[w1][w2]
-- is the new ipa for W1's final consonant when W2 starts with `w2`.
local CROSSWORD_PLACE_ASSIM = {
	["s"] = {["t͡ʃ"] = "ʃ"},
	["z"] = {["d͡ʒ"] = "ʒ"},
	["ʃ"] = {["t͡s"] = "s"},
	["ʒ"] = {["d͡z"] = "z"},
}

local CROSSWORD_SIBILANTS = {["s"]=true, ["z"]=true, ["ʃ"]=true, ["ʒ"]=true}

-- Return `ipa` without a trailing palatalization mark ʲ ("" for nil input).
local function strip_trailing_palatal(ipa)
	if not ipa then return "" end
	local stripped = ugsub(ipa, "ʲ$", "")
	return stripped
end

-- Apply cross-word geminate / sibilant simplification and place assimilation
-- to the last effective token of W1, given the first effective token of W2.
-- Mutates W1's token in place (its ipa is either emptied or rewritten).
-- Only consonant + consonant junctions are affected.
local function apply_crossword_polish_at_junction(w1_syllables, w2_syllables)
	local w1_last = last_effective_token(w1_syllables)
	local w2_first = first_effective_token(w2_syllables)
	if not (w1_last and w2_first) then return end

	-- These rules describe consonant interaction only. Without this guard,
	-- two identical vowels meeting at the boundary would satisfy the geminate
	-- test below and W1's final vowel would be deleted.
	if w1_last.type == "V" or w2_first.type == "V" then return end

	-- W1's last token has not yet been palatalized at this point in the
	-- pipeline, so its ipa is the bare base form. W2 has already been fully
	-- polished, so we must strip a trailing ʲ before using it as a key.
	local w1_base = w1_last.ipa or ""
	local w2_base = strip_trailing_palatal(w2_first.ipa)
	if w1_base == "" or w2_base == "" then return end

	-- Geminate / sibilant cluster: drop W1's last consonant entirely.
	if w1_base == w2_base
		or (CROSSWORD_SIBILANTS[w1_base] and CROSSWORD_SIBILANTS[w2_base]) then
		w1_last.ipa = ""
		return
	end

	-- Place assimilation: rewrite W1's last consonant base. Palatalization,
	-- if any, will be re-applied by the palatalization pass.
	local rule = CROSSWORD_PLACE_ASSIM[w1_base]
	if rule and rule[w2_base] then
		w1_last.ipa = rule[w2_base]
	end
end

-- ---------------------------------------------------------------------------
-- Word-level rendering
-- ---------------------------------------------------------------------------

-- Tokenize, syllabify, and assign base phonetics for one word. Returns a
-- table { text, syllables } with the word's mutable phonological state.
local function prepare_word_state(word, pagename)
	local tokens = tokenize(word, pagename)
	merge_geminate_consonants(tokens)
	local syllables = syllabify(tokens)
	map_base_phonetics(syllables)
	return {text = word, syllables = syllables}
end

-- Render a fully polished syllables list into an IPA string.
-- Identical to the original tail of process_single_word_ipa.
-- Raises an error if a syllable carries both primary and secondary stress.
local function render_word_ipa(syllables)
	local parts = {}
	for s_idx, syl in ipairs(syllables) do
		local syl_str = ""

		-- Vowel hiatus: when the previous syllable ends in a (non-silent)
		-- vowel and this one starts with a (non-silent) vowel, separate them
		-- with "." (no glide insertion). 'ie' is exempt because it already
		-- has an implicit 'j' from map_base_phonetics.
		if s_idx > 1 then
			local prev_syl = syllables[s_idx - 1]
			local prev_last_tok = prev_syl[#prev_syl]
			local curr_first_tok = syl[1]
			local prev_ends_with_v = (prev_last_tok and prev_last_tok.type == "V" and not prev_last_tok.silent_i)
			local curr_starts_with_v = (curr_first_tok and curr_first_tok.type == "V" and not curr_first_tok.silent_i)
			if prev_ends_with_v and curr_starts_with_v
				and ulower(curr_first_tok.char) ~= "ie" then
				syl_str = "."
			end
		end

		-- Check for conflict between primary and secondary stress
		if syl.secondary_stress and syl.stress then
			error("lt-pron: A syllable cannot have both primary stress (tone mark) and secondary stress (ˌ)")
		end

		-- Add stress markers (primary or secondary, mutually exclusive)
		if syl.secondary_stress then
			syl_str = syl_str .. "ˌ"
		elseif syl.stress then
			syl_str = syl_str .. syl.stress
		end

		for _, tok in ipairs(syl) do
			-- An optional respelling glide is shown as "(j)"; every other
			-- token contributes its own ipa.
			if tok.is_respelling and tok.is_optional then
				syl_str = syl_str .. "(j)"
			else
				syl_str = syl_str .. tok.ipa
			end
			if tok.half_long then
				syl_str = syl_str .. "ˑ"
			end
		end
		insert(parts, syl_str)
	end
	return concat(parts, "")
end

-- ---------------------------------------------------------------------------
-- Multi-word IPA assembler with cross-word phonology
-- ---------------------------------------------------------------------------

-- Pipeline for a phrase made of space-separated words.
-- The liaison marker ‿ never appears in input; it is inserted into the
-- rendered IPA according to clitic-group anchors computed from per-word
-- stress (see compute_clitic_anchors above).
--
-- Per-word polish order (unchanged from VLKK §17–§19):
--   terminal devoicing → voicing assim → place assim → geminate simp →
--   final-cons vocalization → cross-word polish at junction →
--   palatalization (with cross-word right context) → nasal assim →
--   vowel quality.
--
-- Cross-word polish at the junction (§21b/§22b/§23b) runs *before* W1's
-- palatalization pass so that any rewritten ipa still receives ʲ correctly.
-- Words are processed RTL so that each W_i sees the already-polished state
-- of W_{i+1} when computing its cross-word context.
--
-- @param term              respelled input (one or more words)
-- @param provided_pagename optional base spelling; when nil it is derived
--                          from `term` via get_pagename
-- @return IPA string; words are joined with ‿ (same clitic group) or " "
local function to_ipa(term, provided_pagename)
	-- Use provided pagename if available, otherwise try to extract from term
	local pagename, clean_term
	if provided_pagename then
		pagename = provided_pagename
		clean_term = term
	else
		pagename, clean_term = get_pagename(term)
	end
	term = clean_term

	-- Split input and pagename into words.
	local input_segs = split_into_segments(term)
	local pagename_segs = nil
	if pagename then
		pagename_segs = split_into_segments(pagename)
		-- Verify word count matches (only when both are multi-word)
		if #input_segs > 1 and #pagename_segs > 1 and #input_segs ~= #pagename_segs then
			error("lt-pron: Input has " .. #input_segs .. " words but pagename has " .. #pagename_segs .. " words. They must match.")
		end
		-- If pagename is a single word but input is multi-word, drop pagename
		-- alignment (respelling detection only makes sense for exact matches).
		if #pagename_segs == 1 and #input_segs > 1 then
			pagename_segs = nil
		end
	end

	-- Stage 1: tokenize / syllabify / map base phonetics for every word.
	local word_data = {}
	for i, seg_text in ipairs(input_segs) do
		local seg_pagename = (pagename_segs and pagename_segs[i]) or nil
		insert(word_data, prepare_word_state(seg_text, seg_pagename))
	end
	if #word_data == 0 then
		return ""
	end

	-- Stage 2: compute clitic anchors and decide liaison per junction.
	-- Two adjacent words share a ‿ iff they belong to the same clitic group.
	local anchors = compute_clitic_anchors(word_data)
	for i = 1, #word_data - 1 do
		word_data[i].is_liaison = (anchors[i] == anchors[i+1])
	end
	if word_data[#word_data] then
		word_data[#word_data].is_liaison = false  -- no successor
	end

	-- Stage 3: flag the už §20 exception. už keeps its [ʒ] (i.e. terminal
	-- devoicing is skipped) only when it is in a liaison junction with the
	-- following word AND that word starts with a vowel or sonorant. Whether
	-- už is proclitic or enclitic in the group doesn't matter — what matters
	-- is that ‿ sits between už and the next phoneme.
	for i = 1, #word_data do
		local wd = word_data[i]
		wd.skip_terminal_devoicing = false
		if wd.is_liaison and word_data[i+1] then
			if should_skip_devoicing_for_uz(wd.text, word_data[i+1].syllables) then
				wd.skip_terminal_devoicing = true
			end
		end
	end

	-- Stage 4: run the polish pipeline RTL across word_data so each W_i sees
	-- W_{i+1}'s polished state when computing cross-word context.
	for i = #word_data, 1, -1 do
		local wd = word_data[i]
		local next_wd = wd.is_liaison and word_data[i+1] or nil

		-- Within-word polish (passes 1-5)
		if not wd.skip_terminal_devoicing then
			apply_terminal_devoicing(wd.syllables)
		end
		apply_voicing_assimilation(wd.syllables)
		apply_place_assimilation(wd.syllables)
		apply_geminate_simplification(wd.syllables)
		apply_final_consonant_vocalization(wd.syllables)

		-- Cross-word fricative simplification / place assimilation at the
		-- liaison boundary, before W1 palatalizes (so a freshly assimilated
		-- s → ʃ can still pick up ʲ).
		if next_wd then
			apply_crossword_polish_at_junction(wd.syllables, next_wd.syllables)
		end

		-- Determine right palatalization context for the cross-word case.
		local right_palatalizing = false
		if next_wd then
			right_palatalizing = first_token_palatalizes(next_wd.syllables)
		end

		-- Pass 6: palatalization (with cross-word context).
		apply_palatalization(wd.syllables, right_palatalizing)

		-- Remaining within-word passes.
		apply_nasal_assimilation(wd.syllables)
		adjust_vowel_quality(wd.syllables)
	end

	-- Stage 5: render each word and join with ‿ (same clitic group) or " ".
	local result_parts = {}
	for _, wd in ipairs(word_data) do
		insert(result_parts, render_word_ipa(wd.syllables))
	end
	local result = result_parts[1] or ""
	for i = 2, #word_data do
		local sep = word_data[i-1].is_liaison and LIAISON or " "
		result = result .. sep .. result_parts[i]
	end
	return result
end

-- Rhyme Extractor
local IPA_VOWELS = "aɐɑæɛeəɪiɔoʊuɒɜ"

-- Return the rhyme of an IPA string: everything from the first vowel after
-- the LAST primary stress mark ˈ, with liaison markers, spaces and hiatus
-- dots removed. Returns nil when the string has no ˈ; returns the whole
-- post-stress remainder when it contains none of IPA_VOWELS.
local function get_rhyme(ipa)
	-- Remove liaison and spaces before rhyme calculation
	local clean_ipa = ugsub(ipa, "[‿ ]", "")

	-- Search for the last stress mark from right to left. A tone superscript
	-- (¹/²) in front of ˈ needs no special handling: the position of ˈ itself
	-- is what is recorded.
	local last_stress_pos = nil
	for i = ulen(clean_ipa), 1, -1 do
		if usub(clean_ipa, i, i) == "ˈ" then
			last_stress_pos = i
			break
		end
	end
	if not last_stress_pos then
		return nil
	end

	-- Extract content after the stress mark
	local after = usub(clean_ipa, last_stress_pos + 1)
	-- Remove hiatus dots for rhyme grouping
	after = ugsub(after, "%.", "")

	-- Find first vowel position (skip onset consonants)
	local vstart = umatch(after, "()[" .. IPA_VOWELS .. "]")
	if vstart then
		return usub(after, vstart)
	end
	return after
end

-- Rebuild the display spelling of one token: restore orthographic o/O for
-- internal ɔ/Ɔ and re-attach the tone mark recorded in tok.tone.
-- For a token with tok.tone_position (a diphthong) the mark goes after the
-- vowel at that position: for 2-character strings position 1 or else the
-- last vowel; for 3-character strings (silent i) position 1, 2, or else the
-- last vowel. Otherwise the mark follows the whole string.
local function attach_tone_for_display(disp_char, tok)
	local t_mark = ""
	if tok.tone == "acute" then t_mark = ACUTE
	elseif tok.tone == "grave" then t_mark = GRAVE
	elseif tok.tone == "tilde" then t_mark = TILDE end

	-- Restore original orthography for ɔ
	if disp_char == "ɔ" then disp_char = "o" end
	if disp_char == "Ɔ" then disp_char = "O" end

	if tok.tone_position then
		local char_len = ulen(disp_char)
		if char_len == 2 or char_len == 3 then
			local pos = tok.tone_position
			if pos ~= 1 and not (char_len == 3 and pos == 2) then
				pos = char_len
			end
			return usub(disp_char, 1, pos) .. t_mark .. usub(disp_char, pos + 1)
		end
		-- Any other length used to be dropped from the output altogether;
		-- fall through and keep the character(s) instead.
	end
	-- Single vowel or consonant: tone goes after the character
	return disp_char .. t_mark
end

-- Hyphenation generator (supports multi-word phrases).
-- Returns an array of display pieces obtained by splitting the whole phrase
-- on the syllable separator; the array also carries a `syllable_count` field
-- holding the total number of syllables over all words.
local function get_hyphenation(term, provided_pagename)
	-- Use provided pagename if available, otherwise try to extract from term
	local pagename, clean_term
	if provided_pagename then
		pagename = provided_pagename
		clean_term = term
	else
		pagename, clean_term = get_pagename(term)
	end
	term = clean_term

	-- Split input and pagename into words.
	local input_segs = split_into_segments(term)
	local pagename_segs = nil
	if pagename then
		pagename_segs = split_into_segments(pagename)
		-- Verify segment count matches (only when both are multi-segment)
		if #input_segs > 1 and #pagename_segs > 1 and #input_segs ~= #pagename_segs then
			error("lt-pron: Input has " .. #input_segs .. " words but pagename has " .. #pagename_segs .. " words. They must match.")
		end
		-- If pagename is a single word but input is multi-word, drop alignment
		if #pagename_segs == 1 and #input_segs > 1 then
			pagename_segs = nil
		end
	end

	local all_word_parts = {}
	for i, seg_text in ipairs(input_segs) do
		-- Get the corresponding pagename word, or nil if not available
		local seg_pagename = (pagename_segs and pagename_segs[i]) or nil
		local tokens = tokenize(seg_text, seg_pagename)
		local syllables = syllabify(tokens)
		local parts = {}
		for _, syl in ipairs(syllables) do
			local text = ""
			for _, tok in ipairs(syl) do
				-- Skip respelling characters (original_char == "-" means not in orthography)
				if tok.original_char ~= "-" then
					-- Use original_char if set (e.g., j̃ → i internally but j in display)
					-- Otherwise use tok.char
					text = text .. attach_tone_for_display(tok.original_char or tok.char, tok)
				end
			end
			parts[#parts + 1] = makeDisplayText(text)
		end
		-- Use ‧ instead of standard - internally to preserve word boundaries
		insert(all_word_parts, concat(parts, "‧"))
	end

	-- Hyphenation always joins multi-word phrases with a plain space — the
	-- liaison marker ‿ is purely an IPA-output device and never appears here.
	local combined_string = concat(all_word_parts, " ")
	local final_parts = {}
	for _, piece in ipairs(rsplit(combined_string, "‧")) do
		insert(final_parts, piece)
	end

	-- Calculate actual syllable count (for correct num_syl)
	local syllable_count = 0
	for _, word_part in ipairs(all_word_parts) do
		local word_syls = rsplit(word_part, "‧")
		syllable_count = syllable_count + #word_syls
	end
	-- Store actual syllable count as a field (since __len doesn't work on tables in Lua 5.1)
	final_parts.syllable_count = syllable_count
	return final_parts
end

-- ============================================================================
-- SECTION 7.5: Conjugation Module Support Functions
-- ============================================================================

-- Export: Get syllables as string array with accents
function export.get_syllables(term)
	local pagename, clean_term = get_pagename(term)
	term = clean_term
	local tokens = tokenize(term, pagename)
	local syllables = syllabify(tokens)
	local result = {}
	for _, syl in ipairs(syllables) do
		local text = ""
		for _, tok in ipairs(syl) do
			-- Skip respelling glides in syllable output
			if not tok.is_respelling then
				text = text .. attach_tone_for_display(tok.char, tok)
			end
		end
		-- Return NFC format with proper dotabove insertion
		insert(result, makeDisplayText(text))
	end
	return result
end

-- Export: Check if a syllable is heavy or light.
-- Raises an error unless the input syllabifies to exactly one syllable that
-- contains a vowel token.
function export.is_heavy_syllable(syllable)
	local pagename, clean_syllable = get_pagename(syllable)
	syllable = clean_syllable

	-- Validate single syllable
	local tokens = tokenize(syllable, pagename)
	local syllables = syllabify(tokens)
	if #syllables ~= 1 then
		error("is_heavy_syllable: input must be a single syllable, got " .. #syllables .. " syllables")
	end
	local syl = syllables[1]

	-- Find the vowel token
	local v_idx = nil
	for i, tok in ipairs(syl) do
		if tok.type == "V" then
			v_idx = i
			break
		end
	end
	if not v_idx then
		error("is_heavy_syllable: no vowel found in syllable")
	end
	local v_tok = syl[v_idx]
	local lc_v = ulower(v_tok.char)

	-- Handle silent i (e.g., "iau" where i is silent)
	-- NOTE(review): other functions in this module test `tok.silent_i`, this
	-- one tests `has_silent_i` — confirm the tokenizer sets both fields.
	if v_tok.has_silent_i then
		lc_v = usub(lc_v, 2)
	end

	-- Long vowels (inherently long, regardless of accent)
	local LONG_VOWELS = {
		["ą"]=true, ["ę"]=true, ["ė"]=true, ["į"]=true, ["y"]=true,
		["ų"]=true, ["ū"]=true,
		["o"]=true, ["ɔ"]=true  -- o/ɔ are always long (except ò, but we treat all o as long)
	}
	if LONG_VOWELS[lc_v] then
		return true
	end

	-- Diphthongs (length > 1)
	if ulen(lc_v) > 1 then
		return true
	end

	-- Mixed diphthongs: short vowel + liquid in coda position
	local SHORT_VOWELS = {["a"]=true, ["e"]=true, ["i"]=true, ["u"]=true}
	if SHORT_VOWELS[lc_v] and v_idx < #syl then
		local next_tok = syl[v_idx + 1]
		-- Liquid in coda (not j or v, which don't form mixed diphthongs)
		if next_tok.type == "R" and next_tok.char ~= "j" and next_tok.char ~= "v" then
			return true
		end
	end

	-- Otherwise, it's a light syllable
	return false
end

-- Export: Change accent of a syllable.
-- `target_accent` is one of 'acute', 'tilde', 'grave', 'none'. With 'none'
-- the syllable is returned with all accents removed; otherwise the result is
-- onset .. accented vowel (from ACCENT_PAIRS) .. coda. Raises an error on an
-- invalid accent name, a multi-syllable input, a syllable without a vowel,
-- or a vowel/accent combination missing from ACCENT_PAIRS.
function export.change_accent(syllable, target_accent)
	local pagename, clean_syllable = get_pagename(syllable)
	syllable = clean_syllable

	-- Validate target_accent parameter
	local VALID_ACCENTS = {acute=true, tilde=true, grave=true, none=true}
	if not VALID_ACCENTS[target_accent] then
		error("change_accent: invalid target_accent '" .. tostring(target_accent) .. "', must be 'acute', 'tilde', 'grave', or 'none'")
	end

	-- Validate single syllable
	local tokens = tokenize(syllable, pagename)
	local syllables = syllabify(tokens)
	if #syllables ~= 1 then
		error("change_accent: input must be a single syllable, got " .. #syllables .. " syllables")
	end

	-- Remove all existing accents
	local clean = remove_all_accents(syllable)

	-- If target is 'none', return clean syllable
	if target_accent == "none" then
		return clean
	end

	-- Re-tokenize the clean syllable to analyze structure
	local clean_tokens = tokenize(clean, pagename)
	local clean_syllables = syllabify(clean_tokens)
	local syl = clean_syllables[1]

	-- Find vowel position and extract vowel part
	local onset = ""
	local vowel_part = ""
	local coda = ""
	local v_idx = nil
	for i, tok in ipairs(syl) do
		if tok.type == "V" then
			v_idx = i
			break
		else
			onset = onset .. tok.char
		end
	end
	if not v_idx then
		error("change_accent: no vowel found in syllable")
	end
	local v_tok = syl[v_idx]
	vowel_part = ulower(v_tok.char)

	-- Handle silent i
	-- NOTE(review): see is_heavy_syllable — `has_silent_i` vs `silent_i`.
	if v_tok.has_silent_i then
		vowel_part = usub(vowel_part, 2)
	end

	-- Check if this is a mixed diphthong
	if v_idx < #syl then
		local next_tok = syl[v_idx + 1]
		if next_tok.type == "R" and next_tok.char ~= "j" and next_tok.char ~= "v" then
			-- Mixed diphthong: vowel + liquid
			vowel_part = vowel_part .. ulower(next_tok.char)
			-- Collect remaining coda after the liquid
			for i = v_idx + 2, #syl do
				coda = coda .. syl[i].char
			end
		else
			-- Regular syllable: collect all coda
			for i = v_idx + 1, #syl do
				coda = coda .. syl[i].char
			end
		end
	end

	-- Look up the accented form in ACCENT_PAIRS
	if not ACCENT_PAIRS[vowel_part] then
		error("change_accent: vowel/diphthong '" .. vowel_part .. "' not found in accent pairs table")
	end
	local accented_vowel = ACCENT_PAIRS[vowel_part][target_accent]
	if not accented_vowel then
		error("change_accent: accent type '" .. target_accent .. "' is not allowed for vowel/diphthong '" .. vowel_part .. "'")
	end

	-- Reconstruct the syllable with new accent (in NFD format)
	local result = onset .. accented_vowel .. coda
	return result
end

-- ============================================================================
-- SECTION 8: Module Exports & Template Formatting (Preserved)
-- ============================================================================
local q_spec = {store = "insert-flattened", type = "qualifier"}
local a_spec = {store = "insert-flattened", type = "labels"}
local ref_spec = {store = "insert-flattened", item_dest = "refs", type = "references"}

-- Generate audio object, supporting file#caption syntax
local function generate_audio_obj(arg)
	local file, caption = arg:match("^(.-)%s*#%s*(.*)$")
	file = file or arg
	return {file = file, caption = caption}
end

-- Add q/qq/a/aa/ref support to `param_mods` if inline modifiers are present
-- in `arg`. Returns the same `param_mods` table.
local function add_qualifier_mods(arg, param_mods)
	if arg:find("<") then
		param_mods.q = q_spec
		param_mods.qq = q_spec
		param_mods.a = a_spec
		param_mods.aa = a_spec
		param_mods.ref = ref_spec
	end
	return param_mods
end

-- Parse rhyme specification with optional syllable count
local function parse_rhyme(arg, parse_err)
	local function generate_obj(term)
		return {rhyme = term}
	end
	local param_mods = add_qualifier_mods(arg, {
		s = {
			item_dest = "num_syl",
			type = "number",
			sublist = true,
		},
	})
	return require(parse_util_module).parse_inline_modifiers(arg, {
		param_mods = param_mods,
		generate_obj = generate_obj,
		parse_err = parse_err,
		splitchar = ",",
	})
end

-- Parse hyphenation specification (dot-separated syllables)
local function parse_hyph(arg, parse_err)
	local function generate_obj(term)
		local parts = rsplit(term, "%.")
		return {hyph = parts, syllabification = term}
	end
	local param_mods = add_qualifier_mods(arg, {})
	return require(parse_util_module).parse_inline_modifiers(arg, {
		param_mods = param_mods,
		generate_obj = generate_obj,
		parse_err = parse_err,
		splitchar = ",",
	})
end

-- Parse homophone specification
local function parse_homophone(arg, parse_err)
	local function generate_obj(term)
		return {term = term}
	end
	local param_mods = add_qualifier_mods(arg, {
		t = {item_dest = "gloss"},
		gloss = {},
		pos = {},
		alt = {},
		lit = {},
		id = {},
		g = {
			item_dest = "genders",
			sublist = true,
		},
	})
	return require(parse_util_module).parse_inline_modifiers(arg, {
		param_mods = param_mods,
		generate_obj = generate_obj,
		parse_err = parse_err,
		splitchar = ",",
	})
end

local audio_nested_mods = {
	["a"] = a_spec,
	["aa"] = a_spec,
	["q"] = q_spec,
	["qq"] = q_spec,
	["text"] = {},
	["IPA"] = {sublist = true},
	["t"] = {item_dest = "gloss"},
	["gloss"] = {},
	["pos"] = {},
	["lit"] = {},
	["g"] = {
		item_dest = "genders",
		sublist = true,
	},
}

-- Parse one comma-separated pronunciation term, including its inline
-- modifiers (<q:…>, <audio:…>, <rhyme:…>, <hyph:…>, <hmp:…>, <base:…>).
-- The returned object always has term, audio_list, rhyme_list, hyph_list,
-- hmp_list and pagename (nil unless <base:…> was given).
local function parse_one_term(raw, parse_err)
	if not raw:find("<") then
		-- hmp_list is included so this shape matches the modifier path below.
		return {term = raw, audio_list = {}, rhyme_list = {}, hyph_list = {}, hmp_list = {}, pagename = nil}
	end

	-- Extract base spelling before parse_inline_modifiers
	local pagename = nil
	if raw:find("<base:") then
		pagename = raw:match("<base:([^>]+)>")
		raw = raw:gsub("<base:[^>]+>", "")
	end

	local parsed = require(parse_util_module).parse_inline_modifiers(raw, {
		param_mods = {
			["q"] = q_spec,
			["qq"] = q_spec,
			["a"] = a_spec,
			["aa"] = a_spec,
			["ref"] = ref_spec,
			["audio"] = {
				store = "insert",
				item_dest = "audio_list",
				convert = function(arg, perr)
					if arg:find("<") then
						local parsed_audio = require(parse_util_module).parse_inline_modifiers(arg, {
							param_mods = audio_nested_mods,
							generate_obj = generate_audio_obj,
							parse_err = perr,
						})
						parsed_audio.lang = get_lang()
						local textobj = require(audio_module).construct_audio_textobj(parsed_audio)
						parsed_audio.text = textobj
						parsed_audio.gloss = nil
						parsed_audio.pos = nil
						parsed_audio.lit = nil
						parsed_audio.genders = nil
						return parsed_audio
					end
					local audio_obj = generate_audio_obj(arg)
					audio_obj.lang = get_lang()
					local textobj = require(audio_module).construct_audio_textobj(audio_obj)
					audio_obj.text = textobj
					return audio_obj
				end,
			},
			["rhyme"] = {
				store = "insert-flattened",
				item_dest = "rhyme_list",
				convert = parse_rhyme,
			},
			["hyph"] = {
				store = "insert-flattened",
				item_dest = "hyph_list",
				convert = parse_hyph,
			},
			["hmp"] = {
				store = "insert-flattened",
				item_dest = "hmp_list",
				convert = parse_homophone,
			},
		},
		generate_obj = function(t)
			return {term = t, audio_list = {}, rhyme_list = {}, hyph_list = {}, hmp_list = {}}
		end,
		parse_err = parse_err,
	})
	parsed.audio_list = parsed.audio_list or {}
	parsed.rhyme_list = parsed.rhyme_list or {}
	parsed.hyph_list = parsed.hyph_list or {}
	parsed.hmp_list = parsed.hmp_list or {}
	parsed.pagename = pagename
	return parsed
end

-- Format rhyme objects with qualifiers
local function fmt_rhyme(rhyme_objs, bullet)
	if not rhyme_objs or #rhyme_objs == 0 then return nil end
	local rhyme_data = {}
	for _, robj in ipairs(rhyme_objs) do
		insert(rhyme_data, {
			rhyme = robj.rhyme,
			num_syl = robj.num_syl,
			q = robj.q,
			qq = robj.qq,
			a = robj.a,
			aa = robj.aa,
		})
	end
	return bullet .. require(rhymes_module).format_rhymes({
		lang = get_lang(),
		rhymes = rhyme_data
	})
end

-- Format hyphenation objects with qualifiers
local function fmt_hyph(hyph_objs, bullet)
	if not hyph_objs or #hyph_objs == 0 then return nil end
	local hyph_data = {}
	for _, hobj in ipairs(hyph_objs) do
		insert(hyph_data, {
			hyph = hobj.hyph,
			q = hobj.q,
			qq = hobj.qq,
			a = hobj.a,
			aa = hobj.aa,
		})
	end
	return bullet .. require(hyphenation_module).format_hyphenations({
		lang = get_lang(),
		hyphs = hyph_data,
		caption = "Syllabification"
	})
end

-- Format audio object
local function fmt_audio(audio_obj, bullet)
	return bullet .. require(audio_module).format_audio(audio_obj)
end

-- Format homophone objects with qualifiers
local function fmt_hmp(hmp_objs, bullet)
	if not hmp_objs or #hmp_objs == 0 then return nil end
	return bullet .. require(homophones_module).format_homophones({
		lang = get_lang(),
		homophones = hmp_objs,
	})
end

local function is_multiword_term(term)
	-- split_into_segments returns a list of non-empty word strings.
	return #split_into_segments(term) > 1
end

-- Template entry point. Reads the parent frame's arguments (|1= comma-separated
-- terms with inline modifiers, |bullets= nesting depth) and returns wikitext
-- with, per term: the IPA line, then audio, rhyme, syllabification and
-- homophone lines.
function export.show(frame)
	local parargs = frame:getParent().args
	local args = require(parameters_module).process(parargs, {
		[1] = {default = "nãmas"},
		["bullets"] = {type = "number", default = 1},
	})
	local input = args[1]
	local nb = args.bullets
	local b1 = string.rep("*", nb) .. " "
	local b2 = string.rep("*", nb + 1) .. " "

	local raw_terms = require(parse_util_module).split_escaping(input, ",")
	local parsed_terms = {}
	for i, raw in ipairs(raw_terms) do
		raw = raw:match("^%s*(.-)%s*$")
		local pt = parse_one_term(raw, function(msg)
			error("lt-pron: " .. msg .. " (term " .. i .. ")")
		end)
		parsed_terms[#parsed_terms + 1] = pt
	end

	m_IPA = m_IPA or require("Module:IPA")
	local text_parts = {}
	for _, pt in ipairs(parsed_terms) do
		-- Determine bullet level: same level as IPA for single pronunciation, indented for multiple
		local content_bullet = (#parsed_terms == 1) and b1 or b2

		-- Generate IPA
		local ipa_str = to_ipa(pt.term, pt.pagename)

		-- Auto-generated hyphenation is needed by both the rhyme and the
		-- hyphenation sections; compute it on first use and reuse it.
		local auto_hyph_loaded, auto_hyph_value = false, nil
		local function auto_hyphenation()
			if not auto_hyph_loaded then
				auto_hyph_value = get_hyphenation(pt.term, pt.pagename)
				auto_hyph_loaded = true
			end
			return auto_hyph_value
		end

		-- Handle rhyme: manual override, suppression, or auto-generation
		local rhyme_objs = nil
		local suppress_rhyme = false
		if #pt.rhyme_list > 0 then
			for _, robj in ipairs(pt.rhyme_list) do
				if robj.rhyme == "-" then
					suppress_rhyme = true
					break
				end
			end
			if not suppress_rhyme then
				rhyme_objs = {}
				for _, robj in ipairs(pt.rhyme_list) do
					-- If num_syl not specified, try to get from auto-generated hyphenation
					if not robj.num_syl then
						local auto_hyph = auto_hyphenation()
						if auto_hyph and #auto_hyph > 0 then
							-- Use syllable_count field if available (for multi-word phrases), otherwise use array length
							local syl_count = auto_hyph.syllable_count or #auto_hyph
							robj.num_syl = {syl_count}
						end
					end
					insert(rhyme_objs, robj)
				end
			end
		else
			-- Auto-generate rhyme (skip if term ends with - or is a multiword term)
			if not pt.term:match("%-$") and not is_multiword_term(pt.term) then
				local rhyme_str = get_rhyme(ipa_str)
				if rhyme_str then
					local auto_hyph = auto_hyphenation()
					-- Use syllable_count field if available (for multi-word phrases), otherwise use array length
					local num_syl = (auto_hyph and #auto_hyph > 0) and {auto_hyph.syllable_count or #auto_hyph} or nil
					rhyme_objs = {{rhyme = rhyme_str, num_syl = num_syl}}
				end
			end
		end

		-- Handle hyphenation: manual override, suppression, or auto-generation
		local hyph_objs = nil
		local suppress_hyph = false
		if #pt.hyph_list > 0 then
			for _, hobj in ipairs(pt.hyph_list) do
				if hobj.syllabification == "-" then
					suppress_hyph = true
					break
				end
			end
			if not suppress_hyph then
				hyph_objs = pt.hyph_list
			end
		else
			-- Auto-generate hyphenation
			local auto_hyph = auto_hyphenation()
			if auto_hyph and #auto_hyph > 0 then
				hyph_objs = {{hyph = auto_hyph, syllabification = concat(auto_hyph, ".")}}
			end
		end

		-- Format IPA with qualifiers and references
		local ipa_item = {pron = "[" .. ipa_str .. "]"}
		if pt.q then ipa_item.q = pt.q end
		if pt.qq then ipa_item.qq = pt.qq end
		if pt.a then ipa_item.a = pt.a end
		if pt.aa then ipa_item.aa = pt.aa end
		if pt.refs then ipa_item.refs = pt.refs end
		text_parts[#text_parts + 1] = b1 .. m_IPA.format_IPA_full({
			lang = get_lang(),
			items = {ipa_item}
		})

		-- Audio
		for _, aud in ipairs(pt.audio_list or {}) do
			text_parts[#text_parts + 1] = fmt_audio(aud, content_bullet)
		end

		-- Rhyme
		if rhyme_objs then
			local r = fmt_rhyme(rhyme_objs, content_bullet)
			if r then text_parts[#text_parts + 1] = r end
		end

		-- Hyphenation
		if hyph_objs then
			local h = fmt_hyph(hyph_objs, content_bullet)
			if h then text_parts[#text_parts + 1] = h end
		end

		-- Homophones
		if pt.hmp_list and #pt.hmp_list > 0 then
			local hmp = fmt_hmp(pt.hmp_list, content_bullet)
			if hmp then text_parts[#text_parts + 1] = hmp end
		end
	end
	return concat(text_parts, "\n")
end

export.toIPA = to_ipa
export.hyphenate = get_hyphenation
export.rhyme = get_rhyme

return export
pqyyto9ntkmwafrrq481zamx4xplqhv မဝ်ဂျူ:User:TongcyDai/lt-pron/doc 828 295397 395933 2026-05-29T18:21:03Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:User:TongcyDai/lt-pron/testcases|run_tests|comments=1}}" 395933 wikitext text/x-wiki {{#invoke:User:TongcyDai/lt-pron/testcases|run_tests|comments=1}} t3q3kr2mjufakt1cg21lqt8ndqy72wu မဝ်ဂျူ:User:TongcyDai/lt-pron/testcases 828 295398 395934 2026-05-29T18:22:03Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "local tests = require("Module:UnitTests") local m_pron = require("Module:User:TongcyDai/lt-pron") local unpack = unpack or table.unpack -- ── helpers ────────────────────────────────────────────────────────────────── local function tag_IPA(s) return '<span class="IPA">' .. s .. '<..."
395934 Scribunto text/plain
local tests = require("Module:UnitTests")
local m_pron = require("Module:User:TongcyDai/lt-pron")
local unpack = unpack or table.unpack

-- ── helpers ──────────────────────────────────────────────────────────────────

-- Wrap a string in the IPA span used when displaying test results.
local function tag_IPA(s)
	return '<span class="IPA">' .. s .. '</span>'
end

-- Assert that toIPA(term), normalized to NFC, equals `expected`.
function tests:check_IPA(term, expected, comment)
	local actual = mw.ustring.toNFC(m_pron.toIPA(term))
	local options = { comment = comment, show_difference = true }
	self:equals(term, tag_IPA(actual), tag_IPA(expected), options)
end

-- Assert that hyphenate(term), joined with ‧, equals `expected`.
function tests:check_hyph(term, expected, comment)
	local parts = m_pron.hyphenate(term)
	local joined = table.concat(parts, "‧")
	local options = { comment = comment, show_difference = true }
	self:equals(term, joined, expected, options)
end

-- Assert that the rhyme extracted from toIPA(term) equals `expected`
-- (a missing rhyme is compared as the empty string).
function tests:check_rhyme(term, expected, comment)
	local ipa = m_pron.toIPA(term)
	local actual = mw.ustring.toNFC(m_pron.rhyme(ipa) or "")
	local options = { comment = comment, show_difference = true }
	self:equals(term, actual, expected, options)
end

-- ════════════════════════════════════════════════════════════════════════════
-- IPA TESTS
-- ════════════════════════════════════════════════════════════════════════════

-- A: Consonants and basic palatalization (one example per consonant, in two
-- versions: hard variant + soft variant before front vowel).
-- Each entry is { input term, expected IPA, comment } and is passed
-- positionally to tests:check_IPA.
function tests:test_IPA_A_consonants_and_palatalization()
	local examples = {
		-- B, b
		{ "bai̇̃gti", "²ˈbɐɪˑktʲɪ", "b" },
		{ "bi̇̀rbt", "¹ˈbʲɪrpt", "bʲ" },
		-- C, c
		{ "cùkrus", "ˈt͡sʊkrʊs", "c" },
		{ "ci̇̀bė", "ˈt͡sʲɪbʲeː", "cʲ" },
		-- Č, č
		{ "čaižùs", "t͡ʃɐɪˈʒʊs", "č" },
		{ "Kãčinas", "²ˈkɑːt͡ʃʲɪnɐs", "čʲ" },
		-- D, d
		{ "daũg", "²ˈdɒʊˑk", "d" },
		{ "di̇̀delis", "ˈdʲɪdʲɛlʲɪs", "dʲ" },
		-- F, f
		{ "fãzė", "²ˈfɑːzʲeː", "f" },
		{ "filė̃", "fʲɪ²ˈlʲeː", "fʲ" },
		-- G, g
		{ "gãlas", "²ˈɡɑːlɐs", "g" },
		{ "girià", "ɡʲɪˈrʲɛ", "gʲ" },
		-- H, h
		{ "hãlė", "²ˈɣɑːlʲeː", "h" },
		{ "hi̇̀mnas", "¹ˈɣʲɪmnɐs", "hʲ" },
		-- J, j
		{ "jáunas<base:jaunas>", "¹ˈjæˑʊnɐs", "j" },
		{ "vajè", "ʋɐˈjɛ", "j" },
		-- K, k
		{ "kãras", "²ˈkɑːrɐs", "k" },
		{ "kitóks", "kʲɪ¹ˈtoːks", "kʲ" },
		-- L, l
		{ "lãbas", "²ˈlɑːbɐs", "l" },
		{ "lė̃kti", "²ˈlʲeːktʲɪ", "lʲ" },
		-- M, m
		{ "mamà", "mɐˈmɐ", "m" },
		{ "méilė", "¹ˈmʲæˑɪlʲeː", "mʲ" },
		-- N, n
		{ "nósis", "¹ˈnoːsʲɪs", "n" },
		{ "knygà", "knʲiːˈɡɐ", "nʲ" },
		-- P, p
		{ "pãdas", "²ˈpɑːdɐs", "p" },
		{ "pẽčius", "²ˈpʲæːt͡ʃʲʊ̟s", "pʲ" },
		-- R, r
		{ "rai̇̃dė", "²ˈrɐɪˑdʲeː", "r" },
		{ "kairė̃", "kɐɪ²ˈrʲeː", "rʲ" },
		-- S, s
		{ "sõdas", "²ˈsoːdɐs", "s" },
		{ "si̇́ela", "¹ˈsʲiɛlɐ", "sʲ" },
		-- Š, š
		{ "šókti", "¹ˈʃoːktʲɪ", "š" },
		{ "šỹpsena", "²ˈʃʲiːpʲsʲɛnɐ", "šʲ" },
		-- T, t
		{ "tetà", "tʲɛˈtɐ", "tʲ, t" },
		-- V, v
		{ "svajõnė", "sʋɐ²ˈjo̟ːnʲeː", "v" },
		{ "vi̇̀ltis", "¹ˈʋʲɪlʲtʲɪs", "vʲ" },
		-- Z, z
		{ "zui̇̃kis", "²ˈzʊɪˑkʲɪs", "z" },
		{ "zi̇̀r̃zinti", "¹ˈzʲɪrʲzʲɪnʲtʲɪ", "zʲ" },
		-- Ž, ž
		{ "žolė̃", "ʒoː²ˈlʲeː", "ž" },
		{ "žẽmė", "²ˈʒʲæːmʲeː", "žʲ" },
		-- Ch, ch
		{ "chalãtas", "xɐ²ˈlɑːtɐs", "ch" },
		{ "cherèsas", "xʲɛˈrʲɛsɐs", "chʲ" },
		-- Dz, dz
		{ "dzū̃kai", "²ˈd͡zuːkɐɪ", "dz" },
		{ "dzi̇̀ngt", "¹ˈd͡zʲɪŋkt", "dzʲ" },
		-- Dž, dž
		{ "džáulis", "¹ˈd͡ʒɑˑʊlʲɪs", "dž" },
		{ "džiãzas", "²ˈd͡ʒʲæːzɐs", "džʲ" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- B: Monophthongs (short vs. long, native vs. loanword variants).
function tests:test_IPA_B_monophthongs() local examples = { -- A, a { "tàvo", "ˈtɐʋoː", "à" }, { "vãkaras", "²ˈʋɑːkɐrɐs", "ã, a" }, -- Ą, ą { "rýtą", "¹ˈrʲiːtɑː", "ą" }, { "ą́žuolas", "¹ˈɑːʒuɔlɐs", "ą́" }, { "šą̃la", "²ˈʃɑːlɐ", "ą̃" }, -- E, e { "Pelesà", "pʲɛlʲɛˈsɐ", "e" }, { "nèšti", "ˈnʲɛʃʲtʲɪ", "è" }, { "mètras", "ˈmʲɛtrɐs", "è" }, { "mẽnas", "²ˈmʲæːnɐs", "ẽ" }, -- E with `^`: closed short e in loanwords (VLKK §6.7.3 — written `ẹ`) { "se^ktà", "sʲekˈtɐ", "e^ (closed short e in loanwords)" }, { "re^ži̇̀mas", "rʲeˈʒʲɪmɐs", "e^ (closed short e in loanwords)" }, -- Ę, ę { "tęsinỹs", "tʲæːsʲɪ²ˈnʲiːs", "ę" }, { "tavę̃s", "tɐ²ˈʋʲæːs", "ę̃" }, { "tę́vas", "¹ˈtʲæːʋɐs", "ę́" }, -- Ė, ė { "ėdė́jas", "eː¹ˈdʲeːjɛs", "ė, ė́" }, { "gėlė̃", "ɡʲeː²ˈlʲeː", "ė, ė̃" }, -- I, i { "liki̇̀mas", "lʲɪˈkʲɪmɐs", "i, i̇̀" }, -- Į, į { "įlį̃sti", "iː²ˈlʲiːsʲtʲɪ", "į, į̃" }, { "į́spūdis", "¹ˈiːspuːdʲɪs", "į́" }, -- Y, y { "mylė́ti", "mʲiː¹ˈlʲeːtʲɪ", "y" }, { "ýda", "¹ˈiːdɐ", "ý" }, { "knỹgė", "²ˈknʲiːɡʲeː", "ỹ" }, -- O, o (long native [oː] vs. short loanword [ɔ]) { "norė́ti", "noː¹ˈrʲeːtʲɪ", "o (native, long)" }, { "óras", "¹ˈoːrɐs", "ó" }, { "keliõnė", "kʲɛ²ˈlʲo̟ːnʲeː", "õ" }, { "òmas", "ˈɔmɐs", "ò (loanword, short)" }, -- FIXME: ò /oː/ exists? { "stòksas", "ˈstɔksɐs", "ò (loanword, grave)" }, { "Zo^jà", "zɔˈjɛ", "o^ (loanword [ɔ], unstressed)" }, { "žo^ngliẽrius", "ʒɔŋ²ˈɡlʲiɛrʲʊ̟s", "o^ (loanword [ɔ], unstressed)" }, { "fo^to^parodà", "fɔtɔpɐroːˈdɐ", "o^ (loanword foto- prefix)" }, { "ho^mo^ni̇̀mas", "ɣɔmɔˈnʲɪmɐs", "o^ (loanword homo- prefix)" }, -- U, u { "ugni̇̀s", "ʊˈɡnʲɪs", "u" }, { "pùsė", "ˈpʊsʲeː", "ù" }, -- Ų, ų { "siųstùvas", "sʲu̟ːˈstʊʋɐs", "ų" }, { "įskų́sti", "iː¹ˈskuːsʲtʲɪ", "ų́" }, { "sių̃sti", "²ˈsʲu̟ːsʲtʲɪ", "ų̃" }, -- Ū, ū { "sū́nūs", "¹ˈsuːnuːs", "ū́, ū" }, { "rū̃gštis", "²ˈruːkʃʲtʲɪs", "ū̃" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- C1: Diphthongs ai / au — three accent positions each. 
function tests:test_IPA_C1_diphthongs_ai_au() local examples = { -- ai { "táikino", "¹ˈtɑˑɪkʲɪnoː", "ái (acute)" }, { "tai̇̃ką", "²ˈtɐɪˑkɑː", "ai̇̃ (tilde)" }, { "taiki̇̀klis", "tɐɪˈkʲɪklʲɪs", "ai (unstressed)" }, -- au { "tráukia", "¹ˈtrɑˑʊkʲɛ", "áu (acute)" }, { "patraũklų", "pɐ²ˈtrɒʊˑkluː", "aũ (tilde)" }, { "pértrauka", "¹ˈpʲæˑrtrɒʊkɐ", "au (unstressed)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- C2: Diphthongs ei / ui — three accent positions each. function tests:test_IPA_C2_diphthongs_ei_ui() local examples = { -- ei { "méilė", "¹ˈmʲæˑɪlʲeː", "éi (acute)" }, { "mei̇̃liai", "²ˈmʲɛɪˑlʲɛɪ", "ei̇̃ (tilde)" }, { "meilikáuti", "mʲɛɪlʲɪ¹ˈkɑˑʊtʲɪ", "ei (unstressed)" }, -- ui { "kùisytis", "¹ˈkʊɪsʲiːtʲɪs", "ùi (grave; first element short)" }, { "kui̇̃sti", "²ˈkʊɪˑsʲtʲɪ", "ui̇̃ (tilde)" }, { "kuitinė́tis", "kʊɪtʲɪ¹ˈnʲeːtʲɪs", "ui (unstressed)" }, { "bùivo^las", "¹ˈbʊɪʋɔlɐs", "ùi (grave; with loanword o^)" }, -- úi (acute with first element half-long) is a free-style variant; not tested separately } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- C3: Variable diphthongs ie / uo (treated phonemically as monophthongs by VLKK, -- but written as digraphs and patterning with diphthongs in accent placement). function tests:test_IPA_C3_diphthongs_ie_uo() local examples = { -- ie { "si̇́ena", "¹ˈsʲiɛnɐ", "íe (acute)" }, { "jiẽ<base:jie>", "²ˈjiɛ", "iẽ (tilde)" }, { "Diẽvas", "²ˈdʲiɛʋɐs", "iẽ (tilde)" }, { "Ki̇̀msienė", "¹ˈkʲɪmʲsʲiɛnʲeː", "ie (unstressed)" }, -- uo { "úodas", "¹ˈuɔdɐs", "úo (acute)" }, { "ruduõ", "rʊ²ˈduɔ", "uõ (tilde)" }, { "Aluojà<base:Aluoja>", "ɐluɔˈjɛ", "uo (unstressed)" }, { "vaizduõtė", "ʋɐɪ²ˈzduɔtʲeː", "uõ (tilde, after consonant cluster)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- C4: Mixed diphthongs of the a-series — al, am, an, ar. 
function tests:test_IPA_C4_mixed_a() local examples = { -- al { "álkanas", "¹ˈɑˑlkɐnɐs", "ál (acute)" }, { "al̃kis", "²ˈɐlʲˑkʲɪs", "al̃ (tilde)" }, { "alkanáuti", "ɐlkɐ¹ˈnɑˑʊtʲɪ", "al (unstressed)" }, -- am { "skámbčioti", "¹ˈskɑˑmʲpʲt͡ʃʲo̟ːtʲɪ", "ám (acute)" }, { "skam̃biai", "²ˈskɐmʲˑbʲɛɪ", "am̃ (tilde)" }, { "skambùmas", "skɐmˈbʊmɐs", "am (unstressed)" }, -- an { "ránkioja<base:rankioja>", "¹ˈrɑˑŋʲkʲo̟ːjɛ", "án (acute)" }, { "rañkdarbis", "²ˈrɐŋˑɡdɐrʲbʲɪs", "añ (tilde)" }, { "rankinùkas", "rɐŋʲkʲɪˈnʊkɐs", "an (unstressed)" }, -- ar { "sárgas", "¹ˈsɑˑrɡɐs", "ár (acute)" }, { "sar̃giai", "²ˈsɐrʲˑɡʲɛɪ", "ar̃ (tilde)" }, { "sargýba", "sɐrʲ¹ˈɡʲiːbɐ", "ar (unstressed)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- C5: Mixed diphthongs of the e-series — el, em, en, er. -- Foreign-word variants with grave (èl, èm, èn, èr) read tvirtapradiškai -- are tested as alternates per VLKK §9.21, §9.23. function tests:test_IPA_C5_mixed_e() local examples = { -- el { "kélmas", "¹ˈkʲæˑlmɐs", "él (acute)" }, { "Kel̃mė", "²ˈkʲɛlʲˑmʲeː", "el̃ (tilde)" }, { "kelmùtis", "kʲɛlˈmʊtʲɪs", "el (unstressed)" }, { "èlfas", "¹ˈɛlfɐs", "èl (loanword, grave; tvirtapradiškai)" }, { "el̃fas", "²ˈɛlˑfɐs", "el̃ (loanword, tilde)" }, -- em { "drémžti", "¹ˈdʲrʲæˑmʲʃʲtʲɪ", "ém (acute; ž → š before t)" }, { "drem̃bti", "²ˈdʲrʲɛmʲˑpʲtʲɪ", "em̃ (tilde)" }, { "Trempai̇̃", "tʲrʲɛm²ˈpɐɪˑ", "em (unstressed)" }, { "Jaržèmskis", "jɛrʲ¹ˈʒʲɛmʲsʲkʲɪs", "èm (loanword, grave)" }, { "kem̃pingas", "²ˈkʲɛmʲˑpʲɪŋɡɐs", "em̃ (loanword, tilde)" }, -- en { "véngia", "¹ˈʋʲæˑŋʲɡʲɛ", "én (acute)" }, { "žeñgsena", "²ˈʒʲɛŋˑksʲɛnɐ", "eñ (tilde)" }, { "vengi̇̀mas", "ʋʲɛŋʲˈɡʲɪmɐs", "en (unstressed)" }, { "ménkė", "¹ˈmʲæˑŋʲkʲeː", "én (acute, before nk)" }, { "meñkinti", "²ˈmʲɛŋʲˑkʲɪnʲtʲɪ", "eñ (tilde, before nk)" }, { "menkystà", "mʲɛŋʲkʲiːˈstɐ", "en (unstressed, before nk)" }, { "hènris", "¹ˈɣʲɛnʲrʲɪs", "èn (loanword, grave)" }, { "ceñtas", "²ˈt͡sʲɛnˑtɐs", "eñ (loanword, tilde)" }, { "ãmen", "²ˈɑːmʲɛn", 
"en (loanword, unstressed final)" }, -- er { "nérti", "¹ˈnʲæˑrʲtʲɪ", "ér (acute)" }, { "ner̃šti", "²ˈnʲɛrʲˑʃʲtʲɪ", "er̃ (tilde)" }, { "nerštãvietė", "nʲɛr²ˈʃtɑːʋʲiɛtʲeː", "er (unstressed)" }, { "ko^ncèrtas", "kɔnʲ¹ˈt͡sʲɛrtɐs", "èr (loanword, grave)" }, { "ko^ncer̃tas", "kɔnʲ²ˈt͡sʲɛrˑtɐs", "er̃ (loanword, tilde)" }, { "ter̃minas", "²ˈtʲɛrʲˑmʲɪnɐs", "er̃ (loanword, tilde)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- C6: Mixed diphthongs of the i-series — il, im, in, ir. function tests:test_IPA_C6_mixed_i() local examples = { -- il { "pi̇̀lti", "¹ˈpʲɪlʲtʲɪ", "ìl (grave)" }, { "pil̃vas", "²ˈpʲɪlˑʋɐs", "il̃ (tilde)" }, { "pilti̇̀nis", "pʲɪlʲˈtʲɪnʲɪs", "il (unstressed)" }, -- im { "ti̇̀mptelėjimas<base:timptelėjimas>", "¹ˈtʲɪmʲpʲtʲɛlʲeːjɪmɐs", "ìm (grave)" }, { "tim̃pinti", "²ˈtʲɪmʲˑpʲɪnʲtʲɪ", "im̃ (tilde)" }, { "timpinė́ti", "tʲɪmʲpʲɪ¹ˈnʲeːtʲɪ", "im (unstressed)" }, -- in { "gi̇̀nti", "¹ˈɡʲɪnʲtʲɪ", "ìn (grave)" }, { "giñklas", "²ˈɡʲɪŋˑklɐs", "iñ (tilde)" }, { "ginkluõtė", "ɡʲɪŋ²ˈkluɔtʲeː", "in (unstressed)" }, -- ir { "di̇̀rti", "¹ˈdʲɪrʲtʲɪ", "ìr (grave)" }, { "dir̃žas", "²ˈdʲɪrˑʒɐs", "ir̃ (tilde)" }, { "dirži̇̀nis", "dʲɪrʲˈʒʲɪnʲɪs", "ir (unstressed)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- C7: Mixed diphthongs of the u-series — ul, um, un, ur. 
function tests:test_IPA_C7_mixed_u() local examples = { -- ul { "dùlkė", "¹ˈdʊlʲkʲeː", "ùl (grave)" }, { "dul̃kti", "²ˈdʊlʲˑktʲɪ", "ul̃ (tilde)" }, { "dulkė́tas", "dʊlʲ¹ˈkʲeːtɐs", "ul (unstressed)" }, -- um { "grùmtis", "¹ˈɡrʊmʲtʲɪs", "ùm (grave)" }, { "grum̃ba", "²ˈɡrʊmˑbɐ", "um̃ (tilde)" }, { "grumtỹnės", "ɡrʊmʲ²ˈtʲiːnʲeːs", "um (unstressed)" }, -- un { "skùndė", "¹ˈskʊnʲdʲeː", "ùn (grave)" }, { "skuñdas", "²ˈskʊnˑdɐs", "uñ (tilde)" }, { "skundi̇̀kas", "skʊnʲˈdʲɪkɐs", "un (unstressed)" }, -- ur { "gùrkšnis", "¹ˈɡʊrʲkʃʲnʲɪs", "ùr (grave)" }, { "gur̃gti", "²ˈɡʊrʲˑktʲɪ", "ur̃ (tilde)" }, { "gurkšnóti", "ɡʊrk¹ˈʃnoːtʲɪ", "ur (unstressed)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- C8: Mixed diphthongs of the foreign o-series — ol, om, on, or -- (per VLKK §9.22, default reading is tvirtapradiškai with grave). function tests:test_IPA_C8_mixed_o_foreign() local examples = { -- ol { "kòlba", "¹ˈkɔlbɐ", "òl (grave; default reading)" }, { "hòldingas", "¹ˈɣɔlʲdʲɪŋɡɐs", "òl (grave)" }, -- om { "do^mkrãtas", "dɔm²ˈkrɑːtɐs", "om (unstressed)" }, { "pòmpa", "¹ˈpɔmpɐ", "òm (grave)" }, -- on { "po^ntònas", "pɔnˈtɔnɐs", "on (unstressed) / òn (grave)" }, { "fòndas", "¹ˈfɔndɐs", "òn (grave)" }, -- or { "po^rtrètas", "pɔrʲˈtʲrʲɛtɐs", "or (unstressed)" }, { "fòrma", "¹ˈfɔrmɐ", "òr (grave)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- C9: Foreign diphthongs eu, oi, ou — three accent positions where attested. 
function tests:test_IPA_C9_diphthongs_foreign_eu_oi_ou() local examples = { -- eu { "plèura", "¹ˈpʲlʲɛʊrɐ", "èu (grave; tvirtapradiškai)" }, { "eũras", "²ˈɛʊˑrɐs", "eũ (tilde)" }, { "Euro^pà", "ɛʊrɔˈpɐ", "eu (unstressed)" }, -- éu does not exist (é is long; éu would be long+long) -- oi { "Kóiva", "¹ˈkoˑɪʋɐ", "ói (acute; rare)" }, { "mòira", "¹ˈmɔɪrɐ", "òi (grave)" }, { "sinusòidė", "sʲɪnʊ¹ˈsɔɪdʲeː", "òi (grave)" }, { "bròileris", "¹ˈbrɔɪlʲɛrʲɪs", "òi (grave)" }, { "oikumenà", "ɔɪkʊmʲɛˈnɐ", "oi (unstressed)" }, -- oi̇̃ does not exist (per VLKK: oi reads tvirtapradiškai only) -- ou { "šòu", "¹ˈʃɔʊ", "òu (grave)" }, { "klòunas", "¹ˈklɔʊnɐs", "òu (grave)" }, { "klounadà", "klɔʊnɐˈdɐ", "ou (unstressed)" }, -- óu does not exist (ó is long) -- oũ does not exist (per VLKK: ou reads tvirtapradiškai only) } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- D: Fake diphthongs — vowel sequences that LOOK like diphthongs but are -- actually two separate vowels in adjacent syllables (hiatus). The module -- must NOT collapse these into a single diphthong nucleus. 
function tests:test_IPA_D_fake_diphthongs() local examples = { -- ai (fake) { "nebepàima", "nʲɛbʲɛˈpɐ.ɪmɐ", "ài" }, { "archãika", "ɐr²ˈxɑː.ɪkɐ", "ãi" }, { "pai̇́eško", "pɐ¹ˈjiɛʃkoː", "ai̇́" }, { "betai̇̀nas", "bʲɛtɐ.ˈɪnɐs", "ai̇̀" }, -- au (fake) { "pàurzgė", "ˈpɐ.ʊrʲzʲɡʲeː", "àu" }, { "šilãuogė", "ʃʲɪ²ˈlɑː.uɔɡʲeː", "ãu" }, { "Naùmo^vas", "nɐ.ˈʊmɔʋɐs", "aù" }, { "nepaúosto", "nʲɛpɐ.¹ˈuɔstoː", "aú" }, -- ei (fake) { "nebèima", "nʲɛˈbʲɛ.ɪmɐ", "èi" }, { "nebei̇̀rti", "nʲɛbʲɛ.¹ˈɪrʲtʲɪ", "ei̇̀" }, { "neji̇́eško<base:neieško>", "nʲɛ¹ˈjiɛʃkoː", "ei̇́ (no-j-insertion not found yet)" }, -- ẽi not found -- ui (fake) { "sui̇̀ro", "sʊ.ˈɪroː", "ui̇̀" }, { "sui̇́eško", "sʊ¹ˈjiɛʃkoː", "ui̇́" }, -- úi, ũi not found -- ie (fake) { "besi̇̀elgė", "bʲɛˈsʲɪ.ɛlʲɡʲeː", "i̇̀e" }, { "ˌpo^lièsteris", "ˌpɔlʲɪ.ˈɛsʲtʲɛrʲɪs", "i̇̀e" }, { "įsiérzina", "iːsʲɪ.¹ˈæˑrʲzʲɪnɐ", "ié" }, -- i̇̃e not found -- uo (fake) { "sùošė", "ˈsʊ.oːʃʲeː", "ùo" }, -- ũo, uó not found; for uò only /ʊˈɔ/ examples found -- foreign: ao { "mao^ji̇̀zmas<base:maojizmas>", "mɐ.ɔˈjɪzmɐs", "ao (no-j-insertion variant)" }, -- foreign: oi (fake) -- õi, oi̇́ not found -- foreign: ou (fake) { "fino^ùgrai", "fʲɪnɔ.ˈʊɡrɐɪ", "où" }, -- õu, oú not exist -- foreign: eu (fake) { "neúosti", "nʲɛ.¹ˈuɔsʲtʲɪ", "eú" }, { "teùrginis", "tʲɛ.¹ˈʊrʲɡʲɪnʲɪs", "eù" }, -- ẽu not exist } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- E: Fronting of o/u after a palatalized consonant or j (VLKK IPA rec §4.4): -- [oː → o̟ː], [ʊ → ʊ̟], [uː → u̟ː], [uɔ → u̟ɔ]. 
function tests:test_IPA_E_o_u_fronting_after_palatal() local examples = { -- After palatalized consonant + o/u { "sagióti", "sɐ¹ˈɡʲo̟ːtʲɪ", "Cʲ + o → o̟ː" }, { "angijo^mà", "ɐŋʲɡʲɪjɔ̟ˈmɐ", "Cʲ + o^ → ɔ̟" }, { "siuñčia", "²ˈsʲʊ̟nʲˑt͡ʃʲɛ", "Cʲ + u → ʊ̟" }, { "sių̃sti", "²ˈsʲu̟ːsʲtʲɪ", "Cʲ + ų → u̟ː" }, { "ãčiū", "²ˈɑːt͡ʃʲu̟ː", "Cʲ + ū → u̟ː" }, { "liuobà", "lʲu̟ɔˈbɐ", "Cʲ + uo → u̟ɔ" }, -- After j + o/u (j inherently palatal, triggers fronting) { "at.jójo<base:atjojo>", "ɐtʲ¹ˈjo̟ːjo̟ː", "j + o → o̟ː" }, { "Lo^jo^là<base:Lojola>", "lɔjɔ̟ˈlɐ", "j + o^ → ɔ̟" }, { "Jùlė<base:Julė>", "ˈjʊ̟lʲeː", "j + u → ʊ̟" }, { "ãkcijų<base:akcijų>", "²ˈɑːkt͡sʲɪju̟ː", "j + ų → u̟ː" }, { "jū́ra<base:jūra>", "¹ˈju̟ːrɐ", "j + ū → u̟ː" }, { "júodas<base:juodas>", "¹ˈju̟ɔdɐs", "j + uo → u̟ɔ" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- F: Palatalization spreading — palatalization of a front vowel propagates -- leftward through preceding consonants, including through k/g (which do -- not directly palatalize but transmit the feature; VLKK §13). 
function tests:test_IPA_F_palatalization_spreading() local examples = { -- Spreading through obstruent clusters { "skri̇́eti", "¹ˈsʲkrʲiɛtʲɪ", "Spreading left through r and k" }, { "displė̃jus<base:displėjus>", "dʲɪ²ˈsʲpʲlʲeːjʊ̟s", "Spreading left through cluster spl" }, -- Spreading to next syllable's onset { "pùlti", "¹ˈpʊlʲtʲɪ", "Palatalized l before t (softened by following i)" }, { "méilė", "¹ˈmʲæˑɪlʲeː", "Palatalized resonant cluster" }, -- Secondary stress should not block spreading { "išˌverstaãkis", "ɪʃʲˌʋʲɛrstɐ.²ˈɑːkʲɪs", "Secondary stress should not stop palatalization" }, -- VLKK §13: l palatalizes through k/g before another soft consonant { "al̃ksnis", "²ˈɐlʲˑksʲnʲɪs", "lʲ through k before sʲnʲ (alksnis)" }, { "álgebra", "¹ˈɑˑlʲɡʲɛbrɐ", "lʲ through gʲ before front vowel (álgebra)" }, { "buhálteris", "bʊ¹ˈɣɑˑlʲtʲɛrʲɪs", "lʲ before tʲ (buhálteris)" }, { "Báltija", "¹ˈbɑˑlʲtʲɪjɛ", "lʲ before tʲ (Báltija)" }, { "fakultètas", "fɐkʊlʲˈtʲɛtɐs", "lʲ before tʲ (fakultètas)" }, { "fi̇̀lme", "¹ˈfʲɪlʲmʲɛ", "lʲ before mʲ (fi̇̀lme)" }, { "smùlkmena", "¹ˈsmʊlʲkmʲɛnɐ", "lʲ through kʲ before mʲ (smùlkmena)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- G: ng / nk reverse palatalization rule. -- n + k/g normally velarizes to ŋ. The ŋ palatalizes only when the FOLLOWING -- k/g itself palatalizes (i.e., when the cluster is directly followed by -- a front vowel). When the k/g stays hard (because next is a consonant), -- ŋ also stays hard. 
function tests:test_IPA_G_ng_nk_reverse_palatalization()
	-- Each entry is { input term, expected IPA, label } and is passed to check_IPA.
	local examples = {
		{ "žiñgsnis", "²ˈʒʲɪŋˑksʲnʲɪs", "indirect: ng + s → ŋ stays hard" },
		{ "plunksnẽlė", "plʊŋk²ˈsʲnʲæːlʲeː", "indirect: nk + s → ŋ stays hard" },
		{ "anketà", "ɐŋʲkʲɛˈtɐ", "direct: nk + e → ŋʲkʲ" },
		{ "Bangỹs", "bɐŋʲ²ˈɡʲiːs", "direct: ng + y → ŋʲɡʲ" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- G2: VLKK §6.3 extends the n → ŋ assimilation to ch [x] and h [ɣ] in
-- addition to k and g, because all four are velar/post-velar and pull n's
-- place of articulation backwards. ŋʲ surfaces when the following ch/h
-- is itself palatalized (front vowel triggers); a back vowel after ch/h
-- keeps the whole cluster hard.
function tests:test_IPA_G2_nasal_before_ch_h()
	-- Each entry is { input term, expected IPA, label } and is passed to check_IPA.
	local examples = {
		-- VLKK §6.3 explicit example
		{ "brònchai", "¹ˈbrɔŋxɐɪ", "n + ch [x] → ŋ + x (VLKK §6.3)" },
		{ "mezenchimà", "mʲɛzʲɛŋʲxʲɪˈmɐ", "ŋʲ + xʲ" },
		-- Labels match the expected transcriptions: soft cluster before front y,
		-- hard cluster before back a.
		{ "menhỹras", "mʲɛŋʲ²ˈɣʲiːrɐs", "ŋʲ + ɣʲ" },
		{ "inhaliãcija", "ɪŋɣɐ²ˈlʲæːt͡sʲɪjɛ", "ŋ + ɣ" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- H1: Voicing assimilation (regressive: a stop/fricative agrees in voicing
-- with the next obstruent). VLKK §16, §17.
function tests:test_IPA_H1_voicing_assimilation() local examples = { { "di̇̀rbti", "¹ˈdʲɪrʲpʲtʲɪ", "b → p before t (devoicing)" }, { "apgáuti", "ɐb¹ˈɡɑˑʊtʲɪ", "p → b before g (voicing)" }, { "už.trùkti", "ʊʃˈtrʊktʲɪ", "ž → š before t (devoicing)" }, { "li̇̀pdo", "ˈlʲɪbdoː", "p → b before d (voicing)" }, { "kàsdavo", "ˈkɐzdɐʋoː", "s → z before d (voicing)" }, { "iš.gir̃do", "ɪʒʲ²ˈɡʲɪrˑdoː", "š → ž before g (voicing)" }, { "iš.džiū́ti", "ɪʒʲ¹ˈd͡ʒʲu̟ːtʲɪ", "š → ž before dž (voicing)" }, { "degtùkas", "dʲɛkˈtʊkɐs", "g → k before t (devoicing)" }, { "žiebtùvas", "ʒʲiɛpˈtʊʋɐs", "b → p before t (devoicing)" }, { "grį̇̃žti", "²ˈɡrʲiːʃʲtʲɪ", "ž → š before t (devoicing)" }, { "už.púola", "ʊʃ¹ˈpuɔlɐ", "ž → š before p (devoicing)" }, { "už.króvė", "ʊʃ¹ˈkroːʋʲeː", "ž → š before k (devoicing)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- H2: Word-final devoicing (VLKK §18). function tests:test_IPA_H2_word_final_devoicing() local examples = { { "juolàb<base:juolab>", "ju̟ɔˈlɐp", "b → p word-finally" }, { "visàd", "ʋʲɪˈsɐt", "d → t word-finally" }, { "jóg<base:jog>", "¹ˈjo̟ːk", "g → k word-finally" }, { "ùž", "ˈʊʃ", "ž → š word-finally" }, { "daũg", "²ˈdɒʊˑk", "final g → k" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- H3: Place assimilation between sibilants and affricates (VLKK §23): -- s + č → š; z + dž → ž; š + c → s; ž + dz → z. 
function tests:test_IPA_H3_place_assimilation()
	-- Each entry is { input term, expected IPA, label } and is passed to check_IPA.
	local examples = {
		{ "mókesčiai", "¹ˈmoːkʲɛʃʲt͡ʃʲɛɪ", "s + č → š (place assim.)" },
		{ "kàsčiau", "ˈkɐʃʲt͡ʃʲɛʊ", "s + č → š (place assim.)" },
		{ "vabzdžiai̇̃", "ʋɐbʲ²ˈʒʲd͡ʒʲɛɪˑ", "z + dž → ž (place assim.)" },
		{ "išcukrúoti", "ɪst͡sʊ¹ˈkruɔtʲɪ", "š + c → s (place assim.)" },
		-- Devoicing turns ž into š (not z); place assimilation then gives s.
		{ "už.cỹpti", "ʊsʲ²ˈt͡sʲiːpʲtʲɪ", "ž + c → š (devoicing) → s (place assim.)" },
		-- ž + dz not found
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- H4: Geminate simplification — two identical consonants reduce to one
-- (VLKK §21, plus the same effect on stops once they have been levelled
-- by voicing assimilation, e.g. d + t → t + t → t).
function tests:test_IPA_H4_geminate_simplification()
	-- Each entry is { input term, expected IPA, label } and is passed to check_IPA.
	local examples = {
		-- Sibilants (identical pairs)
		{ "pùsseserė", "ˈpʊsʲɛsʲɛrʲeː", "ss → s" },
		{ "iššóko", "ɪ¹ˈʃoːkoː", "šš → š" },
		{ "užžiẽbti", "ʊ²ˈʒʲiɛpʲtʲɪ", "žž → ž (also b → p before t)" },
		-- zz not found

		-- Sonorants (Liquids and Nasals)
		-- NOTE(review): this <base:> value keeps the tilde, unlike the other
		-- <base:> values in this suite — confirm whether that is intended.
		{ "so^ciˌjalliberãlas<base:socialliberãlas>", "sɔt͡sʲɪˌjɛlʲɪbʲɛ²ˈrɑːlɐs", "ll → l" },
		{ "šė́mmargas", "¹ˈʃʲeːmɐrɡɐs", "mm → m" },
		{ "viennỹtis", "ʋʲiɛ²ˈnʲiːtʲɪs", "nn → n" },
		{ "pérrašo", "¹ˈpʲæːrɐʃoː", "rr → r" },

		-- Bilabial stops (after voicing assimilation)
		-- bb not found
		{ "tar̃ppievis", "²ˈtɐrʲˑpʲiɛʋʲɪs", "pp → p" },
		{ "bóbpalaikė", "¹ˈboːpɐlɐɪkʲeː", "bp → pp → p (devoicing + degemination)" },
		-- pb not found

		-- Alveolar stops (after voicing assimilation)
		-- dd not found
		{ "añttrobis", "²ˈɐnˑtroːbʲɪs", "tt → t" },
		{ "Šmi̇̀dtas", "ˈʃʲmʲɪtɐs", "dt → tt → t (devoicing + degemination)" },
		{ "atdarà", "ɐdɐˈrɐ", "td → dd → d (voicing + degemination)" },

		-- Velar stops (after voicing assimilation)
		-- gg not found (needs g + g)
		{ "kiekkar̃t", "kʲiɛ²ˈkɐrˑt", "kk → k" },
		{ "daugkar̃t", "dɒʊ²ˈkɐrˑt", "gk → kk → k (devoicing + degemination)" },
		{ "ki̇́ekgi", "¹ˈkʲiɛɡʲɪ", "kg → gg → g (voicing + degemination)" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- H5: Sibilant simplification — when two DIFFERENT sibilants meet at a
-- morpheme boundary, only the second is pronounced (VLKK §22).
function tests:test_IPA_H5_sibilant_simplification()
	-- Each entry is { input term, expected IPA, label } and is passed to check_IPA.
	local examples = {
		{ "išsprę́sti", "ɪ¹ˈsʲpʲrʲæːsʲtʲɪ", "šs → s" },
		{ "ùžsienis", "ˈʊsʲiɛnʲɪs", "žs → s (via šs)" },
		{ "pùsšimtis", "ˈpʊʃʲɪmʲtʲɪs", "sš → š" },
		{ "pùszuikis", "ˈpʊzʊɪkʲɪs", "sz → z" },
		{ "pùsžalis", "ˈpʊʒɐlʲɪs", "sž → ž" },
		{ "išžarà", "ɪʒɐˈrɐ", "šž → ž" },
		{ "ùžšovas", "ˈʊʃoːʋɐs", "žš → š" },
		-- zš not found
		-- šz not found
		-- zs not found
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- I: Word-final j and v become non-syllabic [ɪ̯], [ʊ̯] (VLKK IPA rec §7.5).
function tests:test_IPA_I_final_j_v_nonsyllabic()
	-- Each entry is { input term, expected IPA, label } and is passed to check_IPA.
	local examples = {
		{ "rytój<base:rytoj>", "rʲiː¹ˈtoːɪ̯", "final j after long o → ɪ̯" },
		{ "tuõj<base:tuoj>", "²ˈtuɔɪ̯", "final j after uo → ɪ̯" },
		{ "viduj̃<base:viduj>", "ʋʲɪ²ˈdʊɪˑ", "final j with tilde after short u" },
		-- <base:> spells the word with š, matching the respelling and the expected [ʃ].
		{ "viršuj̃<base:viršuj>", "ʋʲɪr²ˈʃʊɪˑ", "final j with tilde after short u" },
		{ "sudiẽv", "sʊ²ˈdʲiɛʊ̯", "final v after ie → ʊ̯" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- J1: Hiatus — vowel sequences pronounced as two separate syllables, marked
-- either by morpheme boundary (native: prefix `.`) or explicitly preserved
-- (foreign: user-marked `.`). VLKK §24, §25, §27.2 (i-second variant).
function tests:test_IPA_J1_hiatus() local examples = { -- Native prefix boundaries { "pa.upỹs", "pɐ.ʊ²ˈpʲiːs", "prefix pa- + u" }, { "priim̃ti", "pʲrʲɪ.²ˈɪmʲˑtʲɪ", "prefix pri- + i" }, { "pri̇̀ima", "ˈpʲrʲɪ.ɪmɐ", "prefix pri- + i" }, { "pri̇̀ėmė", "ˈpʲrʲɪ.eːmʲeː", "prefix pri- + ė" }, { "priei̇̃ti", "pʲrʲɪ.²ˈɛɪˑtʲɪ", "prefix pri- + ei" }, { "priė̃jo", "pʲrʲɪ.²ˈeːjo̟ː", "prefix pri- + ė" }, { "nù.imtas", "ˈnʊ.ɪmtɐs", "prefix nu- + i (user-marked)" }, -- Foreign words: hiatus preserved between non-i vowels (VLKK §25) { "di.acetãtas", "dʲɪ.ɐt͡sʲɛ²ˈtɑːtɐs", "foreign i.a (user-marked)" }, { "di.akrilãtas", "dʲɪ.ɐkrʲɪ²ˈlɑːtɐs", "foreign i.a (user-marked)" }, { "fino^ùgrai", "fʲɪnɔ.ˈʊɡrɐɪ", "foreign o.u" }, { "paleo^nto^lògas", "pɐlʲɛ.ɔntɔˈlɔɡɐs", "paleo- + onto-: only e.o is hiatus, last ɔ is in coda" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- J2: J-insertion — in foreign words, an epenthetic [j] is inserted between -- vowel sequences containing i (VLKK §27). Input is a respelling that -- spells out the inserted j, optionally with `(j)` for the variable -- forms in §27.2. 
function tests:test_IPA_J2_j_insertion() local examples = { -- §27.1: i first → j obligatorily inserted { "dijãkonas", "dʲɪ²ˈjæːkoːnɐs", "ia → ija (i first, accented vowel)" }, { "dijakonỹstė", "dʲɪjɛkoː²ˈnʲiːsʲtʲeː", "ia → ija (i first, unaccented)" }, { "dijalèktas", "dʲɪjɛˈlʲɛktɐs", "ia → ija (i first, unaccented)" }, { "pijani̇̀nas<base:pianinas>", "pʲɪjɛˈnʲɪnɐs", "ia → ija (respell j)" }, { "dijèzas<base:diezas>", "dʲɪˈjɛzɐs", "ie → ije (respell j)" }, { "audijo^fònas<base:audiofonas>", "ɒʊdʲɪjɔ̟ˈfɔnɐs", "io → ijo (respell j)" }, -- §27.2: i second → j optional, written as `(j)` in respelling { "teji̇̀stas<base:teistas>", "tʲɛˈjɪstɐs", "ei → eji (respell j)" }, { "stò(j)ikas<base:stoikas>", "ˈstɔ(j)ɪkɐs", "oi: variant with (j)" }, { "babu(j)i̇̀nai<base:babuinai>", "bɐbʊˈ(j)ɪnɐɪ", "ui: variant with (j)" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- K: Word juncture — clitic liaison `‿` is inserted between an unstressed -- word and a following stressed word; word-final voiced obstruents devoice. -- Input uses a regular space; the module inserts `‿` automatically. function tests:test_IPA_K_word_juncture() local examples = { -- Basic liaison { "be ãbejo", "bʲɛ‿²ˈɑːbʲɛjo̟ː", "clitic be + main word" }, { "kaip kàd", "kɐɪp‿ˈkɐt", "clitic kaip + main word" }, { "kadà ne kadà", "kɐˈdɐ nʲɛ‿kɐˈdɐ", "stressed + clitic + stressed" }, } for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end end -- K2: VLKK §19 — when a clitic and its host are joined by ‿ in the IPA -- output, the host-side word-final voiced stops devoice (default in our -- spec) and a "soft" first phoneme of the next word (front V, j, or a -- palatalized C) palatalizes the now-devoiced consonant via cross-word -- palatalization spreading. Input uses a regular space; the module -- inserts ‿ automatically based on stress / clitic grouping. 
function tests:test_IPA_K2_crossword_devoicing_palatalization()
	-- Each entry is { input phrase, expected IPA, label } and is passed to check_IPA.
	local examples = {
		-- VLKK §19 examples (devoiced variant — the one our spec produces)
		{ "kad àtima", "kɐt‿ˈɐtʲɪmɐ", "d → t (back V next, no palat.)" },
		{ "lyg jója<base:lyg joja>", "lʲiːkʲ‿¹ˈjo̟ːjɛ", "g → k → kʲ (j triggers palat.)" },
		{ "lig miẽsto", "lʲɪkʲ‿²ˈmʲiɛstoː", "g → k → kʲ (mʲ palatalizes back)" },
		{ "daug nẽša", "dɒʊkʲ‿²ˈnʲæːʃɐ", "g → k → kʲ (nʲ palatalizes back)" },
		{ "lyg ródo", "lʲiːk‿¹ˈroːdoː", "g → k stays hard (r before back o)" },
		{ "kad vẽža", "kɐtʲ‿²ˈʋʲæːʒɐ", "d → t → tʲ (ʋʲ palatalizes back)" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- K3: VLKK §20 — the preposition už is the lone exception to word-final
-- devoicing: when it forms a clitic group with a following word starting
-- with a vowel or sonorant (j, n, m, l, r, v), its ž stays voiced.
-- Before a voiceless obstruent the regular devoicing applies. Input
-- uses a regular space.
function tests:test_IPA_K3_uz_exception_VLKK_20()
	-- Each entry is { input phrase, expected IPA, label } and is passed to check_IPA.
	local examples = {
		-- VLKK §20 explicit examples
		-- NOTE(review): two expectations below look inconsistent with the rest of
		-- this suite and should be confirmed against the module's actual output:
		-- "už jų̃" expects unfronted [uː] although
		-- test_IPA_E_o_u_fronting_after_palatal expects j + ų → [u̟ː], and
		-- "už lañgo" expects [ɑːŋ] although test_IPA_C4_mixed_a renders tilde añ
		-- before a velar as [ɐŋˑ].
		{ "už akių̃", "ʊʒ‿ɐ²ˈkʲu̟ː", "už before vowel a (keep ž)" },
		{ "už jų̃", "ʊʒ‿²ˈjuː", "už before sonorant j (keep ž)" },
		{ "už lañgo", "ʊʒ‿²ˈlɑːŋɡoː", "už before sonorant l (keep ž)" },
		{ "už miẽsto", "ʊʒ‿²ˈmʲiɛstoː", "už before sonorant m (keep ž)" },
		{ "už nãmo", "ʊʒ‿²ˈnɑːmoː", "už before sonorant n (keep ž)" },
		{ "už rýto", "ʊʒ‿¹ˈrʲiːtoː", "už before sonorant r (keep ž)" },
		{ "už võko", "ʊʒ‿²ˈʋoːkoː", "už before sonorant v (keep ž)" },
		{ "už stálą", "ʊʃ‿¹ˈstɑːlɑː", "už before voiceless obstruent s (devoices)" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- K4: VLKK §21b / §22b — when two identical consonants or two adjacent
-- sibilants meet across a liaison boundary in the IPA output, the first
-- drops out (only the second is pronounced). This is the formal variant;
-- VLKK also lists a colloquial variant that preserves both consonants,
-- but our spec follows §21b/§22b literally. Input uses a regular space.
function tests:test_IPA_K4_crossword_geminate_sibilant()
	-- Each entry is { input phrase, expected IPA, label } and is passed to check_IPA.
	local examples = {
		-- VLKK §21b — identical consonants drop the first
		{ "iš šóno", "ɪ‿¹ˈʃoːnoː", "š + š → ∅ + š (identical sibilants)" },
		{ "už žolė̃s", "ʊ‿ʒoː²ˈlʲeːs", "ž → š → ∅ + ž (identical after devoicing)" },
		{ "ir rei̇̃kia", "ɪ‿²ˈrʲɛɪˑkʲɛ", "r + r → ∅ + r (identical sonorants)" },
		{ "ar ródo", "ɐ‿¹ˈroːdoː", "r + r → ∅ + r (identical sonorants)" },
		-- VLKK §22b — different sibilants also drop the first
		{ "iš sẽno", "ɪ‿²ˈsʲæːnoː", "š + s → ∅ + s (different sibilants)" },
		{ "už sõdo", "ʊ‿²ˈsoːdoː", "ž → š → ∅ + s (devoiced then dropped)" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- K5: VLKK §23b — across a liaison boundary, a fricative s/š/z/ž before an
-- affricate of the OTHER place of articulation assimilates: s + č → š,
-- ž + dz → z, š + c → s, z + dž → ž. Cross-word palatalization re-applies
-- after the place change, so a freshly assimilated s → ʃ still picks up
-- the ʲ from the palatalized affricate that follows. Input uses a
-- regular space.
function tests:test_IPA_K5_crossword_place_assim_VLKK_23b()
	-- Each entry is { input phrase, expected IPA, label } and is passed to check_IPA.
	local examples = {
		-- VLKK §23b examples that match our "always devoice except už" spec.
		{ "vis čiùlba", "ʋʲɪʃʲ‿¹ˈt͡ʃʲʊ̟lbɐ", "s + č → š (then palatalized)" },
		{ "iš ceñtro", "ɪsʲ‿²ˈt͡sʲɛnˑtroː", "š + c → s (then palatalized)" },
		-- TODO: VLKK §23b also lists `už dzū̃ko → uz‿dzū̃ko [ʊz‿²ˈʣuːkoː]`,
		-- in which ž + dz → z (preserving voicing across the boundary).
		-- Our spec applies terminal devoicing unconditionally for non-už
		-- words and only skips it for už before vowels/sonorants, so the
		-- voiced obstruent dz does NOT trigger the už exception here and
		-- the module currently emits `ʊʃ‿²ˈd͡zuːkoː` instead. If we ever
		-- want to match VLKK §23b for this case, we would need to either
		--   (a) extend the už exception to voiced obstruents, or
		--   (b) run the cross-word place assimilation before terminal
		--       devoicing so that ʒ + d͡z → z + d͡z survives.
		-- Decide on a policy and add the test accordingly.
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- L: Secondary stress (`ˌ`) — different positions and interactions with
-- primary stress.
function tests:test_IPA_L_secondary_stress()
	-- Each entry is { input term, expected IPA, label } and is passed to check_IPA.
	local examples = {
		{ "ˌho^mo^fòbė", "ˌɣɔmɔˈfɔbʲeː", "initial secondary stress" },
		{ "saˌvanoriáuti", "sɐˌʋɐnoː¹ˈrʲæˑʊtʲɪ", "medial secondary stress" },
		{ "nebekõneˌveikti", "nʲɛbʲɛ²ˈkoːnʲɛˌʋʲɛɪktʲɪ", "secondary AFTER primary stress" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- M: Optional soft l in loanwords (VLKK §15) — by default the module reads
-- l as hard before a hard consonant; the user marks softening explicitly
-- with U+02BC (the modifier letter apostrophe) after l.
function tests:test_IPA_M_l_dual_reading()
	-- Each entry is { input term, expected IPA, label } and is passed to check_IPA.
	local examples = {
		{ "pòlka", "¹ˈpɔlkɐ", "default: hard l" },
		{ "pòlʼka", "¹ˈpɔlʲkɐ", "with U+02BC: soft lʲ" },
		{ "válsas", "¹ˈʋɑˑlsɐs", "default: hard l" },
		{ "válʼsas", "¹ˈʋɑˑlʲsɐs", "with U+02BC: soft lʲ" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end

-- ════════════════════════════════════════════════════════════════════════════
-- SYLLABIFICATION TESTS (Phonotactic models)
-- ════════════════════════════════════════════════════════════════════════════

-- A: 2-consonant cluster models.
function tests:test_hyphen_A_models_2C()
	-- Each entry is { input term, expected hyphenation, label } and is passed to check_hyph.
	local examples = {
		-- Onset patterns (V-CCV)
		{ "vèsti", "vè‧sti", "ST onset" },
		{ "dažnai̇̃", "da‧žnai̇̃", "SR onset" },
		{ "veiklõs", "vei‧klõs", "TR onset" },
		-- Split patterns (VC-CV)
		{ "kalbõs", "kal‧bõs", "RT split" },
		{ "ámžiaus", "ám‧žiaus", "RS split" },
	}
	for _, ex in ipairs(examples) do
		self:check_hyph(unpack(ex))
	end
end

-- B: 3-consonant cluster models.
function tests:test_hyphen_B_models_3C() local examples = { -- Onset { "displė̃jus<base:displėjus>", "di‧splė̃‧jus", "STR onset (V-CCCV)" }, -- Splits { "pýksta", "pýk‧sta", "T+ST split" }, { "mir̃šta", "mir̃‧šta", "R+ST split" }, { "mókslo", "mók‧slo", "T+SR split" }, { "lengvai̇̃", "len‧gvai̇̃", "R+TR split" }, { "atkrei̇̃pia", "at‧krei̇̃‧pia", "T+TR split" }, { "di̇̀rbti", "di̇̀rb‧ti", "RT+T split" }, { "elgsenõs", "elg‧se‧nõs", "RT+S split" }, { "piktžolė̃s", "pikt‧žo‧lė̃s", "TT+S split" }, { "Oksfòrdas", "Oks‧fòr‧das", "TS+S split (foreign)" }, { "transfòrmavo", "trans‧fòr‧ma‧vo", "RS+S split" }, } for _, ex in ipairs(examples) do self:check_hyph(unpack(ex)) end end -- C: 4-consonant cluster models. function tests:test_hyphen_C_models_4C() local examples = { { "konstrùkcija<base:konstrukcija>", "kon‧strùk‧ci‧ja", "R+STR split" }, { "apskritai̇̃", "ap‧skri‧tai̇̃", "T+STR split" }, { "ankstà", "ank‧stà", "RT+ST split" }, { "ži̇̀ngsnis", "ži̇̀ng‧snis", "RT+SR split" }, { "ántplūdžio", "ánt‧plū‧džio", "RT+TR split" }, { "postprodùkcija<base:postprodukcija>", "post‧pro‧dùk‧ci‧ja", "ST+TR split" }, { "kontrmotỹvas", "kontr‧mo‧tỹ‧vas", "RTR+R split" }, { "Obstfelderis", "Obst‧fel‧de‧ris", "TST+S split" }, -- FIXME: need accentuation -- Hyphenation for theoretically-existing consonant clusters, -- per Bendrinės lietuvių kalbos skiemuo monografija: -- S-STR -- RS-SR, RR-ST, ST-SR, RR-TR -- RTR-T, RST-T, RTT-S, TST-T } for _, ex in ipairs(examples) do self:check_hyph(unpack(ex)) end end -- D: Morphology-driven hyphenation — native prefixes vs. pseudo-prefixes, -- and prefix boundaries that introduce hiatus. 
-- Checks syllabification where a morphological boundary (marked by the user
-- with `.` or `ˌ`) is involved, alongside look-alike words that carry no
-- such mark. Each row is unpacked into self:check_hyph.
function tests:test_hyphen_D_morphology()
	local examples = {
		-- Native prefixes (user-marked with `.`)
		{ "ap.rašýti", "ap‧ra‧šý‧ti", "Native prefix ap-" },
		{ "at.nèšti", "at‧nè‧šti", "Native prefix at-" },
		{ "iš.mókyti", "iš‧mó‧ky‧ti", "Native prefix iš-" },
		-- Pseudo-prefixes (Internationalisms): no morphological boundary
		{ "atòmas", "a‧tò‧mas", "Pseudo-prefix" },
		{ "apãratas", "a‧pã‧ra‧tas", "Pseudo-prefix" },
		-- Prefix boundary with hiatus
		{ "pa.upỹs", "pa‧u‧pỹs", "Prefix boundary with hiatus" },
		{ "priim̃ti", "pri‧im̃‧ti", "Prefix boundary with hiatus" },
		-- User-marked hiatus and secondary-stress boundary
		{ "Kiurasã.o", "Kiu‧ra‧sã‧o", "User-marked hiatus" },
		{ "išˌverstaãkis", "iš‧ver‧sta‧ã‧kis", "Secondary stress also marks syllable boundary" },
		{ "jū́rų žvaigždė̃", "jū́‧rų žvaig‧ždė̃", "space should be kept" },
	}
	for _, ex in ipairs(examples) do
		self:check_hyph(unpack(ex))
	end
end

-- ════════════════════════════════════════════════════════════════════════════
-- RHYME TESTS
-- ════════════════════════════════════════════════════════════════════════════

-- A: Basic rhyme extraction across vowel/diphthong types and stress patterns.
-- Each row is unpacked into self:check_rhyme.
function tests:test_rhyme_A_basic()
	local examples = {
		{ "nakti̇̀s", "ɪs", "Short i rhyme" },
		{ "kalbà", "ɐ", "Short a rhyme" },
		{ "homològas", "ɔɡɐs", "Loanword o rhyme" },
		{ "naũjas", "ɒʊˑjɛs", "Diphthong rhyme" },
		{ "var̃das", "ɐrˑdɐs", "Mixed diphthong rhyme" },
		{ "mótina", "oːtʲɪnɐ", "Long o rhyme" },
		{ "vil̃kas", "ɪlˑkɐs", "Mixed diphthong rhyme" },
		-- NOTE(review): the description says secondary stress should be
		-- stripped, yet the expected rhyme still contains `ˌ` — confirm which
		-- of the two is intended.
		{ "nebekõneˌveikti", "oːnʲɛˌʋʲɛɪktʲɪ", "Secondary stress should be stripped" },
	}
	for _, ex in ipairs(examples) do
		self:check_rhyme(unpack(ex))
	end
end

return tests
q50s5g6upf7tf2tiuz3c1yjr610hs4a ကဏ္ဍ:မဝ်ဂျူလေတ်တူယဵုနဳယျာဂမၠိုၚ် 14 295399 395935 2026-05-29T18:25:29Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » :ကဏ္ဍ:ဘာသာလေတ်တူယဵုနဳယျာ|လေတ်တူယဵု..."
395935 wikitext text/x-wiki [[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာလေတ်တူယဵုနဳယျာ|လေတ်တူယဵုနဳယျာ]] » '''မဝ်ဂျူဂမၠိုၚ်''' :[[:ကဏ္ဍ:မဝ်ဂျူဂမၠိုၚ်|မဝ်ဂျူ]]ဘာသာလေတ်တူယဵုနဳယျာ၊ မနွံကဵုလုပ်အဝေါၚ်ကုဒ် Lua နကဵုမကၠောန်ဗဒှ် ကဵု မစဳရေၚ်ယဵုဒုၚ်သ္ပမာန်ဂမၠိုၚ်။ [[ကဏ္ဍ:ဘာသာလေတ်တူယဵုနဳယျာ]][[ကဏ္ဍ:မဝ်ဂျူဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|လ]] 4f8tob05809agchx7g6o25tn27fbwsm မဝ်ဂျူ:lt-common/doc 828 295400 395937 2026-05-29T18:28:26Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|lt}} </includeonly>" 395937 wikitext text/x-wiki {{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|lt}} </includeonly> js7rgpxcn6jiutxja12panln1tuz2fd ထာမ်ပလိက်:lt-pr/documentation 10 295401 395938 2026-05-29T18:30:15Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} {{status|beta}} {{uses lua|Module:lt-pron}} This template generates the pronunciation section for Lithuanian terms, including IPA, rhymes, and syllabification, as well as (if manually specified) audio files and homophones. Unlike {{tl|IPA}}, this template automatically handles: * Syllabification based on sonority hierarchy and morphological boundaries * Stress realization (different phonetic..." 395938 wikitext text/x-wiki {{documentation subpage}} {{status|beta}} {{uses lua|Module:lt-pron}} This template generates the pronunciation section for Lithuanian terms, including IPA, rhymes, and syllabification, as well as (if manually specified) audio files and homophones. 
Unlike {{tl|IPA}}, this template automatically handles: * Syllabification based on sonority hierarchy and morphological boundaries * Stress realization (different phonetic values under different stress patterns) * Consonant palatalization before front vowels * Voicing assimilation and final devoicing * Nasal velarization and sibilant fusion In most cases, you only need to provide the '''form with stress diacritics''' (acute ´, grave `, or tilde ~), and the module will generate the correct IPA, rhyme, and syllabification automatically. Respelling is only needed in specific cases described below. ==Usage== ===Quick reference table=== {|class="wikitable" ! Page ! Example ! Comment |- | rowspan=2 | {{m|lt|nãmas}} | <code><nowiki>{{lt-pr|nãmas}}</nowiki></code> | rowspan=2 | Most of the time, only the form with stress diacritics is needed. |- | {{lt-pr|nãmas}} |- | rowspan=2 | {{m|lt|aprašýti}} | <code><nowiki>{{lt-pr|ap.rašýti}}</nowiki></code> | rowspan=2 | Use a period (<code>.</code>) to mark syllable boundaries when morphology overrides phonology, such as prefix boundaries. |- | {{lt-pr|ap.rašýti}} |- | rowspan=2 | {{m|lt|paupỹs}} | <code><nowiki>{{lt-pr|pa.upỹs}}</nowiki></code> | rowspan=2 | Use a period (<code>.</code>) to prevent vowel sequences from being treated as diphthongs. |- | {{lt-pr|pa.upỹs}} |- | rowspan=2 | {{m|lt|Zojà}} | <code><nowiki>{{lt-pr|Zo^jà}}</nowiki></code> | rowspan=2 | Use a caret (<code>^</code>) after o to mark loanword short {{IPAchar|[ɔ]}} (native words and early loanwords have long {{IPAchar|[oː]}}). |- | {{lt-pr|Zo^jà}} |- | rowspan=2 | {{m|lt|homònimas}} | <code><nowiki>{{lt-pr|ho^mo^ni̇̀mas}}</nowiki></code> | rowspan=2 | The caret is also used in mixed diphthongs with short o: ol, om, on, or. |- | {{lt-pr|ho^mo^ni̇̀mas}} |- | rowspan=2 | {{m|lt|dièzas}} | <code><nowiki>{{lt-pr|dijèzas}}</nowiki></code> | rowspan=2 | Insert j between vowels to mark a glide (mainly in loanwords). 
Use <base:...> when the respelling differs from the original spelling. |- | {{lt-pr|dijèzas<base:diezas>}} |- | rowspan=2 | {{m|lt|savanoriáuti}} | <code><nowiki>{{lt-pr|saˌvanoriáuti}}</nowiki></code> | rowspan=2 | Use <code>ˌ</code> to mark secondary stress in long words. |- | {{lt-pr|saˌvanoriáuti}} |- | rowspan=2 | {{m|lt|atsaistyti}} | <code><nowiki>{{lt-pr|atsáistyti,atsaistýti}}</nowiki></code> | rowspan=2 | Multiple comma-separated pronunciations can be given. |- | {{lt-pr|atsáistyti,atsaistýti}} |- | rowspan=2 | {{m|lt|dešinė}} | <code><nowiki>{{lt-pr|dešinė̃<audio:LL-Q9083 (lit)-Trimkev-dešinė.wav>}}</nowiki></code> | rowspan=2 | Use the inline modifier syntax to add audio files and other properties. |- | {{lt-pr|dešinė̃<audio:LL-Q9083 (lit)-Trimkev-dešinė.wav>}} |- | rowspan=2 | {{m|lt|trauks}} | <code><nowiki>{{lt-pr|traũks<q:standard>,tráuks<q:dialectal><audio:Example.ogg>}}</nowiki></code> | rowspan=2 | Multiple modifiers can be stacked. Here we add qualifiers and an audio file to different pronunciations. |- | {{lt-pr|traũks<q:standard>,tráuks<q:dialectal><audio:Example.ogg>}} |} ==When respelling is needed== In most cases, simply entering the form with stress diacritics is sufficient. The module automatically handles syllabification, palatalization, assimilation, and other phonological processes. However, '''respelling is required''' in the following specific cases: ===Syllable boundaries=== Use a period (<code>.</code>) to mark syllable boundaries when '''morphology overrides phonology'''. Native prefixes ({{m|lt|ap-}}, {{m|lt|at-}}, {{m|lt|iš-}}, {{m|lt|nu-}}, {{m|lt|pa-}}, {{m|lt|pri-}}, etc.) create morphological syllable boundaries that override default phonological syllabification. Mark these boundaries with a period when the prefix is followed by a vowel or when syllabification differs from the default. 
Examples: * {{m|lt|aprašýti}}: <code>ap.rašýti</code> → ap‧ra‧šý‧ti * {{m|lt|atnèšti}}: <code>at.nèšti</code> → at‧nè‧šti * {{m|lt|išmókyti}}: <code>iš.mókyti</code> → iš‧mó‧ky‧ti * {{m|lt|paupỹs}}: <code>pa.upỹs</code> → pa‧u‧pỹs * {{m|lt|nùimtas}}: <code>nù.imtas</code> → nù‧im‧tas When two vowels/consonants should be in separate syllables but would normally be treated as a diphthong/consonant cluster, use a period to separate them too. ===Loanword short o=== Outside of “standard” diphthongs (like {{m|lt||uo}}, {{m|lt||oi}}, {{m|lt||ou}}), the letter '''{{m|lt||o}}''' has two main pronunciations in Lithuanian: * Native words and early loanwords: long {{IPAchar|[oː]}} * Modern loanwords: short {{IPAchar|[ɔ]}} The module can automatically detect the short loanword {{IPAchar|[ɔ]}} when it is written with a grave accent ({{m|lt||ò}}) or in the diphthongs {{m|lt||uo}}, {{m|lt||oi}}, and {{m|lt||ou}}. However, in unaccented positions or mixed diphthongs, plain '''o''' is ambiguous. In these cases, you must use a '''caret''' (<code>^</code>) after the '''o''' to explicitly mark the short loanword pronunciation. ====When to use the caret (<code>^</code>)==== Use the caret for '''unaccented monophthongs''' and for '''mixed diphthongs with a tilde''' ({{m|lt||ol̃}}, {{m|lt||om̃}}, {{m|lt||oñ}}, {{m|lt||or̃}}) or no accent ({{m|lt||ol}}, {{m|lt||om}}, {{m|lt||on}}, {{m|lt||or}}): * {{m|lt|Z'''o'''jà}}: <code>Z'''o^'''jà</code> → {{IPAchar|[zɔˈjɛ]}} * {{m|lt|f'''o'''t'''o'''parodà}}: <code>f'''o^'''t'''o^'''parodà</code> → {{IPAchar|[fɔtɔpɐroːˈdɐ]}} * {{m|lt|šlãkbet'''on'''is}}: <code>šlãkbeˌt'''o^n'''is</code> → {{IPAchar|[²ˈʃlɑːɡbʲɛˌtɔnʲɪs]}} * {{m|lt|B'''or̃'''tnikas}}: <code>B'''o^r̃'''tnikas</code> → {{IPAchar|[²ˈbɔrʲˑtʲnʲɪkɐs]}} ====When NOT to use the caret==== Do not use the caret if the vowel has a grave accent ('''{{m|lt||ò}}'''), as the module already knows '''{{m|lt||ò}}''' is short.
The foreign diphthongs '''{{m|lt||oi}}'''/'''{{m|lt||ou}}''' also automatically produce {{IPAchar|[ɔɪ]}}/{{IPAchar|[ɔʊ]}} (as well as the native diphthong '''{{m|lt||uo}}''' {{IPAchar|[uɔ]}}). * {{m|lt|k'''òl'''ba}}: <code>k'''òl'''ba</code> → {{IPAchar|[¹ˈkɔlbɐ]}} * {{m|lt|z'''òm'''ša}}: <code>z'''òm'''ša</code> → {{IPAchar|[¹ˈzɔmʃɐ]}} * {{m|lt|m'''òi'''ra}}: <code>m'''òi'''ra</code> → {{IPAchar|[¹ˈmɔɪrɐ]}} * {{m|lt|š'''òu'''}}: <code>š'''òu'''</code> → {{IPAchar|[¹ˈʃɔʊ]}} * {{m|lt|'''oi'''kumenà}}: <code>'''oi'''kumenà</code> → {{IPAchar|[ɔɪkʊmʲɛˈnɐ]}} * {{m|lt|kl'''ou'''nadà}}: <code>kl'''ou'''nadà</code> → {{IPAchar|[klɔʊnɐˈdɐ]}} ===Glide insertion=== In some words (mainly loanwords), a glide {{IPAchar|[j]}} appears between vowels even though it's not written in the standard spelling. Use <code>j</code> or <code>(j)</code> in the respelling to indicate this: * <code>j</code>: Mandatory glide (always pronounced) * <code>(j)</code>: Optional glide (some speakers pronounce it, others don't) ====How to determine which to use==== Check the VDU pronunciation transcriber (see Resources below). 
If it shows: * Only {{IPAchar|[j]}}: use <code>j</code> in respelling * Both {{IPAchar|[j]}} and no {{IPAchar|[j]}} as variants: use <code>(j)</code> in respelling ;Examples: * {{m|lt|dièzas}}: <code>di'''j'''èzas</code> → {{IPAchar|[dʲɪˈjɛzɐs]}} * {{m|lt|pianinas}}: <code>pi'''j'''ani̇̀nas</code> → {{IPAchar|[pʲɪjɛˈnʲɪnɐs]}} * {{m|lt|babuìnai}}: <code>babu'''(j)'''i̇̀nai</code> → {{IPAchar|[bɐbʊˈ(j)ɪnɐɪ]}} ===Secondary stress=== In long words, compound words, or derived words, you can mark secondary stress by inserting <code>ˌ</code> immediately before the secondarily stressed syllable (that is, before its first consonant, if it has one): * {{m|lt|savanoriáuti}}: <code>saˌvanoriáuti</code> → {{IPAchar|[sɐˌʋɐnoː¹ˈrʲæˑʊtʲɪ]}} * {{m|lt|homofòbė}}: <code>ˌho^mo^fòbė</code> → {{IPAchar|[ˌɣɔmɔˈfɔbʲeː]}} * {{m|lt|nebekõneveikti}}: <code>nebekõneˌveikti</code> → {{IPAchar|[nʲɛbʲɛ²ˈkoːnʲɛˌʋʲɛɪktʲɪ]}} ==Advanced features== ===Inline modifiers=== The template supports inline modifiers (using the same syntax as {{tl|affix}}, {{tl|desc}}, and similar templates) to add properties to specific pronunciations: ; <code><q:''qualifier''></code>, <code><qq:''qualifier''></code> : Add a left qualifier (q) or right qualifier (qq) to the pronunciation. : Example: <code>nãmas<q:standard></code> ; <code><a:''accent''></code>, <code><aa:''accent''></code> : Add an accent label (a) or right accent label (aa). : Example: <code>nãmas<a:Northern></code> ; <code><ref:''reference''></code> : Add a reference to the pronunciation. : Example: <code><nowiki>nù.imtas<ref:{{R:lt:VLKK}}></nowiki></code> ; <code><audio:''filename''></code> : Add an audio file. Nested modifiers can be used within the audio tag: : Example: <code><audio:file.wav<a:Standard><text:full sentence>></code> ; <code><hmp:''homophone''></code> : Specify a homophone. Multiple homophones can be comma-separated. : Example: <code>tei̇̃gti<hmp:tei̇̃kti></code> ; <code><rhyme:''rhyme''></code> : <!--Override the automatically generated rhyme.
-->Use <code><rhyme:-></code> to suppress rhyme generation. ; <code><hyph:''syllabification''></code> : <!--Override the automatically generated syllabification.-->Use <code><hyph:-></code> to suppress syllabification generation. ; <code><base:''name''></code> : Explicitly specify the page name '''without stress diacritics or respelling'''. This is primarily used in template code, documentation pages, and test pages where the module cannot automatically determine the original spelling from the page title. <!--When glide insertion (j) is used, the module compares the respelling against this base spelling to identify which j's are original versus inserted, ensuring correct syllabification in the output.--> : Example: <code>dijèzas<base:diezas></code> ===Multiple pronunciations=== Multiple pronunciations can be specified by separating them with commas (no spaces): <code><nowiki>{{lt-pr|atsáistyti,atsaistýti}}</nowiki></code> Each pronunciation can have its own inline modifiers: <code><nowiki>{{lt-pr|nãmas<q:standard>,nãmas<q:dialectal><a:Northern>}}</nowiki></code> ==Resources== ; [https://kalbu.vdu.lt/en/resources/pronunciation/#dabartines-lietuviu-kalbos-tarties-zodynas VDU Pronunciation Dictionary] : Search for words to find their accented forms and basic grammatical information (headwords only). ; [https://kalbu.vdu.lt/en/resources/pronunciation/#fonetinis-transkribuoklis VDU Phonetic Transcriber] : Generate IPA transcriptions (including inflected forms). ; [https://kalbu.vdu.lt/mokymosi-priemones/kirciuoklis/ VDU Stress Marker] : Generate forms with stress diacritics (including inflected forms). ; [https://kirtis.info/#/krc Online Accentuation] : Generate forms with stress diacritics (including inflected forms). ==Parameters== ; {{para|1}} : The form with stress diacritics, or comma-separated forms for multiple pronunciations. Respelling may be needed as described above. This parameter is '''required'''. 
<includeonly> {{tcat}} </includeonly> lqv96qlstv85ba4v1481dktahnk5o18