ဝိက်ရှေန်နရဳ
mnwwiktionary
https://mnw.wiktionary.org/wiki/%E1%80%9D%E1%80%AD%E1%80%80%E1%80%BA%E1%80%9B%E1%80%BE%E1%80%B1%E1%80%94%E1%80%BA%E1%80%94%E1%80%9B%E1%80%B3:%E1%80%99%E1%80%AF%E1%80%80%E1%80%BA%E1%80%9C%E1%80%AD%E1%80%80%E1%80%BA%E1%80%90%E1%80%99%E1%80%BA
MediaWiki 1.47.0-wmf.4
case-sensitive
မဳဒဳယာ
တၟေင်
ဓရီုကျာ
ညးလွပ်
ညးလွပ် ဓရီုကျာ
ဝိက်ရှေန်နရဳ
ဝိက်ရှေန်နရဳ ဓရီုကျာ
ဝှာင်
ဝှာင် ဓရီုကျာ
မဳဒဳယာဝဳကဳ
မဳဒဳယာဝဳကဳ ဓရီုကျာ
ထာမ်ပလိက်
ထာမ်ပလိက် ဓရီုကျာ
ရီု
ရီု ဓရီုကျာ
ကဏ္ဍ
ကဏ္ဍ ဓရီုကျာ
အဆက်လက္ကရဴ
အဆက်လက္ကရဴ ဓရီုကျာ
ကာရန်
ကာရန် ဓရီုကျာ
အဘိဓာန်
အဘိဓာန် ဓရီုကျာ
ဗီုပြၚ်သိုၚ်တၟိ
ဗီုပြၚ်သိုၚ်တၟိ ဓရီုကျာ
TimedText
TimedText talk
မဝ်ဂျူ
မဝ်ဂျူ ဓရီုကျာ
Event
Event talk
ထာမ်ပလိက်:Documentation
10
95
395914
154873
2026-05-29T18:03:51Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:documentation]] ဇရေင် [[ထာမ်ပလိက်:Documentation]]
154873
wikitext
text/x-wiki
{{#invoke:documentation|show|hr=above}}
j0s13scsii7qihzcaj44tsxynimxx68
မဝ်ဂျူ:languages/data/3/a
828
654
395876
394330
2026-05-29T15:30:23Z
Intobesa.bot
1035
Bot: ပလေဝ်ဒါန်
395876
Scribunto
text/plain
local m_langdata = require("Module:languages/data")
-- Lazy wrapper around the `char` function of [[Module:string utilities]].
-- That module is loaded on demand, as it may not be needed (depending on the
-- data). The first call requires it, rebinds `u` to the real function so that
-- later calls bypass this wrapper, and then forwards the arguments.
local function u(...)
	local char = require("Module:string utilities").char
	u = char
	return char(...)
end
local c = m_langdata.chars
local p = m_langdata.puaChars
local s = m_langdata.shared
local m = {}
m["aaa"] = {
"ဂါဝ်တူဥူ",
35463,
"alv-yek",
"Latn",
}
m["aab"] = {
"အာဠူမူ-ထေတ်သူ",
35034,
"nic-alu",
"Latn",
}
m["aac"] = {
"အာရဳ",
1811224,
"ngf-gsu",
"Latn",
}
m["aad"] = {
"အာမာန်",
56708,
"paa-sep",
"Latn",
}
-- "aae" is treated as "sq", see [[WT:LT]]
m["aaf"] = {
"အာရာနဒါန်",
3507928,
"dra-mal",
"Mlym",
-- Mlym translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission)
}
m["aag"] = {
"အာန်ဗရေတ်ခ်",
4741706,
"paa-pal",
"Latn",
}
m["aah"] = {
"အာၜေအ်' အာရာဗေါတ်",
4670715,
"paa-ara",
"Latn",
}
m["aai"] = {
"အာရဳဖှာန်မာ-မဳနဳယျာဖှဳယျာ",
4790560,
"poz-ocw",
"Latn",
}
m["aak"] = {
"အာန်ခါဝေ",
3446690,
"ngf-ata",
"Latn",
}
m["aal"] = {
"အာဖှာဲဒေ",
56434,
"cdc-cbm",
"Latn",
}
m["aan"] = {
"အာန္နာမ်ဗေ",
3507873,
"tup-gua",
"Latn",
}
m["aap"] = {
"ဘာရာ အဝ်ရာအ်ရာ",
56807,
"sai-pek",
"Latn",
}
m["aaq"] = {
"ဘာနိုတ်သကေတ်",
3515185,
"alg-abp",
"Latn",
}
m["aas"] = {
"အောတ်သေတ်",
56620,
"cus-sou",
"Latn",
}
-- "aat" is treated as "sq", see [[WT:LT]]
m["aau"] = {
"အာၜေဴ",
3073568,
"paa-sep",
"Latn",
}
m["aaw"] = {
"သဝ်လံန်",
7558834,
"poz-ocw",
"Latn",
}
m["aax"] = {
"မာန်ဒဝ်ဗဝ် အာတ်တာပ်",
12636156,
"ngf-dum",
"Latn",
}
m["aaz"] = {
"အာန်မာရသဳ",
4740192,
"poz-tim",
"Latn",
}
m["aba"] = {
"အေက်ဗေ",
34833,
"alv-lag",
"Latn",
}
m["abb"] = {
"Bankon",
34860,
"bnt-bsa",
"Latn",
}
m["abc"] = {
"Ambala Ayta",
3448896,
"phi",
"Latn",
}
m["abd"] = {
"Camarines Norte Agta",
3399682,
"phi",
"Latn",
}
m["abe"] = {
"အေတ်ဗဒ်နာကဳ",
17502788,
"alg-abp",
"Latn",
}
m["abf"] = {
"Abai Sungai",
4663287,
"poz-san",
"Latn",
}
m["abg"] = {
"Abaga",
3507954,
"ngf-kya",
"Latn",
}
m["abh"] = {
"အာရဗဳ တဇေတ်ကဳ",
56833,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["abi"] = {
"Abidji",
34781,
"alv-lag",
"Latn",
}
m["abj"] = {
"အကာ-ဗဳအ်",
2356391,
"qfa-ads",
"Latn",
}
m["abl"] = {
"Abung",
49215,
"poz-lgx",
"Latn",
}
m["abm"] = {
"Abanyom",
7502,
"nic-eko",
"Latn",
}
m["abn"] = {
"Abua",
34835,
"nic-cde",
"Latn",
}
m["abo"] = {
"Abon",
35121,
"nic-tvn",
"Latn",
}
m["abp"] = {
"အာဗေန်လာန် အာဲတာ",
3436621,
"phi",
"Latn",
}
m["abq"] = {
"အဗါတ်သာ",
27567,
"cau-abz",
"Cyrl, Latn",
translit = {
Cyrl = "abq-translit"
},
override_translit = true,
display_text = {
Cyrl = s["cau-Cyrl-displaytext"]
},
strip_diacritics = {
Cyrl = s["cau-Cyrl-stripdiacritics"],
Latn = s["cau-Latn-stripdiacritics"],
},
sort_key = {
Cyrl = {
from = {
"гъв", "гъь", "гӏв", "джв", "джь", "къв", "къь", "кӏв", "кӏь", "хъв", "хӏв", "чӏв", -- 3 chars
"гв", "гъ", "гь", "гӏ", "дж", "дз", "ё", "жв", "жь", "кв", "къ", "кь", "кӏ", "ль", "лӏ", "пӏ", "тл", "тш", "тӏ", "фӏ", "хв", "хъ", "хь", "хӏ", "цӏ", "чв", "чӏ", "шв", "шӏ" -- 2 chars
},
to = {
"г" .. p[3], "г" .. p[4], "г" .. p[7], "д" .. p[2], "д" .. p[3], "к" .. p[3], "к" .. p[4], "к" .. p[7], "к" .. p[8], "х" .. p[3], "х" .. p[6], "ч" .. p[3],
"г" .. p[1], "г" .. p[2], "г" .. p[5], "г" .. p[6], "д" .. p[1], "д" .. p[4], "е" .. p[1], "ж" .. p[1], "ж" .. p[2], "к" .. p[1], "к" .. p[2], "к" .. p[5], "к" .. p[6], "л" .. p[1], "л" .. p[2], "п" .. p[1], "т" .. p[1], "т" .. p[2], "т" .. p[3], "ф" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[4], "х" .. p[5], "ц" .. p[1], "ч" .. p[1], "ч" .. p[2], "ш" .. p[1], "ш" .. p[2]
}
},
},
}
-- "abr" Abron is treated as "ak" Akan, see [[WT:LT]]
m["abs"] = {
"မလေဝ် အာန်ဗဝ်နေတ်",
3124354,
"crp",
"Latn",
ancestors = "ms",
}
m["abt"] = {
"Ambulas",
3508015,
"paa-nnd",
"Latn",
}
m["abu"] = {
"Abure",
34767,
"alv-ptn",
"Latn",
}
m["abv"] = {
"အာရဗဳ ဗာဟာနာ",
56576,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["abw"] = {
"Pal",
7126121,
"ngf-omo",
"Latn",
}
m["abx"] = {
"Inabaknon",
2820163,
"poz-sbj",
"Latn",
}
m["aby"] = {
"Aneme Wake",
3508107,
"ngf-yar",
"Latn",
}
m["abz"] = {
"Abui",
2822110,
"paa-alp",
"Latn",
}
m["aca"] = {
"Achagua",
2822982,
"awd",
"Latn",
}
m["acb"] = {
"Áncá",
11130787,
"nic-mom",
"Latn",
}
m["acd"] = {
"Gikyode",
35256,
"alv-gng",
"Latn",
}
-- Data record for language code "ace". The fourth positional field lists two
-- scripts (Latn and ms-Arab), but standard_chars only covers Latn so far.
m["ace"] = {
"အာသံနဳစ်",
27683,
"cmc",
"Latn, ms-Arab",
-- standard_chars mixes a per-script key (Latn) with one positional element
-- (c.punc, from m_langdata.chars). NOTE(review): the positional element is
-- presumably shared by all scripts -- confirm against the consumer module.
standard_chars = {
Latn = "AaBbCcDdEeÉéÈèËëFfGgHhIiJjKkLlMmNnOoÔôÖöPpQqRrSsTtUuVvWwXxYyZz", -- current orthography (characters for the Arab script have not been added yet)
c.punc
},
}
m["ach"] = {
"Acholi",
34926,
"sdv-los",
"Latn",
}
m["aci"] = {
"အကာ-ကာရဳ",
2670418,
"qfa-adn",
"Latn",
}
m["ack"] = {
"အကာ-ကိုဝ်ရာ",
3433680,
"qfa-adn",
"Latn",
}
m["acl"] = {
"အာပ်-ဗေလ်",
3436825,
"qfa-ads",
"Latn",
}
m["acm"] = {
"အာရဗဳ အဳရတ်",
56232,
"sem-arb",
"Arab, Hebr",
strip_diacritics = {
Arab = "ar-stripdiacritics",
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
m["acn"] = {
"Achang",
56582,
"tbq-brm",
"Latn",
}
m["acp"] = {
"Eastern Acipa",
5329945,
"nic-kmk",
"Latn",
}
m["acr"] = {
"Achi",
34774,
"myn",
"Latn",
}
m["acs"] = {
"Acroá",
2829146,
"sai-cje",
"Latn",
}
m["acu"] = {
"Achuar",
2823170,
"sai-jiv",
"Latn",
}
m["acv"] = {
"Achumawi",
56661,
"nai-pal",
"Latn",
}
m["acw"] = {
"အာရဗဳဟဳဂျာဇြဳ",
56608,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["acx"] = {
"အာရဗဳ အဝ်မာန်နဳ",
56630,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
-- Data record for language code "acy", written in Latn and Grek, with "acm"
-- given as its ancestor.
m["acy"] = {
"အာရဗဳ သာဲပရေက်",
56416,
"sem-arb",
"Latn, Grek",
ancestors = "acm",
-- Only Latn is configured here: grave, acute and breve are listed for removal.
strip_diacritics = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.breve},
},
-- Grek display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
-- Note that the Latn character list below deliberately includes the Greek
-- letters Δδ and Θθ alongside the Latin ones; c.punc is added as a positional
-- element.
standard_chars = {
Latn = "AaBbCcDdΔδEeFfGgĠġĊċIiJjKkLlMmNnOoPpΘθRrSsTtUuVvWwXxYyZzŞş",
c.punc
},
}
m["acz"] = {
"Acheron",
34769,
"alv-tal",
"Latn",
}
m["ada"] = {
"Adangme",
35141,
"alv-gda",
"Latn",
}
m["adb"] = {
"Atauran",
125421255,
"poz-cet",
"Latn",
}
m["add"] = {
"Dzodinka",
35266,
"nic-nka",
"Latn",
}
m["ade"] = {
"Adele",
27740,
"alv-ntg",
"Latn",
}
m["adf"] = {
"အာရဗဳ ဒဝ်ဖာရဳ",
56565,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["adg"] = {
"Andegerebinha",
3508123,
"aus-rnd",
"Latn",
}
m["adh"] = {
"Adhola",
1971400,
"sdv-los",
"Latn",
}
m["adi"] = {
"အဒဳ",
56440,
"sit-tan",
"Latn",
}
m["adj"] = {
"အာဒေတ်အူခရု",
34738,
"alv-lag",
"Latn",
}
m["adl"] = {
"Galo",
2857892,
"sit-tan",
"Latn",
}
m["adn"] = {
"Adang",
3398276,
"paa-alp",
"Latn",
}
m["ado"] = {
"Abu",
56659,
"paa-por",
"Latn",
}
m["adp"] = {
"အဒပ်",
3512402,
"sit-tib",
"Tibt",
ancestors = "dz",
translit = "Tibt-translit",
override_translit = true,
display_text = s["Tibt-displaytext"],
entry_name = s["Tibt-entryname"],
sort_key = "Tibt-sortkey",
wikipedia_article = "Dzongkha", -- Considered a dialect of Dzongkha
}
m["adq"] = {
"Adangbe",
34730,
"alv-gda",
"Latn",
ancestors = "ada",
}
m["adr"] = {
"Adonara",
4684505,
"poz-cet",
"Latn",
}
m["ads"] = {
"Adamorobe Sign Language",
27709,
"sgn",
"Latn", -- when documented
}
m["adt"] = {
"Adnyamathanha",
2225391,
"aus-psw",
"Latn",
}
m["adu"] = {
"Aduge",
34734,
"alv-nwd",
"Latn",
ancestors = "opa",
}
m["adw"] = {
"Amondawa",
12626847,
"tup-gua",
"Latn",
}
m["ady"] = {
"အာက်ဒေါတ်ကာယ်",
27776,
"cau-cir",
"Cyrl, Latn, Arab",
translit = {
Cyrl = "cau-cir-translit",
Arab = "ar-translit",
},
override_translit = true,
display_text = {
Cyrl = s["cau-Cyrl-displaytext"]
},
strip_diacritics = {
Cyrl = s["cau-Cyrl-stripdiacritics"],
Latn = s["cau-Latn-stripdiacritics"],
},
sort_key = {
Cyrl = {
from = {
"кхъу", "къӏу", -- 4 chars
"гъу", "джу", "дзу", "жъу", "къу", "кхъ", "къӏ", "кӏу", "кӏь", "лъу", "лӏу", "пӏу", "сӏу", "тӏу", "фӏу", "хъу", "цӏу", "чъу", "чӏу", "шъу", "шӏу", "щӏу", -- 3 chars
"гу", "гъ", "гь", "дж", "дз", "ё", "жъ", "жь", "ку", "къ", "кь", "кӏ", "лъ", "ль", "лӏ", "пӏ", "сӏ", "тӏ", "фӏ", "ху", "хъ", "хь", "цу", "цӏ", "чу", "чъ", "чӏ", "шъ", "шӏ", "щӏ", "ӏу", "ӏь" -- 2 chars
},
to = {
"к" .. p[5], "к" .. p[7],
"г" .. p[3], "д" .. p[2], "д" .. p[4], "ж" .. p[2], "к" .. p[3], "к" .. p[4], "к" .. p[6], "к" .. p[10], "к" .. p[11], "л" .. p[2], "л" .. p[5], "п" .. p[2], "с" .. p[2], "т" .. p[2], "ф" .. p[2], "х" .. p[3], "ц" .. p[3], "ч" .. p[3], "ч" .. p[5], "ш" .. p[2], "ш" .. p[4], "щ" .. p[2],
"г" .. p[1], "г" .. p[2], "г" .. p[4], "д" .. p[1], "д" .. p[3], "е" .. p[1], "ж" .. p[1], "ж" .. p[3], "к" .. p[1], "к" .. p[2], "к" .. p[8], "к" .. p[9], "л" .. p[1], "л" .. p[3], "л" .. p[4], "п" .. p[1], "с" .. p[1], "т" .. p[1], "ф" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[4], "ц" .. p[1], "ц" .. p[2], "ч" .. p[1], "ч" .. p[2], "ч" .. p[4], "ш" .. p[1], "ш" .. p[3], "щ" .. p[1], "ӏ" .. p[1], "ӏ" .. p[2]
}
},
},
}
m["adz"] = {
"Adzera",
3327445,
"poz-ocw",
"Latn",
}
m["aea"] = {
"Areba",
3509129,
"aus-pam",
"Latn",
}
m["aeb"] = {
"အာရဗဳ တူနဳယှေန်",
56240,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["aed"] = {
"Argentine Sign Language",
3322073,
"sgn",
"Latn", -- when documented
}
m["aee"] = {
"ပါသျှယဳ ဒိုဟ်ဗၟံက်သၟဝ်ကျာ",
12642198,
"inc-pas",
"fa-Arab, Latn",
}
m["aek"] = {
"Haeke",
5638166,
"poz-cln",
"Latn",
}
m["ael"] = {
"Ambele",
34818,
"nic-grf",
"Latn",
}
m["aem"] = {
"အါန်",
3507920,
"mkh-vie",
"Latn",
}
m["aen"] = {
"Armenian Sign Language",
3446604,
"sgn",
}
m["aeq"] = {
"Aer",
3246741,
"inc-wes",
"Arab",
}
m["aer"] = {
"အာရေန်တာယ်",
10728232,
"aus-rnd",
"Latn",
}
m["aes"] = {
"Alsea",
2395641,
nil,
"Latn",
}
m["aeu"] = {
"Akeu",
4700657,
"tbq-sil",
"Latn",
}
m["aew"] = {
"Ambakich",
56642,
"paa-eke",
"Latn",
}
m["aey"] = {
"Amele",
3508025,
"ngf-gum",
"Latn",
}
m["aez"] = {
"ဨကာ",
16110528,
"ngf-oro",
"Latn",
}
m["afb"] = {
"အာရဗဳအထံက်ဂၚ်",
56385,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["afd"] = {
"Andai",
4753480,
"paa-arf",
"Latn",
}
m["afe"] = {
"Putukwam",
3914930,
"nic-ben",
"Latn",
}
m["afg"] = {
"Afghan Sign Language",
4689093,
"sgn",
}
m["afh"] = {
"Afrihili",
384707,
"art",
"Latn",
type = "appendix-constructed",
}
m["afi"] = {
"Akrukay",
57003,
"paa-tam",
"Latn",
}
m["afk"] = {
"Nanubae",
6964416,
"paa-arf",
"Latn",
}
m["afn"] = {
"Defaka",
35174,
"nic",
"Latn",
}
m["afo"] = {
"Eloyi",
3914066,
"nic-plt",
"Latn",
}
m["afp"] = {
"Tapei",
16887371,
"paa-arf",
"Latn",
}
m["afs"] = {
"Afro-Seminole Creole",
27867,
"crp",
"Latn",
ancestors = "en",
}
m["aft"] = {
"Afitti",
3400829,
"sdv-nyi",
"Latn",
}
m["afu"] = {
"Awutu",
34847,
"alv-gng",
"Latn",
}
m["afz"] = {
"Obokuitai",
7075258,
"paa-clp",
"Latn",
}
m["aga"] = {
"Aguano",
3331203,
nil,
"Latn",
}
m["agb"] = {
"Legbo",
35584,
"nic-uce",
"Latn",
}
m["agc"] = {
"Agatu",
34732,
"alv-ido",
"Latn",
}
m["agd"] = {
"Agarabi",
3399642,
"ngf-gau",
"Latn",
}
m["age"] = {
"Angal",
10951553,
"ngf-ank",
"Latn",
}
m["agf"] = {
"Arguni",
12473346,
"poz-cet",
"Latn",
}
m["agg"] = {
"Angor",
3508100,
"paa-sng",
"Latn",
}
m["agh"] = {
"Ngelima",
7022266,
"bnt-bta",
"Latn",
}
m["agi"] = {
"Agariya",
663586,
"mun",
"Deva",
}
m["agj"] = {
"Argobba",
29292,
"sem-eth",
"Ethi",
}
m["agk"] = {
"Isarog Agta",
6078982,
"phi",
"Latn",
}
m["agl"] = {
"Fembe",
372927,
"ngf-est",
"Latn",
}
m["agm"] = {
"Angaataha",
3508001,
"ngf-ang",
"Latn",
}
m["agn"] = {
"Agutaynen",
3399717,
"phi-kal",
"Latn",
}
m["ago"] = {
"Tainae",
7676186,
"ngf-taa",
"Latn",
}
m["agq"] = {
"Aghem",
34737,
"nic-rnw",
"Latn",
}
m["agr"] = {
"Aguaruna",
1526530,
"sai-jiv",
"Latn",
}
m["ags"] = {
"Esimbi",
35260,
"nic-bds",
"Latn",
}
m["agt"] = {
"ကာဂါယာန် အာက်ထာ ဗဟဵု",
5017296,
"phi",
"Latn",
}
m["agu"] = {
"အာဂွာကာတေကာ",
35091,
"myn",
"Latn",
}
m["agv"] = {
"ရောမါန်ဒါဒဝ် အာက်ဂါ",
3508085,
"phi",
"Latn",
}
m["agw"] = {
"Kahua",
3191906,
"poz-sls",
"Latn",
}
m["agx"] = {
"အာခူန်",
36498,
"cau-esm",
"Cyrl",
translit = "cau-nec-translit",
override_translit = true,
display_text = s["cau-Cyrl-displaytext"],
strip_diacritics = s["cau-Cyrl-stripdiacritics"],
sort_key = {
from = {"аь", "гъ", "гь", "гӏ", "дж", "ё", "къ", "кь", "кӏ", "оь", "пӏ", "тӏ", "уь", "хъ", "хь", "хӏ", "цӏ", "чӏ"},
to = {"а" .. p[1], "г" .. p[1], "г" .. p[2], "г" .. p[3], "д" .. p[1], "е" .. p[1], "к" .. p[1], "к" .. p[2], "к" .. p[3], "о" .. p[1], "п" .. p[1], "т" .. p[1], "у" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[3], "ц" .. p[1], "ч" .. p[1]}
},
}
m["agy"] = {
"Southern Alta",
7569611,
"phi",
"Latn",
}
m["agz"] = {
"Mount Iriga Agta",
6921432,
"phi",
"Latn",
}
m["aha"] = {
"Ahanta",
34729,
"alv-ctn",
"Latn",
}
m["ahb"] = {
"Axamb",
2874710,
"poz-vnc",
"Latn",
}
m["ahg"] = {
"Qimant",
35663,
"cus-cen",
"Latn",
}
m["ahh"] = {
"Aghu",
3436645,
"ngf-awy",
"Latn",
}
m["ahi"] = {
"Tiagba",
3400073,
"kro-aiz",
"Latn",
}
-- Data record for language code "ahk", written in Latn, Mymr and Thai.
-- Only the Thai script has a custom sort key.
m["ahk"] = {
"အာခါ",
56643,
"tbq-han",
"Latn, Mymr, Thai",
sort_key = {
Thai = {
-- Three pattern substitutions; from[i] pairs with to[i]
-- (presumably applied in order -- confirm against the consumer):
--   1. "[%pๆ]"  -> ""      drops punctuation and the repetition mark ๆ
--   2. "[็-๎]"   -> ""      drops the marks in the range U+0E47..U+0E4E
--   3. "([เแโใไ])([ก-ฮ])" -> "%2%1"  swaps a preposed vowel with the
--      consonant that follows it, so the consonant comes first in the key
from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"},
to = {"", "", "%2%1"}
},
},
}
m["ahl"] = {
"Igo",
35412,
"alv-ktg",
"Latn",
}
m["ahm"] = {
"Mobu",
35967,
"kro-aiz",
"Latn",
}
m["ahn"] = {
"အ'ဟာန်",
34723,
"alv-aah",
"Latn",
}
m["aho"] = {
"အဟုမ်",
34778,
"tai-swe",
"Ahom",
translit = "Ahom-translit",
}
m["ahp"] = {
"Apro",
34810,
"alv-kwa",
"Latn",
}
m["ahr"] = {
"အဟိရာန်နဳ",
15549890,
"raj",
"Deva",
translit = "mr-translit",
}
m["ahs"] = {
"Ashe",
34823,
"nic-plc",
"Latn",
}
m["aht"] = {
"Ahtna",
21058,
"ath-nor",
"Latn",
}
m["aia"] = {
"အာရဝ်သဳ",
2863483,
"poz-sls",
"Latn",
}
m["aib"] = {
"Äynu",
27927,
"qfa-mix",
"Arab, Latn",
ancestors = "ug, fa"
}
m["aic"] = {
"Ainbai",
3332149,
"paa-bew",
"Latn",
}
m["aid"] = {
"အာန်ကဝ်ရေဝ်ထေန်",
3279409,
"aus-pmn",
"Latn",
}
m["aie"] = {
"Amara",
2841180,
"poz-ocw",
"Latn",
}
m["aif"] = {
"Agi",
3331491,
"paa-wpa",
"Latn",
}
m["aig"] = {
"အာန်တဳဂွါ ကဵု အၚ်္ဂလိက် ဗါၜူဒါ ခရဳအတ်လ်",
3244184,
"crp",
"Latn",
ancestors = "en",
}
m["aih"] = {
"အာဲ-ချာန်",
2827749,
"qfa-kms",
"Latn, Hani",
sort_key = {
Hani = "Hani-sortkey"
},
}
m["aii"] = {
"အာက်သဳရိ နဳအဝ်-အာရာမေဣ",
29440,
"sem-nna",
"Syrc",
translit = "aii-translit",
strip_diacritics = "Syrc-stripdiacritics",
}
m["aij"] = {
"Lishanid Noshan",
3436467,
"sem-nna",
"Hebr",
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
m["aik"] = {
"Ake",
34808,
"nic-pls",
"Latn",
}
m["ail"] = {
"Aimele",
3327418,
"ngf-bos",
"Latn",
}
m["aim"] = {
"Aimol",
4697175,
"tbq-kuk",
"Latn, Beng",
}
m["ain"] = {
"အာဲနု",
27969,
"qfa-ain",
"Kana, Latn, Cyrl",
sort_key = {
Kana = "Kana-sortkey"
},
}
m["aio"] = {
"အာဲတောန်",
3399725,
"tai-swe",
"Mymr",
translit = "aio-phk-translit",
display_text = s["aio-displaytext"],
strip_diacritics = s["aio-stripdiacritics"],
}
m["aip"] = {
"Burumakok",
5000984,
"ngf-wok",
"Latn",
}
m["air"] = {
"Airoran",
3321131,
"paa-saa",
"Latn",
}
m["ait"] = {
"အာရေဝ်ခေန်",
3446679,
"tup",
"Latn",
}
m["aiw"] = {
"Aari",
7495,
"omv-aro",
"Latn",
}
m["aix"] = {
"Aighon",
3504287,
"poz-ocw",
"Latn",
}
m["aiy"] = {
"Ali",
34814,
"gba-eas",
"Latn",
}
m["aja"] = {
"အာဂျာ (အေက်ဖရိက လ္ပာ်ဗၟံက်)",
3237491,
"csu-bkr",
"Latn",
}
m["ajg"] = {
"Aja (West Africa)",
35035,
"alv-gbe",
"Latn",
}
m["aji"] = {
"အဂျဳ",
2828867,
"poz-cln",
"Latn",
}
m["ajn"] = {
"Andajin",
16111302,
"aus-wor",
"Latn",
}
m["ajp"] = {
"အာရဗဳလပ်ဗေန်ထေန်သမၠုၚ်ကျာ",
55633582,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["ajw"] = {
"Ajawa",
56645,
"cdc-wst",
"Latn",
}
m["ajz"] = {
"Amri Karbi",
3508092,
"tbq-kuk",
"Latn",
ancestors = "mjw",
}
m["akb"] = {
"Angkola Batak",
2640686,
"btk",
"Latn, Batk",
}
m["akc"] = {
"Mpur",
3327139,
"qfa-iso", -- Papuan; based on Palmer (2018), Ethnologue and Glottolog
"Latn",
}
m["akd"] = {
"Ukpet-Ehom",
36618,
"nic-ucr",
"Latn",
}
m["ake"] = {
"အကာဝယဝ်",
28059,
"sai-pem",
"Latn",
}
m["akf"] = {
"Akpa",
34801,
"alv-ido",
"Latn",
}
m["akg"] = {
"အနှတ်ခါလာန်ဂူ",
4750964,
"poz-cet",
"Latn",
}
m["akh"] = {
"Angal Heneng",
10950354,
"ngf-ank",
"Latn",
}
m["aki"] = {
"Aiome",
56735,
"paa-aia",
"Latn",
}
m["akj"] = {
"ဇေရု",
2919121,
"qfa-adn",
"Latn, Deva",
}
m["akk"] = {
"အခါဒဳယာန်",
35518,
"sem-eas",
"Xsux, Latn",
}
m["akl"] = {
"အာက်ခလာန်",
8773,
"phi",
"Latn",
}
m["akm"] = {
"Aka-Bo",
35361,
"qfa-adn",
"Latn",
}
m["ako"] = {
"အာကူရဳအဝ်",
56650,
"sai-tar",
"Latn",
}
m["akp"] = {
"Siwu",
36470,
"alv-ntg",
"Latn",
}
m["akq"] = {
"Ak",
56654,
"paa-sep",
"Latn",
}
m["akr"] = {
"အာရာကဳ",
2699882,
"poz-vnn",
"Latn",
}
m["aks"] = {
"Akaselem",
34817,
"nic-grm",
"Latn",
}
m["akt"] = {
"Akolet",
3330162,
"poz-ocw",
"Latn",
}
m["aku"] = {
"Akum",
34799,
"nic-ykb",
"Latn",
}
m["akv"] = {
"အာပ်ခါဝက်",
56423,
"cau-and",
"Cyrl",
translit = "cau-nec-translit",
override_translit = true,
display_text = s["cau-Cyrl-displaytext"],
strip_diacritics = s["cau-Cyrl-stripdiacritics"],
}
m["akw"] = {
"Akwa",
34802,
"bnt-mbo",
"Latn",
}
m["akx"] = {
"အကာ-ကေဒဵု",
3436816,
"qfa-adc",
"Latn",
}
m["aky"] = {
"အကာ-ကောန်",
3436784,
"qfa-adc",
"Latn",
}
m["akz"] = {
"အာလာဗာမာ",
1815020,
"nai-mus",
"Latn",
}
m["ala"] = {
"Alago",
34813,
"alv-ido",
"Latn",
}
m["alc"] = {
"ခါဝေတ်သကာ",
56544,
"aqa",
"Latn",
}
m["ald"] = {
"Alladian",
34837,
"alv-lag",
"Latn",
}
m["ale"] = {
"အာလောတ်",
27210,
"esx",
"Latn, Cyrl",
}
m["alf"] = {
"Alege",
34815,
"nic-ben",
"Latn",
}
m["alh"] = {
"Alawa",
2147917,
"aus-gun",
"Latn",
}
m["ali"] = {
"Amaimon",
3327427,
"ngf-mad",
"Latn",
}
m["alj"] = {
"အလံၚ်ဂံၚ်",
3327423,
"phi",
"Latn",
}
m["alk"] = {
"Alak",
2714690,
"mkh",
"Latn",
}
m["all"] = {
"Allar",
3393634,
"dra-mal",
"Mlym",
-- Mlym translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission)
}
-- "aln" is treated as "sq", see [[WT:LT]]
m["alm"] = {
"Amblong",
11022615,
"poz-vnn",
"Latn",
}
m["alo"] = {
"Larike-Wakasihu",
3217929,
"poz-cma",
"Latn",
}
m["alp"] = {
"Alune",
3327367,
"poz-cet",
"Latn",
}
m["alq"] = {
"အာယ်လ်ကေန်ဂွေန်",
28092,
"alg",
"Latn, Cans",
ancestors = "oj",
}
-- Data record for language code "alr", written in Cyrl.
m["alr"] = {
"အဠူတေ",
28213,
"qfa-ckn",
"Cyrl",
-- Normalises the ASCII apostrophe (') and the right single quotation mark (’)
-- to the modifier letter apostrophe (ʼ).
strip_diacritics = {
from = {"['’]"},
to = {"ʼ"}
},
-- Each entry in `from` pairs with the entry at the same position in `to`:
-- a base Cyrillic letter followed by a character from p
-- (m_langdata.puaChars). NOTE(review): presumably this makes each special
-- letter sort directly after its base letter -- confirm.
-- The "ʼ" in "вʼ" and "гʼ" is the same character strip_diacritics produces.
sort_key = {
from = {"вʼ", "гʼ", "ғ", "ә", "ё", "ӄ", "ӈ"},
to = {"в" .. p[1], "г" .. p[1], "г" .. p[2], "е" .. p[1], "е" .. p[2], "к" .. p[1], "н" .. p[1]}
},
}
-- Data record for language code "alt", written in Cyrl and transliterated
-- with "Altai-translit".
m["alt"] = {
"အာန်တာဲ ဒိုဟ်သမၠုၚ်ကျာ",
1991779,
"trk-kkp",
"Cyrl",
translit = "Altai-translit",
-- Each letter in `from` pairs with the entry at the same position in `to`:
-- a base Cyrillic letter followed by p[1] (from m_langdata.puaChars).
-- NOTE(review): presumably this sorts ј after д, ё after е, ҥ after н,
-- ӧ after о and ӱ after у -- confirm against the sort-key consumer.
sort_key = {
from = {"ј", "ё", "ҥ", "ӧ", "ӱ"},
to = {"д" .. p[1], "е" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1]}
},
}
m["alu"] = {
"'အာရေဝ်'အာရာ",
5160,
"poz-sls",
"Latn",
}
m["alw"] = {
"Alaba",
56652,
"cus-hec",
"Latn",
}
m["alx"] = {
"Amol",
3504260,
"paa-pal",
"Latn",
}
m["aly"] = {
"Alyawarr",
3327389,
"aus-rnd",
"Latn",
}
m["alz"] = {
"Alur",
56507,
"sdv-los",
"Latn",
}
m["ama"] = {
"Amanayé",
3508053,
"tup-gua",
"Latn",
}
m["amb"] = {
"Ambo",
3450142,
"nic-tvn",
"Latn",
}
m["amc"] = {
"Amahuaca",
2669150,
"sai-pan",
"Latn",
}
m["ame"] = {
"Yanesha'",
3088540,
"awd",
"Latn",
}
m["amf"] = {
"ဟာမေ-ဗါန်နာ",
35764,
"omv-aro",
"Latn, Ethi",
sort_key = "amf-utilities"
}
m["amg"] = {
"Amurdag",
3360016,
"aus-wdj",
"Latn",
}
m["ami"] = {
"ဨမေတ်",
35132,
"map",
"Latn",
}
m["amj"] = {
"Amdang",
28335,
"ssa-fur",
"Latn",
}
m["amk"] = {
"အီုဗါဲ",
1875885,
"poz-hce",
"Latn",
}
m["aml"] = {
"War-Jaintia",
56321,
"aav-khs",
"Latn",
}
m["amm"] = {
"အာမာ",
3446626,
"paa-lma",
"Latn",
}
m["amn"] = {
"အာမနှာတ်",
3327399,
"paa-war",
"Latn",
}
m["amo"] = {
"Amo",
34826,
"nic-kne",
"Latn",
}
m["amp"] = {
"Alamblak",
56688,
"paa-sep",
"Latn",
}
m["amq"] = {
"Amahai",
3327384,
"poz-cma",
"Latn",
}
m["amr"] = {
"Amarakaeri",
35128,
"sai-har",
"Latn",
}
m["ams"] = {
"အမာမဳ-အဝ်ဃှဳမာ လ္ပာ်ဒိုဟ်သမၠုၚ်ကျာ",
2840986,
"jpx-nry",
"Jpan",
translit = s["jpx-translit"],
display_text = s["jpx-displaytext"],
strip_diacritics = s["jpx-stripdiacritics"],
sort_key = s["jpx-sortkey"],
}
m["amt"] = {
"Amto",
56517,
"paa-amu",
"Latn",
}
m["amu"] = {
"ဂေရေရဝ် အာမတ်သဂဝ်",
3501942,
"omq",
"Latn",
}
m["amv"] = {
"Ambelau",
2669214,
"poz-cma",
"Latn",
}
m["amw"] = {
"နဳအဝ်-အာရမေအဳ လ္ပာ်ပလိုတ်",
34226,
"sem-arw",
"Armi, Syrc, Latn",
strip_diacritics = {
Syrc = "Syrc-stripdiacritics"
},
}
m["amx"] = {
"Anmatyerre",
10412317,
"aus-rnd",
"Latn",
}
m["amy"] = {
"Ami",
10408315,
"aus-dal",
"Latn",
}
m["amz"] = {
"Atampaya",
3446651,
"aus-pam",
"Latn",
}
m["ana"] = {
"Andaqui",
2846078,
nil,
"Latn",
}
m["anb"] = {
"Andoa",
2846171,
"sai-zap",
"Latn",
}
m["anc"] = {
"Ngas",
35999,
"cdc-wst",
"Latn",
}
m["and"] = {
"အာန်သာတ်သ်",
3513300,
"poz-hce",
"Latn",
}
m["ane"] = {
"သာရခူ",
3571097,
"poz-cln",
"Latn",
}
m["anf"] = {
"Animere",
34783,
"alv-ktg",
"Latn",
}
-- Data record for language code "ang", written in Latn and Runr.
m["ang"] = {
"အၚ်္ဂလိက်တြေံ",
42365,
"gmw-ang",
"Latn, Runr",
-- Only the runic script has a transliteration module configured here.
translit = {
Runr = "Runr-translit"
},
strip_diacritics = {
Latn = {
-- Marks listed for removal: acute, circumflex, macron, breve, dot above,
-- diaeresis and dot below (all taken from m_langdata.chars).
remove_diacritics = c.acute .. c.circ .. c.macron .. c.breve .. c.dotabove .. c.diaer .. c.dotbelow,
-- Wynn (Ƿ/ƿ) is replaced by W/w; the replacement is a lookup table keyed
-- by the matched character.
from = {"[Ƿƿ]"},
to = {{
["Ƿ"] = "W", ["ƿ"] = "w",
}},
},
},
sort_key = {
Latn = {
-- Same set of marks as in strip_diacritics above.
remove_diacritics = c.acute .. c.circ .. c.macron .. c.breve .. c.dotabove .. c.diaer .. c.dotbelow,
-- Special letters are mapped to plain Latin letters. ð, ȝ and the thorn
-- variants get a trailing p[1] (from m_langdata.puaChars); NOTE(review):
-- presumably so they sort just after d, g and t -- confirm.
-- þ, ꝥ and ꝧ all map to the same key, so they are not distinguished in
-- sorting. Only lowercase letters are listed in the pattern.
from = {"[æƀꝺðꝼᵹȝłœꞃꞅꞇþꝥꝧƿ]"},
to = {{
["æ"] = "ae", ["ƀ"] = "b", ["ꝺ"] = "d", ["ð"] = "d" .. p[1], ["ꝼ"] = "f",
["ᵹ"] = "g", ["ȝ"] = "g" .. p[1], ["ł"] = "l", ["œ"] = "oe", ["ꞃ"] = "r",
["ꞅ"] = "s", ["ꞇ"] = "t", ["þ"] = "t" .. p[1], ["ꝥ"] = "t" .. p[1],
["ꝧ"] = "t" .. p[1], ["ƿ"] = "w",
}},
},
},
-- Per-script character list for Latn, plus c.punc as a positional element.
standard_chars = {
Latn = "AaÆæBbCcDdÐðEeFfGgHhIiLlMmNnOoŒœPpRrSsTtÞþUuWwXxYy",
c.punc,
},
}
m["anh"] = {
"Nend",
6991554,
"ngf-wso",
"Latn",
}
m["ani"] = {
"အာန်ဒဳ",
34849,
"cau-and",
"Cyrl",
translit = "cau-nec-translit",
override_translit = true,
display_text = s["cau-Cyrl-displaytext"],
strip_diacritics = s["cau-Cyrl-stripdiacritics"],
}
m["anj"] = {
"Anor",
56458,
"paa-aia",
"Latn",
}
m["ank"] = {
"Goemai",
35272,
"cdc-wst",
"Latn",
}
m["anl"] = {
"Anu",
4777679,
"sit-mru",
"Latn",
}
m["anm"] = {
"Anāl",
56235,
"tbq-kuk",
"Latn",
}
m["ann"] = {
"Obolo",
36614,
"nic-lcr",
"Latn",
}
m["ano"] = {
"Andoque",
2669225,
"qfa-iso",
"Latn",
}
m["anp"] = {
"အာန်ဂဳကာ",
28378,
"inc-bih",
"Deva, Kthi",
translit = {
Deva = "hi-translit",
Kthi = "bho-Kthi-translit",
},
}
m["anq"] = {
"ဂျရာဝါ",
2475526,
"qfa-ong",
"Latn",
}
m["anr"] = {
"Andh",
4754314,
"inc-sou",
"Deva",
}
m["ans"] = {
"Anserma",
3446613,
"sai-chc",
"Latn",
}
m["ant"] = {
"Antakarinya",
921304,
"aus-psw",
"Latn",
}
m["anu"] = {
"Anuak",
56677,
"sdv-lon",
"Latn",
}
m["anv"] = {
"ဒါန်ညာ",
35187,
"nic-mam",
"Latn",
}
m["anw"] = {
"Anaang",
2845320,
"nic-ief",
"Latn",
}
m["anx"] = {
"Andra-Hus",
2846195,
"poz-aay",
"Latn",
}
m["any"] = {
"Anyi",
28395,
"alv-ctn",
"Latn",
}
m["anz"] = {
"Anem",
56512,
"qfa-dis", -- Papuan; might be an isolate or in a putative West New Britain family
"Latn",
}
m["aoa"] = {
"Angolar",
34994,
"crp",
"Latn",
ancestors = "pt",
}
m["aob"] = {
"Abom",
3446647,
"qfa-dis", -- Papuan; possibly a divergent Tirio language (Anim family), or a top-level TNG node
"Latn",
}
m["aoc"] = {
"ပေမန်",
10729616,
"sai-pem",
"Latn",
}
m["aod"] = {
"Andarum",
3507888,
"paa-ata",
"Latn",
}
m["aoe"] = {
"Angal Enen",
10951638,
"ngf-ank",
"Latn",
}
m["aof"] = {
"Bragat",
3507977,
"paa-pal",
"Latn",
}
m["aog"] = {
"Angoram",
56366, -- cf 6754745 for merged dialect
"paa-lse",
"Latn",
}
m["aoi"] = {
"Anindilyakwa",
2714654,
"aus-arn",
"Latn",
}
m["aoj"] = {
"Mufian",
3507881,
"paa-ara",
"Latn",
}
m["aok"] = {
"Arhö",
4790086,
"poz-cln",
"Latn",
}
m["aol"] = {
"Alorese",
3332062,
"poz",
"Latn",
}
m["aom"] = {
"Ömie",
8078975,
"ngf-koi",
"Latn",
}
m["aon"] = {
"Bumbita Arapesh",
3508044,
"paa-ara",
"Latn",
}
m["aor"] = {
"Aore",
12627129,
"poz-vnn",
"Latn",
}
m["aos"] = {
"Taikat",
7676018,
"paa-taa",
"Latn",
}
m["aot"] = {
"အိန္ဒိ အာတုံ",
5646,
"tbq-bdg",
"Latn, Beng",
}
m["aou"] = {
"အအ်ဥူ",
16109994,
"gio",
"Latn", -- also Hani?
}
m["aox"] = {
"Atorada",
3507932,
"awd",
"Latn",
}
m["aoz"] = {
"Uab Meto",
3441962,
"poz-tim",
"Latn",
}
m["apb"] = {
"သာ'အ်",
36294,
"poz-sls",
"Latn",
}
m["apc"] = {
"အာရဗဳလပ်ဗေန်ထေန်သၟဝ်ကျာ",
22809485,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["apd"] = {
"အာရဗဳ သုဒါန်နဳ",
56573,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["ape"] = {
"Bukiyip",
3507895,
"paa-ara",
"Latn",
}
m["apf"] = {
"Pahanan Agta",
7135432,
"phi",
"Latn",
}
m["apg"] = {
"Ampanang",
4748035,
"poz",
"Latn",
}
m["aph"] = {
"Athpare",
3449126,
"sit-kie",
"Deva, Latn",
}
m["api"] = {
"Apiaká",
3507941,
"tup-gua",
"Latn",
}
m["apj"] = {
"Jicarilla",
28277,
"apa",
"Latn",
}
m["apk"] = {
"Plains Apache",
27861,
"apa",
"Latn",
}
m["apl"] = {
"Lipan",
28269,
"apa",
"Latn",
}
m["apm"] = {
"Chiricahua",
13368,
"apa",
"Latn",
}
m["apn"] = {
"အဖဳနာရဲ",
2858311,
"sai-nje",
"Latn",
}
m["apo"] = {
"Ambul",
12627135,
"poz-ocw",
"Latn",
}
m["app"] = {
"Apma",
2669188,
"poz-vnn",
"Latn",
}
m["apq"] = {
"အ-ၜေအ်သိခွါ",
28466,
"qfa-adc",
"Latn",
}
m["apr"] = {
"Arop-Lokep",
2863482,
"poz-ocw",
"Latn",
}
m["aps"] = {
"Arop-Sissano",
12627242,
"poz-ocw",
"Latn",
}
m["apt"] = {
"Apatani",
56306,
"sit-tan",
"Latn",
}
m["apu"] = {
"Apurinã",
2859081,
"awd",
"Latn",
}
m["apv"] = {
"Alapmunte",
16110782,
"sai-nmk",
"Latn",
}
m["apw"] = {
"အာဖေန်ချဳ လ္ပာ်ပလိုတ်",
28060,
"apa",
"Latn",
}
m["apx"] = {
"Aputai",
12473343,
"poz-tim",
"Latn",
}
m["apy"] = {
"အာက်ပါလာဲန်",
2736980,
"sai-gui",
"Latn",
}
m["apz"] = {
"သာပဵုယဝ်ကာ",
7398693,
"ngf-woj",
"Latn",
}
m["aqc"] = {
"အာဆိ",
34915,
"cau-lzg",
"Cyrl",
translit = "cau-nec-translit",
override_translit = true,
display_text = s["cau-Cyrl-displaytext"],
strip_diacritics = s["cau-Cyrl-stripdiacritics"],
sort_key = {
from = {
"ккъӏв", "ххьӏв", -- 5 chars
"гъӏв", "ёоӏ", "ккъӏ", "ккъв", "къӏв", "ллъв", "ххьӏ", "хъӏв", "хьӏв", "ццӏв", "ччӏв", -- 4 chars
"ааӏ", "гӏв", "гъӏ", "гъв", "гьв", "ееӏ", "ёӏ", "ёо", "ииӏ", "кӏв", "ккв", "ккъ", "къӏ", "къв", "кьв", "лӏв", "ллъ", "лъв", "льв", "ооӏ", "пӏв", "ппв", "ссв", "тӏв", "ттв", "ууӏ", "хӏв", "ххв", "хъӏ", "хъв", "хьӏ", "цӏв", "ццӏ", "ццв", "чӏв", "ччӏ", "ээӏ", "юуӏ", "яаӏ", -- 3 chars
"аӏ", "аа", "гӏ", "гв", "гъ", "гь", "дв", "еӏ", "ее", "ё", "жв", "зв", "иӏ", "ии", "кӏ", "кв", "кк", "къ", "кь", "лӏ", "лв", "лъ", "ль", "оӏ", "оо", "пӏ", "пв", "пп", "св", "сс", "тӏ", "тв", "тт", "уӏ", "уу", "фв", "хӏ", "хв", "хх", "хъ", "цӏ", "цв", "цц", "чӏ", "чв", "шв", "щв", "эӏ", "ээ", "юӏ", "юу", "яӏ", "яа" -- 2 chars
},
to = {
"к" .. p[8], "х" .. p[7],
"г" .. p[6], "е" .. p[7], "к" .. p[7], "к" .. p[9], "к" .. p[12], "л" .. p[5], "х" .. p[6], "х" .. p[10], "х" .. p[13], "ц" .. p[6], "ч" .. p[5],
"а" .. p[3], "г" .. p[2], "г" .. p[5], "г" .. p[7], "г" .. p[9], "е" .. p[3], "е" .. p[5], "е" .. p[6], "и" .. p[3], "к" .. p[2], "к" .. p[5], "к" .. p[6], "к" .. p[11], "к" .. p[13], "к" .. p[15], "л" .. p[2], "л" .. p[4], "л" .. p[7], "л" .. p[9], "о" .. p[3], "п" .. p[2], "п" .. p[5], "с" .. p[3], "т" .. p[2], "т" .. p[5], "у" .. p[3], "х" .. p[2], "х" .. p[5], "х" .. p[9], "х" .. p[11], "х" .. p[12], "ц" .. p[2], "ц" .. p[5], "ц" .. p[7], "ч" .. p[2], "ч" .. p[4], "э" .. p[3], "ю" .. p[3], "я" .. p[3],
"а" .. p[1], "а" .. p[2], "г" .. p[1], "г" .. p[3], "г" .. p[4], "г" .. p[8], "д" .. p[1], "е" .. p[1], "е" .. p[2], "е" .. p[4], "ж" .. p[1], "з" .. p[1], "и" .. p[1], "и" .. p[2], "к" .. p[1], "к" .. p[3], "к" .. p[4], "к" .. p[10], "к" .. p[14], "л" .. p[1], "л" .. p[3], "л" .. p[6], "л" .. p[8], "о" .. p[1], "о" .. p[2], "п" .. p[1], "п" .. p[3], "п" .. p[4], "с" .. p[1], "с" .. p[2], "т" .. p[1], "т" .. p[3], "т" .. p[4], "у" .. p[1], "у" .. p[2], "ф" .. p[1], "х" .. p[1], "х" .. p[3], "х" .. p[4], "х" .. p[8], "ц" .. p[1], "ц" .. p[3], "ц" .. p[4], "ч" .. p[1], "ч" .. p[3], "ш" .. p[1], "щ" .. p[1], "э" .. p[1], "э" .. p[2], "ю" .. p[1], "ю" .. p[2], "я" .. p[1], "я" .. p[2]
}
},
}
m["aqd"] = {
"Ampari Dogon",
4748057,
"nic-dgw",
"Latn",
}
m["aqg"] = {
"Arigidi",
34829,
"alv-von",
"Latn",
}
m["aqm"] = {
"Atohwaim",
11732297,
"paa-kay",
"Latn",
}
m["aqn"] = {
"Northern Alta",
7058116,
"phi",
"Latn",
}
m["aqp"] = {
"Atakapa",
10975683,
"qfa-iso",
"Latn",
}
m["aqr"] = {
"Arhâ",
4790085,
"poz-cln",
"Latn",
}
m["aqt"] = {
"Angaité",
15736037,
"sai-mas",
"Latn",
}
m["aqz"] = {
"Akuntsu",
4701960,
"tup",
"Latn",
}
-- Data record for language code "arc", which lists ten scripts. Only Armi and
-- Palm have transliteration modules here, and only Syrc has strip_diacritics
-- here; several other scripts are configured in [[Module:scripts/data]]
-- (see the notes at the end of the table).
m["arc"] = {
"အာရမေအဳ",
28602,
"sem-ara",
"Hebr, Armi, Syrc, Palm, Nbat, Phnx, Mand, Samr, Hatr, Elym",
translit = {
Armi = "Armi-translit",
Palm = "Palm-translit",
},
strip_diacritics = {
-- The first three were added by [[User:Wikitiki89]] in 2015 for use with Syriac, which has diacritics that look
-- like a diaeresis (syāmē) and macrons above and below (mṭalqānā); see Wikipedia [[w:Syriac alphabet]]. But
-- I don't know if they are actually represented using these diacritics.
-- The tail of the string is U+0730, a literal "-", then U+0748, built with
-- the lazily loaded u(). NOTE(review): presumably read as the character
-- range U+0730..U+0748 by the consumer -- confirm.
Syrc = {remove_diacritics = c.macron .. c.diaer .. c.macronbelow .. u(0x0730) .. "-" .. u(0x0748)},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
-- Samr strip_diacritics, sort_key in [[Module:scripts/data]]; previously no sort_key for Samr, presumably a mistake
-- Phnx translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission)
}
m["ard"] = {
"Arabana",
3507959,
"aus-kar",
"Latn",
}
m["are"] = {
"Western Arrernte",
12645549,
"aus-rnd",
"Latn",
}
m["arh"] = {
"Arhuaco",
2640621,
"cba",
"Latn",
}
m["ari"] = {
"Arikara",
56539,
"cdd",
"Latn",
strip_diacritics = {remove_diacritics = c.acute},
}
m["arj"] = {
"Arapaso",
9627356,
"sai-tuc",
"Latn",
}
m["ark"] = {
"Arikapú",
3446640,
"sai-mje",
"Latn",
}
m["arl"] = {
"Arabela",
2591221,
"sai-zap",
"Latn",
}
m["arn"] = {
"မာၜေအ်ဓုန်ကာန်",
33730,
"sai-ara",
"Latn",
}
m["aro"] = {
"Araona",
958414,
"sai-tac",
"Latn",
}
m["arp"] = {
"အာရာပါဟဝ်",
56417,
"alg-ara",
"Latn",
}
m["arq"] = {
"အာရဗဳ အာန်လ်ဂျဳရဳယျာ",
56499,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["arr"] = {
"Arara-Karo",
35539,
"tup",
"Latn",
}
m["ars"] = {
"အာရဗဳ နေတ်ဒဳ",
56574,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["aru"] = {
"Arua",
2746221,
"auf",
"Latn",
}
m["arv"] = {
"Arbore",
56883,
"cus-eas",
"Latn",
}
m["arw"] = {
"အာရတ်ဝါတ်",
2655664,
"awd-taa",
"Latn",
}
m["arx"] = {
"Aruá",
3507907,
"tup",
"Latn",
}
m["ary"] = {
"အာရဗဳ မဝ်ရဝ်ကာန်",
56426,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["arz"] = {
"အာရဗဳ အဳဂျေပ်",
29919,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["asa"] = {
"Pare",
36403,
"bnt-par",
"Latn",
}
m["asb"] = {
"Assiniboine",
2591288,
"sio-dkt",
"Latn",
}
m["asc"] = {
"Casuarina Coast Asmat",
11732046,
"ngf-asm",
"Latn",
}
m["ase"] = {
"အရေဝ်ဘာသာကွတ်တဲအမေရိကာန်",
14759,
"sgn",
"Sgnw",
}
m["asf"] = {
"Auslan",
29525,
"sgn",
"Latn", -- when documented
}
m["asg"] = {
"Cishingini",
35199,
"nic-kam",
"Latn",
}
m["ash"] = {
"Abishira",
2871740,
"qfa-dis", -- extinct, poorly documented; isolate or in a proposed Tequiraca-Canichana family by Kaufman (1994)
"Latn",
}
m["asi"] = {
"Buruwai",
5001031,
"ngf-sab",
"Latn",
}
m["asj"] = {
"Nsari",
36418,
"nic-bbe",
"Latn",
}
m["ask"] = {
"အာပ်သကေန်",
29379,
"nur-sou",
"Arab, Latn",
}
m["asl"] = {
"Asilulu",
12473347,
"poz-cma",
"Latn",
}
m["asn"] = {
"ဃှေန်ဂူ အေက်သဝေနဳ",
8044571,
"tup-gua",
"Latn",
}
m["aso"] = {
"Dano",
5220979,
"ngf-gah",
"Latn",
}
m["asp"] = {
"Algerian Sign Language",
3135421,
"sgn",
}
m["asq"] = {
"Austrian Sign Language",
36668,
"sgn",
"Latn", -- when documented
}
m["asr"] = {
"Asuri",
3504321,
"mun",
"Latn", -- when documented
}
m["ass"] = {
"Ipulo",
35408,
"nic-tvc",
"Latn",
}
m["ast"] = {
"အေက်သတဝ်ရေန်",
29507,
"roa-asl",
"Latn",
}
m["asu"] = {
"Tocantins Asurini",
32041490,
"tup-gua",
"Latn",
}
m["asv"] = {
"Asoa",
56296,
"csu-maa",
"Latn",
}
m["asw"] = {
"Australian Aboriginal Sign Language",
955216,
"sgn",
"Latn", -- when documented
}
m["asx"] = {
"Muratayak",
11732766,
"ngf-war",
"Latn",
}
m["asy"] = {
"Yaosakor Asmat",
16113158,
"ngf-asm",
"Latn",
}
m["asz"] = {
"As",
2866218,
"poz-hce",
"Latn",
}
m["ata"] = {
"Pele-Ata",
56511,
"qfa-dis", -- Papuan; possibly in a putative West New Britain family, or an isolate
"Latn",
}
m["atb"] = {
"ဇြာဲဝါ",
56594,
"tbq-brm",
"Latn, Lisu", -- also Hani?
translit = {Lisu = "Lisu-translit"},
sort_key = {Lisu = s["Lisu-sortkey"]},
}
m["atc"] = {
"Atsahuaca",
4817730,
"sai-pan",
"Latn",
}
m["atd"] = {
"Ata Manobo",
12627315,
"mno",
"Latn",
}
m["ate"] = {
"အေက်တာမ်ဗါဝ်လ်",
4813055,
"ngf-wso",
"Latn",
}
m["atg"] = {
"Okpela",
7082551,
"alv-yek",
"Latn",
}
m["ati"] = {
"Attié",
34844,
"alv-lag",
"Latn",
}
m["atj"] = {
"အထိကာမိတ်",
56590,
"alg",
"Latn",
ancestors = "cr",
}
m["atk"] = {
"Ati",
3217458,
"phi",
"Latn",
}
m["atl"] = {
"Mount Iraya Agta",
6921430,
"phi",
"Latn",
}
m["atm"] = {
"Ata",
4812603,
"phi",
"Latn",
}
m["ato"] = {
"Atong (Cameroon)",
34824,
"nic-grs",
"Latn",
}
m["atp"] = {
"Pudtol Atta",
12640726,
"phi",
"Latn",
}
m["atq"] = {
"Aralle-Tabulahan",
4783889,
"poz-ssw",
"Latn",
}
m["atr"] = {
"ဝါဲမဳရဳ-အာထရဝ်ရဳ",
56865,
"sai-car",
"Latn",
}
m["ats"] = {
"ဂရတ် ဗါန်တေ",
56628,
"alg-ara",
"Latn",
}
m["att"] = {
"ပါန်ပလဝ်နာ အာတ်တာ",
12639245,
"phi",
"Latn",
}
m["atu"] = {
"Reel",
7306882,
"sdv-dnu",
"Latn",
}
m["atv"] = {
"အာန်တာယ် လ္ပာ်သၟဝ်ကျာ",
2640863,
"trk-ssb",
"Cyrl",
translit = "Altai-translit",
}
m["atw"] = {
"Atsugewi",
56718,
"nai-pal",
"Latn",
}
m["atx"] = {
"Arutani",
56609,
nil,
"Latn",
}
m["aty"] = {
"Aneityum",
2379113,
"poz-vns",
"Latn",
}
m["atz"] = {
"Arta",
3508067,
"phi",
"Latn",
}
m["aua"] = {
"Asumboa",
4811870,
"poz-tem",
"Latn",
}
m["aub"] = {
"Alugu",
12626798,
"tbq-urp",
"Latn", -- also Hani?
}
m["auc"] = {
"Huaorani",
758570,
"qfa-iso",
"Latn",
}
m["aud"] = {
"Anuta",
35326,
"poz-pnp",
"Latn",
}
m["aug"] = {
"Aguna",
34733,
"alv-gbe",
"Latn",
}
m["auh"] = {
"Aushi",
2872082,
"bnt-sbi",
"Latn",
}
m["aui"] = {
"Anuki",
3508132,
"poz-ocw",
"Latn",
}
m["auj"] = {
"အာဂျဳလာ",
56398,
"ber",
"Latn, Arab, Tfng",
}
m["auk"] = {
"Heyo",
3504295,
"paa-hya",
"Latn",
}
m["aul"] = {
"Aulua",
427300,
"poz-vnc",
"Latn",
}
m["aum"] = {
"အာသူ",
34798,
"alv-ngb",
"Latn",
}
m["aun"] = {
"Molmo One",
12637224,
"paa-trr",
"Latn",
}
m["auo"] = {
"Auyokawa",
56247,
"cdc-wst",
"Latn",
}
m["aup"] = {
"Makayam",
6738863,
"paa-tir",
"Latn",
}
m["auq"] = {
"Anus",
23855,
"poz-ocw",
"Latn",
}
m["aur"] = {
"Aruek",
3504279,
"paa-kom",
"Latn",
}
m["aut"] = {
"Austral",
2669261,
"poz-pep",
"Latn",
}
m["auu"] = {
"Auye",
4827334,
"ngf-pan",
"Latn",
}
m["auw"] = {
"Awyi",
3513326,
"paa-taa",
"Latn",
}
m["aux"] = {
"အာဝ်ရာက်",
3507995,
"tup-gua",
"Latn",
}
m["auy"] = {
"Auyana",
2873211,
"ngf-gau",
"Latn",
}
m["auz"] = {
"အာရဗဳ ဥူသဗက်ကဳ",
3399507,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["avb"] = {
"Avau",
12627412,
"poz-ocw",
"Latn",
}
m["avd"] = {
"အာယ်ဝဳရဳ-ဝဳဒါရဳ",
3327357,
"xme",
"fa-Arab",
ancestors = "xme-mid",
}
m["avi"] = {
"Avikam",
34840,
"alv-lag",
"Latn",
}
m["avk"] = {
"ခါဝ်တာဝါယ်",
1377116,
"art",
"Latn",
type = "appendix-constructed",
}
m["avm"] = {
"Angkamuthi",
62603022,
"aus-pmn",
"Latn",
}
m["avn"] = {
"Avatime",
34796,
"alv-ktg",
"Latn",
}
m["avo"] = {
"Agavotaguerra",
3508007,
"awd",
"Latn",
}
m["avs"] = {
"Aushiri",
3409318,
"sai-zap",
"Latn",
}
m["avt"] = {
"Au",
3446608,
"paa-wap",
"Latn",
}
m["avu"] = {
"အာတ်ဝါဝ်ခါယျ",
56685,
"csu-mma",
"Latn",
}
m["avv"] = {
"Avá-Canoeiro",
4829584,
"tup-gua",
"Latn",
}
m["awa"] = {
"အဝါဒဳ",
29579,
"inc-hie",
"Deva, Kthi, fa-Arab",
ancestors = "inc-oaw",
translit = {
Deva = "hi-translit"
},
}
m["awb"] = {
"Awa (New Guinea)",
2874650,
"ngf-gau",
"Latn",
}
m["awc"] = {
"Cicipu",
35193,
"nic-kam",
"Latn",
}
m["awe"] = {
"အာဝပ်တဳ",
4830038,
"tup",
"Latn",
}
m["awg"] = {
"အာန်ဂူတဳမဳရဳ",
4764288,
"aus-pam",
"Latn",
}
m["awh"] = {
"Awbono",
3446684,
"paa-baa",
"Latn",
}
m["awi"] = {
"Aekyom",
3399691,
"paa-kae",
"Latn",
}
m["awk"] = {
"အဝါဗာကဴ",
3449138,
"aus-pam",
"Latn",
}
m["awm"] = {
"Arawum",
4784537,
"ngf-rai",
"Latn",
}
m["awn"] = {
"Awngi",
34934,
"cus-cen",
"Ethi",
}
m["awo"] = {
"Awak",
3446643,
"alv-wjk",
"Latn",
}
m["awr"] = {
"Awera",
56379,
"paa-flp",
"Latn",
}
m["aws"] = {
"South Awyu",
12633986,
"ngf-awy",
"Latn",
}
m["awt"] = {
"Araweté",
4784535,
"tup-gua",
"Latn",
}
m["awu"] = {
"Central Awyu",
12628801,
"ngf-awy",
"Latn",
}
m["awv"] = {
"Jair Awyu",
16110177,
"ngf-awy",
"Latn",
}
m["aww"] = {
"Awun",
56369,
"paa-sep",
"Latn",
}
m["awx"] = {
"Awara",
2874670,
"ngf-waa",
"Latn",
}
m["awy"] = {
"Edera Awyu",
12630425,
"ngf-awy",
"Latn",
}
m["axb"] = {
"Abipón",
11252539,
"sai-guc",
"Latn",
}
m["axe"] = {
"Ayerrerenge",
16112737,
"aus-pam",
"Latn",
}
m["axg"] = {
"Arára (Mato Grosso)",
3446660,
nil,
"Latn",
}
m["axk"] = {
"Aka (Central Africa)",
11010149,
"bnt-ngn",
"Latn",
}
m["axl"] = {
"Lower Southern Aranda",
6693295,
"aus-rnd",
"Latn",
}
-- Entry for code "axm". Besides the four positional fields it declares an
-- ancestor ("xcl"), a transliteration module name, and entry-name normalisation.
m["axm"] = {
"အာမေနဳယျာအဒေါဝ်",
4438498,
"hyx",
"Armn",
ancestors = "xcl",
translit = "Armn-translit",
override_translit = true,
strip_diacritics = {
-- Armenian punctuation/intonation marks listed here are dropped.
remove_diacritics = "՞՜՛՟",
-- `from` and `to` are parallel lists: the two-letter sequence "եւ" becomes the
-- single ligature character, and the two Armenian half-ring/apostrophe signs
-- become "ʻ" and "’" respectively.
-- NOTE(review): presumably paired by index by the consumer in [[Module:languages]] — confirm.
from = {"եւ", "ՙ", "՚"},
to = {"և", "ʻ", "’"}
}
}
m["axx"] = {
"Xârâgurè",
8045635,
"poz-cln",
"Latn",
}
m["aya"] = {
"Awar",
56876,
"paa-baw",
"Latn",
}
m["ayb"] = {
"Ayizo",
34841,
"alv-pph",
"Latn",
}
m["ayd"] = {
"Ayabadhu",
3509164,
"aus-pmn",
"Latn",
}
m["aye"] = {
"Ayere",
34788,
"alv-aah",
"Latn",
}
m["ayg"] = {
"Nyanga (Togo)",
35446,
"alv-gng",
"Latn",
}
m["ayi"] = {
"Leyigha",
3914492,
"nic-uce",
"Latn",
}
m["ayk"] = {
"Akuku",
3450179,
"alv-nwd",
"Latn",
}
m["ayl"] = {
"အာရဗဳလေတ်ဗျာ",
56503,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["ayn"] = {
"အာရဗဳ ယာက်မနဳ",
1686766,
"sem-arb",
"Arab, Hebr",
strip_diacritics = {
Arab = "ar-stripdiacritics",
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
m["ayo"] = {
"Ayoreo",
56634,
"sai-zam",
"Latn",
}
m["ayp"] = {
"အာရဗဳ မာက်သဝ်ပဝ်တေမဳယာန် သၟဝ်ကျာ",
56577,
"sem-arb",
"Arab",
ancestors = "acm",
strip_diacritics = "ar-stripdiacritics",
}
m["ayq"] = {
"Ayi",
56449,
"paa-sep",
"Latn",
}
m["ays"] = {
"Sorsogon Ayta",
7563752,
"phi",
"Latn",
}
m["ayt"] = {
"Bataan Ayta",
4921648,
"phi",
"Latn",
}
m["ayu"] = {
"Ayu",
34786,
"alv",
"Latn",
}
-- ayy deleted and removed from ISO; per the removal request, "no linguistic data exists for any [Ayta] language that the
-- ancestors of this group might have once spoken. And thus, there is no evidence that this group ever had a language
-- distinct from any other Philippine language." [Lobel]
m["ayz"] = {
"Maybrat",
4830892,
"paa-may",
-- either an isolate; grouped with Abun and the West Bird's Head family; or in the putative West Papuan family
"Latn",
}
m["aza"] = {
"Azha",
4832486,
"tbq-axi",
"Latn",
}
m["azd"] = {
"ဒူရာန်ဂဝ် နာဟွာတာယ်လ်လ္ပာ်ဖာဗၟံက်",
16115449,
"azc-dur",
"Latn",
}
m["azg"] = {
"San Pedro Amuzgos Amuzgo",
35092,
"omq",
"Latn",
}
m["azm"] = {
"Ipalapa Amuzgo",
12633013,
"omq",
"Latn",
}
m["azn"] = {
"Western Durango နာဟွာတာယ်လ်",
12645553,
"azc-dur",
"Latn",
}
m["azo"] = {
"Awing",
34856,
"nic-nge",
"Latn",
}
m["azt"] = {
"Faire Atta",
12630884,
"phi",
"Latn",
}
m["azz"] = {
"ဟာဲလာန် ပွာယ်ဗလာ နာဟွာတာယ်လ်",
12953754,
"azc-nah",
"Latn",
}
-- Pass the fully populated table `m` to finalizeData in [[Module:languages]], tagged
-- with the string "language", and return whatever that call returns as this chunk's value.
return require("Module:languages").finalizeData(m, "language")
b3pj7g4zpr457pzqai4ac8r1vla0z1n
395877
395876
2026-05-29T15:33:19Z
Intobesa.bot
1035
Bot: ပလေဝ်ဒါန်
395877
Scribunto
text/plain
local m_langdata = require("Module:languages/data")
-- Lazy wrapper around the `char` function of [[Module:string utilities]].
-- Loaded on demand, as it may not be needed (depending on the data).
-- The first call rebinds the local `u` to the loaded function and forwards its
-- arguments; every later call to `u` goes directly to that function, so the
-- require() below runs at most once through this path.
local function u(...)
-- `u` here is the local declared by `local function u`, so this replaces the stub itself.
u = require("Module:string utilities").char
return u(...)
end
local c = m_langdata.chars
local p = m_langdata.puaChars
local s = m_langdata.shared
local m = {}
m["aaa"] = {
"ဂါဝ်တူဥူ",
35463,
"alv-yek",
"Latn",
}
m["aab"] = {
"အာဠူမူ-ထေတ်သူ",
35034,
"nic-alu",
"Latn",
}
m["aac"] = {
"အာရဳ",
1811224,
"ngf-gsu",
"Latn",
}
m["aad"] = {
"အာမာန်",
56708,
"paa-sep",
"Latn",
}
-- "aae" is treated as "sq", see [[WT:LT]]
m["aaf"] = {
"အာရာနဒါန်",
3507928,
"dra-mal",
"Mlym",
-- Mlym translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission)
}
m["aag"] = {
"အာန်ဗရေတ်ခ်",
4741706,
"paa-pal",
"Latn",
}
m["aah"] = {
"အာၜေအ်' အာရာဗေါတ်",
4670715,
"paa-ara",
"Latn",
}
m["aai"] = {
"အာရဳဖှာန်မာ-မဳနဳယျာဖှဳယျာ",
4790560,
"poz-ocw",
"Latn",
}
m["aak"] = {
"အာန်ခါဝေ",
3446690,
"ngf-ata",
"Latn",
}
m["aal"] = {
"အာဖှာဲဒေ",
56434,
"cdc-cbm",
"Latn",
}
m["aan"] = {
"အာန္နာမ်ဗေ",
3507873,
"tup-gua",
"Latn",
}
m["aap"] = {
"ဘာရာ အဝ်ရာအ်ရာ",
56807,
"sai-pek",
"Latn",
}
m["aaq"] = {
"ဘာနိုတ်သကေတ်",
3515185,
"alg-abp",
"Latn",
}
m["aas"] = {
"အောတ်သေတ်",
56620,
"cus-sou",
"Latn",
}
-- "aat" is treated as "sq", see [[WT:LT]]
m["aau"] = {
"အာၜေဴ",
3073568,
"paa-sep",
"Latn",
}
m["aaw"] = {
"သဝ်လံန်",
7558834,
"poz-ocw",
"Latn",
}
m["aax"] = {
"မာန်ဒဝ်ဗဝ် အာတ်တာပ်",
12636156,
"ngf-dum",
"Latn",
}
m["aaz"] = {
"အာန်မာရသဳ",
4740192,
"poz-tim",
"Latn",
}
m["aba"] = {
"အေက်ဗေ",
34833,
"alv-lag",
"Latn",
}
m["abb"] = {
"Bankon",
34860,
"bnt-bsa",
"Latn",
}
m["abc"] = {
"Ambala Ayta",
3448896,
"phi",
"Latn",
}
m["abd"] = {
"Camarines Norte Agta",
3399682,
"phi",
"Latn",
}
m["abe"] = {
"အေတ်ဗဒ်နာကဳ",
17502788,
"alg-abp",
"Latn",
}
m["abf"] = {
"Abai Sungai",
4663287,
"poz-san",
"Latn",
}
m["abg"] = {
"Abaga",
3507954,
"ngf-kya",
"Latn",
}
m["abh"] = {
"အာရဗဳ တဇေတ်ကဳ",
56833,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["abi"] = {
"Abidji",
34781,
"alv-lag",
"Latn",
}
m["abj"] = {
"အကာ-ဗဳအ်",
2356391,
"qfa-ads",
"Latn",
}
m["abl"] = {
"Abung",
49215,
"poz-lgx",
"Latn",
}
m["abm"] = {
"Abanyom",
7502,
"nic-eko",
"Latn",
}
m["abn"] = {
"Abua",
34835,
"nic-cde",
"Latn",
}
m["abo"] = {
"Abon",
35121,
"nic-tvn",
"Latn",
}
m["abp"] = {
"အာဗေန်လာန် အာဲတာ",
3436621,
"phi",
"Latn",
}
-- Entry for code "abq", written in Cyrillic and Latin. Transliteration and display
-- text are defined for Cyrillic only; diacritic stripping is defined for both scripts
-- using shared definitions looked up in `s` (m_langdata.shared).
m["abq"] = {
"အဗါတ်သာ",
27567,
"cau-abz",
"Cyrl, Latn",
translit = {
Cyrl = "abq-translit"
},
override_translit = true,
display_text = {
Cyrl = s["cau-Cyrl-displaytext"]
},
strip_diacritics = {
Cyrl = s["cau-Cyrl-stripdiacritics"],
Latn = s["cau-Latn-stripdiacritics"],
},
-- Sort key for Cyrillic: each multigraph (and "ё") in `from` is rewritten to the
-- entry at the same position in `to`, which is a single base letter followed by
-- one element of `p` (m_langdata.puaChars).
-- NOTE(review): presumably the p[n] suffix makes the multigraph sort as its own
-- letter right after the base letter, in the order given by n — confirm against
-- the sort-key code in [[Module:languages]].
sort_key = {
Cyrl = {
-- Trigraphs are listed before digraphs; keep that order, since several digraphs
-- (e.g. "гъ") are prefixes of trigraphs (e.g. "гъв").
from = {
"гъв", "гъь", "гӏв", "джв", "джь", "къв", "къь", "кӏв", "кӏь", "хъв", "хӏв", "чӏв", -- 3 chars
"гв", "гъ", "гь", "гӏ", "дж", "дз", "ё", "жв", "жь", "кв", "къ", "кь", "кӏ", "ль", "лӏ", "пӏ", "тл", "тш", "тӏ", "фӏ", "хв", "хъ", "хь", "хӏ", "цӏ", "чв", "чӏ", "шв", "шӏ" -- 2 chars
},
-- Laid out line-for-line to match `from`: first line = replacements for the
-- trigraphs, second line = replacements for the digraphs.
to = {
"г" .. p[3], "г" .. p[4], "г" .. p[7], "д" .. p[2], "д" .. p[3], "к" .. p[3], "к" .. p[4], "к" .. p[7], "к" .. p[8], "х" .. p[3], "х" .. p[6], "ч" .. p[3],
"г" .. p[1], "г" .. p[2], "г" .. p[5], "г" .. p[6], "д" .. p[1], "д" .. p[4], "е" .. p[1], "ж" .. p[1], "ж" .. p[2], "к" .. p[1], "к" .. p[2], "к" .. p[5], "к" .. p[6], "л" .. p[1], "л" .. p[2], "п" .. p[1], "т" .. p[1], "т" .. p[2], "т" .. p[3], "ф" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[4], "х" .. p[5], "ц" .. p[1], "ч" .. p[1], "ч" .. p[2], "ш" .. p[1], "ш" .. p[2]
}
},
},
}
-- "abr" Abron is treated as "ak" Akan, see [[WT:LT]]
m["abs"] = {
"မလေဝ် အာန်ဗဝ်နေတ်",
3124354,
"crp",
"Latn",
ancestors = "ms",
}
m["abt"] = {
"Ambulas",
3508015,
"paa-nnd",
"Latn",
}
m["abu"] = {
"Abure",
34767,
"alv-ptn",
"Latn",
}
m["abv"] = {
"အာရဗဳ ဗာဟာနာ",
56576,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["abw"] = {
"Pal",
7126121,
"ngf-omo",
"Latn",
}
m["abx"] = {
"Inabaknon",
2820163,
"poz-sbj",
"Latn",
}
m["aby"] = {
"Aneme Wake",
3508107,
"ngf-yar",
"Latn",
}
m["abz"] = {
"Abui",
2822110,
"paa-alp",
"Latn",
}
m["aca"] = {
"Achagua",
2822982,
"awd",
"Latn",
}
m["acb"] = {
"Áncá",
11130787,
"nic-mom",
"Latn",
}
m["acd"] = {
"Gikyode",
35256,
"alv-gng",
"Latn",
}
-- Entry for code "ace", written in "Latn" and "ms-Arab".
m["ace"] = {
"အာသံနဳစ်",
27683,
"cmc",
"Latn, ms-Arab",
-- Characters regarded as standard for this entry. Only the Latin set is keyed by
-- script; `c.punc` (from m_langdata.chars) is stored as a positional element.
standard_chars = {
Latn = "AaBbCcDdEeÉéÈèËëFfGgHhIiJjKkLlMmNnOoÔôÖöPpQqRrSsTtUuVvWwXxYyZz", -- current orthography (Arab not yet added)
c.punc
},
}
m["ach"] = {
"Acholi",
34926,
"sdv-los",
"Latn",
}
m["aci"] = {
"အကာ-ကာရဳ",
2670418,
"qfa-adn",
"Latn",
}
m["ack"] = {
"အကာ-ကိုဝ်ရာ",
3433680,
"qfa-adn",
"Latn",
}
m["acl"] = {
"အာပ်-ဗေလ်",
3436825,
"qfa-ads",
"Latn",
}
m["acm"] = {
"အာရဗဳ အဳရတ်",
56232,
"sem-arb",
"Arab, Hebr",
strip_diacritics = {
Arab = "ar-stripdiacritics",
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
m["acn"] = {
"Achang",
56582,
"tbq-brm",
"Latn",
}
m["acp"] = {
"Eastern Acipa",
5329945,
"nic-kmk",
"Latn",
}
m["acr"] = {
"Achi",
34774,
"myn",
"Latn",
}
m["acs"] = {
"Acroá",
2829146,
"sai-cje",
"Latn",
}
m["acu"] = {
"Achuar",
2823170,
"sai-jiv",
"Latn",
}
m["acv"] = {
"Achumawi",
56661,
"nai-pal",
"Latn",
}
m["acw"] = {
"အာရဗဳဟဳဂျာဇြဳ",
56608,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["acx"] = {
"အာရဗဳ အဝ်မာန်နဳ",
56630,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["acy"] = {
"အာရဗဳ သာဲပရေက်",
56416,
"sem-arb",
"Latn, Grek",
ancestors = "acm",
strip_diacritics = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.breve},
},
-- Grek display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
standard_chars = {
Latn = "AaBbCcDdΔδEeFfGgĠġĊċIiJjKkLlMmNnOoPpΘθRrSsTtUuVvWwXxYyZzŞş",
c.punc
},
}
m["acz"] = {
"Acheron",
34769,
"alv-tal",
"Latn",
}
m["ada"] = {
"Adangme",
35141,
"alv-gda",
"Latn",
}
m["adb"] = {
"Atauran",
125421255,
"poz-cet",
"Latn",
}
m["add"] = {
"Dzodinka",
35266,
"nic-nka",
"Latn",
}
m["ade"] = {
"Adele",
27740,
"alv-ntg",
"Latn",
}
m["adf"] = {
"အာရဗဳ ဒဝ်ဖာရဳ",
56565,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["adg"] = {
"Andegerebinha",
3508123,
"aus-rnd",
"Latn",
}
m["adh"] = {
"Adhola",
1971400,
"sdv-los",
"Latn",
}
m["adi"] = {
"အဒဳ",
56440,
"sit-tan",
"Latn",
}
m["adj"] = {
"အာဒေတ်အူခရု",
34738,
"alv-lag",
"Latn",
}
m["adl"] = {
"Galo",
2857892,
"sit-tan",
"Latn",
}
m["adn"] = {
"Adang",
3398276,
"paa-alp",
"Latn",
}
m["ado"] = {
"Abu",
56659,
"paa-por",
"Latn",
}
m["adp"] = {
"အဒပ်",
3512402,
"sit-tib",
"Tibt",
ancestors = "dz",
translit = "Tibt-translit",
override_translit = true,
display_text = s["Tibt-displaytext"],
entry_name = s["Tibt-entryname"],
sort_key = "Tibt-sortkey",
wikipedia_article = "Dzongkha", -- Considered a dialect of Dzongkha
}
m["adq"] = {
"Adangbe",
34730,
"alv-gda",
"Latn",
ancestors = "ada",
}
m["adr"] = {
"Adonara",
4684505,
"poz-cet",
"Latn",
}
m["ads"] = {
"Adamorobe Sign Language",
27709,
"sgn",
"Latn", -- when documented
}
m["adt"] = {
"Adnyamathanha",
2225391,
"aus-psw",
"Latn",
}
m["adu"] = {
"Aduge",
34734,
"alv-nwd",
"Latn",
ancestors = "opa",
}
m["adw"] = {
"Amondawa",
12626847,
"tup-gua",
"Latn",
}
m["ady"] = {
"အာက်ဒေါတ်ကာယ်",
27776,
"cau-cir",
"Cyrl, Latn, Arab",
translit = {
Cyrl = "cau-cir-translit",
Arab = "ar-translit",
},
override_translit = true,
display_text = {
Cyrl = s["cau-Cyrl-displaytext"]
},
strip_diacritics = {
Cyrl = s["cau-Cyrl-stripdiacritics"],
Latn = s["cau-Latn-stripdiacritics"],
},
sort_key = {
Cyrl = {
from = {
"кхъу", "къӏу", -- 4 chars
"гъу", "джу", "дзу", "жъу", "къу", "кхъ", "къӏ", "кӏу", "кӏь", "лъу", "лӏу", "пӏу", "сӏу", "тӏу", "фӏу", "хъу", "цӏу", "чъу", "чӏу", "шъу", "шӏу", "щӏу", -- 3 chars
"гу", "гъ", "гь", "дж", "дз", "ё", "жъ", "жь", "ку", "къ", "кь", "кӏ", "лъ", "ль", "лӏ", "пӏ", "сӏ", "тӏ", "фӏ", "ху", "хъ", "хь", "цу", "цӏ", "чу", "чъ", "чӏ", "шъ", "шӏ", "щӏ", "ӏу", "ӏь" -- 2 chars
},
to = {
"к" .. p[5], "к" .. p[7],
"г" .. p[3], "д" .. p[2], "д" .. p[4], "ж" .. p[2], "к" .. p[3], "к" .. p[4], "к" .. p[6], "к" .. p[10], "к" .. p[11], "л" .. p[2], "л" .. p[5], "п" .. p[2], "с" .. p[2], "т" .. p[2], "ф" .. p[2], "х" .. p[3], "ц" .. p[3], "ч" .. p[3], "ч" .. p[5], "ш" .. p[2], "ш" .. p[4], "щ" .. p[2],
"г" .. p[1], "г" .. p[2], "г" .. p[4], "д" .. p[1], "д" .. p[3], "е" .. p[1], "ж" .. p[1], "ж" .. p[3], "к" .. p[1], "к" .. p[2], "к" .. p[8], "к" .. p[9], "л" .. p[1], "л" .. p[3], "л" .. p[4], "п" .. p[1], "с" .. p[1], "т" .. p[1], "ф" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[4], "ц" .. p[1], "ц" .. p[2], "ч" .. p[1], "ч" .. p[2], "ч" .. p[4], "ш" .. p[1], "ш" .. p[3], "щ" .. p[1], "ӏ" .. p[1], "ӏ" .. p[2]
}
},
},
}
m["adz"] = {
"Adzera",
3327445,
"poz-ocw",
"Latn",
}
m["aea"] = {
"Areba",
3509129,
"aus-pam",
"Latn",
}
m["aeb"] = {
"အာရဗဳ တူနဳယှေန်",
56240,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["aed"] = {
"Argentine Sign Language",
3322073,
"sgn",
"Latn", -- when documented
}
m["aee"] = {
"ပါသျှယဳ ဒိုဟ်ဗၟံက်သၟဝ်ကျာ",
12642198,
"inc-pas",
"fa-Arab, Latn",
}
m["aek"] = {
"Haeke",
5638166,
"poz-cln",
"Latn",
}
m["ael"] = {
"Ambele",
34818,
"nic-grf",
"Latn",
}
m["aem"] = {
"အါန်",
3507920,
"mkh-vie",
"Latn",
}
m["aen"] = {
"Armenian Sign Language",
3446604,
"sgn",
}
m["aeq"] = {
"Aer",
3246741,
"inc-wes",
"Arab",
}
m["aer"] = {
"အာရေန်တာယ်",
10728232,
"aus-rnd",
"Latn",
}
m["aes"] = {
"Alsea",
2395641,
nil,
"Latn",
}
m["aeu"] = {
"Akeu",
4700657,
"tbq-sil",
"Latn",
}
m["aew"] = {
"Ambakich",
56642,
"paa-eke",
"Latn",
}
m["aey"] = {
"Amele",
3508025,
"ngf-gum",
"Latn",
}
m["aez"] = {
"ဨကာ",
16110528,
"ngf-oro",
"Latn",
}
m["afb"] = {
"အာရဗဳအထံက်ဂၚ်",
56385,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["afd"] = {
"Andai",
4753480,
"paa-arf",
"Latn",
}
m["afe"] = {
"Putukwam",
3914930,
"nic-ben",
"Latn",
}
m["afg"] = {
"Afghan Sign Language",
4689093,
"sgn",
}
m["afh"] = {
"Afrihili",
384707,
"art",
"Latn",
type = "appendix-constructed",
}
m["afi"] = {
"Akrukay",
57003,
"paa-tam",
"Latn",
}
m["afk"] = {
"Nanubae",
6964416,
"paa-arf",
"Latn",
}
m["afn"] = {
"Defaka",
35174,
"nic",
"Latn",
}
m["afo"] = {
"Eloyi",
3914066,
"nic-plt",
"Latn",
}
m["afp"] = {
"Tapei",
16887371,
"paa-arf",
"Latn",
}
m["afs"] = {
"Afro-Seminole Creole",
27867,
"crp",
"Latn",
ancestors = "en",
}
m["aft"] = {
"Afitti",
3400829,
"sdv-nyi",
"Latn",
}
m["afu"] = {
"Awutu",
34847,
"alv-gng",
"Latn",
}
m["afz"] = {
"Obokuitai",
7075258,
"paa-clp",
"Latn",
}
m["aga"] = {
"Aguano",
3331203,
nil,
"Latn",
}
m["agb"] = {
"Legbo",
35584,
"nic-uce",
"Latn",
}
m["agc"] = {
"Agatu",
34732,
"alv-ido",
"Latn",
}
m["agd"] = {
"Agarabi",
3399642,
"ngf-gau",
"Latn",
}
m["age"] = {
"Angal",
10951553,
"ngf-ank",
"Latn",
}
m["agf"] = {
"Arguni",
12473346,
"poz-cet",
"Latn",
}
m["agg"] = {
"Angor",
3508100,
"paa-sng",
"Latn",
}
m["agh"] = {
"Ngelima",
7022266,
"bnt-bta",
"Latn",
}
m["agi"] = {
"Agariya",
663586,
"mun",
"Deva",
}
m["agj"] = {
"Argobba",
29292,
"sem-eth",
"Ethi",
}
m["agk"] = {
"Isarog Agta",
6078982,
"phi",
"Latn",
}
m["agl"] = {
"Fembe",
372927,
"ngf-est",
"Latn",
}
m["agm"] = {
"Angaataha",
3508001,
"ngf-ang",
"Latn",
}
m["agn"] = {
"Agutaynen",
3399717,
"phi-kal",
"Latn",
}
m["ago"] = {
"Tainae",
7676186,
"ngf-taa",
"Latn",
}
m["agq"] = {
"Aghem",
34737,
"nic-rnw",
"Latn",
}
m["agr"] = {
"Aguaruna",
1526530,
"sai-jiv",
"Latn",
}
m["ags"] = {
"Esimbi",
35260,
"nic-bds",
"Latn",
}
m["agt"] = {
"ကာဂါယာန် အာက်ထာ ဗဟဵု",
5017296,
"phi",
"Latn",
}
m["agu"] = {
"အာဂွာကာတေကာ",
35091,
"myn",
"Latn",
}
m["agv"] = {
"ရောမါန်ဒါဒဝ် အာက်ဂါ",
3508085,
"phi",
"Latn",
}
m["agw"] = {
"Kahua",
3191906,
"poz-sls",
"Latn",
}
m["agx"] = {
"အာခူန်",
36498,
"cau-esm",
"Cyrl",
translit = "cau-nec-translit",
override_translit = true,
display_text = s["cau-Cyrl-displaytext"],
strip_diacritics = s["cau-Cyrl-stripdiacritics"],
sort_key = {
from = {"аь", "гъ", "гь", "гӏ", "дж", "ё", "къ", "кь", "кӏ", "оь", "пӏ", "тӏ", "уь", "хъ", "хь", "хӏ", "цӏ", "чӏ"},
to = {"а" .. p[1], "г" .. p[1], "г" .. p[2], "г" .. p[3], "д" .. p[1], "е" .. p[1], "к" .. p[1], "к" .. p[2], "к" .. p[3], "о" .. p[1], "п" .. p[1], "т" .. p[1], "у" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[3], "ц" .. p[1], "ч" .. p[1]}
},
}
m["agy"] = {
"Southern Alta",
7569611,
"phi",
"Latn",
}
m["agz"] = {
"Mount Iriga Agta",
6921432,
"phi",
"Latn",
}
m["aha"] = {
"Ahanta",
34729,
"alv-ctn",
"Latn",
}
m["ahb"] = {
"Axamb",
2874710,
"poz-vnc",
"Latn",
}
m["ahg"] = {
"Qimant",
35663,
"cus-cen",
"Latn",
}
m["ahh"] = {
"Aghu",
3436645,
"ngf-awy",
"Latn",
}
m["ahi"] = {
"Tiagba",
3400073,
"kro-aiz",
"Latn",
}
-- Entry for code "ahk", written in Latin, Myanmar and Thai scripts. Only Thai has a
-- custom sort key.
m["ahk"] = {
"အာခါ",
56643,
"tbq-han",
"Latn, Mymr, Thai",
sort_key = {
Thai = {
-- Three pattern/replacement pairs (Lua-style pattern syntax: %p class, %1/%2 captures):
--   1. "[%pๆ]"            -> ""     : delete punctuation and the repetition sign "ๆ".
--   2. "[็-๎]"             -> ""     : delete every character in the range "็".."๎".
--   3. "([เแโใไ])([ก-ฮ])" -> "%2%1" : swap a pre-posed vowel sign with the consonant
--                                     that follows it, so the consonant comes first.
-- NOTE(review): presumably applied in this order by the sort-key code — confirm.
from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"},
to = {"", "", "%2%1"}
},
},
}
m["ahl"] = {
"Igo",
35412,
"alv-ktg",
"Latn",
}
m["ahm"] = {
"Mobu",
35967,
"kro-aiz",
"Latn",
}
m["ahn"] = {
"အ'ဟာန်",
34723,
"alv-aah",
"Latn",
}
m["aho"] = {
"အဟုမ်",
34778,
"tai-swe",
"Ahom",
translit = "Ahom-translit",
}
m["ahp"] = {
"Apro",
34810,
"alv-kwa",
"Latn",
}
m["ahr"] = {
"အဟိရာန်နဳ",
15549890,
"raj",
"Deva",
translit = "mr-translit",
}
m["ahs"] = {
"အာက်သှ်",
34823,
"nic-plc",
"Latn",
}
m["aht"] = {
"Ahtna",
21058,
"ath-nor",
"Latn",
}
m["aia"] = {
"အာရဝ်သဳ",
2863483,
"poz-sls",
"Latn",
}
m["aib"] = {
"Äynu",
27927,
"qfa-mix",
"Arab, Latn",
ancestors = "ug, fa"
}
m["aic"] = {
"Ainbai",
3332149,
"paa-bew",
"Latn",
}
m["aid"] = {
"အာန်ကဝ်ရေဝ်ထေန်",
3279409,
"aus-pmn",
"Latn",
}
m["aie"] = {
"Amara",
2841180,
"poz-ocw",
"Latn",
}
m["aif"] = {
"Agi",
3331491,
"paa-wpa",
"Latn",
}
m["aig"] = {
"အာန်တဳဂွါ ကဵု အၚ်္ဂလိက် ဗါၜူဒါ ခရဳအတ်လ်",
3244184,
"crp",
"Latn",
ancestors = "en",
}
m["aih"] = {
"အာဲ-ချာန်",
2827749,
"qfa-kms",
"Latn, Hani",
sort_key = {
Hani = "Hani-sortkey"
},
}
m["aii"] = {
"အာက်သဳရိ နဳအဝ်-အာရာမေဣ",
29440,
"sem-nna",
"Syrc",
translit = "aii-translit",
strip_diacritics = "Syrc-stripdiacritics",
}
m["aij"] = {
"Lishanid Noshan",
3436467,
"sem-nna",
"Hebr",
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
m["aik"] = {
"Ake",
34808,
"nic-pls",
"Latn",
}
m["ail"] = {
"Aimele",
3327418,
"ngf-bos",
"Latn",
}
m["aim"] = {
"Aimol",
4697175,
"tbq-kuk",
"Latn, Beng",
}
m["ain"] = {
"အာဲနု",
27969,
"qfa-ain",
"Kana, Latn, Cyrl",
sort_key = {
Kana = "Kana-sortkey"
},
}
m["aio"] = {
"အာဲတောန်",
3399725,
"tai-swe",
"Mymr",
translit = "aio-phk-translit",
display_text = s["aio-displaytext"],
strip_diacritics = s["aio-stripdiacritics"],
}
m["aip"] = {
"Burumakok",
5000984,
"ngf-wok",
"Latn",
}
m["air"] = {
"Airoran",
3321131,
"paa-saa",
"Latn",
}
m["ait"] = {
"အာရေဝ်ခေန်",
3446679,
"tup",
"Latn",
}
m["aiw"] = {
"Aari",
7495,
"omv-aro",
"Latn",
}
m["aix"] = {
"Aighon",
3504287,
"poz-ocw",
"Latn",
}
m["aiy"] = {
"Ali",
34814,
"gba-eas",
"Latn",
}
m["aja"] = {
"အာဂျာ (အေက်ဖရိက လ္ပာ်ဗၟံက်)",
3237491,
"csu-bkr",
"Latn",
}
m["ajg"] = {
"Aja (West Africa)",
35035,
"alv-gbe",
"Latn",
}
m["aji"] = {
"အဂျဳ",
2828867,
"poz-cln",
"Latn",
}
m["ajn"] = {
"Andajin",
16111302,
"aus-wor",
"Latn",
}
m["ajp"] = {
"အာရဗဳလပ်ဗေန်ထေန်သမၠုၚ်ကျာ",
55633582,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["ajw"] = {
"Ajawa",
56645,
"cdc-wst",
"Latn",
}
m["ajz"] = {
"Amri Karbi",
3508092,
"tbq-kuk",
"Latn",
ancestors = "mjw",
}
m["akb"] = {
"Angkola Batak",
2640686,
"btk",
"Latn, Batk",
}
m["akc"] = {
"Mpur",
3327139,
"qfa-iso", -- Papuan; based on Palmer (2018), Ethnologue and Glottolog
"Latn",
}
m["akd"] = {
"Ukpet-Ehom",
36618,
"nic-ucr",
"Latn",
}
m["ake"] = {
"အကာဝယဝ်",
28059,
"sai-pem",
"Latn",
}
m["akf"] = {
"Akpa",
34801,
"alv-ido",
"Latn",
}
m["akg"] = {
"အနှတ်ခါလာန်ဂူ",
4750964,
"poz-cet",
"Latn",
}
m["akh"] = {
"Angal Heneng",
10950354,
"ngf-ank",
"Latn",
}
m["aki"] = {
"Aiome",
56735,
"paa-aia",
"Latn",
}
m["akj"] = {
"ဇေရု",
2919121,
"qfa-adn",
"Latn, Deva",
}
m["akk"] = {
"အခါဒဳယာန်",
35518,
"sem-eas",
"Xsux, Latn",
}
m["akl"] = {
"အာက်ခလာန်",
8773,
"phi",
"Latn",
}
m["akm"] = {
"Aka-Bo",
35361,
"qfa-adn",
"Latn",
}
m["ako"] = {
"အာကူရဳအဝ်",
56650,
"sai-tar",
"Latn",
}
m["akp"] = {
"Siwu",
36470,
"alv-ntg",
"Latn",
}
m["akq"] = {
"Ak",
56654,
"paa-sep",
"Latn",
}
m["akr"] = {
"အာရာကဳ",
2699882,
"poz-vnn",
"Latn",
}
m["aks"] = {
"Akaselem",
34817,
"nic-grm",
"Latn",
}
m["akt"] = {
"Akolet",
3330162,
"poz-ocw",
"Latn",
}
m["aku"] = {
"Akum",
34799,
"nic-ykb",
"Latn",
}
m["akv"] = {
"အာပ်ခါဝက်",
56423,
"cau-and",
"Cyrl",
translit = "cau-nec-translit",
override_translit = true,
display_text = s["cau-Cyrl-displaytext"],
strip_diacritics = s["cau-Cyrl-stripdiacritics"],
}
m["akw"] = {
"Akwa",
34802,
"bnt-mbo",
"Latn",
}
m["akx"] = {
"အကာ-ကေဒဵု",
3436816,
"qfa-adc",
"Latn",
}
m["aky"] = {
"အကာ-ကောန်",
3436784,
"qfa-adc",
"Latn",
}
m["akz"] = {
"အာလာဗာမာ",
1815020,
"nai-mus",
"Latn",
}
m["ala"] = {
"Alago",
34813,
"alv-ido",
"Latn",
}
m["alc"] = {
"ခါဝေတ်သကာ",
56544,
"aqa",
"Latn",
}
m["ald"] = {
"Alladian",
34837,
"alv-lag",
"Latn",
}
m["ale"] = {
"အာလောတ်",
27210,
"esx",
"Latn, Cyrl",
}
m["alf"] = {
"Alege",
34815,
"nic-ben",
"Latn",
}
m["alh"] = {
"Alawa",
2147917,
"aus-gun",
"Latn",
}
m["ali"] = {
"Amaimon",
3327427,
"ngf-mad",
"Latn",
}
m["alj"] = {
"အလံၚ်ဂံၚ်",
3327423,
"phi",
"Latn",
}
m["alk"] = {
"Alak",
2714690,
"mkh",
"Latn",
}
m["all"] = {
"Allar",
3393634,
"dra-mal",
"Mlym",
-- Mlym translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission)
}
-- "aln" is treated as "sq", see [[WT:LT]]
m["alm"] = {
"Amblong",
11022615,
"poz-vnn",
"Latn",
}
m["alo"] = {
"Larike-Wakasihu",
3217929,
"poz-cma",
"Latn",
}
m["alp"] = {
"Alune",
3327367,
"poz-cet",
"Latn",
}
m["alq"] = {
"အာယ်လ်ကေန်ဂွေန်",
28092,
"alg",
"Latn, Cans",
ancestors = "oj",
}
-- Entry for code "alr", written in Cyrillic.
m["alr"] = {
"အဠူတေ",
28213,
"qfa-ckn",
"Cyrl",
-- Normalise both the ASCII apostrophe and the typographic "’" to the modifier
-- letter apostrophe "ʼ".
strip_diacritics = {
from = {"['’]"},
to = {"ʼ"}
},
-- Each item in `from` is rewritten to the item at the same position in `to`:
-- a plain base letter followed by an element of `p` (m_langdata.puaChars).
-- The keys "вʼ" and "гʼ" use "ʼ", i.e. the form produced by strip_diacritics above.
-- NOTE(review): presumably this places each extra letter directly after its base
-- letter in sort order — confirm against the consumer.
sort_key = {
from = {"вʼ", "гʼ", "ғ", "ә", "ё", "ӄ", "ӈ"},
to = {"в" .. p[1], "г" .. p[1], "г" .. p[2], "е" .. p[1], "е" .. p[2], "к" .. p[1], "н" .. p[1]}
},
}
-- Entry for code "alt", written in Cyrillic, transliterated via "Altai-translit".
m["alt"] = {
"အာန်တာဲ ဒိုဟ်သမၠုၚ်ကျာ",
1991779,
"trk-kkp",
"Cyrl",
translit = "Altai-translit",
-- The five extra letters in `from` are each rewritten to a base letter plus p[1]
-- (first element of m_langdata.puaChars), position by position with `to`.
-- NOTE(review): presumably so each sorts immediately after its base letter — confirm.
sort_key = {
from = {"ј", "ё", "ҥ", "ӧ", "ӱ"},
to = {"д" .. p[1], "е" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1]}
},
}
m["alu"] = {
"'အာရေဝ်'အာရာ",
5160,
"poz-sls",
"Latn",
}
m["alw"] = {
"Alaba",
56652,
"cus-hec",
"Latn",
}
m["alx"] = {
"Amol",
3504260,
"paa-pal",
"Latn",
}
m["aly"] = {
"Alyawarr",
3327389,
"aus-rnd",
"Latn",
}
m["alz"] = {
"Alur",
56507,
"sdv-los",
"Latn",
}
m["ama"] = {
"Amanayé",
3508053,
"tup-gua",
"Latn",
}
m["amb"] = {
"Ambo",
3450142,
"nic-tvn",
"Latn",
}
m["amc"] = {
"Amahuaca",
2669150,
"sai-pan",
"Latn",
}
m["ame"] = {
"Yanesha'",
3088540,
"awd",
"Latn",
}
m["amf"] = {
"ဟာမေ-ဗါန်နာ",
35764,
"omv-aro",
"Latn, Ethi",
sort_key = "amf-utilities"
}
m["amg"] = {
"Amurdag",
3360016,
"aus-wdj",
"Latn",
}
m["ami"] = {
"ဨမေတ်",
35132,
"map",
"Latn",
}
m["amj"] = {
"Amdang",
28335,
"ssa-fur",
"Latn",
}
m["amk"] = {
"အီုဗါဲ",
1875885,
"poz-hce",
"Latn",
}
m["aml"] = {
"War-Jaintia",
56321,
"aav-khs",
"Latn",
}
m["amm"] = {
"အာမာ",
3446626,
"paa-lma",
"Latn",
}
m["amn"] = {
"အာမနှာတ်",
3327399,
"paa-war",
"Latn",
}
m["amo"] = {
"Amo",
34826,
"nic-kne",
"Latn",
}
m["amp"] = {
"Alamblak",
56688,
"paa-sep",
"Latn",
}
m["amq"] = {
"Amahai",
3327384,
"poz-cma",
"Latn",
}
m["amr"] = {
"Amarakaeri",
35128,
"sai-har",
"Latn",
}
m["ams"] = {
"အမာမဳ-အဝ်ဃှဳမာ လ္ပာ်ဒိုဟ်သမၠုၚ်ကျာ",
2840986,
"jpx-nry",
"Jpan",
translit = s["jpx-translit"],
display_text = s["jpx-displaytext"],
strip_diacritics = s["jpx-stripdiacritics"],
sort_key = s["jpx-sortkey"],
}
m["amt"] = {
"Amto",
56517,
"paa-amu",
"Latn",
}
m["amu"] = {
"ဂေရေရဝ် အာမတ်သဂဝ်",
3501942,
"omq",
"Latn",
}
m["amv"] = {
"Ambelau",
2669214,
"poz-cma",
"Latn",
}
m["amw"] = {
"နဳအဝ်-အာရမေအဳ လ္ပာ်ပလိုတ်",
34226,
"sem-arw",
"Armi, Syrc, Latn",
strip_diacritics = {
Syrc = "Syrc-stripdiacritics"
},
}
m["amx"] = {
"Anmatyerre",
10412317,
"aus-rnd",
"Latn",
}
m["amy"] = {
"Ami",
10408315,
"aus-dal",
"Latn",
}
m["amz"] = {
"Atampaya",
3446651,
"aus-pam",
"Latn",
}
m["ana"] = {
"Andaqui",
2846078,
nil,
"Latn",
}
m["anb"] = {
"Andoa",
2846171,
"sai-zap",
"Latn",
}
m["anc"] = {
"Ngas",
35999,
"cdc-wst",
"Latn",
}
m["and"] = {
"အာန်သာတ်သ်",
3513300,
"poz-hce",
"Latn",
}
m["ane"] = {
"သာရခူ",
3571097,
"poz-cln",
"Latn",
}
m["anf"] = {
"Animere",
34783,
"alv-ktg",
"Latn",
}
-- Entry for code "ang", written in Latin and runic script. Runic gets a
-- transliteration module; Latin gets entry-name normalisation, a sort key and a
-- standard character set.
m["ang"] = {
"အၚ်္ဂလိက်တြေံ",
42365,
"gmw-ang",
"Latn, Runr",
translit = {
Runr = "Runr-translit"
},
strip_diacritics = {
Latn = {
-- Combining marks to drop, built from named entries of `c` (m_langdata.chars).
remove_diacritics = c.acute .. c.circ .. c.macron .. c.breve .. c.dotabove .. c.diaer .. c.dotbelow,
-- Wynn is replaced by W/w. `to` holds a single lookup table keyed by the matched
-- character rather than a plain replacement string.
from = {"[Ƿƿ]"},
to = {{
["Ƿ"] = "W", ["ƿ"] = "w",
}},
},
},
sort_key = {
Latn = {
-- Same set of combining marks as in strip_diacritics above.
remove_diacritics = c.acute .. c.circ .. c.macron .. c.breve .. c.dotabove .. c.diaer .. c.dotbelow,
-- One character class matched against a lookup table. Special letters map either
-- to plain Latin letters/digraphs ("æ" -> "ae") or to a base letter plus p[1]
-- (m_langdata.puaChars). Note that "þ", "ꝥ" and "ꝧ" all map to the same key,
-- "t" .. p[1], so they are not distinguished from one another when sorting.
from = {"[æƀꝺðꝼᵹȝłœꞃꞅꞇþꝥꝧƿ]"},
to = {{
["æ"] = "ae", ["ƀ"] = "b", ["ꝺ"] = "d", ["ð"] = "d" .. p[1], ["ꝼ"] = "f",
["ᵹ"] = "g", ["ȝ"] = "g" .. p[1], ["ł"] = "l", ["œ"] = "oe", ["ꞃ"] = "r",
["ꞅ"] = "s", ["ꞇ"] = "t", ["þ"] = "t" .. p[1], ["ꝥ"] = "t" .. p[1],
["ꝧ"] = "t" .. p[1], ["ƿ"] = "w",
}},
},
},
-- Latin characters regarded as standard, plus the shared punctuation set `c.punc`
-- stored as a positional element.
standard_chars = {
Latn = "AaÆæBbCcDdÐðEeFfGgHhIiLlMmNnOoŒœPpRrSsTtÞþUuWwXxYy",
c.punc,
},
}
m["anh"] = {
"Nend",
6991554,
"ngf-wso",
"Latn",
}
m["ani"] = {
"အာန်ဒဳ",
34849,
"cau-and",
"Cyrl",
translit = "cau-nec-translit",
override_translit = true,
display_text = s["cau-Cyrl-displaytext"],
strip_diacritics = s["cau-Cyrl-stripdiacritics"],
}
m["anj"] = {
"Anor",
56458,
"paa-aia",
"Latn",
}
m["ank"] = {
"Goemai",
35272,
"cdc-wst",
"Latn",
}
m["anl"] = {
"Anu",
4777679,
"sit-mru",
"Latn",
}
m["anm"] = {
"Anāl",
56235,
"tbq-kuk",
"Latn",
}
m["ann"] = {
"Obolo",
36614,
"nic-lcr",
"Latn",
}
m["ano"] = {
"Andoque",
2669225,
"qfa-iso",
"Latn",
}
m["anp"] = {
"အာန်ဂဳကာ",
28378,
"inc-bih",
"Deva, Kthi",
translit = {
Deva = "hi-translit",
Kthi = "bho-Kthi-translit",
},
}
m["anq"] = {
"ဂျရာဝါ",
2475526,
"qfa-ong",
"Latn",
}
m["anr"] = {
"Andh",
4754314,
"inc-sou",
"Deva",
}
m["ans"] = {
"Anserma",
3446613,
"sai-chc",
"Latn",
}
m["ant"] = {
"Antakarinya",
921304,
"aus-psw",
"Latn",
}
m["anu"] = {
"Anuak",
56677,
"sdv-lon",
"Latn",
}
m["anv"] = {
"ဒါန်ညာ",
35187,
"nic-mam",
"Latn",
}
m["anw"] = {
"Anaang",
2845320,
"nic-ief",
"Latn",
}
m["anx"] = {
"Andra-Hus",
2846195,
"poz-aay",
"Latn",
}
m["any"] = {
"Anyi",
28395,
"alv-ctn",
"Latn",
}
m["anz"] = {
"Anem",
56512,
"qfa-dis", -- Papuan; might be an isolate or in a putative West New Britain family
"Latn",
}
m["aoa"] = {
"Angolar",
34994,
"crp",
"Latn",
ancestors = "pt",
}
m["aob"] = {
"Abom",
3446647,
"qfa-dis", -- Papuan; possibly a divergent Tirio language (Anim family), or a top-level TNG node
"Latn",
}
m["aoc"] = {
"ပေမန်",
10729616,
"sai-pem",
"Latn",
}
m["aod"] = {
"Andarum",
3507888,
"paa-ata",
"Latn",
}
m["aoe"] = {
"Angal Enen",
10951638,
"ngf-ank",
"Latn",
}
m["aof"] = {
"Bragat",
3507977,
"paa-pal",
"Latn",
}
m["aog"] = {
"Angoram",
56366, -- cf 6754745 for merged dialect
"paa-lse",
"Latn",
}
m["aoi"] = {
"Anindilyakwa",
2714654,
"aus-arn",
"Latn",
}
m["aoj"] = {
"Mufian",
3507881,
"paa-ara",
"Latn",
}
m["aok"] = {
"Arhö",
4790086,
"poz-cln",
"Latn",
}
m["aol"] = {
"Alorese",
3332062,
"poz",
"Latn",
}
m["aom"] = {
"Ömie",
8078975,
"ngf-koi",
"Latn",
}
m["aon"] = {
"Bumbita Arapesh",
3508044,
"paa-ara",
"Latn",
}
m["aor"] = {
"Aore",
12627129,
"poz-vnn",
"Latn",
}
m["aos"] = {
"Taikat",
7676018,
"paa-taa",
"Latn",
}
m["aot"] = {
"အိန္ဒိ အာတုံ",
5646,
"tbq-bdg",
"Latn, Beng",
}
m["aou"] = {
"အအ်ဥူ",
16109994,
"gio",
"Latn", -- also Hani?
}
m["aox"] = {
"Atorada",
3507932,
"awd",
"Latn",
}
m["aoz"] = {
"Uab Meto",
3441962,
"poz-tim",
"Latn",
}
m["apb"] = {
"သာ'အ်",
36294,
"poz-sls",
"Latn",
}
m["apc"] = {
"အာရဗဳလပ်ဗေန်ထေန်သၟဝ်ကျာ",
22809485,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["apd"] = {
"အာရဗဳ သုဒါန်နဳ",
56573,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["ape"] = {
"Bukiyip",
3507895,
"paa-ara",
"Latn",
}
m["apf"] = {
"Pahanan Agta",
7135432,
"phi",
"Latn",
}
m["apg"] = {
"Ampanang",
4748035,
"poz",
"Latn",
}
m["aph"] = {
"Athpare",
3449126,
"sit-kie",
"Deva, Latn",
}
m["api"] = {
"Apiaká",
3507941,
"tup-gua",
"Latn",
}
m["apj"] = {
"Jicarilla",
28277,
"apa",
"Latn",
}
m["apk"] = {
"Plains Apache",
27861,
"apa",
"Latn",
}
m["apl"] = {
"Lipan",
28269,
"apa",
"Latn",
}
m["apm"] = {
"Chiricahua",
13368,
"apa",
"Latn",
}
m["apn"] = {
"အဖဳနာရဲ",
2858311,
"sai-nje",
"Latn",
}
m["apo"] = {
"Ambul",
12627135,
"poz-ocw",
"Latn",
}
m["app"] = {
"Apma",
2669188,
"poz-vnn",
"Latn",
}
m["apq"] = {
"အ-ၜေအ်သိခွါ",
28466,
"qfa-adc",
"Latn",
}
m["apr"] = {
"Arop-Lokep",
2863482,
"poz-ocw",
"Latn",
}
m["aps"] = {
"Arop-Sissano",
12627242,
"poz-ocw",
"Latn",
}
m["apt"] = {
"Apatani",
56306,
"sit-tan",
"Latn",
}
m["apu"] = {
"Apurinã",
2859081,
"awd",
"Latn",
}
m["apv"] = {
"Alapmunte",
16110782,
"sai-nmk",
"Latn",
}
m["apw"] = {
"အာဖေန်ချဳ လ္ပာ်ပလိုတ်",
28060,
"apa",
"Latn",
}
m["apx"] = {
"Aputai",
12473343,
"poz-tim",
"Latn",
}
m["apy"] = {
"အာက်ပါလာဲန်",
2736980,
"sai-gui",
"Latn",
}
m["apz"] = {
"သာပဵုယဝ်ကာ",
7398693,
"ngf-woj",
"Latn",
}
m["aqc"] = {
"အာဆိ",
34915,
"cau-lzg",
"Cyrl",
translit = "cau-nec-translit",
override_translit = true,
display_text = s["cau-Cyrl-displaytext"],
strip_diacritics = s["cau-Cyrl-stripdiacritics"],
sort_key = {
from = {
"ккъӏв", "ххьӏв", -- 5 chars
"гъӏв", "ёоӏ", "ккъӏ", "ккъв", "къӏв", "ллъв", "ххьӏ", "хъӏв", "хьӏв", "ццӏв", "ччӏв", -- 4 chars
"ааӏ", "гӏв", "гъӏ", "гъв", "гьв", "ееӏ", "ёӏ", "ёо", "ииӏ", "кӏв", "ккв", "ккъ", "къӏ", "къв", "кьв", "лӏв", "ллъ", "лъв", "льв", "ооӏ", "пӏв", "ппв", "ссв", "тӏв", "ттв", "ууӏ", "хӏв", "ххв", "хъӏ", "хъв", "хьӏ", "цӏв", "ццӏ", "ццв", "чӏв", "ччӏ", "ээӏ", "юуӏ", "яаӏ", -- 3 chars
"аӏ", "аа", "гӏ", "гв", "гъ", "гь", "дв", "еӏ", "ее", "ё", "жв", "зв", "иӏ", "ии", "кӏ", "кв", "кк", "къ", "кь", "лӏ", "лв", "лъ", "ль", "оӏ", "оо", "пӏ", "пв", "пп", "св", "сс", "тӏ", "тв", "тт", "уӏ", "уу", "фв", "хӏ", "хв", "хх", "хъ", "цӏ", "цв", "цц", "чӏ", "чв", "шв", "щв", "эӏ", "ээ", "юӏ", "юу", "яӏ", "яа" -- 2 chars
},
to = {
"к" .. p[8], "х" .. p[7],
"г" .. p[6], "е" .. p[7], "к" .. p[7], "к" .. p[9], "к" .. p[12], "л" .. p[5], "х" .. p[6], "х" .. p[10], "х" .. p[13], "ц" .. p[6], "ч" .. p[5],
"а" .. p[3], "г" .. p[2], "г" .. p[5], "г" .. p[7], "г" .. p[9], "е" .. p[3], "е" .. p[5], "е" .. p[6], "и" .. p[3], "к" .. p[2], "к" .. p[5], "к" .. p[6], "к" .. p[11], "к" .. p[13], "к" .. p[15], "л" .. p[2], "л" .. p[4], "л" .. p[7], "л" .. p[9], "о" .. p[3], "п" .. p[2], "п" .. p[5], "с" .. p[3], "т" .. p[2], "т" .. p[5], "у" .. p[3], "х" .. p[2], "х" .. p[5], "х" .. p[9], "х" .. p[11], "х" .. p[12], "ц" .. p[2], "ц" .. p[5], "ц" .. p[7], "ч" .. p[2], "ч" .. p[4], "э" .. p[3], "ю" .. p[3], "я" .. p[3],
"а" .. p[1], "а" .. p[2], "г" .. p[1], "г" .. p[3], "г" .. p[4], "г" .. p[8], "д" .. p[1], "е" .. p[1], "е" .. p[2], "е" .. p[4], "ж" .. p[1], "з" .. p[1], "и" .. p[1], "и" .. p[2], "к" .. p[1], "к" .. p[3], "к" .. p[4], "к" .. p[10], "к" .. p[14], "л" .. p[1], "л" .. p[3], "л" .. p[6], "л" .. p[8], "о" .. p[1], "о" .. p[2], "п" .. p[1], "п" .. p[3], "п" .. p[4], "с" .. p[1], "с" .. p[2], "т" .. p[1], "т" .. p[3], "т" .. p[4], "у" .. p[1], "у" .. p[2], "ф" .. p[1], "х" .. p[1], "х" .. p[3], "х" .. p[4], "х" .. p[8], "ц" .. p[1], "ц" .. p[3], "ц" .. p[4], "ч" .. p[1], "ч" .. p[3], "ш" .. p[1], "щ" .. p[1], "э" .. p[1], "э" .. p[2], "ю" .. p[1], "ю" .. p[2], "я" .. p[1], "я" .. p[2]
}
},
}
m["aqd"] = {
"Ampari Dogon",
4748057,
"nic-dgw",
"Latn",
}
m["aqg"] = {
"Arigidi",
34829,
"alv-von",
"Latn",
}
m["aqm"] = {
"Atohwaim",
11732297,
"paa-kay",
"Latn",
}
m["aqn"] = {
"Northern Alta",
7058116,
"phi",
"Latn",
}
m["aqp"] = {
"Atakapa",
10975683,
"qfa-iso",
"Latn",
}
m["aqr"] = {
"Arhâ",
4790085,
"poz-cln",
"Latn",
}
m["aqt"] = {
"Angaité",
15736037,
"sai-mas",
"Latn",
}
m["aqz"] = {
"Akuntsu",
4701960,
"tup",
"Latn",
}
-- Data record for language code "arc": display name, Wikidata item number, family code and the list of scripts,
-- followed by per-script processing settings.
m["arc"] = {
"အာရမေအဳ",
28602,
"sem-ara",
"Hebr, Armi, Syrc, Palm, Nbat, Phnx, Mand, Samr, Hatr, Elym",
-- Only Armi and Palm are given a transliteration module here; Phnx is covered by [[Module:scripts/data]] (see the
-- note at the end of this record).
translit = {
Armi = "Armi-translit",
Palm = "Palm-translit",
},
strip_diacritics = {
-- The first three were added by [[User:Wikitiki89]] in 2015 for use with Syriac, which has diacritics that look
-- like a diaeresis (syāmē) and macrons above and below (mṭalqānā); see Wikipedia [[w:Syriac alphabet]]. But
-- I don't know if they are actually represented using these diacritics.
-- NOTE(review): the trailing `u(0x0730) .. "-" .. u(0x0748)` reads as the code-point range U+0730 to U+0748;
-- presumably the consumer treats this string as a character set in which "-" denotes a range. Confirm.
Syrc = {remove_diacritics = c.macron .. c.diaer .. c.macronbelow .. u(0x0730) .. "-" .. u(0x0748)},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
-- Samr strip_diacritics, sort_key in [[Module:scripts/data]]; previously no sort_key for Samr, presumably a mistake
-- Phnx translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission)
}
m["ard"] = {
"Arabana",
3507959,
"aus-kar",
"Latn",
}
m["are"] = {
"Western Arrernte",
12645549,
"aus-rnd",
"Latn",
}
m["arh"] = {
"Arhuaco",
2640621,
"cba",
"Latn",
}
m["ari"] = {
"Arikara",
56539,
"cdd",
"Latn",
strip_diacritics = {remove_diacritics = c.acute},
}
m["arj"] = {
"Arapaso",
9627356,
"sai-tuc",
"Latn",
}
m["ark"] = {
"Arikapú",
3446640,
"sai-mje",
"Latn",
}
m["arl"] = {
"Arabela",
2591221,
"sai-zap",
"Latn",
}
m["arn"] = {
"မာၜေအ်ဓုန်ကာန်",
33730,
"sai-ara",
"Latn",
}
m["aro"] = {
"Araona",
958414,
"sai-tac",
"Latn",
}
m["arp"] = {
"အာရာပါဟဝ်",
56417,
"alg-ara",
"Latn",
}
m["arq"] = {
"အာရဗဳ အာန်လ်ဂျဳရဳယျာ",
56499,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["arr"] = {
"Arara-Karo",
35539,
"tup",
"Latn",
}
m["ars"] = {
"အာရဗဳ နေတ်ဒဳ",
56574,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["aru"] = {
"Arua",
2746221,
"auf",
"Latn",
}
m["arv"] = {
"Arbore",
56883,
"cus-eas",
"Latn",
}
m["arw"] = {
"အာရတ်ဝါတ်",
2655664,
"awd-taa",
"Latn",
}
m["arx"] = {
"Aruá",
3507907,
"tup",
"Latn",
}
m["ary"] = {
"အာရဗဳ မဝ်ရဝ်ကာန်",
56426,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["arz"] = {
"အာရဗဳ အဳဂျေပ်",
29919,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["asa"] = {
"Pare",
36403,
"bnt-par",
"Latn",
}
m["asb"] = {
"Assiniboine",
2591288,
"sio-dkt",
"Latn",
}
m["asc"] = {
"Casuarina Coast Asmat",
11732046,
"ngf-asm",
"Latn",
}
m["ase"] = {
"အရေဝ်ဘာသာကွတ်တဲအမေရိကာန်",
14759,
"sgn",
"Sgnw",
}
m["asf"] = {
"Auslan",
29525,
"sgn",
"Latn", -- when documented
}
m["asg"] = {
"Cishingini",
35199,
"nic-kam",
"Latn",
}
m["ash"] = {
"Abishira",
2871740,
"qfa-dis", -- extinct, poorly documented; isolate or in a proposed Tequiraca-Canichana family by Kaufman (1994)
"Latn",
}
m["asi"] = {
"Buruwai",
5001031,
"ngf-sab",
"Latn",
}
m["asj"] = {
"Nsari",
36418,
"nic-bbe",
"Latn",
}
m["ask"] = {
"အာပ်သကေန်",
29379,
"nur-sou",
"Arab, Latn",
}
m["asl"] = {
"Asilulu",
12473347,
"poz-cma",
"Latn",
}
m["asn"] = {
"ဃှေန်ဂူ အေက်သဝေနဳ",
8044571,
"tup-gua",
"Latn",
}
m["aso"] = {
"Dano",
5220979,
"ngf-gah",
"Latn",
}
m["asp"] = {
"Algerian Sign Language",
3135421,
"sgn",
}
m["asq"] = {
"Austrian Sign Language",
36668,
"sgn",
"Latn", -- when documented
}
m["asr"] = {
"Asuri",
3504321,
"mun",
"Latn", -- when documented
}
m["ass"] = {
"Ipulo",
35408,
"nic-tvc",
"Latn",
}
m["ast"] = {
"အေက်သတဝ်ရေန်",
29507,
"roa-asl",
"Latn",
}
m["asu"] = {
"Tocantins Asurini",
32041490,
"tup-gua",
"Latn",
}
m["asv"] = {
"Asoa",
56296,
"csu-maa",
"Latn",
}
m["asw"] = {
"Australian Aboriginal Sign Language",
955216,
"sgn",
"Latn", -- when documented
}
m["asx"] = {
"Muratayak",
11732766,
"ngf-war",
"Latn",
}
m["asy"] = {
"Yaosakor Asmat",
16113158,
"ngf-asm",
"Latn",
}
m["asz"] = {
"As",
2866218,
"poz-hce",
"Latn",
}
m["ata"] = {
"Pele-Ata",
56511,
"qfa-dis", -- Papuan; possibly in a putative West New Britain family, or an isolate
"Latn",
}
m["atb"] = {
"ဇြာဲဝါ",
56594,
"tbq-brm",
"Latn, Lisu", -- also Hani?
translit = {Lisu = "Lisu-translit"},
sort_key = {Lisu = s["Lisu-sortkey"]},
}
m["atc"] = {
"Atsahuaca",
4817730,
"sai-pan",
"Latn",
}
m["atd"] = {
"Ata Manobo",
12627315,
"mno",
"Latn",
}
m["ate"] = {
"အေက်တာမ်ဗါဝ်လ်",
4813055,
"ngf-wso",
"Latn",
}
m["atg"] = {
"Okpela",
7082551,
"alv-yek",
"Latn",
}
m["ati"] = {
"Attié",
34844,
"alv-lag",
"Latn",
}
m["atj"] = {
"အထိကာမိတ်",
56590,
"alg",
"Latn",
ancestors = "cr",
}
m["atk"] = {
"Ati",
3217458,
"phi",
"Latn",
}
m["atl"] = {
"Mount Iraya Agta",
6921430,
"phi",
"Latn",
}
m["atm"] = {
"Ata",
4812603,
"phi",
"Latn",
}
m["ato"] = {
"Atong (Cameroon)",
34824,
"nic-grs",
"Latn",
}
m["atp"] = {
"Pudtol Atta",
12640726,
"phi",
"Latn",
}
m["atq"] = {
"Aralle-Tabulahan",
4783889,
"poz-ssw",
"Latn",
}
m["atr"] = {
"ဝါဲမဳရဳ-အာထရဝ်ရဳ",
56865,
"sai-car",
"Latn",
}
m["ats"] = {
"ဂရတ် ဗါန်တေ",
56628,
"alg-ara",
"Latn",
}
m["att"] = {
"ပါန်ပလဝ်နာ အာတ်တာ",
12639245,
"phi",
"Latn",
}
m["atu"] = {
"Reel",
7306882,
"sdv-dnu",
"Latn",
}
m["atv"] = {
"အာန်တာယ် လ္ပာ်သၟဝ်ကျာ",
2640863,
"trk-ssb",
"Cyrl",
translit = "Altai-translit",
}
m["atw"] = {
"Atsugewi",
56718,
"nai-pal",
"Latn",
}
m["atx"] = {
"Arutani",
56609,
nil,
"Latn",
}
m["aty"] = {
"Aneityum",
2379113,
"poz-vns",
"Latn",
}
m["atz"] = {
"Arta",
3508067,
"phi",
"Latn",
}
m["aua"] = {
"Asumboa",
4811870,
"poz-tem",
"Latn",
}
m["aub"] = {
"Alugu",
12626798,
"tbq-urp",
"Latn", -- also Hani?
}
m["auc"] = {
"Huaorani",
758570,
"qfa-iso",
"Latn",
}
m["aud"] = {
"Anuta",
35326,
"poz-pnp",
"Latn",
}
m["aug"] = {
"Aguna",
34733,
"alv-gbe",
"Latn",
}
m["auh"] = {
"Aushi",
2872082,
"bnt-sbi",
"Latn",
}
m["aui"] = {
"Anuki",
3508132,
"poz-ocw",
"Latn",
}
m["auj"] = {
"အာဂျဳလာ",
56398,
"ber",
"Latn, Arab, Tfng",
}
m["auk"] = {
"Heyo",
3504295,
"paa-hya",
"Latn",
}
m["aul"] = {
"Aulua",
427300,
"poz-vnc",
"Latn",
}
m["aum"] = {
"အာသူ",
34798,
"alv-ngb",
"Latn",
}
m["aun"] = {
"Molmo One",
12637224,
"paa-trr",
"Latn",
}
m["auo"] = {
"Auyokawa",
56247,
"cdc-wst",
"Latn",
}
m["aup"] = {
"Makayam",
6738863,
"paa-tir",
"Latn",
}
m["auq"] = {
"Anus",
23855,
"poz-ocw",
"Latn",
}
m["aur"] = {
"Aruek",
3504279,
"paa-kom",
"Latn",
}
m["aut"] = {
"Austral",
2669261,
"poz-pep",
"Latn",
}
m["auu"] = {
"Auye",
4827334,
"ngf-pan",
"Latn",
}
m["auw"] = {
"Awyi",
3513326,
"paa-taa",
"Latn",
}
m["aux"] = {
"အာဝ်ရာက်",
3507995,
"tup-gua",
"Latn",
}
m["auy"] = {
"Auyana",
2873211,
"ngf-gau",
"Latn",
}
m["auz"] = {
"အာရဗဳ ဥူသဗက်ကဳ",
3399507,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["avb"] = {
"Avau",
12627412,
"poz-ocw",
"Latn",
}
m["avd"] = {
"အာယ်ဝဳရဳ-ဝဳဒါရဳ",
3327357,
"xme",
"fa-Arab",
ancestors = "xme-mid",
}
m["avi"] = {
"Avikam",
34840,
"alv-lag",
"Latn",
}
m["avk"] = {
"ခါဝ်တာဝါယ်",
1377116,
"art",
"Latn",
type = "appendix-constructed",
}
m["avm"] = {
"Angkamuthi",
62603022,
"aus-pmn",
"Latn",
}
m["avn"] = {
"Avatime",
34796,
"alv-ktg",
"Latn",
}
m["avo"] = {
"Agavotaguerra",
3508007,
"awd",
"Latn",
}
m["avs"] = {
"Aushiri",
3409318,
"sai-zap",
"Latn",
}
m["avt"] = {
"Au",
3446608,
"paa-wap",
"Latn",
}
m["avu"] = {
"အာတ်ဝါဝ်ခါယျ",
56685,
"csu-mma",
"Latn",
}
m["avv"] = {
"Avá-Canoeiro",
4829584,
"tup-gua",
"Latn",
}
m["awa"] = {
"အဝါဒဳ",
29579,
"inc-hie",
"Deva, Kthi, fa-Arab",
ancestors = "inc-oaw",
translit = {
Deva = "hi-translit"
},
}
m["awb"] = {
"Awa (New Guinea)",
2874650,
"ngf-gau",
"Latn",
}
m["awc"] = {
"Cicipu",
35193,
"nic-kam",
"Latn",
}
m["awe"] = {
"အာဝပ်တဳ",
4830038,
"tup",
"Latn",
}
m["awg"] = {
"အာန်ဂူတဳမဳရဳ",
4764288,
"aus-pam",
"Latn",
}
m["awh"] = {
"Awbono",
3446684,
"paa-baa",
"Latn",
}
m["awi"] = {
"Aekyom",
3399691,
"paa-kae",
"Latn",
}
m["awk"] = {
"အဝါဗာကဴ",
3449138,
"aus-pam",
"Latn",
}
m["awm"] = {
"Arawum",
4784537,
"ngf-rai",
"Latn",
}
m["awn"] = {
"Awngi",
34934,
"cus-cen",
"Ethi",
}
m["awo"] = {
"Awak",
3446643,
"alv-wjk",
"Latn",
}
m["awr"] = {
"Awera",
56379,
"paa-flp",
"Latn",
}
m["aws"] = {
"South Awyu",
12633986,
"ngf-awy",
"Latn",
}
m["awt"] = {
"Araweté",
4784535,
"tup-gua",
"Latn",
}
m["awu"] = {
"Central Awyu",
12628801,
"ngf-awy",
"Latn",
}
m["awv"] = {
"Jair Awyu",
16110177,
"ngf-awy",
"Latn",
}
m["aww"] = {
"Awun",
56369,
"paa-sep",
"Latn",
}
m["awx"] = {
"Awara",
2874670,
"ngf-waa",
"Latn",
}
m["awy"] = {
"Edera Awyu",
12630425,
"ngf-awy",
"Latn",
}
m["axb"] = {
"Abipón",
11252539,
"sai-guc",
"Latn",
}
m["axe"] = {
"Ayerrerenge",
16112737,
"aus-pam",
"Latn",
}
m["axg"] = {
"Arára (Mato Grosso)",
3446660,
nil,
"Latn",
}
m["axk"] = {
"Aka (Central Africa)",
11010149,
"bnt-ngn",
"Latn",
}
m["axl"] = {
"Lower Southern Aranda",
6693295,
"aus-rnd",
"Latn",
}
m["axm"] = {
"အာမေနဳယျာအဒေါဝ်",
4438498,
"hyx",
"Armn",
ancestors = "xcl",
translit = "Armn-translit",
override_translit = true,
strip_diacritics = {
remove_diacritics = "՞՜՛՟",
from = {"եւ", "ՙ", "՚"},
to = {"և", "ʻ", "’"}
}
}
m["axx"] = {
"Xârâgurè",
8045635,
"poz-cln",
"Latn",
}
m["aya"] = {
"Awar",
56876,
"paa-baw",
"Latn",
}
m["ayb"] = {
"Ayizo",
34841,
"alv-pph",
"Latn",
}
m["ayd"] = {
"Ayabadhu",
3509164,
"aus-pmn",
"Latn",
}
m["aye"] = {
"Ayere",
34788,
"alv-aah",
"Latn",
}
m["ayg"] = {
"Nyanga (Togo)",
35446,
"alv-gng",
"Latn",
}
m["ayi"] = {
"Leyigha",
3914492,
"nic-uce",
"Latn",
}
m["ayk"] = {
"Akuku",
3450179,
"alv-nwd",
"Latn",
}
m["ayl"] = {
"အာရဗဳလေတ်ဗျာ",
56503,
"sem-arb",
"Arab",
strip_diacritics = "ar-stripdiacritics",
}
m["ayn"] = {
"အာရဗဳ ယာက်မနဳ",
1686766,
"sem-arb",
"Arab, Hebr",
strip_diacritics = {
Arab = "ar-stripdiacritics",
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
m["ayo"] = {
"Ayoreo",
56634,
"sai-zam",
"Latn",
}
m["ayp"] = {
"အာရဗဳ မာက်သဝ်ပဝ်တေမဳယာန် သၟဝ်ကျာ",
56577,
"sem-arb",
"Arab",
ancestors = "acm",
strip_diacritics = "ar-stripdiacritics",
}
m["ayq"] = {
"Ayi",
56449,
"paa-sep",
"Latn",
}
m["ays"] = {
"Sorsogon Ayta",
7563752,
"phi",
"Latn",
}
m["ayt"] = {
"Bataan Ayta",
4921648,
"phi",
"Latn",
}
m["ayu"] = {
"Ayu",
34786,
"alv",
"Latn",
}
-- ayy deleted and removed from ISO; per the removal request, "no linguistic data exists for any [Ayta] language that the
-- ancestors of this group might have once spoken. And thus, there is no evidence that this group ever had a language
-- distinct from any other Philippine language." [Lobel]
m["ayz"] = {
"Maybrat",
4830892,
"paa-may",
-- either an isolate; grouped with Abun and the West Bird's Head family; or in the putative West Papuan family
"Latn",
}
m["aza"] = {
"Azha",
4832486,
"tbq-axi",
"Latn",
}
m["azd"] = {
"ဒူရာန်ဂဝ် နာဟွာတာယ်လ်လ္ပာ်ဖာဗၟံက်",
16115449,
"azc-dur",
"Latn",
}
m["azg"] = {
"San Pedro Amuzgos Amuzgo",
35092,
"omq",
"Latn",
}
m["azm"] = {
"Ipalapa Amuzgo",
12633013,
"omq",
"Latn",
}
m["azn"] = {
"Western Durango နာဟွာတာယ်လ်",
12645553,
"azc-dur",
"Latn",
}
m["azo"] = {
"Awing",
34856,
"nic-nge",
"Latn",
}
m["azt"] = {
"Faire Atta",
12630884,
"phi",
"Latn",
}
m["azz"] = {
"ဟာဲလာန် ပွာယ်ဗလာ နာဟွာတာယ်လ်",
12953754,
"azc-nah",
"Latn",
}
-- Module result: pass the populated table `m` to `finalizeData` from [[Module:languages]], tagged as "language" data,
-- and return whatever that function returns.
return require("Module:languages").finalizeData(m, "language")
ofxnqqiv8iy4gm884yq49wlidw5gqlc
မဝ်ဂျူ:languages/data/3/n
828
657
395878
394329
2026-05-29T15:37:06Z
Intobesa.bot
1035
Bot: ပလေဝ်ဒါန်
395878
Scribunto
text/plain
local m_langdata = require("Module:languages/data")
-- Lazy accessor for `char` from [[Module:string utilities]]. The module is only required the first time `u` is
-- called, since it may not be needed at all (depending on the data). That first call rebinds `u` to the real
-- function, so every later call goes straight to it.
local function u(...)
	local char = require("Module:string utilities").char
	u = char
	return char(...)
end
local c = m_langdata.chars
local p = m_langdata.puaChars
local s = m_langdata.shared
local m = {}
m["naa"] = {
"Namla",
3508760,
"paa-pau",
"Latn",
}
m["nab"] = {
"Nambikwara",
2068190,
"sai-nmk",
"Latn",
}
m["nac"] = {
"Narak",
6965295,
"ngf",
"Latn",
}
m["nae"] = {
"Naka'ela",
6960073,
"poz",
"Latn",
}
m["naf"] = {
"Nabak",
11732491,
"ngf",
"Latn",
}
m["nag"] = {
"Naga Pidgin",
3503454,
"crp",
"Latn",
ancestors = "as",
}
m["nah"] = {
"နာဟွာတာယ်လ်",
13300,
"azc-nah",
"Latn",
}
m["naj"] = {
"Nalu",
36026,
"alv-nal",
"Latn",
}
m["nak"] = {
"နာကာနာဲ",
6528669,
"poz-ocw",
"Latn",
}
m["nal"] = {
"Nalik",
3335387,
"poz-ocw",
"Latn",
}
m["nam"] = {
"Ngan'gityemerri",
3298041,
"aus-dal",
"Latn",
}
-- Being converted into the family "Southern Min" ("zhx-nan", which will take the code "nan" once the language can be removed). Retain the name "Min Nan" here to avoid having to move things that are scheduled for deletion anyway.
-- Data record for language code "nan": display name, Wikidata item number, family code and the list of scripts,
-- followed by named settings.
m["nan"] = {
"မိန်နာန်",
36495,
"zhx-com",
"Hants, Latn, Bopo, Kana",
wikimedia_codes = "zh-min-nan",
generate_forms = "zh-generateforms",
-- NOTE(review): the script list above names "Hants", but the sort key below is keyed by "Hani". Presumably the
-- consumer resolves Hants to Hani when looking up per-script settings; confirm against [[Module:languages]].
sort_key = {
Hani = "Hani-sortkey",
Kana = "Kana-sortkey"
},
}
m["nao"] = {
"Naaba",
11883865,
"sit-tib",
ancestors = "xct",
}
m["nap"] = {
"နဳပဝ်လဳတေန်",
33845,
"roa-itd",
"Latn",
}
m["naq"] = {
"ခိုဝ်ခိုဝ်",
13301,
"khi-khk",
"Latn",
}
m["nar"] = {
"Iguta",
5621686,
"nic-jer",
"Latn",
}
m["nas"] = {
"Nasioi",
56772,
"paa-sbo",
"Latn",
}
m["nat"] = {
"Hungworo",
3914395,
"nic-kmk",
"Latn",
}
m["naw"] = {
"Nawuri",
35906,
"alv-gng",
"Latn",
}
m["nax"] = {
"Nakwi",
3504178,
"qfa-mal",
"Latn",
}
m["nay"] = {
"Ngarrindjeri",
7022091,
"aus-pam",
"Latn",
}
m["naz"] = {
"Coatepec နာဟွာတာယ်လ်",
5138605,
"azc-nah",
"Latn",
}
m["nba"] = {
"Nyemba",
3346655,
"bnt-clu",
"Latn",
ancestors = "lch",
}
m["nbb"] = {
"Ndoe",
36134,
"nic-eko",
"Latn",
}
m["nbc"] = {
"Chang",
5071694,
"sit-kch",
"Latn",
}
m["nbd"] = {
"Ngbinda",
11132859,
"bnt-boa",
"Latn",
}
m["nbe"] = {
"Konyak Naga",
6430448,
"sit-kch",
"Latn",
}
m["nbg"] = {
"Nagarchal",
13299,
"dra-gon",
}
m["nbh"] = {
"Ngamo",
3438705,
"cdc-wst",
"Latn",
}
m["nbi"] = {
"Mao Naga",
12952905,
"tbq-anp",
"Latn",
}
m["nbj"] = {
"Ngarinman",
10600380,
nil,
"Latn",
}
m["nbk"] = {
"Nake",
11732496,
"ngf-mad",
"Latn",
}
m["nbm"] = {
"Ngbaka Ma'bo",
3915331,
"nic-nkm",
"Latn",
}
m["nbn"] = {
"Kuri",
3200540,
"poz",
"Latn",
}
m["nbo"] = {
"Nkukoli",
3914482,
"nic-uce",
"Latn",
}
m["nbp"] = {
"Nnam",
36138,
"nic-eko",
"Latn",
}
m["nbq"] = {
"Nggem",
12952956,
"ngf",
"Latn",
}
m["nbr"] = {
"Numana",
5529310,
"nic-nin",
"Latn",
}
m["nbs"] = {
"Namibian Sign Language",
6961792,
"sgn",
"Latn", -- when documented
}
m["nbt"] = {
"Na",
12952895,
"sit-tan",
"Deva, Latn",
}
m["nbu"] = {
"Rongmei Naga",
12952912,
"sit-zem",
"Latn",
}
m["nbv"] = {
"Ngamambo",
11129694,
"nic-mom",
"Latn",
}
m["nbw"] = {
"Southern Ngbandi",
17522635,
"nic-ngd",
"Latn",
}
m["nby"] = {
"Ningera",
11732524,
"paa-brd",
"Latn",
}
m["nca"] = {
"Iyo",
6101336,
"ngf-fin",
"Latn",
}
m["ncb"] = {
"Central Nicobarese",
3335553,
"aav-nic",
"Deva, Latn",
}
m["ncc"] = {
"Ponam",
3396122,
"poz-aay",
"Latn",
}
m["ncd"] = {
"Nachering",
6957144,
"sit-kic",
"Deva",
}
m["nce"] = {
"Yale",
2992915,
"paa", --kwomtari or isolate
"Latn",
}
m["ncf"] = {
"Notsi",
3344784,
"poz-ocw",
"Latn",
}
m["ncg"] = {
"နေတ်သကာ",
3342138,
"nai-tsi",
"Latn",
}
m["nch"] = {
"Central Huasteca နာဟွာတာယ်လ်",
2194290,
"azc-nah",
"Latn",
}
m["nci"] = {
"နာဝါတ်ဒဝ်ဝၚ်ဂန္ထ",
559242,
"azc-nah",
"Latn",
entry_name = {remove_diacritics = c.macron},
}
m["ncj"] = {
"Northern Puebla နာဟွာတာယ်လ်",
15705671,
"azc-nah",
"Latn",
}
m["nck"] = {
"Nakara",
6960662,
"aus-arn",
"Latn",
}
m["ncl"] = {
"Michoacán နာဟွာတာယ်လ်",
2896217,
"azc-nah",
"Latn",
}
m["ncm"] = {
"Nambo",
42173731,
nil,
"Latn",
}
m["ncn"] = {
"Nauna",
3337158,
"poz-aay",
"Latn",
}
m["nco"] = {
"Sibe",
56806,
"paa-sbo",
"Latn",
}
m["ncr"] = {
"Ncane",
11297920,
"nic-bbe",
"Latn",
ancestors = "nhu",
}
m["ncs"] = {
"Nicaraguan Sign Language",
33765,
"sgn",
"Sgnw",
}
m["nct"] = {
"Chothe Naga",
5105385,
"tbq-kuk",
"Beng, Latn",
}
m["ncu"] = {
"Chumburung",
35198,
"alv-gng",
"Latn",
}
m["ncx"] = {
"ပွယ်ဗလာ နာဟွာတာယ်လ် ဗဟဵု",
5061727,
"azc-nah",
"Latn",
}
m["ncz"] = {
"Natchez",
3111838,
nil,
"Latn",
}
m["nda"] = {
"Ndasa",
35904,
"bnt-kel",
"Latn",
}
m["ndb"] = {
"Kenswei Nsei",
7067553,
"nic-rnn",
"Latn",
}
m["ndc"] = {
"Ndau",
13311,
"bnt-sho",
"Latn",
}
m["ndd"] = {
"Nde-Nsele-Nta",
36131,
"nic-eko",
"Latn",
}
m["ndf"] = {
"Nadruvian",
6957967,
nil,
"Latn",
}
m["ndg"] = {
"Ndengereko",
6983726,
"bnt-mbi",
"Latn",
}
m["ndh"] = {
"Ndali",
6983678,
"bnt-run",
"Latn",
}
m["ndi"] = {
"Chamba Leko",
36381,
"alv-lek",
"Latn",
}
m["ndj"] = {
"Ndamba",
6983684,
"bnt-kil",
"Latn",
}
m["ndk"] = {
"Ndaka",
11164947,
"bnt-nya",
"Latn",
}
m["ndl"] = {
"Ndolo",
6983788,
"bnt-zbi",
"Latn",
ancestors = "lse",
}
m["ndm"] = {
"Ndam",
56283,
"cdc-est",
"Latn",
}
m["ndn"] = {
"Ngundi",
35916,
"bnt-ngn",
"Latn",
}
m["ndp"] = {
"Ndo",
6983774,
"csu-mle",
"Latn",
}
m["ndq"] = {
"Ndombe",
6983792,
"bnt-swb",
"Latn",
}
m["ndr"] = {
"Ndoola",
35837,
"nic-mmb",
"Latn",
}
m["nds"] = {
"ဂျာမာန်မသဝ်",
25433,
"gmw-lgm",
"Latn",
ancestors = "gml",
}
m["ndt"] = {
"Ndunga",
6983857,
"nic-mbc",
"Latn",
}
m["ndu"] = {
"Dugun",
11015189,
"alv-dur",
"Latn",
}
m["ndv"] = {
"Ndut",
36028,
"alv-cng",
"Latn",
}
m["ndw"] = {
"Ndobo",
11008568,
"bnt-ngn",
"Latn",
}
m["ndx"] = {
"Nduga",
6983833,
nil,
"Latn",
}
m["ndy"] = {
"Lutos",
6705910,
"csu-val",
"Latn",
}
m["ndz"] = {
"Ndogo",
35983,
"nic-ser",
"Latn",
}
m["nea"] = {
"Eastern Ngad'a",
12473454,
"poz-cet",
}
m["neb"] = {
"ထါန်ရာ",
7853636,
"dmn-mda",
"Latn",
}
m["nec"] = {
"Nedebang",
4925378,
"ngf",
}
m["ned"] = {
"Nde-Gbite",
11010279,
"nic-grf",
}
m["nee"] = {
"Kumak",
3347266,
"poz-cln",
"Latn",
}
m["nef"] = {
"Nefamese",
6987002,
"crp",
}
m["neg"] = {
"နေတ်ဂေါတ်ဒါန်",
33676,
"tuw-ewe",
"Cyrl",
}
m["neh"] = {
"Nyenkha",
3695185,
"sit-ebo",
"Tibt, Latn",
translit = {Tibt = "Tibt-translit"},
override_translit = true,
display_text = {Tibt = s["Tibt-displaytext"]},
entry_name = {Tibt = s["Tibt-entryname"]},
sort_key = {Tibt = "Tibt-sortkey"},
}
m["nej"] = {
"နေကိုဝ်",
6989840,
"ngf-fin",
"Latn",
}
m["nek"] = {
"Neku",
14916900,
"poz-cln",
}
m["nem"] = {
"Nemi",
3338008,
"poz-cln",
"Latn",
}
m["nen"] = {
"Nengone",
3338052,
"poz-cln",
"Latn",
}
m["neo"] = {
"Ná-Meo",
15977293,
"hmn",
}
m["neq"] = {
"North Central Mixe",
25559729,
nil,
"Latn",
}
m["ner"] = {
"Yahadian",
8046778,
nil,
"Latn",
}
m["nes"] = {
"Bhoti Kinnauri",
21179921,
"sit-las",
}
m["net"] = {
"Nete",
6998869,
"paa-eng",
}
m["neu"] = {
"Neo",
606917,
"art",
"Latn",
type = "appendix-constructed",
}
m["nev"] = {
"Nyaheun",
7070801,
"mkh-ban",
}
m["new"] = {
"နူဝါ",
33979,
"sit-new",
"Deva, Newa, Ranj",
ancestors = "nwx",
translit = {
Deva = "new-translit",
Newa = "new-Newa-translit",
},
}
m["nex"] = {
"Neme",
12952941,
}
m["ney"] = {
"Neyo",
36410,
"kro",
}
m["nez"] = {
"Nez Perce",
3339226,
"nai-shp",
"Latn",
}
m["nfa"] = {
"Dhao",
2053828,
"poz",
}
m["nfd"] = {
"Ahwai",
3913957,
"nic-plt",
"Latn",
}
m["nfl"] = {
"အာဲဝူ",
56742,
"poz-tem",
"Latn",
}
m["nfr"] = {
"Nafaanra",
13297,
"alv-snf",
"Latn",
}
m["nfu"] = {
"Mfumte",
6826794,
"nic-nka",
"Latn",
}
m["nga"] = {
"Ngbaka",
36022,
"gba-eas",
"Latn",
}
m["ngb"] = {
"Northern Ngbandi",
17522631,
"nic-ngd",
"Latn",
}
m["ngc"] = {
"Ngombe (Congo)",
3123524,
"bnt-bun",
}
m["ngd"] = {
"Ngando (Central African Republic)",
35910,
"bnt-ngn",
}
m["nge"] = {
"Ngemba",
6750551,
"nic-nge",
"Latn",
}
m["ngg"] = { -- compare 'aiy'
"Ngbaka Manza",
11033316,
"gba-eas",
"Latn",
}
m["ngh"] = {
"Nǀuu",
2618974,
"khi-tuu",
"Latn",
}
m["ngi"] = {
"Ngizim",
3914924,
"cdc-wst",
"Latn",
}
m["ngj"] = {
"Ngie",
36361,
"nic-mom",
"Latn",
}
m["ngk"] = {
"Ngalkbun",
3913790,
"aus-gun",
"Latn",
}
m["ngl"] = {
"Lomwe",
35824,
"bnt-mak",
"Latn",
}
m["ngm"] = {
"Ngatik Men's Creole",
36400,
"crp",
ancestors = "en, pon",
}
m["ngn"] = {
"Ngwo",
36051,
"nic-mom",
"Latn",
}
m["ngo"] = {
"Ngoni",
7022547,
"bnt-ngu",
"Latn",
}
m["ngp"] = {
"Ngulu",
7193332,
"bnt-seu",
"Latn",
}
m["ngq"] = {
"Ngoreme",
7022573,
"bnt-lok",
"Latn",
}
m["ngr"] = {
"Nagu",
3063524,
"poz-tem",
"Latn",
}
m["ngs"] = {
"Gvoko",
3441188,
"cdc-cbm",
"Latn",
}
m["ngt"] = {
"Ngeq",
25559548,
"mkh-kat",
}
m["ngu"] = {
"Guerrero နာဟွာတာယ်လ်",
5614980,
"azc-nah",
"Latn",
}
m["ngv"] = {
"Nagumi",
35842,
"nic-jrn",
}
m["ngw"] = {
"Ngwaba",
3440480,
"cdc-cbm",
"Latn",
}
m["ngx"] = {
"Nggwahyi",
56265,
"cdc-cbm",
"Latn",
}
m["ngy"] = {
"Tibea",
36598,
"bnt-baf",
"Latn",
}
m["ngz"] = {
"Ngungwel",
35920,
"bnt-tkc",
"Latn",
}
m["nha"] = {
"Nhanda",
3339380,
"aus-psw",
"Latn",
}
m["nhb"] = {
"ဗါန်",
3913311,
"dmn-nbe",
"Latn",
}
m["nhc"] = {
"Tabasco နာဟွာတာယ်လ်",
6047326,
"azc-nah",
"Latn",
}
m["nhd"] = {
"Chiripá",
2873230,
"tup-gua",
"Latn",
ancestors = "gn",
}
m["nhe"] = {
"ဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်",
4358289,
"azc-nah",
"Latn",
}
m["nhf"] = {
"Nhuwala",
10600396,
"aus-nga",
"Latn",
}
m["nhg"] = {
"ထာန်ထာန်သေန်ဂဝ် နာဝါတော",
3450252,
"azc-nah",
"Latn",
}
m["nhh"] = {
"Nahari",
6583560,
"inc-hal",
}
m["nhi"] = {
"Zacatlán-Ahuacatlán-Tepetzintla နာဟွာတာယ်လ်",
12953764,
"azc-nah",
"Latn",
}
m["nhk"] = {
"Cosoleacaque နာဟွာတာယ်လ်",
12953757,
"azc-nah",
"Latn",
}
m["nhm"] = {
"Morelos နာဟွာတာယ်လ်",
4800819,
"azc-nah",
"Latn",
}
m["nhn"] = {
"နာဟောတ် မဇ္ဇျိမ",
6047309,
"azc-nah",
"Latn",
}
m["nho"] = {
"Takuu",
3409818,
"poz-pnp",
"Latn",
}
m["nhp"] = {
"Pajapan နာဟွာတာယ်လ်",
12953760,
"azc-nah",
"Latn",
}
m["nhq"] = {
"Huaxcaleca နာဟွာတာယ်လ်",
12953758,
"azc-nah",
"Latn",
}
m["nhr"] = {
"Naro",
2164778,
"khi-kal",
"Latn",
}
m["nht"] = {
"Ometepec နာဟွာတာယ်လ်",
7090132,
"azc-nah",
"Latn",
}
m["nhu"] = {
"နဝ်ဝါန်",
36072,
"nic-bbe",
"Latn",
}
m["nhv"] = {
"Temascaltepec နာဟွာတာယ်လ်",
2379405,
"azc-nah",
"Latn",
}
m["nhw"] = {
"ဝုတ်သတေကာ နာဟာဒ်တာဲ လ္ပာ်ပလိုတ်",
2678840,
"azc-nah",
"Latn",
}
m["nhx"] = {
"မကာယျာပါံ နာဝါတဝ်",
12953756,
"azc-nah",
"Latn",
}
m["nhy"] = {
"Northern Oaxaca နာဟွာတာယ်လ်",
12953763,
"azc-nah",
"Latn",
}
m["nhz"] = {
"Santa María La Alta နာဟွာတာယ်လ်",
15705753,
"azc-nah",
"Latn",
}
m["nia"] = {
"နဳယျာ",
2407831,
"poz-nws",
"Latn",
}
m["nib"] = {
"Nakame",
11732495,
"ngf-fin",
"Latn",
}
m["nid"] = {
"Ngandi",
7021977,
"aus-arn",
"Latn",
}
m["nie"] = {
"Niellim",
33662,
"alv-bua",
}
m["nif"] = {
"Nek",
6989781,
"ngf-fin",
"Latn",
}
m["nig"] = {
"Ngalakan",
3913796,
"aus-gun",
"Latn",
}
m["nih"] = {
"Nyiha",
11128374,
"bnt-mby",
"Latn",
}
m["nii"] = {
"Nii",
35237,
"ngf",
"Latn",
}
m["nij"] = {
"ၚဂျူ",
2992872,
"poz-brw",
"Latn",
}
m["nik"] = {
"Southern Nicobarese",
7570194,
"aav-nic",
}
m["nil"] = {
"Nila",
7036821,
}
m["nim"] = {
"Nilamba",
4121200,
"bnt-tkm",
"Latn",
}
m["nin"] = {
"Ninzo",
3914021,
"nic-nin",
}
m["nio"] = {
"နၞဴနေတ်သာန်",
36743,
"syd",
"Cyrl",
translit = "nio-translit",
}
m["niq"] = {
"Nandi",
6956591,
"sdv-nma",
}
m["nir"] = {
"Nimboran",
301116,
"paa-nim",
}
m["nis"] = {
"Nimi",
11732523,
"ngf-fin",
"Latn",
}
m["nit"] = {
"ကိုဝ်လာမဳ လ္ပာ်ဒိုဟ်ပလိုတ်သမၠုၚ်ကျာ",
56767,
"dra-knk",
"Deva, Telu",
translit = {
Telu = "te-translit"
},
}
m["niu"] = {
"နဳဥုအာယ်",
33790,
"poz-ton",
"Latn",
}
m["niv"] = {
"နိဖှေတ်",
36464,
"qfa-iso",
"Cyrl",
translit = "niv-translit",
entry_name = {
from = {"['’]"},
to = {"ʼ"}
},
sort_key = "niv-sortkey",
}
m["niw"] = {
"Nimo",
3504126,
"paa-asa",
}
m["nix"] = {
"Hema",
5710904,
"bnt-nyg",
"Latn",
}
m["niy"] = {
"Ngiti",
7022396,
"csu-lnd",
}
m["niz"] = {
"Ningil",
11732527,
"qfa-tor",
}
m["nja"] = {
"Nzanyi",
3441299,
"cdc-cbm",
"Latn",
}
m["njb"] = {
"Nocte Naga",
7046410,
"sit-tno",
}
m["njh"] = {
"Lotha Naga",
33590,
"sit-aao",
}
m["nji"] = {
"Gudanji",
3915692,
"aus-mir",
}
m["njj"] = {
"Njen",
36112,
"nic-mom",
"Latn",
}
m["njl"] = {
"Njalgulgule",
7071229,
"sdv-daj",
}
m["njm"] = {
"Angami",
56761,
"tbq-anp",
"Latn",
}
m["njn"] = {
"Liangmai Naga",
14194500,
"sit-zem",
}
m["njo"] = {
"အာအဝ်",
28433,
"sit-aao",
"Latn",
}
m["njr"] = {
"Njerep",
35844,
"nic-mmb",
"Latn",
}
m["njs"] = {
"Nisa",
13593518,
"paa-egb",
}
m["njt"] = {
"Ndyuka-Trio Pidgin",
13591205,
"crp",
ancestors = "djk, tri",
}
m["nju"] = {
"Ngadjunmaya",
7021846,
"aus-pam",
}
m["njx"] = {
"Kunyi",
3196559,
"bnt-kng",
"Latn",
}
m["njy"] = {
"Njyem",
35898,
"bnt-ndb",
"Latn",
}
m["njz"] = {
"နာ်ယဳဃှဳ",
56870,
"sit-tan",
"Latn",
}
m["nka"] = {
"Nkoya",
7042633,
"bnt-lbn",
"Latn",
}
m["nkb"] = {
"Khoibu Naga",
21481876,
"sit-mar",
}
m["nkc"] = {
"Nkongho",
35863,
"bnt-saw",
"Latn",
}
m["nkd"] = {
"Koireng",
6426342,
"sit-zem",
}
m["nke"] = {
"Duke",
3041075,
"poz-ocw",
}
m["nkf"] = {
"Inpui Naga",
21481817,
"sit-zem",
}
m["nkg"] = {
"Nekgini",
11732509,
"ngf-fin",
"Latn",
}
m["nkh"] = {
"Khezha Naga",
6401519,
"tbq-anp",
}
m["nki"] = {
"Thangal Naga",
56374,
"sit-zem",
}
m["nkj"] = {
"Nakai",
14916897,
"ngf-okk",
"Latn",
}
m["nkk"] = {
"Nokuku",
7048122,
"poz-vnn",
"Latn",
}
m["nkm"] = {
"Namat",
15634505,
}
m["nkn"] = {
"Nkangala",
10962292,
"bnt-clu",
"Latn",
ancestors = "mck",
}
m["nko"] = {
"အၚ်္ခါဝ်နိယျာ",
35867,
"alv-gng",
"Latn",
}
m["nkp"] = {
"Niuatoputapu",
3399095,
"poz-pnp",
}
m["nkq"] = {
"Nkami",
7042522,
"alv-gng",
"Latn",
}
m["nkr"] = {
"Nukuoro",
2635961,
"poz-pnp",
"Latn",
}
m["nks"] = {
"North Asmat",
11732049,
}
m["nkt"] = {
"Nyika",
16917497,
"bnt-mwi",
"Latn",
}
m["nku"] = {
"Bouna Kulango",
20668241,
"alv-kul",
}
-- nkv is treated as nkt, see WT:LT
m["nkw"] = {
"Nkutu",
7193313,
"bnt-tet",
"Latn",
}
m["nkx"] = {
"Nkoroo",
36000,
"ijo",
}
m["nkz"] = {
"Nkari",
11130307,
"nic-ief",
ancestors = "ibr",
}
m["nla"] = {
"Ngombale",
36292,
"bai",
"Latn",
}
m["nlc"] = {
"Nalca",
6960839,
"ngf",
"Latn",
}
m["nle"] = {
"East Nyala",
25559347,
"bnt-msl",
"Latn",
ancestors = "luy",
}
m["nlg"] = {
"Gela",
3063531,
"poz-sls",
"Latn",
}
m["nli"] = {
"Grangali",
3444203,
"inc-kun",
}
m["nlj"] = {
"Nyali",
7070830,
"bnt-nya",
"Latn",
}
m["nlk"] = {
"Ninia Yali",
12953310,
}
m["nll"] = {
"နဳဟာလဳ",
33904,
"qfa-iso",
"Deva, Latn",
}
m["nlm"] = {
"Mankiyali",
47522426,
"inc-koh",
}
m["nlo"] = {
"Ngul",
35894,
"bnt-bdz",
"Latn",
}
m["nlq"] = {
"Lao Naga",
63283609,
"sit-tno",
}
m["nlu"] = {
"Nchumbulu",
36143,
"alv-gng",
"Latn",
}
m["nlv"] = {
"Orizaba နာဟွာတာယ်လ်",
3086050,
"azc-nah",
"Latn",
}
m["nlw"] = {
"Walangama",
7961277,
}
m["nlx"] = {
"Nahali",
33361,
"inc-bhi",
}
m["nly"] = {
"Nyamal",
7070837,
"aus-nga",
"Latn",
}
m["nlz"] = {
"Nalögo",
20527138,
"poz-tem",
"Latn",
}
m["nma"] = {
"Maram Naga",
56378,
"sit-zem",
}
m["nmb"] = {
"ဗေတ် နာန်ဗာတ်သ်",
2902304,
"poz-vnc",
"Latn",
}
m["nmc"] = {
"ၚါမ်",
3915446,
"csu-sar",
"Latn",
}
m["nmd"] = {
"Ndumu",
35901,
"bnt-mbt",
"Latn",
}
m["nme"] = {
"Mzieme Naga",
6949473,
"sit-zem",
}
m["nmf"] = {
"Tangkhul Naga",
7682992,
"sit-tng",
}
m["nmg"] = {
"Kwasio",
34098,
"bnt-mnj",
"Latn",
}
m["nmh"] = {
"Monsang Naga",
6902496,
}
m["nmi"] = {
"Nyam",
3438738,
"cdc-wst",
"Latn",
}
m["nmj"] = {
"Ngombe (Central African Republic)",
3913949,
"gba-sou",
}
m["nmk"] = {
"Namakura",
3335410,
"poz-vnc",
"Latn",
}
m["nml"] = {
"Ndemli",
36089,
"nic-grf",
"Latn",
}
m["nmm"] = {
"Manangba",
6746900,
"sit-tam",
"Tibt, Deva",
translit = {Tibt = "Tibt-translit"},
override_translit = true,
display_text = {Tibt = s["Tibt-displaytext"]},
entry_name = {Tibt = s["Tibt-entryname"]},
sort_key = {Tibt = "Tibt-sortkey"},
}
m["nmn"] = {
"သူ",
13229,
"khi-tuu",
"Latn",
}
m["nmo"] = {
"Moyon Naga",
6927748,
"tbq-kuk",
}
m["nmp"] = {
"Nimanbur",
16891606,
}
m["nmq"] = {
"Nambya",
11008869,
"bnt-sho",
"Latn",
}
m["nmr"] = {
"Nimbari",
36069,
"alv-lni",
}
m["nms"] = {
"Letemboi",
3236886,
"poz-vnc",
"Latn",
}
m["nmt"] = {
"Namonuito",
12908815,
"poz-mic",
}
m["nmu"] = {
"Northeast Maidu",
3278074,
"nai-mdu",
"Latn",
}
m["nmv"] = {
"Ngamini",
7021944,
"aus-kar",
"Latn",
}
m["nmw"] = {
"Nimoa",
7037729,
"poz-ocw",
}
m["nmy"] = {
"နန်မူယဳ",
56844,
"sit-nax",
"Latn",
}
m["nmz"] = {
"Nawdm",
36085,
"nic-yon",
"Latn",
}
m["nna"] = {
"Nyangumarta",
33653,
}
m["nnb"] = {
"Nande",
3196953,
"bnt-glb",
"Latn",
}
m["nnc"] = {
"Nancere",
3140491,
"cdc-est",
"Latn",
}
m["nnd"] = {
"West Ambae",
2841479,
"poz-vnn",
"Latn",
}
m["nne"] = {
"Ngandyera",
10961003,
"bnt-ova",
"Latn",
}
m["nnf"] = {
"Ngaing",
11732510,
"ngf-fin",
"Latn",
}
m["nng"] = {
"Maring Naga",
12952908,
"sit-mar",
}
m["nnh"] = {
"Ngiemboon",
36286,
"bai",
"Latn",
}
m["nni"] = {
"North Nuaulu",
12952968,
"poz-cma",
}
m["nnj"] = {
"Nyangatom",
4662604,
"sdv-ttu",
}
m["nnk"] = {
"Nankina",
11732502,
"ngf-fin",
"Latn",
}
m["nnl"] = {
"Northern Rengma Naga",
7067615,
"tbq-anp",
}
m["nnm"] = {
"Namia",
56363,
"paa-spk",
"Latn",
}
m["nnn"] = {
"Ngete",
56625,
"cdc-mas",
"Latn",
}
m["nnp"] = {
"ဝါန်ဆေဝ်",
7967085,
"sit-kch",
"Wcho, Deva, Latn",
}
m["nnq"] = {
"Ngindo",
7022366,
"bnt-mbi",
"Latn",
}
m["nnr"] = {
"Narungga",
13591127,
"aus-pam",
"Latn",
}
m["nnt"] = {
"Nanticoke",
3915517,
"alg-eas",
"Latn",
}
m["nnu"] = {
"Dwang",
35258,
"alv-gng",
"Latn",
}
m["nnv"] = {
"Nukunu",
10604066,
}
m["nnw"] = {
"Southern Nuni",
11152248,
"nic-gnn",
"Latn",
}
m["nnx"] = {
"Ngong",
12952915,
}
m["nny"] = { -- contrast aus-ynk
"Nyangga",
10604331,
"aus-tnk",
"Latn",
}
m["nnz"] = {
"Nda'nda'",
36016,
"bai",
"Latn",
}
m["noa"] = {
"Woun Meu",
3111873,
"sai-chc",
"Latn",
}
m["noc"] = {
"Nuk",
11732534,
"ngf-fin",
"Latn",
}
-- Data record for language code "nod": display name, Wikidata item number, family code and the list of scripts,
-- followed by per-script settings.
--
-- This record used to define `sort_key` and `entry_name` twice each. Lua does not define the assignment order for
-- repeated keys in a table constructor, so a record must define each key exactly once. The definitions kept below
-- are the ones that came last in the constructor, i.e. the ones that were already taking effect in the reference
-- implementation. The discarded definitions were an inline `from`/`to` substitution list for `sort_key` (whose two
-- lists had different lengths) and an inline ZWNJ substitution for `entry_name`.
m["nod"] = {
	"သေံသၟဝ်ကျာ",
	565110,
	"tai-swe",
	"Lana, Thai",
	translit = {
		Lana = "Lana-translit",
		Thai = "Thai alphabet-translit",
	},
	-- Uses the shared `c.ZWNJ` constant, so this record no longer calls `u` and does not trigger the on-demand
	-- load of [[Module:string utilities]].
	entry_name = {remove_diacritics = c.ZWNJ},
	sort_key = {
		Lana = "Lana-sortkey",
		Thai = "Thai-sortkey",
	},
}
m["noe"] = {
"နဳမာဒဳ",
3502294,
"raj",
"Deva",
translit = "hi-translit",
}
m["nof"] = {
"Nomane",
11732531,
}
m["nog"] = {
"နဝ်ကာယ်",
33871,
"trk-kno",
"Cyrl, Arab, Latn",
translit = "nog-translit",
override_translit = true,
}
m["noh"] = {
"Nomu",
11732532,
}
m["noi"] = {
"Noiri",
12953774,
"inc-bhi",
}
m["noj"] = {
"Nonuya",
5372139,
"sai-wit",
"Latn",
}
m["nok"] = {
"Nooksack",
3343396,
}
m["nol"] = {
"Nomlaki",
3343229,
"nai-wtq",
"Latn",
}
m["nom"] = {
"Nocamán",
7046289,
"sai-pan",
"Latn",
}
m["non"] = {
"နဳနိုတ်တြေံ",
35505,
"gmq",
"Latn, Runr",
translit = {Runr = "Runr-translit"},
}
m["nop"] = {
"Numanggang",
7069052,
"ngf-fin",
"Latn",
}
m["noq"] = {
"Ngongo",
11057478,
"bnt-yak",
"Latn",
}
m["nos"] = {
"Eastern Nisu",
25559419,
"tbq-nis",
}
m["not"] = {
"Nomatsiguenga",
3342992,
"awd",
"Latn",
}
m["nou"] = {
"Ewage-Notu",
5418860,
}
m["nov"] = {
"Novial",
36738,
"art",
"Latn",
type = "appendix-constructed"
}
m["now"] = {
"Nyambo",
4967930,
"bnt-haj",
"Latn",
}
m["noy"] = {
"Noy",
36321,
"alv-bua",
}
m["noz"] = {
"Nayi",
3183349,
"omv-diz",
}
m["npa"] = {
"Nar Phu",
4926353,
"sit-tam",
}
m["npb"] = {
"Nupbikha",
3695201,
"sit-ebo",
}
m["npg"] = {
"Ponyo",
7228475,
"sit-kch",
}
m["nph"] = {
"Phom",
7187109,
"sit-kch",
}
m["npl"] = {
"Southeastern Puebla နာဟွာတာယ်လ်",
4632950,
"azc-nah",
"Latn",
}
m["npn"] = {
"Mondropolon",
3320594,
"poz-aay",
}
m["npo"] = {
"Pochuri Naga",
7206342,
"tbq-anp",
}
m["nps"] = {
"Nipsan",
11732528,
}
m["npu"] = {
"Puimei Naga",
7259044,
"sit-zem",
}
m["npy"] = {
"Napu",
12953768,
}
m["nqg"] = {
"Ede Nago",
12952408,
"alv-ede",
}
m["nqk"] = {
"Kura Ede Nago",
12952409,
"alv-ede",
}
m["nql"] = {
"Ngendelengo",
63283693,
"bnt-swb",
"Latn",
}
m["nqm"] = {
"Ndom",
6983791,
"ngf",
"Latn",
}
m["nqn"] = {
"Nen",
20816352,
"paa-yam",
}
m["nqo"] = {
"အိန်'ဂဝ်",
18546266,
"dmn-man",
"Nkoo",
}
m["nqq"] = {
"Kyan-Karyaw Naga",
63283784,
"sit-tno",
}
m["nqy"] = {
"Akyaung Ari",
4702035,
"sit-tng",
}
m["nra"] = {
"Ngom",
36087,
"bnt-kel",
"Latn",
}
m["nrb"] = {
"Nara",
36179,
"sdv-nes",
}
m["nrc"] = {
"Noric",
37023,
"cel",
"Ital",
}
m["nre"] = {
"Southern Rengma Naga",
7313205,
"tbq-anp",
}
m["nrf"] = {
"နဝ်မေံ",
33850,
"roa-oil",
"Latn",
wikimedia_codes = "nrm",
ancestors = "fro-nor",
sort_key = s["roa-oil-sortkey"],
}
m["nrg"] = {
"Narango",
12952929,
"poz-vnn",
"Latn",
}
m["nri"] = {
"Chokri Naga",
5104247,
"tbq-anp",
}
m["nrk"] = {
"Ngarla",
3915860,
"aus-nga",
"Latn",
}
m["nrl"] = {
"Ngarluma",
7022078,
"aus-nga",
"Latn",
}
m["nrm"] = {
"Narom",
3336135,
"poz-swa",
"Latn",
}
m["nrn"] = {
"နန်",
36708,
"gmq-ins",
"Latn",
}
m["nrp"] = {
"ပဳသေန် သၟဝ်ကျာ",
430138,
nil,
"Ital",
translit = "Ital-translit",
}
m["nrr"] = {
"Norra",
12952967,
"tai",
}
m["nrt"] = {
"Northern Kalapuya",
3192121,
"nai-klp",
}
m["nru"] = {
"Narua",
21658869,
"sit-nas",
"Latn",
}
m["nrx"] = {
"Ngurmbur",
2591251,
}
m["nrz"] = {
"Lala (New Guinea)",
6480151,
"poz-ocw",
}
m["nsa"] = {
"Sangtam Naga",
7418144,
"sit-aao",
}
m["nsb"] = {
"Lower Nossob",
6693681,
"khi-tuu",
"Latn",
}
m["nsc"] = {
"Nshi",
11129508,
"nic-rnn",
"Latn",
}
m["nsd"] = {
"Southern Nisu",
63284284,
"tbq-nis",
}
m["nse"] = {
"Nsenga",
3081996,
"bnt-sna",
"Latn",
}
m["nsg"] = {
"Ngasa",
56345,
"sdv-lma",
}
m["nsh"] = {
"Ngoshie",
7022582,
"nic-mom",
"Latn",
}
m["nsi"] = {
"Nigerian Sign Language",
7033021,
"sgn",
}
m["nsk"] = {
"နေတ်သကာပဳ",
1704302,
"alg",
"Cans",
ancestors = "cr",
translit = "nsk-translit",
}
m["nsl"] = {
"Norwegian Sign Language",
1781613,
"sgn",
}
m["nsm"] = {
"Sema",
3478238,
"tbq-anp",
}
m["nsn"] = {
"Nehan",
3337774,
"poz-ocw",
}
m["nso"] = {
"သူထူ လ္ပာ်သၟဝ်ကျာ",
33890,
"bnt-sts",
"Latn",
}
m["nsp"] = {
"Nepalese Sign Language",
3915492,
"sgn",
}
m["nsq"] = {
"Northern Sierra Miwok",
3344226,
"nai-utn",
"Latn",
}
m["nsr"] = {
"Maritime Sign Language",
3915483,
"sgn",
}
m["nss"] = {
"Nali",
3335385,
"poz-aay",
}
m["nst"] = {
"ထေန်သ",
56350,
"sit-tno",
"Latn, Tnsa",
}
m["nsu"] = {
"Sierra Negra နာဟွာတာယ်လ်",
63284326,
"azc-nah",
"Latn",
}
m["nsv"] = {
"Southwestern Nisu",
63308004,
"tbq-nis",
}
m["nsw"] = {
"Navut",
3337327,
"poz-vnn",
"Latn",
}
m["nsx"] = {
"Nsongo",
7067577,
"bnt-tmb",
"Latn",
}
m["nsy"] = {
"Nasal",
6966574,
}
m["nsz"] = {
"Nisenan",
33665,
"nai-mdu",
"Latn",
}
m["ntd"] = {
"Northern Tidung",
24938325,
"poz-san",
}
m["nte"] = {
"Nathembo",
11030947,
"bnt-mak",
}
m["ntg"] = {
"Ngantangarra",
33060509,
}
m["nti"] = {
"Natioro",
36140,
"alv-wan",
}
m["ntj"] = {
"Ngaanyatjarra",
3915409,
"aus-pam",
"Latn",
}
m["ntk"] = {
"Ikoma",
5996114,
"bnt-lok",
"Latn",
}
m["ntm"] = {
"Nateni",
3070731,
"nic-grm",
"Latn",
}
m["nto"] = {
"Ntomba",
11130292,
"bnt-mon",
"Latn",
}
m["ntp"] = {
"ထေပ်ပုဝ်ဝါန် လ္ပာ်သၟဝ်ကျာ",
15615651,
"azc",
"Latn",
sort_key = {remove_diacritics = c.acute},
}
m["ntr"] = {
"Delo",
35195,
"nic-gne",
"Latn",
}
m["nts"] = {
"Natagaimas",
6967931,
}
m["ntu"] = {
"Natügu",
63308082,
"poz-tem",
"Latn",
}
m["ntw"] = {
"Nottoway",
3344791,
"iro-nor",
}
m["ntx"] = {
"Somra",
7560536,
"sit-tng",
}
m["nty"] = {
"Mantsi",
56878,
"sit-mnz",
}
m["nua"] = {
"Yuanga",
3573088,
"poz-cln",
"Latn",
}
m["nuc"] = {
"Nukuini",
3346231,
}
m["nud"] = {
"Ngala",
7021893,
"paa-spk",
"Latn",
}
m["nue"] = {
"Ngundu",
12952953,
"bad-cnt",
"Latn",
}
m["nuf"] = {
"Nusu",
56413,
"tbq-nus",
}
m["nug"] = {
"Nungali",
7069826,
"aus-mir",
}
m["nuh"] = {
"Ndunda",
3913968,
"nic-mmb",
"Latn",
}
m["nui"] = {
"Ngumbi",
36459,
"bnt-yko",
}
m["nuj"] = {
"Nyole (Uganda)",
3739448,
"bnt-msl",
"Latn",
}
m["nuk"] = {
"နှတ်တက",
2992876,
"wak",
"Latn",
}
m["nul"] = {
"Nusa Laut",
7070332,
"poz-cma",
}
m["num"] = {
"Niuafo'ou",
36173,
"poz-ton",
"Latn",
}
m["nun"] = {
"Anong",
2748232,
"sit-nng",
}
m["nuo"] = {
"ၚောန်",
3915785,
"mkh-vie",
"Latn",
sort_key = "vi-sortkey",
}
m["nup"] = {
"နူပဳ",
36720,
"alv-ngb",
"Latn",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
sort_key = "nup-sortkey",
}
m["nuq"] = {
"Nukumanu",
12909019,
"poz-pnp",
}
m["nur"] = {
"Nuguria",
7068910,
"poz-pnp",
}
m["nus"] = {
"နူအေဝ်",
33675,
"sdv-dnu",
"Latn",
}
m["nut"] = {
"နောန်",
72695,
"tai",
"Latn, Hani",
sort_key = {Hani = "Hani-sortkey"},
}
m["nuu"] = {
"Ngbundu",
11126081,
"bad",
"Latn",
}
m["nuv"] = {
"Northern Nuni",
11016572,
"nic-gnn",
"Latn",
}
m["nuw"] = {
"Nguluwan",
6528643,
}
m["nux"] = {
"Mehek",
6809452,
"paa-spk",
"Latn",
}
m["nuy"] = {
"Nunggubuyu",
1747811,
"aus-arn",
}
m["nuz"] = {
"Tlamacazapa နာဟွာတာယ်လ်",
2073277,
"azc-nah",
"Latn",
}
m["nvh"] = {
"Nasarian",
6966614,
"poz-vnc",
"Latn",
}
m["nvm"] = {
"Namiae",
12952922,
}
m["nvo"] = {
"Nyokon",
19573407,
"nic-mbw",
"Latn",
}
m["nwa"] = {
"နဝထဳဟေနအ်",
6982892,
"alg-ara",
"Latn",
}
m["nwb"] = {
"Nyabwa",
33664,
"kro-wee",
}
m["nwc"] = {
"Classical Newar",
5128301,
"sit-new",
}
m["nwe"] = {
"Ngwe",
36181,
"bai",
"Latn",
}
m["nwi"] = {
"Southwest Tanna",
3504488,
"poz-vns",
"Latn",
}
m["nwm"] = {
"Nyamusa-Molo",
12747951,
"csu-bbk",
}
m["nwo"] = {
"Nauo",
6981305,
"aus-pam",
"Latn",
}
m["nwr"] = {
"Nawaru",
12638166,
"ngf",
}
m["nwx"] = {
"Middle Newar",
65455877,
"sit-new",
"Deva, Newa, Ranj",
ancestors = "nwc",
}
m["nwy"] = {
"Nottoway-Meherrin",
65455878,
"iro-nor",
}
m["nxa"] = {
"နောတ်အေတ်",
6981095,
"poz-tim",
}
m["nxd"] = {
"Ngando (Congo)",
3913277,
"bnt-ske",
}
m["nxe"] = {
"Nage",
2295569,
"poz-cet",
}
m["nxg"] = {
"Ngadha",
1516651,
"poz-cet",
"Latn",
}
m["nxi"] = {
"Nindi",
7038230,
"bnt-mbi",
"Latn",
}
m["nxl"] = {
"South Nuaulu",
18544857,
"poz-cma",
}
m["nxm"] = {
"Numidian",
35761,
"afa",
"Tfng, Latn", --Tfng may not support all the needed characters
}
m["nxn"] = {
"Ngawun",
3915711,
"aus-pam",
"Latn",
}
m["nxo"] = {
"Ndambomo",
6983681,
"bnt-kel",
"Latn",
}
m["nxq"] = {
"နာဃှဳ",
2478711,
"sit-nas",
"Nkdb, Nkgb, Latn, Lisu",
translit = {Lisu = "Lisu-translit"},
sort_key = {Lisu = s["Lisu-sortkey"]},
}
m["nxr"] = {
"Ninggerum",
11732526,
"ngf-okk",
"Latn",
}
m["nxu"] = {
"Narau",
6965452,
"ngf",
"Latn",
}
m["nxx"] = {
"Nafri",
6958211,
"paa-sen",
"Latn",
}
m["nyb"] = {
"Nyangbo",
36256,
"alv-ktg",
"Latn",
}
m["nyc"] = {
"Nyanga-li",
7070876,
"bnt-boa",
"Latn",
}
m["nyd"] = {
"Nyole (Kenya)",
7071227,
"bnt-msl",
"Latn",
}
m["nye"] = {
"Nyengo",
7071068,
"bnt-clu",
"Latn",
}
m["nyf"] = {
"Giryama",
3107606,
"bnt-mij",
"Latn",
}
m["nyg"] = {
"Nyindu",
11030685,
"bnt-shh",
"Latn",
}
m["nyh"] = {
"Nyigina",
3913780,
"aus-nyu",
"Latn",
}
m["nyi"] = {
"Nyimang",
34846,
"sdv-nyi",
"Latn",
}
m["nyj"] = {
"Nyanga (Congo)",
7070879,
"bnt-nyb",
"Latn",
}
m["nyk"] = {
"Nyaneka",
10962298,
"bnt-swb",
"Latn",
}
m["nyl"] = {
"Nyeu",
3033578,
"mkh-kat",
}
m["nym"] = {
"Nyamwezi",
4121131,
"bnt-tkm",
"Latn",
}
m["nyn"] = {
"နယျာန်ကိုဝ်လေဝ်",
13207,
"bnt-nyg",
"Latn",
}
m["nyo"] = {
"နယျဝ်ရုဝ်",
33794,
"bnt-nyg",
"Latn",
}
m["nyp"] = {
"Nyang'i",
7070894,
"ssa-klk",
}
m["nys"] = {
"Nyunga",
7049771,
"aus-pam",
"Latn",
}
m["nyt"] = {
"Nyawaygi",
3915783,
"aus-dyb",
}
m["nyu"] = {
"Nyungwe",
7071318,
"bnt-sna",
"Latn",
}
m["nyv"] = {
"Nyulnyul",
3442732,
"aus-nyu",
"Latn",
}
m["nyw"] = {
"နေဴ",
26425602,
"tai",
"Thai, Latn, Tayo", -- Vietnamese alphabet
sort_key = "Thai-sortkey", -- no effect on Latn
}
m["nyx"] = {
"Nganyaywana",
3913800,
"aus-cww",
"Latn",
}
m["nyy"] = {
"Nyakyusa",
3272620,
"bnt-run",
"Latn",
}
m["nza"] = {
"ထိုၚ်ဂန် အာန်ဗာန်ဗေ",
36518,
"nic-jkn",
"Latn",
}
m["nzb"] = {
"Njebi",
35923,
"bnt-nze",
"Latn",
}
m["nzd"] = {
"နဇာဒဳ",
17152586,
"bnt-bdz",
"Latn",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.caron},
}
m["nzi"] = {
"Nzima",
36337,
"alv-ctn",
}
m["nzk"] = {
"Nzakara",
3913339,
"znd",
"Latn",
}
m["nzm"] = {
"Zeme Naga",
21491053,
"sit-zem",
}
m["nzs"] = {
"New Zealand Sign Language",
36239,
"sgn",
}
m["nzu"] = {
"Central Teke",
36473,
"bnt-tkc",
}
m["nzy"] = {
"Nzakambay",
36374,
"alv-mbm",
"Latn",
}
m["nzz"] = {
"Nanga Dama Dogon",
6963443,
"nic-nwa",
}
return require("Module:languages").finalizeData(m, "language")
thzmma9u0ovguwyie085vnsc5p9juy2
မဝ်ဂျူ:languages/data/3/k
828
710
395884
394420
2026-05-29T15:52:21Z
Intobesa.bot
1035
Bot: ပလေဝ်ဒါန်
395884
Scribunto
text/plain
local m_langdata = require("Module:languages/data")
-- Lazy wrapper around the `char` function of Module:string utilities.
-- The module is only required on the first call (some data sets never need
-- it); the wrapper then rebinds the local `u` to the real function so later
-- calls go straight through.
local function u(...)
local char = require("Module:string utilities").char
u = char
return char(...)
end
local c = m_langdata.chars
local p = m_langdata.puaChars
local s = m_langdata.shared
local m = {}
m["kaa"] = {
"ကာရာကာလပက်",
33541,
"trk-kno",
"Latn, Cyrl, fa-Arab",
dotted_dotless_i = true,
entry_name = {
from = {"['’]"},
to = {"ʼ"}
},
sort_key = {
Latn = {
from = {
-- Sort the old orthography (using the apostrophe) after the new orthography (using the acute accent).
"í", "iʼ", "i", -- Ensure "i" comes after "í", "iʼ", "ı".
"sh", "ch",
"á", "aʼ", "ǵ", "gʼ", "x", p[4], p[5], "ı", "q", "ń", "nʼ", "ó", "oʼ", "ú", "uʼ", "c"
},
to = {
p[4], p[5], "i" .. p[3],
"z" .. p[1], "z" .. p[3],
"a" .. p[1], "a" .. p[2], "g" .. p[1], "g" .. p[2], "h" .. p[1], "i", "i" .. p[1], "i" .. p[2], "k" .. p[1], "n" .. p[1], "n" .. p[2], "o" .. p[1], "o" .. p[2], "u" .. p[1], "u" .. p[2], "z" .. p[2]
}
},
Cyrl = {
from = {"ә", "ғ", "ё", "қ", "ң", "ө", "ү", "ў", "ҳ"},
to = {"а" .. p[1], "г" .. p[1], "е" .. p[1], "к" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1], "у" .. p[2], "х" .. p[1]}
},
},
}
m["kab"] = {
"ကာဗေန်အဝ်",
35853,
"ber",
"Latn",
}
m["kac"] = {
"ကချေၚ်",
33332,
"sit-jnp",
"Latn, Mymr",
}
m["kad"] = {
"Kadara",
3914011,
"nic-plc",
"Latn",
}
m["kae"] = {
"Ketangalan",
2779411,
"map",
}
m["kaf"] = {
"Katso",
246122,
"tbq-kzh",
}
m["kag"] = {
"Kajaman",
6348863,
"poz",
"Latn",
}
m["kah"] = {
"Fer",
5443742,
"csu-bgr",
"Latn",
}
m["kai"] = {
"ခါရေဝ်ခါရေဝ်",
3438770,
"cdc-wst",
"Latn",
}
m["kaj"] = {
"Jju",
35401,
"nic-plc",
"Latn",
}
m["kak"] = {
"ကယျာပါ ကာလဟာန်",
3192220,
"phi",
"Latn",
}
m["kam"] = {
"ကေန်ဗာ",
2574767,
"bnt-kka",
"Latn",
}
m["kao"] = {
"Kassonke",
36905,
"dmn-wmn",
"Latn",
}
m["kap"] = {
"ဗဳသဝ်တာ",
33054,
"cau-ets",
"Cyrl",
translit = "cau-nec-translit",
override_translit = true,
display_text = {Cyrl = s["cau-Cyrl-displaytext"]},
entry_name = {Cyrl = s["cau-Cyrl-entryname"]},
}
m["kaq"] = {
"Capanahua",
2937196,
"sai-pan",
"Latn",
}
m["kaw"] = {
"ဂျာဗာတြေံ",
49341,
"poz",
"Latn, Java, Kawi",
translit = "jv-translit", --same as jv
}
m["kax"] = {
"Kao",
3192799,
"paa-nha"
}
m["kay"] = {
"Kamayurá",
3192336,
"tup-gua",
"Latn",
}
m["kba"] = {
"Kalarko",
5517764,
"aus-pam",
"Latn",
}
m["kbb"] = {
"ကာသူယျာနာ",
12953626,
"sai-prk",
"Latn",
}
m["kbc"] = {
"Kadiwéu",
18168288,
"sai-guc",
"Latn",
}
m["kbd"] = {
"ခါပါဒဳယာန်",
33522,
"cau-cir",
"Cyrl, Latn, Arab",
translit = {
Cyrl = "cau-cir-translit",
Arab = "ar-translit",
},
override_translit = true,
display_text = {Cyrl = s["cau-Cyrl-displaytext"]},
entry_name = {
Cyrl = s["cau-Cyrl-entryname"],
Latn = s["cau-Latn-entryname"],
},
sort_key = {
Cyrl = {
from = {
"кхъу", "къӏу", -- 4 chars
"гъу", "джу", "дзу", "жъу", "къу", "кхъ", "къӏ", "кӏу", "кӏь", "лъу", "лӏу", "пӏу", "сӏу", "тӏу", "фӏу", "хъу", "цӏу", "чъу", "чӏу", "шъу", "шӏу", "щӏу", -- 3 chars
"гу", "гъ", "гь", "дж", "дз", "ё", "жъ", "жь", "ку", "къ", "кь", "кӏ", "лъ", "ль", "лӏ", "пӏ", "сӏ", "тӏ", "фӏ", "ху", "хъ", "хь", "цу", "цӏ", "чу", "чъ", "чӏ", "шъ", "шӏ", "щӏ", "ӏу", "ӏь", -- 2 chars
"э" -- 1 char
},
to = {
"к" .. p[5], "к" .. p[7],
"г" .. p[3], "д" .. p[2], "д" .. p[4], "ж" .. p[2], "к" .. p[3], "к" .. p[4], "к" .. p[6], "к" .. p[10], "к" .. p[11], "л" .. p[2], "л" .. p[5], "п" .. p[2], "с" .. p[2], "т" .. p[2], "ф" .. p[2], "х" .. p[3], "ц" .. p[3], "ч" .. p[3], "ч" .. p[5], "ш" .. p[2], "ш" .. p[4], "щ" .. p[2],
"г" .. p[1], "г" .. p[2], "г" .. p[4], "д" .. p[1], "д" .. p[3], "е" .. p[1], "ж" .. p[1], "ж" .. p[3], "к" .. p[1], "к" .. p[2], "к" .. p[8], "к" .. p[9], "л" .. p[1], "л" .. p[3], "л" .. p[4], "п" .. p[1], "с" .. p[1], "т" .. p[1], "ф" .. p[1], "х" .. p[1], "х" .. p[2], "х" .. p[4], "ц" .. p[1], "ц" .. p[2], "ч" .. p[1], "ч" .. p[2], "ч" .. p[4], "ш" .. p[1], "ш" .. p[3], "щ" .. p[1], "ӏ" .. p[1], "ӏ" .. p[2],
"а" .. p[1]
}
},
},
}
m["kbe"] = {
"Kanju",
10543322,
"aus-pam",
"Latn",
}
m["kbh"] = {
"Camsá",
2842667,
"qfa-iso",
"Latn",
}
m["kbi"] = {
"Kaptiau",
6367294,
"poz-oce",
"Latn",
}
m["kbj"] = {
"Kari",
6370438,
"bnt-boa",
"Latn",
}
m["kbk"] = {
"Grass Koiari",
12952642,
"ngf",
"Latn",
}
m["kbm"] = {
"Iwal",
3156391,
"poz-ocw",
"Latn",
}
m["kbn"] = {
"Kare (Africa)",
35554,
"alv-mbm",
"Latn",
}
m["kbo"] = {
"ခါလဳကာဝ်",
11275553,
"csu-mma",
}
m["kbp"] = {
"Kabiyé",
35475,
"nic-gne",
"Latn",
}
m["kbq"] = {
"Kamano",
11732272,
"paa-kag",
"Latn",
}
m["kbr"] = {
"Kafa",
35481,
"omv-gon",
"Ethi, Latn",
}
m["kbs"] = {
"Kande",
35556,
"bnt-tso",
"Latn",
}
m["kbt"] = {
"Gabadi",
3291159,
"poz-ocw",
"Latn",
}
m["kbu"] = {
"Kabutra",
10966761,
"raj",
}
m["kbv"] = {
"Kamberataro",
5261289,
"paa",
"Latn",
}
m["kbw"] = {
"Kaiep",
6347632,
"poz-ocw",
"Latn",
}
m["kbx"] = {
"Ap Ma",
56298,
"paa-ram",
}
m["kbz"] = {
"Duhwa",
56295,
"cdc-wst",
"Latn",
}
m["kcb"] = {
"Kawacha",
11732302,
"ngf",
}
m["kcc"] = {
"Lubila",
3914381,
"nic-uce",
"Latn",
}
m["kcd"] = {
"Ngkâlmpw Kanum",
12952566,
"paa-yam",
}
m["kce"] = {
"Kaivi",
6348685,
"nic-kau",
}
m["kcf"] = {
"Ukaan",
36651,
"nic-bco",
}
m["kcg"] = {
"ထိုၚ်အာက်",
3912765,
"nic-plc",
}
m["kch"] = {
"Vono",
3913920,
"nic-kau",
}
m["kci"] = {
"Kamantan",
3914019,
"nic-plc",
}
m["kcj"] = {
"Kobiana",
35609,
"alv-nyn",
}
m["kck"] = {
"Kalanga",
33672,
"bnt-sho",
"Latn",
}
m["kcl"] = {
"Kala",
6349982,
"poz-ocw",
"Latn",
}
m["kcm"] = {
"Tar Gula",
277963,
"csu-bba",
}
m["kcn"] = {
"နူဗဳ",
36388,
"crp",
"Latn, Arab",
ancestors = "apd",
entry_name = {remove_diacritics = c.acute},
}
m["kco"] = {
"Kinalakna",
11732320,
"ngf",
}
m["kcp"] = {
"Kanga",
6362384,
"qfa-kad",
"Latn",
}
m["kcq"] = {
"Kamo",
3914879,
"alv-wjk",
}
m["kcr"] = {
"Katla",
35688,
"nic-ktl",
}
m["kcs"] = {
"Koenoem",
3438755,
"cdc-wst",
}
m["kct"] = {
"Kaian",
6347538,
"paa-ram",
}
m["kcu"] = {
"Kikami",
3915212,
"bnt-ruv",
"Latn",
}
m["kcv"] = {
"Kete",
3195598,
"bnt-lub",
}
m["kcw"] = {
"Kabwari",
6344539,
"bnt-glb",
}
m["kcx"] = {
"Kachama-Ganjule",
12634070,
"omv-eom",
}
m["kcy"] = {
"Korandje",
33427,
"son",
}
m["kcz"] = {
"Konongo",
11732345,
"bnt-tkm",
"Latn",
}
m["kda"] = {
"Worimi",
3914062,
"aus-pam",
"Latn",
}
m["kdc"] = {
"Kutu",
6448634,
"bnt-ruv",
}
m["kdd"] = {
"Yankunytjatjara",
34207,
"aus-pam",
"Latn",
}
m["kde"] = {
"Makonde",
35172,
"bnt-rvm",
"Latn",
}
m["kdf"] = {
"Mamusi",
6746036,
"poz-ocw",
"Latn",
}
m["kdg"] = {
"Seba",
7442316,
"bnt-sbi",
"Latn",
}
m["kdh"] = {
"Tem",
36531,
"nic-gne",
}
m["kdi"] = {
"Kumam",
6443410,
"sdv-los",
}
m["kdj"] = {
"Karamojong",
56326,
"sdv-ttu",
"Latn",
}
m["kdk"] = {
"Numèè",
3346774,
"poz-cln",
"Latn",
}
m["kdl"] = {
"Tsikimba",
3914404,
"nic-kam",
}
m["kdm"] = {
"Kagoma",
3914420,
"nic-plc",
}
m["kdn"] = {
"Kunda",
4121130,
"bnt-sna",
}
m["kdp"] = {
"Kaningdon-Nindem",
3914956,
"nic-nin",
}
m["kdq"] = {
"Koch",
56431,
"tbq-bdg",
}
m["kdr"] = {
"ကာရေန်",
33725,
"trk-kcu",
"Cyrl, Latn, Hebr",
}
m["kdt"] = {
"Kuy",
56310,
"mkh-kat",
"Thai, Khmr, Laoo",
}
m["kdu"] = {
"Kadaru",
35441,
"nub-hil",
"Latn",
}
m["kdv"] = {
"Kado",
7402721,
"sit-luu",
}
m["kdw"] = {
"Koneraw",
11732341,
"ngf",
}
m["kdx"] = {
"Kam",
36753,
"alv-wjk",
}
m["kdy"] = {
"Keder",
6383641,
"paa-tkw",
}
m["kdz"] = {
"Kwaja",
11128866,
"nic-nka",
"Latn",
}
m["kea"] = {
"ခါၜေါအ်အဝ်ဒဳယဴနူ",
35963,
"crp",
"Latn",
ancestors = "pt",
}
m["keb"] = {
"Kélé",
35559,
"bnt-kel",
}
m["kec"] = {
"Keiga",
3409311,
"qfa-kad",
"Latn",
}
m["ked"] = {
"Kerewe",
6393846,
"bnt-haj",
}
m["kee"] = {
"Eastern Keres",
15649021,
"nai-ker",
"Latn",
}
m["kef"] = {
"Kpessi",
35748,
"alv-gbe",
}
m["keg"] = {
"Tese",
16887296,
"sdv",
}
m["keh"] = {
"Keak",
6382110,
"paa-spk",
}
m["kei"] = {
"Kei",
2410352,
}
m["kej"] = {
"Kadar",
6345179,
"dra-mal",
}
m["kek"] = {
"ခဳ'ချဳ",
35536,
"myn",
"Latn",
}
m["kel"] = {
"Kela-Yela",
6385426,
"bnt-mon",
"Latn",
}
m["kem"] = {
"Kemak",
35549,
"poz-tim",
}
m["ken"] = {
"Kenyang",
35650,
"nic-mam",
"Latn",
}
m["keo"] = {
"Kakwa",
3033547,
"sdv-bri",
}
m["kep"] = {
"Kaikadi",
6347757,
"dra-tam",
}
m["keq"] = {
"Kamar",
14916877,
"inc-hal",
}
m["ker"] = {
"Kera",
56251,
"cdc-est",
"Latn",
}
m["kes"] = {
"Kugbo",
3813394,
"nic-cde",
"Latn",
}
m["ket"] = {
"ခေပ်",
33485,
"qfa-yno",
"Cyrl",
entry_name = {
from = {"['’]"},
to = {"ʼ"}
},
sort_key = {
from = {"ӷ", "ё", "ӄ", "ӈ", "ө", "ә", "ʼ"},
to = {"г" .. p[1], "е" .. p[1], "к" .. p[1], "н" .. p[1], "о" .. p[1], "ъ" .. p[1], "ь" .. p[1]}
},
}
m["keu"] = {
"Akebu",
35026,
"alv-ktg",
"Latn",
}
m["kev"] = {
"ကာနေတ်ကာရာန်",
6363201,
"dra-mal",
}
m["kew"] = {
"Kewa",
12952619,
"paa-eng",
"Latn",
}
m["kex"] = {
"Kukna",
5031131,
"inc-eas",
ancestors = "bh",
}
m["key"] = {
"Kupia",
6445354,
"inc-eas",
}
m["kez"] = {
"Kukele",
3915391,
"nic-ucn",
"Latn",
}
m["kfa"] = {
"ကိုဝ်ဒါဝါ",
33531,
"dra-kod",
"Knda, Mlym",
translit = {
Knda = "kn-translit",
Mlym = "ml-translit"
},
}
m["kfb"] = {
"ကိုဝ်လာမဳ",
33479,
"dra-knk",
"Deva, Telu",
translit = {
Telu = "te-translit",
},
}
m["kfc"] = {
"ခေါန်ဒါ-ဒါဝ်ရာ",
35679,
"dra-kki",
"Orya, Telu",
translit = {
Orya = "gon-Orya-translit",
Telu = "te-translit",
},
}
m["kfd"] = {
"ခါဝ်ရာတ် ခါဝ်ရာဂါ",
12952655,
"dra-kor",
"Knda",
translit = "kn-translit",
}
m["kfe"] = {
"ကိုဝ်တာ (အိန္ဒိယ)",
33483,
"dra-tkt",
"Taml",
translit = "ta-translit",
}
m["kff"] = {
"ခါဝ်ယျာ",
33471,
"dra-gon",
}
m["kfg"] = {
"Kudiya",
12952667,
"dra-tlk",
}
m["kfh"] = {
"ခူရဳချဳယျာ",
12952676,
"dra-mal",
"Mlym",
translit = "ml-translit",
}
m["kfi"] = {
"Kannada Kurumba",
56589,
"dra-sdo",
}
m["kfj"] = {
"Kemiehua",
27144776,
"mkh-pal",
}
m["kfk"] = {
"Kinnauri",
2383208,
"sit-kin",
"Takr, Deva, Latn",
}
m["kfl"] = {
"Kung",
6444510,
"nic-rnc",
"Latn",
}
m["kfn"] = {
"Kuk",
6442398,
"nic-rnc",
"Latn",
}
m["kfo"] = {
"Koro (West Africa)",
11160588,
"dmn-mnk",
"Latn, Nkoo",
}
m["kfp"] = {
"Korwa",
6432786,
"mun",
}
m["kfq"] = {
"Korku",
33715,
"mun",
}
m["kfr"] = {
"ကာတ်ချဳ",
56487,
"inc-snd",
"Gujr, sd-Arab, Sind, Khoj",
translit = {
Gujr = "gu-translit",
Sind = "Sind-translit",
},
entry_name = {
remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
from = {u(0x0671)},
to = {u(0x0627)}
},
}
m["kfs"] = {
"ဖဳလာသၜေါအ်ရဳ",
12953397,
"him",
"Deva, Takr",
translit = "hi-translit",
}
m["kft"] = {
"Kanjari",
12953610,
"inc-pan",
ancestors = "pa",
}
m["kfu"] = {
"Katkari",
6377671,
"inc-sou",
}
m["kfv"] = {
"Kurmukar",
6446193,
"inc-eas",
}
m["kfw"] = {
"Kharam Naga",
12952906,
"tbq-kuk",
}
m["kfx"] = {
"ခူဠူ ပါဟာရေဝ်",
6443148,
"him",
"Deva",
translit = "hi-translit",
}
m["kfy"] = {
"ခူမာအဝ်နဳ",
33529,
"inc-pah",
"Deva, Shrd, Takr",
translit = "hi-translit",
}
m["kfz"] = {
"Koromfé",
35701,
"nic-gur",
"Latn",
}
m["kga"] = {
"Koyaga",
11155632,
"dmn-mnk",
}
m["kgb"] = {
"Kawe",
12952750,
"poz-hce",
}
m["kgd"] = {
"Kataang",
12953622,
"mkh",
}
m["kge"] = {
"Komering",
49224,
"poz-lgx",
}
m["kgf"] = {
"Kube",
11732359,
"ngf",
}
m["kgg"] = {
"Kusunda",
33630,
"qfa-iso",
"Latn",
}
m["kgi"] = {
"Selangor Sign Language",
33731,
"sgn",
}
m["kgj"] = {
"Gamale Kham",
22236996,
"sit-kha",
"Deva",
}
m["kgk"] = {
"Kaiwá",
3111883,
"tup-gua",
"Latn",
}
m["kgl"] = {
"Kunggari",
10550184,
"aus-pam",
}
m["kgm"] = {
"Karipúna",
6371069,
}
m["kgn"] = {
"ကာရေန်ကာနဳ",
6371041,
"xme-ttc",
ancestors = "xme-ttc-nor",
}
m["kgo"] = {
"Krongo",
6438927,
"qfa-kad",
"Latn",
}
m["kgp"] = {
"ခါဲဂါန်",
2665734,
"sai-sje",
"Latn",
}
m["kgq"] = {
"Kamoro",
6359001,
"ngf",
}
m["kgr"] = {
"Abun",
56657,
"paa",
"Latn",
}
m["kgs"] = {
"Kumbainggar",
3915412,
"aus-pam",
}
m["kgt"] = {
"Somyev",
3913354,
"nic-mmb",
"Latn",
}
m["kgu"] = {
"Kobol",
11732325,
"ngf-mad",
}
m["kgv"] = {
"Karas",
6368621,
"ngf",
}
m["kgw"] = {
"Karon Dori",
56817,
}
m["kgx"] = {
"Kamaru",
12953604,
"poz",
}
m["kgy"] = {
"Kyerung",
12952691,
"sit-kyk",
}
m["kha"] = {
"ခါသဳ",
33584,
"aav-pkl",
"Latn, as-Beng",
}
m["khb"] = {
"သေံလု",
36948,
"tai-swe",
"Talu, Lana",
translit = {
Talu = "Talu-translit",
Lana = "Lana-translit",
},
entry_name = {remove_diacritics = c.ZWNJ},
sort_key = {
Talu = "Talu-sortkey",
Lana = "Lana-sortkey",
},
}
m["khc"] = {
"Tukang Besi North",
18611555,
"poz",
}
m["khd"] = {
"Bädi Kanum",
20888004,
"paa-yam",
}
m["khe"] = {
"Korowai",
6432598,
"ngf",
}
m["khf"] = {
"Khuen",
27144893,
"mkh",
}
m["khh"] = {
"Kehu",
10994953,
}
m["khj"] = {
"Kuturmi",
3914490,
"nic-plc",
"Latn",
}
m["khl"] = {
"Lusi",
3267788,
"poz-ocw",
"Latn",
}
m["khn"] = {
"Khandeshi",
33726,
"inc-sou",
}
m["kho"] = {
"ခဝ်တေန်နေတ်",
6583551,
"xsc-sak",
"Brah, Khar",
translit = "Brah-translit",
}
m["khp"] = {
"Kapauri",
3502575,
"paa-tkw",
}
m["khq"] = {
"Koyra Chiini",
33600,
"son",
}
m["khr"] = {
"Kharia",
3915562,
"mun",
}
m["khs"] = {
"Kasua",
6374863,
"ngf",
}
m["kht"] = {
"သေံခဂၞဳ",
3915502,
"tai-swe",
"Mymr",
translit = "kht-translit",
entry_name = {remove_diacritics = c.VS01},
}
m["khu"] = {
"Nkhumbi",
11019169,
"bnt-swb",
}
m["khv"] = {
"ခပါဃှဳ",
56425,
"cau-wts",
"Cyrl",
translit = "khv-translit",
display_text = {Cyrl = s["cau-Cyrl-displaytext"]},
entry_name = {Cyrl = s["cau-Cyrl-entryname"]},
}
m["khw"] = {
"ခါဝ်ဝါ",
938216,
"inc-chi",
"Arab",
entry_name = {
-- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
from = {"هٔ", "ۂ", "ٱ"},
to = {"ہ", "ہ", "ا"},
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef
},
}
m["khx"] = {
"Kanu",
12952571,
"bnt-lgb",
}
m["khy"] = {
"Ekele",
6385549,
"bnt-ske",
"Latn",
}
m["khz"] = {
"Keapara",
12952603,
"poz-ocw",
"Latn",
}
m["kia"] = {
"Kim",
35685,
"alv-kim",
}
m["kib"] = {
"Koalib",
35859,
"alv-hei",
}
m["kic"] = {
"ခေတ်ခါၜေအ်",
20162127,
"alg-sfk",
"Latn",
}
m["kid"] = {
"Koshin",
35632,
"nic-beb",
"Latn",
}
m["kie"] = {
"Kibet",
56893,
}
m["kif"] = {
"Eastern Parbate Kham",
12953022,
"sit-kha",
"Deva",
}
m["kig"] = {
"Kimaama",
11732321,
"ngf",
}
m["kih"] = {
"Kilmeri",
6408020,
"paa-brd",
}
m["kii"] = {
"Kitsai",
56627,
"cdd",
"Latn",
}
m["kij"] = {
"Kilivila",
3196601,
"poz-ocw",
"Latn",
}
m["kil"] = {
"Kariya",
3438708,
"cdc-wst",
}
m["kim"] = {
"တဝ်ဖှာ",
36848,
"trk-ssb",
"Cyrl",
}
m["kio"] = {
"Kiowa",
56631,
"nai-kta",
"Latn",
}
m["kip"] = {
"Sheshi Kham",
12952622,
"sit-kha",
"Deva",
}
m["kiq"] = {
"Kosadle",
6432994,
}
m["kis"] = {
"ခေတ်",
6416362,
"poz-ocw",
"Latn",
}
m["kit"] = {
"Agob",
3332143,
nil,
"Latn",
}
m["kiv"] = {
"Kimbu",
10997740,
"bnt-tkm",
}
m["kiw"] = {
"Northeast Kiwai",
11732324,
"paa-kiw",
}
m["kix"] = {
"ခဳယျာနဳယျာဂါမ် နာဂ",
6401546,
"sit-kch",
"Latn",
}
m["kiy"] = {
"Kirikiri",
6415159,
"paa-lkp",
}
m["kiz"] = {
"Kisi",
3912772,
"bnt-bki",
}
m["kja"] = {
"Mlap",
6885683,
"paa-nim",
}
m["kjb"] = {
"ခွါန်ဂျေပ်ဗါဝ်",
35551,
"myn",
"Latn",
}
m["kjc"] = {
"Coastal Konjo",
3198689,
"poz",
}
m["kjd"] = {
"Southern Kiwai",
11732322,
"paa-kiw",
}
m["kje"] = {
"Kisar",
3197441,
"poz",
}
m["kjg"] = {
"ခမူ",
33335,
"mkh",
"Laoo",
sort_key = "Laoo-sortkey",
}
m["kjh"] = {
"ခါခေတ်",
33575,
"trk-ssb",
"Cyrl",
translit = "kjh-translit",
override_translit = true,
}
m["kji"] = {
"Zabana",
379130,
"poz-ocw",
"Latn",
}
m["kjj"] = {
"ဟဳနာလေတ်",
35278,
"cau-nec",
"Cyrl, Latn",
translit = "kjj-translit",
override_translit = true,
display_text = {Cyrl = s["cau-Cyrl-displaytext"]},
entry_name = {
Cyrl = s["cau-Cyrl-entryname"],
Latn = s["cau-Latn-entryname"],
},
}
m["kjk"] = {
"Highland Konjo",
3198688,
"poz",
}
m["kjl"] = {
"Western Parbate Kham",
22237017,
"sit-kha",
"Deva",
}
m["kjm"] = {
"Kháng",
6403501,
"mkh-pal",
}
m["kjn"] = {
"Kunjen",
3200468,
"aus-pmn",
"Latn",
}
m["kjo"] = {
"Harijan Kinnauri",
5657463,
"him",
}
m["kjp"] = {
"ကရေၚ်ပဝ်လ္ပာ်ဗၟံက်",
5330390,
"kar",
"Mymr, Leke, Thai",
translit = "kjp-translit",
override_translit = true,
}
m["kjq"] = {
"Western Keres",
12645568,
"nai-ker",
"Latn",
}
m["kjr"] = {
"Kurudu",
12952678,
"poz-hce",
"Latn",
}
m["kjs"] = {
"East Kewa",
20050949,
"paa-eng",
}
m["kjt"] = {
"Phrae Pwo",
7187991,
"kar",
"Thai",
}
m["kju"] = {
"Kashaya",
3193689,
"nai-pom",
"Latn",
}
m["kjx"] = {
"Ramopa",
56830,
"paa-nbo",
}
m["kjy"] = {
"Erave",
12952416,
"paa-eng",
}
m["kjz"] = {
"Bumthangkha",
2786408,
"sit-ebo",
"Tibt",
translit = "Tibt-translit",
override_translit = true,
display_text = s["Tibt-displaytext"],
entry_name = s["Tibt-entryname"],
sort_key = "Tibt-sortkey",
}
m["kka"] = {
"ကာကာန်ဒါ",
3915342,
"alv-ngb",
}
m["kkb"] = {
"Kwerisa",
56881,
"paa-lkp",
}
m["kkc"] = {
"Odoodee",
12952987,
}
m["kkd"] = {
"Kinuku",
6414422,
"nic-kau",
}
m["kke"] = {
"Kakabe",
3913966,
"dmn-mok",
"Latn",
}
m["kkf"] = {
"Kalaktang Monpa",
63257089,
"sit-tsk",
"Tibt, Latn, Deva",
translit = {Tibt = "Tibt-translit"},
override_translit = true,
display_text = {Tibt = s["Tibt-displaytext"]},
entry_name = {Tibt = s["Tibt-entryname"]},
sort_key = {Tibt = "Tibt-sortkey"},
}
m["kkg"] = {
"Mabaka Valley Kalinga",
18753304,
"phi",
}
m["kkh"] = {
"သေံဃိန်",
3545044,
"tai-swe",
"Lana, Thai",
translit = {
Lana = "Lana-translit",
Thai = "Thai alphabet-translit",
},
sort_key = {
Lana = "Lana-sortkey",
Thai = "Thai-sortkey"
},
}
m["kki"] = {
"Kagulu",
12952537,
"bnt-ruv",
"Latn",
}
m["kkj"] = {
"Kako",
35755,
"bnt-kak",
}
m["kkk"] = {
"Kokota",
3198399,
"poz-ocw",
"Latn",
}
m["kkl"] = {
"Kosarek Yale",
6432995,
"ngf",
}
m["kkm"] = {
"Kiong",
6414512,
"nic-ucr",
"Latn",
}
m["kkn"] = {
"Kon Keu",
6428686,
"mkh-pal",
}
m["kko"] = {
"Karko",
35529,
"nub-hil",
}
m["kkp"] = {
"Koko-Bera",
6426699,
"aus-pmn",
"Latn",
}
m["kkq"] = {
"Kaiku",
6347840,
"bnt-kbi",
"Latn",
}
m["kkr"] = {
"Kir-Balar",
3440527,
"cdc-wst",
"Latn",
}
m["kks"] = {
"Kirfi",
56242,
"cdc-wst",
"Latn",
}
m["kkt"] = {
"Koi",
6426194,
"sit-kiw",
}
m["kku"] = {
"Tumi",
3913934,
"nic-kau",
}
m["kkv"] = {
"Kangean",
2071325,
"poz-msa",
"Latn",
}
m["kkw"] = {
"Teke-Kukuya",
36560,
"bnt-tek",
}
m["kkx"] = {
"Kohin",
6425997,
"poz-brw",
}
m["kky"] = {
"Guugu Yimidhirr",
56543,
"aus-pam",
"Latn",
}
m["kkz"] = {
"Kaska",
20823,
"ath-nor",
"Latn",
}
m["kla"] = {
"Klamath-Modoc",
2669248,
"nai-plp",
"Latn",
}
m["klb"] = {
"Kiliwa",
3182593,
"nai-yuc",
"Latn",
}
m["klc"] = {
"Kolbila",
6427122,
"alv-lek",
}
m["kld"] = {
"ကာမိလာရာဲ",
3111818,
"aus-cww",
"Latn",
}
m["kle"] = {
"Kulung",
6443304,
"sit-kic",
}
m["klf"] = {
"Kendeje",
56895,
}
m["klg"] = {
"Tagakaulu Kalagan",
18756514,
"phi",
}
m["klh"] = {
"Weliki",
7981017,
"ngf-fin",
"Latn",
}
m["kli"] = {
"Kalumpang",
13561407,
"poz",
}
m["klj"] = {
"ခါပ်လေတ်",
33455,
"trk",
"fa-Arab, Latn",
ancestors = "klj-arg",
entry_name = {
remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun,
}
}
m["klk"] = {
"Kono (Nigeria)",
6429589,
"nic-kau",
"Latn",
}
m["kll"] = {
"Kagan Kalagan",
18748913,
"phi",
}
m["klm"] = {
"Kolom",
6844970,
"ngf-mad",
"Latn",
}
m["kln"] = {
"Kalenjin",
637228,
"sdv-nma",
"Latn",
}
m["klo"] = {
"Kapya",
6367410,
"nic-ykb",
}
m["klp"] = {
"Kamasa",
6356107,
"ngf",
}
m["klq"] = {
"Rumu",
7379420,
"ngf",
}
m["klr"] = {
"ခါလေန်",
56381,
"sit-kiw",
"Deva",
}
m["kls"] = {
"ကလာချာ",
33416,
"inc-chi",
"Latn, ks-Arab",
}
m["klt"] = {
"Nukna",
7068874,
"ngf-fin",
"Latn",
}
m["klu"] = {
"Klao",
3914866,
"kro-wkr",
}
m["klv"] = {
"မာတ်သခေလေါန်",
3297282,
"poz-vnc",
"Latn",
}
m["klw"] = {
"လေန်ဒူ",
18390055,
"poz-kal",
"Latn",
}
m["klx"] = {
"Koluwawa",
6427954,
"poz-ocw",
"Latn",
}
m["kly"] = {
"Kalao",
6350643,
"poz",
}
m["klz"] = {
"Kabola",
11732258,
"qfa-tap",
}
m["kma"] = {
"Konni",
35680,
"nic-buk",
}
m["kmb"] = {
"ခေၚ်ဗွိုန်ဒူန်",
35891,
"bnt-kmb",
"Latn",
}
m["kmc"] = {
"ကါမ် လ္ပာ်ဒိုဟ်သမၠုၚ်ကျာ",
35379,
"qfa-kms",
"Latn",
}
m["kmd"] = {
"Madukayang Kalinga",
18753305,
"phi",
}
m["kme"] = {
"Bakole",
35068,
"bnt-kpw",
}
m["kmf"] = {
"Kare (New Guinea)",
11732286,
"ngf-mad",
"Latn",
}
m["kmg"] = {
"Kâte",
3201059,
"ngf",
}
m["kmh"] = {
"Kalam",
12952550,
"ngf-mad",
}
m["kmi"] = {
"ခမ်မဳ",
3915372,
"alv-ngb",
"Latn",
}
m["kmj"] = {
"ကူမာရာတ်ဗါတ် ပါဟာရဳယျာ",
3130374,
"dra-mlo",
"Beng, Deva",
}
m["kmk"] = {
"လေန်မဝ်သေန် ကလေန်ဂါ",
18753303,
"phi",
}
m["kml"] = {
"တနုဒါန် ကလဳၚ်္ဂါ",
18753307,
"phi",
"Latn",
}
m["kmm"] = {
"Kom (India)",
12952647,
"tbq-kuk",
}
m["kmn"] = {
"Awtuw",
3504217,
"paa-spk",
}
m["kmo"] = {
"Kwoma",
11732376,
"paa-spk",
}
m["kmp"] = {
"Gimme",
11152236,
"alv-dur",
}
m["kmq"] = {
"Kwama",
2591184,
"ssa-kom",
}
m["kmr"] = {
"ကာဒ် လ္ပာ်သၟဝ်ကျာ",
36163,
"ku",
"Latn, Cyrl, Armn, ku-Arab, Yezi",
translit = {
Cyrl = "kmr-translit",
Armn = "Armn-translit",
["ku-Arab"] = "ckb-translit",
},
entry_name = {
remove_diacritics = "'’",
from = {"r̄", "R̄", "ẍ", "Ẍ"},
to = {"rr", "Rr", "x", "X"}
},
wikimedia_codes = "ku",
}
m["kms"] = {
"Kamasau",
6356117,
"qfa-tor",
"Latn",
}
m["kmt"] = {
"Kemtuik",
6387179,
"paa-nim",
}
m["kmu"] = {
"Kanite",
12952567,
"paa-kag",
}
m["kmv"] = {
"Karipúna Creole French",
2523999,
"crp",
"Latn",
ancestors = "fr",
sort_key = s["roa-oil-sortkey"],
}
m["kmw"] = {
"Kumu",
6428450,
"bnt-kbi",
"Latn",
}
m["kmx"] = {
"Waboda",
7958705,
"paa-kiw",
}
m["kmy"] = {
"Koma",
35634,
"alv-dur",
}
m["kmz"] = {
"Khorasani Turkish",
35373,
"trk-ogz",
ancestors = "trk-oat",
}
m["kna"] = {
"Kanakuru",
56811,
"cdc-wst",
"Latn",
}
m["knb"] = {
"ဠူၜေအ်ဂါန် ကလဳၚ်္ဂါ",
12953602,
"phi",
}
m["knd"] = {
"Konda",
11732340,
"ngf-sbh",
"Latn",
}
m["kne"] = {
"ကာန်ခါနာအဳ",
18753329,
"phi",
"Latn",
entry_name = {
Latn = {
remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer,
}
},
sort_key = {
Latn = "tl-sortkey",
},
standardChars = {
Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy" .. c.punc,
},
}
m["knf"] = {
"မေန်ခမ်ယျာ",
35789,
"alv-pap",
}
m["kni"] = {
"Kanufi",
3913297,
"nic-nin",
"Latn",
}
m["knj"] = {
"အာကာတေက်",
34923,
"myn",
"Latn",
}
m["knk"] = {
"ခူရာန်ခဝ်",
3198896,
"dmn-mok",
"Latn",
}
m["knl"] = {
"Keninjal",
6389309,
"poz-mly",
}
m["knm"] = { -- two unrelated lects have this name; this is the Katukinian one
"Kanamari",
3438373,
"sai-ktk",
"Latn",
}
m["kno"] = {
"ခဝ်နဝ် (သဲယျာရာ လဳယျေန်နဳ)",
35675,
"dmn-vak",
}
m["knp"] = {
"Kwanja",
35641,
"nic-mmb",
"Latn",
}
m["knq"] = {
"Kintaq",
6414335,
"mkh-asl",
}
m["knr"] = {
"Kaningra",
6363253,
"paa-spk",
}
m["kns"] = {
"Kensiu",
6391529,
"mkh-asl",
}
m["knt"] = {
"Katukina",
3194265,
"sai-pan",
"Latn",
}
m["knu"] = { -- a dialect of 'kpe'
"Kono (Guinea)",
3198703,
"dmn-msw",
"Latn, Kpel",
ancestors = "kpe",
}
m["knv"] = {
"Tabo",
7959888,
"aav",
}
m["knx"] = {
"Kendayan",
6388963,
"poz-mly",
"Latn",
}
m["kny"] = {
"Kanyok",
11110766,
"bnt-lub",
}
m["knz"] = {
"Kalamsé",
3914000,
"nic-gnn",
}
m["koa"] = {
"Konomala",
3198732,
"poz-ocw",
"Latn",
}
m["koc"] = {
"Kpati",
3913279,
"nic-nge",
"Latn",
}
m["kod"] = {
"Kodi",
4577633,
}
m["koe"] = {
"Kacipo-Balesi",
5364424,
"sdv",
}
m["kof"] = {
"Kubi",
3438718,
"cdc-wst",
"Latn",
}
m["kog"] = {
"ခါဝ်ဂွဳ",
3198286,
"cba",
}
m["koh"] = {
"Koyo",
35649,
"bnt-mbo",
"Latn",
}
m["koi"] = {
"ခဝ်မဳ-ဖေန်ယျိတ်",
56318,
"urj-prm",
"Cyrl",
translit = "kv-translit",
entry_name = {remove_diacritics = c.acute},
override_translit = true,
}
m["kok"] = {
"ခေန်ကနဳ",
34239,
"inc-sou",
"Deva, Knda, Mlym, fa-Arab, Latn",
translit = {
Deva = "mr-translit",
Knda = "kn-translit",
Mlym = "ml-translit",
},
entry_name = {
from = {"च़", "ज़", "झ़", "ಚ಼", "ಜ಼", "ಝ಼"},
to = {"च", "ज", "झ", "ಚ", "ಜ", "ಝ"}
} ,
}
m["kol"] = {
"Kol (New Guinea)",
4227542,
}
m["koo"] = {
"Konzo",
2361829,
"bnt-glb",
}
m["kop"] = {
"Waube",
11732373,
"ngf-mad",
}
m["koq"] = {
"Kota (Gabon)",
35607,
"bnt-kel",
"Latn",
}
m["kos"] = {
"Kosraean",
33464,
"poz-mic",
"Latn",
}
m["kot"] = {
"Lagwan",
3502264,
"cdc-cbm",
"Latn",
}
m["kou"] = {
"Koke",
797249,
"alv-bua",
}
m["kov"] = {
"Kudu-Camo",
3915850,
"nic-jer",
}
m["kow"] = {
"Kugama",
3913307,
"alv-mye",
}
m["koy"] = {
"Koyukon",
28304,
"ath-nor",
"Latn",
}
m["koz"] = {
"Korak",
6431365,
"ngf-mad",
}
m["kpa"] = {
"Kutto",
3437656,
"cdc-wst",
}
m["kpb"] = {
"မူဠူ ကူရုန်ဗါ",
19573111,
"dra-mal",
}
m["kpc"] = {
"Curripaco",
2882543,
"awd-nwk",
"Latn",
}
m["kpd"] = {
"Koba",
6424249,
"poz",
}
m["kpe"] = {
"Kpelle",
35673,
"dmn-msw",
"Latn, Kpel",
}
m["kpf"] = {
"Komba",
6428239,
"ngf",
}
m["kpg"] = {
"ကပေန်ဂါမာရာန်ဂဳ",
35771,
"poz-pnp",
"Latn",
}
m["kph"] = {
"Kplang",
35628,
"alv-gng",
}
m["kpi"] = {
"Kofei",
6425665,
"paa-egb",
"Latn",
}
m["kpj"] = {
"Karajá",
10322066,
"sai-mje",
"Latn",
}
m["kpk"] = {
"Kpan",
3915380,
"nic-jkn",
"Latn",
}
m["kpl"] = {
"Kpala",
11154769,
"nic-nkk",
"Latn",
}
m["kpm"] = {
"ကိုဝ်ဟဝ်",
3511919,
"mkh-ban",
"Latn",
}
m["kpn"] = {
"Kepkiriwát",
3195366,
"tup",
"Latn",
}
m["kpo"] = {
"Ikposo",
35029,
"alv-ktg",
"Latn",
}
m["kpq"] = {
"Korupun-Sela",
6432769,
"ngf",
}
m["kpr"] = {
"Korafe-Yegha",
11732347,
"ngf",
}
m["kps"] = {
"Tehit",
7694851,
}
m["kpt"] = {
"ခါရာတ",
56636,
"cau-and",
"Cyrl",
display_text = {Cyrl = s["cau-Cyrl-displaytext"]},
entry_name = {Cyrl = s["cau-Cyrl-entryname"]},
}
m["kpu"] = {
"Kafoa",
6346151,
"qfa-tap",
}
m["kpv"] = {
"ခဝ်မဳ-သဳရေဝ်ယာန်",
34114,
"urj-prm",
"Cyrl",
translit = "kv-translit",
override_translit = true,
wikimedia_codes = "kv",
}
m["kpw"] = {
"ကဝ်ဗေန်",
11732326,
"ngf-mad",
}
m["kpx"] = {
"Mountain Koiari",
6925030,
"ngf",
}
-- Language record for code "kpy". Positional fields follow the layout used by
-- every record in this file: name, numeric ID, family code, script code(s).
m["kpy"] = {
"Koryak",
36199,
"qfa-ckn",
"Cyrl",
-- Entry names: an ASCII apostrophe (') or a right single quotation mark (’)
-- is replaced by the modifier letter apostrophe (ʼ).
entry_name = {
from = {"['’]"},
to = {"ʼ"}
},
-- Sort keys: each sequence in `from` is rewritten to a plain base letter
-- followed by p[1] (an entry of m_langdata.puaChars).
-- NOTE(review): presumably this makes the marked letter sort directly after
-- its base letter -- confirm against the sort-key consumer.
sort_key = {
from = {"вʼ", "гʼ", "ё", "ӄ", "ӈ"},
to = {"в" .. p[1], "г" .. p[1], "е" .. p[1], "к" .. p[1], "н" .. p[1]}
},
translit = "kpy-translit",
}
m["kpz"] = {
"Kupsabiny",
56445,
"sdv-kln",
}
m["kqa"] = {
"Mum",
6935252,
"ngf-mad",
}
m["kqb"] = {
"Kovai",
6434822,
"ngf",
}
m["kqc"] = {
"Doromu-Koki",
5298175,
"ngf",
}
m["kqd"] = {
"Koy Sanjaq Surat",
33463,
"sem-nna",
}
m["kqe"] = {
"Kalagan",
18748906,
"phi",
}
m["kqf"] = {
"Kakabai",
6349119,
"poz-ocw",
"Latn",
}
m["kqg"] = {
"Khe",
3914015,
"nic-gur",
}
m["kqh"] = {
"Kisankasa",
6416409,
"sdv",
}
m["kqi"] = {
"Koitabu",
6426363,
"ngf",
}
m["kqj"] = {
"Koromira",
6432520,
"paa-sbo",
}
m["kqk"] = {
"Kotafon Gbe",
12952447,
"alv-pph",
}
m["kql"] = {
"Kyenele",
11732453,
"paa-yua",
}
m["kqm"] = {
"Khisa",
3913955,
"nic-gur",
}
m["kqn"] = {
"Kaonde",
33601,
"bnt-lub",
"Latn",
}
m["kqo"] = {
"Eastern Krahn",
3915374,
"kro-wee",
}
m["kqp"] = {
"Kimré",
3441210,
"cdc-est",
}
m["kqq"] = {
"Krenak",
6436747,
"sai-cer",
}
m["kqr"] = {
"Kimaragang",
3196845,
"poz-san",
"Latn",
}
m["kqs"] = {
"Northern Kissi",
19921576,
"alv-kis",
}
m["kqt"] = {
"Klias River Kadazan",
12953594,
"poz-san",
}
m["kqu"] = {
"Seroa",
33127766,
"khi-tuu",
}
m["kqv"] = {
"Okolod",
7082487,
"poz-san",
}
m["kqw"] = {
"ခါန်ဒါတ်သ်",
3192590,
"poz-ocw",
"Latn",
}
m["kqx"] = {
"Mser",
3502347,
"cdc-cbm",
}
m["kqy"] = {
"Koorete",
6430753,
"omv-eom",
}
m["kqz"] = {
"Korana",
2756709,
"khi-khk",
"Latn",
}
m["kra"] = {
"Kumhali",
13580783,
"inc-eas",
ancestors = "bh",
}
m["krb"] = {
"Karkin",
3193345,
"nai-you",
"Latn",
}
m["krc"] = {
"ကရာချဲ-ဗါဝ်ကာ",
33714,
"trk-kcu",
"Cyrl",
translit = "krc-translit",
sort_key = {
from = {"гъ", "дж", "ё", "къ", "нг"},
to = {"г" .. p[1], "д" .. p[1], "е" .. p[1], "к" .. p[1], "н" .. p[1]}
},
}
m["krd"] = {
"Kairui-Midiki",
12953277,
"poz-tim",
}
m["kre"] = {
"Panará",
3361895,
"sai-cer",
}
m["krf"] = {
"Koro (Vanuatu)",
3198995,
"poz-vnn",
"Latn",
}
m["krh"] = {
"Kurama",
35593,
"nic-kau",
}
m["kri"] = {
"ခရိအဝ်",
35744,
"crp",
"Latn",
ancestors = "en",
}
m["krj"] = {
"ခဳနာရော-အာ",
33720,
"phi",
"Latn",
}
m["krk"] = {
"Kerek",
332792,
"qfa-ckn",
"Cyrl",
}
-- Language record for code "krl". Positional fields follow the layout used by
-- every record in this file: name, numeric ID, family code, script code(s).
m["krl"] = {
"ခါရေဝ်လဳယာန်",
33557,
"urj-fin",
"Latn",
-- Sort keys: `from` and `to` are parallel lists; the n-th item of `from` is
-- rewritten to the n-th item of `to`, i.e. a base letter followed by one of
-- p[1]..p[4] (entries of m_langdata.puaChars):
--   č -> c+p[1]   š -> s+p[1]   z -> s+p[2]   ž -> s+p[3]
--   ü -> y+p[1]   ä -> y+p[2]   ö -> y+p[3]   ' -> y+p[4]
-- NOTE(review): presumably the p[n] suffixes order these letters after the
-- base letter (s < š < z < ž; y < ü < ä < ö < ') -- confirm.
sort_key = {
from = {
"č", "š", "ž", "ü", "ä", "ö", -- 2 chars
"z", "'" -- 1 char
},
to = {
"c" .. p[1], "s" .. p[1], "s" .. p[3], "y" .. p[1], "y" .. p[2], "y" .. p[3],
"s" .. p[2], "y" .. p[4],
}
},
}
m["krm"] = {
"Krim",
35713,
"alv",
}
m["krn"] = {
"Sapo",
3915386,
"kro-wee",
}
m["krp"] = {
"Korop",
35626,
"nic-ucr",
"Latn",
}
m["krr"] = {
"Kru'ng",
12953650,
"mkh-ban",
}
m["krs"] = {
"Kresh",
56674,
"csu-bkr",
}
m["kru"] = {
"ကူရု",
33492,
"dra-kml",
"Deva, Tols",
translit = {
Deva = "hi-translit",
},
}
m["krv"] = {
"Kavet",
12953649,
"sai-ktk",
"Latn",
}
m["krw"] = {
"Western Krahn",
10975611,
"kro-wee",
}
m["krx"] = {
"Karon",
35704,
"alv-jol",
}
m["kry"] = {
"Kryts",
35861,
"cau-ssm",
"Latn, Cyrl",
display_text = {Cyrl = s["cau-Cyrl-displaytext"]},
entry_name = {
Latn = s["cau-Latn-entryname"],
Cyrl = s["cau-Cyrl-entryname"],
},
}
m["krz"] = {
"Sota Kanum",
12952568,
"paa-yam",
}
m["ksa"] = {
"Shuwa-Zamani",
3913929,
"nic-kau",
}
m["ksb"] = {
"Shambala",
3788739,
"bnt-seu",
"Latn",
}
m["ksc"] = {
"Southern Kalinga",
18753301,
"phi",
}
m["ksd"] = {
"တဝ်လာၚ်",
35870,
"poz-ocw",
"Latn",
}
m["kse"] = {
"Kuni",
6444619,
"poz-ocw",
"Latn",
}
m["ksf"] = {
"Bafia",
34930,
"bnt-baf",
}
m["ksg"] = {
"Kusaghe",
3200638,
"poz-ocw",
"Latn",
}
m["ksi"] = {
"ခရေတ်သ",
841704,
"paa-msk",
"Latn",
}
m["ksj"] = {
"Uare",
6450052,
"ngf",
}
m["ksk"] = {
"Kansa",
3192772,
"sio-dhe",
}
m["ksl"] = {
"Kumalu",
17584381,
"poz-ocw",
"Latn",
}
m["ksm"] = {
"Kumba",
3913972,
"alv-mye",
}
m["ksn"] = {
"Kasiguranin",
6374525,
"phi",
}
m["kso"] = {
"Kofa",
56278,
"cdc-cbm",
}
m["ksp"] = {
"Kaba",
3915316,
"csu-sar",
}
m["ksq"] = {
"Kwaami",
3440525,
"cdc-wst",
}
m["ksr"] = {
"Borong",
4946263,
"ngf",
}
m["kss"] = {
"Southern Kissi",
11028974,
"alv-kis",
}
m["kst"] = {
"Winyé",
3913360,
"nic-gnw",
}
m["ksu"] = {
"Khamyang",
6583541,
"tai-swe",
}
m["ksv"] = {
"Kusu",
6448199,
"bnt-tet",
}
m["ksw"] = {
"ကရေၚ်သကုဝ်",
56410,
"kar",
"Mymr",
translit = "ksw-translit",
}
m["ksx"] = {
"Kedang",
6382520,
"poz",
"Latn",
}
m["ksy"] = {
"Kharia Thar",
6400661,
"inc-eas",
}
m["ksz"] = {
"Kodaku",
21179986,
"mun",
}
m["kta"] = {
"Katua",
6378404,
"mkh-ban",
}
m["ktb"] = {
"Kambaata",
35664,
"cus-hec",
"Latn",
}
m["ktc"] = {
"Kholok",
3440464,
"cdc-wst",
}
m["ktd"] = {
"Kokata",
10547021,
"aus-pam",
}
m["ktf"] = {
"Kwami",
12952687,
"bnt-lgb",
}
m["ktg"] = {
"Kalkatungu",
3914057,
"aus-pam",
"Latn",
}
m["kth"] = {
"Karanga",
713643,
}
m["kti"] = {
"North Muyu",
20857698,
"ngf",
"Latn",
}
m["ktj"] = {
"Plapo Krumen",
10975356,
"kro-grb",
}
m["ktk"] = {
"Kaniet",
3399050,
"poz-aay",
"Latn",
}
m["ktl"] = {
"Koroshi",
3775265,
"ira-nwi",
ancestors = "bal",
}
m["ktm"] = {
"Kurti",
3200615,
"poz-aay",
"Latn",
}
m["ktn"] = {
"Karitiâna",
3112184,
"tup",
"Latn",
}
m["kto"] = {
"Kuot",
56537,
}
m["ktp"] = {
"Kaduo",
769809,
"tbq-bka",
}
m["ktq"] = {
"Katabaga",
3193895,
}
m["ktr"] = {
"Kota Marudu Tinagas",
18642280,
}
m["kts"] = {
"South Muyu",
42308820,
"ngf",
"Latn",
}
m["ktt"] = {
"Ketum",
12952616,
"ngf",
}
m["ktu"] = {
"Kituba",
35746,
"crp",
"Latn",
ancestors = "kg",
}
m["ktv"] = {
"ကဒူ လ္ပာ်ဖာဗၟံက်",
22808951,
"mkh-kat",
"Latn",
}
m["ktw"] = {
"Kato",
20831,
"ath-pco",
"Latn",
}
m["ktx"] = {
"Kaxararí",
6380124,
"sai-pan",
"Latn",
}
m["kty"] = {
"Kango",
6362818,
"bnt-bta",
"Latn",
}
m["ktz"] = {
"Juǀ'hoan",
1192295,
"khi-kxa",
"Latn",
}
m["kub"] = {
"Kutep",
35645,
"nic-jkn",
}
m["kuc"] = {
"Kwinsu",
6450460,
"paa-tkw",
}
m["kud"] = {
"Auhelawa",
5166,
"poz-ocw",
"Latn",
}
m["kue"] = {
"Kuman",
137525,
"ngf",
"Latn",
}
m["kuf"] = {
"ကတူ လ္ပာ်ပလိုတ်",
6378400,
"mkh-kat",
"Laoo, Tale",
}
m["kug"] = {
"Kupa",
3915336,
"alv-ngb",
}
m["kuh"] = {
"Kushi",
3438747,
"cdc-wst",
}
m["kui"] = {
"ခူဣိခူရဝ်",
3915522,
"sai-kui",
"Latn",
}
m["kuj"] = {
"Kuria",
6445968,
"bnt-lok",
"Latn",
}
m["kuk"] = {
"Kepo'",
6393217,
"poz",
}
m["kul"] = {
"Kulere",
3440506,
"cdc-wst",
}
m["kum"] = {
"ခူမာတ်",
36209,
"trk-kcu",
"Cyrl",
translit = "kum-translit",
sort_key = {
from = {"гъ", "гь", "ё", "къ", "нг", "оь", "уь"},
to = {"г" .. p[1], "г" .. p[2], "е" .. p[1], "к" .. p[1], "н" .. p[1], "о" .. p[1], "у" .. p[1]}
},
}
m["kun"] = {
"Kunama",
36041,
}
m["kuo"] = {
"Kumukio",
11732362,
"ngf",
}
m["kup"] = {
"Kunimaipa",
6444696,
}
m["kuq"] = {
"Karipuna",
6371071,
"tup-gua",
"Latn",
}
m["kus"] = {
"ကူသာလ်",
35708,
"nic-dag",
"Latn",
}
m["kut"] = {
"Kutenai",
33434,
"qfa-iso",
}
m["kuu"] = {
"Upper Kuskokwim",
28062,
"ath-nor",
}
m["kuv"] = {
"Kur",
12635082,
"poz-cma",
"Latn",
}
m["kuw"] = {
"Kpagua",
11137573,
"bad-cnt",
}
m["kux"] = {
"Kukatja",
10549839,
"aus-pam",
}
m["kuy"] = {
"Kuuku-Ya'u",
10550697,
"aus-pmn",
}
m["kuz"] = {
"Kunza",
2669181,
"qfa-iso",
}
m["kva"] = {
"ဗတ်ဝါဠူ",
56638,
"cau-and",
"Cyrl",
translit = "cau-nec-translit",
override_translit = true,
display_text = {Cyrl = s["cau-Cyrl-displaytext"]},
entry_name = {Cyrl = s["cau-Cyrl-entryname"]},
}
m["kvb"] = {
"Kubu",
6441341,
"poz-mly",
}
m["kvc"] = {
"Kove",
3199402,
"poz-ocw",
"Latn",
}
m["kvd"] = {
"Kui (Indonesia)",
6442230,
"ngf",
}
m["kve"] = {
"Kalabakan",
6350003,
"poz-san",
}
m["kvf"] = {
"Kabalai",
3440427,
"cdc-est",
}
m["kvg"] = {
"Kuni-Boazi",
2907551,
"ngf",
}
m["kvh"] = {
"Komodo",
3198565,
"poz-cet",
}
m["kvi"] = {
"Kwang",
3440398,
"cdc-est",
"Latn",
}
m["kvj"] = {
"Psikye",
56304,
"cdc-cbm",
}
m["kvk"] = {
"အရေဝ်ဘာသာကွတ်တဲကိုဝ်ရဳယျာ",
3073428,
"sgn-jsl",
}
m["kvl"] = {
"Brek Karen",
12952577,
"kar",
}
m["kvm"] = {
"Kendem",
35751,
"nic-mam",
"Latn",
}
m["kvn"] = {
"Border Kuna",
31777873,
"cba",
}
m["kvo"] = {
"Dobel",
5286559,
"poz",
}
m["kvp"] = {
"Kompane",
18343041,
"poz",
}
m["kvq"] = {
"Geba Karen",
12952581,
"kar",
}
m["kvr"] = {
"Kerinci",
3195442,
"poz-mly",
}
m["kvt"] = {
"Lahta Karen",
12952582,
"kar",
}
m["kvu"] = {
"Yinbaw Karen",
14426328,
"kar",
}
m["kvv"] = {
"Kola",
6426967,
"poz",
}
m["kvw"] = {
"Wersing",
7983599,
"qfa-tap",
}
m["kvx"] = {
"Parkari Koli",
3244176,
"inc-wes",
}
m["kvy"] = {
"Yintale Karen",
14426329,
"kar",
}
m["kvz"] = {
"Tsakwambo",
7849438,
"ngf",
}
m["kwa"] = {
"Dâw",
3042278,
"sai-nad",
}
m["kwb"] = {
"Baa",
34842,
"alv-ada",
}
m["kwc"] = {
"Likwala",
35597,
"bnt-mbo",
}
m["kwd"] = {
"Kwaio",
3200796,
"poz-sls",
"Latn",
}
m["kwe"] = {
"Kwerba",
6450328,
"paa-tkw",
}
m["kwf"] = {
"Kwara'ae",
3200829,
"poz-sls",
"Latn",
}
m["kwg"] = {
"Sara Kaba Deme",
3915384,
"csu-kab",
}
m["kwh"] = {
"Kowiai",
6435028,
"poz",
}
m["kwi"] = {
"Awa-Cuaiquer",
2603103,
"sai-bar",
"Latn",
}
m["kwj"] = {
"Kwanga",
3438383,
"paa-spk",
}
m["kwk"] = {
"ခွါကွာ'ဝလာ",
2640628,
"wak",
"Latn",
}
m["kwl"] = {
"Kofyar",
3441382,
"cdc-wst",
"Latn",
}
m["kwm"] = {
"Kwambi",
3487165,
"bnt-ova",
}
m["kwn"] = {
"Kwangali",
36334,
"bnt-kav",
"Latn",
}
m["kwo"] = {
"Kwomtari",
3508116,
}
m["kwp"] = {
"Kodia",
3914867,
"kro-ekr",
}
m["kwq"] = {
"Kwak",
11014183,
"nic-nka",
ancestors = "yam",
}
m["kwr"] = {
"Kwer",
12635137,
"ngf-okk",
}
m["kws"] = {
"Kwese",
3200846,
"bnt-pen",
}
m["kwt"] = {
"Kwesten",
6450354,
"paa-tkw",
}
m["kwu"] = {
"Kwakum",
35624,
"bnt-kak",
}
m["kwv"] = {
"Sara Kaba Náà",
3915361,
"csu-kab",
}
m["kww"] = {
"ကဝေန်တဳ",
721182,
"crp",
"Latn",
ancestors = "en"
}
m["kwx"] = {
"Khirwar",
12976968,
"dra",
}
m["kwz"] = {
"Kwadi",
2364661,
"khi-kkw",
"Latn",
}
m["kxa"] = {
"Kairiru",
3398785,
"poz-ocw",
"Latn",
}
m["kxb"] = {
"Krobu",
35586,
"alv-ptn",
"Latn",
}
m["kxc"] = {
"Khonso",
56624,
"cus-eas",
}
m["kxd"] = {
"မလေဝ် ဗရုနာဲ",
3182878,
"poz-mly",
"Latn, ms-Arab",
}
m["kxe"] = {
"Kakihum",
3914433,
"nic-kam",
ancestors = "tvd",
}
m["kxf"] = {
"ကရေၚ်မနုမနဝ်",
12952592,
"kar",
"Mymr, Latn",
}
m["kxh"] = {
"Karo",
3447116,
"omv-aro",
}
m["kxi"] = {
"Keningau Murut",
6389308,
"poz-san",
"Latn",
}
m["kxj"] = {
"Kulfa",
713654,
"csu-kab",
}
m["kxk"] = {
"ကရေၚ်သယာန်",
14352960,
"kar",
}
m["kxl"] = {
"Nepali Kurux",
3200624,
"dra-kml",
"Deva",
ancestors = "kru",
}
-- Language record for code "kxm". Positional fields follow the layout used by
-- every record in this file: name, numeric ID, family code, script code(s).
m["kxm"] = {
"ခမေန်သၟဝ်ကျာ",
3502234,
"mkh-kmr",
"Thai, Khmr",
ancestors = "xhm",
-- One transliteration module per script.
translit = {
Khmr = "km-translit",
Thai = "Thai alphabet-translit",
},
-- Sort keys: `from` and `to` are parallel lists of patterns and replacements.
--   1. "[%pๆ]"  -> ""     : punctuation and the character ๆ are removed.
--   2. "[็-๎]"   -> ""     : every character in the range ็ .. ๎ is removed.
--   3. "([เแโใไ])([ก-ฮ])" -> "%2%1" : one of the vowel signs เ แ โ ใ ไ
--      followed by a letter in the range ก .. ฮ is swapped with that letter.
-- NOTE(review): the rules are presumably applied in list order -- confirm.
sort_key = {
from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"},
to = {"", "", "%2%1"}
},
}
m["kxn"] = {
"ကနဝ်ဝေတ်",
6364300,
"poz-bnn",
"Latn",
}
m["kxo"] = {
"Kanoé",
4356223,
"qfa-iso",
}
m["kxp"] = {
"Wadiyara Koli",
12953645,
"inc-wes",
}
m["kxq"] = {
"Smärky Kanum",
12952569,
"paa-yam",
}
m["kxr"] = {
"Manus Koro",
3198994,
"poz-aay",
"Latn",
}
m["kxs"] = {
"Kangjia",
3182570,
"xgn-shr",
"Latn",
}
m["kxt"] = {
"Koiwat",
6426388,
"paa-spk",
}
m["kxu"] = {
"ကူအဳ (အိန္ဒိယ)",
33919,
"dra-kki",
"Orya",
translit = "kxv-translit",
entry_name = {
remove_diacritics = "୕",
from = {"ଆଆ", "ଇଇ", "ଉଉ", "ଏଏ", "ଓଓ", "ିଇ", "ୁଉ", "େଏ", "ୋଓ"},
to = {"ଆ", "ଈ", "ଊ", "ଏ", "ଓ", "ୀ", "ୂ", "େ", "ୋ"},
},
}
m["kxv"] = {
"ကူဝဳ",
3200721,
"dra-kki",
"Orya",
translit = "kxv-translit",
entry_name = {
remove_diacritics = "୕",
from = {"ଆଆ", "ଇଇ", "ଉଉ", "ଏଏ", "ଓଓ", "([କ-ହ])ଆ", "ିଇ", "ୁଉ", "େଏ", "ୋଓ"},
to = {"ଆ", "ଈ", "ଊ", "ଏ", "ଓ", "%1ା", "ୀ", "ୂ", "େ", "ୋ"},
},
}
m["kxw"] = {
"Konai",
11732339,
}
m["kxx"] = {
"Likuba",
35646,
"bnt-bmo",
}
m["kxy"] = {
"Kayong",
6380673,
"mkh",
}
m["kxz"] = {
"Kerewo",
6393847,
"paa-kiw",
}
m["kya"] = {
"Kwaya",
6450276,
"bnt-haj",
"Latn",
}
m["kyb"] = {
"Butbut Kalinga",
18753300,
"phi",
}
m["kyc"] = {
"Kyaka",
12952690,
"paa-eng",
}
m["kyd"] = {
"Karey",
6370196,
"poz",
}
m["kye"] = {
"Krache",
35658,
"alv-gng",
}
m["kyf"] = {
"Kouya",
35595,
"kro-bet",
}
m["kyg"] = {
"Keyagana",
6398208,
"paa-kag",
}
m["kyh"] = {
"Karok",
1288440,
"qfa-iso",
"Latn",
}
m["kyi"] = {
"ခဳပွေန်",
3038653,
"poz-swa",
"Latn",
}
m["kyj"] = {
"ကာဒ်ရာအဝ်",
3192950,
"phi",
"Latn",
}
m["kyk"] = {
"Kamayo",
3192339,
"phi",
}
m["kyl"] = {
"Kalapuya",
3192120,
"nai-klp",
}
m["kym"] = {
"Kpatili",
3913982,
"znd",
}
m["kyn"] = {
"Karolanos",
6373093,
"phi",
}
m["kyo"] = {
"Kelon",
6386414,
"ngf",
}
m["kyp"] = {
"Kang",
25559558,
"tai",
}
m["kyq"] = {
"Kenga",
35707,
"csu-bgr",
}
m["kyr"] = {
"Kuruáya",
3200633,
"tup",
"Latn",
}
m["kys"] = {
"Baram Kayan",
2883794,
"poz",
}
m["kyt"] = {
"Kayagar",
6380394,
"ngf",
}
m["kyu"] = {
"ကယျာလပါက်ပၠိုတ်",
12952596,
"kar",
"Kali, Mymr, Latn",
translit = {Kali = "Kali-translit"},
}
m["kyv"] = {
"Kayort",
6380675,
"inc-eas",
"Deva",
}
m["kyw"] = {
"ကုဒ်မာလဳ",
6446173,
"inc-bih",
"Deva, as-Beng, Orya, Chis",
}
m["kyx"] = {
"Rapoisi",
7294279,
"paa-nbo",
}
m["kyy"] = {
"Kambaira",
6356254,
"paa-kag",
}
m["kyz"] = {
"Kayabí",
6380372,
"tup-gua",
"Latn",
}
m["kza"] = {
"Western Karaboro",
36601,
"alv-krb",
}
m["kzb"] = {
"Kaibobo",
6347565,
"poz-cma",
}
m["kzc"] = {
"Bondoukou Kulango",
11031321,
"alv-kul",
}
m["kzd"] = {
"Kadai",
7679471,
"poz-cma",
"Latn",
}
m["kze"] = {
"Kosena",
12952663,
"ngf",
"Latn",
}
m["kzf"] = {
"Da'a Kaili",
33103997,
"poz-kal",
"Latn",
}
m["kzg"] = {
"ခဳခါဲ",
3196527,
"jpx-nry",
"Jpan",
translit = s["jpx-translit"],
display_text = s["jpx-displaytext"],
entry_name = s["jpx-entryname"],
sort_key = s["jpx-sortkey"],
}
m["kzh"] = {
"ဒံၚ်ဂါဝ်လာဝဳ",
5295991,
"nub",
"Latn",
}
m["kzi"] = {
"ခေလာဗေတ်",
6385445,
"poz-swa",
"Latn",
}
m["kzj"] = {
"Coastal Kadazan",
3307195,
"poz-san",
"Latn",
}
m["kzk"] = {
"Kazukuru",
1089069,
"poz-ocw",
}
m["kzl"] = {
"Kayeli",
4207444,
"poz-cma",
"Latn",
}
m["kzm"] = {
"Kais",
6348319,
"paa",
"Latn",
}
m["kzn"] = {
"Kokola",
11128329,
"bnt-mak",
"Latn",
ancestors = "vmw",
}
m["kzo"] = {
"Kaningi",
35683,
"bnt-mbt",
}
m["kzp"] = {
"Kaidipang",
6347611,
"phi",
}
m["kzq"] = {
"Kaike",
10951226,
"sit-tam",
}
m["kzr"] = {
"Karang",
35681,
"alv-mbm",
"Latn",
}
m["kzs"] = {
"Sugut Dusun",
12953510,
"poz-san",
"Latn",
}
m["kzt"] = {
"Tambunan Dusun",
12953514,
"poz-san",
"Latn",
}
m["kzu"] = {
"Kayupulau",
6380723,
"poz-ocw",
}
m["kzv"] = {
"Komyandaret",
6428671,
"ngf-okk",
"Latn",
}
m["kzw"] = { -- contrast xoo, sai-kat, sai-xoc, the last of which the ISO conflated into this code
"Kariri",
12953620,
"sai-mje",
"Latn",
}
m["kzx"] = {
"Kamarian",
6356040,
"poz-cma",
"Latn",
}
m["kzy"] = {
"Kango-Sua",
11008360,
"bnt-kbi",
"Latn",
ancestors = "bip",
}
m["kzz"] = {
"Kalabra",
6350038,
"paa",
"Latn",
}
return require("Module:languages").finalizeData(m, "language")
2rdppc10eur8p1283mbm14fu8wq9vjh
ညးလွပ်:咽頭べさ/Notepad
2
9186
395939
395779
2026-05-30T01:37:23Z
咽頭べさ
33
395939
wikitext
text/x-wiki
[[🝴]] [[🝵]] [[🝶]] [[🝻]] [[🝼]] [[🝽]] [[🝾]] [[🝿]] [[🟙]] [[🛜]] [[🩵]] [[🩶]] [[🩷]] [[🪇]] [[🪈]] [[🪭]] [[🪮]] [[🪯]] [[🪻]] [[🪼]] [[🪽]] [[🪿]] [[🫎]] [[🫏]] [[🫚]] [[🫛]] [[🫨]] [[🫷]] [[🫸]]
[[File:Omx-san̊krān.png|50x50px]]
# {{l|shn|ၶိူဝ်း}}
{{shn-pron|ၽူၼ်-တူၵ်း}}
* {{kjp-IPA|လီ}}
#: {{ux|mnw|သ္ၚာ
|t=bh}}
#: {{ux|ksw|ဃိၣ်သၢရှ်ဖျၢၣ်တၢ်ဘါတရိၣ်
|t=bh}}
|
#: {{ux|shn|ပိတ်းမၢၵ်ႇၼမ်ႉတဝ်ႈ
|t=bh}}
#: {{ux|my|အဲ
|t=bh}}
{{alt sp|th|บ้านมอญ นครสวรรค์}}
* {{ur-IPA|grī}}
{{der3|shn
|တေ မိူဝ်း ယဝ့် ႁုး
|
|
}}
{{pi-alt|Mymr=ယဒိ လောကသန္တိံ ဣစ္ဆေယျ၊ ဗုဒ္ဓမဂ္ဂံ ဝိနာ အညော မဂ္ဂေါ နတ္ထိ။}}
{{sa-alt|Deva=किं अहं सम्यक् अस्मि}}
{{langtrack|mnw|mkh-mmn|omx|mkh-pro|mkh-mnc-pro}}
{{langtrack|mn|en|enm|ang|ine-pro|gem-pro|gmw-pro}}
{{langtrack|th|shn|tai-pro|tai-swe-pro|qfa-bet-pro|aho|ar|ja|}}
{{langtrack|sa|la|hi|ru|ur}}
{{langtrack|so|as|it|hu|pt}}
{{langtrack|zh|vi|km|lo|ko}}
{{langtrack|es|sh|gmw-pro|fr|ine-pro}}
{{langtrack|my|za|wa|ka|mn}}
lchzpqj90j9f66etxkl2z8am7cekxo4
395940
395939
2026-05-30T02:05:06Z
咽頭べさ
33
395940
wikitext
text/x-wiki
[[🝴]] [[🝵]] [[🝶]] [[🝻]] [[🝼]] [[🝽]] [[🝾]] [[🝿]] [[🟙]] [[🛜]] [[🩵]] [[🩶]] [[🩷]] [[🪇]] [[🪈]] [[🪭]] [[🪮]] [[🪯]] [[🪻]] [[🪼]] [[🪽]] [[🪿]] [[🫎]] [[🫏]] [[🫚]] [[🫛]] [[🫨]] [[🫷]] [[🫸]]
[[File:Omx-san̊krān.png|50x50px]]
# {{l|shn|ၶိူဝ်း}}
{{shn-pron|ၽူၼ်-တူၵ်း}}
* {{kjp-IPA|လီ}}
#: {{ux|mnw|ၐြဳ ဒၞာ မိက်
|t=bh}}
#: {{ux|ksw|ဃိၣ်သၢရှ်ဖျၢၣ်တၢ်ဘါတရိၣ်
|t=bh}}
|
#: {{ux|shn|ပိတ်းမၢၵ်ႇၼမ်ႉတဝ်ႈ
|t=bh}}
#: {{ux|my|အဲ
|t=bh}}
{{alt sp|th|บ้านมอญ นครสวรรค์}}
* {{ur-IPA|grī}}
{{der3|shn
|တေ မိူဝ်း ယဝ့် ႁုး
|
|
}}
{{pi-alt|Mymr=ယဒိ လောကသန္တိံ ဣစ္ဆေယျ၊ ဗုဒ္ဓမဂ္ဂံ ဝိနာ အညော မဂ္ဂေါ နတ္ထိ။}}
{{sa-alt|Deva=किं अहं सम्यक् अस्मि}}
{{langtrack|mnw|mkh-mmn|omx|mkh-pro|mkh-mnc-pro}}
{{langtrack|mn|en|enm|ang|ine-pro|gem-pro|gmw-pro}}
{{langtrack|th|shn|tai-pro|tai-swe-pro|qfa-bet-pro|aho|ar|ja|}}
{{langtrack|sa|la|hi|ru|ur}}
{{langtrack|so|as|it|hu|pt}}
{{langtrack|zh|vi|km|lo|ko}}
{{langtrack|es|sh|gmw-pro|fr|ine-pro}}
{{langtrack|my|za|wa|ka|mn}}
fsaszwytf9yvh3knlzs1eu5h2pquo8f
395941
395940
2026-05-30T11:10:36Z
咽頭べさ
33
395941
wikitext
text/x-wiki
[[🝴]] [[🝵]] [[🝶]] [[🝻]] [[🝼]] [[🝽]] [[🝾]] [[🝿]] [[🟙]] [[🛜]] [[🩵]] [[🩶]] [[🩷]] [[🪇]] [[🪈]] [[🪭]] [[🪮]] [[🪯]] [[🪻]] [[🪼]] [[🪽]] [[🪿]] [[🫎]] [[🫏]] [[🫚]] [[🫛]] [[🫨]] [[🫷]] [[🫸]]
[[File:Omx-san̊krān.png|50x50px]]
# {{l|shn|ၶိူဝ်း}}
{{shn-pron|ပဵင်း-ၽဵင်ႇ}}
* {{kjp-IPA|လီ}}
#: {{ux|mnw|ၐြဳ ဒၞာ မိက်
|t=bh}}
#: {{ux|ksw|ဃိၣ်သၢရှ်ဖျၢၣ်တၢ်ဘါတရိၣ်
|t=bh}}
|
#: {{ux|shn|ပိတ်းမၢၵ်ႇၼမ်ႉတဝ်ႈ
|t=bh}}
#: {{ux|my|အဲ
|t=bh}}
{{alt sp|th|บ้านมอญ นครสวรรค์}}
* {{ur-IPA|grī}}
{{der3|shn
|တေ မိူဝ်း ယဝ့် ႁုး
|
|
}}
{{pi-alt|Mymr=ယဒိ လောကသန္တိံ ဣစ္ဆေယျ၊ ဗုဒ္ဓမဂ္ဂံ ဝိနာ အညော မဂ္ဂေါ နတ္ထိ။}}
{{sa-alt|Deva=किं अहं सम्यक् अस्मि}}
{{langtrack|mnw|mkh-mmn|omx|mkh-pro|mkh-mnc-pro}}
{{langtrack|mn|en|enm|ang|ine-pro|gem-pro|gmw-pro}}
{{langtrack|th|shn|tai-pro|tai-swe-pro|qfa-bet-pro|aho|ar|ja|}}
{{langtrack|sa|la|hi|ru|ur}}
{{langtrack|so|as|it|hu|pt}}
{{langtrack|zh|vi|km|lo|ko}}
{{langtrack|es|sh|gmw-pro|fr|ine-pro}}
{{langtrack|my|za|wa|ka|mn}}
hxc43tt3ujkbhwe1dp3s21ruhrdrsdx
ကဏ္ဍ:နာမ်တဳရူရာန်ဂမၠိုၚ်
14
21334
395886
30632
2026-05-29T15:56:57Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ကဏ္ဍ:နာမ် တဳရူရာန်]] ဇရေင် [[ကဏ္ဍ:နာမ်တဳရူရာန်ဂမၠိုၚ်]] သီုကဵု ဟွံဂွံ ဂိုင်စွံလဝ် မကလေင်ပညုင်
30632
wikitext
text/x-wiki
[[ကဏ္ဍ:ဘာသာတဳရူရာန်]]
51nhwl3eqwxusbpqd9wlk1cfgds3hyp
ကဏ္ဍ:ဝေါဟာအဓိကတဳရူရာန်ဂမၠိုၚ်
14
21336
395885
171150
2026-05-29T15:56:35Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ကဏ္ဍ:ဝေါဟာတဳရူရာန်နွံပ္ဍဲအဘိဓာန်ဂမၠိုၚ်]] ဇရေင် [[ကဏ္ဍ:ဝေါဟာအဓိကတဳရူရာန်ဂမၠိုၚ်]] သီုကဵု ဟွံဂွံ ဂိုင်စွံလဝ် မကလေင်ပညုင်
30634
wikitext
text/x-wiki
[[ကဏ္ဍ:ဘာသာတဳရူရာန်]]
51nhwl3eqwxusbpqd9wlk1cfgds3hyp
ကဏ္ဍ:နာမ်နာဝါတ်ဒဝ်ဝၚ်ဂန္ထဂမၠိုၚ်
14
28170
395880
166509
2026-05-29T15:39:34Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ကဏ္ဍ:နာမ် နာဝါတ်ဒဝ်ဝၚ်ဂန္ထ]] ဇရေင် [[ကဏ္ဍ:နာမ်နာဝါတ်ဒဝ်ဝၚ်ဂန္ထဂမၠိုၚ်]] သီုကဵု ဟွံဂွံ ဂိုင်စွံလဝ် မကလေင်ပညုင်
163448
wikitext
text/x-wiki
[[ကဏ္ဍ:ဘာသာနာဝါတ်ဒဝ်ဝၚ်ဂန္ထ]]
roafxwijt67qilxix1vqbfzebxma7ox
ကဏ္ဍ:ဝေါဟာအဓိကနာဝါတ်ဒဝ်ဝၚ်ဂန္ထဂမၠိုၚ်
14
28172
395879
274636
2026-05-29T15:38:44Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ကဏ္ဍ:ဝေါဟာနာဝါတ်ဒဝ်ဝၚ်ဂန္ထနွံပ္ဍဲအဘိဓာန်ဂမၠိုၚ်]] ဇရေင် [[ကဏ္ဍ:ဝေါဟာအဓိကနာဝါတ်ဒဝ်ဝၚ်ဂန္ထဂမၠိုၚ်]] သီုကဵု ဟွံဂွံ ဂိုင်စွံလဝ် မကလေင်ပညုင်
274636
wikitext
text/x-wiki
[[ကဏ္ဍ:ဘာသာနာဝါတ်ဒဝ်ဝၚ်ဂန္ထ]]
roafxwijt67qilxix1vqbfzebxma7ox
ကဏ္ဍ:ကြိယာဝိသေသနဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်ဂမၠိုၚ်
14
76211
395882
290821
2026-05-29T15:42:52Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ကဏ္ဍ:ကြိယာဝိသေသန ဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]] ဇရေင် [[ကဏ္ဍ:ကြိယာဝိသေသနဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်ဂမၠိုၚ်]] သီုကဵု ဟွံဂွံ ဂိုင်စွံလဝ် မကလေင်ပညုင်
290821
wikitext
text/x-wiki
[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်|ဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]] » [[:ကဏ္ဍ:ဝေါဟာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်နွံပ္ဍဲအဘိဓာန်ဂမၠိုၚ်|ဝေါဟာတံသ္ဇိုၚ်]] » '''ကြိယာဝိသေသနဂမၠိုၚ်'''
:ဝေါဟာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်မပြုပြေၚ်ပြံၚ်လှာဲလဝ်ပိုဒ်လိက်ဂမၠိုၚ်၊ ပိုတ်ဂမၠိုၚ် ကဵု ဇၟန်လိက်တပ်ပ်ဂမၠိုၚ်။
[[ကဏ္ဍ:ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]][[ကဏ္ဍ:ကြိယာဝိသေသနဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဝ]]
ffwu2mqmq42zendcma7rsi8pf9flkzl
395883
395882
2026-05-29T15:43:36Z
咽頭べさ
33
395883
wikitext
text/x-wiki
[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်|ဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]] » [[:ကဏ္ဍ:ဝေါဟာအဓိကဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်ဂမၠိုၚ်|ဝေါဟာတံသ္ဇိုၚ်]] » '''ကြိယာဝိသေသနဂမၠိုၚ်'''
:ဝေါဟာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်မပြုပြေၚ်ပြံၚ်လှာဲလဝ်ပိုဒ်လိက်ဂမၠိုၚ်၊ ပိုတ်ဂမၠိုၚ် ကဵု ဇၟန်လိက်တပ်ပ်ဂမၠိုၚ်။
[[ကဏ္ဍ:ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]][[ကဏ္ဍ:ကြိယာဝိသေသနဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဝ]]
o8euf8gg85oxxl40nr28pdcmqlo0wls
ကဏ္ဍ:ဝေါဟာအဓိကဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်ဂမၠိုၚ်
14
76213
395881
275129
2026-05-29T15:42:23Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ကဏ္ဍ:ဝေါဟာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်နွံပ္ဍဲအဘိဓာန်ဂမၠိုၚ်]] ဇရေင် [[ကဏ္ဍ:ဝေါဟာအဓိကဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်ဂမၠိုၚ်]] သီုကဵု ဟွံဂွံ ဂိုင်စွံလဝ် မကလေင်ပညုင်
275129
wikitext
text/x-wiki
[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်|ဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]] » '''ဝေါဟာတံသ္ဇိုၚ်ဂမၠိုၚ်'''
:ဝေါဟာတံသ္ဇိုၚ်ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်၊ ကဏ္ဍနူကဵုမပါ်ပရံဒကုတ်မဆေၚ်စပ်ကဵုမအရေဝ်ဝေါဟာ။
[[ကဏ္ဍ:ဘာသာဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်]][[ကဏ္ဍ:ဝေါဟာအဓိကဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဝ]]
hkfh0w5bbj3pqgeaq52zj31razczamv
မဝ်ဂျူ:lt-common
828
119841
395936
154896
2026-05-29T18:27:36Z
咽頭べさ
33
395936
Scribunto
text/plain
local export = {}
local m_str_utils = require("Module:string utilities")
local u = m_str_utils.char
local ugsub = m_str_utils.gsub
local ulower = m_str_utils.lower
local uupper = m_str_utils.upper
local ufind = m_str_utils.find
local ulen = m_str_utils.len
local ucodepoint = m_str_utils.codepoint
-- Keep native Unicode normalization functions (no replacement available)
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
-- =============================================================================
-- Unicode constants
-- =============================================================================
-- Each constant is a one-character string holding a single combining mark,
-- built from its code point with `u` (Module:string utilities' `char`).
local GRAVE = u(0x0300) -- combining grave accent
local ACUTE = u(0x0301) -- combining acute accent
local TILDE = u(0x0303) -- combining tilde
local MACRON = u(0x0304) -- combining macron
local DOTABOVE = u(0x0307) -- combining dot above
local CARON = u(0x030C) -- combining caron
local OGONEK = u(0x0328) -- combining ogonek
-- Pattern character class matching exactly one of the three accent marks
-- (grave, acute, tilde). It deliberately excludes macron, dot above, caron
-- and ogonek.
local ANY_ACCENT = "[" .. GRAVE .. ACUTE .. TILDE .. "]"
-- Legacy aliases for backward compatibility
-- (lower-case names bound to the same values; the local helper functions in
-- this module refer to `ogonek`, `dotabove` and `accents`).
local grave = GRAVE
local acute = ACUTE
local tilde = TILDE
local macron = MACRON
local dotabove = DOTABOVE
local caron = CARON
local ogonek = OGONEK
local accents = ANY_ACCENT
-- Export accent constants for use by other Lithuanian modules
-- (lt-pron, lt-verb, lt-noun, etc.) to avoid duplicating these values.
export.GRAVE = GRAVE
export.ACUTE = ACUTE
export.TILDE = TILDE
export.MACRON = MACRON
export.DOTABOVE = DOTABOVE
export.CARON = CARON
export.OGONEK = OGONEK
export.ANY_ACCENT = ANY_ACCENT
-- =============================================================================
-- Private Use Area (PUA) replacement hints
-- =============================================================================
-- Maps PUA codepoints (U+E000–U+E022) to their suggested standard Unicode
-- replacements. Used by reject_pua to give actionable error messages when
-- editors paste in pre-composed glyphs from old Lithuanian-specific encodings.
local INVALID_CHARS = {
[0xE000] = "Ą́", [0xE001] = "ą́", [0xE002] = "Ą̃", [0xE003] = "ą̃",
[0xE004] = "Ę́", [0xE005] = "ę́", [0xE006] = "Ę̃", [0xE007] = "ę̃",
[0xE008] = "Ė́", [0xE009] = "ė́", [0xE00A] = "Ė̃", [0xE00B] = "ė̃",
[0xE00C] = "i̇̀", [0xE00D] = "i̇́", [0xE00E] = "i̇̃",
[0xE00F] = "Į̇́", [0xE010] = "į̇́", [0xE011] = "Į̇̃", [0xE012] = "į̇̃",
[0xE013] = "J̃", [0xE014] = "j̇̃",
[0xE015] = "L̃", [0xE016] = "l̃",
[0xE017] = "M̃", [0xE018] = "m̃",
[0xE019] = "R̃", [0xE01A] = "r̃",
[0xE01B] = "Ų́", [0xE01C] = "ų́", [0xE01D] = "Ų̃", [0xE01E] = "ų̃",
[0xE01F] = "Ū́", [0xE020] = "ū́", [0xE021] = "Ū̃", [0xE022] = "ū̃",
}
export.INVALID_CHARS = INVALID_CHARS
-- =============================================================================
-- Internal helper functions
-- =============================================================================
-- Lookup from the dotless base letters to their ordinary dotted counterparts.
local dotless_to_dotted = {
	["ı"] = "i",
	["ȷ"] = "j",
}

-- gsub callback: returns `base` (with ı/ȷ mapped to i/j, any other base kept
-- as is) followed by `below`, the optional mark captured after the base.
local function char_to_dotted_form(base, below)
	local dotted = dotless_to_dotted[base]
	if dotted == nil then
		dotted = base
	end
	return dotted .. below
end
-- Removes a combining dot above that follows i/ı/j/ȷ (optionally with an
-- ogonek in between) and converts a dotless base in that position to its
-- dotted form.
-- On entry, text must be in NFD form.
-- Returns exactly one value: the rewritten text.
local function normalize_dotted_chars(text)
	local base_then_dot = "([iıjȷ])(" .. ogonek .. "?)" .. dotabove
	-- The parentheses drop gsub's second result (the substitution count), so
	-- it cannot leak into a caller's argument list or return values.
	return (ugsub(text, base_then_dot, char_to_dotted_form))
end
-- gsub callback used when a base letter (plus optional ogonek in `below`) is
-- about to receive an accent.
local function char_to_accent_form(base, below)
	local already_dotted = base == "i" or base == "j"
	if not already_dotted then
		-- A dotless base combining with an accent is converted to the dotted
		-- form so that it normalizes properly. This shouldn't happen, but
		-- just in case.
		return char_to_dotted_form(base, below)
	end
	-- Dotted i/j: add a 'dot above' after the base (and after `below`).
	return base .. below .. dotabove
end
-- Decomposes `text`, deletes every run of accent marks (grave/acute/tilde),
-- then normalizes dotless characters and dot-above diacritics. The result is
-- still decomposed; callers recompose it themselves.
local function stripped_text_form(text)
	local unaccented = ugsub(toNFD(text), accents .. "+", "")
	return normalize_dotted_chars(unaccented)
end
-- =============================================================================
-- Input validation
-- =============================================================================
-- Reject Private Use Area characters (U+E000–U+F8FF). When the character is
-- a known non-standard Lithuanian glyph, the error message includes the
-- recommended standard Unicode replacement (see INVALID_CHARS above).
-- Raises an error if `s` contains a character in the range U+E000–U+F8FF.
-- The error is raised for the first such character in the string. When that
-- character has an entry in INVALID_CHARS, the message names the suggested
-- replacement; otherwise it quotes the whole input.
-- Returns nothing; a nil/false `s` is accepted silently.
function export.reject_pua(s)
	if not s then return end
	-- A single pattern search finds the first offending character, instead of
	-- fetching the code point at every character index in turn.
	local pos = ufind(s, "[" .. u(0xE000) .. "-" .. u(0xF8FF) .. "]")
	if not pos then return end
	local cp = ucodepoint(s, pos)
	local replacement = INVALID_CHARS[cp]
	if replacement then
		error(string.format(
			"lt-common: private use area character U+%04X \"%s\" detected. " ..
			"Please use \"%s\" instead.", cp, u(cp), replacement))
	end
	error(string.format(
		"lt-common: private use area character U+%04X detected in \"%s\". " ..
		"Please use a standard Unicode character instead.", cp, s))
end
-- =============================================================================
-- Input normalization
-- =============================================================================
-- Detect nonstandard encoding patterns in the input.
-- Returns: dotless_flag (found ı/ȷ), precomp_i_flag (found precomposed í/ì/ĩ)
-- Reports two independent findings about the raw input `s`:
--   dotless_flag   : true if `s` contains a dotless ı or ȷ
--   precomp_i_flag : true if `s` contains a precomposed í, ì or ĩ
-- Returns false, false for a nil/false `s`.
function export.detect_nonstandard(s)
	if not s then return false, false end
	local dotless_flag = ufind(s, "[ıȷ]") ~= nil
	-- The precomposed letters must be searched for in the text as given:
	-- NFD splits each of them into "i" plus a combining accent, so a search
	-- of decomposed text can never find them. The class is built from code
	-- points (U+00ED í, U+00EC ì, U+0129 ĩ) so that it stays precomposed
	-- whatever normalization is applied to this source page.
	local precomposed_i = "[" .. u(0x00ED) .. u(0x00EC) .. u(0x0129) .. "]"
	local precomp_i_flag = ufind(s, precomposed_i) ~= nil
	return dotless_flag, precomp_i_flag
end
-- Normalize input to clean canonical NFC.
-- Handles dotless i/j (ı, ȷ) and stray dot-above combinations.
-- Returns `s` in NFC with dot-above marks after i/j removed and every dotless
-- ı/ȷ replaced by plain i/j. A nil/false `s` is returned unchanged.
function export.canonicalize_input(s)
	if not s then return s end
	-- Work on decomposed text; the shared helper drops a stray dot above
	-- after i/ı/j/ȷ (with or without ogonek) and dots the base it sat on.
	local decomposed = normalize_dotted_chars(toNFD(s))
	-- Any dotless letter still present had no dot above after it.
	decomposed = ugsub(decomposed, "ı", "i")
	decomposed = ugsub(decomposed, "ȷ", "j")
	return toNFC(decomposed)
end
-- =============================================================================
-- Partial NFD conversion (stem_ac representation)
-- =============================================================================
-- Convert canonical NFC to partial NFD (stem_ac).
-- Applies full NFD, then recomposes non-accent diacritics.
-- Only grave/acute/tilde remain as combining characters.
-- Precomposed letter for each base + non-accent mark pair used in Lithuanian,
-- in both cases.
local STEM_AC_RECOMPOSED = {
	-- ogonek vowels
	["a" .. OGONEK] = "ą", ["A" .. OGONEK] = "Ą",
	["e" .. OGONEK] = "ę", ["E" .. OGONEK] = "Ę",
	["i" .. OGONEK] = "į", ["I" .. OGONEK] = "Į",
	["u" .. OGONEK] = "ų", ["U" .. OGONEK] = "Ų",
	-- macron vowel
	["u" .. MACRON] = "ū", ["U" .. MACRON] = "Ū",
	-- dot-above e
	["e" .. DOTABOVE] = "ė", ["E" .. DOTABOVE] = "Ė",
	-- caron consonants
	["c" .. CARON] = "č", ["C" .. CARON] = "Č",
	["s" .. CARON] = "š", ["S" .. CARON] = "Š",
	["z" .. CARON] = "ž", ["Z" .. CARON] = "Ž",
}
-- A candidate base letter immediately followed by one non-accent mark.
local STEM_AC_PAIR = "([aeiucszAEIUCSZ])([" .. OGONEK .. MACRON .. DOTABOVE .. CARON .. "])"

-- Convert canonical NFC to partial NFD (stem_ac).
-- Applies full NFD, then recomposes the non-accent diacritics listed in
-- STEM_AC_RECOMPOSED, so that only grave/acute/tilde remain as combining
-- characters. Upper-case letters are recomposed as well as lower-case ones;
-- previously an upper-case Ą, Č, Š, ... was left decomposed.
-- Returns one value; a nil/false `s` is returned unchanged.
function export.to_stem_ac(s)
	if not s then return s end
	-- One pass over the text: pairs without a table entry (e.g. i + dot
	-- above) are put back exactly as they were found.
	return (ugsub(toNFD(s), STEM_AC_PAIR, function(base, mark)
		local pair = base .. mark
		return STEM_AC_RECOMPOSED[pair] or pair
	end))
end
-- =============================================================================
-- Accent manipulation
-- =============================================================================
-- Strip all accent marks (grave/acute/tilde) from partial NFD text.
-- Strip all accent marks (grave/acute/tilde) from partial NFD text.
-- Returns exactly one value, the stripped text; a nil/false `stem_ac` is
-- returned unchanged.
function export.to_stem_bare(stem_ac)
	if not stem_ac then return stem_ac end
	-- The parentheses drop gsub's second result (the substitution count), so
	-- a call such as table.insert(list, export.to_stem_bare(x)) does not
	-- receive an unexpected extra argument.
	return (ugsub(stem_ac, ANY_ACCENT, ""))
end
-- Check if partial NFD text contains any accent marks.
-- Check if partial NFD text contains any accent marks (grave/acute/tilde).
-- Returns a boolean. A nil/false `stem_ac` yields false, matching the
-- nil-tolerance of the other exported functions (process_input, for one,
-- hands back nil stems for nil input).
function export.has_accent(stem_ac)
	if not stem_ac then return false end
	return ufind(stem_ac, ANY_ACCENT) ~= nil
end
-- =============================================================================
-- Complete input pipeline
-- =============================================================================
-- Process raw user input through the complete normalization pipeline.
-- Returns: stem_bare, stem_ac, dotless_flag, precomp_flag
-- Runs raw user input through validation, detection, canonicalization and
-- stem conversion, in that order.
-- Returns: stem_bare, stem_ac, dotless_flag, precomp_flag.
-- For a nil/false `raw` the two stems are `raw` itself and both flags false.
function export.process_input(raw)
	if not raw then
		return raw, raw, false, false
	end
	-- Raises an error on private use area characters before anything else.
	export.reject_pua(raw)
	local dotless_flag, precomp_flag = export.detect_nonstandard(raw)
	local stem_ac = export.to_stem_ac(export.canonicalize_input(raw))
	-- to_stem_bare is not the last expression in the list, so it contributes
	-- exactly one value.
	return export.to_stem_bare(stem_ac), stem_ac, dotless_flag, precomp_flag
end
-- =============================================================================
-- Display and text processing
-- =============================================================================
-- Builds the display form of `text`; `lang` and `sc` are accepted but unused.
-- A nil/false `text` is returned unchanged.
function export.makeDisplayText(text, lang, sc)
	if not text then return text end
	-- Normalize dotless characters and dot-above diacritics (accents are kept).
	local decomposed = normalize_dotted_chars(toNFD(text))
	-- i/ı/j/ȷ, an optional ogonek, then a position where an accent begins
	-- (%f is the frontier pattern, so the accent itself is not consumed).
	local before_accent = "([iıjȷ])(" .. ogonek .. "?)%f" .. accents
	-- Add a 'dot above' between "i" or "j" and the accent.
	local dotted = ugsub(decomposed, before_accent, char_to_accent_form)
	return toNFC(dotted)
end
-- Called from [[Module:languages]] since [[Module:lt-common]] is set as the stripDiacritics handler in
-- [[Module:languages/data/2]].
-- Returns `text` with accent marks removed and dotted/dotless characters
-- normalized, recomposed to NFC; `lang` and `sc` are accepted but unused.
-- A nil/false `text` is returned unchanged.
function export.stripDiacritics(text, lang, sc)
	if text then
		return toNFC(stripped_text_form(text))
	end
	return text
end
local sortkey_substitutes = {
[ogonek] = u(0xF000),
[caron] = u(0xF001),
[macron] = u(0xF002),
[dotabove] = u(0xF003),
["y"] = "i" .. u(0xF004),
}
-- Build the sort key for `text`. `lang` and `sc` are accepted but not used
-- here. A nil/false `text` is returned unchanged.
function export.makeSortKey(text, lang, sc)
	if not text then
		return text
	end
	-- Lowercase first, then reduce to the stripped-text form.
	local stripped = stripped_text_form(ulower(text))
	-- Walk the string one UTF-8 character at a time (a byte plus any
	-- continuation bytes) and swap diacritics for Private Use Area characters
	-- so they sort after all other characters.
	local keyed = stripped:gsub(".[\128-\191]*", sortkey_substitutes)
	return toNFC(uupper(keyed))
end
return export
6n0bptzg7w3qgh8ov3knajxqwibwckv
ထာမ်ပလိက်:Documentation/documentation
10
119999
395916
155140
2026-05-29T18:03:52Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:documentation/documentation]] ဇရေင် [[ထာမ်ပလိက်:Documentation/documentation]]
155140
wikitext
text/x-wiki
{{documentation subpage}}
{{uses lua|Module:documentation}}
__NOTOC__
This template automatically displays a documentation section like the one you are seeing now. The content of this section comes from a subpage named {{cd|<var>page</var>/documentation}}; e.g. the documentation for [[Template:affix]] is located on the page named {{cd|Template:affix/documentation}}. In order for this documentation to be displayed, templates must manually invoke {{tl|documentation}} inside of a {{cd|<nowiki><noinclude>...</noinclude></nowiki>}} section; see [[#Usage]] below. (However, {{tl|documentation}} is automatically invoked on module and JavaScript pages. The mechanism implementing this is described in more detail in the documentation for [[Module:documentation]], which implements the {{tl|documentation}} template.)
==Usage==
<var>template code</var>
<var>...</var>{{wt|noinclude}}{{temp|documentation}}{{wt|/noinclude}}
or
{{wt|onlyinclude}}template code . . .{{wt|/onlyinclude}}
{{temp|documentation}}
The most common practice is to place the {{temp|documentation}} template in {{wt|noinclude}} tags. Alternatively, the template code itself can be wrapped in {{wt|onlyinclude}} tags and the {{temp|documentation}} template should be outside of those tags. Both of these methods ensure that the documentation is not transcluded onto other pages as part of the template.
In order to place the template itself in a category, or supply interwiki links, put those categories and interwiki links on the documentation page, inside {{wt|includeonly}} tags. If the documentation page contains {{wt|includeonly}} or {{wt|noinclude}} tags as part of the documentation, use {{tl|wikitag}} or replace {{cd|<}} with {{cd|&lt;}}.
== Functions ==
If the documentation page doesn't exist, the "edit" link includes a [[mw:Manual:Creating pages with preloaded text|preload]] parameter so that clicking it will pre-fill the edit form with a stub documentation page.
== Rationale ==
Use of this template allows templates to be protected where necessary, while allowing anyone to edit the documentation, categories, and interwiki links. In addition, [[ဝိက်ရှေန်နရဳ:မဝ်ဂျူဂမၠိုၚ်|modules]] strictly require documentation pages as they cannot be categorized or documented any other way, so it makes sense to handle templates likewise.
== See also ==
*[[Help:Documenting templates]]
<includeonly>
[[ကဏ္ဍ:ထာမ်ပလိက်စရၚ်မချူသမ္တီလဝ်ဂမၠိုၚ်]]
</includeonly>
lx6ccrohs466b2m5359fzc370vz9won
ထာမ်ပလိက်:R:la:du Cange
10
147629
395923
211126
2026-05-29T18:12:02Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:R:du Cange]] ဇရေင် [[ထာမ်ပလိက်:R:la:du Cange]]
211126
wikitext
text/x-wiki
"[http://ducange.enc.sorbonne.fr/{{urlencode:{{{1|{{PAGENAME}}}}}|PATH}} {{{2|{{{1|{{PAGENAME}}}}}}}}]", in Charles du Fresne du Cange’s {{w|Charles du Fresne, sieur du Cange#Works|''Glossarium Mediæ et Infimæ Latinitatis''|lang=en}} (augmented edition with additions by D. P. Carpenterius, Adelungius and others, edited by Léopold Favre, 1883–1887)<noinclude>{{documentation}}</noinclude>
jdtpexpafs28ef7kc345qq5qb2mo1id
ထာမ်ပလိက်:R:la:du Cange/documentation
10
147639
395925
211136
2026-05-29T18:12:02Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:R:du Cange/documentation]] ဇရေင် [[ထာမ်ပလိက်:R:la:du Cange/documentation]]
211136
wikitext
text/x-wiki
{{documentation subpage}}
{{documentation needed}}<!-- Replace this with a short description of the purpose of the template, and how to use it. -->
<includeonly>
[[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲလပ်တေန်ဂမၠိုၚ်|du Cange]]
</includeonly>
gl90hxj3bmdrek5fom4tjoqq874ki37
မဝ်ဂျူ:kl-pron
828
219785
395895
300356
2026-05-29T16:13:58Z
咽頭べさ
33
395895
Scribunto
text/plain
local export = {}
local lang = require("Module:languages").getByCode("kl")
local ipa = require("Module:IPA")
local acc = require("Module:accent qualifier")
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local len = mw.ustring.len
local lower = mw.ustring.lower
local sub = mw.ustring.sub
-- Letter groups
-- Each group is a bare string of letters; the functions below splice it into a
-- "[" .. group .. "]" (or "[^" .. group .. "]") character class when building
-- their patterns.
local consGroup = "mnptkqvsgrljfbd"
local vowelGroup = "aeiou"
local uvular = "rq"
local labial = "mp"
local alveolar = "ntsl"
-- Pattern fragment: an optional length mark "ː" followed by an optional
-- syllable dot ".".
local vowelBound = "ː?%.?"
-- Phonemic transcription
-- Lowercases `word` and rewrites its letters with a single lookup table.
-- The table is applied twice: the first pass matches any run of "n" plus one
-- more character (so "nng" and "ng" are caught as units), the second pass
-- picks up single characters that the first pass left inside unmatched runs.
function export.phonemic(word)
	local phonemes = {
		["nng"] = "ŋŋ",
		["ng"] = "ŋ",
		["g"] = "ɡ",
		["d"] = "t",
		["b"] = "p",
		["e"] = "i",
		["o"] = "u"
	}
	local lowered = lower(word)
	local digraphs_done = gsub(lowered, "n*.", phonemes)
	local all_done = gsub(digraphs_done, ".", phonemes)
	return all_done
end
-- Syllabification rules
-- Inserts "." syllable breaks into `word`. Every space-separated word is
-- wrapped in "#" border markers; these are stripped again from the result
-- only when `hide_borders` is truthy.
function export.syllabify(word, hide_borders)
	local consonant = "[" .. consGroup .. "]"
	local vowel = "[" .. vowelGroup .. "]"
	-- Ordered {pattern, replacement} pairs; each works on the previous output.
	local rules = {
		-- Mark all word borders with #
		{ "([^ ]+)", "#%1#" },
		-- Put a break before each (optional n)(optional consonant)(vowel) group
		{ "([^" .. consGroup .. "]-)(n?" .. consonant .. "?" .. vowel .. ")", "%1.%2" },
		-- Two identical vowels are rejoined into one syllable
		{ "(" .. vowel .. ")%.%1", "%1%1" },
		-- A double n is split across the break
		{ "%.nn", "n.n" },
		-- No break directly after the word border (or a leading hyphen)
		{ "(#%-?)%.", "%1" },
	}
	for _, rule in ipairs(rules) do
		word = gsub(word, rule[1], rule[2])
	end
	if hide_borders then
		return (gsub(word, "#", ""))
	end
	return word
end
-- Phonetic transcription
-- Lowercases and syllabifies `word`, then applies an ordered chain of
-- substitutions that turns the spelling into a narrow transcription. The rules
-- are order-dependent: later rules match the output of earlier ones.
-- The last gsub is returned directly, so the transcription is followed by that
-- call's replacement count as a second return value.
function export.phonetic(word)
-- Make text lowercase
word = lower(word)
-- Syllabify the word, keeping the "#" word-border markers that several rules
-- below anchor on
word = export.syllabify(word, false)
-- NG
word = gsub(word, "ng", "ŋ")
-- long vowels: a doubled vowel becomes the vowel plus "ː"
word = gsub(word, "([" .. vowelGroup .. "])%1", "%1ː")
-- /ɡ/-allophony
word = gsub(word, "ig%.g", "iç.ç")
word = gsub(word, "ag%.g", "ax̟.x̟")
-- /u/-labialisation: "ʷ" is inserted after the break, replacing a "v" if present
word = gsub(word, "u(ː?)%.v?([" .. vowelGroup .. "])", "u%1.ʷ%2")
-- /t/-affrication
word = gsub(word, "ti", "t͡si")
word = gsub(word, "t%.s", "t.t͡s")
-- word-initial G is voiceless
word = gsub(word, "#g", "#k")
-- Vowel uvularisation
word = gsub(word, "ːr%.([" .. consGroup .. "])", "ʶːr.%1")
-- Vowel allophone changes
-- U
-- "O" is a temporary placeholder (the input was lowercased above); mapML
-- turns it into "o" at the end.
word = gsub(word, "u(ʶ?" .. vowelBound .. "[" .. uvular .. "])", "O%1")
word = gsub(word, "([" .. alveolar .. "])u(" .. vowelBound .. "[" .. alveolar .. "])", "%1ʉ%2")
-- NOTE(review): the next line replaces "u" with "u", i.e. it changes nothing,
-- and the line after it rewrites every remaining "u" (including those before
-- a labial) to "ʊ". Confirm whether "u" was meant to survive before labials.
word = gsub(word, "u(" .. vowelBound .. "[" .. labial .. "])", "u%1")
word = gsub(word, "u(" .. vowelBound .. ")", "ʊ%1")
word = gsub(word, "#ʊ(" .. vowelBound .. "[^" .. uvular .. "])", "#u%1")
-- A
word = gsub(word, "a(ʶ?" .. vowelBound .. "[" .. uvular .. "])", "ɑ%1")
word = gsub(word, "a(" .. vowelBound .. "[^#])", "ə%1")
word = gsub(word, "#ə(" .. vowelBound .. "[^" .. uvular .. "])", "#a%1")
-- I
word = gsub(word, "i(ʶ?" .. vowelBound .. "[" .. uvular .. "])", "ɐ%1")
word = gsub(word, "i(" .. vowelBound .. "[" .. labial .. "])", "y%1")
word = gsub(word, "i(" .. vowelBound .. ")", "ɪ%1")
word = gsub(word, "#ɪ(" .. vowelBound .. "[^" .. uvular .. "])", "#i%1")
-- Geminates
-- mapGL gives the sound used for both halves of a doubled g, l, r or v.
local mapGL = {
["g"] = "x",
["l"] = "ɬ",
["r"] = "χ",
["v"] = "f"
}
-- "r" before one of these consonants is replaced by a copy of that consonant
word = gsub(word, "r%.([lfsnmptk])", "%1.%1")
word = gsub(word, "n%.ŋ", "ŋ.ŋ")
word = gsub(word, "([glrv])%.%1", function(c) return mapGL[c] .. "." .. mapGL[c] end)
-- Substitute monographs
-- Applied one character at a time in a single pass; characters without an
-- entry are left as they are.
local mapML = {
["g"] = "ɣ",
["e"] = "ɜ",
["o"] = "ɔ", -- FIXME: don't substitute twice
["O"] = "o",
["r"] = "ʁ",
["d"] = "t",
["b"] = "p",
["'"] = "ˈ"
}
word = gsub(word, ".", mapML)
-- Remove word boundaries
return gsub(word, "#", "")
end
-- Display pronunciation
-- Template entry point. Transcribes every non-empty positional argument of the
-- calling template, or the page name when no first argument is given, and
-- returns the qualifier followed by the formatted IPA. Each word contributes a
-- phonemic (/.../) and a phonetic ([...]) item.
function export.show(frame)
	local parent_args = frame:getParent().args
	local pagetitle = mw.loadData("Module:headword/data").pagename
	local words = {}
	if parent_args[1] then
		for _, value in ipairs(parent_args) do
			-- Empty arguments are skipped.
			if value ~= "" then
				table.insert(words, value)
			end
		end
	else
		words = { pagetitle }
	end
	local items = {}
	for _, word in ipairs(words) do
		-- The name "kl-IPA" is transcribed as the sample word "avinngaq".
		if word == "kl-IPA" then
			word = "avinngaq"
		end
		local phonemic = export.phonemic(word)
		local phonetic = export.phonetic(word)
		table.insert(items, { pron = "/" .. phonemic .. "/" })
		table.insert(items, { pron = "[" .. phonetic .. "]" })
	end
	local qualifier = acc.format_qualifiers(lang, {"[[w:West Greenlandic|Nuuk]]"})
	local transcriptions = ipa.format_IPA_full { lang = lang, items = items }
	return qualifier .. ' ' .. transcriptions
end
return export
cf3gd9qg256t4c6ugkjpdq57p5g113r
395898
395895
2026-05-29T16:18:54Z
咽頭べさ
33
395898
Scribunto
text/plain
local export = {}
local lang = require("Module:languages").getByCode("kl")
local ipa = require("Module:IPA")
local acc = require("Module:accent qualifier")
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local len = mw.ustring.len
local lower = mw.ustring.lower
local sub = mw.ustring.sub
-- Letter groups
-- Each group is a bare string of letters; the functions below splice it into a
-- "[" .. group .. "]" (or "[^" .. group .. "]") character class when building
-- their patterns.
local consGroup = "mnptkqvsgrljfbd"
local vowelGroup = "aeiou"
local uvular = "rq"
local labial = "mp"
local alveolar = "ntsl"
-- Pattern fragment: an optional length mark "ː" followed by an optional
-- syllable dot ".".
local vowelBound = "ː?%.?"
-- Phonemic transcription
-- Lowercases `word` and rewrites its letters with a single lookup table.
-- The table is applied twice: the first pass matches any run of "n" plus one
-- more character (so "nng" and "ng" are caught as units), the second pass
-- picks up single characters that the first pass left inside unmatched runs.
function export.phonemic(word)
	local phonemes = {
		["nng"] = "ŋŋ",
		["ng"] = "ŋ",
		["g"] = "ɡ",
		["d"] = "t",
		["b"] = "p",
		["e"] = "i",
		["o"] = "u"
	}
	local lowered = lower(word)
	local digraphs_done = gsub(lowered, "n*.", phonemes)
	local all_done = gsub(digraphs_done, ".", phonemes)
	return all_done
end
-- Syllabification rules
-- Inserts "." syllable breaks into `word`. Every space-separated word is
-- wrapped in "#" border markers; these are stripped again from the result
-- only when `hide_borders` is truthy.
function export.syllabify(word, hide_borders)
	local consonant = "[" .. consGroup .. "]"
	local vowel = "[" .. vowelGroup .. "]"
	-- Ordered {pattern, replacement} pairs; each works on the previous output.
	local rules = {
		-- Mark all word borders with #
		{ "([^ ]+)", "#%1#" },
		-- Put a break before each (optional n)(optional consonant)(vowel) group
		{ "([^" .. consGroup .. "]-)(n?" .. consonant .. "?" .. vowel .. ")", "%1.%2" },
		-- Two identical vowels are rejoined into one syllable
		{ "(" .. vowel .. ")%.%1", "%1%1" },
		-- A double n is split across the break
		{ "%.nn", "n.n" },
		-- No break directly after the word border (or a leading hyphen)
		{ "(#%-?)%.", "%1" },
	}
	for _, rule in ipairs(rules) do
		word = gsub(word, rule[1], rule[2])
	end
	if hide_borders then
		return (gsub(word, "#", ""))
	end
	return word
end
-- Phonetic transcription
-- Lowercases and syllabifies `word`, then applies an ordered chain of
-- substitutions that turns the spelling into a narrow transcription. The rules
-- are order-dependent: later rules match the output of earlier ones.
-- The last gsub is returned directly, so the transcription is followed by that
-- call's replacement count as a second return value.
function export.phonetic(word)
-- Make text lowercase
word = lower(word)
-- Syllabify the word, keeping the "#" word-border markers that several rules
-- below anchor on
word = export.syllabify(word, false)
-- NG
word = gsub(word, "ng", "ŋ")
-- long vowels: a doubled vowel becomes the vowel plus "ː"
word = gsub(word, "([" .. vowelGroup .. "])%1", "%1ː")
-- /ɡ/-allophony
word = gsub(word, "ig%.g", "iç.ç")
word = gsub(word, "ag%.g", "ax̟.x̟")
-- /u/-labialisation: "ʷ" is inserted after the break, replacing a "v" if present
word = gsub(word, "u(ː?)%.v?([" .. vowelGroup .. "])", "u%1.ʷ%2")
-- /t/-affrication
word = gsub(word, "ti", "t͡si")
word = gsub(word, "t%.s", "t.t͡s")
-- word-initial G is voiceless
word = gsub(word, "#g", "#k")
-- Vowel uvularisation
word = gsub(word, "ːr%.([" .. consGroup .. "])", "ʶːr.%1")
-- Vowel allophone changes
-- U
-- "O" is a temporary placeholder (the input was lowercased above); mapML
-- turns it into "o" at the end.
word = gsub(word, "u(ʶ?" .. vowelBound .. "[" .. uvular .. "])", "O%1")
word = gsub(word, "([" .. alveolar .. "])u(" .. vowelBound .. "[" .. alveolar .. "])", "%1ʉ%2")
-- NOTE(review): the next line replaces "u" with "u", i.e. it changes nothing,
-- and the line after it rewrites every remaining "u" (including those before
-- a labial) to "ʊ". Confirm whether "u" was meant to survive before labials.
word = gsub(word, "u(" .. vowelBound .. "[" .. labial .. "])", "u%1")
word = gsub(word, "u(" .. vowelBound .. ")", "ʊ%1")
word = gsub(word, "#ʊ(" .. vowelBound .. "[^" .. uvular .. "])", "#u%1")
-- A
word = gsub(word, "a(ʶ?" .. vowelBound .. "[" .. uvular .. "])", "ɑ%1")
word = gsub(word, "a(" .. vowelBound .. "[^#])", "ə%1")
word = gsub(word, "#ə(" .. vowelBound .. "[^" .. uvular .. "])", "#a%1")
-- I
word = gsub(word, "i(ʶ?" .. vowelBound .. "[" .. uvular .. "])", "ɐ%1")
word = gsub(word, "i(" .. vowelBound .. "[" .. labial .. "])", "y%1")
word = gsub(word, "i(" .. vowelBound .. ")", "ɪ%1")
word = gsub(word, "#ɪ(" .. vowelBound .. "[^" .. uvular .. "])", "#i%1")
-- Geminates
-- mapGL gives the sound used for both halves of a doubled g, l, r or v.
local mapGL = {
["g"] = "x",
["l"] = "ɬ",
["r"] = "χ",
["v"] = "f"
}
-- "r" before one of these consonants is replaced by a copy of that consonant
word = gsub(word, "r%.([lfsnmptk])", "%1.%1")
word = gsub(word, "n%.ŋ", "ŋ.ŋ")
word = gsub(word, "([glrv])%.%1", function(c) return mapGL[c] .. "." .. mapGL[c] end)
-- Substitute monographs
-- Applied one character at a time in a single pass; characters without an
-- entry are left as they are.
local mapML = {
["g"] = "ɣ",
["e"] = "ɜ",
["o"] = "ɔ", -- FIXME: don't substitute twice
["O"] = "o",
["r"] = "ʁ",
["d"] = "t",
["b"] = "p",
["'"] = "ˈ"
}
word = gsub(word, ".", mapML)
-- Remove word boundaries
return gsub(word, "#", "")
end
-- Display pronunciation
-- Template entry point. Transcribes every non-empty positional argument of the
-- calling template, or the page name when no first argument is given, and
-- returns the qualifier followed by the formatted IPA. Each word contributes a
-- phonemic (/.../) and a phonetic ([...]) item.
function export.show(frame)
	local parent_args = frame:getParent().args
	local pagetitle = mw.loadData("Module:headword/data").pagename
	local words = {}
	if parent_args[1] then
		for _, value in ipairs(parent_args) do
			-- Empty arguments are skipped.
			if value ~= "" then
				table.insert(words, value)
			end
		end
	else
		words = { pagetitle }
	end
	local items = {}
	for _, word in ipairs(words) do
		-- The name "kl-IPA" is transcribed as the sample word "avinngaq".
		if word == "kl-IPA" then
			word = "avinngaq"
		end
		local phonemic = export.phonemic(word)
		local phonetic = export.phonetic(word)
		table.insert(items, { pron = "/" .. phonemic .. "/" })
		table.insert(items, { pron = "[" .. phonetic .. "]" })
	end
	local qualifier = acc.format_qualifiers(lang, {"[[w:en:West Greenlandic|နူခ်]]"})
	local transcriptions = ipa.format_IPA_full { lang = lang, items = items }
	return qualifier .. ' ' .. transcriptions
end
return export
h0qp42wqkw4i4jsf0of970czuzy925i
မဝ်ဂျူ:lt-pron
828
219829
395928
300400
2026-05-29T18:15:17Z
咽頭べさ
33
395928
Scribunto
text/plain
--[==[
Backend for {{lt-pr}}: IPA, hyphenation, and rhyme generation.
Author: TongcyDai
]==]
local export = {}
local m_debug = require("Module:debug")
local m_str = require("Module:string utilities")
local m_lt_common = require("Module:lt-common")
local u = m_str.char
local ulower = m_str.lower
local uupper = m_str.upper
local usub = m_str.sub
local ulen = m_str.len
local ugsub = m_str.gsub
local ufind = m_str.find
local umatch = m_str.match
local rsplit = m_str.split
-- Accent mark constants (re-exported from Module:lt-common to keep all
-- Lithuanian modules in sync).
local GRAVE = m_lt_common.GRAVE -- U+0300
local ACUTE = m_lt_common.ACUTE -- U+0301
local TILDE = m_lt_common.TILDE -- U+0303
local DOTABOVE = m_lt_common.DOTABOVE -- U+0307
local OGONEK = m_lt_common.OGONEK -- U+0328
-- M4: Reuse Module:lt-common's display formatter directly instead of
-- maintaining a parallel local copy.
local makeDisplayText = m_lt_common.makeDisplayText
-- Liaison marker: U+203F UNDERTIE — separates clitics from their stressed host
-- in input. The phonological grammar treats it like a "soft" word boundary that
-- is transparent to several cross-word processes (palatalization spread,
-- geminate / fricative simplification, place assimilation), per VLKK §19–§23.
local LIAISON = u(0x203F)
-- Lazy-loaded external modules
local m_IPA
local audio_module = "Module:audio"
local homophones_module = "Module:homophones"
local hyphenation_module = "Module:hyphenation"
local rhymes_module = "Module:rhymes"
local parameters_module = "Module:parameters"
local parse_util_module = "Module:parse utilities"
local concat = table.concat
local insert = table.insert
local lang_obj
-- Return the language object for code "lt", fetching it from
-- [[Module:languages]] on first use and reusing the cached value afterwards.
local function get_lang()
	lang_obj = lang_obj or require("Module:languages").getByCode("lt")
	return lang_obj
end
-- Record a tracking entry under the "lt-pron/" prefix via Module:debug.
local function track(reason)
	local tracking_key = "lt-pron/" .. reason
	m_debug.track(tracking_key)
end
-- ============================================================================
-- SECTION 1: Orthography & Phonology Definitions
-- ============================================================================
-- Suffix table for automatic phonetic adjustments (currently disabled)
-- Exact matching lists (must include precomposed normalized tone markers)
--[[
local SUFFIX_LOAN = {
["fòbas"]=true, ["fòbė"]=true, ["fòbija"]=true, ["fònas"]=true, ["fònė"]=true,
["lògas"]=true, ["lògija"]=true, ["skòpas"]=true
}
--]]
-- Consonant classes for syllabification (Sonority Hierarchy)
-- Keys are spelled consonants, including the digraphs "ch", "dz" and "dž".
-- get_type() reports the name of the class (R, S or T) that lists a letter.
local CLASS = {
R = {["l"]=true, ["m"]=true, ["n"]=true, ["r"]=true, ["v"]=true, ["j"]=true},
S = {["s"]=true, ["z"]=true, ["š"]=true, ["ž"]=true, ["f"]=true, ["x"]=true, ["h"]=true, ["ch"]=true},
T = {["p"]=true, ["b"]=true, ["t"]=true, ["d"]=true, ["k"]=true, ["g"]=true, ["c"]=true, ["dz"]=true, ["č"]=true, ["dž"]=true}
}
-- Front vowels trigger palatalization
-- (a set: single vowels plus the diphthongs "ie", "ei" and "eu")
local FRONT_V = {
["e"]=true, ["ę"]=true, ["ė"]=true, ["i"]=true, ["į"]=true, ["y"]=true, ["ie"]=true, ["ei"]=true, ["eu"]=true
}
-- Vowel -> Base IPA mapping (Unstressed short/inherent)
-- get_type() also uses this table as the vowel test: any key counts as "V".
local V_IPA = {
["a"] = "ɐ", ["ą"] = "ɑː", ["e"] = "ɛ", ["ę"] = "æː", ["ė"] = "eː",
["i"] = "ɪ", ["į"] = "iː", ["y"] = "iː",
["u"] = "ʊ", ["ų"] = "uː", ["ū"] = "uː", ["o"] = "oː",
["ie"] = "iɛ", ["uo"] = "uɔ",
-- Unstressed simple diphthongs
["ai"] = "ɐɪ", ["au"] = "ɒʊ", ["ei"] = "ɛɪ", ["eu"] = "ɛʊ",
["ui"] = "ʊɪ", ["oi"] = "ɔɪ", ["ou"] = "ɔʊ"
}
-- Consonant -> Base IPA mapping (Unpalatalized)
-- Note that "h" maps to the voiced "ɣ" while the digraph "ch" maps to "x".
local CONS_IPA = {
["b"] = "b", ["c"] = "t͡s", ["č"] = "t͡ʃ", ["d"] = "d",
["dz"] = "d͡z", ["dž"] = "d͡ʒ", ["ch"] = "x",
["f"] = "f", ["g"] = "ɡ", ["h"] = "ɣ", ["j"] = "j",
["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n",
["p"] = "p", ["r"] = "r", ["s"] = "s", ["š"] = "ʃ",
["t"] = "t", ["v"] = "ʋ", ["z"] = "z", ["ž"] = "ʒ",
}
-- Voicing pairs for Voicing Assimilation
-- The map is symmetric: every entry has a reverse entry, so a lookup flips a
-- consonant to its counterpart in either direction.
local VOICING_PAIRS = {
["p"]="b", ["b"]="p", ["t"]="d", ["d"]="t", ["k"]="g", ["g"]="k",
["c"]="dz", ["dz"]="c", ["č"]="dž", ["dž"]="č",
["s"]="z", ["z"]="s", ["š"]="ž", ["ž"]="š", ["x"]="ɣ", ["ɣ"]="x"
}
-- Report whether consonant `c` belongs to the voiced obstruent set.
-- Returns true for a listed consonant and false for anything else, nil
-- included. The set lives in a closure upvalue so it is built once when the
-- module loads rather than on every call.
local is_voiced
do
	local VOICED_SET = {
		["b"]=true, ["d"]=true, ["g"]=true, ["dz"]=true,
		["dž"]=true, ["z"]=true, ["ž"]=true, ["ɣ"]=true,
	}
	function is_voiced(c)
		return VOICED_SET[c] == true
	end
end
-- Accent pairs for conjugation module support
-- Maps base vowel/diphthong to accented forms (falling/rising)
-- Each value is a table with some of the keys `acute`, `grave` and `tilde`;
-- a missing key means that accent is not listed for the base.
-- For two-letter bases the acute or grave mark is attached to the first letter
-- and the tilde to the second letter.
local ACCENT_PAIRS = {
-- Long vowels (acute or tilde)
["ą"] = {acute="ą"..ACUTE, tilde="ą"..TILDE},
["ę"] = {acute="ę"..ACUTE, tilde="ę"..TILDE},
["ė"] = {acute="ė"..ACUTE, tilde="ė"..TILDE},
["y"] = {acute="y"..ACUTE, tilde="y"..TILDE},
["į"] = {acute="į"..ACUTE, tilde="į"..TILDE},
["ū"] = {acute="ū"..ACUTE, tilde="ū"..TILDE},
["ų"] = {acute="ų"..ACUTE, tilde="ų"..TILDE},
-- a/e can be short (grave) or long (tilde)
["a"] = {grave="a"..GRAVE, tilde="a"..TILDE},
["e"] = {grave="e"..GRAVE, tilde="e"..TILDE},
-- o: ó/õ/o are long, ò is short
["o"] = {acute="o"..ACUTE, grave="o"..GRAVE, tilde="o"..TILDE},
-- Short vowels i/u (only grave)
["i"] = {grave="i"..GRAVE},
["u"] = {grave="u"..GRAVE},
-- Simple diphthongs
["ai"] = {acute="a"..ACUTE.."i", tilde="a".."i"..TILDE},
["au"] = {acute="a"..ACUTE.."u", tilde="a".."u"..TILDE},
["ei"] = {acute="e"..ACUTE.."i", tilde="e".."i"..TILDE},
["ui"] = {grave="u"..GRAVE.."i", tilde="u".."i"..TILDE},
-- Complex diphthongs
["ie"] = {acute="i"..ACUTE.."e", tilde="i".."e"..TILDE},
["uo"] = {acute="u"..ACUTE.."o", tilde="u".."o"..TILDE},
-- Mixed diphthongs (a series - acute/tilde)
["al"] = {acute="a"..ACUTE.."l", tilde="a".."l"..TILDE},
["am"] = {acute="a"..ACUTE.."m", tilde="a".."m"..TILDE},
["an"] = {acute="a"..ACUTE.."n", tilde="a".."n"..TILDE},
["ar"] = {acute="a"..ACUTE.."r", tilde="a".."r"..TILDE},
-- Mixed diphthongs (e series - acute/grave/tilde, grave for foreign)
["el"] = {acute="e"..ACUTE.."l", grave="e"..GRAVE.."l", tilde="e".."l"..TILDE},
["em"] = {acute="e"..ACUTE.."m", grave="e"..GRAVE.."m", tilde="e".."m"..TILDE},
["en"] = {acute="e"..ACUTE.."n", grave="e"..GRAVE.."n", tilde="e".."n"..TILDE},
["er"] = {acute="e"..ACUTE.."r", grave="e"..GRAVE.."r", tilde="e".."r"..TILDE},
-- Mixed diphthongs (i series - grave/tilde)
["il"] = {grave="i"..GRAVE.."l", tilde="i".."l"..TILDE},
["im"] = {grave="i"..GRAVE.."m", tilde="i".."m"..TILDE},
["in"] = {grave="i"..GRAVE.."n", tilde="i".."n"..TILDE},
["ir"] = {grave="i"..GRAVE.."r", tilde="i".."r"..TILDE},
-- Mixed diphthongs (u series - grave/tilde)
["ul"] = {grave="u"..GRAVE.."l", tilde="u".."l"..TILDE},
["um"] = {grave="u"..GRAVE.."m", tilde="u".."m"..TILDE},
["un"] = {grave="u"..GRAVE.."n", tilde="u".."n"..TILDE},
["ur"] = {grave="u"..GRAVE.."r", tilde="u".."r"..TILDE},
-- Foreign diphthongs (grave only)
["eu"] = {grave="e"..GRAVE.."u"},
["oi"] = {grave="o"..GRAVE.."i"},
["ou"] = {grave="o"..GRAVE.."u"},
["ol"] = {grave="o"..GRAVE.."l"},
["om"] = {grave="o"..GRAVE.."m"},
["on"] = {grave="o"..GRAVE.."n"},
["or"] = {grave="o"..GRAVE.."r"},
}
-- ============================================================================
-- SECTION 2: Lexical Normalization (Avoid NFD destruction)
-- ============================================================================
-- Helper: strip every accent mark from `text` (defined this early because
-- later helpers need it). The text is decomposed to NFD and handed to
-- Module:lt-common, so the de-accenting logic stays shared.
local function remove_all_accents(text)
	local decomposed = mw.ustring.toNFD(text)
	return m_lt_common.to_stem_bare(decomposed)
end
-- Extract pagename from input or load from headword data.
-- Returns exactly two values: (pagename, input).
--   * With a manual override "<base:xxx>" in `input`: "xxx" and the input with
--     every <base:...> tag removed.
--   * Otherwise: the pagename from Module:headword/data and the input as given.
--   * If that data cannot be loaded or has no pagename: nil and the input.
local function get_pagename(input)
	-- Check for manual override: <base:xxx>
	local manual = input:match("<base:([^>]+)>")
	if manual then
		-- The parentheses drop gsub's replacement count, which would otherwise
		-- be returned as an unintended third value.
		return manual, (input:gsub("<base:[^>]+>", ""))
	end
	-- Load from headword data; pcall keeps a loading failure from propagating.
	local ok, pagename = pcall(function()
		return mw.loadData("Module:headword/data").pagename
	end)
	if ok and pagename then
		return pagename, input
	end
	return nil, input
end
-- Identify respelling j and (j) positions.
-- Walks a cleaned copy of the input alongside the lowercased pagename and
-- returns the list of character positions (within the cleaned input) holding
-- a "j" that the pagename does not have at the corresponding position.
-- Returns an empty list when no pagename is available.
local function identify_respelling_glides(input_with_accents, pagename)
	if not pagename then
		return {} -- No pagename, no respelling detection
	end
	local cleaned = remove_all_accents(input_with_accents)
	-- Cleanup steps, applied in this order.
	local cleanup = {
		{ "[%^%.]", "" },       -- special markers (^, .)
		{ "<base:[^>]+>", "" }, -- <base:...> override, if present
		{ "ˌ", "" },            -- literal ˌ
		{ "ʼ", "" },            -- softening mark ʼ
		{ "%(j%)", "j" },       -- (j) markers count as a plain j
		{ " ", "" },            -- spaces
	}
	for _, step in ipairs(cleanup) do
		cleaned = ugsub(cleaned, step[1], step[2])
	end
	cleaned = mw.ustring.toNFC(cleaned)
	-- Pagename is compared lowercased and without spaces.
	local page = ugsub(ulower(pagename), " ", "")

	local found = {}
	local in_pos, page_pos = 1, 1
	while in_pos <= ulen(cleaned) do
		if usub(cleaned, in_pos, in_pos) == "j" then
			local page_has_j = page_pos <= ulen(page)
				and usub(page, page_pos, page_pos) == "j"
			if page_has_j then
				-- Original j: consume it on the pagename side too.
				page_pos = page_pos + 1
			else
				-- Respelling j: record it and leave the pagename cursor put.
				insert(found, in_pos)
			end
		else
			page_pos = page_pos + 1
		end
		in_pos = in_pos + 1
	end
	return found
end
-- Safe mapping to extract tones without destroying precomposed characters
-- Each value is "base,tone"; safe_normalize splits it on the comma to obtain
-- the base character and the tone name (acute, grave or tilde).
local TONE_MAP = {
["á"]="a,acute", ["à"]="a,grave", ["ã"]="a,tilde",
["é"]="e,acute", ["è"]="e,grave", ["ẽ"]="e,tilde",
["í"]="i,acute", ["ì"]="i,grave", ["ĩ"]="i,tilde",
["ý"]="y,acute", ["ỳ"]="y,grave", ["ỹ"]="y,tilde",
["ú"]="u,acute", ["ù"]="u,grave", ["ũ"]="u,tilde",
["ó"]="o,acute", ["ò"]="o,grave", ["õ"]="o,tilde",
-- Precomposed vowels with macrons/ogoneks + tones (represented here via standard combinations)
["ą́"]="ą,acute", ["ą̃"]="ą,tilde",
["ę́"]="ę,acute", ["ę̃"]="ę,tilde",
["ė́"]="ė,acute", ["ė̃"]="ė,tilde",
["į́"]="į,acute", ["į̃"]="į,tilde",
["ų́"]="ų,acute", ["ų̃"]="ų,tilde",
["ū́"]="ū,acute", ["ū̃"]="ū,tilde",
-- Tilde on liquids (for semi-diphthongs)
["l̃"]="l,tilde", ["m̃"]="m,tilde", ["ñ"]="n,tilde", ["r̃"]="r,tilde",
["j̃"]="j,tilde"
}
-- Normalize raw input and split it into a list of {char, tone} tokens.
-- PUA rejection, non-standard encoding detection and the i/j dot-above
-- canonicalization are delegated to Module:lt-common so all Lithuanian
-- modules share one implementation; only the TONE_MAP-based parser is local,
-- since it builds the token list that lt-pron's tokenizer consumes.
local function safe_normalize(text)
	-- Reject PUA characters with helpful "use X instead" hints.
	m_lt_common.reject_pua(text)

	-- Analytics on what editors actually typed: detection runs on the raw
	-- input, before any canonicalization.
	local dotless, precomposed = m_lt_common.detect_nonstandard(text)
	if dotless then
		track('dotless-ij')
	end
	if precomposed then
		track('precomposed-i-accent')
	end
	-- i/j followed by an explicit dot-above is the *correct* input form; it is
	-- tracked here because lt-common's detection only flags the wrong forms.
	local decomposed = mw.ustring.toNFD(text)
	if ufind(decomposed, "[ij]" .. DOTABOVE) then
		track('explicit-dotabove')
	end

	-- Canonical NFC: stray dot-aboves dropped, dotless ı/ȷ turned into i/j.
	text = m_lt_common.canonicalize_input(text)

	-- TONE_MAP keys are one or two code points long; the two-code-point
	-- candidate (this character plus the next) is tried first.
	local parsed = {}
	local pos = 1
	while pos <= ulen(text) do
		local ch = usub(text, pos, pos)
		local ch_lc = ulower(ch) -- TONE_MAP is keyed in lowercase
		local pair = ch_lc .. ulower((usub(text, pos + 1, pos + 1)))
		local entry, step = TONE_MAP[pair], 2
		if not entry then
			entry, step = TONE_MAP[ch_lc], 1
		end
		if entry then
			local parts = rsplit(entry, ",")
			local base = parts[1]
			-- Preserve the original case of the base character.
			if ch ~= ch_lc then
				base = uupper(base)
			end
			insert(parsed, {char = base, tone = parts[2]})
		else
			insert(parsed, {char = ch, tone = nil})
		end
		pos = pos + step
	end
	return parsed
end
-- ============================================================================
-- SECTION 3: Tokenization & Diphthong/Digraph resolution
-- ============================================================================
-- Classify a (case-insensitive) letter or letter group: "V" for anything
-- listed in V_IPA, otherwise the name of the consonant class that lists it
-- ("R", "S" or "T", checked in that order), or "UNKNOWN".
local function get_type(c)
	local key = ulower(c)
	if V_IPA[key] then
		return "V"
	end
	return (CLASS.R[key] and "R")
		or (CLASS.S[key] and "S")
		or (CLASS.T[key] and "T")
		or "UNKNOWN"
end
-- Helper: Convert token array back to NFC string for suffix/prefix matching.
-- Each token contributes its `char`, followed by the combining mark for its
-- `tone` ("grave", "acute" or "tilde") when it has one. The pieces are
-- collected in a buffer and joined once, instead of growing a string with
-- `..` on every iteration.
local function tokens_to_string(tok_list)
	local pieces = {}
	for idx, tok in ipairs(tok_list) do
		local piece = tok.char
		if tok.tone == "grave" then
			piece = piece .. GRAVE
		elseif tok.tone == "acute" then
			piece = piece .. ACUTE
		elseif tok.tone == "tilde" then
			piece = piece .. TILDE
		end
		pieces[idx] = piece
	end
	return mw.ustring.toNFC(concat(pieces))
end
-- Apply automatic properties based on word structure (e.g. loanwords).
-- The suffix detection is currently disabled, so the token list is returned
-- unchanged. The `word_str` computation lives inside the disabled block: it is
-- needed only there, and running it on every call was wasted work.
local function apply_auto_properties(tokens)
	--[[ Suffix detection for loanword quality (currently disabled)
	local word_str = ulower(tokens_to_string(tokens))
	local matched_loan_suff = nil
	for suff, _ in pairs(SUFFIX_LOAN) do
		if usub(word_str, -ulen(suff)) == suff then
			matched_loan_suff = suff; break
		end
	end
	if matched_loan_suff then
		local suff_len = ulen(matched_loan_suff)
		local acc_len = 0
		for i = #tokens, 1, -1 do
			local t = tokens[i]
			acc_len = acc_len + ulen(tokens_to_string({t}))
			if t.type == "V" and ulower(t.char) == "o" then
				t.auto_targeted = true
				if not t.force_default then t.loan_quality = true end
			end
			if acc_len >= suff_len then break end
		end
	end
	-- Check for redundant asterisks globally
	for _, t in ipairs(tokens) do
		if t.force_default and not t.auto_targeted then
			track('redundant-asterisk')
		end
	end
	--]]
	return tokens
end
-- Strict whitelist of diphthongs and the tone placements allowed on them.
-- c1/c2 are the two vowels, t1/t2 their tones (nil when unmarked).
-- Returns true only for a listed vowel pair with a listed tone placement:
--   no tone at all, acute or grave on the first vowel, or tilde on the second.
local function is_strict_diphthong(c1, t1, c2, t2)
	local pair = c1 .. c2
	-- True when `pair` equals one of the given combinations.
	local function pair_in(...)
		for k = 1, select("#", ...) do
			if pair == select(k, ...) then
				return true
			end
		end
		return false
	end

	-- A tone on both elements is never valid.
	if t1 and t2 then
		return false
	end
	-- Unstressed: neither element has a tone
	if not t1 and not t2 then
		return pair_in("ie", "uo", "ai", "au", "ei", "ui", "oi", "ou", "eu")
	end
	-- Tone on the first element (acute or grave)
	if t1 then
		if t1 == "acute" then
			return pair_in("ai", "au", "ei", "ie", "uo", "oi")
		elseif t1 == "grave" then
			return pair_in("ui", "oi", "ou", "eu")
		end
		return false
	end
	-- Tone on the second element (tilde)
	if t2 == "tilde" then
		return pair_in("ai", "au", "ei", "ui", "ie", "uo", "eu")
	end
	return false
end
local function tokenize(text_str, pagename)
local raw_chars = safe_normalize(text_str)
-- Identify respelling glides
local respelling_j_positions = identify_respelling_glides(text_str, pagename)
local tokens = {}
local i = 1
while i <= #raw_chars do
local curr = raw_chars[i]
local nxt = raw_chars[i+1]
local lc_curr = ulower(curr.char)
local lc_nxt = nxt and ulower(nxt.char)
-- Explicit Modifiers
if curr.char == "^" then
local last_v = nil
for j = #tokens, 1, -1 do
if tokens[j].type == "V" then last_v = tokens[j]; break end
end
if last_v then
local lc_v = ulower(last_v.char)
local base_v = usub(lc_v, -1) -- Last char for silent i combinations
local is_e_base = (lc_v == "e") or (base_v == "e")
local is_o_base = (lc_v == "o") or (base_v == "o")
-- Check for valid e: no tone or grave only
if is_e_base then
if not last_v.tone or last_v.tone == "grave" then
last_v.loan_quality = true
elseif last_v.tone == "acute" then
error("lt-pron: '^' cannot be used with acute 'é' (use only with plain 'e' or grave 'è')")
elseif last_v.tone == "tilde" then
error("lt-pron: '^' cannot be used with tilde 'ẽ' (use only with plain 'e' or grave 'è')")
end
-- Check for valid o: no tone only
elseif is_o_base then
if not last_v.tone then
last_v.loan_quality = true
elseif last_v.tone == "grave" then
error("lt-pron: '^' is redundant for 'ò' (already pronounced [ɔ])")
elseif last_v.tone == "acute" or last_v.tone == "tilde" then
error("lt-pron: '^' cannot be used with 'ó' or 'õ' (native long vowels)")
end
else
error("lt-pron: '^' can only be used with 'e' (plain/grave) or 'o' (plain). Found: '" .. lc_v .. "'")
end
end
i = i + 1
-- Check for (j) marker
elseif curr.char == "(" and i + 2 <= #raw_chars then
local char2 = raw_chars[i+1]
local char3 = raw_chars[i+2]
if char2.char == "j" and char3.char == ")" then
insert(tokens, {char = "j", type = "R", tone = nil, is_respelling = true, is_optional = true, original_char = "-"})
i = i + 3
else
error("lt-pron: '(' must be followed by 'j)' to form the (j) glide marker")
end
--[[ Asterisk modifier (currently disabled)
elseif curr.char == "*" then
local last_v = nil
for j = #tokens, 1, -1 do
if tokens[j].type == "V" then last_v = tokens[j]; break end
end
if last_v then last_v.force_default = true end
i = i + 1
--]]
elseif curr.char == "." then
insert(tokens, {char = ".", type = "BOUNDARY"})
i = i + 1
elseif curr.char == "ˌ" then
insert(tokens, {char = "ˌ", type = "SECONDARY_STRESS_BOUNDARY"})
i = i + 1
elseif curr.char == "ʼ" then
-- Softening mark: palatalize the preceding consonant
for j = #tokens, 1, -1 do
local tok = tokens[j]
if tok.type == "T" or tok.type == "S" or tok.type == "R" then
tok.softening_mark = true
break
end
end
i = i + 1
elseif lc_curr == "d" and nxt and (lc_nxt == "z" or lc_nxt == "ž") then
insert(tokens, {char = curr.char .. nxt.char, type = "T", tone = nil})
i = i + 2
elseif lc_curr == "c" and nxt and lc_nxt == "h" then
insert(tokens, {char = curr.char .. nxt.char, type = "S", tone = nil})
i = i + 2
-- Special handling for V + j̃ (final j with tilde): treat as V + ĩ diphthong
elseif curr.type ~= "BOUNDARY" and get_type(curr.char) == "V" and nxt and ulower(nxt.char) == "j" and nxt.tone == "tilde" then
-- Check if this is word-final (no more non-boundary tokens after j̃)
local is_final = true
for k = i + 2, #raw_chars do
if raw_chars[k].char ~= " " then
is_final = false
break
end
end
if is_final then
-- Combine V + j̃ as a diphthong V + ĩ (e.g., uj̃ → ui̇̃)
local v_char = ulower(curr.char)
local combined_char = v_char .. "i" -- e.g., "u" + "i" = "ui"
local tone = "tilde" -- j̃'s tilde
local tone_position = 2 -- Tilde is on the second vowel (i)
insert(tokens, {
char = combined_char,
type = "V",
tone = tone,
tone_position = tone_position,
original_char = v_char .. "j" -- For hyphenation: display as "uj̃"
})
i = i + 2
else
-- Not final, treat as regular V + j
insert(tokens, {char = curr.char, type = "V", tone = curr.tone})
i = i + 1
end
elseif curr.type ~= "BOUNDARY" and get_type(curr.char) == "V" and nxt and get_type(nxt.char) == "V" then
local nxt_nxt = raw_chars[i+2]
local lc_nxt_nxt = nxt_nxt and ulower(nxt_nxt.char)
local back_diph = lc_nxt and lc_nxt_nxt and (lc_nxt .. lc_nxt_nxt)
local is_silent_i_diph = (lc_curr == "i" and not curr.tone) and
(back_diph == "au" or back_diph == "ai" or back_diph == "ou" or back_diph == "oi" or back_diph == "uo")
local is_silent_i_mono = (lc_curr == "i" and not curr.tone) and
(lc_nxt == "a" or lc_nxt == "ą" or lc_nxt == "o" or
lc_nxt == "u" or lc_nxt == "ų" or lc_nxt == "ū" or lc_nxt == "ɔ")
local is_valid_diph = is_strict_diphthong(lc_curr, curr.tone, lc_nxt, nxt and nxt.tone)
if is_valid_diph and nxt_nxt and get_type(nxt_nxt.char) == "V" then
local lc_nxt_nxt = ulower(nxt_nxt.char)
local is_next_valid_diph = is_strict_diphthong(lc_nxt, nxt.tone, lc_nxt_nxt, nxt_nxt.tone)
if is_next_valid_diph then
-- Resolve ambiguous triplets (e.g., auo -> a.uo is standard)
-- Break the first valid diphthong unless explicitly stressed
if not curr.tone then
is_valid_diph = false
end
end
end
if is_silent_i_diph then
local tone = nxt.tone or nxt_nxt.tone
local tone_position = nil
if tone then
-- For silent i diphthongs (e.g., iau), position is relative to the full string
-- Position 2 = middle vowel, Position 3 = last vowel
if nxt.tone then
tone_position = 2
elseif nxt_nxt.tone then
tone_position = 3
end
end
insert(tokens, {char = curr.char .. nxt.char .. nxt_nxt.char, type = "V", tone = tone, has_silent_i = true, tone_position = tone_position})
i = i + 3
elseif is_silent_i_mono then
local tone = nxt.tone
local tone_position = nil
if tone then
-- For silent i monosyllables (e.g., ia), position 2 = second character
tone_position = 2
end
insert(tokens, {char = curr.char .. nxt.char, type = "V", tone = tone, has_silent_i = true, tone_position = tone_position})
i = i + 2
elseif is_valid_diph then
local tone = curr.tone or nxt.tone
local tone_position = nil
if tone then
-- Record which vowel carries the tone (1 = first, 2 = second)
if curr.tone then
tone_position = 1
elseif nxt.tone then
tone_position = 2
end
end
insert(tokens, {char = curr.char .. nxt.char, type = "V", tone = tone, tone_position = tone_position})
i = i + 2
else
insert(tokens, {char = curr.char, type = "V", tone = curr.tone})
i = i + 1
end
else
if curr.char ~= " " then
local tok_type = get_type(curr.char)
local is_respelling_j = false
-- Check if this is a respelling j
if ulower(curr.char) == "j" and #respelling_j_positions > 0 then
-- Build cleaned string up to current position to find clean position
local cleaned_so_far = ""
for k = 1, i do
local c = raw_chars[k]
if c.char ~= "^" and c.char ~= "." and c.char ~= " " and c.char ~= "ʼ" and c.char ~= "ˌ" then
local char_clean = c.char
-- Don't add tone marks to cleaned string
if not c.tone then
cleaned_so_far = cleaned_so_far .. char_clean
else
-- Add base character without tone
cleaned_so_far = cleaned_so_far .. char_clean
end
end
end
local clean_pos = ulen(cleaned_so_far)
-- Check if this position is in respelling list
for _, pos in ipairs(respelling_j_positions) do
if pos == clean_pos then
is_respelling_j = true
break
end
end
end
if ulower(curr.char) == "j" then
local tok_data = {char = curr.char, type = tok_type, tone = curr.tone, is_respelling = is_respelling_j, is_optional = false}
if is_respelling_j then
tok_data.original_char = "-" -- Respelling: use "-" to indicate not in orthography
end
insert(tokens, tok_data)
else
insert(tokens, {char = curr.char, type = tok_type, tone = curr.tone})
end
end
i = i + 1
end
end
-- Validate respelling glides are between vowels
for i, tok in ipairs(tokens) do
if tok.is_respelling then
local prev_is_vowel = false
local next_is_vowel = false
-- Check previous non-boundary token
for j = i - 1, 1, -1 do
if tokens[j].type ~= "BOUNDARY" and tokens[j].type ~= "SECONDARY_STRESS_BOUNDARY" then
prev_is_vowel = (tokens[j].type == "V")
break
end
end
-- Check next non-boundary token
for j = i + 1, #tokens do
if tokens[j].type ~= "BOUNDARY" and tokens[j].type ~= "SECONDARY_STRESS_BOUNDARY" then
next_is_vowel = (tokens[j].type == "V")
break
end
end
if not (prev_is_vowel and next_is_vowel) then
error("lt-pron: Respelling glide 'j' or '(j)' must be between two vowels")
end
end
end
return apply_auto_properties(tokens)
end
-- ============================================================================
-- SECTION 4: Syllabification
-- ============================================================================
-- Pre-syllabification step: collapse doubled consonants in place.
-- Two adjacent consonant tokens (type T, S or R) are reduced to the second
-- one when they are the same letter (case-insensitive) or when both are
-- sibilants (s, z, š, ž). Running this before syllabification keeps a
-- doubled resonant from being read as the coda of a mixed diphthong
-- (pérrašo must syllabify as pé-ra-šo, not pér-ra-šo).
-- Mutates `tokens`; returns nothing.
local function merge_geminate_consonants(tokens)
	local sibilant = {["s"]=true, ["z"]=true, ["š"]=true, ["ž"]=true}
	local consonant_type = {T = true, S = true, R = true}
	local pos = 1
	while pos < #tokens do
		local left, right = tokens[pos], tokens[pos + 1]
		local collapse = false
		if consonant_type[left.type] and consonant_type[right.type] then
			local left_lc = ulower(left.char)
			local right_lc = ulower(right.char)
			collapse = (sibilant[left_lc] and sibilant[right_lc]) or left_lc == right_lc
		end
		if collapse then
			-- Drop the first of the pair and re-test the same position, so
			-- runs of three or more collapse to a single token.
			table.remove(tokens, pos)
		else
			pos = pos + 1
		end
	end
end
-- Split a flat token list into syllables.
--
-- Every token of type "V" is a syllable nucleus. For each pair of adjacent
-- nuclei, the tokens between them decide where the syllable break goes:
--   * an explicit BOUNDARY / SECONDARY_STRESS_BOUNDARY token wins (if several
--     occur between the same two vowels, the last one is used);
--   * no intervening tokens (hiatus) breaks right before the second vowel;
--   * otherwise the consonant run is classified by token type (T, S, R), with
--     consecutive S tokens grouped into one unit, and the break index is
--     chosen from the type pattern.
--
-- Returns a list of syllables, each a list of tokens with the boundary tokens
-- dropped. A syllable table may additionally carry `secondary_stress = true`.
-- When the input has no vowel token at all, the token list itself is returned
-- as the only syllable (boundary tokens are not filtered in that case).
local function syllabify(tokens)
local syllables = {}
local current_syl = {}
-- Check for leading secondary stress marker
local has_initial_secondary_stress = false
if #tokens > 0 and tokens[1].type == "SECONDARY_STRESS_BOUNDARY" then
has_initial_secondary_stress = true
end
-- Indices (into `tokens`) of every vowel token, in order.
local v_indices = {}
for i, tok in ipairs(tokens) do
if tok.type == "V" then insert(v_indices, i) end
end
if #v_indices == 0 then return {tokens} end -- Edge case: no vowels
-- boundaries[i] == true means "a new syllable starts at tokens[i]".
local boundaries = {}
-- Sonority Sequencing Algorithm combined with Morphophonological Maximum Onset
for idx = 1, #v_indices - 1 do
local v1_idx = v_indices[idx]
local v2_idx = v_indices[idx + 1]
-- Non-boundary tokens between the two vowels, each remembered with its
-- original index so a break can be mapped back onto `tokens`.
local raw_c_tokens = {}
local forced_boundary_idx = nil
for i = v1_idx + 1, v2_idx - 1 do
if tokens[i].type == "BOUNDARY" or tokens[i].type == "SECONDARY_STRESS_BOUNDARY" then
forced_boundary_idx = i
else
insert(raw_c_tokens, {t=tokens[i], orig_idx=i})
end
end
if forced_boundary_idx then
-- The break is placed on the boundary token itself; that token is skipped
-- when the syllables are assembled below.
boundaries[forced_boundary_idx] = true
elseif #raw_c_tokens == 0 then
-- Hiatus
boundaries[v2_idx] = true
else
-- Macro-Token Grouping: Treat consecutive sibilants (S) as a single phonological unit
local macro_c = {}
local i = 1
while i <= #raw_c_tokens do
local current = raw_c_tokens[i]
if current.t.type == "S" then
local absorbed = {current}
local j = i + 1
-- Absorb any subsequent S tokens into this macro unit, keeping track of them
while j <= #raw_c_tokens and raw_c_tokens[j].t.type == "S" do
insert(absorbed, raw_c_tokens[j])
j = j + 1
end
insert(macro_c, {type = "S", orig_idx = current.orig_idx, tokens = absorbed})
i = j
else
insert(macro_c, {type = current.t.type, orig_idx = current.orig_idx, tokens = {current}})
i = i + 1
end
end
-- From here on the decision depends only on how many macro units there are
-- and on their types; `orig_idx` of the chosen unit becomes the break.
local m_count = #macro_c
if m_count == 1 then
-- V.CV (or V.SSV, e.g., sausšala -> sau.sšala)
boundaries[macro_c[1].orig_idx] = true
elseif m_count == 2 then
local t1, t2 = macro_c[1].type, macro_c[2].type
-- ST, SR, TR -> V.CCV
if (t1=="S" and t2=="T") or (t1=="S" and t2=="R") or (t1=="T" and t2=="R") then
boundaries[macro_c[1].orig_idx] = true
else
-- Handle TSS and RSS sequences properly (e.g., Oksfordas, transformavo).
-- If the macro cluster is T+S or R+S, and the S unit absorbed multiple sibilants,
-- split between the first and second sibilant (TS.S, RS.S).
if (t1=="T" or t1=="R") and t2=="S" and #macro_c[2].tokens > 1 then
boundaries[macro_c[2].tokens[2].orig_idx] = true
else
-- Default VC.CV
boundaries[macro_c[2].orig_idx] = true
end
end
elseif m_count == 3 then
local t1, t2, t3 = macro_c[1].type, macro_c[2].type, macro_c[3].type
if t1=="S" and t2=="T" and t3=="R" then
boundaries[macro_c[1].orig_idx] = true -- V.CCCV
elseif (t1=="T" and t2=="S" and t3=="T") or
(t1=="R" and t2=="S" and t3=="T") or
(t1=="T" and t2=="S" and t3=="R") or
(t1=="R" and t2=="T" and t3=="R") or
(t1=="T" and t2=="T" and t3=="R") or
(t1=="R" and t2=="S" and t3=="R") then
boundaries[macro_c[2].orig_idx] = true -- VC.CCV (includes RSR, e.g., konfliktas)
else
boundaries[macro_c[3].orig_idx] = true -- VCC.CV
end
elseif m_count == 4 then
-- Identify the 4-consonant pattern according to the 8 documented combinations
local pattern = macro_c[1].type .. macro_c[2].type .. macro_c[3].type .. macro_c[4].type
if pattern == "RSTR" or pattern == "TSTR" then
-- R.STR, T.STR -> VC.CCCV
boundaries[macro_c[2].orig_idx] = true
elseif pattern == "RTRR" or pattern == "TSTS" then
-- RTR.R, TST.S -> VCCC.CV
boundaries[macro_c[4].orig_idx] = true
else
-- RT.ST, RT.SR, RT.TR, ST.TR -> VCC.CCV
-- (this branch is also the catch-all for any other 4-unit pattern)
boundaries[macro_c[3].orig_idx] = true
end
elseif m_count >= 5 then
-- Fallback for >=5 logical consonant units
-- `track` is defined elsewhere in the module; presumably it records a
-- tracking entry for such inputs.
track('complex-consonant-cluster')
boundaries[macro_c[3].orig_idx] = true
end
end
end
-- Construct syllables
-- Keys are syllable indices that must receive secondary stress.
local secondary_stress_syllables = {}
for i, tok in ipairs(tokens) do
-- A break only closes the current syllable if it already holds tokens, so a
-- break before any token has been collected is a no-op.
if boundaries[i] and #current_syl > 0 then
insert(syllables, current_syl)
-- If this is a secondary stress boundary, mark the NEXT syllable
if tok.type == "SECONDARY_STRESS_BOUNDARY" then
secondary_stress_syllables[#syllables + 1] = true
end
current_syl = {}
end
if tok.type ~= "BOUNDARY" and tok.type ~= "SECONDARY_STRESS_BOUNDARY" then
insert(current_syl, tok)
end
end
if #current_syl > 0 then insert(syllables, current_syl) end
-- Apply secondary stress marks
for idx, _ in pairs(secondary_stress_syllables) do
if syllables[idx] then
syllables[idx].secondary_stress = true
end
end
-- Apply initial secondary stress if present
if has_initial_secondary_stress and #syllables > 0 then
syllables[1].secondary_stress = true
end
return syllables
end
-- ============================================================================
-- SECTION 5: Base IPA Mapping & Stress Assignment
-- ============================================================================
-- Report whether the vowel at syl[v_idx] forms a mixed diphthong (short vowel
-- + resonant coda in the same syllable).
--
-- syl   : list of tokens making up one syllable
-- v_idx : index of the vowel token inside `syl`
--
-- Returns true only when all of the following hold:
--   * after dropping a leading silent 'i', the vowel is a single character;
--   * that character is not one of the natively long vowels
--     (ą, ę, ė, į, y, ų, ū);
--   * the next token in the syllable has type "R" and is neither j nor v.
-- Both the vowel and the coda letter are compared case-insensitively, so an
-- uppercase J/V coda is excluded exactly like its lowercase form.
local function is_mixed_diphthong(syl, v_idx)
	local v_tok = syl[v_idx]
	local lc_v = ulower(v_tok.char)
	-- Strip silent 'i' for accurate length calculation
	if v_tok.has_silent_i then
		lc_v = usub(lc_v, 2)
	end
	-- Digraphs (ie, uo, ai, au, etc.) never form mixed diphthongs with a
	-- following resonant; only short a, e, i, u, o can.
	if ulen(lc_v) > 1 then return false end
	local LONG_V = {["ą"]=true, ["ę"]=true, ["ė"]=true, ["į"]=true, ["y"]=true, ["ų"]=true, ["ū"]=true}
	if LONG_V[lc_v] then return false end
	-- V + R in the SAME syllable (coda)
	local coda = syl[v_idx + 1]
	if not coda or coda.type ~= "R" then return false end
	-- j and v do not count for the liquid/nasal semi-diphthong length rules.
	-- Lowercase first: token chars keep their original case.
	local lc_coda = ulower(coda.char)
	return lc_coda ~= "j" and lc_coda ~= "v"
end
-- Assign a base IPA string to every token and a stress mark to every toned
-- syllable. Works in place on `syllables`; returns nothing.
--
-- For each syllable the FIRST token of type "V" is treated as the nucleus:
--   * its `ipa` comes from V_IPA (fallback "ɐ"), then is overridden by the
--     loan-quality, syllable-initial "ie", and tone-specific rules below;
--   * tokens with `has_silent_i` get `silent_i = true` and are mapped using
--     the vowel string with its first character removed;
--   * if a tone is present, `syl.stress` is set to the matching stress prefix;
--   * with a tilde tone on a mixed diphthong, the resonant coda gets
--     `half_long = true`.
-- Every non-vowel token gets `ipa` from CONS_IPA, falling back to its
-- lowercased character.
local function map_base_phonetics(syllables)
-- NOTE(review): `stress_prefix` is never read or written again in this function.
local stress_prefix = nil
for _, syl in ipairs(syllables) do
-- Locate the nucleus: first vowel token of the syllable.
local v_idx = nil
for i, tok in ipairs(syl) do
if tok.type == "V" then v_idx = i; break end
end
if v_idx then
local v_tok = syl[v_idx]
local v_char = v_tok.char
local lc_v_char = ulower(v_char)
local tone = v_tok.tone
local is_mixed = is_mixed_diphthong(syl, v_idx)
-- In a mixed diphthong the tone mark may sit on the resonant instead of the
-- vowel; borrow it from there when the vowel itself has none.
if is_mixed and not tone then
local r_tok = syl[v_idx+1]
if r_tok.tone then tone = r_tok.tone end
end
if v_tok.has_silent_i then
v_tok.silent_i = true
-- Drop the leading silent 'i'; the rest of the function works on the
-- remaining vowel string.
local actual_vowel = usub(lc_v_char, 2)
v_tok.ipa = V_IPA[actual_vowel] or "ɐ"
lc_v_char = actual_vowel
else
-- Apply loan quality base default for 'o' and 'e'
v_tok.ipa = V_IPA[lc_v_char] or "ɐ"
if lc_v_char == "o" and v_tok.loan_quality then
v_tok.ipa = "ɔ"
end
if lc_v_char == "e" and v_tok.loan_quality then
v_tok.ipa = "e"
end
-- Automatically prepend glide 'j' to syllable-initial 'ie'
if lc_v_char == "ie" and v_idx == 1 then
v_tok.ipa = "jiɛ"
end
end
if tone then
-- s_mark stays "" when `tone` is none of acute / grave / tilde.
local s_mark = ""
-- First character of the (silent-i-stripped) vowel string.
local v_base = usub(lc_v_char, 1, 1)
if tone == "acute" then
s_mark = "¹ˈ"
if lc_v_char == "a" then
if is_mixed then v_tok.ipa = "ɑˑ" else v_tok.ipa = "ɑː" end
elseif lc_v_char == "e" then
-- Note: '^' modifier not allowed with acute 'é' as of current rules
-- This code path preserved for consistency
if is_mixed and v_tok.loan_quality then
v_tok.ipa = "ɛ" -- Loanword é in mixed diphthongs is short /ɛ/ without length
elseif is_mixed then
v_tok.ipa = "æˑ" -- Mixed diphthong: half-long
else
v_tok.ipa = "æː" -- Pure vowel: full-long
end
elseif lc_v_char == "ai" then v_tok.ipa = "ɑˑɪ"
elseif lc_v_char == "au" then v_tok.ipa = "ɑˑʊ"
elseif lc_v_char == "ei" then v_tok.ipa = "æˑɪ"
elseif lc_v_char == "eu" then v_tok.ipa = "æˑʊ"
elseif lc_v_char == "oi" then v_tok.ipa = "oˑɪ"
end
elseif tone == "grave" then
-- Grave gets the "¹ˈ" prefix on mixed diphthongs whose vowel starts with
-- i/u/e/o and on the digraphs ui/oi/ou/eu; otherwise the plain "ˈ".
if is_mixed and (v_base == "i" or v_base == "u" or v_base == "e" or v_base == "o") then
s_mark = "¹ˈ"
elseif lc_v_char == "ui" or lc_v_char == "oi" or lc_v_char == "ou" or lc_v_char == "eu" then
s_mark = "¹ˈ"
else
s_mark = "ˈ"
end
-- Handle loanword variants for grave
if lc_v_char == "e" and v_tok.loan_quality then
v_tok.ipa = "e" -- è^ (loanword) → [e]
end
if lc_v_char == "o" then
v_tok.ipa = "ɔ" -- ò (always loanword) → [ɔ]
end
elseif tone == "tilde" then
s_mark = "²ˈ"
if lc_v_char == "ai" then v_tok.ipa = "ɐɪˑ"
elseif lc_v_char == "au" then v_tok.ipa = "ɒʊˑ"
elseif lc_v_char == "ei" then v_tok.ipa = "ɛɪˑ"
elseif lc_v_char == "eu" then v_tok.ipa = "ɛʊˑ"
elseif lc_v_char == "ui" then v_tok.ipa = "ʊɪˑ"
elseif lc_v_char == "a" then
if is_mixed then v_tok.ipa = "ɐ" else v_tok.ipa = "ɑː" end
elseif lc_v_char == "e" then
if is_mixed then v_tok.ipa = "ɛ" else v_tok.ipa = "æː" end
end
end
syl.stress = s_mark
end
-- Set base IPA for consonants (case-insensitive mapping)
for i, tok in ipairs(syl) do
if tok.type ~= "V" then
local lc_c = ulower(tok.char)
tok.ipa = CONS_IPA[lc_c] or lc_c
end
end
-- Handle tilde half-length on mixed diphthong coda
if tone == "tilde" and is_mixed then
local r_tok = syl[v_idx+1]
r_tok.half_long = true
end
else
-- Syllable with no vowel (e.g. leftover consonant)
for i, tok in ipairs(syl) do
local lc_c = ulower(tok.char)
tok.ipa = CONS_IPA[lc_c] or lc_c
end
end
end
end
-- ============================================================================
-- SECTION 6: Phonetic Polish Passes
-- ============================================================================
-- Pass 1: Palatalization Spreading (Right-to-Left)
--
-- syllables                  : list of syllables (lists of tokens); tokens are
--                              mutated in place (`ipa`, `is_palatalized`).
-- right_context_palatalizing : optional, used by the cross-word pipeline.
--   Pass `true` when the word is followed by a liaisoned word whose first
--   effective phoneme is "soft" (front V, j, or a palatalized consonant), so
--   that:
--     1. spreading starts active, letting the word's last consonant receive ʲ
--        even though it has no in-word right neighbour;
--     2. a word-final k/g counts as being in direct contact, producing the
--        VLKK §19 pattern "lyk‿jója → [lʲiːkʲ‿…]".
--   When omitted or false, only within-word palatalization is applied.
--
-- ʲ is never appended twice: a consonant that already received ʲ from an
-- explicit softening mark is left as is when the spreading pass reaches it.
local function apply_palatalization(syllables, right_context_palatalizing)
	local PALATAL = "ʲ"

	-- Append ʲ to tok.ipa unless the token is silent (empty ipa) or its ipa
	-- already ends in ʲ. The byte-wise suffix test is safe because the lead
	-- byte of a UTF-8 sequence cannot occur inside another character.
	local function add_palatal(tok)
		local ipa = tok.ipa
		if ipa ~= "" and ipa:sub(-#PALATAL) ~= PALATAL then
			tok.ipa = ipa .. PALATAL
		end
	end

	-- Flatten tokens for cross-syllable spreading
	local flat_tokens = {}
	for _, syl in ipairs(syllables) do
		for _, tok in ipairs(syl) do insert(flat_tokens, tok) end
	end

	-- First pass: explicit softening marks. These palatalize the marked
	-- consonant only and do not start spreading.
	for _, tok in ipairs(flat_tokens) do
		if tok.softening_mark and tok.ipa ~= "" then
			add_palatal(tok)
			tok.is_palatalized = true
		end
	end

	-- Second pass: walk right to left; front vowels and j switch spreading
	-- on, any other vowel switches it off.
	local spread_active = right_context_palatalizing and true or false
	for i = #flat_tokens, 1, -1 do
		local tok = flat_tokens[i]
		local lc_char = ulower(tok.char)
		if tok.type == "V" then
			spread_active = (tok.silent_i or FRONT_V[lc_char]) and true or false
		elseif lc_char == "j" then
			-- Preserve special IPA for final j (ɪ̯), don't override it
			if tok.ipa ~= "ɪ̯" and tok.ipa ~= "" then
				tok.ipa = "j"
			end
			tok.is_palatalized = true
			spread_active = true
		elseif spread_active then
			-- DIRECT palatalization = immediate contact with a front V or j
			local is_direct = false
			local nxt = flat_tokens[i+1]
			if nxt then
				local nxt_lc = ulower(nxt.char)
				if (nxt.type == "V" and (nxt.silent_i or FRONT_V[nxt_lc])) or nxt_lc == "j" then
					is_direct = true
				end
			elseif right_context_palatalizing then
				-- No in-word neighbour, but a liaisoned soft phoneme follows.
				is_direct = true
			end
			tok.is_palatalized = true
			if lc_char == "k" or lc_char == "g" then
				-- k/g only show ʲ under direct contact, but they stay
				-- transparent: spreading continues to the left either way.
				if is_direct then
					add_palatal(tok)
				end
			else
				add_palatal(tok)
			end
		end
	end
end
-- Pass 2: Voicing Assimilation (Right-to-Left)
-- Inside a run of obstruents (token types S and T) the rightmost one fixes
-- the voicing; each obstruent to its left is swapped for its VOICING_PAIRS
-- partner when it disagrees. Vowels and resonants (V, R) end the run.
-- Every obstruent except the rightmost one of a run gets its `ipa` rebuilt
-- from CONS_IPA (plus ʲ when flagged palatalized and not k/g), and its `char`
-- is updated with the original letter case preserved.
local function apply_voicing_assimilation(syllables)
	local seq = {}
	for _, syl in ipairs(syllables) do
		for _, tok in ipairs(syl) do insert(seq, tok) end
	end

	-- nil = no run in progress; true/false = voicing demanded by the run.
	local want_voiced = nil
	for idx = #seq, 1, -1 do
		local tok = seq[idx]
		local kind = tok.type
		if kind == "V" or kind == "R" then
			want_voiced = nil -- Blocked by vowels and resonants
		elseif kind == "S" or kind == "T" then
			local lower = ulower(tok.char)
			local was_upper = (tok.char ~= lower)
			local voiced_now = is_voiced(lower)
			if want_voiced == nil then
				-- Rightmost obstruent of a run: it sets the target.
				want_voiced = voiced_now and true or false
			else
				if (want_voiced and not voiced_now) or (not want_voiced and voiced_now) then
					local swapped = VOICING_PAIRS[lower] or lower
					if was_upper then
						tok.char = uupper(swapped)
					else
						tok.char = swapped
					end
				end
				-- Rebuild the IPA from the (possibly new) letter, keeping ʲ
				local final_lower = ulower(tok.char)
				local ipa = CONS_IPA[final_lower] or final_lower
				if tok.is_palatalized and final_lower ~= "k" and final_lower ~= "g" then
					ipa = ipa .. "ʲ"
				end
				tok.ipa = ipa
			end
		end
	end
end
-- Pass 3: Nasal Assimilation (n -> ŋ before velars k/g and post-velar ch/h)
-- Per VLKK §6.3, n takes the place of articulation of a following
-- velar/uvular consonant, including the fricatives ch [x] and h [ɣ].
-- The result is "ŋʲ" when the following consonant's ipa contains ʲ, "ŋ"
-- otherwise. Tokens are mutated in place.
local function apply_nasal_assimilation(syllables)
	local seq = {}
	for _, syl in ipairs(syllables) do
		for _, tok in ipairs(syl) do insert(seq, tok) end
	end

	local VELAR = {k = true, g = true, ch = true, h = true}
	local total = #seq
	for idx = 1, total - 1 do
		-- Find the token to test against: the next token, stepping over any
		-- vowel tokens flagged silent_i.
		-- NOTE(review): a skipped silent-i token makes the consonant AFTER
		-- that vowel the trigger — confirm this is intended.
		local probe = idx + 1
		while probe <= total and seq[probe].type == "V" and seq[probe].silent_i do
			probe = probe + 1
		end
		local follower = seq[probe]
		local tok = seq[idx]
		if follower and ulower(tok.char) == "n" and VELAR[ulower(follower.char)] then
			-- Mirror the follower's palatalization as seen in its IPA string
			tok.ipa = ufind(follower.ipa, "ʲ") and "ŋʲ" or "ŋ"
		end
	end
end
-- Pass 4: Vowel Quality Adjustments after Palatalization
-- Within each syllable, a vowel that follows a palatalized consonant (or a j)
-- is fronted: a/e-type qualities and the au/ai diphthongs are swapped via the
-- table below, and an ipa beginning with u, ʊ, o or ɔ gets U+031F appended
-- to that first character. The "soft onset" state is consumed by the first
-- vowel it reaches.
local function adjust_vowel_quality(syllables)
	local FRONTED = {
		["ɐ"] = "ɛ",
		["ɑː"] = "æː",
		-- Diphthong shifts for iau / iai
		["ɒʊ"] = "ɛʊ",
		["ɑˑʊ"] = "æˑʊ",
		["ɒʊˑ"] = "ɛʊˑ",
		["ɐɪ"] = "ɛɪ",
		["ɑˑɪ"] = "æˑɪ",
		["ɐɪˑ"] = "ɛɪˑ",
	}
	for _, syl in ipairs(syllables) do
		local after_soft = false
		for _, tok in ipairs(syl) do
			if tok.type ~= "V" then
				if tok.is_palatalized or ulower(tok.char) == "j" then
					after_soft = true
				end
			elseif after_soft then
				local shifted = FRONTED[tok.ipa] or tok.ipa
				-- Anchored at the start so only the first element of a
				-- diphthong such as uɔ is fronted.
				tok.ipa = ugsub(shifted, "^([uʊoɔ])", "%1̟")
				after_soft = false
			end
		end
	end
end
-- Pass 5: Terminal Devoicing (Word-final obstruent devoicing)
-- If the very last token of the last syllable is a voiced obstruent (type S
-- or T) with an entry in VOICING_PAIRS, replace its `char` (case preserved)
-- and `ipa` with the voiceless partner, re-adding ʲ when the token is
-- flagged palatalized. Does nothing otherwise.
local function apply_terminal_devoicing(syllables)
	local syl_count = #syllables
	if syl_count == 0 then return end
	local final_syl = syllables[syl_count]
	local final_tok = final_syl[#final_syl]
	if not final_tok then return end
	if final_tok.type ~= "S" and final_tok.type ~= "T" then return end

	local lower = ulower(final_tok.char)
	if not is_voiced(lower) then return end
	local voiceless = VOICING_PAIRS[lower]
	if not voiceless then return end

	if final_tok.char ~= lower then
		final_tok.char = uupper(voiceless)
	else
		final_tok.char = voiceless
	end
	local ipa = CONS_IPA[voiceless] or voiceless
	if final_tok.is_palatalized then
		ipa = ipa .. "ʲ"
	end
	final_tok.ipa = ipa
end
-- Pass 6: Place Assimilation (Sibilant + Affricate)
-- A sibilant (type S) immediately followed by an affricate (type T) takes the
-- affricate's place of articulation: s+č→š, z+dž→ž, š+c→s, ž+dz→z.
-- The match is made on the tokens' current `ipa` strings and works across
-- syllable boundaries. Only the sibilant's `ipa` is rewritten.
local function apply_place_assimilation(syllables)
	-- PLACE_ASSIM[sibilant ipa][following affricate ipa] = new sibilant ipa
	local PLACE_ASSIM = {
		["s"] = {["t͡ʃ"] = "ʃ"},
		["z"] = {["d͡ʒ"] = "ʒ"},
		["ʃ"] = {["t͡s"] = "s"},
		["ʒ"] = {["d͡z"] = "z"},
	}
	-- Walk all tokens in order, always looking one token back.
	local prev = nil
	for _, syl in ipairs(syllables) do
		for _, tok in ipairs(syl) do
			if prev and prev.type == "S" and tok.type == "T" then
				local by_next = PLACE_ASSIM[prev.ipa]
				local replacement = by_next and by_next[tok.ipa]
				if replacement then
					prev.ipa = replacement
				end
			end
			prev = tok
		end
	end
end
-- Pass 7: Geminate Simplification (Double consonant reduction)
-- Orthographic geminates (rr, ll, ...) are merged before syllabification by
-- merge_geminate_consonants; this pass catches doubles that arise from the
-- phonetic rules (e.g. voicing assimilation: td→dd).
-- For two adjacent consonant tokens (types T, S, R) the first one is silenced
-- (ipa = "") when both are sibilants, or when both have the same non-empty
-- ipa. Tokens stay in the syllable; only the `ipa` string is emptied.
local function apply_geminate_simplification(syllables)
	local SIBILANT_IPA = {["s"]=true, ["z"]=true, ["ʃ"]=true, ["ʒ"]=true}
	local CONSONANT = {T = true, S = true, R = true}
	-- Walk all tokens in order, always looking one token back.
	local prev = nil
	for _, syl in ipairs(syllables) do
		for _, tok in ipairs(syl) do
			if prev and CONSONANT[prev.type] and CONSONANT[tok.type] then
				local both_sibilant = SIBILANT_IPA[prev.ipa] and SIBILANT_IPA[tok.ipa]
				local same_sound = (prev.ipa == tok.ipa and prev.ipa ~= "")
				if both_sibilant or same_sound then
					prev.ipa = ""
				end
			end
			prev = tok
		end
	end
end
-- Pass 8: Final Consonant Vocalization (v → ʊ̯, j → ɪ̯)
-- If the last token of the last syllable is v, its ipa becomes ʊ̯; if it is
-- j without a tilde tone, its ipa becomes ɪ̯. Only `ipa` changes; the token
-- keeps its type. (The tokenizer already rewrites a final tilde-marked j
-- into a vowel, so that case is not expected here.)
local function apply_final_consonant_vocalization(syllables)
	local count = #syllables
	if count == 0 then return end
	local tail_syl = syllables[count]
	local tail = tail_syl[#tail_syl]
	if not tail then return end

	local letter = ulower(tail.char)
	if letter == "v" then
		tail.ipa = "ʊ̯"
	elseif letter == "j" and tail.tone ~= "tilde" then
		tail.ipa = "ɪ̯"
	end
end
-- ============================================================================
-- SECTION 7: Output Assembly
-- ============================================================================
-- ---------------------------------------------------------------------------
-- Cross-word phonology helpers
-- ---------------------------------------------------------------------------
-- Break a term into words on spaces and return the non-empty pieces as a
-- plain list (a nil term yields an empty list).
-- The liaison marker ‿ belongs to the IPA output only; finding it in the
-- input raises an error telling the editor to use a regular space.
local function split_into_segments(term)
	if term and ufind(term, LIAISON) then
		local message = "lt-pron: the liaison marker \"" .. LIAISON
			.. "\" (U+203F) must not appear in the input. Use a regular space between words; "
			.. "the module decides where to insert ‿ in the IPA output based on stress."
		error(message)
	end
	local words = {}
	for _, piece in ipairs(rsplit(term or "", " ")) do
		-- Consecutive spaces produce empty pieces; skip them.
		if piece ~= "" then
			words[#words + 1] = piece
		end
	end
	return words
end
-- Tell whether a word carries any stress: returns true as soon as one
-- syllable has a truthy `stress` or `secondary_stress` field, false otherwise.
local function word_has_stress(syllables)
	local stressed = false
	for _, syl in ipairs(syllables) do
		if syl.stress or syl.secondary_stress then
			stressed = true
			break
		end
	end
	return stressed
end
-- Compute the clitic-group anchor of every word in a phrase.
--
-- A "clitic group" is a stressed word plus the unstressed words that lean on
-- it prosodically. Two adjacent words are joined by the liaison ‿ iff they
-- share a group. Each word's anchor is the index of the stressed word that
-- heads its group:
--
--   1. A stressed word anchors itself.
--   2. An unstressed word attaches to the nearest stressed word to its RIGHT
--      (proclitic, e.g. "iš namų̃" — iš leans on namų̃).
--   3. If nothing stressed follows, it attaches to the nearest stressed word
--      to its LEFT (enclitic, e.g. "sakaũ gi" — gi leans on sakaũ).
--   4. If no word in the phrase is stressed, every word gets anchor 0 so
--      they all fall into one pseudo-group.
--
-- This matches VLKK §4.7 examples like "iš namų̃ [ɪʃ‿nɐ²ˈmuː]" (proclitic)
-- and "sakaũ gi [sɐ²ˈkɒʊˑ‿ɡʲɪ]" (enclitic), and gives the right result for
-- VLKK §4.9's "išėjaũ į kiẽmą", where the unstressed į is proclitic to
-- kiẽmą while išėjaũ stands alone.
--
-- word_data : list of tables with a `syllables` field
-- Returns a list of anchor indices parallel to `word_data`.
local function compute_clitic_anchors(word_data)
	local n = #word_data
	local anchors = {}

	-- Right-to-left sweep: a stressed word records itself; an unstressed
	-- word inherits the closest stressed index seen so far (nil if none).
	local nearest_right = nil
	for i = n, 1, -1 do
		if word_has_stress(word_data[i].syllables) then
			nearest_right = i
		end
		anchors[i] = nearest_right
	end

	-- Left-to-right sweep: trailing unstressed words with nothing stressed
	-- to their right fall back to the anchor of the word before them.
	for i = 2, n do
		if anchors[i] == nil then
			anchors[i] = anchors[i - 1]
		end
	end

	-- Still nil at position 1 means nothing in the phrase is stressed.
	if n > 0 and anchors[1] == nil then
		for i = 1, n do
			anchors[i] = 0
		end
	end
	return anchors
end
-- Return the first token, scanning syllables left to right, whose `ipa` is
-- not the empty string (i.e. skip tokens that were silenced), or nil when
-- there is none.
local function first_effective_token(syllables)
	local found = nil
	for _, syl in ipairs(syllables) do
		for _, candidate in ipairs(syl) do
			if candidate.ipa ~= "" then
				found = candidate
				break
			end
		end
		if found then break end
	end
	return found
end
-- Return the last token, scanning syllables right to left, whose `ipa` is
-- not the empty string, or nil when there is none.
local function last_effective_token(syllables)
	local s = #syllables
	while s >= 1 do
		local syl = syllables[s]
		local t = #syl
		while t >= 1 do
			local candidate = syl[t]
			if candidate.ipa ~= "" then
				return candidate
			end
			t = t - 1
		end
		s = s - 1
	end
	return nil
end
-- Decide whether a word's first effective phoneme triggers palatalization
-- across a liaison boundary. That is the case for a vowel flagged silent_i
-- or listed in FRONT_V, for j, and for a consonant whose `is_palatalized`
-- is true. Call this only AFTER the word's own palatalization pass, since
-- it relies on `is_palatalized`. Returns false for a word with no effective
-- token.
local function first_token_palatalizes(syllables)
	local head = first_effective_token(syllables)
	if not head then return false end
	local lowered = ulower(head.char)
	if head.type ~= "V" then
		return lowered == "j" or head.is_palatalized == true
	end
	return head.silent_i or FRONT_V[lowered] or false
end
-- VLKK §20: the preposition "už" keeps its [ʒ] (terminal devoicing is
-- skipped) when the next liaisoned word begins with a vowel or with one of
-- the sonorants n, m, l, r, j, v. Every other word is devoiced as usual.
local UZ_SKIP_SONORANTS = {n = true, m = true, l = true, r = true, j = true, v = true}

-- word_text      : the current word as written (may be nil)
-- next_syllables : syllables of the following word, or nil if there is none
-- Returns true only for "už" followed by a vowel/sonorant-initial word.
local function should_skip_devoicing_for_uz(word_text, next_syllables)
	-- remove_all_accents returns NFD, in which ž (U+017E) is z + COMBINING
	-- CARON (U+030C); fold back to NFC before comparing with the literal.
	local folded = ulower(mw.ustring.toNFC(remove_all_accents(word_text or "")))
	if folded ~= "už" or not next_syllables then
		return false
	end
	local opener = first_effective_token(next_syllables)
	if not opener then
		return false
	end
	return opener.type == "V" or UZ_SKIP_SONORANTS[ulower(opener.char)] == true
end
-- VLKK §21b / §22b / §23b: at a liaison boundary, the last consonant of W1
-- and the first consonant of W2 may interact. We mirror within-word place
-- assimilation and geminate / sibilant simplification, applied just before
-- W1's palatalization pass so that any new ipa (e.g. s → ʃ) gets palatalized
-- correctly when needed.
-- Junction rewrite table: the outer key is the base IPA of W1's last
-- consonant, the inner key is the base IPA (without trailing ʲ) of W2's first
-- consonant, and the value is the IPA that replaces W1's consonant.
local CROSSWORD_PLACE_ASSIM = {
["s"] = {["t͡ʃ"] = "ʃ"},
["z"] = {["d͡ʒ"] = "ʒ"},
["ʃ"] = {["t͡s"] = "s"},
["ʒ"] = {["d͡z"] = "z"},
}
-- Base IPA of the sibilants; membership test for the "two sibilants meet at
-- the junction" rule.
local CROSSWORD_SIBILANTS = {["s"]=true, ["z"]=true, ["ʃ"]=true, ["ʒ"]=true}
-- Return `ipa` with one trailing ʲ removed; a nil/false argument yields "".
local function strip_trailing_palatal(ipa)
	if ipa then
		-- Parentheses keep only the string, dropping gsub's match count.
		return (ugsub(ipa, "ʲ$", ""))
	end
	return ""
end
-- Apply consonant interactions across a liaison boundary between two words.
--
-- w1_syllables : syllables of the left word (W1); its last effective token
--                may have its `ipa` rewritten or emptied
-- w2_syllables : syllables of the right word (W2); read only
--
-- Rules, applied to W1's last effective token against W2's first one:
--   * geminate / sibilant cluster — same base IPA, or both sibilants:
--     W1's consonant is silenced (ipa = "");
--   * place assimilation — W1's sibilant is rewritten according to
--     CROSSWORD_PLACE_ASSIM.
-- Both tokens must be consonants (type T, S or R), matching the within-word
-- geminate pass. Without that check two identical vowels meeting at the
-- boundary would be treated as a "geminate" and W1's final vowel deleted.
local function apply_crossword_polish_at_junction(w1_syllables, w2_syllables)
	local w1_last = last_effective_token(w1_syllables)
	local w2_first = first_effective_token(w2_syllables)
	if not (w1_last and w2_first) then return end

	-- Consonant-only guard (see header).
	local function is_consonant(tok)
		return tok.type == "T" or tok.type == "S" or tok.type == "R"
	end
	if not (is_consonant(w1_last) and is_consonant(w2_first)) then return end

	-- W1's last token has not yet been palatalized at this point in the
	-- pipeline, so its ipa is the bare base form. W2 has already been fully
	-- polished, so a trailing ʲ must be stripped before using it as a key.
	local w1_base = w1_last.ipa or ""
	local w2_base = strip_trailing_palatal(w2_first.ipa)
	if w1_base == "" or w2_base == "" then return end

	-- Geminate / sibilant cluster: drop W1's last consonant entirely.
	if w1_base == w2_base
		or (CROSSWORD_SIBILANTS[w1_base] and CROSSWORD_SIBILANTS[w2_base]) then
		w1_last.ipa = ""
		return
	end

	-- Place assimilation: rewrite W1's last consonant base. Palatalization,
	-- if any, is re-applied later by the palatalization pass.
	local rule = CROSSWORD_PLACE_ASSIM[w1_base]
	if rule and rule[w2_base] then
		w1_last.ipa = rule[w2_base]
	end
end
-- ---------------------------------------------------------------------------
-- Word-level rendering
-- ---------------------------------------------------------------------------
-- Build the mutable phonological state of a single word: tokenize it, merge
-- geminate consonants, syllabify, and assign base phonetics.
-- Returns a table with fields `text` (the word as given) and `syllables`.
local function prepare_word_state(word, pagename)
	local toks = tokenize(word, pagename)
	merge_geminate_consonants(toks)
	local state = {text = word}
	state.syllables = syllabify(toks)
	map_base_phonetics(state.syllables)
	return state
end
-- Render a fully polished syllable list into one IPA string.
--
-- For each syllable, in order:
--   1. a "." is emitted when the previous syllable ends in a vowel and this
--      one starts with a vowel (neither flagged silent_i), unless this one
--      starts with "ie", which already carries its implicit j glide from
--      map_base_phonetics;
--   2. the stress mark is emitted: "ˌ" for secondary stress, otherwise the
--      syllable's `stress` string if it has one;
--   3. every token's `ipa` is appended — an optional respelling glide is
--      written as "(j)" — followed by "ˑ" when the token is `half_long`.
--
-- Raises an error if a syllable has both primary and secondary stress.
local function render_word_ipa(syllables)
	local parts = {}
	for s_idx, syl in ipairs(syllables) do
		local syl_str = ""

		-- Hiatus separator between two vowel-adjacent syllables
		if s_idx > 1 then
			local prev_syl = syllables[s_idx - 1]
			local prev_last_tok = prev_syl[#prev_syl]
			local curr_first_tok = syl[1]
			local prev_ends_with_v = prev_last_tok and prev_last_tok.type == "V" and not prev_last_tok.silent_i
			local curr_starts_with_v = curr_first_tok and curr_first_tok.type == "V" and not curr_first_tok.silent_i
			if prev_ends_with_v and curr_starts_with_v and ulower(curr_first_tok.char) ~= "ie" then
				syl_str = "."
			end
		end

		-- Check for conflict between primary and secondary stress
		if syl.secondary_stress and syl.stress then
			error("lt-pron: A syllable cannot have both primary stress (tone mark) and secondary stress (ˌ)")
		end
		-- Add stress markers (primary or secondary, mutually exclusive)
		if syl.secondary_stress then
			syl_str = syl_str .. "ˌ"
		elseif syl.stress then
			syl_str = syl_str .. syl.stress
		end

		for _, tok in ipairs(syl) do
			if tok.is_respelling and tok.is_optional then
				syl_str = syl_str .. "(j)"
			else
				syl_str = syl_str .. tok.ipa
			end
			if tok.half_long then syl_str = syl_str .. "ˑ" end
		end
		insert(parts, syl_str)
	end
	return concat(parts, "")
end
-- ---------------------------------------------------------------------------
-- Multi-word IPA assembler with cross-word phonology
-- ---------------------------------------------------------------------------
-- Pipeline for a phrase made of space-separated words. The liaison marker ‿
-- never appears in input; it is inserted into the rendered IPA according to
-- clitic-group anchors computed from per-word stress (see
-- compute_clitic_anchors above).
--
-- Per-word polish order (unchanged from VLKK §17–§19):
-- terminal devoicing → voicing assim → place assim → geminate simp →
-- final-cons vocalization → cross-word polish at junction →
-- palatalization (with cross-word right context) → nasal assim →
-- vowel quality.
--
-- Cross-word polish at the junction (§21b/§22b/§23b) runs *before* W1's
-- palatalization pass so that any rewritten ipa still receives ʲ correctly.
-- Words are processed RTL so that each W_i sees the already-polished state
-- of W_{i+1} when computing its cross-word context.
local function to_ipa(term, provided_pagename)
	-- A caller-supplied pagename is used as-is with the term untouched;
	-- otherwise get_pagename() supplies both the pagename and the cleaned term.
	local pagename = provided_pagename
	if not pagename then
		pagename, term = get_pagename(term)
	end

	-- Split the input (and the pagename, if any) into word segments.
	local words = split_into_segments(term)
	local page_words = nil
	if pagename then
		page_words = split_into_segments(pagename)
		local n_in, n_page = #words, #page_words
		-- Word counts must agree, but only when both sides are multi-word.
		if n_in > 1 and n_page > 1 and n_in ~= n_page then
			error("lt-pron: Input has " .. n_in .. " words but pagename has "
				.. n_page .. " words. They must match.")
		end
		-- A single-word pagename cannot be aligned with a multi-word input,
		-- so per-word pagename alignment is dropped in that case.
		if n_page == 1 and n_in > 1 then
			page_words = nil
		end
	end

	-- Stage 1: build the phonological state of every word.
	local states = {}
	for idx, word in ipairs(words) do
		states[idx] = prepare_word_state(word, (page_words and page_words[idx]) or nil)
	end
	local count = #states
	if count == 0 then return "" end

	-- Stage 2: a junction is a liaison iff both neighbours share a clitic
	-- anchor; the last word has no successor and therefore never is one.
	local anchors = compute_clitic_anchors(states)
	for idx = 1, count do
		states[idx].is_liaison = idx < count and anchors[idx] == anchors[idx + 1]
	end

	-- Stage 3: the už exception. Terminal devoicing is skipped only for a
	-- word in liaison with its successor, and only when
	-- should_skip_devoicing_for_uz() approves given that successor.
	for idx = 1, count do
		local state, follower = states[idx], states[idx + 1]
		local keep_voiced = false
		if state.is_liaison and follower
			and should_skip_devoicing_for_uz(state.text, follower.syllables) then
			keep_voiced = true
		end
		state.skip_terminal_devoicing = keep_voiced
	end

	-- Stage 4: polish right-to-left so each word sees the already-polished
	-- state of the word that follows it.
	for idx = count, 1, -1 do
		local state = states[idx]
		local syls = state.syllables
		local follower = state.is_liaison and states[idx + 1] or nil

		-- Within-word passes 1-5.
		if not state.skip_terminal_devoicing then
			apply_terminal_devoicing(syls)
		end
		apply_voicing_assimilation(syls)
		apply_place_assimilation(syls)
		apply_geminate_simplification(syls)
		apply_final_consonant_vocalization(syls)

		-- Junction polish must precede palatalization so a rewritten final
		-- consonant can still receive ʲ; the right-hand palatalization
		-- context is read from the follower immediately afterwards.
		local right_palatalizing = false
		if follower then
			apply_crossword_polish_at_junction(syls, follower.syllables)
			right_palatalizing = first_token_palatalizes(follower.syllables)
		end

		-- Pass 6 and the remaining within-word passes.
		apply_palatalization(syls, right_palatalizing)
		apply_nasal_assimilation(syls)
		adjust_vowel_quality(syls)
	end

	-- Stage 5: render the words in order, joined by the liaison marker
	-- inside a clitic group and by a plain space otherwise.
	local pieces = {}
	for idx = 1, count do
		if idx > 1 then
			pieces[#pieces + 1] = states[idx - 1].is_liaison and LIAISON or " "
		end
		pieces[#pieces + 1] = render_word_ipa(states[idx].syllables)
	end
	return concat(pieces, "")
end
-- Rhyme extractor.
-- Characters treated as vowels when locating the start of the rhyme.
local IPA_VOWELS = "aɐɑæɛeəɪiɔoʊuɒɜ"
-- Derive the rhyme from a rendered IPA string.
--   ipa: IPA string as produced by to_ipa (may contain ‿ and spaces).
-- Returns everything from the first vowel after the LAST primary stress
-- mark (ˈ) to the end, with liaison markers, spaces and hiatus dots removed.
-- If no vowel follows the stress mark, the whole post-stress remainder is
-- returned. Returns nil when the string has no primary stress mark.
local function get_rhyme(ipa)
	-- Remove liaison and spaces before rhyme calculation
	local clean_ipa = ugsub(ipa, "[‿ ]", "")
	-- Scan right-to-left for the last primary stress mark. Only text AFTER
	-- the mark is used below, so whatever precedes ˈ (such as the ¹/² tone
	-- superscripts seen in the test expectations) needs no special handling.
	local last_stress_pos = nil
	for i = ulen(clean_ipa), 1, -1 do
		if usub(clean_ipa, i, i) == "ˈ" then
			last_stress_pos = i
			break
		end
	end
	if not last_stress_pos then return nil end
	-- Extract content after the stress mark
	local after = usub(clean_ipa, last_stress_pos + 1)
	-- Remove hiatus dots for rhyme grouping
	after = ugsub(after, "%.", "")
	-- Find first vowel position (skip onset consonants)
	local vstart = umatch(after, "()[" .. IPA_VOWELS .. "]")
	if vstart then
		return usub(after, vstart)
	end
	return after
end
-- Hyphenation generator (supports multi-word phrases).
--   term:              input spelling, possibly with tone marks.
--   provided_pagename: optional pagename; when absent, get_pagename(term)
--                      supplies both the pagename and the cleaned term.
-- Returns a list of display pieces obtained by splitting the space-joined
-- phrase at syllable boundaries (so a piece may contain a space where two
-- words meet), plus a `syllable_count` field with the real syllable total.
-- Raises an error when input and pagename are both multi-word but differ in
-- word count.
local function get_hyphenation(term, provided_pagename)
	-- Use provided pagename if available, otherwise try to extract from term
	local pagename, clean_term
	if provided_pagename then
		pagename = provided_pagename
		clean_term = term
	else
		pagename, clean_term = get_pagename(term)
	end
	term = clean_term
	-- Split input and pagename into word segments. Words are re-joined with
	-- a plain space below; no liaison marker is emitted in hyphenation.
	local input_segs = split_into_segments(term)
	local pagename_segs = nil
	if pagename then
		pagename_segs = split_into_segments(pagename)
		-- Verify segment count matches (only when both are multi-segment)
		if #input_segs > 1 and #pagename_segs > 1 and #input_segs ~= #pagename_segs then
			error("lt-pron: Input has " .. #input_segs .. " words but pagename has "
				.. #pagename_segs .. " words. They must match.")
		end
		-- If pagename is a single word but input is multi-word, drop alignment
		if #pagename_segs == 1 and #input_segs > 1 then
			pagename_segs = nil
		end
	end

	-- Insert the tone mark into a multi-character vowel sequence.
	-- A position inside the sequence puts the mark right after that
	-- character; any other position puts it at the end. This matches the
	-- previous 2- and 3-character handling and also covers other lengths,
	-- which used to be dropped from the output entirely.
	local function place_tone(chars, mark, pos)
		local n = ulen(chars)
		if type(pos) == "number" and pos >= 1 and pos < n then
			return usub(chars, 1, pos) .. mark .. usub(chars, pos + 1)
		end
		return chars .. mark
	end

	local all_word_parts = {}
	-- Real syllable total, counted per word (see the note on final_parts).
	local syllable_count = 0
	for i, seg_text in ipairs(input_segs) do
		-- Get the corresponding pagename word, or nil if not available
		local seg_pagename = (pagename_segs and pagename_segs[i]) or nil
		local tokens = tokenize(seg_text, seg_pagename)
		local syllables = syllabify(tokens)
		local parts = {}
		for _, syl in ipairs(syllables) do
			local text = ""
			for _, tok in ipairs(syl) do
				-- Skip respelling characters (original_char == "-" means not in orthography)
				if tok.original_char ~= "-" then
					-- Re-attach original tone visually for display
					local t_mark = ""
					if tok.tone == "acute" then t_mark = ACUTE
					elseif tok.tone == "grave" then t_mark = GRAVE
					elseif tok.tone == "tilde" then t_mark = TILDE
					end
					-- Use original_char if set (e.g., j̃ → i internally but j in display)
					-- Otherwise use tok.char
					local disp_char = tok.original_char or tok.char
					-- Restore original orthography for ɔ
					if disp_char == "ɔ" then disp_char = "o" end
					if disp_char == "Ɔ" then disp_char = "O" end
					if tok.tone_position then
						-- Diphthong: place tone on the specified vowel
						text = text .. place_tone(disp_char, t_mark, tok.tone_position)
					else
						-- Single vowel or consonant: tone goes after the character
						text = text .. disp_char .. t_mark
					end
				end
			end
			parts[#parts + 1] = makeDisplayText(text)
		end
		-- Use ‧ instead of standard - internally to preserve word boundaries
		local word_text = concat(parts, "‧")
		insert(all_word_parts, word_text)
		syllable_count = syllable_count + #rsplit(word_text, "‧")
	end
	-- Hyphenation always joins multi-word phrases with a plain space — the
	-- liaison marker ‿ is purely an IPA-output device and never appears here.
	local combined_string = concat(all_word_parts, " ")
	local final_parts = {}
	for _, piece in ipairs(rsplit(combined_string, "‧")) do
		insert(final_parts, piece)
	end
	-- #final_parts undercounts multi-word phrases, because the last syllable
	-- of one word and the first syllable of the next share a piece. The real
	-- count is stored as a field (__len doesn't work on tables in Lua 5.1).
	final_parts.syllable_count = syllable_count
	return final_parts
end
-- ============================================================================
-- SECTION 7.5: Conjugation Module Support Functions
-- ============================================================================
-- Export: Get syllables as string array with accents.
--   term: input spelling; get_pagename(term) supplies the pagename and the
--         cleaned term that is actually tokenized.
-- Returns a list with one display string per syllable (each passed through
-- makeDisplayText). Respelling tokens are left out of the output.
function export.get_syllables(term)
	local pagename, clean_term = get_pagename(term)
	term = clean_term
	local tokens = tokenize(term, pagename)
	local syllables = syllabify(tokens)

	-- Insert the tone mark into a multi-character vowel sequence.
	-- A position inside the sequence puts the mark right after that
	-- character; any other position puts it at the end. This matches the
	-- previous 2- and 3-character handling and also covers other lengths,
	-- which used to be dropped from the output entirely.
	local function place_tone(chars, mark, pos)
		local n = ulen(chars)
		if type(pos) == "number" and pos >= 1 and pos < n then
			return usub(chars, 1, pos) .. mark .. usub(chars, pos + 1)
		end
		return chars .. mark
	end

	local result = {}
	for _, syl in ipairs(syllables) do
		local text = ""
		for _, tok in ipairs(syl) do
			-- Skip respelling glides in syllable output
			if not tok.is_respelling then
				-- Re-attach original tone
				local t_mark = ""
				if tok.tone == "acute" then t_mark = ACUTE
				elseif tok.tone == "grave" then t_mark = GRAVE
				elseif tok.tone == "tilde" then t_mark = TILDE
				end
				-- Restore original orthography for ɔ
				local disp_char = tok.char
				if disp_char == "ɔ" then disp_char = "o" end
				if disp_char == "Ɔ" then disp_char = "O" end
				if tok.tone_position then
					-- Diphthong: place tone on the specified vowel
					text = text .. place_tone(disp_char, t_mark, tok.tone_position)
				else
					-- Single vowel or consonant: tone goes after the character
					text = text .. disp_char .. t_mark
				end
			end
		end
		-- makeDisplayText post-processes the assembled syllable for display
		insert(result, makeDisplayText(text))
	end
	return result
end
-- Export: classify a single syllable as heavy (true) or light (false).
-- A syllable is heavy when its vowel is in the long-vowel set, when the
-- vowel token spans more than one character (diphthong), or when a short
-- vowel is directly followed by an "R"-type token other than j or v
-- (mixed diphthong). Raises an error if the input is not exactly one
-- syllable or contains no vowel token.
function export.is_heavy_syllable(syllable)
	local pagename, bare = get_pagename(syllable)
	local syls = syllabify(tokenize(bare, pagename))
	if #syls ~= 1 then
		error("is_heavy_syllable: input must be a single syllable, got " .. #syls .. " syllables")
	end
	local syl = syls[1]

	-- Locate the first vowel token of the syllable.
	local v_idx
	for pos = 1, #syl do
		if syl[pos].type == "V" then
			v_idx = pos
			break
		end
	end
	if not v_idx then
		error("is_heavy_syllable: no vowel found in syllable")
	end

	local nucleus = syl[v_idx]
	local vowel = ulower(nucleus.char)
	-- A silent leading i (e.g. "iau") is not part of the vowel proper.
	-- NOTE(review): render_word_ipa reads `silent_i`, this reads
	-- `has_silent_i` — confirm both fields are set by the tokenizer.
	if nucleus.has_silent_i then
		vowel = usub(vowel, 2)
	end

	-- Inherently long vowels, regardless of accent. All o/ɔ are treated as
	-- long here (ò is the acknowledged exception that is not singled out).
	local LONG_VOWELS = {
		["ą"]=true, ["ę"]=true, ["ė"]=true, ["į"]=true, ["y"]=true, ["ų"]=true, ["ū"]=true,
		["o"]=true, ["ɔ"]=true
	}
	-- Long vowel, or diphthong (more than one character).
	if LONG_VOWELS[vowel] or ulen(vowel) > 1 then
		return true
	end

	-- Mixed diphthong: short vowel followed by a liquid/nasal in the coda.
	local SHORT_VOWELS = {["a"]=true, ["e"]=true, ["i"]=true, ["u"]=true}
	local coda_head = (v_idx < #syl) and syl[v_idx + 1] or nil
	if SHORT_VOWELS[vowel] and coda_head
		and coda_head.type == "R"
		and coda_head.char ~= "j" and coda_head.char ~= "v" then
		return true
	end

	return false
end
-- Export: Change accent of a syllable.
--   syllable:      a single syllable (get_pagename is applied first).
--   target_accent: "acute", "tilde", "grave" or "none".
-- Returns the syllable with all accents removed ("none"), or rebuilt as
-- onset + accented vowel/diphthong (looked up in ACCENT_PAIRS) + coda.
-- Raises an error for an invalid accent name, for input that is not exactly
-- one syllable, when no vowel is found, or when ACCENT_PAIRS has no entry
-- for the vowel/accent combination.
function export.change_accent(syllable, target_accent)
	local pagename, clean_syllable = get_pagename(syllable)
	syllable = clean_syllable
	-- Validate target_accent parameter
	local VALID_ACCENTS = {acute=true, tilde=true, grave=true, none=true}
	if not VALID_ACCENTS[target_accent] then
		error("change_accent: invalid target_accent '" .. tostring(target_accent) ..
			"', must be 'acute', 'tilde', 'grave', or 'none'")
	end
	-- Validate single syllable
	local tokens = tokenize(syllable, pagename)
	local syllables = syllabify(tokens)
	if #syllables ~= 1 then
		error("change_accent: input must be a single syllable, got " .. #syllables .. " syllables")
	end
	-- Remove all existing accents
	local clean = remove_all_accents(syllable)
	-- If target is 'none', return clean syllable
	if target_accent == "none" then
		return clean
	end
	-- Re-tokenize the clean syllable to analyze structure
	local clean_tokens = tokenize(clean, pagename)
	local clean_syllables = syllabify(clean_tokens)
	local syl = clean_syllables[1]
	-- Everything before the first vowel token is the onset
	local onset = ""
	local coda = ""
	local v_idx = nil
	for i, tok in ipairs(syl) do
		if tok.type == "V" then
			v_idx = i
			break
		else
			onset = onset .. tok.char
		end
	end
	if not v_idx then
		error("change_accent: no vowel found in syllable")
	end
	local v_tok = syl[v_idx]
	local vowel_part = ulower(v_tok.char)
	-- Handle silent i: it is excluded from the ACCENT_PAIRS lookup key, but
	-- it is still part of the spelling, so keep it (in its original case) and
	-- re-attach it in front of the accented vowel. It used to be lost.
	local silent_prefix = ""
	if v_tok.has_silent_i then
		silent_prefix = usub(v_tok.char, 1, 1)
		vowel_part = usub(vowel_part, 2)
	end
	-- Mixed diphthong: a following "R"-type token other than j/v joins the
	-- vowel in the lookup key; the coda then starts one token later.
	local coda_start = v_idx + 1
	if v_idx < #syl then
		local next_tok = syl[v_idx + 1]
		if next_tok.type == "R" and next_tok.char ~= "j" and next_tok.char ~= "v" then
			vowel_part = vowel_part .. ulower(next_tok.char)
			coda_start = v_idx + 2
		end
	end
	for i = coda_start, #syl do
		coda = coda .. syl[i].char
	end
	-- Look up the accented form in ACCENT_PAIRS
	if not ACCENT_PAIRS[vowel_part] then
		error("change_accent: vowel/diphthong '" .. vowel_part .. "' not found in accent pairs table")
	end
	local accented_vowel = ACCENT_PAIRS[vowel_part][target_accent]
	if not accented_vowel then
		error("change_accent: accent type '" .. target_accent ..
			"' is not allowed for vowel/diphthong '" .. vowel_part .. "'")
	end
	-- Reconstruct the syllable with the new accent
	return onset .. silent_prefix .. accented_vowel .. coda
end
-- ============================================================================
-- SECTION 8: Module Exports & Template Formatting (Preserved)
-- ============================================================================
-- Inline-modifier specs shared by the parsers below. Each is passed as a
-- param_mods entry to parse_inline_modifiers:
--   q_spec   is used for <q:...> and <qq:...>,
--   a_spec   is used for <a:...> and <aa:...>,
--   ref_spec is used for <ref:...>; its results are stored under `refs`.
local q_spec = {store = "insert-flattened", type = "qualifier"}
local a_spec = {store = "insert-flattened", type = "labels"}
local ref_spec = {store = "insert-flattened", item_dest = "refs", type = "references"}
-- Build an audio object from a "file#caption" argument.
-- The text before the first '#' (trailing whitespace trimmed) is the file,
-- the text after it (leading whitespace trimmed) is the caption. Without a
-- '#', the whole argument is the file and there is no caption.
local function generate_audio_obj(arg)
	local obj = {}
	obj.file, obj.caption = arg:match("^(.-)%s*#%s*(.*)$")
	if obj.file == nil then
		obj.file = arg
	end
	return obj
end
-- Parse a comma-separated rhyme specification.
-- Each item becomes { rhyme = <text> }; the <s:...> modifier stores a list
-- of numbers under `num_syl`. Qualifier, label and reference modifiers are
-- registered only when the argument contains a '<'.
local function parse_rhyme(arg, parse_err)
	local mods = {
		s = {item_dest = "num_syl", type = "number", sublist = true},
	}
	if arg:find("<") then
		mods.q, mods.qq = q_spec, q_spec
		mods.a, mods.aa = a_spec, a_spec
		mods.ref = ref_spec
	end
	local function make_rhyme(text)
		return {rhyme = text}
	end
	return require(parse_util_module).parse_inline_modifiers(arg, {
		param_mods = mods,
		generate_obj = make_rhyme,
		parse_err = parse_err,
		splitchar = ",",
	})
end
-- Parse a comma-separated hyphenation specification.
-- Each item is split on '.' into syllables and becomes
-- { hyph = <syllable list>, syllabification = <original text> }.
-- Qualifier, label and reference modifiers are registered only when the
-- argument contains a '<'.
local function parse_hyph(arg, parse_err)
	local mods = {}
	if arg:find("<") then
		mods.q, mods.qq = q_spec, q_spec
		mods.a, mods.aa = a_spec, a_spec
		mods.ref = ref_spec
	end
	local function make_hyph(text)
		return {hyph = rsplit(text, "%."), syllabification = text}
	end
	return require(parse_util_module).parse_inline_modifiers(arg, {
		param_mods = mods,
		generate_obj = make_hyph,
		parse_err = parse_err,
		splitchar = ",",
	})
end
-- Parse a comma-separated homophone specification.
-- Each item becomes { term = <text> }. The modifiers t (stored as `gloss`),
-- gloss, pos, alt, lit, id and g (a list stored as `genders`) are always
-- accepted; qualifier, label and reference modifiers are registered only
-- when the argument contains a '<'.
local function parse_homophone(arg, parse_err)
	local mods = {
		t = {item_dest = "gloss"},
		gloss = {},
		pos = {},
		alt = {},
		lit = {},
		id = {},
		g = {item_dest = "genders", sublist = true},
	}
	if arg:find("<") then
		mods.q, mods.qq = q_spec, q_spec
		mods.a, mods.aa = a_spec, a_spec
		mods.ref = ref_spec
	end
	local function make_homophone(text)
		return {term = text}
	end
	return require(parse_util_module).parse_inline_modifiers(arg, {
		param_mods = mods,
		generate_obj = make_homophone,
		parse_err = parse_err,
		splitchar = ",",
	})
end
-- Modifiers accepted inside an <audio:...> value (nested inline modifiers).
-- `t` is stored as `gloss`; `g` is a list stored as `genders`; `IPA` is a
-- list. parse_one_term clears gloss/pos/lit/genders again after it has
-- built the audio text object from them.
local audio_nested_mods = {
["a"] = a_spec, ["aa"] = a_spec,
["q"] = q_spec, ["qq"] = q_spec,
["text"] = {},
["IPA"] = {sublist = true},
["t"] = {item_dest = "gloss"},
["gloss"] = {},
["pos"] = {},
["lit"] = {},
["g"] = {
item_dest = "genders",
sublist = true,
},
}
-- Parse one comma-level term of the template input.
--   raw:       the term text, optionally followed by inline modifiers
--              (<q:>, <qq:>, <a:>, <aa:>, <ref:>, <audio:>, <rhyme:>,
--              <hyph:>, <hmp:>, <base:>).
--   parse_err: error callback handed to parse_inline_modifiers.
-- Returns a table with `term`, `pagename` (the <base:...> value or nil) and
-- the lists `audio_list`, `rhyme_list`, `hyph_list`, `hmp_list`, which are
-- always present (possibly empty) on every return path.
local function parse_one_term(raw, parse_err)
	-- Fast path: no inline modifiers at all. hmp_list is included so the
	-- result has the same shape as the parsed path below.
	if not raw:find("<") then
		return {term = raw, audio_list = {}, rhyme_list = {}, hyph_list = {}, hmp_list = {}, pagename = nil}
	end
	-- <base:...> is extracted and stripped by hand before
	-- parse_inline_modifiers sees the text.
	local pagename = nil
	if raw:find("<base:") then
		pagename = raw:match("<base:([^>]+)>")
		raw = raw:gsub("<base:[^>]+>", "")
	end

	-- Convert one <audio:...> value into an audio object. The value may
	-- itself carry nested inline modifiers (see audio_nested_mods).
	local function convert_audio(arg, perr)
		local has_mods = arg:find("<")
		local audio_obj
		if has_mods then
			audio_obj = require(parse_util_module).parse_inline_modifiers(arg, {
				param_mods = audio_nested_mods,
				generate_obj = generate_audio_obj,
				parse_err = perr,
			})
		else
			audio_obj = generate_audio_obj(arg)
		end
		audio_obj.lang = get_lang()
		audio_obj.text = require(audio_module).construct_audio_textobj(audio_obj)
		if has_mods then
			-- These fields are cleared once the text object has been built.
			audio_obj.gloss = nil
			audio_obj.pos = nil
			audio_obj.lit = nil
			audio_obj.genders = nil
		end
		return audio_obj
	end

	local parsed = require(parse_util_module).parse_inline_modifiers(raw, {
		param_mods = {
			["q"] = q_spec, ["qq"] = q_spec,
			["a"] = a_spec, ["aa"] = a_spec,
			["ref"] = ref_spec,
			["audio"] = {
				store = "insert",
				item_dest = "audio_list",
				convert = convert_audio,
			},
			["rhyme"] = {
				store = "insert-flattened",
				item_dest = "rhyme_list",
				convert = parse_rhyme,
			},
			["hyph"] = {
				store = "insert-flattened",
				item_dest = "hyph_list",
				convert = parse_hyph,
			},
			["hmp"] = {
				store = "insert-flattened",
				item_dest = "hmp_list",
				convert = parse_homophone,
			},
		},
		generate_obj = function(t)
			return {term = t, audio_list = {}, rhyme_list = {}, hyph_list = {}, hmp_list = {}}
		end,
		parse_err = parse_err,
	})
	-- Guarantee the list fields exist whatever the parser returned.
	parsed.audio_list = parsed.audio_list or {}
	parsed.rhyme_list = parsed.rhyme_list or {}
	parsed.hyph_list = parsed.hyph_list or {}
	parsed.hmp_list = parsed.hmp_list or {}
	parsed.pagename = pagename
	return parsed
end
-- Format a list of rhyme objects as one bulleted line.
-- Only rhyme, num_syl, q, qq, a and aa are forwarded to the rhymes module.
-- Returns nil when there is nothing to format.
local function fmt_rhyme(rhyme_objs, bullet)
	if not rhyme_objs or #rhyme_objs == 0 then
		return nil
	end
	local rows = {}
	for idx, src in ipairs(rhyme_objs) do
		rows[idx] = {
			rhyme = src.rhyme,
			num_syl = src.num_syl,
			q = src.q,
			qq = src.qq,
			a = src.a,
			aa = src.aa,
		}
	end
	return bullet .. require(rhymes_module).format_rhymes({
		lang = get_lang(),
		rhymes = rows
	})
end
-- Format a list of hyphenation objects as one bulleted line captioned
-- "Syllabification". Only hyph, q, qq, a and aa are forwarded to the
-- hyphenation module. Returns nil when there is nothing to format.
local function fmt_hyph(hyph_objs, bullet)
	if not hyph_objs or #hyph_objs == 0 then
		return nil
	end
	local rows = {}
	for idx, src in ipairs(hyph_objs) do
		rows[idx] = {
			hyph = src.hyph,
			q = src.q,
			qq = src.qq,
			a = src.a,
			aa = src.aa,
		}
	end
	return bullet .. require(hyphenation_module).format_hyphenations({
		lang = get_lang(),
		hyphs = rows,
		caption = "Syllabification"
	})
end
-- Format a single audio object as a bulleted line via the audio module.
local function fmt_audio(audio_obj, bullet)
	local rendered = require(audio_module).format_audio(audio_obj)
	return bullet .. rendered
end
-- Format a list of homophone objects as one bulleted line. The objects are
-- handed to the homophones module unchanged. Returns nil when there is
-- nothing to format.
local function fmt_hmp(hmp_objs, bullet)
	local have_items = hmp_objs and #hmp_objs > 0
	if not have_items then
		return nil
	end
	return bullet .. require(homophones_module).format_homophones({
		lang = get_lang(),
		homophones = hmp_objs,
	})
end
-- True when split_into_segments yields more than one word for the term.
local function is_multiword_term(term)
	local words = split_into_segments(term)
	return #words > 1
end
-- Template entry point: render the pronunciation section.
-- Reads the parent frame's arguments:
--   1        comma-separated terms with optional inline modifiers
--            (default "nãmas");
--   bullets  bullet depth of the IPA line (default 1).
-- For every term it emits the IPA line followed by audio, rhyme,
-- syllabification and homophone lines. Returns the lines joined by "\n".
function export.show(frame)
	local parargs = frame:getParent().args
	local args = require(parameters_module).process(parargs, {
		[1] = {default = "nãmas"},
		["bullets"] = {type = "number", default = 1},
	})
	local input = args[1]
	local nb = args.bullets
	local b1 = string.rep("*", nb) .. " "
	local b2 = string.rep("*", nb + 1) .. " "
	local raw_terms = require(parse_util_module).split_escaping(input, ",")
	local parsed_terms = {}
	for i, raw in ipairs(raw_terms) do
		raw = raw:match("^%s*(.-)%s*$")
		local pt = parse_one_term(raw, function(msg)
			error("lt-pron: " .. msg .. " (term " .. i .. ")")
		end)
		parsed_terms[#parsed_terms + 1] = pt
	end
	m_IPA = m_IPA or require("Module:IPA")
	-- Bullet level of the non-IPA lines: same level as the IPA line for a
	-- single pronunciation, one level deeper when there are several.
	local content_bullet = (#parsed_terms == 1) and b1 or b2
	local text_parts = {}
	for _, pt in ipairs(parsed_terms) do
		-- Generate IPA
		local ipa_str = to_ipa(pt.term, pt.pagename)

		-- The automatic hyphenation feeds both the rhyme syllable counts and
		-- the syllabification line, so compute it at most once per term, and
		-- only if something actually asks for it.
		local cached_hyph, cached_hyph_ready = nil, false
		local function get_auto_hyph()
			if not cached_hyph_ready then
				cached_hyph = get_hyphenation(pt.term, pt.pagename)
				cached_hyph_ready = true
			end
			return cached_hyph
		end
		-- Fresh { syllable count } list from the automatic hyphenation, or
		-- nil when it is empty. syllable_count is preferred over the array
		-- length, which undercounts multi-word phrases.
		local function auto_num_syl()
			local hyph = get_auto_hyph()
			if hyph and #hyph > 0 then
				return {hyph.syllable_count or #hyph}
			end
			return nil
		end

		-- Handle rhyme: manual override, suppression, or auto-generation
		local rhyme_objs = nil
		local suppress_rhyme = false
		if #pt.rhyme_list > 0 then
			-- A manual rhyme of "-" suppresses the rhyme line entirely.
			for _, robj in ipairs(pt.rhyme_list) do
				if robj.rhyme == "-" then
					suppress_rhyme = true
					break
				end
			end
			if not suppress_rhyme then
				rhyme_objs = {}
				for _, robj in ipairs(pt.rhyme_list) do
					-- If num_syl not specified, take it from the automatic hyphenation
					if not robj.num_syl then
						local num_syl = auto_num_syl()
						if num_syl then
							robj.num_syl = num_syl
						end
					end
					insert(rhyme_objs, robj)
				end
			end
		else
			-- Auto-generate rhyme (skip if term ends with - or is a multiword term)
			if not pt.term:match("%-$") and not is_multiword_term(pt.term) then
				local rhyme_str = get_rhyme(ipa_str)
				if rhyme_str then
					rhyme_objs = {{rhyme = rhyme_str, num_syl = auto_num_syl()}}
				end
			end
		end

		-- Handle hyphenation: manual override, suppression, or auto-generation
		local hyph_objs = nil
		local suppress_hyph = false
		if #pt.hyph_list > 0 then
			-- A manual syllabification of "-" suppresses the line entirely.
			for _, hobj in ipairs(pt.hyph_list) do
				if hobj.syllabification == "-" then
					suppress_hyph = true
					break
				end
			end
			if not suppress_hyph then
				hyph_objs = pt.hyph_list
			end
		else
			-- Auto-generate hyphenation
			local auto_hyph = get_auto_hyph()
			if auto_hyph and #auto_hyph > 0 then
				hyph_objs = {{hyph = auto_hyph, syllabification = concat(auto_hyph, ".")}}
			end
		end

		-- Format IPA with qualifiers and references
		local ipa_item = {pron = "[" .. ipa_str .. "]"}
		if pt.q then ipa_item.q = pt.q end
		if pt.qq then ipa_item.qq = pt.qq end
		if pt.a then ipa_item.a = pt.a end
		if pt.aa then ipa_item.aa = pt.aa end
		if pt.refs then ipa_item.refs = pt.refs end
		text_parts[#text_parts + 1] = b1 .. m_IPA.format_IPA_full({
			lang = get_lang(),
			items = {ipa_item}
		})
		-- Audio
		for _, aud in ipairs(pt.audio_list or {}) do
			text_parts[#text_parts + 1] = fmt_audio(aud, content_bullet)
		end
		-- Rhyme
		if rhyme_objs then
			local r = fmt_rhyme(rhyme_objs, content_bullet)
			if r then text_parts[#text_parts + 1] = r end
		end
		-- Hyphenation
		if hyph_objs then
			local h = fmt_hyph(hyph_objs, content_bullet)
			if h then text_parts[#text_parts + 1] = h end
		end
		-- Homophones
		if pt.hmp_list and #pt.hmp_list > 0 then
			local hmp = fmt_hmp(pt.hmp_list, content_bullet)
			if hmp then text_parts[#text_parts + 1] = hmp end
		end
	end
	return concat(text_parts, "\n")
end
-- Expose the internal converters under public names; the testcases module
-- calls toIPA, hyphenate and rhyme through these fields.
export.toIPA = to_ipa
export.hyphenate = get_hyphenation
export.rhyme = get_rhyme
return export
pqyyto9ntkmwafrrq481zamx4xplqhv
မဝ်ဂျူ:lt-pron/testcases
828
219830
395930
300401
2026-05-29T18:17:59Z
咽頭べさ
33
395930
Scribunto
text/plain
local tests = require("Module:UnitTests")
local m_pron = require("Module:User:TongcyDai/lt-pron")
local unpack = unpack or table.unpack
-- ── helpers ──────────────────────────────────────────────────────────────────
-- Wrap a string in the IPA span so test output renders in the IPA font.
local function tag_IPA(s)
	local open_tag, close_tag = '<span class="IPA">', '</span>'
	return open_tag .. s .. close_tag
end
-- Assert that toIPA(term), NFC-normalized, equals `expected`. Both sides
-- are wrapped in the IPA span before they are compared.
function tests:check_IPA(term, expected, comment)
	local actual = mw.ustring.toNFC(m_pron.toIPA(term))
	local options = { comment = comment, show_difference = true }
	self:equals(term, tag_IPA(actual), tag_IPA(expected), options)
end
-- Assert that hyphenate(term), joined with "‧", equals `expected`.
function tests:check_hyph(term, expected, comment)
	local joined = table.concat(m_pron.hyphenate(term), "‧")
	local options = { comment = comment, show_difference = true }
	self:equals(term, joined, expected, options)
end
-- Assert that the rhyme derived from toIPA(term), NFC-normalized, equals
-- `expected`. A nil rhyme is compared as the empty string.
function tests:check_rhyme(term, expected, comment)
	local rhyme = m_pron.rhyme(m_pron.toIPA(term)) or ""
	local options = { comment = comment, show_difference = true }
	self:equals(term, mw.ustring.toNFC(rhyme), expected, options)
end
-- ════════════════════════════════════════════════════════════════════════════
-- IPA TESTS
-- ════════════════════════════════════════════════════════════════════════════
-- A: Consonants and basic palatalization. Consonants are listed in
-- alphabetical order, mostly as a hard variant followed by a soft variant
-- before a front vowel. Each row is { input, expected IPA, label }.
function tests:test_IPA_A_consonants_and_palatalization()
	local cases = {
		-- B, b
		{ "bai̇̃gti", "²ˈbɐɪˑktʲɪ", "b" },
		{ "bi̇̀rbt", "¹ˈbʲɪrpt", "bʲ" },
		-- C, c
		{ "cùkrus", "ˈt͡sʊkrʊs", "c" },
		{ "ci̇̀bė", "ˈt͡sʲɪbʲeː", "cʲ" },
		-- Č, č
		{ "čaižùs", "t͡ʃɐɪˈʒʊs", "č" },
		{ "Kãčinas", "²ˈkɑːt͡ʃʲɪnɐs", "čʲ" },
		-- D, d
		{ "daũg", "²ˈdɒʊˑk", "d" },
		{ "di̇̀delis", "ˈdʲɪdʲɛlʲɪs", "dʲ" },
		-- F, f
		{ "fãzė", "²ˈfɑːzʲeː", "f" },
		{ "filė̃", "fʲɪ²ˈlʲeː", "fʲ" },
		-- G, g
		{ "gãlas", "²ˈɡɑːlɐs", "g" },
		{ "girià", "ɡʲɪˈrʲɛ", "gʲ" },
		-- H, h
		{ "hãlė", "²ˈɣɑːlʲeː", "h" },
		{ "hi̇̀mnas", "¹ˈɣʲɪmnɐs", "hʲ" },
		-- J, j
		{ "jáunas<base:jaunas>", "¹ˈjæˑʊnɐs", "j" },
		{ "vajè", "ʋɐˈjɛ", "j" },
		-- K, k
		{ "kãras", "²ˈkɑːrɐs", "k" },
		{ "kitóks", "kʲɪ¹ˈtoːks", "kʲ" },
		-- L, l
		{ "lãbas", "²ˈlɑːbɐs", "l" },
		{ "lė̃kti", "²ˈlʲeːktʲɪ", "lʲ" },
		-- M, m
		{ "mamà", "mɐˈmɐ", "m" },
		{ "méilė", "¹ˈmʲæˑɪlʲeː", "mʲ" },
		-- N, n
		{ "nósis", "¹ˈnoːsʲɪs", "n" },
		{ "knygà", "knʲiːˈɡɐ", "nʲ" },
		-- P, p
		{ "pãdas", "²ˈpɑːdɐs", "p" },
		{ "pẽčius", "²ˈpʲæːt͡ʃʲʊ̟s", "pʲ" },
		-- R, r
		{ "rai̇̃dė", "²ˈrɐɪˑdʲeː", "r" },
		{ "kairė̃", "kɐɪ²ˈrʲeː", "rʲ" },
		-- S, s
		{ "sõdas", "²ˈsoːdɐs", "s" },
		{ "si̇́ela", "¹ˈsʲiɛlɐ", "sʲ" },
		-- Š, š
		{ "šókti", "¹ˈʃoːktʲɪ", "š" },
		{ "šỹpsena", "²ˈʃʲiːpʲsʲɛnɐ", "šʲ" },
		-- T, t
		{ "tetà", "tʲɛˈtɐ", "tʲ, t" },
		-- V, v
		{ "svajõnė", "sʋɐ²ˈjo̟ːnʲeː", "v" },
		{ "vi̇̀ltis", "¹ˈʋʲɪlʲtʲɪs", "vʲ" },
		-- Z, z
		{ "zui̇̃kis", "²ˈzʊɪˑkʲɪs", "z" },
		{ "zi̇̀r̃zinti", "¹ˈzʲɪrʲzʲɪnʲtʲɪ", "zʲ" },
		-- Ž, ž
		{ "žolė̃", "ʒoː²ˈlʲeː", "ž" },
		{ "žẽmė", "²ˈʒʲæːmʲeː", "žʲ" },
		-- Ch, ch
		{ "chalãtas", "xɐ²ˈlɑːtɐs", "ch" },
		{ "cherèsas", "xʲɛˈrʲɛsɐs", "chʲ" },
		-- Dz, dz
		{ "dzū̃kai", "²ˈd͡zuːkɐɪ", "dz" },
		{ "dzi̇̀ngt", "¹ˈd͡zʲɪŋkt", "dzʲ" },
		-- Dž, dž
		{ "džáulis", "¹ˈd͡ʒɑˑʊlʲɪs", "dž" },
		{ "džiãzas", "²ˈd͡ʒʲæːzɐs", "džʲ" },
	}
	for i = 1, #cases do
		local case = cases[i]
		self:check_IPA(case[1], case[2], case[3])
	end
end
-- B: Monophthongs, covering short and long vowels and the loanword
-- variants written with `^`. Each row is { input, expected IPA, label }.
function tests:test_IPA_B_monophthongs()
	local cases = {
		-- A, a
		{ "tàvo", "ˈtɐʋoː", "à" },
		{ "vãkaras", "²ˈʋɑːkɐrɐs", "ã, a" },
		-- Ą, ą
		{ "rýtą", "¹ˈrʲiːtɑː", "ą" },
		{ "ą́žuolas", "¹ˈɑːʒuɔlɐs", "ą́" },
		{ "šą̃la", "²ˈʃɑːlɐ", "ą̃" },
		-- E, e
		{ "Pelesà", "pʲɛlʲɛˈsɐ", "e" },
		{ "nèšti", "ˈnʲɛʃʲtʲɪ", "è" },
		{ "mètras", "ˈmʲɛtrɐs", "è" },
		{ "mẽnas", "²ˈmʲæːnɐs", "ẽ" },
		-- E with `^`: closed short e in loanwords (VLKK §6.7.3 — written `ẹ`)
		{ "se^ktà", "sʲekˈtɐ", "e^ (closed short e in loanwords)" },
		{ "re^ži̇̀mas", "rʲeˈʒʲɪmɐs", "e^ (closed short e in loanwords)" },
		-- Ę, ę
		{ "tęsinỹs", "tʲæːsʲɪ²ˈnʲiːs", "ę" },
		{ "tavę̃s", "tɐ²ˈʋʲæːs", "ę̃" },
		{ "tę́vas", "¹ˈtʲæːʋɐs", "ę́" },
		-- Ė, ė
		{ "ėdė́jas", "eː¹ˈdʲeːjɛs", "ė, ė́" },
		{ "gėlė̃", "ɡʲeː²ˈlʲeː", "ė, ė̃" },
		-- I, i
		{ "liki̇̀mas", "lʲɪˈkʲɪmɐs", "i, i̇̀" },
		-- Į, į
		{ "įlį̃sti", "iː²ˈlʲiːsʲtʲɪ", "į, į̃" },
		{ "į́spūdis", "¹ˈiːspuːdʲɪs", "į́" },
		-- Y, y
		{ "mylė́ti", "mʲiː¹ˈlʲeːtʲɪ", "y" },
		{ "ýda", "¹ˈiːdɐ", "ý" },
		{ "knỹgė", "²ˈknʲiːɡʲeː", "ỹ" },
		-- O, o (long native [oː] vs. short loanword [ɔ])
		{ "norė́ti", "noː¹ˈrʲeːtʲɪ", "o (native, long)" },
		{ "óras", "¹ˈoːrɐs", "ó" },
		{ "keliõnė", "kʲɛ²ˈlʲo̟ːnʲeː", "õ" },
		{ "òmas", "ˈɔmɐs", "ò (loanword, short)" }, -- FIXME: ò /oː/ exists?
		{ "stòksas", "ˈstɔksɐs", "ò (loanword, grave)" },
		{ "Zo^jà", "zɔˈjɛ", "o^ (loanword [ɔ], unstressed)" },
		{ "žo^ngliẽrius", "ʒɔŋ²ˈɡlʲiɛrʲʊ̟s", "o^ (loanword [ɔ], unstressed)" },
		{ "fo^to^parodà", "fɔtɔpɐroːˈdɐ", "o^ (loanword foto- prefix)" },
		{ "ho^mo^ni̇̀mas", "ɣɔmɔˈnʲɪmɐs", "o^ (loanword homo- prefix)" },
		-- U, u
		{ "ugni̇̀s", "ʊˈɡnʲɪs", "u" },
		{ "pùsė", "ˈpʊsʲeː", "ù" },
		-- Ų, ų
		{ "siųstùvas", "sʲu̟ːˈstʊʋɐs", "ų" },
		{ "įskų́sti", "iː¹ˈskuːsʲtʲɪ", "ų́" },
		{ "sių̃sti", "²ˈsʲu̟ːsʲtʲɪ", "ų̃" },
		-- Ū, ū
		{ "sū́nūs", "¹ˈsuːnuːs", "ū́, ū" },
		{ "rū̃gštis", "²ˈruːkʃʲtʲɪs", "ū̃" },
	}
	for i = 1, #cases do
		local case = cases[i]
		self:check_IPA(case[1], case[2], case[3])
	end
end
-- C1: Diphthongs ai / au in three accent conditions each (acute, tilde,
-- unstressed). Each row is { input, expected IPA, label }.
function tests:test_IPA_C1_diphthongs_ai_au()
	local cases = {
		-- ai
		{ "táikino", "¹ˈtɑˑɪkʲɪnoː", "ái (acute)" },
		{ "tai̇̃ką", "²ˈtɐɪˑkɑː", "ai̇̃ (tilde)" },
		{ "taiki̇̀klis", "tɐɪˈkʲɪklʲɪs", "ai (unstressed)" },
		-- au
		{ "tráukia", "¹ˈtrɑˑʊkʲɛ", "áu (acute)" },
		{ "patraũklų", "pɐ²ˈtrɒʊˑkluː", "aũ (tilde)" },
		{ "pértrauka", "¹ˈpʲæˑrtrɒʊkɐ", "au (unstressed)" },
	}
	for i = 1, #cases do
		local case = cases[i]
		self:check_IPA(case[1], case[2], case[3])
	end
end
-- C2: Diphthongs ei / ui — three accent positions each.
function tests:test_IPA_C2_diphthongs_ei_ui()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- ei
		{ "méilė", "¹ˈmʲæˑɪlʲeː", "éi (acute)" },
		{ "mei̇̃liai", "²ˈmʲɛɪˑlʲɛɪ", "ei̇̃ (tilde)" },
		{ "meilikáuti", "mʲɛɪlʲɪ¹ˈkɑˑʊtʲɪ", "ei (unstressed)" },
		-- ui
		{ "kùisytis", "¹ˈkʊɪsʲiːtʲɪs", "ùi (grave; first element short)" },
		{ "kui̇̃sti", "²ˈkʊɪˑsʲtʲɪ", "ui̇̃ (tilde)" },
		{ "kuitinė́tis", "kʊɪtʲɪ¹ˈnʲeːtʲɪs", "ui (unstressed)" },
		{ "bùivo^las", "¹ˈbʊɪʋɔlɐs", "ùi (grave; with loanword o^)" },
		-- úi (acute, first element half-long) is a free-style variant and is not tested separately.
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- C3: Variable diphthongs ie / uo (treated phonemically as monophthongs by VLKK,
-- but written as digraphs and patterning with diphthongs in accent placement).
function tests:test_IPA_C3_diphthongs_ie_uo()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- ie
		{ "si̇́ena", "¹ˈsʲiɛnɐ", "íe (acute)" },
		{ "jiẽ<base:jie>", "²ˈjiɛ", "iẽ (tilde)" },
		{ "Diẽvas", "²ˈdʲiɛʋɐs", "iẽ (tilde)" },
		{ "Ki̇̀msienė", "¹ˈkʲɪmʲsʲiɛnʲeː", "ie (unstressed)" },
		-- uo
		{ "úodas", "¹ˈuɔdɐs", "úo (acute)" },
		{ "ruduõ", "rʊ²ˈduɔ", "uõ (tilde)" },
		{ "Aluojà<base:Aluoja>", "ɐluɔˈjɛ", "uo (unstressed)" },
		{ "vaizduõtė", "ʋɐɪ²ˈzduɔtʲeː", "uõ (tilde, after consonant cluster)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- C4: Mixed diphthongs of the a-series — al, am, an, ar.
function tests:test_IPA_C4_mixed_a()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- al
		{ "álkanas", "¹ˈɑˑlkɐnɐs", "ál (acute)" },
		{ "al̃kis", "²ˈɐlʲˑkʲɪs", "al̃ (tilde)" },
		{ "alkanáuti", "ɐlkɐ¹ˈnɑˑʊtʲɪ", "al (unstressed)" },
		-- am
		{ "skámbčioti", "¹ˈskɑˑmʲpʲt͡ʃʲo̟ːtʲɪ", "ám (acute)" },
		{ "skam̃biai", "²ˈskɐmʲˑbʲɛɪ", "am̃ (tilde)" },
		{ "skambùmas", "skɐmˈbʊmɐs", "am (unstressed)" },
		-- an
		{ "ránkioja<base:rankioja>", "¹ˈrɑˑŋʲkʲo̟ːjɛ", "án (acute)" },
		{ "rañkdarbis", "²ˈrɐŋˑɡdɐrʲbʲɪs", "añ (tilde)" },
		{ "rankinùkas", "rɐŋʲkʲɪˈnʊkɐs", "an (unstressed)" },
		-- ar
		{ "sárgas", "¹ˈsɑˑrɡɐs", "ár (acute)" },
		{ "sar̃giai", "²ˈsɐrʲˑɡʲɛɪ", "ar̃ (tilde)" },
		{ "sargýba", "sɐrʲ¹ˈɡʲiːbɐ", "ar (unstressed)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- C5: Mixed diphthongs of the e-series — el, em, en, er.
-- Foreign-word variants with grave (èl, èm, èn, èr) read tvirtapradiškai
-- are tested as alternates per VLKK §9.21, §9.23.
function tests:test_IPA_C5_mixed_e()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- el
		{ "kélmas", "¹ˈkʲæˑlmɐs", "él (acute)" },
		{ "Kel̃mė", "²ˈkʲɛlʲˑmʲeː", "el̃ (tilde)" },
		{ "kelmùtis", "kʲɛlˈmʊtʲɪs", "el (unstressed)" },
		{ "èlfas", "¹ˈɛlfɐs", "èl (loanword, grave; tvirtapradiškai)" },
		{ "el̃fas", "²ˈɛlˑfɐs", "el̃ (loanword, tilde)" },
		-- em
		{ "drémžti", "¹ˈdʲrʲæˑmʲʃʲtʲɪ", "ém (acute; ž → š before t)" },
		{ "drem̃bti", "²ˈdʲrʲɛmʲˑpʲtʲɪ", "em̃ (tilde)" },
		{ "Trempai̇̃", "tʲrʲɛm²ˈpɐɪˑ", "em (unstressed)" },
		{ "Jaržèmskis", "jɛrʲ¹ˈʒʲɛmʲsʲkʲɪs", "èm (loanword, grave)" },
		{ "kem̃pingas", "²ˈkʲɛmʲˑpʲɪŋɡɐs", "em̃ (loanword, tilde)" },
		-- en
		{ "véngia", "¹ˈʋʲæˑŋʲɡʲɛ", "én (acute)" },
		{ "žeñgsena", "²ˈʒʲɛŋˑksʲɛnɐ", "eñ (tilde)" },
		{ "vengi̇̀mas", "ʋʲɛŋʲˈɡʲɪmɐs", "en (unstressed)" },
		{ "ménkė", "¹ˈmʲæˑŋʲkʲeː", "én (acute, before nk)" },
		{ "meñkinti", "²ˈmʲɛŋʲˑkʲɪnʲtʲɪ", "eñ (tilde, before nk)" },
		{ "menkystà", "mʲɛŋʲkʲiːˈstɐ", "en (unstressed, before nk)" },
		{ "hènris", "¹ˈɣʲɛnʲrʲɪs", "èn (loanword, grave)" },
		{ "ceñtas", "²ˈt͡sʲɛnˑtɐs", "eñ (loanword, tilde)" },
		{ "ãmen", "²ˈɑːmʲɛn", "en (loanword, unstressed final)" },
		-- er
		{ "nérti", "¹ˈnʲæˑrʲtʲɪ", "ér (acute)" },
		{ "ner̃šti", "²ˈnʲɛrʲˑʃʲtʲɪ", "er̃ (tilde)" },
		{ "nerštãvietė", "nʲɛr²ˈʃtɑːʋʲiɛtʲeː", "er (unstressed)" },
		{ "ko^ncèrtas", "kɔnʲ¹ˈt͡sʲɛrtɐs", "èr (loanword, grave)" },
		{ "ko^ncer̃tas", "kɔnʲ²ˈt͡sʲɛrˑtɐs", "er̃ (loanword, tilde)" },
		{ "ter̃minas", "²ˈtʲɛrʲˑmʲɪnɐs", "er̃ (loanword, tilde)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- C6: Mixed diphthongs of the i-series — il, im, in, ir.
function tests:test_IPA_C6_mixed_i()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- il
		{ "pi̇̀lti", "¹ˈpʲɪlʲtʲɪ", "ìl (grave)" },
		{ "pil̃vas", "²ˈpʲɪlˑʋɐs", "il̃ (tilde)" },
		{ "pilti̇̀nis", "pʲɪlʲˈtʲɪnʲɪs", "il (unstressed)" },
		-- im
		{ "ti̇̀mptelėjimas<base:timptelėjimas>", "¹ˈtʲɪmʲpʲtʲɛlʲeːjɪmɐs", "ìm (grave)" },
		{ "tim̃pinti", "²ˈtʲɪmʲˑpʲɪnʲtʲɪ", "im̃ (tilde)" },
		{ "timpinė́ti", "tʲɪmʲpʲɪ¹ˈnʲeːtʲɪ", "im (unstressed)" },
		-- in
		{ "gi̇̀nti", "¹ˈɡʲɪnʲtʲɪ", "ìn (grave)" },
		{ "giñklas", "²ˈɡʲɪŋˑklɐs", "iñ (tilde)" },
		{ "ginkluõtė", "ɡʲɪŋ²ˈkluɔtʲeː", "in (unstressed)" },
		-- ir
		{ "di̇̀rti", "¹ˈdʲɪrʲtʲɪ", "ìr (grave)" },
		{ "dir̃žas", "²ˈdʲɪrˑʒɐs", "ir̃ (tilde)" },
		{ "dirži̇̀nis", "dʲɪrʲˈʒʲɪnʲɪs", "ir (unstressed)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- C7: Mixed diphthongs of the u-series — ul, um, un, ur.
function tests:test_IPA_C7_mixed_u()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- ul
		{ "dùlkė", "¹ˈdʊlʲkʲeː", "ùl (grave)" },
		{ "dul̃kti", "²ˈdʊlʲˑktʲɪ", "ul̃ (tilde)" },
		{ "dulkė́tas", "dʊlʲ¹ˈkʲeːtɐs", "ul (unstressed)" },
		-- um
		{ "grùmtis", "¹ˈɡrʊmʲtʲɪs", "ùm (grave)" },
		{ "grum̃ba", "²ˈɡrʊmˑbɐ", "um̃ (tilde)" },
		{ "grumtỹnės", "ɡrʊmʲ²ˈtʲiːnʲeːs", "um (unstressed)" },
		-- un
		{ "skùndė", "¹ˈskʊnʲdʲeː", "ùn (grave)" },
		{ "skuñdas", "²ˈskʊnˑdɐs", "uñ (tilde)" },
		{ "skundi̇̀kas", "skʊnʲˈdʲɪkɐs", "un (unstressed)" },
		-- ur
		{ "gùrkšnis", "¹ˈɡʊrʲkʃʲnʲɪs", "ùr (grave)" },
		{ "gur̃gti", "²ˈɡʊrʲˑktʲɪ", "ur̃ (tilde)" },
		{ "gurkšnóti", "ɡʊrk¹ˈʃnoːtʲɪ", "ur (unstressed)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- C8: Mixed diphthongs of the foreign o-series — ol, om, on, or
-- (per VLKK §9.22, default reading is tvirtapradiškai with grave).
function tests:test_IPA_C8_mixed_o_foreign()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- ol
		{ "kòlba", "¹ˈkɔlbɐ", "òl (grave; default reading)" },
		{ "hòldingas", "¹ˈɣɔlʲdʲɪŋɡɐs", "òl (grave)" },
		-- om
		{ "do^mkrãtas", "dɔm²ˈkrɑːtɐs", "om (unstressed)" },
		{ "pòmpa", "¹ˈpɔmpɐ", "òm (grave)" },
		-- on
		{ "po^ntònas", "pɔnˈtɔnɐs", "on (unstressed) / òn (grave)" },
		{ "fòndas", "¹ˈfɔndɐs", "òn (grave)" },
		-- or
		{ "po^rtrètas", "pɔrʲˈtʲrʲɛtɐs", "or (unstressed)" },
		{ "fòrma", "¹ˈfɔrmɐ", "òr (grave)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- C9: Foreign diphthongs eu, oi, ou — three accent positions where attested.
function tests:test_IPA_C9_diphthongs_foreign_eu_oi_ou()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- eu
		{ "plèura", "¹ˈpʲlʲɛʊrɐ", "èu (grave; tvirtapradiškai)" },
		{ "eũras", "²ˈɛʊˑrɐs", "eũ (tilde)" },
		{ "Euro^pà", "ɛʊrɔˈpɐ", "eu (unstressed)" },
		-- éu does not exist (é is long; éu would be long+long)
		-- oi
		{ "Kóiva", "¹ˈkoˑɪʋɐ", "ói (acute; rare)" },
		{ "mòira", "¹ˈmɔɪrɐ", "òi (grave)" },
		{ "sinusòidė", "sʲɪnʊ¹ˈsɔɪdʲeː", "òi (grave)" },
		{ "bròileris", "¹ˈbrɔɪlʲɛrʲɪs", "òi (grave)" },
		{ "oikumenà", "ɔɪkʊmʲɛˈnɐ", "oi (unstressed)" },
		-- oi̇̃ does not exist (per VLKK: oi reads tvirtapradiškai only)
		-- ou
		{ "šòu", "¹ˈʃɔʊ", "òu (grave)" },
		{ "klòunas", "¹ˈklɔʊnɐs", "òu (grave)" },
		{ "klounadà", "klɔʊnɐˈdɐ", "ou (unstressed)" },
		-- óu does not exist (ó is long)
		-- oũ does not exist (per VLKK: ou reads tvirtapradiškai only)
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- D: Fake diphthongs — vowel sequences that LOOK like diphthongs but are
-- actually two separate vowels in adjacent syllables (hiatus). The module
-- must NOT collapse these into a single diphthong nucleus.
function tests:test_IPA_D_fake_diphthongs()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	-- The label names the accented vowel sequence the row exercises, so it must
	-- show the accent on the same letter as the input does.
	local examples = {
		-- ai (fake)
		{ "nebepàima", "nʲɛbʲɛˈpɐ.ɪmɐ", "ài" },
		{ "archãika", "ɐr²ˈxɑː.ɪkɐ", "ãi" },
		{ "pai̇́eško", "pɐ¹ˈjiɛʃkoː", "ai̇́" },
		{ "betai̇̀nas", "bʲɛtɐ.ˈɪnɐs", "ai̇̀" },
		-- au (fake)
		{ "pàurzgė", "ˈpɐ.ʊrʲzʲɡʲeː", "àu" },
		{ "šilãuogė", "ʃʲɪ²ˈlɑː.uɔɡʲeː", "ãu" },
		{ "Naùmo^vas", "nɐ.ˈʊmɔʋɐs", "aù" },
		{ "nepaúosto", "nʲɛpɐ.¹ˈuɔstoː", "aú" },
		-- ei (fake)
		{ "nebèima", "nʲɛˈbʲɛ.ɪmɐ", "èi" },
		{ "nebei̇̀rti", "nʲɛbʲɛ.¹ˈɪrʲtʲɪ", "ei̇̀" },
		{ "neji̇́eško<base:neieško>", "nʲɛ¹ˈjiɛʃkoː", "ei̇́ (no-j-insertion not found yet)" },
		-- ẽi not found
		-- ui (fake)
		{ "sui̇̀ro", "sʊ.ˈɪroː", "ui̇̀" },
		{ "sui̇́eško", "sʊ¹ˈjiɛʃkoː", "ui̇́" },
		-- úi, ũi not found
		-- ie (fake)
		{ "besi̇̀elgė", "bʲɛˈsʲɪ.ɛlʲɡʲeː", "i̇̀e" },
		-- The grave is on the e here (stress falls on ɛ in the expected IPA),
		-- so the label is "iè", not a repeat of the "i̇̀e" row above.
		{ "ˌpo^lièsteris", "ˌpɔlʲɪ.ˈɛsʲtʲɛrʲɪs", "iè" },
		{ "įsiérzina", "iːsʲɪ.¹ˈæˑrʲzʲɪnɐ", "ié" },
		-- i̇̃e not found
		-- uo (fake)
		{ "sùošė", "ˈsʊ.oːʃʲeː", "ùo" },
		-- ũo, uó not found; for uò only /ʊˈɔ/ examples found
		-- foreign: ao
		{ "mao^ji̇̀zmas<base:maojizmas>", "mɐ.ɔˈjɪzmɐs", "ao (no-j-insertion variant)" },
		-- foreign: oi (fake)
		-- õi, oi̇́ not found
		-- foreign: ou (fake)
		{ "fino^ùgrai", "fʲɪnɔ.ˈʊɡrɐɪ", "où" },
		-- õu, oú do not exist
		-- foreign: eu (fake)
		{ "neúosti", "nʲɛ.¹ˈuɔsʲtʲɪ", "eú" },
		{ "teùrginis", "tʲɛ.¹ˈʊrʲɡʲɪnʲɪs", "eù" },
		-- ẽu does not exist
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end
-- E: Fronting of o/u after a palatalized consonant or j (VLKK IPA rec §4.4):
-- [oː → o̟ː], [ʊ → ʊ̟], [uː → u̟ː], [uɔ → u̟ɔ].
function tests:test_IPA_E_o_u_fronting_after_palatal()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- Palatalized consonant followed by o/u
		{ "sagióti", "sɐ¹ˈɡʲo̟ːtʲɪ", "Cʲ + o → o̟ː" },
		{ "angijo^mà", "ɐŋʲɡʲɪjɔ̟ˈmɐ", "Cʲ + o^ → ɔ̟" },
		{ "siuñčia", "²ˈsʲʊ̟nʲˑt͡ʃʲɛ", "Cʲ + u → ʊ̟" },
		{ "sių̃sti", "²ˈsʲu̟ːsʲtʲɪ", "Cʲ + ų → u̟ː" },
		{ "ãčiū", "²ˈɑːt͡ʃʲu̟ː", "Cʲ + ū → u̟ː" },
		{ "liuobà", "lʲu̟ɔˈbɐ", "Cʲ + uo → u̟ɔ" },
		-- j followed by o/u (j is inherently palatal and triggers fronting)
		{ "at.jójo<base:atjojo>", "ɐtʲ¹ˈjo̟ːjo̟ː", "j + o → o̟ː" },
		{ "Lo^jo^là<base:Lojola>", "lɔjɔ̟ˈlɐ", "j + o^ → ɔ̟" },
		{ "Jùlė<base:Julė>", "ˈjʊ̟lʲeː", "j + u → ʊ̟" },
		{ "ãkcijų<base:akcijų>", "²ˈɑːkt͡sʲɪju̟ː", "j + ų → u̟ː" },
		{ "jū́ra<base:jūra>", "¹ˈju̟ːrɐ", "j + ū → u̟ː" },
		{ "júodas<base:juodas>", "¹ˈju̟ɔdɐs", "j + uo → u̟ɔ" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- F: Palatalization spreading — palatalization of a front vowel propagates
-- leftward through preceding consonants, including through k/g (which do
-- not directly palatalize but transmit the feature; VLKK §13).
function tests:test_IPA_F_palatalization_spreading()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local examples = {
		-- Spreading through obstruent clusters
		{ "skri̇́eti", "¹ˈsʲkrʲiɛtʲɪ", "Spreading left through r and k" },
		{ "displė̃jus<base:displėjus>", "dʲɪ²ˈsʲpʲlʲeːjʊ̟s", "Spreading left through cluster spl" },
		-- Spreading to next syllable's onset
		{ "pùlti", "¹ˈpʊlʲtʲɪ", "Palatalized l before t (softened by following i)" },
		{ "méilė", "¹ˈmʲæˑɪlʲeː", "Palatalized resonant cluster" },
		-- Secondary stress should not block spreading
		{ "išˌverstaãkis", "ɪʃʲˌʋʲɛrstɐ.²ˈɑːkʲɪs", "Secondary stress should not stop palatalization" },
		-- VLKK §13: l palatalizes through k/g before another soft consonant
		{ "al̃ksnis", "²ˈɐlʲˑksʲnʲɪs", "lʲ through k before sʲnʲ (alksnis)" },
		{ "álgebra", "¹ˈɑˑlʲɡʲɛbrɐ", "lʲ through gʲ before front vowel (álgebra)" },
		{ "buhálteris", "bʊ¹ˈɣɑˑlʲtʲɛrʲɪs", "lʲ before tʲ (buhálteris)" },
		{ "Báltija", "¹ˈbɑˑlʲtʲɪjɛ", "lʲ before tʲ (Báltija)" },
		{ "fakultètas", "fɐkʊlʲˈtʲɛtɐs", "lʲ before tʲ (fakultètas)" },
		{ "fi̇̀lme", "¹ˈfʲɪlʲmʲɛ", "lʲ before mʲ (fi̇̀lme)" },
		-- The expected transcription keeps k plain (…lʲkmʲ…), so the label says
		-- "through k", matching the alksnis row.
		{ "smùlkmena", "¹ˈsmʊlʲkmʲɛnɐ", "lʲ through k before mʲ (smùlkmena)" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end
-- G: ng / nk reverse palatalization rule.
-- n + k/g normally velarizes to ŋ. The ŋ palatalizes only when the FOLLOWING
-- k/g itself palatalizes (i.e., when the cluster is directly followed by
-- a front vowel). When the k/g stays hard (because next is a consonant),
-- ŋ also stays hard.
function tests:test_IPA_G_ng_nk_reverse_palatalization()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		{ "žiñgsnis", "²ˈʒʲɪŋˑksʲnʲɪs", "indirect: ng + s → ŋ stays hard" },
		{ "plunksnẽlė", "plʊŋk²ˈsʲnʲæːlʲeː", "indirect: nk + s → ŋ stays hard" },
		{ "anketà", "ɐŋʲkʲɛˈtɐ", "direct: nk + e → ŋʲkʲ" },
		{ "Bangỹs", "bɐŋʲ²ˈɡʲiːs", "direct: ng + y → ŋʲɡʲ" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- H1: Voicing assimilation (regressive: a stop/fricative agrees in voicing
-- with the next obstruent). VLKK §16, §17.
function tests:test_IPA_H1_voicing_assimilation()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		{ "di̇̀rbti", "¹ˈdʲɪrʲpʲtʲɪ", "b → p before t (devoicing)" },
		{ "apgáuti", "ɐb¹ˈɡɑˑʊtʲɪ", "p → b before g (voicing)" },
		{ "už.trùkti", "ʊʃˈtrʊktʲɪ", "ž → š before t (devoicing)" },
		{ "li̇̀pdo", "ˈlʲɪbdoː", "p → b before d (voicing)" },
		{ "kàsdavo", "ˈkɐzdɐʋoː", "s → z before d (voicing)" },
		{ "iš.gir̃do", "ɪʒʲ²ˈɡʲɪrˑdoː", "š → ž before g (voicing)" },
		{ "iš.džiū́ti", "ɪʒʲ¹ˈd͡ʒʲu̟ːtʲɪ", "š → ž before dž (voicing)" },
		{ "degtùkas", "dʲɛkˈtʊkɐs", "g → k before t (devoicing)" },
		{ "žiebtùvas", "ʒʲiɛpˈtʊʋɐs", "b → p before t (devoicing)" },
		{ "grį̇̃žti", "²ˈɡrʲiːʃʲtʲɪ", "ž → š before t (devoicing)" },
		{ "už.púola", "ʊʃ¹ˈpuɔlɐ", "ž → š before p (devoicing)" },
		{ "už.króvė", "ʊʃ¹ˈkroːʋʲeː", "ž → š before k (devoicing)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- H2: Word-final devoicing (VLKK §18).
function tests:test_IPA_H2_word_final_devoicing()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		{ "juolàb<base:juolab>", "ju̟ɔˈlɐp", "b → p word-finally" },
		{ "visàd", "ʋʲɪˈsɐt", "d → t word-finally" },
		{ "jóg<base:jog>", "¹ˈjo̟ːk", "g → k word-finally" },
		{ "ùž", "ˈʊʃ", "ž → š word-finally" },
		{ "daũg", "²ˈdɒʊˑk", "final g → k" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- H3: Place assimilation between sibilants and affricates (VLKK §23):
-- s + č → š; z + dž → ž; š + c → s; ž + dz → z.
function tests:test_IPA_H3_place_assimilation()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local examples = {
		{ "mókesčiai", "¹ˈmoːkʲɛʃʲt͡ʃʲɛɪ", "s + č → š (place assim.)" },
		{ "kàsčiau", "ˈkɐʃʲt͡ʃʲɛʊ", "s + č → š (place assim.)" },
		{ "vabzdžiai̇̃", "ʋɐbʲ²ˈʒʲd͡ʒʲɛɪˑ", "z + dž → ž (place assim.)" },
		{ "išcukrúoti", "ɪst͡sʊ¹ˈkruɔtʲɪ", "š + c → s (place assim.)" },
		-- Devoicing ž yields š (not z, which is voiced); š then assimilates in
		-- place to s before c, which is the sʲ in the expected transcription.
		{ "už.cỹpti", "ʊsʲ²ˈt͡sʲiːpʲtʲɪ", "ž + c → š (devoicing) → s (place assim.)" },
		-- ž + dz not found
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end
-- H4: Geminate simplification — two identical consonants reduce to one
-- (VLKK §21, plus the same effect on stops once they have been levelled
-- by voicing assimilation, e.g. d + t → t + t → t).
function tests:test_IPA_H4_geminate_simplification()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- Sibilants (identical pairs)
		{ "pùsseserė", "ˈpʊsʲɛsʲɛrʲeː", "ss → s" },
		{ "iššóko", "ɪ¹ˈʃoːkoː", "šš → š" },
		{ "užžiẽbti", "ʊ²ˈʒʲiɛpʲtʲɪ", "žž → ž (also b → p before t)" },
		-- zz not found
		-- Sonorants (liquids and nasals)
		-- NOTE(review): this <base:...> value keeps the tilde on ã, whereas the
		-- other <base:...> values visible in these tests are unaccented —
		-- confirm that is intended.
		{ "so^ciˌjalliberãlas<base:socialliberãlas>", "sɔt͡sʲɪˌjɛlʲɪbʲɛ²ˈrɑːlɐs", "ll → l" },
		{ "šė́mmargas", "¹ˈʃʲeːmɐrɡɐs", "mm → m" },
		{ "viennỹtis", "ʋʲiɛ²ˈnʲiːtʲɪs", "nn → n" },
		{ "pérrašo", "¹ˈpʲæːrɐʃoː", "rr → r" },
		-- Bilabial stops (after voicing assimilation)
		-- bb not found
		{ "tar̃ppievis", "²ˈtɐrʲˑpʲiɛʋʲɪs", "pp → p" },
		{ "bóbpalaikė", "¹ˈboːpɐlɐɪkʲeː", "bp → pp → p (devoicing + degemination)" },
		-- pb not found
		-- Alveolar stops (after voicing assimilation)
		-- dd not found
		{ "añttrobis", "²ˈɐnˑtroːbʲɪs", "tt → t" },
		{ "Šmi̇̀dtas", "ˈʃʲmʲɪtɐs", "dt → tt → t (devoicing + degemination)" },
		{ "atdarà", "ɐdɐˈrɐ", "td → dd → d (voicing + degemination)" },
		-- Velar stops (after voicing assimilation)
		-- gg not found (needs g + g)
		{ "kiekkar̃t", "kʲiɛ²ˈkɐrˑt", "kk → k" },
		{ "daugkar̃t", "dɒʊ²ˈkɐrˑt", "gk → kk → k (devoicing + degemination)" },
		{ "ki̇́ekgi", "¹ˈkʲiɛɡʲɪ", "kg → gg → g (voicing + degemination)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- H5: Sibilant simplification — when two DIFFERENT sibilants meet at a
-- morpheme boundary, only the second is pronounced (VLKK §22).
function tests:test_IPA_H5_sibilant_simplification()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		{ "išsprę́sti", "ɪ¹ˈsʲpʲrʲæːsʲtʲɪ", "šs → s" },
		{ "ùžsienis", "ˈʊsʲiɛnʲɪs", "žs → s (via šs)" },
		{ "pùsšimtis", "ˈpʊʃʲɪmʲtʲɪs", "sš → š" },
		{ "pùszuikis", "ˈpʊzʊɪkʲɪs", "sz → z" },
		{ "pùsžalis", "ˈpʊʒɐlʲɪs", "sž → ž" },
		{ "išžarà", "ɪʒɐˈrɐ", "šž → ž" },
		{ "ùžšovas", "ˈʊʃoːʋɐs", "žš → š" },
		-- zš not found
		-- šz not found
		-- zs not found
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- I: Word-final j and v become non-syllabic [ɪ̯], [ʊ̯] (VLKK IPA rec §7.5).
function tests:test_IPA_I_final_j_v_nonsyllabic()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	-- In the rows below, each <base:...> value is the input with its accent
	-- marks removed.
	local examples = {
		{ "rytój<base:rytoj>", "rʲiː¹ˈtoːɪ̯", "final j after long o → ɪ̯" },
		{ "tuõj<base:tuoj>", "²ˈtuɔɪ̯", "final j after uo → ɪ̯" },
		{ "viduj̃<base:viduj>", "ʋʲɪ²ˈdʊɪˑ", "final j with tilde after short u" },
		-- The base is spelled with š, like the respelling and the ʃ in the
		-- expected transcription.
		{ "viršuj̃<base:viršuj>", "ʋʲɪr²ˈʃʊɪˑ", "final j with tilde after short u" },
		{ "sudiẽv", "sʊ²ˈdʲiɛʊ̯", "final v after ie → ʊ̯" },
	}
	for _, ex in ipairs(examples) do
		self:check_IPA(unpack(ex))
	end
end
-- J1: Hiatus — vowel sequences pronounced as two separate syllables, marked
-- either by morpheme boundary (native: prefix `.`) or explicitly preserved
-- (foreign: user-marked `.`). VLKK §24, §25, §27.2 (i-second variant).
function tests:test_IPA_J1_hiatus()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- Native prefix boundaries
		{ "pa.upỹs", "pɐ.ʊ²ˈpʲiːs", "prefix pa- + u" },
		{ "priim̃ti", "pʲrʲɪ.²ˈɪmʲˑtʲɪ", "prefix pri- + i" },
		{ "pri̇̀ima", "ˈpʲrʲɪ.ɪmɐ", "prefix pri- + i" },
		{ "pri̇̀ėmė", "ˈpʲrʲɪ.eːmʲeː", "prefix pri- + ė" },
		{ "priei̇̃ti", "pʲrʲɪ.²ˈɛɪˑtʲɪ", "prefix pri- + ei" },
		{ "priė̃jo", "pʲrʲɪ.²ˈeːjo̟ː", "prefix pri- + ė" },
		{ "nù.imtas", "ˈnʊ.ɪmtɐs", "prefix nu- + i (user-marked)" },
		-- Foreign words: hiatus is preserved between non-i vowels (VLKK §25)
		{ "di.acetãtas", "dʲɪ.ɐt͡sʲɛ²ˈtɑːtɐs", "foreign i.a (user-marked)" },
		{ "di.akrilãtas", "dʲɪ.ɐkrʲɪ²ˈlɑːtɐs", "foreign i.a (user-marked)" },
		{ "fino^ùgrai", "fʲɪnɔ.ˈʊɡrɐɪ", "foreign o.u" },
		{ "paleo^nto^lògas", "pɐlʲɛ.ɔntɔˈlɔɡɐs", "paleo- + onto-: only e.o is hiatus, last ɔ is in coda" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- J2: J-insertion — in foreign words, an epenthetic [j] is inserted between
-- vowel sequences containing i (VLKK §27). Input is a respelling that
-- spells out the inserted j, optionally with `(j)` for the variable
-- forms in §27.2.
function tests:test_IPA_J2_j_insertion()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- §27.1: i comes first → j is obligatorily inserted
		{ "dijãkonas", "dʲɪ²ˈjæːkoːnɐs", "ia → ija (i first, accented vowel)" },
		{ "dijakonỹstė", "dʲɪjɛkoː²ˈnʲiːsʲtʲeː", "ia → ija (i first, unaccented)" },
		{ "dijalèktas", "dʲɪjɛˈlʲɛktɐs", "ia → ija (i first, unaccented)" },
		{ "pijani̇̀nas<base:pianinas>", "pʲɪjɛˈnʲɪnɐs", "ia → ija (respell j)" },
		{ "dijèzas<base:diezas>", "dʲɪˈjɛzɐs", "ie → ije (respell j)" },
		{ "audijo^fònas<base:audiofonas>", "ɒʊdʲɪjɔ̟ˈfɔnɐs", "io → ijo (respell j)" },
		-- §27.2: i comes second → j is optional, written as `(j)` in the respelling
		{ "teji̇̀stas<base:teistas>", "tʲɛˈjɪstɐs", "ei → eji (respell j)" },
		{ "stò(j)ikas<base:stoikas>", "ˈstɔ(j)ɪkɐs", "oi: variant with (j)" },
		{ "babu(j)i̇̀nai<base:babuinai>", "bɐbʊˈ(j)ɪnɐɪ", "ui: variant with (j)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- K: Word juncture — clitic liaison `‿` is inserted between an unstressed
-- word and a following stressed word; word-final voiced obstruents devoice.
-- Input uses a regular space; the module inserts `‿` automatically.
function tests:test_IPA_K_word_juncture()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		-- Basic liaison
		{ "be ãbejo", "bʲɛ‿²ˈɑːbʲɛjo̟ː", "clitic be + main word" },
		{ "kaip kàd", "kɐɪp‿ˈkɐt", "clitic kaip + main word" },
		{ "kadà ne kadà", "kɐˈdɐ nʲɛ‿kɐˈdɐ", "stressed + clitic + stressed" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- L: Secondary stress (`ˌ`) — different positions and interactions with
-- primary stress.
function tests:test_IPA_L_secondary_stress()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	local cases = {
		{ "ˌho^mo^fòbė", "ˌɣɔmɔˈfɔbʲeː", "initial secondary stress" },
		{ "saˌvanoriáuti", "sɐˌʋɐnoː¹ˈrʲæˑʊtʲɪ", "medial secondary stress" },
		{ "nebekõneˌveikti", "nʲɛbʲɛ²ˈkoːnʲɛˌʋʲɛɪktʲɪ", "secondary AFTER primary stress" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- M: Optional soft l in loanwords (VLKK §15) — by default the module reads
-- l as hard before a hard consonant; the user marks softening explicitly
-- with U+02BC (the modifier letter apostrophe, `ʼ`) after l.
function tests:test_IPA_M_l_dual_reading()
	-- Rows are { input, expected IPA, label }; each row is unpacked into self:check_IPA.
	-- Rows come in pairs: the plain spelling, then the same word with `ʼ` after l.
	local cases = {
		{ "pòlka", "¹ˈpɔlkɐ", "default: hard l" },
		{ "pòlʼka", "¹ˈpɔlʲkɐ", "with U+02BC: soft lʲ" },
		{ "válsas", "¹ˈʋɑˑlsɐs", "default: hard l" },
		{ "válʼsas", "¹ˈʋɑˑlʲsɐs", "with U+02BC: soft lʲ" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- ════════════════════════════════════════════════════════════════════════════
-- SYLLABIFICATION TESTS (Phonotactic models)
-- ════════════════════════════════════════════════════════════════════════════
-- A: 2-consonant cluster models.
function tests:test_hyphen_A_models_2C()
	-- Rows are { input, expected syllabification, label }; each row is unpacked into self:check_hyph.
	local cases = {
		-- Onset patterns (V-CCV)
		{ "vèsti", "vè‧sti", "ST onset" },
		{ "dažnai̇̃", "da‧žnai̇̃", "SR onset" },
		{ "veiklõs", "vei‧klõs", "TR onset" },
		-- Split patterns (VC-CV)
		{ "kalbõs", "kal‧bõs", "RT split" },
		{ "ámžiaus", "ám‧žiaus", "RS split" },
	}
	for i = 1, #cases do
		self:check_hyph(unpack(cases[i]))
	end
end
-- B: 3-consonant cluster models.
function tests:test_hyphen_B_models_3C()
	-- Rows are { input, expected syllabification, label }; each row is unpacked into self:check_hyph.
	local cases = {
		-- Onset
		{ "displė̃jus<base:displėjus>", "di‧splė̃‧jus", "STR onset (V-CCCV)" },
		-- Splits
		{ "pýksta", "pýk‧sta", "T+ST split" },
		{ "mir̃šta", "mir̃‧šta", "R+ST split" },
		{ "mókslo", "mók‧slo", "T+SR split" },
		{ "lengvai̇̃", "len‧gvai̇̃", "R+TR split" },
		{ "atkrei̇̃pia", "at‧krei̇̃‧pia", "T+TR split" },
		{ "di̇̀rbti", "di̇̀rb‧ti", "RT+T split" },
		{ "elgsenõs", "elg‧se‧nõs", "RT+S split" },
		{ "piktžolė̃s", "pikt‧žo‧lė̃s", "TT+S split" },
		{ "Oksfòrdas", "Oks‧fòr‧das", "TS+S split (foreign)" },
		{ "transfòrmavo", "trans‧fòr‧ma‧vo", "RS+S split" },
	}
	for i = 1, #cases do
		self:check_hyph(unpack(cases[i]))
	end
end
-- C: 4-consonant cluster models.
function tests:test_hyphen_C_models_4C()
	-- Rows are { input, expected syllabification, label }; each row is unpacked into self:check_hyph.
	local cases = {
		{ "konstrùkcija<base:konstrukcija>", "kon‧strùk‧ci‧ja", "R+STR split" },
		{ "apskritai̇̃", "ap‧skri‧tai̇̃", "T+STR split" },
		{ "ankstà", "ank‧stà", "RT+ST split" },
		{ "ži̇̀ngsnis", "ži̇̀ng‧snis", "RT+SR split" },
		{ "ántplūdžio", "ánt‧plū‧džio", "RT+TR split" },
		{ "postprodùkcija<base:postprodukcija>", "post‧pro‧dùk‧ci‧ja", "ST+TR split" },
		{ "kontrmotỹvas", "kontr‧mo‧tỹ‧vas", "RTR+R split" },
		{ "Obstfelderis", "Obst‧fel‧de‧ris", "TST+S split" }, -- FIXME: need accentuation
		-- Cluster splits that exist in theory but have no example row here,
		-- per the monograph Bendrinės lietuvių kalbos skiemuo:
		-- S-STR
		-- RS-SR, RR-ST, ST-SR, RR-TR
		-- RTR-T, RST-T, RTT-S, TST-T
	}
	for i = 1, #cases do
		self:check_hyph(unpack(cases[i]))
	end
end
-- D: Morphology-driven hyphenation — native prefixes vs. pseudo-prefixes,
-- and prefix boundaries that introduce hiatus.
function tests:test_hyphen_D_morphology()
	-- Rows are { input, expected syllabification, label }; each row is unpacked into self:check_hyph.
	local cases = {
		-- Native prefixes (user-marked with `.`)
		{ "ap.rašýti", "ap‧ra‧šý‧ti", "Native prefix ap-" },
		{ "at.nèšti", "at‧nè‧šti", "Native prefix at-" },
		{ "iš.mókyti", "iš‧mó‧ky‧ti", "Native prefix iš-" },
		-- Pseudo-prefixes (internationalisms): no morphological boundary
		{ "atòmas", "a‧tò‧mas", "Pseudo-prefix" },
		{ "apãratas", "a‧pã‧ra‧tas", "Pseudo-prefix" },
		-- Prefix boundary with hiatus
		{ "pa.upỹs", "pa‧u‧pỹs", "Prefix boundary with hiatus" },
		{ "priim̃ti", "pri‧im̃‧ti", "Prefix boundary with hiatus" },
		-- User-marked hiatus and secondary-stress boundary
		{ "Kiurasã.o", "Kiu‧ra‧sã‧o", "User-marked hiatus" },
		{ "išˌverstaãkis", "iš‧ver‧sta‧ã‧kis", "Secondary stress also marks syllable boundary" },
	}
	for i = 1, #cases do
		self:check_hyph(unpack(cases[i]))
	end
end
-- ════════════════════════════════════════════════════════════════════════════
-- RHYME TESTS
-- ════════════════════════════════════════════════════════════════════════════
-- A: Basic rhyme extraction across vowel/diphthong types and stress patterns.
function tests:test_rhyme_A_basic()
	-- Rows are { input, expected rhyme, label }; each row is unpacked into self:check_rhyme.
	local cases = {
		{ "nakti̇̀s", "ɪs", "Short i rhyme" },
		{ "kalbà", "ɐ", "Short a rhyme" },
		{ "homològas", "ɔɡɐs", "Loanword o rhyme" },
		{ "naũjas", "ɒʊˑjɛs", "Diphthong rhyme" },
		{ "var̃das", "ɐrˑdɐs", "Mixed diphthong rhyme" },
		{ "mótina", "oːtʲɪnɐ", "Long o rhyme" },
		{ "vil̃kas", "ɪlˑkɐs", "Mixed diphthong rhyme" },
		{ "nebekõneˌveikti", "oːnʲɛˌʋʲɛɪktʲɪ", "Secondary stress should be stripped" },
	}
	for i = 1, #cases do
		self:check_rhyme(unpack(cases[i]))
	end
end
-- Return the table holding the test methods defined above as this module's value.
return tests
o9r9dsdnddxow5l5n7phfg6fsgmn4j7
ima
0
295361
395887
2026-05-29T15:58:58Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{also|Appendix:ဗီုပြၚ်နာနာသာ်မဆေၚ်စပ်ကဵု "ima"}} ==မအရေဝ်ပံၚ်ကောံ== ===သၚ်္ကေတ=== {{mul-symbol}} # {{ISO 639|3}} ==အၚ်္ဂလိက်== ===သမ္ဗန္ဓ=== {{en-head|contr}} # {{alternative form of|en|Imma}} ==အာရာကဳ== ===နိရုတ်=== ဝေါဟာကၠုၚ်နူ {{inh|akr|p..."
395887
wikitext
text/x-wiki
{{also|Appendix:ဗီုပြၚ်နာနာသာ်မဆေၚ်စပ်ကဵု "ima"}}
==မအရေဝ်ပံၚ်ကောံ==
===သၚ်္ကေတ===
{{mul-symbol}}
# {{ISO 639|3}}
==အၚ်္ဂလိက်==
===သမ္ဗန္ဓ===
{{en-head|contr}}
# {{alternative form of|en|Imma}}
==အာရာကဳ==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{inh|akr|poz-oce-pro|*ʀumaq}}
===ဗွဟ်ရမ္သာၚ်===
* {{IPA|akr|/ima/}}
===နာမ်===
{{head|akr|noun}}
# သ္ၚိ။
===နိဿဲ===
* {{R:akr:lex}} ‒ [https://marama.huma-num.fr/Lex/Araki/i.htm#%E2%93%94ima entry ''ima''].
==အာက်သှ်==
===နာမ်===
{{head|ahs|noun}}
# လ္ၚဴ။
==ဗေတ် နာန်ဗာတ်သ်==
===ဗွဟ်ရမ္သာၚ်===
* {{IPA|nmb|/ima/}}
===ကြိယာ===
{{head|nmb|verbs}}
# သကဵုကၠုၚ်။
===နိဿဲ===
* ''[https://core.ac.uk/download/pdf/159465135.pdf Big Nambas Grammar] Pacific Linguistics - G.J. Fox''
==နာဝါတ်ဒဝ်ဝၚ်ဂန္ထ==
===နာမ်===
{{head|nci|noun form|head=īmā}}
# {{inflection of|nci|maitl||3|s|possessed|form}}
==ဒါဝ်==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{inh|dta|xgn-pro|*ïmaxan}}
===နာမ်===
{{head|dta|noun}}
# ဗ္ၜေံ။
==ဝါသတေကာ နာဝါတော လ္ပာ်ဖာဗၟံက်==
===နာမ်===
{{head|nhe|noun form}}
# {{inflection of|nhe|mayitl||3|s|possessed|form}}
==ဂရိန်လာန်==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{inh|kl|esx-inu-pro|*ima}}၊ နူကဵုဝေါဟာ {{inh|kl|esx-esk-pro|*imV}}
===ဗွဟ်ရမ္သာၚ်===
* {{kl-IPA}}
===လုပ်ကၠောန်စွံလဝ်===
{{head|kl|particle}}
# သာ်၊ ဏံ (ညံၚ်ရဴဗက်အလိုက်ဂမၠိုၚ်)၊ ၜိုတ်ဏံဂှ်။
==ဟာန်ဂါရေဝ်==
===ဗွဟ်ရမ္သာၚ်===
* {{hu-IPA}}
* {{hyphenation|hu|ima}}
* {{rhymes|hu|mɒ|s=2}}
===နာမ်===
{{hu-noun|pl=imák}}
# ပရေၚ်ရာဒၞာမိက်။
#: {{syn|hu|imádság|fohász}}
====လဟုတ်စှ်ေ====
{{hu-infl-nom|imá|o}}{{hu-pos-tok|imá}}
==အဳလဝ်ကာနဝ်==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{inh|ilo|poz-pro|*(qa)lima}}၊ နူကဵုဝေါဟာ {{inh|ilo|map-pro|*(qa)lima}}
===နာမ်===
{{ilo-noun}}
# တဲ။
==အဳတလဳ==
====နာမဝိသေသန====
{{head|it|adjective form}}
# {{inflection of|it|imo|imo|f|s}}
==အဳတာဝေတ်==
===နာမ်===
{{head|itv|noun}}
# တဲ။
==ဂျပါန်==
===ဗီုအက္ခရ်ရောမ===
{{ja-romaji}}
# {{ja-romanization of|いま}}
==ကာန်ခါနာအဳ==
===ဗွဟ်ရမ္သာၚ်===
{{kne-pr|íma}}
===နာမ်===
{{kne-noun|íma}}
# တဲ။
===နိဿဲ===
* {{R:kne:Vanoverbergh 1933|íma|page=186}}
==လပ်တေန်==
===ဗွဟ်ရမ္သာၚ်===
* {{la-IPA|eccl=yes|īma}}
====နာမဝိသေသန====
{{head|la|adjective form|head=īma}}
# {{inflection of|la|īmus||nom//voc|f|s|;|nom//acc//voc|n|p}}
====နာမဝိသေသန ၂ ====
{{head|la|adjective form|head=īmā}}
# {{inflection of|la|īmus||abl|f|s}}
===နိဿဲ===
* {{R:la:du Cange}}
==လေန်မဝ်သေန် ကလေန်ဂါ==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{inh|kmk|poz-pro|*lima,*qalima}}၊ နူကဵုဝေါဟာ {{inh|kmk|map-pro|*(qa)lima}}
===နာမ်===
{{head|kmk|noun}}
# တဲ။
==လေတ်တူယဵုနဳယျာ==
===ဗွဟ်ရမ္သာၚ်===
{{lt-pr|i̇̀ma}}
===ကြိယာ===
{{head|lt|verb form|head=i̇̀ma}}
# {{infl of|lt|imti||3|s//p|pres}}
==ဠူၜေအ်ဂါန် ကလဳၚ်္ဂါ==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{inh|knb|poz-pro|*lima,*qalima}}၊ နူကဵုဝေါဟာ {{inh|knb|map-pro|*(qa)lima}}
===နာမ်===
{{head|knb|noun}}
# တဲ။
==ပါဠိ==
===ဗီုပြၚ်ခ္ဍံက်လိက်ပါဠိမန်===
*'''{{#invoke:pi-Latn-translit|tr|[[{{PAGENAME}}]]|Mymr|variation=1}}'''
===ပွံၚ်အက္ခရ်နဲတၞဟ်===
{{pi-alt}}
====နာမဝိသေသန====
{{pi-adj}}
# ဏံ (ဗွဲမကြပ်နကဵု)။
====လဟုတ်စှ်ေ====
{{pi-decl-noun|ima|g=m|novoc=1
|noms_mod=replace|noms=ayaṃ
|nomp_mod=replace|nomp=ime
|inss_mod=replace|inss=iminā|inss2=anena <!-- anena should be verified; PTS denies it. Others also. -->
|gens=assa
|dats_mod=replace|dats=imassa|dats2=assa
|abls_mod=replace|abls=imasmā|abls2=imamhā|abls3=asmā
|locs_mod=replace|locs=imasmiṃ|locs2=asmiṃ
|genp_mod=replace|genp=imesaṃ|genp2=esaṃ|genp3=imesānaṃ|genp4=esānaṃ
|datp_mod=replace|datp=imesaṃ|datp2=esaṃ|datp3=imesānaṃ|datp4=esānaṃ
|insp=ehi|insp2=ebhi|ablp=ehi|ablp2=ebhi
}}
{{pi-decl-noun|imā|g=f|novoc=true
|noms_mod=replace|noms=ayaṃ
|gens2=imissā|gens3=assā|gens4=imissāya|gens5=assāya
|dats2=imissā|dats3=assā|dats4=imissāya|dats5=assāya
|locs=imissā|locs2=imissaṃ|locs3=assaṃ
|genp_mod=replace|genp=imāsaṃ|genp3=imāsānaṃ|genp2=āsaṃ
|datp_mod=replace|datp=imāsaṃ|datp3=imāsānaṃ|datp2=āsaṃ
}}
{{pi-decl-noun|ima|g=n|novoc=true
|noms_mod=replace|noms=idaṃ
|accs_mod=before|accs=idaṃ
|inss_mod=replace|inss=iminā|inss2=anena
|gens=assa
|dats_mod=replace|dats=imassa|dats2=assa
|abls_mod=replace|abls=imasmā|abls2=imamhā|abls3=asmā
|locs_mod=replace|locs=imasmiṃ|locs2=asmiṃ
|genp_mod=replace|genp=imesaṃ|genp2=esaṃ|genp3=imesānaṃ|genp4=esānaṃ
|datp_mod=replace|datp=imesaṃ|datp2=esaṃ|datp3=imesānaṃ|datp4=esānaṃ
|insp=ehi|insp2=ebhi|ablp=ehi|ablp2=ebhi
}}
===သဗ္ဗနာမ်===
{{head|pi|g=m|pronoun}}
# ဣဏံ၊ ၝဏံ။
===သဗ္ဗနာမ် ၂ ===
{{head|pi|g=f|pronoun}}
# ဣဝွံ၊ ဣၝဏံဝွံ။
===သဗ္ဗနာမ် ၃ ===
{{head|pi|g=n|pronoun}}
# သာ်ဏံ။
===နိဿဲ===
{{R:pi:PTS|ayaŋ}}
==ခေန်ချူဝါ==
====နာမဝိသေသန====
{{head|qu|adjective}}
# လ္ၚဵု။
===ကြိယာဝိသေသန===
{{head|qu|adverb}}
# လေဝ်၊ ကဵု။
===နာမ်===
{{head|qu|noun}}
# အရာ၊ အရာမွဲမွဲ။
====လဟုတ်စှ်ေ====
{{qu-noun-v}}{{qu-poss-v}}
===သဗ္ဗနာမ်===
{{head|qu|pronoun}}
# မု။
# အရာဏံ။
==ရဝ်မေနဳယျာ==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{inh|ro|la|līmō|līmāre}}
===ကြိယာ===
{{ro-verb|imă|imat|ime}}
# သကဵုကၠိ၊ ကၠိပပ်၊ မသ္ပကၠိမၞုံကဵုခရီု ဝါ အိက်သတ်တိရစ္ဆာန်။
# သကဵုဟွံရှ်ေသှ်ေရဴဂဴအရာဝတ္ထု ဝါ မအရေဝ်နကဵုအရီုအဗၚ်မချိုတ်ပၠိုတ်ဂမၠိုၚ် ဝါ လက်ချဴဂမၠိုၚ်။
==သာဗ်ခြဝ်ဨရှဳယာန်==
===ကြိယာ===
{{sh-verb form}}
#: {{inflection of|sh|imati||3|s|pres}}
# ညးမသ္ဒးဒုၚ်စသိုၚ်၊ ဍေံမသ္ဒးဒုၚ်စသိုၚ်။
# အတေံဂှ်၊ ဣတေံဂှ်။
==သၠဝ်ဝေနဳ==
===ကြိယာ===
{{head|sl|verb form}}
# {{infl of|sl|imeti||3|s|pres}}
==တာဂါလံက်==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{bor|tl|nan-hbl|引媽|tr=ín-má}}
===ဗွဟ်ရမ္သာၚ်===
{{tl-pr|imâ}}
===နာမ်===
{{tl-noun|imâ|b=+}}
# မိ။
#: {{syn|tl|mama|inay|ina|nanay|inang|nanang|mami}}
===နိဿဲ===
* {{R:KWF Diksiyonaryo}}
* {{R:Pambansang Diksiyonaryo}}
* {{R:CEDOF|4|178}}
* {{R:Vicassan's Pilipino-English Dictionary 1978|page=720}}
* {{R:Diksyunaryo tesauro Pilipino-Ingles 1973|page=546}}
* {{R:tl:Manuel 1948|page=27}}
==တဳရူရာန်==
===နာမ်===
{{head|tiy|noun}}
# ပါၚ်ကၞက်။
==တူရကဳ==
===နိရုတ်===
{{inh+|tr|ota|ايما}}၊ နူကဵုဝေါဟာ {{der|tr|ar|إِيْمَاء}}
===ဗွဟ်ရမ္သာၚ်===
* {{tr-IPA|i:ma:}}
===နာမ်===
{{tr-noun|imayı|imalar}}
# ပရေၚ်ဒမြိပ်မြော်။
# ပရေၚ်လုပ်ဆေၚ်စပ်။
mdfpaultb5iga8fnb9yf1r8pq7lpvhn
ထာမ်ပလိက်:R:akr:lex
10
295362
395888
2026-05-29T16:02:37Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[w:en:Alexandre François|François, Alexandre]]. 2008. [https://marama.huma-num.fr/AF-Araki_e.htm ''An online lexicon of Araki (Santo, Vanuatu)'']. Electronic files. Paris: CNRS. <small>[https://marama.huma-num.fr/data/AlexFrancois_Araki_trilingual-lexicon-2008.pdf (Pdf version)]</small> <noinclude>ကဏ္ဍ:ထာမ်ပလိက်နိဿဲအာရာကဳဂမၠိုၚ်|Araki reference templates..."
395888
wikitext
text/x-wiki
[[w:en:Alexandre François|François, Alexandre]]. 2008. [https://marama.huma-num.fr/AF-Araki_e.htm ''An online lexicon of Araki (Santo, Vanuatu)'']. Electronic files. Paris: CNRS. <small>[https://marama.huma-num.fr/data/AlexFrancois_Araki_trilingual-lexicon-2008.pdf (Pdf version)]</small> <noinclude>[[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲအာရာကဳဂမၠိုၚ်|Araki reference templates]]</noinclude>
59xitabtc6whiz933eb5tmu4qd8jwov
ကဏ္ဍ:ထာမ်ပလိက်နိဿဲအာရာကဳဂမၠိုၚ်
14
295363
395889
2026-05-29T16:04:00Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်လေန်အာရာကဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]]"
395889
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်လေန်အာရာကဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]]
t8ym3v0ky82pcjyimqlse6zfbz2t44i
ကဏ္ဍ:ထာမ်ပလိက်လေန်အာရာကဳဂမၠိုၚ်
14
295364
395890
2026-05-29T16:05:43Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်အာရာကဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်လေန်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]]"
395890
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်အာရာကဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်လေန်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]]
16g5clr1jt63yxourl855nbuwanf281
ကဏ္ဍ:ထာမ်ပလိက်အာရာကဳဂမၠိုၚ်
14
295365
395891
2026-05-29T16:07:44Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ဘာသာအာရာကဳ][[ကဏ္ဍ:ထာမ်ပလိက်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]]"
395891
wikitext
text/x-wiki
[[ကဏ္ဍ:ဘာသာအာရာကဳ]][[ကဏ္ဍ:ထာမ်ပလိက်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]]
e1j3shyr5wpamoc3zz1tg1bftqlqamc
395892
395891
2026-05-29T16:08:37Z
咽頭べさ
33
395892
wikitext
text/x-wiki
[[ကဏ္ဍ:ဘာသာအာရာကဳ]][[ကဏ္ဍ:ထာမ်ပလိက်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|အ]]
9kd0bwi0d85iwmhcalfrj1jgsy1drro
ကဏ္ဍ:ဘာသာအာရာကဳ
14
295366
395893
2026-05-29T16:09:28Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:အရေဝ်ဘာသာ|အ]][[ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အ]]"
395893
wikitext
text/x-wiki
[[ကဏ္ဍ:အရေဝ်ဘာသာ|အ]][[ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အ]]
sz2twsl0oy4c22ynqtyz2948a9dbmpr
ထာမ်ပလိက်:kl-IPA
10
295367
395894
2026-05-29T16:11:56Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "<includeonly>{{#invoke:kl-pron|show}}</includeonly><noinclude>{{#invoke:kl-pron|show}}{{documentation}}</noinclude>"
395894
wikitext
text/x-wiki
<includeonly>{{#invoke:kl-pron|show}}</includeonly><noinclude>{{#invoke:kl-pron|show}}{{documentation}}</noinclude>
8hoded3o4fv5ca517d12kfpnlgjepdv
မဝ်ဂျူ:kl-pron/doc
828
295368
395896
2026-05-29T16:14:57Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "This module implements the {{temp|kl-IPA}} template. <includeonly> {{module cat|kl}} </includeonly>"
395896
wikitext
text/x-wiki
This module implements the {{temp|kl-IPA}} template.
<includeonly>
{{module cat|kl}}
</includeonly>
3rdcct6jz5t5hvs0qpogbqvfa1xqyi0
ကဏ္ဍ:မဝ်ဂျူဂရိန်လာန်ဂမၠိုၚ်
14
295369
395897
2026-05-29T16:16:54Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဂရိန်လာန်|ဂရိန်လာန်]] » '''မဝ..."
395897
wikitext
text/x-wiki
[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဂရိန်လာန်|ဂရိန်လာန်]] » '''မဝ်ဂျူဂမၠိုၚ်'''
:[[:ကဏ္ဍ:မဝ်ဂျူဂမၠိုၚ်|မဝ်ဂျူ]]ဘာသာဂရိန်လာန်၊ မနွံကဵုလုပ်အဝေါၚ်ကုဒ် Lua နကဵုမကၠောန်ဗဒှ် ကဵု မစဳရေၚ်ယဵုဒုၚ်သ္ပမာန်ဂမၠိုၚ်။
[[ကဏ္ဍ:ဘာသာဂရိန်လာန်]][[ကဏ္ဍ:မဝ်ဂျူဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဂ]]
f1vccp40fwmc1jri2vi49m3a7yvgohr
ထာမ်ပလိက်:kl-IPA/documentation
10
295370
395899
2026-05-29T16:21:17Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} This template creates automatic phonetic transcriptions for '''native''' Greenlandic words. It is powered by [[Module:kl-pron]]. ==Parameters== ; <code>1=</code> : The word that should be converted. It is defaulted to <code><nowiki>{{PAGENAME}}</nowiki></code>. ==References== * {{cite-book<!-- -->|last=Fortescue<!-- -->|first=Michael<!-- -->|title=West Greenlandic<!-- -->|p..."
395899
wikitext
text/x-wiki
{{documentation subpage}}
This template creates automatic phonetic transcriptions for '''native''' Greenlandic words. It is powered by [[Module:kl-pron]].
==Parameters==
; <code>1=</code>
: The word that should be converted. It is defaulted to <code><nowiki>{{PAGENAME}}</nowiki></code>.
==References==
* {{cite-book<!--
-->|last=Fortescue<!--
-->|first=Michael<!--
-->|title=West Greenlandic<!--
-->|publisher=Routledge<!--
-->|year=1984<!--
-->|isbn=978-0-7099-1069-5<!--
-->}}
<includeonly>
[[ကဏ္ဍ:ထာမ်ပလိက်မပ္တိတ်ရမျာၚ်ဂရိန်လာန်ဂမၠိုၚ်|IPA]]
</includeonly>
gxrsddpe6u1o5h6x0j2hy4ibvyypjs9
ကဏ္ဍ:ထာမ်ပလိက်မပ္တိတ်ရမျာၚ်ဂရိန်လာန်ဂမၠိုၚ်
14
295371
395900
2026-05-29T16:22:31Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်ဂရိန်လာန်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်ပ္တိတ်ရမျာၚ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဂ]]"
395900
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်ဂရိန်လာန်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်ပ္တိတ်ရမျာၚ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဂ]]
slpi9gam7shwujx5y7j0sa2wcwcravu
ထာမ်ပလိက်:hu-infl-nom
10
295372
395901
2026-05-29T16:25:28Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:hu-nominals|show|regular}}<!-- -->{{#if:{{{stem2|}}}|{{#if:{{{nocat|}}}|| }}}}<!-- --><noinclude>{{documentation}}</noinclude>"
395901
wikitext
text/x-wiki
{{#invoke:hu-nominals|show|regular}}<!--
-->{{#if:{{{stem2|}}}|{{#if:{{{nocat|}}}|| }}}}<!--
--><noinclude>{{documentation}}</noinclude>
l5fcrfmgeav6a5ls7ud1bawdpsbzamz
မဝ်ဂျူ:hu-nominals
828
295373
395902
2026-05-29T17:17:00Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "local m_utilities = require("Module:utilities") local m_links = require("Module:links") local lang = require("Module:languages").getByCode("hu") local export = {} -- Functions that do the actual inflecting by creating the forms of a basic term. local inflections = {} -- The main entry point. -- This is the only function that can be invoked from a template. function export.show(frame) local infl_type = frame.args[..."
395902
Scribunto
text/plain
local m_utilities = require("Module:utilities")
local m_links = require("Module:links")
local lang = require("Module:languages").getByCode("hu")
local export = {}
-- Functions that do the actual inflecting by creating the forms of a basic term.
local inflections = {}
-- Template-facing entry point; the only function meant to be called through #invoke.
-- frame.args[1] (set in the #invoke call) names the inflection type, while the
-- word-specific parameters are read from the parent frame's arguments.
-- Returns the rendered table followed by the formatted categories.
function export.show(frame)
	local infl_type = frame.args[1]
	if not infl_type then
		error("Inflection type has not been specified. Please pass parameter 1 to the module invocation")
	end

	local args = frame:getParent().args

	local inflect = inflections[infl_type]
	if not inflect then
		error("Unknown inflection type '" .. infl_type .. "'")
	end

	local data = {forms = {}, title = nil, categories = {}}

	-- Fill data.forms / data.title for the requested type, then prune and
	-- adjust the result according to the remaining template parameters.
	inflect(args, data)
	postprocess(args, data)

	if args["form"] then
		-- Tracking category for the "form" parameter, currently disabled:
		-- table.insert(data.categories, "hu-decl with form")
	end

	local rendered = make_table(data)
	return rendered .. m_utilities.format_categories(data.categories, lang)
end
-- Inflection functions
-- Tracking helper for the accusative singular vowel.
-- acc_sg_vowel2 is replaced by "" when the stem ends in n, s or z, in a vowel
-- followed by l or r, in "ny", or in a vowel followed by "ly" -- unless the stem
-- ends in "cs". If the two vowels then differ, the "hu-nominals/acc" tracking
-- key is recorded. Nothing is returned.
local function check_acc(stem, acc_sg_vowel, acc_sg_vowel2)
	local ufind = mw.ustring.find

	local matches_listed_ending = ufind(stem, "[nsz]$")
		or ufind(stem, "[aáeéiíoóöőuúüű][lr]$")
		or ufind(stem, "ny$")
		or ufind(stem, "[aáeéiíoóöőuúüű]ly$")

	if matches_listed_ending and not ufind(stem, "cs$") then
		acc_sg_vowel2 = ""
	end

	if acc_sg_vowel == acc_sg_vowel2 then
		return
	end
	require("Module:debug").track("hu-nominals/acc")
end
-- Builds the stem variants that the case rows attach their endings to.
-- Returns a table with:
--   normal        the stem unchanged
--   b, k, n, r, t the stem followed by that consonant, where a stem-final doubled
--                 copy of the same consonant is first reduced to a single one
--   v             the stem followed by "v", with the "v" dropped after a long
--                 consonant or assimilated to a preceding short consonant
local function make_stems(stem)
	local ugsub = mw.ustring.gsub
	local stems = {normal = stem}

	for _, cons in ipairs({"b", "k", "n", "r", "t"}) do
		-- gsub also returns a match count; the concatenation keeps only the string.
		stems[cons] = ugsub(stem, cons .. cons .. "$", cons) .. cons
	end

	local v_stem = stem .. "v"

	-- Remove v after a long consonant (digraphs/trigraph first, then any doubled letter).
	local long_consonants = {"ccs", "ddz", "ddzs", "ggy", "lly", "nny", "ssz", "tty", "zzs"}
	for _, long in ipairs(long_consonants) do
		v_stem = ugsub(v_stem, "(" .. long .. ")v$", "%1")
	end
	v_stem = ugsub(v_stem, "([bcdfghjklmnpqrstvwz])%1v$", "%1%1")

	-- Assimilate v to preceding short consonant. The order matters: multi-letter
	-- consonants are tried before the single-letter fallback below.
	local assimilations = {
		{"cs", "ccs"},
		{"dz", "ddz"},
		{"dzs", "ddzs"},
		{"gy", "ggy"},
		{"ly", "lly"},
		{"ny", "nny"},
		{"sz", "ssz"},
		{"th", "tht"},
		{"ty", "tty"},
		{"x", "xsz"},
		{"zs", "zzs"},
	}
	for _, rule in ipairs(assimilations) do
		v_stem = ugsub(v_stem, rule[1] .. "v$", rule[2])
	end
	v_stem = ugsub(v_stem, "([bcdfghjklmnpqrstvwz])v$", "%1%1")

	stems.v = v_stem
	return stems
end
-- Fills every "*_pl" entry of data.forms.
--   data  table whose `forms` field receives the results (each a one-element list)
--   stem  plural stem; the plural marker "k" and the case ending are appended to it
--   vh    vowel harmony type "o", "ö" or "e"; any other value is used as-is and
--         must then already be a table with the fields a, aa, o, oo, u
local function make_plural(data, stem, vh)
	local harmony_vowels = {
		["o"] = {a = "a", aa = "á", o = "o", oo = "ó", u = "u"},
		["ö"] = {a = "e", aa = "é", o = "ö", oo = "ő", u = "ü"},
		["e"] = {a = "e", aa = "é", o = "e", oo = "ő", u = "ü"},
	}
	vh = harmony_vowels[vh] or vh

	-- Stores stem + "k" + ending under "<case>_pl".
	local function set(case, ending)
		data.forms[case .. "_pl"] = {stem .. "k" .. ending}
	end

	set("nom", "")
	set("acc", vh.a .. "t")
	set("dat", "n" .. vh.a .. "k")
	set("ins", "k" .. vh.a .. "l")
	set("cfi", "ért")
	set("tra", "k" .. vh.aa)
	set("ter", "ig")
	set("esf", "ként")
	set("esm", vh.u .. "l")
	set("ine", "b" .. vh.a .. "n")
	set("spe", vh.o .. "n")
	set("ade", "n" .. vh.aa .. "l")
	set("ill", "b" .. vh.a)
	set("sbl", "r" .. vh.a)
	set("all", "h" .. vh.o .. "z")
	set("ela", "b" .. vh.oo .. "l")
	set("del", "r" .. vh.oo .. "l")
	set("abl", "t" .. vh.oo .. "l")
	set("np1", "é")
	set("np2", "éi")
end
-- Fills every "*_sg" entry of data.forms for a stem ending in a short vowel
-- (a, e, o, ö).
--   stem          full stem, including the final short vowel
--   stem2         optional stem for the case endings; defaults to `stem` with its
--                 final a/e/o/ö removed
--   spe_sg_stem   optional stem for the superessive; defaults to stem2
--   acc_sg_vowel  optional: "-" puts the accusative "t" directly on stem2, any
--                 other value replaces the final vowel of `stem` before the "t"
--   v             optional text appended to stem2 to form the stem of the
--                 instrumental and translative instead of the computed one
--   vh            vowel harmony type "o", "ö" or "e"
local function make_singular_short(data, stem, stem2, spe_sg_stem, acc_sg_vowel, v, vh)
	local harmony_vowels = {
		["o"] = {a = "a", aa = "á", o = "o", oo = "ó", u = "u"},
		["ö"] = {a = "e", aa = "é", o = "ö", oo = "ő", u = "ü"},
		["e"] = {a = "e", aa = "é", o = "e", oo = "ő", u = "ü"},
	}
	vh = harmony_vowels[vh] or vh

	local stem_no_vowel = mw.ustring.gsub(stem, "[aeoö]$", "")

	local stems = make_stems(stem2 or stem_no_vowel)
	if v then
		stems.v = stems.normal .. v
	end
	local base = stems.normal

	-- The superessive stem gets a linking vowel unless it already ends in a vowel.
	local spe_base = spe_sg_stem or base
	if not mw.ustring.find(spe_base, "[aáeéiíoóöőuúüű]$") then
		spe_base = spe_base .. vh.o
	end

	local acc_base
	if acc_sg_vowel == "-" then
		acc_base = base
	elseif acc_sg_vowel then
		acc_base = stem_no_vowel .. acc_sg_vowel
	else
		acc_base = stem
	end

	local rows = {
		nom_sg = mw.loadData("Module:headword/data").pagename,
		acc_sg = acc_base .. "t",
		dat_sg = stems.n .. vh.a .. "k",
		ins_sg = stems.v .. vh.a .. "l",
		cfi_sg = base .. "ért",
		tra_sg = stems.v .. vh.aa,
		ter_sg = base .. "ig",
		esf_sg = stems.k .. "ént",
		esm_sg = base .. vh.u .. "l",
		ine_sg = stems.b .. vh.a .. "n",
		spe_sg = spe_base .. "n",
		ade_sg = stems.n .. vh.aa .. "l",
		ill_sg = stems.b .. vh.a,
		sbl_sg = stems.r .. vh.a,
		all_sg = base .. "h" .. vh.o .. "z",
		ela_sg = stems.b .. vh.oo .. "l",
		del_sg = stems.r .. vh.oo .. "l",
		abl_sg = stems.t .. vh.oo .. "l",
		np1_sg = base .. "é",
		np2_sg = base .. "éi",
	}
	-- Each form is stored as a one-element list.
	for key, value in pairs(rows) do
		data.forms[key] = {value}
	end
end
-- Fills every "*_sg" entry of data.forms for a stem ending in a long/high vowel
-- or in a hyphen.
--   stem  stem that all case endings attach to
--   vh    vowel harmony type "o", "ö" or "e"
-- Unless the stem ends in "-", a tracking key is recorded when the stem does not
-- end in one of i u ü á é í ó ő ú ű, or else when it differs from the page name
-- with a final a/e/o/ö replaced by á/é/ó/ő.
local function make_singular_long(data, stem, vh)
	local ufind = mw.ustring.find
	local pagename = mw.loadData("Module:headword/data").pagename

	if not ufind(stem, "%-$") then
		local tracking_key
		if not ufind(stem, "[iuüáéíóőúű]$") then
			tracking_key = "hu-nominals/vowel"
		else
			local lengthened = mw.ustring.gsub(pagename, "([aeoö])$", {["a"] = "á", ["e"] = "é", ["o"] = "ó", ["ö"] = "ő"})
			if stem ~= lengthened then
				tracking_key = "hu-nominals/pagename"
			end
		end
		if tracking_key then
			require("Module:debug").track(tracking_key)
		end
	end

	local harmony_vowels = {
		["o"] = {a = "a", aa = "á", o = "o", oo = "ó", u = "u"},
		["ö"] = {a = "e", aa = "é", o = "ö", oo = "ő", u = "ü"},
		["e"] = {a = "e", aa = "é", o = "e", oo = "ő", u = "ü"},
	}
	vh = harmony_vowels[vh] or vh

	local stems = make_stems(stem)
	local base = stems.normal

	-- The essive-formal is built on the page name, not on the stem; a hyphen is
	-- inserted before the ending when the stem itself ends in a hyphen.
	local esf_hyphen = ufind(base, "%-$") and "-" or ""

	local rows = {
		nom_sg = pagename,
		acc_sg = base .. "t",
		dat_sg = stems.n .. vh.a .. "k",
		ins_sg = stems.v .. vh.a .. "l",
		cfi_sg = base .. "ért",
		tra_sg = stems.v .. vh.aa,
		ter_sg = base .. "ig",
		esf_sg = pagename .. esf_hyphen .. "ként",
		esm_sg = base .. vh.u .. "l",
		ine_sg = stems.b .. vh.a .. "n",
		spe_sg = base .. "n",
		ade_sg = stems.n .. vh.aa .. "l",
		ill_sg = stems.b .. vh.a,
		sbl_sg = stems.r .. vh.a,
		all_sg = base .. "h" .. vh.o .. "z",
		ela_sg = stems.b .. vh.oo .. "l",
		del_sg = stems.r .. vh.oo .. "l",
		abl_sg = stems.t .. vh.oo .. "l",
		np1_sg = base .. "é",
		np2_sg = base .. "éi",
	}
	-- Each form is stored as a one-element list.
	for key, value in pairs(rows) do
		data.forms[key] = {value}
	end
end
-- Fills every "*_sg" entry of data.forms for the types whose plural is formed
-- with a vowel plus "k".
--   stem         stem that most case endings attach to
--   stem2        optional stem for the accusative; defaults to `stem`
--   spe_sg_stem  optional stem for the superessive; defaults to stem2
--   acc_t        optional accusative ending; defaults to "t"
--   v            optional text appended to the stem to form the stem of the
--                instrumental and translative instead of the computed one
--   vh           vowel harmony type "o", "ö" or "e"
local function make_singular_Vk(data, stem, stem2, spe_sg_stem, acc_t, v, vh)
	local harmony_vowels = {
		["o"] = {a = "a", aa = "á", o = "o", oo = "ó", u = "u"},
		["ö"] = {a = "e", aa = "é", o = "ö", oo = "ő", u = "ü"},
		["e"] = {a = "e", aa = "é", o = "e", oo = "ő", u = "ü"},
	}
	vh = harmony_vowels[vh] or vh

	local stems = make_stems(stem)
	if v then
		stems.v = stems.normal .. v
	end
	local base = stems.normal

	local acc_base = stem2 or stem
	local spe_base = spe_sg_stem or acc_base
	local acc_ending = acc_t or "t"

	-- The superessive takes a linking vowel unless its stem already ends in a vowel.
	local link_vowel = ""
	if not mw.ustring.find(spe_base, "[aáeéiíoóöőuúüű]$") then
		link_vowel = vh.o
	end

	local rows = {
		nom_sg = mw.loadData("Module:headword/data").pagename,
		acc_sg = acc_base .. acc_ending,
		dat_sg = stems.n .. vh.a .. "k",
		ins_sg = stems.v .. vh.a .. "l",
		cfi_sg = base .. "ért",
		tra_sg = stems.v .. vh.aa,
		ter_sg = base .. "ig",
		esf_sg = stems.k .. "ént",
		esm_sg = base .. vh.u .. "l",
		ine_sg = stems.b .. vh.a .. "n",
		spe_sg = spe_base .. link_vowel .. "n",
		ade_sg = stems.n .. vh.aa .. "l",
		ill_sg = stems.b .. vh.a,
		sbl_sg = stems.r .. vh.a,
		all_sg = base .. "h" .. vh.o .. "z",
		ela_sg = stems.b .. vh.oo .. "l",
		del_sg = stems.r .. vh.oo .. "l",
		abl_sg = stems.t .. vh.oo .. "l",
		np1_sg = base .. "é",
		np2_sg = base .. "éi",
	}
	-- Each form is stored as a one-element list.
	for key, value in pairs(rows) do
		data.forms[key] = {value}
	end
end
-- Handler for the "regular" type (stems ending in a vowel or a hyphen).
-- Template parameters read from `args`:
--   1            base stem (required, except on pages in the template namespace,
--                where the placeholder "{{{1}}}" is used)
--   2            vowel harmony type "o", "ö" or "e" (defaults to "o" only in the
--                template namespace)
--   3            accusative singular vowel, passed on to make_singular_short
--   stem2, spe_sg_stem, v   optional overrides, passed on to make_singular_short
-- Empty optional parameters are treated as absent.
inflections["regular"] = function(args, data)
	local ufind = mw.ustring.find

	-- True when the current page is in the template namespace.
	local function in_template_namespace()
		return mw.title.getCurrentTitle().nsText == "ထာမ်ပလိက်"
	end

	-- Reads a parameter, mapping the empty string to nil.
	local function optional(name)
		local value = args[name]
		if value == "" then
			return nil
		end
		return value
	end

	local stem = args[1] or (in_template_namespace() and "{{{1}}}") or ""
	if stem == "" then
		error("Parameter 1 (base stem) may not be empty.")
	end
	local vh = args[2] or (in_template_namespace() and "o")
	local acc_sg_vowel = optional(3)
	local stem2 = optional("stem2")
	local spe_sg_stem = optional("spe_sg_stem")
	local v = optional("v")

	-- Text appended to the table title for each valid harmony type.
	local harmony_titles = {
		["o"] = ", back harmony",
		["ö"] = ", front rounded harmony",
		["e"] = ", front unrounded harmony",
	}
	if not harmony_titles[vh] then
		error("Vowel harmony type must be \"o\", \"ö\" or \"e\".")
	end

	local vh_pl = vh

	if ufind(stem, "[aeoö]$") then
		local final_vowel = mw.ustring.match(stem, "([aeoö])$")
		data.title = "stem in " .. m_links.full_link({lang = lang, alt = "-" .. final_vowel .. "-"}, "term")
		-- A stem in -e with front rounded harmony takes the unrounded plural endings.
		if vh == "ö" and ufind(stem, "e$") then
			vh_pl = "e"
		end
		make_singular_short(data, stem, stem2, spe_sg_stem, acc_sg_vowel, v, vh)
	elseif ufind(stem, "[iuüáéíóőúű%-]$") or in_template_namespace() then
		data.title = "stem in long/high vowel"
		make_singular_long(data, stem, vh)
	else
		error("The stem must end in vowel or \"-\".")
	end

	data.title = data.title .. harmony_titles[vh]

	make_plural(data, stem, vh_pl)
end
-- Builds the handler for one of the "plural in vowel + k" inflection types.
-- The five types share all of their logic and differ only in the values below.
--   plural_suffix  suffix shown in the table title, without the leading hyphen
--   harmony_label  text appended to the title (including the leading ", ")
--   sg_vh          vowel harmony type used for the singular forms
--   link_vowel     vowel appended to the stem before the plural "k"
--   pl_vh          vowel harmony type used for the plural forms
-- Template parameters read by the returned handler from `args`:
--   1, 2    base stem and optional final part; they are concatenated to form
--           the stem (1 is required, except on pages in the template namespace,
--           where the placeholder "{{{1}}}" is used)
--   3       optional accusative singular ending; must be a/e/o/ö followed by "t"
--   stem    optional stem used for the accusative singular and the plural
--   sup     optional stem used for the superessive singular
--   v       optional override passed on to make_singular_Vk
-- Empty optional parameters are treated as absent.
local function make_Vk_inflection(plural_suffix, harmony_label, sg_vh, link_vowel, pl_vh)
	return function(args, data)
		-- Reads a parameter, mapping the empty string to nil.
		local function optional(name)
			local value = args[name]
			if value == "" then
				return nil
			end
			return value
		end

		data.title = "plural in " .. m_links.full_link({lang = lang, alt = "-" .. plural_suffix}, "term") .. harmony_label

		local base = args[1] or (mw.title.getCurrentTitle().nsText == "ထာမ်ပလိက်" and "{{{1}}}") or ""
		if base == "" then
			error("Parameter 1 (base stem) may not be empty.")
		end
		local final = args[2] or ""
		local acc_t = optional(3)
		local stem2 = optional("stem")
		local spe_sg_stem = optional("sup")
		local v = optional("v")

		if acc_t and not mw.ustring.find(acc_t, "^[aeoö]t$") then
			error("Invalid accusative singular")
		end

		local stem = base .. final
		make_singular_Vk(data, stem, stem2, spe_sg_stem, acc_t, v, sg_vh)
		make_plural(data, (stem2 or stem) .. link_vowel, pl_vh)
	end
end

inflections["ak"] = make_Vk_inflection("ak", ", back harmony", "o", "a", "o")
inflections["ek"] = make_Vk_inflection("ek", ", front unrounded harmony", "e", "e", "e")
-- "ek2": singular forms use front rounded harmony, while the plural is built
-- with -e- and front unrounded endings.
inflections["ek2"] = make_Vk_inflection("ek", ", front rounded harmony", "ö", "e", "e")
inflections["ok"] = make_Vk_inflection("ok", ", back harmony", "o", "o", "o")
inflections["ök"] = make_Vk_inflection("ök", ", front rounded harmony", "ö", "ö", "ö")
-- Builds the handler for one of the "plural in -k" inflection types, where the
-- plural "k" and the singular endings attach to the same stem. The three types
-- share all of their logic and differ only in the values below.
--   harmony_label  text appended to the table title (including the leading ", ")
--   vh             vowel harmony type used for both singular and plural forms
-- Template parameters read by the returned handler from `args`:
--   1, 2       base stem and optional final part; they are concatenated to form
--              the stem (1 is required, except on pages in the template
--              namespace, where the placeholder "{{{1}}}" is used)
--   stem, sup  not used by these types; their presence is only tracked
local function make_k_inflection(harmony_label, vh)
	return function(args, data)
		if args["stem"] or args["sup"] then
			require("Module:debug").track("hu-nominals/stem2")
		end

		data.title = "plural in " .. m_links.full_link({lang = lang, alt = "-k"}, "term") .. harmony_label

		local base = args[1] or (mw.title.getCurrentTitle().nsText == "ထာမ်ပလိက်" and "{{{1}}}") or ""
		if base == "" then
			error("Parameter 1 (base stem) may not be empty.")
		end
		local final = args[2] or ""

		local stem = base .. final
		make_singular_long(data, stem, vh)
		make_plural(data, stem, vh)
	end
end

inflections["k-back"] = make_k_inflection(", back harmony", "o")
inflections["k-front1"] = make_k_inflection(", front unrounded harmony", "e")
inflections["k-front2"] = make_k_inflection(", front rounded harmony", "ö")
-- Prunes and rearranges data.forms according to the template parameters.
-- NOTE: this is deliberately left as a global function; export.show, defined
-- earlier in the module, refers to it by its global name.
--   n / form               "sg" or "sing": drop the plural forms; "pl": drop the
--                          singular forms; "isg": move the singular forms into
--                          the plural slots ("i-type singular" like képeim,
--                          képeid, képei, which need to be inflected like
--                          singular but displayed as plural)
--   esm_sg, ul, ül         keep the essive-modal singular (dropped otherwise)
--   esm_pl, akul, ekül, okul, ökül, kul, kül
--                          keep the essive-modal plural (dropped otherwise; with
--                          "isg" the singular parameters keep it as well)
--   noposs, é, éi          drop the np1/np2 forms
function postprocess(args, data)
	local forms = data.forms

	local n = args["n"] or args["form"]
	if n == "" then
		n = nil
	end
	local singular_only = (n == "sg" or n == "sing")
	if n and not (singular_only or n == "pl" or n == "isg") then
		error("The parameter \"n\" must be \"sg\", \"pl\", \"isg\" or empty.")
	end

	if n == "isg" then
		forms["esm_pl"] = forms["esm_sg"]
	end

	-- sortedPairs iterates over a saved list of keys, so entries can be added
	-- and removed while the loop is running.
	for key, form in require "Module:table".sortedPairs(forms) do
		local is_singular_key = key:find("_sg$")
		local is_plural_key = key:find("_pl$")
		if (n == "pl" and is_singular_key) or (singular_only and is_plural_key) then
			-- Do not show forms for a number the nominal does not have.
			forms[key] = nil
		elseif n == "isg" and is_singular_key then
			-- Display the singular form in the plural slot instead.
			local plural_key = string.gsub(key, "sg", "pl")
			forms[plural_key] = form
			forms[key] = nil
		end
	end

	-- True if at least one of the named parameters is present and non-empty.
	local function any_arg(...)
		for i = 1, select("#", ...) do
			local value = args[(select(i, ...))]
			if value and value ~= "" then
				return true
			end
		end
		return false
	end

	local has_esm_sg = any_arg("esm_sg", "ul", "ül")
	local has_esm_pl = any_arg("esm_pl", "akul", "ekül", "okul", "ökül", "kul", "kül")
	local has_noposs = any_arg("noposs", "é", "éi")

	if not has_esm_sg then
		forms["esm_sg"] = nil
	end
	if not has_esm_pl and not (n == "isg" and has_esm_sg) then
		forms["esm_pl"] = nil
	end

	-- Blank the non-attributive possessive rows if noposs is specified in the call
	if has_noposs then
		for _, key in ipairs({"np1_sg", "np2_sg", "np1_pl", "np2_pl"}) do
			forms[key] = nil
		end
	end
end
-- Make the table.
--
-- data.forms: form code (e.g. "nom_sg") -> list of forms for that cell.
-- data.title: optional text shown in parentheses after the table title.
-- Returns the wikitext of the inflection table as a single string.
function make_table(data)
	-- Render one cell: an em dash when the form is absent, otherwise every
	-- form in the list as a link, separated by line breaks.
	local function show_form(forms, code)
		local form = forms[code]
		if not form then
			return "—"
		elseif type(form) ~= "table" then
			error("a non-table value was given in the list of inflected forms.")
		end

		-- Accelerator tag derived from the form code: a trailing "sg"/"pl"
		-- after an underscore becomes "s"/"p" and underscores become "|",
		-- e.g. "nom_sg" -> "nom|s".
		local accel = {
			form = code:gsub("%f[^_](%a%a)$", {sg = "s", pl = "p"}):gsub("_", "|"),
		}

		local ret = {}
		for _, subform in ipairs(form) do
			table.insert(ret, m_links.full_link({
				lang = lang,
				term = subform,
				accel = accel,
			}))
		end
		return table.concat(ret, "<br/>")
	end

	-- Replacement callback for the {{{...}}} placeholders in the wikitext below.
	local function repl(param)
		if param == "lemma" then
			return m_links.full_link({lang = lang, alt = mw.loadData("Module:headword/data").pagename}, "term")
		elseif param == "info" then
			return data.title and " (" .. data.title .. ")" or ""
		else
			return show_form(data.forms, param)
		end
	end

	local frame = mw.getCurrentFrame()
	local wikicode = frame:expandTemplate{
		title = 'inflection-table-top',
		args = {
			title = 'Inflection{{{info}}}',
			tall = 'yes',
			palette = 'green'
		}
	} .. [=[
! class="outer" |
! class="outer" | ကိုန်ဨကဝုစ်
! class="outer" | ကိုန်ဗဟုဝစ်
|-
! [[nominative case|မဒုၚ်ယၟု]]
| {{{nom_sg}}}
| {{{nom_pl}}}
|-
! [[accusative case|ကမ္မကာရက]]
| {{{acc_sg}}}
| {{{acc_pl}}}
|-
! [[dative case|ပြကမ္မကာရက]]
| {{{dat_sg}}}
| {{{dat_pl}}}
|-
! [[instrumental case|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်]]
| {{{ins_sg}}}
| {{{ins_pl}}}
|-
! တၞုၚ်တၞောတ်-ပရေၚ်ပရောဟိုတ်
| {{{cfi_sg}}}
| {{{cfi_pl}}}
|-
! [[translative case|ပရေၚ်ကၠာဲပ္တိတ်]]
| {{{tra_sg}}}
| {{{tra_pl}}}
|-
! [[terminative case|ပရေၚ်တုဲဒှ်]]
| {{{ter_sg}}}
| {{{ter_pl}}}
|-
! ဓဝ်ဍာံ-အဓိက
| {{{esf_sg}}}
| {{{esf_pl}}}
|-
! မဝ်ဒါယ်လ်-အဓိက
| {{{esm_sg}}}
| {{{esm_pl}}}
|-
! [[inessive case|ပရေၚ်ဟွံဆေၚ်စပ်]]
| {{{ine_sg}}}
| {{{ine_pl}}}
|-
! [[superessive case|တၞုၚ်တၞောတ်]]
| {{{spe_sg}}}
| {{{spe_pl}}}
|-
! [[adessive case|ပရေၚ်စောဲလာံ]]
| {{{ade_sg}}}
| {{{ade_pl}}}
|-
! [[illative case|ဟၟဲကဵုအဓိပ္ပါဲ]]
| {{{ill_sg}}}
| {{{ill_pl}}}
|-
! ဒုၚ်အသၟဝ်တန်ဍောတ်
| {{{sbl_sg}}}
| {{{sbl_pl}}}
|-
! [[allative case|တသိုက်ပိုန်ဂြပ်]]
| {{{all_sg}}}
| {{{all_pl}}}
|-
! [[elative case|မစၞောန်ထ္ၜးအဆက်နာမ်]]
| {{{ela_sg}}}
| {{{ela_pl}}}
|-
! [[delative case|ပြကမ္မကာရက]]
| {{{del_sg}}}
| {{{del_pl}}}
|-
! [[ablative case|ပရေၚ်မလၚ်]]
| {{{abl_sg}}}
| {{{abl_pl}}}
|-
! ကိုန်ဨကဝုစ် – မစောဲစုတ်-ဟွံသေၚ်<br>ပၟိက်သၟိက်မိက်ဂွံပိုၚ်ပြဳ
| {{{np1_sg}}}
| {{{np1_pl}}}
|-
! ကိုန်ဗဟုဝစ် – မစောဲစုတ်-ဟွံသေၚ်<br>ပၟိက်သၟိက်မိက်ဂွံပိုၚ်ပြဳ
| {{{np2_sg}}}
| {{{np2_pl}}}
]=] .. frame:expandTemplate{ title = 'inflection-table-bottom' }

	-- gsub returns the result string plus the number of substitutions; the
	-- parentheses keep only the string so the count never leaks to callers.
	return (mw.ustring.gsub(wikicode, "{{{([a-z0-9_]+)}}}", repl))
end
-- Module result: hand the `export` table back to whoever loaded this module.
return export
nhntc9bonrnvix3xb5370hairjwfgzh
မဝ်ဂျူ:hu-nominals/doc
828
295374
395903
2026-05-29T17:19:01Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "Used by {{temp|hu-infl-nom}}. <includeonly> {{module cat|hu}} </includeonly>"
395903
wikitext
text/x-wiki
Used by {{temp|hu-infl-nom}}.
<includeonly>
{{module cat|hu}}
</includeonly>
qaoh54e43yviih96cq10jutnnizn4ta
ကဏ္ဍ:မဝ်ဂျူဟာန်ဂါရေဝ်ဂမၠိုၚ်
14
295375
395904
2026-05-29T17:22:29Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဟာန်ဂါရေဝ်|ဟာန်ဂါရေဝ်]] » '''..."
395904
wikitext
text/x-wiki
[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဟာန်ဂါရေဝ်|ဟာန်ဂါရေဝ်]] » '''မဝ်ဂျူဂမၠိုၚ်'''
:[[:ကဏ္ဍ:မဝ်ဂျူဂမၠိုၚ်|မဝ်ဂျူ]]ဘာသာဟာန်ဂါရေဝ်၊ မနွံကဵုလုပ်အဝေါၚ်ကုဒ် Lua နကဵုမကၠောန်ဗဒှ် ကဵု မစဳရေၚ်ယဵုဒုၚ်သ္ပမာန်ဂမၠိုၚ်။
[[ကဏ္ဍ:ဘာသာဟာန်ဂါရေဝ်]][[ကဏ္ဍ:မဝ်ဂျူဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]]
b4ccoudzqws7hl3e3krjv7u90pc0zx3
ကဏ္ဍ:မဝ်ဂျူပွမပြံၚ်လှာဲဟာန်ဂါရေဝ်ဂမၠိုၚ်
14
295376
395905
2026-05-29T17:24:46Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:မဝ်ဂျူဟာန်ဂါရေဝ်ဂမၠိုၚ်]][[ကဏ္ဍ:မဝ်ဂျူဗီုအပြံၚ်အလှာဲဝေါဟာဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]]"
395905
wikitext
text/x-wiki
[[ကဏ္ဍ:မဝ်ဂျူဟာန်ဂါရေဝ်ဂမၠိုၚ်]][[ကဏ္ဍ:မဝ်ဂျူဗီုအပြံၚ်အလှာဲဝေါဟာဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]]
q3ns4hqjhfaufqq1zk7hdn38wdu0c0a
ထာမ်ပလိက်:hu-infl-nom/documentation
10
295377
395906
2026-05-29T17:30:24Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} {{uses lua|Module:hu-nominals}} This template is for use with Hungarian regular nominals (nouns, adjectives, numerals and pronouns). It can also be used for abbreviations of nominals where suffixes are attached with a hyphen (-). ==Parameters== ; <code>1=</code> : The stem of the word, which must end in a vowel or a hyphen (for some abbreviations). : To construct the stem, take the plural of..."
395906
wikitext
text/x-wiki
{{documentation subpage}}
{{uses lua|Module:hu-nominals}}
This template is for use with Hungarian regular nominals (nouns, adjectives, numerals and pronouns). It can also be used for abbreviations of nominals where suffixes are attached with a hyphen (-).
==Parameters==
; <code>1=</code>
: The stem of the word, which must end in a vowel or a hyphen (for some abbreviations).
: To construct the stem, take the plural of the word and remove the final {{m|hu|-k}}. Examples:
:: ''Lemma'': {{m|hu|ház||house}}
:: ''Plural'': {{m|hu|házak||houses}}
:: ''Stem (parameter 1)'': ''háza''
:: ''Lemma'': {{m|hu|CD}}
:: ''Plural'': {{m|hu|CD-k}}
:: ''Stem (parameter 1)'': ''CD-''
: To construct the stem for the inflection of a '''possessive form''', take the accusative and remove the final {{m|hu|-t}}:
:: ''Lemma'': {{m|hu|ablak||window}}
:: ''3rd person possessive'': {{m|hu|ablaka||his/her/its window}}
:: ''Accusative of 3rd person possessive'': {{m|hu|ablakát}}
:: ''Stem (parameter 1)'': ''ablaká''
; <code>2=</code>
: The vowel in the ending of the allative singular ({{m|hu|-hoz}}/{{m|hu|-hez}}/{{m|hu|-höz}}). Values can be:
:: <code>o</code> (for back a/á/o/ó/u/ú harmony)
:: <code>e</code> (for front unrounded e/é/i/í harmony)
:: <code>ö</code> (for front rounded ö/ő/ü/ű harmony)
; <code>3=</code>
: The vowel before the ending in the accusative singular. By default, this is the final vowel of the stem in parameter 1. Specifying this parameter replaces that vowel with another. If the word forms its accusative singular using the second stem (without the final vowel), use "<code>-</code>".
; <code>stem2=</code>
: Specifies the second stem: the stem that is used to form all of the singular cases except the accusative. By default, this is the regular stem with a final short low or mid vowel (''a'', ''e'', ''o'', ''ö'') removed. Most of the time, stem2 will be the lemma.
; <code>pos=</code>
: The part of speech of the word in the ''plural,'' IF <code>stem2</code> was supplied AND the word is not a noun. (All other words with <code>stem2</code> are categorized under "<code>Hungarian ''nouns'' with alternating stems</code>" because most of these words are nouns.)
; <code>spe_sg_stem=</code>
: This should only be used together with <code>stem2=</code>, and specifies the stem of the superessive singular, if it is different from the <code>stem2=</code> value.
; <code>v=</code>
: Specifies which consonant replaces the "v" of the instrumental and translative singular endings. By default, this is the same as the final consonant of the second stem, or ''v'' if it ends in a vowel. For abbreviations or words with a foreign spelling, it's not possible to determine this from the spelling, so this parameter provides it. For proper nouns ending in two identical consonants, the parameter's value must be preceded by a hyphen:
:: ''Lemma'': {{m|hu|Baross}}
:: ''Parameter'': v=-s
:: ''Instrumental singular case'': Baross-sal
:: ''Translative singular case'': Baross-sá
:If the final double consonants are -tt, -nn, -kk, -bb, -rr, -hh, use the old templates that start with {{temp|hu-decl}}. They will add the hyphen not just to the instrumental and translative cases, but to all appropriate cases.
; <code>n=</code>
: Can be <code>sg</code>, <code>pl</code>, or <code>isg</code>. The first two specify that the word has only singular or only plural forms, respectively (if left empty, the default is to show both). The value <code>isg</code> (for <span style="white-space:nowrap;">“{{m|hu|-i|id=possessive}}-type</span> singular”) is used when the singular inflection is needed, although the meaning of the word is plural, in the case of [[Appendix:Hungarian possessive suffixes|multiple-possession forms]], which should be displayed in the plural column.
; <code>esm_sg=1</code>
: Specify this if the word has an essive-modal singular form. By default, this form is omitted.
; <code>esm_pl=1</code>
: Specify this if the word has an essive-modal plural form. By default, this form is omitted.
; <code>noposs=y</code>
: Specify this if the two rows for the non-attributive possessive forms should be blanked. By default, these forms are included.
==Examples==
===Basic usage===
For most words, the first two parameters are enough.
For {{m|hu|ház}}:
: <code><nowiki>{{hu-infl-nom|háza|o}}</nowiki></code>
For {{m|hu|ablak}}:
: <code><nowiki>{{hu-infl-nom|ablako|o}}</nowiki></code>
For {{m|hu|ábra}}:
: <code><nowiki>{{hu-infl-nom|ábrá|o}}</nowiki></code>
For {{m|hu|öv}}:
: <code><nowiki>{{hu-infl-nom|öve|ö}}</nowiki></code>
For {{m|hu|összefüggő}}:
: <code><nowiki>{{hu-infl-nom|összefüggőe|ö}}</nowiki></code>
For {{m|hu|kedd}}:
: <code><nowiki>{{hu-infl-nom|kedde|e}}</nowiki></code>
For {{m|hu|kefe}}:
: <code><nowiki>{{hu-infl-nom|kefé|e}}</nowiki></code>
If the word has essive-modal forms, the parameters for those are added too.
For {{m|hu|díj}}:
: <code><nowiki>{{hu-infl-nom|díja|o|esm_sg=1|esm_pl=1}}</nowiki></code>
For {{m|hu|eszperantó}}:
: <code><nowiki>{{hu-infl-nom|eszperantó|o|esm_sg=1}}</nowiki></code>
For {{m|hu|török}}:
: <code><nowiki>{{hu-infl-nom|törökö|ö|esm_sg=1}}</nowiki></code>
For {{m|hu|nő}}:
: <code><nowiki>{{hu-infl-nom|nő|ö|esm_sg=1}}</nowiki></code>
For {{m|hu|kert}}:
: <code><nowiki>{{hu-infl-nom|kerte|e|esm_sg=1|esm_pl=1}}</nowiki></code>
Singular-only and plural-only nouns must specify the <code>n=</code> parameter.
For {{m|hu|Románia}}:
: <code><nowiki>{{hu-infl-nom|Romániá|o|n=sg}}</nowiki></code>
For {{m|hu|Budapest}}:
: <code><nowiki>{{hu-infl-nom|Budapeste|e|n=sg}}</nowiki></code>
For {{m|hu|Falkland-szigetek}}:
: <code><nowiki>{{hu-infl-nom|Falkland-szigete|e|n=pl}}</nowiki></code>
===Vowel dropped in accusative singular===
The third parameter must be specified if the final vowel of the stem is dropped before the accusative singular ending {{m|hu|-t}}.
For {{m|hu|asztal}}:
: <code><nowiki>{{hu-infl-nom|asztalo|o|-}}</nowiki></code>
For {{m|hu|Tokaj}}:
: <code><nowiki>{{hu-infl-nom|Tokajo|o|-|n=sg}}</nowiki></code>
For {{m|hu|kör}}:
: <code><nowiki>{{hu-infl-nom|körö|ö|-}}</nowiki></code>
For {{m|hu|Köln}}:
: <code><nowiki>{{hu-infl-nom|Kölnö|ö|-|n=sg}}</nowiki></code>
For {{m|hu|ösvény}}:
: <code><nowiki>{{hu-infl-nom|ösvénye|e|-}}</nowiki></code>
===Alternating stems===
Some words will alternate between two stems. The simplest (when it comes to parameters) are those where short vowels alternate with long ones.
For {{m|hu|szamár}}:
: <code><nowiki>{{hu-infl-nom|szamara|o|stem2=szamár}}</nowiki></code>
For {{m|hu|úr}}:
: <code><nowiki>{{hu-infl-nom|ura|o|stem2=úr}}</nowiki></code>
For {{m|hu|tűz}}:
: <code><nowiki>{{hu-infl-nom|tüze|ö|stem2=tűz}}</nowiki></code>
For {{m|hu|levél}}:
: <code><nowiki>{{hu-infl-nom|levele|e|stem2=levél}}</nowiki></code>
For {{m|hu|név}}:
: <code><nowiki>{{hu-infl-nom|neve|e|stem2=név}}</nowiki></code>
Some words have a fill vowel before the final consonant, which is lost whenever the final vowel of the stem is present (in the plural, accusative singular and superessive singular). In these cases, both stem parameters must be specified.
For {{m|hu|hatalom}} (plural {{m|hu|hatalmak}}):
: <code><nowiki>{{hu-infl-nom|hatalma|o|stem2=hatalom|spe_sg_stem=hatalm}}</nowiki></code>
For {{m|hu|álom}} (plural {{m|hu|álmok}}):
: <code><nowiki>{{hu-infl-nom|álmo|o|stem2=álom|spe_sg_stem=álm}}</nowiki></code>
For {{m|hu|tükör}} (plural {{m|hu|tükrök}}):
: <code><nowiki>{{hu-infl-nom|tükrö|ö|stem2=tükör|spe_sg_stem=tükr}}</nowiki></code>
For {{m|hu|fejedelem}} (plural {{m|hu|fejedelmek}}):
: <code><nowiki>{{hu-infl-nom|fejedelme|e|stem2=fejedelem|spe_sg_stem=fejedelm|esm_sg=1|esm_pl=1}}</nowiki></code>
There are 10 nouns with a v-stem ({{m|hu|cső}}, {{m|hu|fű}}, {{m|hu|hó}}, {{m|hu|kő}}, {{m|hu|lé}}, {{m|hu|ló}}, {{m|hu|mű}}, {{m|hu|szó}}, {{m|hu|tó}}, {{m|hu|tő}}). They will need one or both stem parameters, depending on how the superessive singular is formed.
For {{m|hu|tó}}:
: <code><nowiki>{{hu-infl-nom|tava|o|stem2=tó|spe_sg_stem=tav}}</nowiki></code>
For {{m|hu|szó}}:
: <code><nowiki>{{hu-infl-nom|szava|o|-|stem2=szó}}</nowiki></code>
For {{m|hu|lé}}:
: <code><nowiki>{{hu-infl-nom|leve|e|stem2=lé}}</nowiki></code>
For {{m|hu|mű}}:
: <code><nowiki>{{hu-infl-nom|műve|ö|stem2=mű|spe_sg_stem=műv}}</nowiki></code>
===Abbreviations and foreign spellings===
Abbreviations and words with foreign spellings must use the <code>v=</code> parameter, to indicate which final consonant is actually pronounced.
For {{m|hu|ápr.}} (abbreviation of {{m|hu|április}}):
: <code><nowiki>{{hu-infl-nom|ápr.-o|o|-|v=s}}</nowiki></code>
For {{m|hu|FÁK}}:
: <code><nowiki>{{hu-infl-nom|FÁK-a|o|v=k|n=sg}}</nowiki></code>
For {{m|hu|szept.}} (abbreviation of {{m|hu|szeptember}}):
: <code><nowiki>{{hu-infl-nom|szept.-e|e|-|v=r}}</nowiki></code>
For {{m|hu|OS}}:
: <code><nowiki>{{hu-infl-nom|OS-e|e|-|v=s}}</nowiki></code>
===Possessive forms===
The possessive forms of nominals can further be inflected by appending case suffixes. Final -a and -e change to -á and -é, respectively.
For {{m|hu|ablaka||his/her/its window}}:
: <code><nowiki>{{hu-infl-nom|ablaká|o|n=sg|esm_sg=1}}</nowiki></code>
For {{m|hu|ablakunk||our window}}:
: <code><nowiki>{{hu-infl-nom|ablakunka|o|n=sg|esm_sg=1}}</nowiki></code>
For {{m|hu|kertje||his/her/its garden}}:
: <code><nowiki>{{hu-infl-nom|kertjé|e|n=sg|esm_sg=1}}</nowiki></code>
<includeonly>
[[ကဏ္ဍ:ထာမ်ပလိက်ဆၜိုတ်ဒုၚ်ယၟုအပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်|*]]
</includeonly>
ivjds6cw08s1ujada471aq8p4xpc96v
ကဏ္ဍ:ထာမ်ပလိက်ဆၜိုတ်ဒုၚ်ယၟုအပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်
14
295378
395907
2026-05-29T17:31:36Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်ဆၜိုတ်ဒုၚ်ယၟုအပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]]"
395907
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်ဆၜိုတ်ဒုၚ်ယၟုအပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]]
oxbqz3zbe55nceozpd83ifpwv7td5ov
ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်
14
295379
395908
2026-05-29T17:33:41Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်ဟာန်ဂါရေဝ်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]]"
395908
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်ဟာန်ဂါရေဝ်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဟ]]
nba0hyv5ehbcreo072skheuy4cohah4
ထာမ်ပလိက်:hu-pos-tok
10
295380
395909
2026-05-29T17:35:51Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{hu-infl-pos-table<!-- -->|n={{#switch:{{{n|{{{form|}}}}}}|sg|sing=sg|pl=pl}}<!-- -->|1sg_sg={{{1}}}m<!-- -->|2sg_sg={{{1}}}d<!-- -->|3sg_sg=[[{{{1}}}ja]]{{#if:{{{3sg_sg2|}}}|, [[{{{3sg_sg2}}}]]}}<!-- NB: the first form needs linking whenever there is a second variant -->|1pl_sg={{{1}}}nk<!-- -->|2pl_sg={{{1}}}tok<!-- -->|3pl_sg=[[{{{1}}}juk]]{{#if:{{{3pl_sg2|}}}|, [[{{{3pl_sg2}}}]]}}<!-- NB: the first form needs li..."
395909
wikitext
text/x-wiki
{{hu-infl-pos-table<!--
-->|n={{#switch:{{{n|{{{form|}}}}}}|sg|sing=sg|pl=pl}}<!--
-->|1sg_sg={{{1}}}m<!--
-->|2sg_sg={{{1}}}d<!--
-->|3sg_sg=[[{{{1}}}ja]]{{#if:{{{3sg_sg2|}}}|, [[{{{3sg_sg2}}}]]}}<!-- NB: the first form needs linking whenever there is a second variant
-->|1pl_sg={{{1}}}nk<!--
-->|2pl_sg={{{1}}}tok<!--
-->|3pl_sg=[[{{{1}}}juk]]{{#if:{{{3pl_sg2|}}}|, [[{{{3pl_sg2}}}]]}}<!-- NB: the first form needs linking whenever there is a second variant
-->|1sg_pl=[[{{{1}}}{{{2|}}}im]]{{#if:{{{2|}}}| <small>(''or'' [[{{{1}}}im]])</small>}}<!--
-->|2sg_pl=[[{{{1}}}{{{2|}}}id]]{{#if:{{{2|}}}| <small>(''or'' [[{{{1}}}id]])</small>}}<!--
-->|3sg_pl=[[{{{1}}}{{{2|}}}i]]{{#if:{{{2|}}}| <small>(''or'' [[{{{1}}}i]])</small>}}<!--
-->|1pl_pl=[[{{{1}}}{{{2|}}}ink]]{{#if:{{{2|}}}| <small>(''or'' [[{{{1}}}ink]])</small>}}<!--
-->|2pl_pl=[[{{{1}}}{{{2|}}}itok]]{{#if:{{{2|}}}| <small>(''or'' [[{{{1}}}itok]])</small>}}<!--
-->|3pl_pl=[[{{{1}}}{{{2|}}}ik]]{{#if:{{{2|}}}| <small>(''or'' [[{{{1}}}ik]])</small>}}<!--
-->|perspron={{{perspron|}}}<!--
-->}}<!--
-->{{#if:{{{form|}}}| }}<!--
--><noinclude>{{documentation}}</noinclude>
3hkzdkl57rp8glydup0600cd9ckhyjd
ထာမ်ပလိက်:hu-pos-tok/documentation
10
295381
395910
2026-05-29T17:38:30Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} == Usage == This template generates the possessive forms of Hungarian nouns with the following characteristics: * Their final letter is a vowel: {{m|hu|hordó|t=barrel}}. * They form their second-person possessive plural by adding '''{{l|hu|-tok}}''': {{m|hu|hordótok|t=your barrel}}. The possessive template should be placed after the ====Declension==== header, under the regular declension..."
395910
wikitext
text/x-wiki
{{documentation subpage}}
== Usage ==
This template generates the possessive forms of Hungarian nouns with the following characteristics:
* Their final letter is a vowel: {{m|hu|hordó|t=barrel}}.
* They form their second-person possessive plural by adding '''{{l|hu|-tok}}''': {{m|hu|hordótok|t=your barrel}}.
The possessive template should be placed after the ====Declension==== header, under the regular declension template.
==Unnamed parameters==
; <code>1=</code>
: stem, remains unchanged in all forms. If the noun lengthens the final vowel, provide the modified stem.
; <code>2=</code>
: the third-person singular possessive ending for a plural noun, the default is '''-i'''; provide only if the ending requires an additional -ja, mostly for nouns ending in '''-i'''.
==Named parameters==
; <code>n=sg</code>
:add this parameter if the noun has only singular forms. The plural column will contain dashes.
; <code>n=pl</code>
:add this parameter if the noun has only plural forms. The singular column will contain dashes.
; <code>3sg_sg2=</code>
: the full variant for third-person singular single-possession form, placed below the regular form separated by a new line
; <code>3pl_sg2=</code>
: the full variant for third-person plural single-possession form, placed below the regular form separated by a new line
==Examples==
For {{m|hu|hordó|t=barrel}}:
:<code><nowiki>{{hu-pos-tok|hordó}}</nowiki></code>
For {{m|hu|éjszaka|t=night}} (it lengthens the final vowel):
:<code><nowiki>{{hu-pos-tok|éjszaká}}</nowiki></code>
For {{m|hu|kocsi|t=car}} (it requires an additional -ja):
:<code><nowiki>{{hu-pos-tok|kocsi|ja}}</nowiki></code>
For {{m|hu|csikó|t=foal}} (it has variant forms in third-person):
:<code><nowiki>{{hu-pos-tok|csikó|3sg_sg2=csikaja|3pl_sg2=csikajuk}}</nowiki></code>
<includeonly>
[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်|pos-tok]]
</includeonly>
lbp3ji2v2th6tmoapbrv3xrh9243cqq
ထာမ်ပလိက်:hu-infl-pos-table
10
295382
395911
2026-05-29T17:59:27Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{inflection-table-top|title={{#if:{{{perspron|}}}|သဗ္ဗနာမ်ဗီုပြၚ်အဆက်လက္ကရဴဆေၚ်စပ်ကဵုပူဂဵုနကဵုဝေါဟာ {{m|hu||{{pagename}}}}|ဗီုပြၚ်ပၟိက်သၟိက်မိက်ဂွံပိုၚ်ပြဳနကဵုဝေါဟာ {{m|hu||{{pagename}}}}}}|palette=green|tall=yes}} |- ! class="outer" | တ..."
395911
wikitext
text/x-wiki
{{inflection-table-top|title={{#if:{{{perspron|}}}|သဗ္ဗနာမ်ဗီုပြၚ်အဆက်လက္ကရဴဆေၚ်စပ်ကဵုပူဂဵုနကဵုဝေါဟာ {{m|hu||{{pagename}}}}|ဗီုပြၚ်ပၟိက်သၟိက်မိက်ဂွံပိုၚ်ပြဳနကဵုဝေါဟာ {{m|hu||{{pagename}}}}}}|palette=green|tall=yes}}
|-
! class="outer" | တၠဒြပ်
! class="outer" | ပိုန်ဘိုက်မွဲတၠ
! class="outer" | ပိုန်ဘိုက်သ္ကုတ်ကရေက်ဗွဲမဂၠိုၚ်
|-
! ပူဂဵုဒယှ်ေမရနုက်ကဵု၁
| {{#switch:{{{n|}}}|sg|={{#if:{{{1sg_sg|}}}|{{l-self|hu|{{{1sg_sg}}}}}|—}}|#default=—}}
| {{#switch:{{{n|}}}|pl|={{#if:{{{1sg_pl|}}}|{{l-self|hu|{{{1sg_pl}}}}}|—}}|#default=—}}
|-
! ပူဂဵုဒယှ်ေမရနုက်ကဵု၂
| {{#switch:{{{n|}}}|sg|={{#if:{{{2sg_sg|}}}|{{l-self|hu|{{{2sg_sg}}}}}|—}}|#default=—}}
| {{#switch:{{{n|}}}|pl|={{#if:{{{2sg_pl|}}}|{{l-self|hu|{{{2sg_pl}}}}}|—}}|#default=—}}
|-
! ပူဂဵုဒယှ်ေမရနုက်ကဵု၃
| {{#switch:{{{n|}}}|sg|={{#if:{{{3sg_sg|}}}|{{l-self|hu|{{{3sg_sg}}}}}|—}}|#default=—}}
| {{#switch:{{{n|}}}|pl|={{#if:{{{3sg_pl|}}}|{{l-self|hu|{{{3sg_pl}}}}}|—}}|#default=—}}
|-
! ကိုန်ဗဟုဝစ်ပူဂဵုမရနုက်ကဵု၁
| {{#switch:{{{n|}}}|sg|={{#if:{{{1pl_sg|}}}|{{l-self|hu|{{{1pl_sg}}}}}|—}}|#default=—}}
| {{#switch:{{{n|}}}|pl|={{#if:{{{1pl_pl|}}}|{{l-self|hu|{{{1pl_pl}}}}}|—}}|#default=—}}
|-
! ကိုန်ဗဟုဝစ်ပူဂဵုမရနုက်ကဵု၂
| {{#switch:{{{n|}}}|sg|={{#if:{{{2pl_sg|}}}|{{l-self|hu|{{{2pl_sg}}}}}|—}}|#default=—}}
| {{#switch:{{{n|}}}|pl|={{#if:{{{2pl_pl|}}}|{{l-self|hu|{{{2pl_pl}}}}}|—}}|#default=—}}
|-
! ကိုန်ဗဟုဝစ်ပူဂဵုမရနုက်ကဵု၃
| {{#switch:{{{n|}}}|sg|={{#if:{{{3pl_sg|}}}|{{l-self|hu|{{{3pl_sg}}}}}|—}}|#default=—}}
| {{#switch:{{{n|}}}|pl|={{#if:{{{3pl_pl|}}}|{{l-self|hu|{{{3pl_pl}}}}}|—}}|#default=—}}
{{inflection-table-bottom}}<noinclude>{{documentation}}</noinclude>
8yrvrsj6we6iubqx7hyhd3hthmojvmv
ထာမ်ပလိက်:hu-infl-pos-table/documentation
10
295383
395912
2026-05-29T18:01:05Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} This template is used internally by other Hungarian possessive templates. It should not be used directly in entries. <includeonly> [[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်|pos]] </includeonly>"
395912
wikitext
text/x-wiki
{{documentation subpage}}
This template is used internally by other Hungarian possessive templates. It should not be used directly in entries.
<includeonly>
[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဟာန်ဂါရေဝ်ဂမၠိုၚ်|pos]]
</includeonly>
qwycy3wvo1aamxb1yi0rkhrmphqdkka
ထာမ်ပလိက်:R:kne:Vanoverbergh 1933
10
295384
395913
2026-05-29T18:02:41Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:quote|call_template |template=cite-book |en,kne |year=1933 |author=Morice Vanoverbergh |title=A Dictionary of Lepanto Igorot or Kankanay. As it is spoken at Bauco |publisher=Verlag der Internationalen Zeitschrift „Anthropos“ |location=Mödling bei Wien, St. Gabriel, Österreich |series=Linguistische Anthropos-Bibliothek |seriesvolume=XII |oclc=1110007 |url=https://nlpdl.nlp.gov.ph/499/1933/83-11234/home...."
395913
wikitext
text/x-wiki
{{#invoke:quote|call_template
|template=cite-book
|en,kne
|year=1933
|author=Morice Vanoverbergh
|title=A Dictionary of Lepanto Igorot or Kankanay. As it is spoken at Bauco
|publisher=Verlag der Internationalen Zeitschrift „Anthropos“
|location=Mödling bei Wien, St. Gabriel, Österreich
|series=Linguistische Anthropos-Bibliothek
|seriesvolume=XII
|oclc=1110007
|url=https://nlpdl.nlp.gov.ph/499/1933/83-11234/home.htm
|entry={{#if: {{{2|}}}| {{{1}}}” & “{{{2}}} | {{{entry|{{{1|{{pagename}}}}}}}} }}
|pageparam=page
|propagateparams=t
|allowparams=entry,1,2
}}<noinclude>
{{Documentation}}
</noinclude>
4cg8ixadsdp17nn37pma63762yanqle
ထာမ်ပလိက်:documentation
10
295385
395915
2026-05-29T18:03:51Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:documentation]] ဇရေင် [[ထာမ်ပလိက်:Documentation]]
395915
wikitext
text/x-wiki
#REDIRECT [[ထာမ်ပလိက်:Documentation]]
f14wl7hp80hta0be113l18fo0r050tu
ထာမ်ပလိက်:documentation/documentation
10
295386
395917
2026-05-29T18:03:52Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:documentation/documentation]] ဇရေင် [[ထာမ်ပလိက်:Documentation/documentation]]
395917
wikitext
text/x-wiki
#REDIRECT [[ထာမ်ပလိက်:Documentation/documentation]]
ndatdhxysyw58k43c7f9goth0gc7na3
ထာမ်ပလိက်:R:kne:Vanoverbergh 1933/documentation
10
295387
395918
2026-05-29T18:04:45Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} ==Template data== <templatedata> { "params": { "1": { "aliases": [ "entry" ], "label": "Entry name", "description": "Defaults to the page name", "type": "string" }, "page": { "label": "Page number", "description": "The page number of the entry", "type": "number" }, "text": { "label": "Quoted text, if applicable", "type": "string" }, "t": {..."
395918
wikitext
text/x-wiki
{{documentation subpage}}
==Template data==
<templatedata>
{
"params": {
"1": {
"aliases": [
"entry"
],
"label": "Entry name",
"description": "Defaults to the page name",
"type": "string"
},
"page": {
"label": "Page number",
"description": "The page number of the entry",
"type": "number"
},
"text": {
"label": "Quoted text, if applicable",
"type": "string"
},
"t": {
"label": "Quoted text English translation, if applicable",
"type": "string"
}
},
"format": "inline",
"description": "This template is for referencing the comprehensive Kankanaey dictionary authored by Morice Vanoverbergh."
}
</templatedata>
==Considerations==
===Southern or Northern?===
It is unclear whether this dictionary focuses on Southern or Northern Kankanaey vocabulary. It documents the speech of Bauko, which is known to have both Southern and Northern Kankanaey speakers. For example, "{{m|kne|aw}}" is Southern while "{{m|kne|owen}}" is Northern, and this dictionary lists "{{m|kne|aw}}" but not "{{m|kne|owen}}". On the other hand, "{{m|kne|maga}}" is Southern while "{{m|kne|maid}}" is Northern, yet this dictionary lists "{{m|kne|maid}}" but not "{{m|kne|maga}}".
===Lack of part of speech===
This dictionary does not list each entry's part of speech by default; only certain entries have one. Extra care is needed to decide what an entry's actual part of speech is.
===Verbs===
Vanoverbergh lists verbs as their roots. This is in contrast to Wiktionary.
Additionally, he lists verbs as having tenses. This is incorrect as Kankanaey verbs have ''aspect'', not ''tense''.
===Affixes===
Affixes are not marked with a dash "-". This makes it hard to identify whether an entry is an affix or a regular word.
===e versus a===
Vanoverbergh sometimes uses "e" instead of "a" due to dialectal differences. This is most evident in "{{m|kne|man-}}", as he uses "{{m|kne|men-}}".
===Sort order of "ng"===
This dictionary's sorting of "ng" is inconsistent. Sometimes it is listed after "n" as if it were a separate letter, while at other times it is sorted as if "ng" were the two separate letters "n" and "g".
<includeonly>
{{refcat|kne}}
</includeonly>
tqrjex3n7e9nxz475t4a3p1k0j2p2f3
ကဏ္ဍ:ထာမ်ပလိက်နိဿဲကာန်ခါနာအဳဂမၠိုၚ်
14
295388
395919
2026-05-29T18:07:05Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်ကာန်ခါနာအဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]]"
395919
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်ကာန်ခါနာအဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]]
cee4tssjue4jwu4fu8rqzlqsoeg9h49
395920
395919
2026-05-29T18:08:16Z
咽頭べさ
33
395920
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်လေန်ကာန်ခါနာအဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်နိဿဲဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]]
rmj31uo5xx4c2s0iqc0d6huaj5btgfx
ကဏ္ဍ:ထာမ်ပလိက်လေန်ကာန်ခါနာအဳဂမၠိုၚ်
14
295389
395921
2026-05-29T18:09:04Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်ကာန်ခါနာအဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်လေန်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]]"
395921
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်ကာန်ခါနာအဳဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်လေန်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]]
szp9rg3hsd6euw0zbyajadlkw7383y4
ကဏ္ဍ:ထာမ်ပလိက်ကာန်ခါနာအဳဂမၠိုၚ်
14
295390
395922
2026-05-29T18:10:10Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ဘာသာကာန်ခါနာအဳ]][[ကဏ္ဍ:ထာမ်ပလိက်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]]"
395922
wikitext
text/x-wiki
[[ကဏ္ဍ:ဘာသာကာန်ခါနာအဳ]][[ကဏ္ဍ:ထာမ်ပလိက်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|က]]
gq7byk3je0s1hp3gc28efozdjb74vch
ထာမ်ပလိက်:R:du Cange
10
295391
395924
2026-05-29T18:12:02Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:R:du Cange]] ဇရေင် [[ထာမ်ပလိက်:R:la:du Cange]]
395924
wikitext
text/x-wiki
#REDIRECT [[ထာမ်ပလိက်:R:la:du Cange]]
7ltcolshs7lfdswy6qxx2oo5mu02fn5
ထာမ်ပလိက်:R:du Cange/documentation
10
295392
395926
2026-05-29T18:12:02Z
咽頭べさ
33
咽頭べさ ပြံင်ပဆုဲလဝ် မုက်လိက် [[ထာမ်ပလိက်:R:du Cange/documentation]] ဇရေင် [[ထာမ်ပလိက်:R:la:du Cange/documentation]]
395926
wikitext
text/x-wiki
#REDIRECT [[ထာမ်ပလိက်:R:la:du Cange/documentation]]
9c3iz9rfn1c3m47dtsc0ubfdah1sb12
ထာမ်ပလိက်:lt-pr
10
295393
395927
2026-05-29T18:13:11Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:lt-pron|show}}<noinclude>{{documentation}}</noinclude>"
395927
wikitext
text/x-wiki
{{#invoke:lt-pron|show}}<noinclude>{{documentation}}</noinclude>
p6h7t8o8qyxdia67brluph5p1k5ncri
မဝ်ဂျူ:lt-pron/doc
828
295394
395929
2026-05-29T18:16:29Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{status|beta}} This module is not to be directly used. It is used by {{tl|lt-pr}}, see there for usage. ===Testcases=== {{#invoke:lt-pron/testcases|run_tests}} <includeonly> {{module cat|lt}} </includeonly>"
395929
wikitext
text/x-wiki
{{status|beta}}
This module is not meant to be used directly. It is used by {{tl|lt-pr}}; see that template for usage.
===Testcases===
{{#invoke:lt-pron/testcases|run_tests}}
<includeonly>
{{module cat|lt}}
</includeonly>
p0t1w8xp43giai1v597sd21xaf3cjo2
မဝ်ဂျူ:lt-pron/testcases/doc
828
295395
395931
2026-05-29T18:19:15Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:lt-pron/testcases|run_tests|comments=1}} <includeonly> {{module cat|lt}} </includeonly>"
395931
wikitext
text/x-wiki
{{#invoke:lt-pron/testcases|run_tests|comments=1}}
<includeonly>
{{module cat|lt}}
</includeonly>
ri2w4m31wo5ypnah5pl6hduwchdvcb1
မဝ်ဂျူ:User:TongcyDai/lt-pron
828
295396
395932
2026-05-29T18:20:12Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "--[==[ Backend for {{lt-pr}}: IPA, hyphenation, and rhyme generation. Author: TongcyDai ]==] local export = {} local m_debug = require("Module:debug") local m_str = require("Module:string utilities") local m_lt_common = require("Module:lt-common") local u = m_str.char local ulower = m_str.lower local uupper = m_str.upper local usub = m_str.sub local ulen = m_str.len local ugsub = m_str.gsub local ufind =..."
395932
Scribunto
text/plain
--[==[
Backend for {{lt-pr}}: IPA, hyphenation, and rhyme generation.
Author: TongcyDai
]==]
local export = {}
local m_debug = require("Module:debug")
local m_str = require("Module:string utilities")
local m_lt_common = require("Module:lt-common")
local u = m_str.char
local ulower = m_str.lower
local uupper = m_str.upper
local usub = m_str.sub
local ulen = m_str.len
local ugsub = m_str.gsub
local ufind = m_str.find
local umatch = m_str.match
local rsplit = m_str.split
-- Accent mark constants (re-exported from Module:lt-common to keep all
-- Lithuanian modules in sync).
local GRAVE = m_lt_common.GRAVE -- U+0300
local ACUTE = m_lt_common.ACUTE -- U+0301
local TILDE = m_lt_common.TILDE -- U+0303
local DOTABOVE = m_lt_common.DOTABOVE -- U+0307
local OGONEK = m_lt_common.OGONEK -- U+0328
-- M4: Reuse Module:lt-common's display formatter directly instead of
-- maintaining a parallel local copy.
local makeDisplayText = m_lt_common.makeDisplayText
-- Liaison marker: U+203F UNDERTIE — separates clitics from their stressed host
-- in input. The phonological grammar treats it like a "soft" word boundary that
-- is transparent to several cross-word processes (palatalization spread,
-- geminate / fricative simplification, place assimilation), per VLKK §19–§23.
local LIAISON = u(0x203F)
-- Lazy-loaded external modules
local m_IPA
local audio_module = "Module:audio"
local homophones_module = "Module:homophones"
local hyphenation_module = "Module:hyphenation"
local rhymes_module = "Module:rhymes"
local parameters_module = "Module:parameters"
local parse_util_module = "Module:parse utilities"
local concat = table.concat
local insert = table.insert
local lang_obj
-- Return the language object for Lithuanian ("lt"), loading Module:languages
-- only on the first call and caching the result in the file-level `lang_obj`.
local function get_lang()
	lang_obj = lang_obj or require("Module:languages").getByCode("lt")
	return lang_obj
end
-- Record a tracking event under the "lt-pron/" prefix via Module:debug.
local function track(reason)
	local key = "lt-pron/" .. reason
	m_debug.track(key)
end
-- ============================================================================
-- SECTION 1: Orthography & Phonology Definitions
-- ============================================================================
-- Suffix table for automatic phonetic adjustments (currently disabled)
-- Exact matching lists (must include precomposed normalized tone markers)
--[[
local SUFFIX_LOAN = {
["fòbas"]=true, ["fòbė"]=true, ["fòbija"]=true, ["fònas"]=true, ["fònė"]=true,
["lògas"]=true, ["lògija"]=true, ["skòpas"]=true
}
--]]
-- Consonant classes for syllabification (Sonority Hierarchy).
-- Each sub-table is a set keyed by the lowercase orthographic consonant
-- (digraphs such as "ch", "dz", "dž" are single keys):
--   R: l, m, n, r, v, j
--   S: s, z, š, ž, f, x, h, ch
--   T: p, b, t, d, k, g, c, dz, č, dž
-- get_type() returns the class letter ("R", "S" or "T") as the token type.
local CLASS = {
R = {["l"]=true, ["m"]=true, ["n"]=true, ["r"]=true, ["v"]=true, ["j"]=true},
S = {["s"]=true, ["z"]=true, ["š"]=true, ["ž"]=true, ["f"]=true, ["x"]=true, ["h"]=true, ["ch"]=true},
T = {["p"]=true, ["b"]=true, ["t"]=true, ["d"]=true, ["k"]=true, ["g"]=true, ["c"]=true, ["dz"]=true, ["č"]=true, ["dž"]=true}
}
-- Front vowels trigger palatalization.
-- Set keyed by lowercase vowel or diphthong spelling; the diphthongs listed
-- (ie, ei, eu) all begin with a front vowel letter.
local FRONT_V = {
["e"]=true, ["ę"]=true, ["ė"]=true, ["i"]=true, ["į"]=true, ["y"]=true, ["ie"]=true, ["ei"]=true, ["eu"]=true
}
-- Vowel -> Base IPA mapping (Unstressed short/inherent).
-- Keys are lowercase vowel letters or two-letter diphthong spellings.
-- get_type() classifies any string that is a key of this table as a vowel
-- ("V"); map_base_phonetics() uses "ɐ" when a lookup here yields nil.
local V_IPA = {
["a"] = "ɐ", ["ą"] = "ɑː", ["e"] = "ɛ", ["ę"] = "æː", ["ė"] = "eː",
["i"] = "ɪ", ["į"] = "iː", ["y"] = "iː",
["u"] = "ʊ", ["ų"] = "uː", ["ū"] = "uː", ["o"] = "oː",
["ie"] = "iɛ", ["uo"] = "uɔ",
-- Unstressed simple diphthongs
["ai"] = "ɐɪ", ["au"] = "ɒʊ", ["ei"] = "ɛɪ", ["eu"] = "ɛʊ",
["ui"] = "ʊɪ", ["oi"] = "ɔɪ", ["ou"] = "ɔʊ"
}
-- Consonant -> Base IPA mapping (Unpalatalized).
-- Keys are lowercase orthographic consonants, with the digraphs "dz", "dž"
-- and "ch" as single keys. Note that "h" maps to "ɣ" and "ch" to "x", and
-- that "g" maps to the IPA letter "ɡ" (U+0261), not ASCII "g".
local CONS_IPA = {
["b"] = "b", ["c"] = "t͡s", ["č"] = "t͡ʃ", ["d"] = "d",
["dz"] = "d͡z", ["dž"] = "d͡ʒ", ["ch"] = "x",
["f"] = "f", ["g"] = "ɡ", ["h"] = "ɣ", ["j"] = "j",
["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n",
["p"] = "p", ["r"] = "r", ["s"] = "s", ["š"] = "ʃ",
["t"] = "t", ["v"] = "ʋ", ["z"] = "z", ["ž"] = "ʒ",
}
-- Voicing pairs for Voicing Assimilation.
-- Symmetric map: every key maps to its partner and the partner maps back
-- (p<->b, t<->d, k<->g, c<->dz, č<->dž, s<->z, š<->ž, x<->ɣ).
-- NOTE(review): most keys are orthographic letters, but "x"/"ɣ" are IPA-like
-- symbols; orthographic "h" and "ch" are not keys here — confirm how callers
-- normalise those before lookup.
local VOICING_PAIRS = {
["p"]="b", ["b"]="p", ["t"]="d", ["d"]="t", ["k"]="g", ["g"]="k",
["c"]="dz", ["dz"]="c", ["č"]="dž", ["dž"]="č",
["s"]="z", ["z"]="s", ["š"]="ž", ["ž"]="š", ["x"]="ɣ", ["ɣ"]="x"
}
-- Report whether `c` is one of the voiced obstruents b, d, g, dz, dž, z, ž, ɣ.
-- Always returns a real boolean (false for any other value, including nil).
-- The membership set is built once and kept as a private upvalue instead of
-- being re-allocated on every call.
local is_voiced
do
	local VOICED = {
		["b"] = true, ["d"] = true, ["g"] = true, ["dz"] = true,
		["dž"] = true, ["z"] = true, ["ž"] = true, ["ɣ"] = true,
	}

	function is_voiced(c)
		return VOICED[c] == true
	end
end
-- Accent pairs for conjugation module support
-- Maps base vowel/diphthong to accented forms (falling/rising)
-- Each value is a table with some of the keys `acute`, `grave`, `tilde`; a
-- key is absent when that accent is not provided for the base.
-- The strings are built by concatenating the combining marks GRAVE / ACUTE /
-- TILDE after a letter, so they are decomposed sequences, not precomposed
-- characters. In every two-letter entry the acute or grave mark follows the
-- first letter and the tilde follows the second letter.
local ACCENT_PAIRS = {
-- Long vowels (acute or tilde)
["ą"] = {acute="ą"..ACUTE, tilde="ą"..TILDE},
["ę"] = {acute="ę"..ACUTE, tilde="ę"..TILDE},
["ė"] = {acute="ė"..ACUTE, tilde="ė"..TILDE},
["y"] = {acute="y"..ACUTE, tilde="y"..TILDE},
["į"] = {acute="į"..ACUTE, tilde="į"..TILDE},
["ū"] = {acute="ū"..ACUTE, tilde="ū"..TILDE},
["ų"] = {acute="ų"..ACUTE, tilde="ų"..TILDE},
-- a/e can be short (grave) or long (tilde)
["a"] = {grave="a"..GRAVE, tilde="a"..TILDE},
["e"] = {grave="e"..GRAVE, tilde="e"..TILDE},
-- o: ó/õ/o are long, ò is short
["o"] = {acute="o"..ACUTE, grave="o"..GRAVE, tilde="o"..TILDE},
-- Short vowels i/u (only grave)
["i"] = {grave="i"..GRAVE},
["u"] = {grave="u"..GRAVE},
-- Simple diphthongs
["ai"] = {acute="a"..ACUTE.."i", tilde="a".."i"..TILDE},
["au"] = {acute="a"..ACUTE.."u", tilde="a".."u"..TILDE},
["ei"] = {acute="e"..ACUTE.."i", tilde="e".."i"..TILDE},
["ui"] = {grave="u"..GRAVE.."i", tilde="u".."i"..TILDE},
-- Complex diphthongs
["ie"] = {acute="i"..ACUTE.."e", tilde="i".."e"..TILDE},
["uo"] = {acute="u"..ACUTE.."o", tilde="u".."o"..TILDE},
-- Mixed diphthongs (a series - acute/tilde)
["al"] = {acute="a"..ACUTE.."l", tilde="a".."l"..TILDE},
["am"] = {acute="a"..ACUTE.."m", tilde="a".."m"..TILDE},
["an"] = {acute="a"..ACUTE.."n", tilde="a".."n"..TILDE},
["ar"] = {acute="a"..ACUTE.."r", tilde="a".."r"..TILDE},
-- Mixed diphthongs (e series - acute/grave/tilde, grave for foreign)
["el"] = {acute="e"..ACUTE.."l", grave="e"..GRAVE.."l", tilde="e".."l"..TILDE},
["em"] = {acute="e"..ACUTE.."m", grave="e"..GRAVE.."m", tilde="e".."m"..TILDE},
["en"] = {acute="e"..ACUTE.."n", grave="e"..GRAVE.."n", tilde="e".."n"..TILDE},
["er"] = {acute="e"..ACUTE.."r", grave="e"..GRAVE.."r", tilde="e".."r"..TILDE},
-- Mixed diphthongs (i series - grave/tilde)
["il"] = {grave="i"..GRAVE.."l", tilde="i".."l"..TILDE},
["im"] = {grave="i"..GRAVE.."m", tilde="i".."m"..TILDE},
["in"] = {grave="i"..GRAVE.."n", tilde="i".."n"..TILDE},
["ir"] = {grave="i"..GRAVE.."r", tilde="i".."r"..TILDE},
-- Mixed diphthongs (u series - grave/tilde)
["ul"] = {grave="u"..GRAVE.."l", tilde="u".."l"..TILDE},
["um"] = {grave="u"..GRAVE.."m", tilde="u".."m"..TILDE},
["un"] = {grave="u"..GRAVE.."n", tilde="u".."n"..TILDE},
["ur"] = {grave="u"..GRAVE.."r", tilde="u".."r"..TILDE},
-- Foreign diphthongs (grave only)
["eu"] = {grave="e"..GRAVE.."u"},
["oi"] = {grave="o"..GRAVE.."i"},
["ou"] = {grave="o"..GRAVE.."u"},
["ol"] = {grave="o"..GRAVE.."l"},
["om"] = {grave="o"..GRAVE.."m"},
["on"] = {grave="o"..GRAVE.."n"},
["or"] = {grave="o"..GRAVE.."r"},
}
-- ============================================================================
-- SECTION 2: Lexical Normalization (Avoid NFD destruction)
-- ============================================================================
-- Helper: strip every accent mark from `text`.
-- The text is first decomposed to NFD and then handed to
-- Module:lt-common's to_stem_bare, so the de-accenting rules live in one
-- shared place. Whatever to_stem_bare returns is returned unchanged.
local function remove_all_accents(text)
	local decomposed = mw.ustring.toNFD(text)
	return m_lt_common.to_stem_bare(decomposed)
end
-- Determine the page name to compare the respelling against.
--
-- @param input  raw respelling string, optionally containing a manual
--               override of the form `<base:xxx>`.
-- @return pagename  the override text if present; otherwise the `pagename`
--                   field of Module:headword/data; otherwise nil.
-- @return input     the input with every `<base:...>` tag removed (manual
--                   override case), or the input unchanged.
--
-- Exactly two values are returned in every case.
local function get_pagename(input)
	-- Manual override: <base:xxx>
	local manual = input:match("<base:([^>]+)>")
	if manual then
		-- string.gsub returns (result, count); bind only the result so the
		-- substitution count does not leak out as a third return value.
		local stripped = input:gsub("<base:[^>]+>", "")
		return manual, stripped
	end
	-- Otherwise load from headword data. The lookup is wrapped in pcall so
	-- that any failure to load the data falls through to the nil result.
	local success, data = pcall(function()
		return mw.loadData("Module:headword/data").pagename
	end)
	if success and data then
		return data, input
	end
	return nil, input
end
-- Find the "j" characters in the respelling that have no counterpart in the
-- page name (i.e. glides written only in the respelling, plain or as "(j)").
--
-- @param input_with_accents  the respelling as typed (accents and markers allowed)
-- @param pagename            the page title, or nil
-- @return a list of 1-based character positions, counted in the cleaned
--         respelling (accents, "^", ".", "<base:...>", "ˌ", "ʼ" and spaces
--         removed; "(j)" collapsed to "j"). Empty when `pagename` is nil.
local function identify_respelling_glides(input_with_accents, pagename)
	if not pagename then
		return {} -- nothing to compare against
	end

	-- Clean the respelling. The rules are applied in this exact order.
	local cleaned = remove_all_accents(input_with_accents)
	local strip_rules = {
		{"[%^%.]", ""},       -- special markers ^ and .
		{"<base:[^>]+>", ""}, -- manual pagename override
		{"ˌ", ""},            -- literal secondary-stress mark
		{"ʼ", ""},            -- softening mark
		{"%(j%)", "j"},       -- optional glide marker counts as a plain j
		{" ", ""},            -- spaces
	}
	for _, rule in ipairs(strip_rules) do
		cleaned = ugsub(cleaned, rule[1], rule[2])
	end
	cleaned = mw.ustring.toNFC(cleaned)

	-- Normalise the page name: lowercase, no spaces.
	local page = ugsub(ulower(pagename), " ", "")

	local cleaned_len = ulen(cleaned)
	local page_len = ulen(page)

	-- Walk both strings in parallel. A "j" in the respelling that does not
	-- line up with a "j" in the page name is a respelling glide; in that case
	-- only the respelling cursor advances.
	local positions = {}
	local page_pos = 1
	for input_pos = 1, cleaned_len do
		local is_j = usub(cleaned, input_pos, input_pos) == "j"
		local page_has_j = is_j
			and page_pos <= page_len
			and usub(page, page_pos, page_pos) == "j"
		if is_j and not page_has_j then
			insert(positions, input_pos)
		else
			page_pos = page_pos + 1
		end
	end
	return positions
end
-- Safe mapping to extract tones without destroying precomposed characters
-- Each key is a lowercase accented letter; each value is a "base,tone" string
-- that safe_normalize() splits on "," into the base character and the tone
-- name ("acute", "grave" or "tilde"). Keys are either a single codepoint or a
-- letter followed by one combining mark, which is why safe_normalize() tries
-- a two-character lookup before the one-character lookup.
local TONE_MAP = {
["á"]="a,acute", ["à"]="a,grave", ["ã"]="a,tilde",
["é"]="e,acute", ["è"]="e,grave", ["ẽ"]="e,tilde",
["í"]="i,acute", ["ì"]="i,grave", ["ĩ"]="i,tilde",
["ý"]="y,acute", ["ỳ"]="y,grave", ["ỹ"]="y,tilde",
["ú"]="u,acute", ["ù"]="u,grave", ["ũ"]="u,tilde",
["ó"]="o,acute", ["ò"]="o,grave", ["õ"]="o,tilde",
-- Precomposed vowels with macrons/ogoneks + tones (represented here via standard combinations)
["ą́"]="ą,acute", ["ą̃"]="ą,tilde",
["ę́"]="ę,acute", ["ę̃"]="ę,tilde",
["ė́"]="ė,acute", ["ė̃"]="ė,tilde",
["į́"]="į,acute", ["į̃"]="į,tilde",
["ų́"]="ų,acute", ["ų̃"]="ų,tilde",
["ū́"]="ū,acute", ["ū̃"]="ū,tilde",
-- Tilde on liquids (for semi-diphthongs)
["l̃"]="l,tilde", ["m̃"]="m,tilde", ["ñ"]="n,tilde", ["r̃"]="r,tilde",
["j̃"]="j,tilde"
}
-- Validate, canonicalise and split an input string into character records.
--
-- Steps, in order:
--   1. Module:lt-common rejects PUA characters (reject_pua).
--   2. Non-standard encodings reported by detect_nonstandard are tracked
--      ('dotless-ij', 'precomposed-i-accent'); detection runs on the raw
--      input, before canonicalisation.
--   3. The "explicit dotabove" form (i/j followed by U+0307 in NFD) is
--      tracked locally as 'explicit-dotabove'.
--   4. The text is replaced by lt-common's canonicalize_input result.
--   5. The canonical text is scanned left to right; each accented letter
--      found in TONE_MAP becomes {char = base, tone = name}, everything else
--      becomes {char = c, tone = nil}.
--
-- @param text  raw input string
-- @return list of {char, tone} tables; `char` keeps the case of the input
--         letter, `tone` is "acute" / "grave" / "tilde" or nil.
local function safe_normalize(text)
	m_lt_common.reject_pua(text)

	local has_dotless, has_precomp_i = m_lt_common.detect_nonstandard(text)
	if has_dotless then
		track('dotless-ij')
	end
	if has_precomp_i then
		track('precomposed-i-accent')
	end
	if ufind(mw.ustring.toNFD(text), "[ij]" .. DOTABOVE) then
		track('explicit-dotabove')
	end

	text = m_lt_common.canonicalize_input(text)

	local parsed = {}
	local total = ulen(text)
	local pos = 1
	while pos <= total do
		local ch = usub(text, pos, pos)
		local ch_lower = ulower(ch)
		-- TONE_MAP keys are lowercase, so lookups use the lowercased text.
		-- Prefer the two-character key (letter + combining mark) and fall
		-- back to the single-character key.
		local pair_key = ch_lower .. ulower(usub(text, pos + 1, pos + 1))
		local entry, width = TONE_MAP[pair_key], 2
		if not entry then
			entry, width = TONE_MAP[ch_lower], 1
		end

		if entry then
			local parts = rsplit(entry, ",")
			local base = parts[1]
			-- Restore the case of the original letter.
			if ch ~= ch_lower then
				base = uupper(base)
			end
			insert(parsed, {char = base, tone = parts[2]})
		else
			insert(parsed, {char = ch, tone = nil})
		end
		pos = pos + width
	end
	return parsed
end
-- ============================================================================
-- SECTION 3: Tokenization & Diphthong/Digraph resolution
-- ============================================================================
-- Classify a character (or digraph) by its lowercase form:
-- "V" if it is a key of V_IPA, otherwise "R" / "S" / "T" according to the
-- CLASS set that contains it, otherwise "UNKNOWN". Checked in that order.
local function get_type(c)
	local key = ulower(c)
	return (V_IPA[key] and "V")
		or (CLASS.R[key] and "R")
		or (CLASS.S[key] and "S")
		or (CLASS.T[key] and "T")
		or "UNKNOWN"
end
-- Helper: rebuild an NFC string from a token list (for suffix/prefix matching).
-- Each token contributes its `char`, followed by the combining mark for its
-- `tone` when that is "grave", "acute" or "tilde"; any other tone adds nothing.
local function tokens_to_string(tok_list)
	local tone_marks = {grave = GRAVE, acute = ACUTE, tilde = TILDE}
	local pieces = {}
	for idx, tok in ipairs(tok_list) do
		pieces[idx] = tok.char .. (tone_marks[tok.tone] or "")
	end
	return mw.ustring.toNFC(concat(pieces))
end
-- Apply automatic properties based on word structure (e.g. loanwords).
--
-- All automatic detection is currently disabled, so this function returns
-- `tokens` unchanged. The lowercased word string used to be computed here on
-- every call even though nothing read it; that computation now lives inside
-- the disabled block, where its only consumer is.
--
-- @param tokens  token list produced by tokenize()
-- @return the same `tokens` table
local function apply_auto_properties(tokens)
	--[[ Suffix detection for loanword quality (currently disabled)
	local word_str = ulower(tokens_to_string(tokens))
	local matched_loan_suff = nil
	for suff, _ in pairs(SUFFIX_LOAN) do
		if usub(word_str, -ulen(suff)) == suff then
			matched_loan_suff = suff; break
		end
	end
	if matched_loan_suff then
		local suff_len = ulen(matched_loan_suff)
		local acc_len = 0
		for i = #tokens, 1, -1 do
			local t = tokens[i]
			acc_len = acc_len + ulen(tokens_to_string({t}))
			if t.type == "V" and ulower(t.char) == "o" then
				t.auto_targeted = true
				if not t.force_default then t.loan_quality = true end
			end
			if acc_len >= suff_len then break end
		end
	end
	-- Check for redundant asterisks globally
	for _, t in ipairs(tokens) do
		if t.force_default and not t.auto_targeted then
			track('redundant-asterisk')
		end
	end
	--]]
	return tokens
end
-- Strict whitelist of vowel pairs that form a diphthong, depending on where
-- the tone mark sits.
--
-- @param c1, c2  the two (lowercase) vowel letters
-- @param t1, t2  the tone on each letter, or nil
-- @return true only for these combinations, false otherwise:
--   * no tone on either letter:  ie uo ai au ei ui oi ou eu
--   * acute on the first letter: ai au ei ie uo oi
--   * grave on the first letter: ui oi ou eu
--   * tilde on the second letter: ai au ei ui ie uo eu
local is_strict_diphthong
do
	local TONELESS = {ie = true, uo = true, ai = true, au = true, ei = true,
		ui = true, oi = true, ou = true, eu = true}
	local ACUTE_FIRST = {ai = true, au = true, ei = true, ie = true, uo = true, oi = true}
	local GRAVE_FIRST = {ui = true, oi = true, ou = true, eu = true}
	local TILDE_SECOND = {ai = true, au = true, ei = true, ui = true, ie = true,
		uo = true, eu = true}

	function is_strict_diphthong(c1, t1, c2, t2)
		local pair = c1 .. c2
		local allowed
		if t1 then
			-- A tone on both letters is never a valid diphthong.
			if t2 then
				return false
			end
			if t1 == "acute" then
				allowed = ACUTE_FIRST
			elseif t1 == "grave" then
				allowed = GRAVE_FIRST
			else
				return false
			end
		elseif t2 then
			if t2 ~= "tilde" then
				return false
			end
			allowed = TILDE_SECOND
		else
			allowed = TONELESS
		end
		return allowed[pair] == true
	end
end
-- Count how many characters of raw_chars[1..upto] survive the cleaning that
-- identify_respelling_glides() applies, so that the result can be compared
-- with the positions that function returns.
-- Skipped: the markers "^", ".", " ", "ʼ", "ˌ", and the two parentheses of a
-- "(j)" glide marker (identify_respelling_glides collapses "(j)" to a single
-- "j", so only the "j" itself may be counted).
local function respelling_clean_position(raw_chars, upto)
	local count = 0
	for k = 1, upto do
		local ch = raw_chars[k].char
		local skip = ch == "^" or ch == "." or ch == " " or ch == "ʼ" or ch == "ˌ"
		if not skip then
			if ch == "(" then
				local a, b = raw_chars[k + 1], raw_chars[k + 2]
				skip = a ~= nil and b ~= nil and a.char == "j" and b.char == ")"
			elseif ch == ")" and k > 2 then
				skip = raw_chars[k - 1].char == "j" and raw_chars[k - 2].char == "("
			end
		end
		if not skip then
			count = count + 1
		end
	end
	return count
end

-- Turn a respelling string into a list of phonological tokens.
--
-- @param text_str  the respelling (accents, "^", ".", "ˌ", "ʼ", "(j)" allowed)
-- @param pagename  page title used to detect respelling-only "j" glides, or nil
-- @return list of token tables. Every token has `char` and `type`
--         ("V", "R", "S", "T", "UNKNOWN", "BOUNDARY" or
--         "SECONDARY_STRESS_BOUNDARY"). Depending on the token, these fields
--         may also be set: `tone`, `tone_position`, `has_silent_i`,
--         `loan_quality`, `softening_mark`, `is_respelling`, `is_optional`,
--         `original_char`.
-- Raises an error when "^" follows an unsupported vowel/tone, when "(" is not
-- part of "(j)", or when a respelling glide is not between two vowels.
local function tokenize(text_str, pagename)
	local raw_chars = safe_normalize(text_str)
	-- Positions (in the cleaned respelling) of "j" glides absent from the pagename
	local respelling_j_positions = identify_respelling_glides(text_str, pagename)
	local tokens = {}
	local i = 1
	while i <= #raw_chars do
		local curr = raw_chars[i]
		local nxt = raw_chars[i+1]
		local lc_curr = ulower(curr.char)
		local lc_nxt = nxt and ulower(nxt.char)
		-- Explicit Modifiers
		if curr.char == "^" then
			-- "^" marks loan quality on the most recent vowel token
			local last_v = nil
			for j = #tokens, 1, -1 do
				if tokens[j].type == "V" then last_v = tokens[j]; break end
			end
			if last_v then
				local lc_v = ulower(last_v.char)
				local base_v = usub(lc_v, -1) -- Last char for silent i combinations
				local is_e_base = (lc_v == "e") or (base_v == "e")
				local is_o_base = (lc_v == "o") or (base_v == "o")
				-- Check for valid e: no tone or grave only
				if is_e_base then
					if not last_v.tone or last_v.tone == "grave" then
						last_v.loan_quality = true
					elseif last_v.tone == "acute" then
						error("lt-pron: '^' cannot be used with acute 'é' (use only with plain 'e' or grave 'è')")
					elseif last_v.tone == "tilde" then
						error("lt-pron: '^' cannot be used with tilde 'ẽ' (use only with plain 'e' or grave 'è')")
					end
				-- Check for valid o: no tone only
				elseif is_o_base then
					if not last_v.tone then
						last_v.loan_quality = true
					elseif last_v.tone == "grave" then
						error("lt-pron: '^' is redundant for 'ò' (already pronounced [ɔ])")
					elseif last_v.tone == "acute" or last_v.tone == "tilde" then
						error("lt-pron: '^' cannot be used with 'ó' or 'õ' (native long vowels)")
					end
				else
					error("lt-pron: '^' can only be used with 'e' (plain/grave) or 'o' (plain). Found: '" .. lc_v .. "'")
				end
			end
			i = i + 1
		-- Check for (j) marker
		elseif curr.char == "(" and i + 2 <= #raw_chars then
			local char2 = raw_chars[i+1]
			local char3 = raw_chars[i+2]
			if char2.char == "j" and char3.char == ")" then
				insert(tokens, {char = "j", type = "R", tone = nil, is_respelling = true, is_optional = true, original_char = "-"})
				i = i + 3
			else
				error("lt-pron: '(' must be followed by 'j)' to form the (j) glide marker")
			end
		--[[ Asterisk modifier (currently disabled)
		elseif curr.char == "*" then
			local last_v = nil
			for j = #tokens, 1, -1 do
				if tokens[j].type == "V" then last_v = tokens[j]; break end
			end
			if last_v then last_v.force_default = true end
			i = i + 1
		--]]
		elseif curr.char == "." then
			insert(tokens, {char = ".", type = "BOUNDARY"})
			i = i + 1
		elseif curr.char == "ˌ" then
			insert(tokens, {char = "ˌ", type = "SECONDARY_STRESS_BOUNDARY"})
			i = i + 1
		elseif curr.char == "ʼ" then
			-- Softening mark: palatalize the preceding consonant
			for j = #tokens, 1, -1 do
				local tok = tokens[j]
				if tok.type == "T" or tok.type == "S" or tok.type == "R" then
					tok.softening_mark = true
					break
				end
			end
			i = i + 1
		-- Digraphs dz / dž become a single T token
		elseif lc_curr == "d" and nxt and (lc_nxt == "z" or lc_nxt == "ž") then
			insert(tokens, {char = curr.char .. nxt.char, type = "T", tone = nil})
			i = i + 2
		-- Digraph ch becomes a single S token
		elseif lc_curr == "c" and nxt and lc_nxt == "h" then
			insert(tokens, {char = curr.char .. nxt.char, type = "S", tone = nil})
			i = i + 2
		-- Special handling for V + j̃ (final j with tilde): treat as V + ĩ diphthong.
		-- (raw_chars entries carry only `char` and `tone`, so the `curr.type`
		-- guard below is always satisfied; it is kept as written.)
		elseif curr.type ~= "BOUNDARY" and get_type(curr.char) == "V" and nxt and ulower(nxt.char) == "j" and nxt.tone == "tilde" then
			-- Final means: nothing but spaces follows the j̃ in the whole input
			local is_final = true
			for k = i + 2, #raw_chars do
				if raw_chars[k].char ~= " " then
					is_final = false
					break
				end
			end
			if is_final then
				-- Combine V + j̃ as a diphthong V + ĩ (e.g., uj̃ → ui̇̃)
				local v_char = ulower(curr.char)
				insert(tokens, {
					char = v_char .. "i", -- e.g., "u" + "i" = "ui"
					type = "V",
					tone = "tilde", -- j̃'s tilde
					tone_position = 2, -- Tilde is on the second vowel (i)
					original_char = v_char .. "j" -- For hyphenation: display as "uj̃"
				})
				i = i + 2
			else
				-- Not final, treat as regular V + j
				insert(tokens, {char = curr.char, type = "V", tone = curr.tone})
				i = i + 1
			end
		elseif curr.type ~= "BOUNDARY" and get_type(curr.char) == "V" and nxt and get_type(nxt.char) == "V" then
			local nxt_nxt = raw_chars[i+2]
			local lc_nxt_nxt = nxt_nxt and ulower(nxt_nxt.char)
			local back_diph = lc_nxt and lc_nxt_nxt and (lc_nxt .. lc_nxt_nxt)
			-- Toneless "i" before a back diphthong or back vowel is a silent
			-- (palatalizing) i rather than a vowel of its own
			local is_silent_i_diph = (lc_curr == "i" and not curr.tone) and
				(back_diph == "au" or back_diph == "ai" or back_diph == "ou" or back_diph == "oi" or back_diph == "uo")
			local is_silent_i_mono = (lc_curr == "i" and not curr.tone) and
				(lc_nxt == "a" or lc_nxt == "ą" or lc_nxt == "o" or
				lc_nxt == "u" or lc_nxt == "ų" or lc_nxt == "ū" or lc_nxt == "ɔ")
			local is_valid_diph = is_strict_diphthong(lc_curr, curr.tone, lc_nxt, nxt and nxt.tone)
			if is_valid_diph and nxt_nxt and get_type(nxt_nxt.char) == "V" then
				local is_next_valid_diph = is_strict_diphthong(lc_nxt, nxt.tone, lc_nxt_nxt, nxt_nxt.tone)
				if is_next_valid_diph then
					-- Resolve ambiguous triplets (e.g., auo -> a.uo is standard)
					-- Break the first valid diphthong unless explicitly stressed
					if not curr.tone then
						is_valid_diph = false
					end
				end
			end
			if is_silent_i_diph then
				local tone = nxt.tone or nxt_nxt.tone
				local tone_position = nil
				if tone then
					-- For silent i diphthongs (e.g., iau), position is relative to the full string
					-- Position 2 = middle vowel, Position 3 = last vowel
					if nxt.tone then
						tone_position = 2
					elseif nxt_nxt.tone then
						tone_position = 3
					end
				end
				insert(tokens, {char = curr.char .. nxt.char .. nxt_nxt.char, type = "V", tone = tone, has_silent_i = true, tone_position = tone_position})
				i = i + 3
			elseif is_silent_i_mono then
				local tone = nxt.tone
				local tone_position = nil
				if tone then
					-- For silent i monosyllables (e.g., ia), position 2 = second character
					tone_position = 2
				end
				insert(tokens, {char = curr.char .. nxt.char, type = "V", tone = tone, has_silent_i = true, tone_position = tone_position})
				i = i + 2
			elseif is_valid_diph then
				local tone = curr.tone or nxt.tone
				local tone_position = nil
				if tone then
					-- Record which vowel carries the tone (1 = first, 2 = second)
					if curr.tone then
						tone_position = 1
					elseif nxt.tone then
						tone_position = 2
					end
				end
				insert(tokens, {char = curr.char .. nxt.char, type = "V", tone = tone, tone_position = tone_position})
				i = i + 2
			else
				insert(tokens, {char = curr.char, type = "V", tone = curr.tone})
				i = i + 1
			end
		else
			-- Plain single character; spaces produce no token
			if curr.char ~= " " then
				local tok_type = get_type(curr.char)
				if lc_curr == "j" then
					local is_respelling_j = false
					if #respelling_j_positions > 0 then
						-- Translate the raw index into a position in the cleaned
						-- respelling, then look it up in the respelling list
						local clean_pos = respelling_clean_position(raw_chars, i)
						for _, pos in ipairs(respelling_j_positions) do
							if pos == clean_pos then
								is_respelling_j = true
								break
							end
						end
					end
					local tok_data = {char = curr.char, type = tok_type, tone = curr.tone, is_respelling = is_respelling_j, is_optional = false}
					if is_respelling_j then
						tok_data.original_char = "-" -- Respelling: use "-" to indicate not in orthography
					end
					insert(tokens, tok_data)
				else
					insert(tokens, {char = curr.char, type = tok_type, tone = curr.tone})
				end
			end
			i = i + 1
		end
	end
	-- Validate respelling glides are between vowels
	for idx, tok in ipairs(tokens) do
		if tok.is_respelling then
			local prev_is_vowel = false
			local next_is_vowel = false
			-- Check previous non-boundary token
			for j = idx - 1, 1, -1 do
				if tokens[j].type ~= "BOUNDARY" and tokens[j].type ~= "SECONDARY_STRESS_BOUNDARY" then
					prev_is_vowel = (tokens[j].type == "V")
					break
				end
			end
			-- Check next non-boundary token
			for j = idx + 1, #tokens do
				if tokens[j].type ~= "BOUNDARY" and tokens[j].type ~= "SECONDARY_STRESS_BOUNDARY" then
					next_is_vowel = (tokens[j].type == "V")
					break
				end
			end
			if not (prev_is_vowel and next_is_vowel) then
				error("lt-pron: Respelling glide 'j' or '(j)' must be between two vowels")
			end
		end
	end
	return apply_auto_properties(tokens)
end
-- ============================================================================
-- SECTION 4: Syllabification
-- ============================================================================
-- Pre-syllabification: collapse doubled consonants, in place.
-- Two adjacent consonant tokens (type T, S or R) are merged when their
-- lowercase characters are identical, or when both are sibilants (s, z, š, ž).
-- The FIRST token of the pair is removed and the second is kept.
-- This has to run before syllabification so that, e.g., pérrašo is not split
-- as pér-ra-šo (which would make "ér" look like a mixed diphthong); after
-- merging rr -> r the split is pé-ra-šo and "é" stays a pure vowel.
-- Returns nothing; `tokens` is modified directly.
local function merge_geminate_consonants(tokens)
	local sibilant = {["s"] = true, ["z"] = true, ["š"] = true, ["ž"] = true}

	local function is_consonant(tok)
		local kind = tok.type
		return kind == "T" or kind == "S" or kind == "R"
	end

	local pos = 1
	while pos < #tokens do
		local first, second = tokens[pos], tokens[pos + 1]
		local redundant = false
		if is_consonant(first) and is_consonant(second) then
			local a, b = ulower(first.char), ulower(second.char)
			redundant = (sibilant[a] and sibilant[b]) or a == b
		end
		if redundant then
			-- Stay on the same index so a run of three or more collapses too.
			table.remove(tokens, pos)
		else
			pos = pos + 1
		end
	end
end
-- Split a token list into syllables.
--
-- @param tokens  list of tokens with a `type` field ("V", "R", "S", "T",
--                "BOUNDARY", "SECONDARY_STRESS_BOUNDARY", ...)
-- @return list of syllables; each syllable is a list of tokens. BOUNDARY and
--         SECONDARY_STRESS_BOUNDARY tokens are not copied into syllables.
--         A syllable table gets `secondary_stress = true` when it directly
--         follows a SECONDARY_STRESS_BOUNDARY that was used as a syllable
--         break, or (for the first syllable) when the token list starts with
--         one.
-- If the input has no vowel token, the result is `{tokens}`: a single
-- "syllable" that is the input table itself, boundary tokens included.
--
-- Method: for every pair of consecutive vowels, the tokens between them are
-- inspected and exactly one token index is recorded in `boundaries` as the
-- place where the next syllable starts.
local function syllabify(tokens)
local syllables = {}
local current_syl = {}
-- Check for leading secondary stress marker
local has_initial_secondary_stress = false
if #tokens > 0 and tokens[1].type == "SECONDARY_STRESS_BOUNDARY" then
has_initial_secondary_stress = true
end
-- Indices of all vowel tokens, in order
local v_indices = {}
for i, tok in ipairs(tokens) do
if tok.type == "V" then insert(v_indices, i) end
end
if #v_indices == 0 then return {tokens} end -- Edge case: no vowels
-- boundaries[i] == true means a new syllable starts at token index i
local boundaries = {}
-- Sonority Sequencing Algorithm combined with Morphophonological Maximum Onset
for idx = 1, #v_indices - 1 do
local v1_idx = v_indices[idx]
local v2_idx = v_indices[idx + 1]
-- Consonant tokens between the two vowels, each with its original index
local raw_c_tokens = {}
-- Index of the last explicit boundary token between the two vowels, if any
local forced_boundary_idx = nil
for i = v1_idx + 1, v2_idx - 1 do
if tokens[i].type == "BOUNDARY" or tokens[i].type == "SECONDARY_STRESS_BOUNDARY" then
forced_boundary_idx = i
else
insert(raw_c_tokens, {t=tokens[i], orig_idx=i})
end
end
if forced_boundary_idx then
-- An explicit boundary overrides the automatic rules
boundaries[forced_boundary_idx] = true
elseif #raw_c_tokens == 0 then
-- Hiatus
boundaries[v2_idx] = true
else
-- Macro-Token Grouping: Treat consecutive sibilants (S) as a single phonological unit
local macro_c = {}
local i = 1
while i <= #raw_c_tokens do
local current = raw_c_tokens[i]
if current.t.type == "S" then
local absorbed = {current}
local j = i + 1
-- Absorb any subsequent S tokens into this macro unit, keeping track of them
while j <= #raw_c_tokens and raw_c_tokens[j].t.type == "S" do
insert(absorbed, raw_c_tokens[j])
j = j + 1
end
insert(macro_c, {type = "S", orig_idx = current.orig_idx, tokens = absorbed})
i = j
else
insert(macro_c, {type = current.t.type, orig_idx = current.orig_idx, tokens = {current}})
i = i + 1
end
end
-- Number of macro units decides which cluster rule applies
local m_count = #macro_c
if m_count == 1 then
-- V.CV (or V.SSV, e.g., sausšala -> sau.sšala)
boundaries[macro_c[1].orig_idx] = true
elseif m_count == 2 then
local t1, t2 = macro_c[1].type, macro_c[2].type
-- ST, SR, TR -> V.CCV
if (t1=="S" and t2=="T") or (t1=="S" and t2=="R") or (t1=="T" and t2=="R") then
boundaries[macro_c[1].orig_idx] = true
else
-- Handle TSS and RSS sequences properly (e.g., Oksfordas, transformavo).
-- If the macro cluster is T+S or R+S, and the S unit absorbed multiple sibilants,
-- split between the first and second sibilant (TS.S, RS.S).
if (t1=="T" or t1=="R") and t2=="S" and #macro_c[2].tokens > 1 then
boundaries[macro_c[2].tokens[2].orig_idx] = true
else
-- Default VC.CV
boundaries[macro_c[2].orig_idx] = true
end
end
elseif m_count == 3 then
local t1, t2, t3 = macro_c[1].type, macro_c[2].type, macro_c[3].type
if t1=="S" and t2=="T" and t3=="R" then
boundaries[macro_c[1].orig_idx] = true -- V.CCCV
elseif (t1=="T" and t2=="S" and t3=="T") or
(t1=="R" and t2=="S" and t3=="T") or
(t1=="T" and t2=="S" and t3=="R") or
(t1=="R" and t2=="T" and t3=="R") or
(t1=="T" and t2=="T" and t3=="R") or
(t1=="R" and t2=="S" and t3=="R") then
boundaries[macro_c[2].orig_idx] = true -- VC.CCV (includes RSR, e.g., konfliktas)
else
boundaries[macro_c[3].orig_idx] = true -- VCC.CV
end
elseif m_count == 4 then
-- Identify the 4-consonant pattern according to the 8 documented combinations
local pattern = macro_c[1].type .. macro_c[2].type .. macro_c[3].type .. macro_c[4].type
if pattern == "RSTR" or pattern == "TSTR" then
-- R.STR, T.STR -> VC.CCCV
boundaries[macro_c[2].orig_idx] = true
elseif pattern == "RTRR" or pattern == "TSTS" then
-- RTR.R, TST.S -> VCCC.CV
boundaries[macro_c[4].orig_idx] = true
else
-- RT.ST, RT.SR, RT.TR, ST.TR -> VCC.CCV
boundaries[macro_c[3].orig_idx] = true
end
elseif m_count >= 5 then
-- Fallback for >=5 logical consonant units
track('complex-consonant-cluster')
boundaries[macro_c[3].orig_idx] = true
end
end
end
-- Construct syllables
-- secondary_stress_syllables[n] == true means syllable number n is marked
local secondary_stress_syllables = {}
for i, tok in ipairs(tokens) do
-- A boundary only closes a syllable that already has content
if boundaries[i] and #current_syl > 0 then
insert(syllables, current_syl)
-- If this is a secondary stress boundary, mark the NEXT syllable
if tok.type == "SECONDARY_STRESS_BOUNDARY" then
secondary_stress_syllables[#syllables + 1] = true
end
current_syl = {}
end
-- Boundary marker tokens themselves never enter a syllable
if tok.type ~= "BOUNDARY" and tok.type ~= "SECONDARY_STRESS_BOUNDARY" then
insert(current_syl, tok)
end
end
if #current_syl > 0 then insert(syllables, current_syl) end
-- Apply secondary stress marks
for idx, _ in pairs(secondary_stress_syllables) do
if syllables[idx] then
syllables[idx].secondary_stress = true
end
end
-- Apply initial secondary stress if present
if has_initial_secondary_stress and #syllables > 0 then
syllables[1].secondary_stress = true
end
return syllables
end
-- ============================================================================
-- SECTION 5: Base IPA Mapping & Stress Assignment
-- ============================================================================
-- Decide whether the vowel at syl[v_idx] forms a mixed diphthong with the
-- resonant that follows it in the same syllable.
--
-- @param syl    a syllable (list of tokens)
-- @param v_idx  index of the vowel token inside `syl`
-- @return true when the vowel (after dropping a leading silent "i") is a
--         single letter that is not one of ą ę ė į y ų ū, and the next token
--         in the syllable is of type "R" other than j or v; false otherwise.
--
-- The j/v check is done on the lowercased character, like every other
-- character comparison in this module, so an uppercase "J" or "V" is not
-- mistaken for a liquid. The lookup sets are built once, not per call.
local is_mixed_diphthong
do
	-- Natively long vowels never form mixed diphthongs
	local LONG_V = {["ą"]=true, ["ę"]=true, ["ė"]=true, ["į"]=true, ["y"]=true, ["ų"]=true, ["ū"]=true}
	-- Resonants that do not count for liquid semi-diphthong length rules
	local NON_LIQUID = {["j"]=true, ["v"]=true}

	function is_mixed_diphthong(syl, v_idx)
		local v_tok = syl[v_idx]
		local lc_v = ulower(v_tok.char)
		-- Strip silent 'i' for accurate length calculation
		if v_tok.has_silent_i then
			lc_v = usub(lc_v, 2)
		end
		-- Digraphs (ie, uo, ai, au, etc.) and long vowels are excluded;
		-- only short a, e, i, u, o can form true mixed diphthongs.
		if ulen(lc_v) > 1 or LONG_V[lc_v] then
			return false
		end
		-- V + R in the SAME syllable (coda)
		local coda = syl[v_idx + 1]
		if not coda or coda.type ~= "R" then
			return false
		end
		return not NON_LIQUID[ulower(coda.char)]
	end
end
--- Assign a base IPA string to every token and a tone/stress prefix to every
-- toned syllable. Mutates the tokens and syllable tables in place:
--   * vowel tokens receive `.ipa` (and `.silent_i` when `has_silent_i` is set);
--   * consonant tokens receive `.ipa` from CONS_IPA (falling back to the
--     lowercase character itself);
--   * a syllable whose nucleus (or mixed-diphthong resonant) carries a tone
--     receives `.stress`;
--   * the resonant of a tilde mixed diphthong is flagged `.half_long`.
-- @param syllables  array of syllables, each an array of tokens
local function map_base_phonetics(syllables)
    -- Acute realizations of the pure diphthongs.
    local ACUTE_DIPHTHONG = {
        ["ai"] = "ɑˑɪ", ["au"] = "ɑˑʊ", ["ei"] = "æˑɪ",
        ["eu"] = "æˑʊ", ["oi"] = "oˑɪ",
    }
    -- Tilde realizations of the pure diphthongs.
    local TILDE_DIPHTHONG = {
        ["ai"] = "ɐɪˑ", ["au"] = "ɒʊˑ", ["ei"] = "ɛɪˑ",
        ["eu"] = "ɛʊˑ", ["ui"] = "ʊɪˑ",
    }
    -- Grave-marked nuclei that take the "¹ˈ" prefix instead of plain "ˈ".
    local GRAVE_TONE1_DIPHTHONG = {["ui"] = true, ["oi"] = true, ["ou"] = true, ["eu"] = true}
    local GRAVE_TONE1_MIXED_BASE = {["i"] = true, ["u"] = true, ["e"] = true, ["o"] = true}

    -- Case-insensitive consonant mapping.
    local function assign_consonant_ipa(tok)
        local lower = ulower(tok.char)
        tok.ipa = CONS_IPA[lower] or lower
    end

    for _, syl in ipairs(syllables) do
        -- The nucleus is the first vowel token of the syllable.
        local nucleus_idx = nil
        for pos, tok in ipairs(syl) do
            if tok.type == "V" then
                nucleus_idx = pos
                break
            end
        end

        if not nucleus_idx then
            -- Syllable with no vowel (e.g. leftover consonant)
            for _, tok in ipairs(syl) do
                assign_consonant_ipa(tok)
            end
        else
            local nucleus = syl[nucleus_idx]
            local vowel = ulower(nucleus.char)
            local mixed = is_mixed_diphthong(syl, nucleus_idx)

            -- In a mixed diphthong the tone may sit on the resonant.
            local tone = nucleus.tone
            if mixed and not tone then
                tone = syl[nucleus_idx + 1].tone or tone
            end

            if nucleus.has_silent_i then
                nucleus.silent_i = true
                -- From here on only the vowel after the silent 'i' matters.
                vowel = usub(vowel, 2)
                nucleus.ipa = V_IPA[vowel] or "ɐ"
            else
                nucleus.ipa = V_IPA[vowel] or "ɐ"
                -- Loan quality base default for 'o' and 'e'
                if nucleus.loan_quality then
                    if vowel == "o" then
                        nucleus.ipa = "ɔ"
                    elseif vowel == "e" then
                        nucleus.ipa = "e"
                    end
                end
                -- Syllable-initial 'ie' gets an automatic glide.
                if vowel == "ie" and nucleus_idx == 1 then
                    nucleus.ipa = "jiɛ"
                end
            end

            if tone then
                local mark = ""
                if tone == "acute" then
                    mark = "¹ˈ"
                    if vowel == "a" then
                        nucleus.ipa = mixed and "ɑˑ" or "ɑː"
                    elseif vowel == "e" then
                        if not mixed then
                            nucleus.ipa = "æː" -- Pure vowel: full-long
                        elseif nucleus.loan_quality then
                            nucleus.ipa = "ɛ" -- Loanword é in mixed diphthongs: short, no length
                        else
                            nucleus.ipa = "æˑ" -- Mixed diphthong: half-long
                        end
                    elseif ACUTE_DIPHTHONG[vowel] then
                        nucleus.ipa = ACUTE_DIPHTHONG[vowel]
                    end
                elseif tone == "grave" then
                    local first = usub(vowel, 1, 1)
                    if (mixed and GRAVE_TONE1_MIXED_BASE[first]) or GRAVE_TONE1_DIPHTHONG[vowel] then
                        mark = "¹ˈ"
                    else
                        mark = "ˈ"
                    end
                    if vowel == "o" then
                        nucleus.ipa = "ɔ" -- ò (always loanword) → [ɔ]
                    elseif vowel == "e" and nucleus.loan_quality then
                        nucleus.ipa = "e" -- è^ (loanword) → [e]
                    end
                elseif tone == "tilde" then
                    mark = "²ˈ"
                    local diphthong = TILDE_DIPHTHONG[vowel]
                    if diphthong then
                        nucleus.ipa = diphthong
                    elseif vowel == "a" then
                        nucleus.ipa = mixed and "ɐ" or "ɑː"
                    elseif vowel == "e" then
                        nucleus.ipa = mixed and "ɛ" or "æː"
                    end
                end
                syl.stress = mark
            end

            -- Base IPA for every non-vowel token of the syllable.
            for _, tok in ipairs(syl) do
                if tok.type ~= "V" then
                    assign_consonant_ipa(tok)
                end
            end

            -- Tilde on a mixed diphthong lengthens the resonant coda.
            if mixed and tone == "tilde" then
                syl[nucleus_idx + 1].half_long = true
            end
        end
    end
end
-- ============================================================================
-- SECTION 6: Phonetic Polish Passes
-- ============================================================================
-- Pass 1: Palatalization Spreading (Right-to-Left)
--
-- Walks the word's tokens from the last to the first. A front vowel, a vowel
-- with silent i, or j switches spreading on; any other vowel switches it off.
-- While spreading is on, every consonant is flagged `is_palatalized` and
-- receives ʲ, except k/g, which only receive ʲ when they are in DIRECT contact
-- with the trigger (but stay transparent to spreading).
--
-- right_context_palatalizing (optional, used by the cross-word pipeline):
-- When the current word is followed by a liaisoned word whose first effective
-- phoneme is "soft" (front V, j, or a palatalized consonant), pass `true` so
-- that:
-- 1. spread_active starts true (allowing the word's last consonant to
-- receive ʲ even though it has no in-word right neighbour);
-- 2. is_direct evaluates to true for that last consonant when it is k/g,
-- so VLKK §19's "lyk‿jója → [lʲiːkʲ‿…]" pattern is produced.
-- When omitted or false, only within-word palatalization is applied.
--
-- A token never receives a second ʲ: a consonant that already got its mark
-- from `softening_mark` is left as-is when spreading reaches it, instead of
-- ending up with "ʲʲ".
local function apply_palatalization(syllables, right_context_palatalizing)
    local PALATAL = "ʲ"

    -- Append ʲ unless the IPA is empty (deleted segment) or already ends in ʲ.
    -- The suffix test is byte-based, which is exact for UTF-8 strings.
    local function add_palatal(tok)
        local ipa = tok.ipa
        if ipa ~= "" and ipa:sub(-#PALATAL) ~= PALATAL then
            tok.ipa = ipa .. PALATAL
        end
    end

    -- Flatten tokens for cross-syllable spreading
    local flat_tokens = {}
    for _, syl in ipairs(syllables) do
        for _, tok in ipairs(syl) do insert(flat_tokens, tok) end
    end

    -- First pass: Apply softening marks (no spreading)
    for i = 1, #flat_tokens do
        local tok = flat_tokens[i]
        if tok.softening_mark and tok.ipa ~= "" then
            add_palatal(tok)
            tok.is_palatalized = true
        end
    end

    local spread_active = right_context_palatalizing and true or false
    for i = #flat_tokens, 1, -1 do
        local tok = flat_tokens[i]
        local lc_char = ulower(tok.char)
        if tok.type == "V" then
            if tok.silent_i or FRONT_V[lc_char] then
                spread_active = true
            else
                spread_active = false
            end
        elseif lc_char == "j" then
            -- Preserve special IPA for final j (ɪ̯), don't override it
            if tok.ipa ~= "ɪ̯" and tok.ipa ~= "" then
                tok.ipa = "j"
            end
            tok.is_palatalized = true
            spread_active = true
        elseif spread_active then
            -- Check if the palatalization is DIRECT (immediate contact with front V or j)
            local is_direct = false
            local nxt = flat_tokens[i + 1]
            if nxt then
                local nxt_lc = ulower(nxt.char)
                if (nxt.type == "V" and (nxt.silent_i or FRONT_V[nxt_lc])) or nxt_lc == "j" then
                    is_direct = true
                end
            elseif right_context_palatalizing then
                -- No in-word neighbour, but a liaisoned soft phoneme follows.
                is_direct = true
            end
            tok.is_palatalized = true
            if lc_char == "k" or lc_char == "g" then
                -- k/g are marked only on direct contact, but spread_active is
                -- deliberately left on: they are transparent to spreading.
                if is_direct then
                    add_palatal(tok)
                end
            else
                add_palatal(tok)
            end
        end
    end
end
-- Pass 2: Voicing Assimilation (Right-to-Left)
-- Scanning from the end of the word, the rightmost obstruent (type "S" or
-- "T") of each cluster fixes the voicing target; every obstruent to its left
-- within the same cluster is swapped to its VOICING_PAIRS partner when its
-- voicing differs. Vowels and resonants end the cluster. Letter case of
-- `tok.char` is preserved, and `tok.ipa` is rebuilt from the new character.
local function apply_voicing_assimilation(syllables)
    local seq = {}
    for _, syl in ipairs(syllables) do
        for _, tok in ipairs(syl) do
            insert(seq, tok)
        end
    end

    local target = nil -- "voiced" / "voiceless", or nil when no cluster is open
    for pos = #seq, 1, -1 do
        local tok = seq[pos]
        local kind = tok.type
        if kind == "V" or kind == "R" then
            -- Blocked by vowels and resonants
            target = nil
        elseif kind == "S" or kind == "T" then
            local lower = ulower(tok.char)
            local was_upper = (tok.char ~= lower)
            if target == nil then
                -- Rightmost obstruent of the cluster: it sets the target.
                if is_voiced(lower) then
                    target = "voiced"
                else
                    target = "voiceless"
                end
            else
                local voiced_now = is_voiced(lower)
                local mismatch = (target == "voiced" and not voiced_now)
                    or (target == "voiceless" and voiced_now)
                if mismatch then
                    local partner = VOICING_PAIRS[lower] or lower
                    if was_upper then
                        tok.char = uupper(partner)
                    else
                        tok.char = partner
                    end
                end
                -- Rebuild the IPA from the (possibly swapped) character,
                -- keeping an existing palatalization mark (never on k/g).
                local current = ulower(tok.char)
                local rebuilt = CONS_IPA[current] or current
                if tok.is_palatalized and not (current == "k" or current == "g") then
                    rebuilt = rebuilt .. "ʲ"
                end
                tok.ipa = rebuilt
            end
        end
    end
end
-- Pass 3: Nasal Assimilation (n -> ŋ before velars k/g and post-velar ch/h)
-- Per VLKK §6.3, n assimilates to the place of articulation of any following
-- velar/uvular consonant, including the fricatives ch [x] and h [ɣ].
--
-- Only the IMMEDIATELY following token is inspected. A vowel token flagged
-- `silent_i` (e.g. "iu") is a pronounced nucleus, not an empty segment, so it
-- must block assimilation: looking past it would turn the n of an n + iu + k
-- sequence into ŋ across a vowel.
-- @param syllables  array of syllables (arrays of tokens); `tok.ipa` of
--                   affected n tokens is overwritten in place
local function apply_nasal_assimilation(syllables)
    local ASSIMILATION_TRIGGER = {["k"] = true, ["g"] = true, ["ch"] = true, ["h"] = true}

    local flat_tokens = {}
    for _, syl in ipairs(syllables) do
        for _, tok in ipairs(syl) do insert(flat_tokens, tok) end
    end

    for i = 1, #flat_tokens - 1 do
        local tok = flat_tokens[i]
        local nxt = flat_tokens[i + 1]
        if ulower(tok.char) == "n" and ASSIMILATION_TRIGGER[ulower(nxt.char)] then
            -- Check actual IPA string for direct palatalization
            if ufind(nxt.ipa, "ʲ") then
                tok.ipa = "ŋʲ"
            else
                tok.ipa = "ŋ"
            end
        end
    end
end
-- Pass 4: Vowel Quality Adjustments after Palatalization
-- Within each syllable, a vowel that follows a palatalized consonant (or j)
-- has its a/e-type quality fronted via a fixed mapping, and a leading
-- u/ʊ/o/ɔ receives the "advanced" diacritic (U+031F). Each palatal onset
-- affects only the next vowel token of the same syllable.
local function adjust_vowel_quality(syllables)
    -- Back/low quality → fronted quality after a palatal onset
    -- (includes the diphthong shifts for iau / iai).
    local FRONTED = {
        ["ɐ"] = "ɛ",
        ["ɑː"] = "æː",
        ["ɒʊ"] = "ɛʊ",
        ["ɑˑʊ"] = "æˑʊ",
        ["ɒʊˑ"] = "ɛʊˑ",
        ["ɐɪ"] = "ɛɪ",
        ["ɑˑɪ"] = "æˑɪ",
        ["ɐɪˑ"] = "ɛɪˑ",
    }

    for _, syl in ipairs(syllables) do
        local soft_onset = false
        for _, tok in ipairs(syl) do
            if tok.type ~= "V" then
                if tok.is_palatalized or ulower(tok.char) == "j" then
                    soft_onset = true
                end
            elseif soft_onset then
                local shifted = FRONTED[tok.ipa] or tok.ipa
                -- Anchored at the start (^) so only the first element is
                -- fronted, avoiding double fronting in uɔ.
                tok.ipa = (ugsub(shifted, "^([uʊoɔ])", "%1̟"))
                soft_onset = false
            end
        end
    end
end
-- Pass 5: Terminal Devoicing (Word-final obstruent devoicing)
-- If the very last token of the word is a voiced obstruent (type "S" or "T")
-- with an entry in VOICING_PAIRS, replace its character (keeping letter case)
-- and its IPA with the voiceless partner, re-adding ʲ when the token is
-- flagged `is_palatalized`.
local function apply_terminal_devoicing(syllables)
    if #syllables == 0 then return end
    local final_syl = syllables[#syllables]
    local tail = final_syl[#final_syl]
    if not tail then return end
    if tail.type ~= "S" and tail.type ~= "T" then return end

    local lower = ulower(tail.char)
    if not is_voiced(lower) then return end
    local voiceless = VOICING_PAIRS[lower]
    if not voiceless then return end

    local was_upper = (tail.char ~= lower)
    if was_upper then
        tail.char = uupper(voiceless)
    else
        tail.char = voiceless
    end

    local ipa = CONS_IPA[voiceless] or voiceless
    if tail.is_palatalized then
        ipa = ipa .. "ʲ"
    end
    tail.ipa = ipa
end
-- Pass 6: Place Assimilation (Sibilant + Affricate)
-- A sibilant (type "S") directly followed by an affricate (type "T") takes
-- over the affricate's place of articulation:
-- s+č→š, z+dž→ž, š+c→s, ž+dz→z
-- Adjacency is evaluated over the whole word, across syllable boundaries.
local function apply_place_assimilation(syllables)
    -- sibilant IPA → { following affricate IPA → replacement sibilant IPA }
    local SIBILANT_BEFORE_AFFRICATE = {
        ["s"] = {["t͡ʃ"] = "ʃ"},
        ["z"] = {["d͡ʒ"] = "ʒ"},
        ["ʃ"] = {["t͡s"] = "s"},
        ["ʒ"] = {["d͡z"] = "z"},
    }

    local prev = nil
    for _, syl in ipairs(syllables) do
        for _, tok in ipairs(syl) do
            if prev and prev.type == "S" and tok.type == "T" then
                local by_affricate = SIBILANT_BEFORE_AFFRICATE[prev.ipa]
                local replacement = by_affricate and by_affricate[tok.ipa]
                if replacement then
                    prev.ipa = replacement
                end
            end
            prev = tok
        end
    end
end
-- Pass 7: Geminate Simplification (Double consonant reduction)
-- NOTE: Original geminates (rr, ll, etc.) are already merged in merge_geminate_consonants.
-- This pass handles geminates created by phonetic rules (e.g., voicing assimilation: td→dd).
-- For every pair of adjacent consonant tokens (types "T", "S", "R"), the FIRST
-- one is silenced (ipa = "") when
--   * both are sibilants (any two sibilants merge, keeping the second), or
--   * both have the same non-empty IPA.
local function apply_geminate_simplification(syllables)
    local IS_SIBILANT = {["s"] = true, ["z"] = true, ["ʃ"] = true, ["ʒ"] = true}
    local IS_CONSONANT_TYPE = {["T"] = true, ["S"] = true, ["R"] = true}

    local prev = nil
    for _, syl in ipairs(syllables) do
        for _, tok in ipairs(syl) do
            if prev and IS_CONSONANT_TYPE[prev.type] and IS_CONSONANT_TYPE[tok.type] then
                if IS_SIBILANT[prev.ipa] and IS_SIBILANT[tok.ipa] then
                    prev.ipa = ""
                elseif prev.ipa == tok.ipa and prev.ipa ~= "" then
                    prev.ipa = ""
                end
            end
            prev = tok
        end
    end
end
-- Pass 8: Final Consonant Vocalization (v → ʊ̯, j → ɪ̯)
-- The last token of the word, when it is v, or j without a tilde tone, is
-- rendered as a non-syllabic vowel. Only `.ipa` changes; the token keeps its
-- type.
-- Note: j with tilde is already converted to i with tilde in tokenization.
local function apply_final_consonant_vocalization(syllables)
    if #syllables == 0 then return end
    local final_syl = syllables[#syllables]
    local tail = final_syl[#final_syl]
    if not tail then return end

    local lower = ulower(tail.char)
    if lower == "v" then
        tail.ipa = "ʊ̯"
    elseif lower == "j" and tail.tone ~= "tilde" then
        tail.ipa = "ɪ̯"
    end
end
-- ============================================================================
-- SECTION 7: Output Assembly
-- ============================================================================
-- ---------------------------------------------------------------------------
-- Cross-word phonology helpers
-- ---------------------------------------------------------------------------
-- Split a term into words at spaces only and return the non-empty words as a
-- plain list (a nil term yields an empty list). The liaison marker ‿ is
-- reserved for IPA output and must never appear in input; if it does, an
-- error is raised so the editor knows to use a regular space instead.
local function split_into_segments(term)
    if term and ufind(term, LIAISON) then
        error("lt-pron: the liaison marker \"" .. LIAISON ..
            "\" (U+203F) must not appear in the input. Use a regular " ..
            "space between words; the module decides where to insert ‿ " ..
            "in the IPA output based on stress.")
    end

    local words = {}
    local pieces = rsplit(term or "", " ")
    for _, piece in ipairs(pieces) do
        -- Consecutive / leading / trailing spaces produce empty pieces.
        if piece ~= "" then
            words[#words + 1] = piece
        end
    end
    return words
end
-- Report whether at least one syllable of the word carries primary stress
-- (`.stress`) or secondary stress (`.secondary_stress`).
local function word_has_stress(syllables)
    for idx = 1, #syllables do
        local syl = syllables[idx]
        if syl.stress or syl.secondary_stress then
            return true
        end
    end
    return false
end
-- Compute the clitic group anchor for each word in a phrase.
--
-- A "clitic group" is a stressed word together with all unstressed words that
-- prosodically attach to it. Two adjacent words share a liaison ‿ iff they
-- belong to the same group, i.e. have the same anchor. Rules:
--
-- 1. Every stressed word is its own anchor (its index in `word_data`).
-- 2. An unstressed word takes the nearest stressed word to its RIGHT
--    (proclitic case, e.g., "iš namų̃" — iš leans on namų̃).
-- 3. If no stressed word follows, it takes the nearest stressed word to its
--    LEFT (enclitic case, e.g., "sakaũ gi" — gi leans on sakaũ).
-- 4. If the phrase has no stressed word at all (rare edge case), every word
--    gets the pseudo-anchor 0.
--
-- This matches VLKK §4.7 examples like "iš namų̃ [ɪʃ‿nɐ²ˈmuː]" (proclitic)
-- and "sakaũ gi [sɐ²ˈkɒʊˑ‿ɡʲɪ]" (enclitic), and VLKK §4.9's
-- "išėjaũ į kiẽmą", where the unstressed į is proclitic to kiẽmą while
-- išėjaũ stands alone.
--
-- Returns an array parallel to `word_data` holding one anchor per word.
local function compute_clitic_anchors(word_data)
    local count = #word_data
    local anchors = {}

    -- Right-to-left sweep: remember the most recent stressed word seen, which
    -- is the nearest stressed word at or to the right of each position.
    local right_anchor = nil
    for idx = count, 1, -1 do
        if word_has_stress(word_data[idx].syllables) then
            right_anchor = idx
        end
        anchors[idx] = right_anchor
    end

    -- Left-to-right sweep: only trailing unstressed words are still empty.
    -- They lean on the last anchor seen to their left, or on pseudo-group 0
    -- when the phrase contains no stressed word at all.
    local left_anchor = nil
    for idx = 1, count do
        if anchors[idx] == nil then
            anchors[idx] = left_anchor or 0
        else
            left_anchor = anchors[idx]
        end
    end

    return anchors
end
-- Return the first token, in syllable order, whose `.ipa` is not the empty
-- string (tokens with empty IPA do not realize a phoneme), or nil if there is
-- none.
local function first_effective_token(syllables)
    for s = 1, #syllables do
        local syl = syllables[s]
        for t = 1, #syl do
            local tok = syl[t]
            if tok.ipa ~= "" then
                return tok
            end
        end
    end
    return nil
end
-- Return the last token, in syllable order, whose `.ipa` is not the empty
-- string, or nil if there is none.
local function last_effective_token(syllables)
    local s = #syllables
    while s >= 1 do
        local syl = syllables[s]
        local t = #syl
        while t >= 1 do
            local tok = syl[t]
            if tok.ipa ~= "" then
                return tok
            end
            t = t - 1
        end
        s = s - 1
    end
    return nil
end
-- Tell whether a word's first effective phoneme triggers palatalization
-- across a liaison boundary: a front vowel (or vowel with silent i), j, or a
-- consonant already flagged `is_palatalized`. Must be called AFTER that
-- word's own palatalization pass has run, so `is_palatalized` is reliable.
local function first_token_palatalizes(syllables)
    local head = first_effective_token(syllables)
    if not head then
        return false
    end
    local lower = ulower(head.char)
    if head.type ~= "V" then
        return lower == "j" or head.is_palatalized == true
    end
    return head.silent_i or FRONT_V[lower] or false
end
-- VLKK §20: the preposition "už" keeps its [ʒ] (i.e. terminal devoicing is
-- skipped) when the next liaisoned word starts with a vowel or sonorant
-- consonant (n, m, l, r, j, v). Other words always undergo terminal devoicing.
local UZ_SKIP_SONORANTS = {
    ["n"] = true, ["m"] = true, ["l"] = true,
    ["r"] = true, ["j"] = true, ["v"] = true,
}
-- Return true only when `word_text` is "už" (ignoring accents and case) and
-- the first effective token of `next_syllables` is a vowel or one of the
-- sonorants listed above.
local function should_skip_devoicing_for_uz(word_text, next_syllables)
    -- remove_all_accents returns NFD; fold back to NFC before comparing with
    -- the literal "už", because ž (U+017E) decomposes to z + COMBINING CARON
    -- (U+030C) in NFD form.
    local folded = ulower(mw.ustring.toNFC(remove_all_accents(word_text or "")))
    if folded ~= "už" or not next_syllables then
        return false
    end
    local head = first_effective_token(next_syllables)
    if not head then
        return false
    end
    return head.type == "V" or UZ_SKIP_SONORANTS[ulower(head.char)] == true
end
-- VLKK §21b / §22b / §23b: at a liaison boundary, the last consonant of W1
-- and the first consonant of W2 may interact. We mirror within-word place
-- assimilation and geminate / sibilant simplification, applied just before
-- W1's palatalization pass so that any new ipa (e.g. s → ʃ) gets palatalized
-- correctly when needed.
local CROSSWORD_PLACE_ASSIM = {
    ["s"] = {["t͡ʃ"] = "ʃ"},
    ["z"] = {["d͡ʒ"] = "ʒ"},
    ["ʃ"] = {["t͡s"] = "s"},
    ["ʒ"] = {["d͡z"] = "z"},
}
local CROSSWORD_SIBILANTS = {["s"]=true, ["z"]=true, ["ʃ"]=true, ["ʒ"]=true}
-- Return `ipa` without a trailing ʲ ("" for nil).
local function strip_trailing_palatal(ipa)
    if not ipa then return "" end
    local stripped = ugsub(ipa, "ʲ$", "")
    return stripped
end
-- Apply the junction rules to the last effective token of W1, given the first
-- effective token of W2. Only W1's token is modified (its `.ipa`).
-- The rules are consonant-only: if either junction token is a vowel nothing
-- happens. Without this guard, two words meeting in identical vowels would
-- satisfy the "same IPA" geminate test and W1 would lose its final vowel.
local function apply_crossword_polish_at_junction(w1_syllables, w2_syllables)
    local w1_last = last_effective_token(w1_syllables)
    local w2_first = first_effective_token(w2_syllables)
    if not (w1_last and w2_first) then return end
    -- Vowels never take part in geminate / sibilant simplification.
    if w1_last.type == "V" or w2_first.type == "V" then return end
    -- W1's last token has not yet been palatalized at this point in the
    -- pipeline, so its ipa is the bare base form. W2 has already been fully
    -- polished, so we must strip a trailing ʲ before using it as a key.
    local w1_base = w1_last.ipa or ""
    local w2_base = strip_trailing_palatal(w2_first.ipa)
    if w1_base == "" or w2_base == "" then return end
    -- Geminate / sibilant cluster: drop W1's last consonant entirely.
    if w1_base == w2_base
        or (CROSSWORD_SIBILANTS[w1_base] and CROSSWORD_SIBILANTS[w2_base]) then
        w1_last.ipa = ""
        return
    end
    -- Place assimilation: rewrite W1's last consonant base. Palatalization,
    -- if any, will be re-applied by the palatalization pass.
    local rule = CROSSWORD_PLACE_ASSIM[w1_base]
    if rule and rule[w2_base] then
        w1_last.ipa = rule[w2_base]
    end
end
-- ---------------------------------------------------------------------------
-- Word-level rendering
-- ---------------------------------------------------------------------------
-- Build the mutable phonological state of one word: tokenize it (aligned
-- against `pagename`), merge geminate consonants, syllabify, and assign base
-- phonetics. Returns the table { text = word, syllables = ... }.
local function prepare_word_state(word, pagename)
    local toks = tokenize(word, pagename)
    merge_geminate_consonants(toks)
    local state = {text = word, syllables = syllabify(toks)}
    map_base_phonetics(state.syllables)
    return state
end
-- Render a fully polished syllables list into an IPA string.
--
-- Per syllable, in this order:
--   1. a hiatus dot "." when the previous syllable ends in a (non-silent-i)
--      vowel and this one starts with one — except before "ie", whose glide
--      j was already inserted by map_base_phonetics;
--   2. the stress prefix: "ˌ" for secondary stress, otherwise `syl.stress`;
--   3. each token's IPA (an optional respelling glide prints as "(j)"),
--      followed by "ˑ" when the token is flagged `half_long`.
-- Raises an error if a syllable carries both primary and secondary stress.
local function render_word_ipa(syllables)
    local parts = {}
    for s_idx, syl in ipairs(syllables) do
        local syl_str = ""
        if s_idx > 1 then
            local prev_syl = syllables[s_idx - 1]
            local prev_last_tok = prev_syl[#prev_syl]
            local curr_first_tok = syl[1]
            local prev_ends_with_v = (prev_last_tok and prev_last_tok.type == "V" and not prev_last_tok.silent_i)
            local curr_starts_with_v = (curr_first_tok and curr_first_tok.type == "V" and not curr_first_tok.silent_i)
            -- Standard hiatus without glide insertion; 'ie' is skipped because
            -- it has an implicit 'j' from map_base_phonetics.
            if prev_ends_with_v and curr_starts_with_v
                and ulower(curr_first_tok.char) ~= "ie" then
                syl_str = "."
            end
        end
        -- Check for conflict between primary and secondary stress
        if syl.secondary_stress and syl.stress then
            error("lt-pron: A syllable cannot have both primary stress (tone mark) and secondary stress (ˌ)")
        end
        -- Add stress markers (primary or secondary, mutually exclusive)
        if syl.secondary_stress then
            syl_str = syl_str .. "ˌ"
        elseif syl.stress then
            syl_str = syl_str .. syl.stress
        end
        for _, tok in ipairs(syl) do
            if tok.is_respelling and tok.is_optional then
                syl_str = syl_str .. "(j)"
            else
                syl_str = syl_str .. tok.ipa
            end
            if tok.half_long then syl_str = syl_str .. "ˑ" end
        end
        insert(parts, syl_str)
    end
    return concat(parts, "")
end
-- ---------------------------------------------------------------------------
-- Multi-word IPA assembler with cross-word phonology
-- ---------------------------------------------------------------------------
-- Pipeline for a phrase made of space-separated words. The liaison marker ‿
-- never appears in input; it is inserted into the rendered IPA according to
-- clitic-group anchors computed from per-word stress (see
-- compute_clitic_anchors above).
--
-- Per-word polish order:
-- terminal devoicing → voicing assim → place assim → geminate simp →
-- final-cons vocalization → cross-word polish at junction →
-- palatalization (with cross-word right context) → nasal assim →
-- vowel quality.
--
-- Cross-word polish at the junction (§21b/§22b/§23b) runs *before* W1's
-- palatalization pass so that any rewritten ipa still receives ʲ correctly.
-- Words are processed RTL so that each W_i sees the already-polished state
-- of W_{i+1} when computing its cross-word context.
--
-- `provided_pagename`, when given, is used as the page name; otherwise the
-- page name and cleaned term are obtained from get_pagename(term).
-- Returns the IPA string ("" when the term contains no words).
local function to_ipa(term, provided_pagename)
    local pagename = provided_pagename
    if not provided_pagename then
        pagename, term = get_pagename(term)
    end

    -- Split input and pagename on spaces (errors out if either contains ‿).
    local words = split_into_segments(term)
    local page_words = nil
    if pagename then
        page_words = split_into_segments(pagename)
        local n_words, n_page = #words, #page_words
        -- Word counts must agree, but only when both sides are multi-word.
        if n_words > 1 and n_page > 1 and n_words ~= n_page then
            error("lt-pron: Input has " .. n_words .. " words but pagename has "
                .. n_page .. " words. They must match.")
        end
        -- A single-word pagename cannot be aligned with a multi-word input
        -- (respelling detection only makes sense for exact matches).
        if n_page == 1 and n_words > 1 then
            page_words = nil
        end
    end

    -- Stage 1: tokenize / syllabify / map base phonetics for every word.
    local word_data = {}
    for idx, text in ipairs(words) do
        local page_word = (page_words and page_words[idx]) or nil
        word_data[idx] = prepare_word_state(text, page_word)
    end
    local count = #word_data
    if count == 0 then return "" end

    -- Stages 2-3: decide liaison per junction, then flag the už §20 exception.
    -- Two adjacent words share a ‿ iff they belong to the same clitic group;
    -- the last word never has a liaison. už keeps its [ʒ] (terminal devoicing
    -- is skipped) only when it is in a liaison junction with the following
    -- word AND that word starts with a vowel or sonorant. This must be decided
    -- here, on the unpolished state of the following word.
    local anchors = compute_clitic_anchors(word_data)
    for idx = 1, count do
        local wd = word_data[idx]
        local following = word_data[idx + 1]
        wd.is_liaison = (following ~= nil) and (anchors[idx] == anchors[idx + 1])
        wd.skip_terminal_devoicing = false
        if wd.is_liaison and should_skip_devoicing_for_uz(wd.text, following.syllables) then
            wd.skip_terminal_devoicing = true
        end
    end

    -- Stage 4: polish RTL so each W_i sees W_{i+1}'s polished state.
    for idx = count, 1, -1 do
        local wd = word_data[idx]
        local syls = wd.syllables
        local liaised_next = wd.is_liaison and word_data[idx + 1] or nil

        if not wd.skip_terminal_devoicing then
            apply_terminal_devoicing(syls)
        end
        apply_voicing_assimilation(syls)
        apply_place_assimilation(syls)
        apply_geminate_simplification(syls)
        apply_final_consonant_vocalization(syls)

        -- Junction polish first (so a freshly assimilated s → ʃ can still
        -- pick up ʲ), then the right-hand palatalization context.
        local soft_right = false
        if liaised_next then
            apply_crossword_polish_at_junction(syls, liaised_next.syllables)
            soft_right = first_token_palatalizes(liaised_next.syllables)
        end

        apply_palatalization(syls, soft_right)
        apply_nasal_assimilation(syls)
        adjust_vowel_quality(syls)
    end

    -- Stage 5: render each word and join with ‿ (same clitic group) or " ".
    local rendered = {}
    for idx = 1, count do
        rendered[idx] = render_word_ipa(word_data[idx].syllables)
    end
    local output = rendered[1] or ""
    for idx = 2, count do
        local separator = " "
        if word_data[idx - 1].is_liaison then
            separator = LIAISON
        end
        output = output .. separator .. rendered[idx]
    end
    return output
end
-- Rhyme Extractor
local IPA_VOWELS = "aɐɑæɛeəɪiɔoʊuɒɜ"
-- Return the rhyme of an IPA transcription: everything from the first vowel
-- after the LAST primary stress mark ˈ to the end, with liaison markers,
-- spaces and hiatus dots removed. If no vowel follows the stress mark, the
-- whole remainder is returned; if there is no ˈ at all, nil is returned.
-- A tone superscript (¹ / ²) sits BEFORE ˈ, so it never ends up in the
-- result and needs no special handling.
local function get_rhyme(ipa)
    -- Remove liaison and spaces before rhyme calculation
    local clean_ipa = ugsub(ipa, "[‿ ]", "")
    -- Search for the last stress mark from right to left
    local last_stress_pos = nil
    for i = ulen(clean_ipa), 1, -1 do
        if usub(clean_ipa, i, i) == "ˈ" then
            last_stress_pos = i
            break
        end
    end
    if not last_stress_pos then return nil end
    -- Extract content after the stress mark
    local after = usub(clean_ipa, last_stress_pos + 1)
    -- Remove hiatus dots for rhyme grouping
    after = ugsub(after, "%.", "")
    -- Find first vowel position (skip onset consonants)
    local vstart = umatch(after, "()[" .. IPA_VOWELS .. "]")
    if vstart then
        return usub(after, vstart)
    end
    return after
end
-- Hyphenation generator (supports multi-word phrases)
--
-- Returns an array of display strings obtained by joining the words with a
-- plain space and splitting the result at syllable breaks, so a piece that
-- spans a word boundary contains that space. The extra field
-- `syllable_count` holds the real number of syllables.
-- `provided_pagename`, when given, is used as the page name; otherwise the
-- page name and cleaned term are obtained from get_pagename(term).
local function get_hyphenation(term, provided_pagename)
    -- Use provided pagename if available, otherwise try to extract from term
    local pagename, clean_term
    if provided_pagename then
        pagename = provided_pagename
        clean_term = term
    else
        pagename, clean_term = get_pagename(term)
    end
    term = clean_term

    -- Place tone mark `t_mark` inside display string `disp_char`.
    -- With a `tone_position`, the mark goes after that character of a 2- or
    -- 3-character vowel group. In every other case (no position, or a group
    -- of unexpected length) the mark simply follows the whole string, so the
    -- character is never dropped from the output.
    local function with_tone(disp_char, t_mark, tone_position)
        if tone_position then
            local char_len = ulen(disp_char)
            if char_len == 2 then
                -- Two-character diphthong
                local v1 = usub(disp_char, 1, 1)
                local v2 = usub(disp_char, 2, 2)
                if tone_position == 1 then
                    return v1 .. t_mark .. v2
                end
                return v1 .. v2 .. t_mark
            elseif char_len == 3 then
                -- Three-character (silent i) diphthong
                local v1 = usub(disp_char, 1, 1)
                local v2 = usub(disp_char, 2, 2)
                local v3 = usub(disp_char, 3, 3)
                if tone_position == 1 then
                    return v1 .. t_mark .. v2 .. v3
                elseif tone_position == 2 then
                    return v1 .. v2 .. t_mark .. v3
                end
                return v1 .. v2 .. v3 .. t_mark
            end
        end
        return disp_char .. t_mark
    end

    -- Split input and pagename into words at spaces. split_into_segments
    -- raises an error if the liaison marker ‿ appears in either of them.
    local input_segs = split_into_segments(term)
    local pagename_segs = nil
    if pagename then
        pagename_segs = split_into_segments(pagename)
        -- Verify segment count matches (only when both are multi-segment)
        if #input_segs > 1 and #pagename_segs > 1 and #input_segs ~= #pagename_segs then
            error("lt-pron: Input has " .. #input_segs .. " words but pagename has "
                .. #pagename_segs .. " words. They must match.")
        end
        -- If pagename is a single word but input is multi-word, drop alignment
        if #pagename_segs == 1 and #input_segs > 1 then
            pagename_segs = nil
        end
    end

    local all_word_parts = {}
    for i, seg_text in ipairs(input_segs) do
        -- Get the corresponding pagename word, or nil if not available
        local seg_pagename = (pagename_segs and pagename_segs[i]) or nil
        local tokens = tokenize(seg_text, seg_pagename)
        local syllables = syllabify(tokens)
        local parts = {}
        for _, syl in ipairs(syllables) do
            local text = ""
            for _, tok in ipairs(syl) do
                -- Skip respelling characters (original_char == "-" means not in orthography)
                if tok.original_char ~= "-" then
                    -- Re-attach original tone visually for display
                    local t_mark = ""
                    if tok.tone == "acute" then t_mark = ACUTE
                    elseif tok.tone == "grave" then t_mark = GRAVE
                    elseif tok.tone == "tilde" then t_mark = TILDE
                    end
                    -- Use original_char if set (e.g., j̃ → i internally but j in display)
                    -- Otherwise use tok.char
                    local disp_char = tok.original_char or tok.char
                    -- Restore original orthography for ɔ
                    if disp_char == "ɔ" then disp_char = "o" end
                    if disp_char == "Ɔ" then disp_char = "O" end
                    text = text .. with_tone(disp_char, t_mark, tok.tone_position)
                end
            end
            parts[#parts + 1] = makeDisplayText(text)
        end
        -- Use ‧ instead of standard - internally to preserve word boundaries
        insert(all_word_parts, concat(parts, "‧"))
    end

    -- Hyphenation always joins multi-word phrases with a plain space — the
    -- liaison marker ‿ is purely an IPA-output device and never appears here.
    local combined_string = concat(all_word_parts, " ")
    local final_parts = {}
    for _, piece in ipairs(rsplit(combined_string, "‧")) do
        insert(final_parts, piece)
    end

    -- Calculate actual syllable count (for correct num_syl)
    local syllable_count = 0
    for _, word_part in ipairs(all_word_parts) do
        local word_syls = rsplit(word_part, "‧")
        syllable_count = syllable_count + #word_syls
    end
    -- Stored as a field because #final_parts counts pieces, not syllables.
    final_parts.syllable_count = syllable_count
    return final_parts
end
-- ============================================================================
-- SECTION 7.5: Conjugation Module Support Functions
-- ============================================================================
-- Export: Get syllables as string array with accents
-- The page name and cleaned term are obtained from get_pagename(term); the
-- term is tokenized and syllabified, and each syllable is returned as a
-- display string (via makeDisplayText) with its tone mark re-attached.
-- Respelling tokens are left out.
function export.get_syllables(term)
    local pagename, clean_term = get_pagename(term)
    term = clean_term
    local tokens = tokenize(term, pagename)
    local syllables = syllabify(tokens)

    -- Place tone mark `t_mark` inside display string `disp_char`.
    -- With a `tone_position`, the mark goes after that character of a 2- or
    -- 3-character vowel group. In every other case (no position, or a group
    -- of unexpected length) the mark simply follows the whole string, so the
    -- character is never dropped from the output.
    local function with_tone(disp_char, t_mark, tone_position)
        if tone_position then
            local char_len = ulen(disp_char)
            if char_len == 2 then
                -- Two-character diphthong
                local v1 = usub(disp_char, 1, 1)
                local v2 = usub(disp_char, 2, 2)
                if tone_position == 1 then
                    return v1 .. t_mark .. v2
                end
                return v1 .. v2 .. t_mark
            elseif char_len == 3 then
                -- Three-character (silent i) diphthong
                local v1 = usub(disp_char, 1, 1)
                local v2 = usub(disp_char, 2, 2)
                local v3 = usub(disp_char, 3, 3)
                if tone_position == 1 then
                    return v1 .. t_mark .. v2 .. v3
                elseif tone_position == 2 then
                    return v1 .. v2 .. t_mark .. v3
                end
                return v1 .. v2 .. v3 .. t_mark
            end
        end
        return disp_char .. t_mark
    end

    local result = {}
    for _, syl in ipairs(syllables) do
        local text = ""
        for _, tok in ipairs(syl) do
            -- Skip respelling glides in syllable output
            if not tok.is_respelling then
                -- Re-attach original tone
                local t_mark = ""
                if tok.tone == "acute" then t_mark = ACUTE
                elseif tok.tone == "grave" then t_mark = GRAVE
                elseif tok.tone == "tilde" then t_mark = TILDE
                end
                -- Restore original orthography for ɔ
                local disp_char = tok.char
                if disp_char == "ɔ" then disp_char = "o" end
                if disp_char == "Ɔ" then disp_char = "O" end
                text = text .. with_tone(disp_char, t_mark, tok.tone_position)
            end
        end
        -- Return NFC format with proper dotabove insertion
        insert(result, makeDisplayText(text))
    end
    return result
end
-- Export: report whether a single syllable is heavy (true) or light (false).
-- Raises an error when the input does not syllabify to exactly one syllable
-- or when that syllable contains no vowel token.
function export.is_heavy_syllable(syllable)
	local pagename, cleaned = get_pagename(syllable)
	local syllables = syllabify(tokenize(cleaned, pagename))
	if #syllables ~= 1 then
		error("is_heavy_syllable: input must be a single syllable, got " .. #syllables .. " syllables")
	end
	local syl = syllables[1]

	-- Index of the first vowel token (the nucleus).
	local v_idx
	for i, tok in ipairs(syl) do
		if tok.type == "V" then
			v_idx = i
			break
		end
	end
	if v_idx == nil then
		error("is_heavy_syllable: no vowel found in syllable")
	end

	local nucleus_tok = syl[v_idx]
	local nucleus = ulower(nucleus_tok.char)
	-- A silent leading i (e.g. "iau") is not part of the nucleus proper.
	if nucleus_tok.has_silent_i then
		nucleus = usub(nucleus, 2)
	end

	-- Inherently long vowels, regardless of accent. o/ɔ are treated as always
	-- long (short ò is deliberately not distinguished here).
	local long_vowels = {
		["ą"] = true, ["ę"] = true, ["ė"] = true, ["į"] = true,
		["y"] = true, ["ų"] = true, ["ū"] = true,
		["o"] = true, ["ɔ"] = true,
	}
	-- A long vowel, or any nucleus longer than one character (a diphthong),
	-- makes the syllable heavy.
	if long_vowels[nucleus] or ulen(nucleus) > 1 then
		return true
	end

	-- Mixed diphthong: short vowel followed directly by an "R"-type token
	-- other than j or v.
	local is_short = nucleus == "a" or nucleus == "e" or nucleus == "i" or nucleus == "u"
	if is_short and v_idx < #syl then
		local following = syl[v_idx + 1]
		if following.type == "R" and following.char ~= "j" and following.char ~= "v" then
			return true
		end
	end

	-- Everything else is light.
	return false
end
-- Export: replace the accent of a single syllable.
--   syllable:      syllable text (first passed through get_pagename)
--   target_accent: "acute", "tilde", "grave" or "none"
-- Returns the accent-free syllable for "none"; otherwise
-- onset .. accented nucleus (looked up in ACCENT_PAIRS) .. coda.
-- Raises an error for an invalid target_accent, for input that is not exactly
-- one syllable, for a syllable without a vowel, and for a nucleus/accent
-- combination that ACCENT_PAIRS does not provide.
function export.change_accent(syllable, target_accent)
	local pagename, clean_syllable = get_pagename(syllable)
	syllable = clean_syllable
	-- Validate target_accent parameter
	local VALID_ACCENTS = {acute=true, tilde=true, grave=true, none=true}
	if not VALID_ACCENTS[target_accent] then
		error("change_accent: invalid target_accent '" .. tostring(target_accent) ..
			"', must be 'acute', 'tilde', 'grave', or 'none'")
	end
	-- Validate single syllable
	local tokens = tokenize(syllable, pagename)
	local syllables = syllabify(tokens)
	if #syllables ~= 1 then
		error("change_accent: input must be a single syllable, got " .. #syllables .. " syllables")
	end
	-- Remove all existing accents
	local clean = remove_all_accents(syllable)
	-- If target is 'none', return clean syllable
	if target_accent == "none" then
		return clean
	end
	-- Re-tokenize the clean syllable to analyze structure
	local syl = syllabify(tokenize(clean, pagename))[1]
	if not syl then
		-- Guard against an empty re-syllabification so the failure is reported
		-- clearly instead of as "bad argument to 'ipairs'".
		error("change_accent: no syllable found after removing accents")
	end
	-- Everything before the first vowel token is the onset.
	local onset = ""
	local v_idx = nil
	for i, tok in ipairs(syl) do
		if tok.type == "V" then
			v_idx = i
			break
		end
		onset = onset .. tok.char
	end
	if not v_idx then
		error("change_accent: no vowel found in syllable")
	end
	local v_tok = syl[v_idx]
	local vowel_part = ulower(v_tok.char)
	-- Handle silent i: drop the leading character of the nucleus.
	if v_tok.has_silent_i then
		vowel_part = usub(vowel_part, 2)
	end
	-- Mixed diphthong: an "R"-type token (other than j/v) right after the vowel
	-- belongs to the accent-bearing unit, so the coda starts one token later.
	local coda_start = v_idx + 1
	if v_idx < #syl then
		local next_tok = syl[v_idx + 1]
		if next_tok.type == "R" and next_tok.char ~= "j" and next_tok.char ~= "v" then
			vowel_part = vowel_part .. ulower(next_tok.char)
			coda_start = v_idx + 2
		end
	end
	local coda = ""
	for i = coda_start, #syl do
		coda = coda .. syl[i].char
	end
	-- Look up the accented form in ACCENT_PAIRS
	if not ACCENT_PAIRS[vowel_part] then
		error("change_accent: vowel/diphthong '" .. vowel_part .. "' not found in accent pairs table")
	end
	local accented_vowel = ACCENT_PAIRS[vowel_part][target_accent]
	if not accented_vowel then
		error("change_accent: accent type '" .. target_accent ..
			"' is not allowed for vowel/diphthong '" .. vowel_part .. "'")
	end
	-- Reconstruct the syllable with the new accent (the original note says the
	-- result is in NFD form; that depends on the ACCENT_PAIRS entries).
	return onset .. accented_vowel .. coda
end
-- ============================================================================
-- SECTION 8: Module Exports & Template Formatting (Preserved)
-- ============================================================================
-- Shared inline-modifier specs, reused by the parsers in this section for the
-- q/qq, a/aa and ref modifiers respectively.
local q_spec = {
	store = "insert-flattened",
	type = "qualifier",
}
local a_spec = {
	store = "insert-flattened",
	type = "labels",
}
local ref_spec = {
	store = "insert-flattened",
	item_dest = "refs",
	type = "references",
}
-- Build an audio object from "file" or "file#caption".
-- The split happens at the first "#"; whitespace directly around it is
-- dropped. Without a "#" the whole argument is the file name and there is no
-- caption.
local function generate_audio_obj(arg)
	local file, caption = arg:match("^(.-)%s*#%s*(.*)$")
	if not file then
		return {file = arg}
	end
	return {file = file, caption = caption}
end
-- Parse a comma-separated rhyme specification. Each rhyme may carry an <s:...>
-- modifier (stored as num_syl) and, when the argument contains "<" at all,
-- the q/qq/a/aa/ref modifiers.
local function parse_rhyme(arg, parse_err)
	local param_mods = {
		s = {item_dest = "num_syl", type = "number", sublist = true},
	}
	-- Qualifier/label/reference modifiers are only registered when some
	-- inline modifier is present.
	if arg:find("<") then
		param_mods.q, param_mods.qq = q_spec, q_spec
		param_mods.a, param_mods.aa = a_spec, a_spec
		param_mods.ref = ref_spec
	end
	local parse_util = require(parse_util_module)
	return parse_util.parse_inline_modifiers(arg, {
		param_mods = param_mods,
		generate_obj = function(term)
			return {rhyme = term}
		end,
		parse_err = parse_err,
		splitchar = ",",
	})
end
-- Parse a comma-separated hyphenation specification. Each item is split on
-- "." into its syllables (`hyph`); the unsplit text is kept as
-- `syllabification`.
local function parse_hyph(arg, parse_err)
	local param_mods = {}
	-- Qualifier/label/reference modifiers are only registered when some
	-- inline modifier is present.
	if arg:find("<") then
		param_mods.q, param_mods.qq = q_spec, q_spec
		param_mods.a, param_mods.aa = a_spec, a_spec
		param_mods.ref = ref_spec
	end
	local parse_util = require(parse_util_module)
	return parse_util.parse_inline_modifiers(arg, {
		param_mods = param_mods,
		generate_obj = function(term)
			return {hyph = rsplit(term, "%."), syllabification = term}
		end,
		parse_err = parse_err,
		splitchar = ",",
	})
end
-- Parse a comma-separated homophone specification. Each item becomes
-- {term = ...} plus whatever modifiers (t/gloss/pos/alt/lit/id/g and, when
-- the argument contains "<", q/qq/a/aa/ref) were attached to it.
local function parse_homophone(arg, parse_err)
	local param_mods = {
		t = {item_dest = "gloss"},
		gloss = {},
		pos = {},
		alt = {},
		lit = {},
		id = {},
		g = {item_dest = "genders", sublist = true},
	}
	-- Qualifier/label/reference modifiers are only registered when some
	-- inline modifier is present.
	if arg:find("<") then
		param_mods.q, param_mods.qq = q_spec, q_spec
		param_mods.a, param_mods.aa = a_spec, a_spec
		param_mods.ref = ref_spec
	end
	local parse_util = require(parse_util_module)
	return parse_util.parse_inline_modifiers(arg, {
		param_mods = param_mods,
		generate_obj = function(term)
			return {term = term}
		end,
		parse_err = parse_err,
		splitchar = ",",
	})
end
-- Inline modifiers accepted inside an <audio:...> value; used by the audio
-- converter in parse_one_term.
local audio_nested_mods = {
	a = a_spec,
	aa = a_spec,
	q = q_spec,
	qq = q_spec,
	text = {},
	IPA = {sublist = true},
	t = {item_dest = "gloss"},
	gloss = {},
	pos = {},
	lit = {},
	g = {item_dest = "genders", sublist = true},
}
-- Parse one pronunciation term together with its inline modifiers.
-- Returns a table with `term`, `audio_list`, `rhyme_list`, `hyph_list` and
-- `pagename` (the value of an optional <base:...> modifier). When the term
-- carries modifiers, the table also has `hmp_list` and whatever q/qq/a/aa/ref
-- data was attached.
local function parse_one_term(raw, parse_err)
	-- Fast path: nothing to parse (pagename is left unset).
	if not raw:find("<") then
		return {term = raw, audio_list = {}, rhyme_list = {}, hyph_list = {}}
	end

	-- <base:...> is pulled out by hand before the generic modifier parser runs.
	local pagename = raw:match("<base:([^>]+)>")
	if pagename then
		raw = raw:gsub("<base:[^>]+>", "")
	end

	-- Convert one <audio:...> value into an audio object with `lang` and a
	-- constructed `text` object attached.
	local function convert_audio(arg, perr)
		local has_mods = arg:find("<")
		local audio
		if has_mods then
			audio = require(parse_util_module).parse_inline_modifiers(arg, {
				param_mods = audio_nested_mods,
				generate_obj = generate_audio_obj,
				parse_err = perr,
			})
		else
			audio = generate_audio_obj(arg)
		end
		audio.lang = get_lang()
		audio.text = require(audio_module).construct_audio_textobj(audio)
		if has_mods then
			-- These fields were only inputs for the text object; clear them
			-- once it has been built.
			audio.gloss = nil
			audio.pos = nil
			audio.lit = nil
			audio.genders = nil
		end
		return audio
	end

	local parsed = require(parse_util_module).parse_inline_modifiers(raw, {
		param_mods = {
			q = q_spec,
			qq = q_spec,
			a = a_spec,
			aa = a_spec,
			ref = ref_spec,
			audio = {
				store = "insert",
				item_dest = "audio_list",
				convert = convert_audio,
			},
			rhyme = {
				store = "insert-flattened",
				item_dest = "rhyme_list",
				convert = parse_rhyme,
			},
			hyph = {
				store = "insert-flattened",
				item_dest = "hyph_list",
				convert = parse_hyph,
			},
			hmp = {
				store = "insert-flattened",
				item_dest = "hmp_list",
				convert = parse_homophone,
			},
		},
		generate_obj = function(t)
			return {term = t, audio_list = {}, rhyme_list = {}, hyph_list = {}, hmp_list = {}}
		end,
		parse_err = parse_err,
	})

	-- Make sure every list field exists even if the parser dropped it.
	for _, field in ipairs({"audio_list", "rhyme_list", "hyph_list", "hmp_list"}) do
		parsed[field] = parsed[field] or {}
	end
	parsed.pagename = pagename
	return parsed
end
-- Format a list of rhyme objects as one bullet line.
-- Returns nil when there is nothing to format. Only the rhyme, its syllable
-- count and the q/qq/a/aa fields are passed on to the rhymes module.
local function fmt_rhyme(rhyme_objs, bullet)
	if not rhyme_objs or #rhyme_objs == 0 then
		return nil
	end
	local rhymes = {}
	for i, obj in ipairs(rhyme_objs) do
		rhymes[i] = {
			rhyme = obj.rhyme,
			num_syl = obj.num_syl,
			q = obj.q,
			qq = obj.qq,
			a = obj.a,
			aa = obj.aa,
		}
	end
	local formatted = require(rhymes_module).format_rhymes({
		lang = get_lang(),
		rhymes = rhymes,
	})
	return bullet .. formatted
end
-- Format a list of hyphenation objects as one bullet line captioned
-- "Syllabification". Returns nil when there is nothing to format.
local function fmt_hyph(hyph_objs, bullet)
	if not hyph_objs or #hyph_objs == 0 then
		return nil
	end
	local hyphs = {}
	for i, obj in ipairs(hyph_objs) do
		hyphs[i] = {
			hyph = obj.hyph,
			q = obj.q,
			qq = obj.qq,
			a = obj.a,
			aa = obj.aa,
		}
	end
	local formatted = require(hyphenation_module).format_hyphenations({
		lang = get_lang(),
		hyphs = hyphs,
		caption = "Syllabification",
	})
	return bullet .. formatted
end
-- Format a single audio object as a bullet line.
local function fmt_audio(audio_obj, bullet)
	local formatted = require(audio_module).format_audio(audio_obj)
	return bullet .. formatted
end
-- Format a list of homophone objects as one bullet line.
-- Returns nil when there is nothing to format.
local function fmt_hmp(hmp_objs, bullet)
	if not hmp_objs or #hmp_objs == 0 then
		return nil
	end
	local formatted = require(homophones_module).format_homophones({
		lang = get_lang(),
		homophones = hmp_objs,
	})
	return bullet .. formatted
end
-- True when split_into_segments yields more than one segment for `term`
-- (per the original note, it returns a list of non-empty word strings).
local function is_multiword_term(term)
	local segments = split_into_segments(term)
	return #segments > 1
end
-- Template entry point: render the pronunciation section.
-- Reads the parent frame's arguments:
--   1       comma-separated list of terms with optional inline modifiers
--           (default "nãmas")
--   bullets bullet depth of the IPA line (default 1)
-- For every term it emits an IPA line followed by audio, rhyme, hyphenation
-- and homophone lines, and returns them joined by newlines.
function export.show(frame)
	local parargs = frame:getParent().args
	local args = require(parameters_module).process(parargs, {
		[1] = {default = "nãmas"},
		["bullets"] = {type = "number", default = 1},
	})
	local input = args[1]
	local nb = args.bullets
	local b1 = string.rep("*", nb) .. " "
	local b2 = string.rep("*", nb + 1) .. " "
	local raw_terms = require(parse_util_module).split_escaping(input, ",")
	local parsed_terms = {}
	for i, raw in ipairs(raw_terms) do
		raw = raw:match("^%s*(.-)%s*$")
		local pt = parse_one_term(raw, function(msg)
			error("lt-pron: " .. msg .. " (term " .. i .. ")")
		end)
		parsed_terms[#parsed_terms + 1] = pt
	end
	m_IPA = m_IPA or require("Module:IPA")
	-- Bullet level for audio/rhyme/hyphenation/homophones: same level as the
	-- IPA line for a single pronunciation, one level deeper for several. It
	-- does not depend on the individual term, so it is computed once.
	local content_bullet = (#parsed_terms == 1) and b1 or b2
	local text_parts = {}
	for _, pt in ipairs(parsed_terms) do
		-- The automatic hyphenation of a term is needed both for rhyme
		-- syllable counts and for the hyphenation line. Compute it at most
		-- once per term, and only when something actually asks for it.
		local hyph_cache
		local hyph_computed = false
		local function auto_hyphenation()
			if not hyph_computed then
				hyph_cache = get_hyphenation(pt.term, pt.pagename)
				hyph_computed = true
			end
			return hyph_cache
		end
		-- Syllable count derived from the automatic hyphenation, or nil when
		-- none is available. The syllable_count field is preferred (it covers
		-- multi-word phrases); otherwise the array length is used.
		local function auto_syllable_count()
			local auto_hyph = auto_hyphenation()
			if auto_hyph and #auto_hyph > 0 then
				return auto_hyph.syllable_count or #auto_hyph
			end
			return nil
		end
		-- Generate IPA
		local ipa_str = to_ipa(pt.term, pt.pagename)
		-- Handle rhyme: manual override, suppression, or auto-generation
		local rhyme_objs = nil
		if #pt.rhyme_list > 0 then
			-- A manual rhyme of "-" suppresses the rhyme line entirely.
			local suppress_rhyme = false
			for _, robj in ipairs(pt.rhyme_list) do
				if robj.rhyme == "-" then
					suppress_rhyme = true
					break
				end
			end
			if not suppress_rhyme then
				rhyme_objs = {}
				for _, robj in ipairs(pt.rhyme_list) do
					-- If num_syl was not specified, fall back to the count
					-- from the auto-generated hyphenation.
					if not robj.num_syl then
						local syl_count = auto_syllable_count()
						if syl_count then
							robj.num_syl = {syl_count}
						end
					end
					insert(rhyme_objs, robj)
				end
			end
		elseif not pt.term:match("%-$") and not is_multiword_term(pt.term) then
			-- Auto-generate rhyme (skipped if the term ends with - or is a
			-- multiword term).
			local rhyme_str = get_rhyme(ipa_str)
			if rhyme_str then
				local syl_count = auto_syllable_count()
				rhyme_objs = {{rhyme = rhyme_str, num_syl = syl_count and {syl_count} or nil}}
			end
		end
		-- Handle hyphenation: manual override, suppression, or auto-generation
		local hyph_objs = nil
		if #pt.hyph_list > 0 then
			-- A manual hyphenation of "-" suppresses the hyphenation line.
			local suppress_hyph = false
			for _, hobj in ipairs(pt.hyph_list) do
				if hobj.syllabification == "-" then
					suppress_hyph = true
					break
				end
			end
			if not suppress_hyph then
				hyph_objs = pt.hyph_list
			end
		else
			-- Auto-generate hyphenation
			local auto_hyph = auto_hyphenation()
			if auto_hyph and #auto_hyph > 0 then
				hyph_objs = {{hyph = auto_hyph, syllabification = concat(auto_hyph, ".")}}
			end
		end
		-- Format IPA with qualifiers and references
		local ipa_item = {pron = "[" .. ipa_str .. "]"}
		if pt.q then ipa_item.q = pt.q end
		if pt.qq then ipa_item.qq = pt.qq end
		if pt.a then ipa_item.a = pt.a end
		if pt.aa then ipa_item.aa = pt.aa end
		if pt.refs then ipa_item.refs = pt.refs end
		text_parts[#text_parts + 1] = b1 .. m_IPA.format_IPA_full({
			lang = get_lang(),
			items = {ipa_item}
		})
		-- Audio
		for _, aud in ipairs(pt.audio_list or {}) do
			text_parts[#text_parts + 1] = fmt_audio(aud, content_bullet)
		end
		-- Rhyme
		if rhyme_objs then
			local r = fmt_rhyme(rhyme_objs, content_bullet)
			if r then text_parts[#text_parts + 1] = r end
		end
		-- Hyphenation
		if hyph_objs then
			local h = fmt_hyph(hyph_objs, content_bullet)
			if h then text_parts[#text_parts + 1] = h end
		end
		-- Homophones (hmp_list is absent for terms without inline modifiers)
		if pt.hmp_list and #pt.hmp_list > 0 then
			local hmp = fmt_hmp(pt.hmp_list, content_bullet)
			if hmp then text_parts[#text_parts + 1] = hmp end
		end
	end
	return concat(text_parts, "\n")
end
-- Expose the core converters under their public names and hand the module
-- table back to the caller.
export.toIPA, export.hyphenate, export.rhyme = to_ipa, get_hyphenation, get_rhyme
return export
pqyyto9ntkmwafrrq481zamx4xplqhv
မဝ်ဂျူ:User:TongcyDai/lt-pron/doc
828
295397
395933
2026-05-29T18:21:03Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:User:TongcyDai/lt-pron/testcases|run_tests|comments=1}}"
395933
wikitext
text/x-wiki
{{#invoke:User:TongcyDai/lt-pron/testcases|run_tests|comments=1}}
t3q3kr2mjufakt1cg21lqt8ndqy72wu
မဝ်ဂျူ:User:TongcyDai/lt-pron/testcases
828
295398
395934
2026-05-29T18:22:03Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "local tests = require("Module:UnitTests") local m_pron = require("Module:User:TongcyDai/lt-pron") local unpack = unpack or table.unpack -- ── helpers ────────────────────────────────────────────────────────────────── local function tag_IPA(s) return '<span class="IPA">' .. s .. '<..."
395934
Scribunto
text/plain
local tests = require("Module:UnitTests")
local m_pron = require("Module:User:TongcyDai/lt-pron")
local unpack = unpack or table.unpack
-- ── helpers ──────────────────────────────────────────────────────────────────
-- Wrap `s` in a <span class="IPA"> element.
local function tag_IPA(s)
	local open_tag, close_tag = '<span class="IPA">', '</span>'
	return open_tag .. s .. close_tag
end
-- Check that toIPA(term), normalized to NFC, equals `expected`; both sides are
-- wrapped in the IPA span before comparison.
function tests:check_IPA(term, expected, comment)
	local actual = tag_IPA(mw.ustring.toNFC(m_pron.toIPA(term)))
	local wanted = tag_IPA(expected)
	local options = { comment = comment, show_difference = true }
	self:equals(term, actual, wanted, options)
end
-- Check that hyphenate(term), joined with "‧", equals `expected`.
function tests:check_hyph(term, expected, comment)
	local joined = table.concat(m_pron.hyphenate(term), "‧")
	local options = { comment = comment, show_difference = true }
	self:equals(term, joined, expected, options)
end
-- Check that the rhyme derived from toIPA(term), normalized to NFC, equals
-- `expected`. A missing rhyme is compared as the empty string.
function tests:check_rhyme(term, expected, comment)
	local rhyme = m_pron.rhyme(m_pron.toIPA(term)) or ""
	local actual = mw.ustring.toNFC(rhyme)
	local options = { comment = comment, show_difference = true }
	self:equals(term, actual, expected, options)
end
-- ════════════════════════════════════════════════════════════════════════════
-- IPA TESTS
-- ════════════════════════════════════════════════════════════════════════════
-- A: Consonants and basic palatalization (one example per consonant, in two
-- versions: hard variant + soft variant before front vowel).
function tests:test_IPA_A_consonants_and_palatalization()
local examples = {
-- B, b
{ "bai̇̃gti", "²ˈbɐɪˑktʲɪ", "b" },
{ "bi̇̀rbt", "¹ˈbʲɪrpt", "bʲ" },
-- C, c
{ "cùkrus", "ˈt͡sʊkrʊs", "c" },
{ "ci̇̀bė", "ˈt͡sʲɪbʲeː", "cʲ" },
-- Č, č
{ "čaižùs", "t͡ʃɐɪˈʒʊs", "č" },
{ "Kãčinas", "²ˈkɑːt͡ʃʲɪnɐs", "čʲ" },
-- D, d
{ "daũg", "²ˈdɒʊˑk", "d" },
{ "di̇̀delis", "ˈdʲɪdʲɛlʲɪs", "dʲ" },
-- F, f
{ "fãzė", "²ˈfɑːzʲeː", "f" },
{ "filė̃", "fʲɪ²ˈlʲeː", "fʲ" },
-- G, g
{ "gãlas", "²ˈɡɑːlɐs", "g" },
{ "girià", "ɡʲɪˈrʲɛ", "gʲ" },
-- H, h
{ "hãlė", "²ˈɣɑːlʲeː", "h" },
{ "hi̇̀mnas", "¹ˈɣʲɪmnɐs", "hʲ" },
-- J, j
{ "jáunas<base:jaunas>", "¹ˈjæˑʊnɐs", "j" },
{ "vajè", "ʋɐˈjɛ", "j" },
-- K, k
{ "kãras", "²ˈkɑːrɐs", "k" },
{ "kitóks", "kʲɪ¹ˈtoːks", "kʲ" },
-- L, l
{ "lãbas", "²ˈlɑːbɐs", "l" },
{ "lė̃kti", "²ˈlʲeːktʲɪ", "lʲ" },
-- M, m
{ "mamà", "mɐˈmɐ", "m" },
{ "méilė", "¹ˈmʲæˑɪlʲeː", "mʲ" },
-- N, n
{ "nósis", "¹ˈnoːsʲɪs", "n" },
{ "knygà", "knʲiːˈɡɐ", "nʲ" },
-- P, p
{ "pãdas", "²ˈpɑːdɐs", "p" },
{ "pẽčius", "²ˈpʲæːt͡ʃʲʊ̟s", "pʲ" },
-- R, r
{ "rai̇̃dė", "²ˈrɐɪˑdʲeː", "r" },
{ "kairė̃", "kɐɪ²ˈrʲeː", "rʲ" },
-- S, s
{ "sõdas", "²ˈsoːdɐs", "s" },
{ "si̇́ela", "¹ˈsʲiɛlɐ", "sʲ" },
-- Š, š
{ "šókti", "¹ˈʃoːktʲɪ", "š" },
{ "šỹpsena", "²ˈʃʲiːpʲsʲɛnɐ", "šʲ" },
-- T, t
{ "tetà", "tʲɛˈtɐ", "tʲ, t" },
-- V, v
{ "svajõnė", "sʋɐ²ˈjo̟ːnʲeː", "v" },
{ "vi̇̀ltis", "¹ˈʋʲɪlʲtʲɪs", "vʲ" },
-- Z, z
{ "zui̇̃kis", "²ˈzʊɪˑkʲɪs", "z" },
{ "zi̇̀r̃zinti", "¹ˈzʲɪrʲzʲɪnʲtʲɪ", "zʲ" },
-- Ž, ž
{ "žolė̃", "ʒoː²ˈlʲeː", "ž" },
{ "žẽmė", "²ˈʒʲæːmʲeː", "žʲ" },
-- Ch, ch
{ "chalãtas", "xɐ²ˈlɑːtɐs", "ch" },
{ "cherèsas", "xʲɛˈrʲɛsɐs", "chʲ" },
-- Dz, dz
{ "dzū̃kai", "²ˈd͡zuːkɐɪ", "dz" },
{ "dzi̇̀ngt", "¹ˈd͡zʲɪŋkt", "dzʲ" },
-- Dž, dž
{ "džáulis", "¹ˈd͡ʒɑˑʊlʲɪs", "dž" },
{ "džiãzas", "²ˈd͡ʒʲæːzɐs", "džʲ" },
}
for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end
end
-- B: Monophthongs (short vs. long, native vs. loanword variants).
function tests:test_IPA_B_monophthongs()
local examples = {
-- A, a
{ "tàvo", "ˈtɐʋoː", "à" },
{ "vãkaras", "²ˈʋɑːkɐrɐs", "ã, a" },
-- Ą, ą
{ "rýtą", "¹ˈrʲiːtɑː", "ą" },
{ "ą́žuolas", "¹ˈɑːʒuɔlɐs", "ą́" },
{ "šą̃la", "²ˈʃɑːlɐ", "ą̃" },
-- E, e
{ "Pelesà", "pʲɛlʲɛˈsɐ", "e" },
{ "nèšti", "ˈnʲɛʃʲtʲɪ", "è" },
{ "mètras", "ˈmʲɛtrɐs", "è" },
{ "mẽnas", "²ˈmʲæːnɐs", "ẽ" },
-- E with `^`: closed short e in loanwords (VLKK §6.7.3 — written `ẹ`)
{ "se^ktà", "sʲekˈtɐ", "e^ (closed short e in loanwords)" },
{ "re^ži̇̀mas", "rʲeˈʒʲɪmɐs", "e^ (closed short e in loanwords)" },
-- Ę, ę
{ "tęsinỹs", "tʲæːsʲɪ²ˈnʲiːs", "ę" },
{ "tavę̃s", "tɐ²ˈʋʲæːs", "ę̃" },
{ "tę́vas", "¹ˈtʲæːʋɐs", "ę́" },
-- Ė, ė
{ "ėdė́jas", "eː¹ˈdʲeːjɛs", "ė, ė́" },
{ "gėlė̃", "ɡʲeː²ˈlʲeː", "ė, ė̃" },
-- I, i
{ "liki̇̀mas", "lʲɪˈkʲɪmɐs", "i, i̇̀" },
-- Į, į
{ "įlį̃sti", "iː²ˈlʲiːsʲtʲɪ", "į, į̃" },
{ "į́spūdis", "¹ˈiːspuːdʲɪs", "į́" },
-- Y, y
{ "mylė́ti", "mʲiː¹ˈlʲeːtʲɪ", "y" },
{ "ýda", "¹ˈiːdɐ", "ý" },
{ "knỹgė", "²ˈknʲiːɡʲeː", "ỹ" },
-- O, o (long native [oː] vs. short loanword [ɔ])
{ "norė́ti", "noː¹ˈrʲeːtʲɪ", "o (native, long)" },
{ "óras", "¹ˈoːrɐs", "ó" },
{ "keliõnė", "kʲɛ²ˈlʲo̟ːnʲeː", "õ" },
{ "òmas", "ˈɔmɐs", "ò (loanword, short)" }, -- FIXME: ò /oː/ exists?
{ "stòksas", "ˈstɔksɐs", "ò (loanword, grave)" },
{ "Zo^jà", "zɔˈjɛ", "o^ (loanword [ɔ], unstressed)" },
{ "žo^ngliẽrius", "ʒɔŋ²ˈɡlʲiɛrʲʊ̟s", "o^ (loanword [ɔ], unstressed)" },
{ "fo^to^parodà", "fɔtɔpɐroːˈdɐ", "o^ (loanword foto- prefix)" },
{ "ho^mo^ni̇̀mas", "ɣɔmɔˈnʲɪmɐs", "o^ (loanword homo- prefix)" },
-- U, u
{ "ugni̇̀s", "ʊˈɡnʲɪs", "u" },
{ "pùsė", "ˈpʊsʲeː", "ù" },
-- Ų, ų
{ "siųstùvas", "sʲu̟ːˈstʊʋɐs", "ų" },
{ "įskų́sti", "iː¹ˈskuːsʲtʲɪ", "ų́" },
{ "sių̃sti", "²ˈsʲu̟ːsʲtʲɪ", "ų̃" },
-- Ū, ū
{ "sū́nūs", "¹ˈsuːnuːs", "ū́, ū" },
{ "rū̃gštis", "²ˈruːkʃʲtʲɪs", "ū̃" },
}
for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end
end
-- C1: Diphthongs ai / au — three accent positions each.
function tests:test_IPA_C1_diphthongs_ai_au()
	-- Each row is { term, expected IPA, comment }.
	local examples = {
		-- ai
		{ "táikino", "¹ˈtɑˑɪkʲɪnoː", "ái (acute)" },
		{ "tai̇̃ką", "²ˈtɐɪˑkɑː", "ai̇̃ (tilde)" },
		{ "taiki̇̀klis", "tɐɪˈkʲɪklʲɪs", "ai (unstressed)" },
		-- au
		{ "tráukia", "¹ˈtrɑˑʊkʲɛ", "áu (acute)" },
		{ "patraũklų", "pɐ²ˈtrɒʊˑkluː", "aũ (tilde)" },
		{ "pértrauka", "¹ˈpʲæˑrtrɒʊkɐ", "au (unstressed)" },
	}
	for _, row in ipairs(examples) do
		self:check_IPA(row[1], row[2], row[3])
	end
end
-- C2: Diphthongs ei / ui — three accent positions each.
function tests:test_IPA_C2_diphthongs_ei_ui()
local examples = {
-- ei
{ "méilė", "¹ˈmʲæˑɪlʲeː", "éi (acute)" },
{ "mei̇̃liai", "²ˈmʲɛɪˑlʲɛɪ", "ei̇̃ (tilde)" },
{ "meilikáuti", "mʲɛɪlʲɪ¹ˈkɑˑʊtʲɪ", "ei (unstressed)" },
-- ui
{ "kùisytis", "¹ˈkʊɪsʲiːtʲɪs", "ùi (grave; first element short)" },
{ "kui̇̃sti", "²ˈkʊɪˑsʲtʲɪ", "ui̇̃ (tilde)" },
{ "kuitinė́tis", "kʊɪtʲɪ¹ˈnʲeːtʲɪs", "ui (unstressed)" },
{ "bùivo^las", "¹ˈbʊɪʋɔlɐs", "ùi (grave; with loanword o^)" },
-- úi (acute with first element half-long) is a free-style variant; not tested separately
}
for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end
end
-- C3: Variable diphthongs ie / uo (treated phonemically as monophthongs by VLKK,
-- but written as digraphs and patterning with diphthongs in accent placement).
function tests:test_IPA_C3_diphthongs_ie_uo()
local examples = {
-- ie
{ "si̇́ena", "¹ˈsʲiɛnɐ", "íe (acute)" },
{ "jiẽ<base:jie>", "²ˈjiɛ", "iẽ (tilde)" },
{ "Diẽvas", "²ˈdʲiɛʋɐs", "iẽ (tilde)" },
{ "Ki̇̀msienė", "¹ˈkʲɪmʲsʲiɛnʲeː", "ie (unstressed)" },
-- uo
{ "úodas", "¹ˈuɔdɐs", "úo (acute)" },
{ "ruduõ", "rʊ²ˈduɔ", "uõ (tilde)" },
{ "Aluojà<base:Aluoja>", "ɐluɔˈjɛ", "uo (unstressed)" },
{ "vaizduõtė", "ʋɐɪ²ˈzduɔtʲeː", "uõ (tilde, after consonant cluster)" },
}
for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end
end
-- C4: Mixed diphthongs of the a-series — al, am, an, ar.
function tests:test_IPA_C4_mixed_a()
local examples = {
-- al
{ "álkanas", "¹ˈɑˑlkɐnɐs", "ál (acute)" },
{ "al̃kis", "²ˈɐlʲˑkʲɪs", "al̃ (tilde)" },
{ "alkanáuti", "ɐlkɐ¹ˈnɑˑʊtʲɪ", "al (unstressed)" },
-- am
{ "skámbčioti", "¹ˈskɑˑmʲpʲt͡ʃʲo̟ːtʲɪ", "ám (acute)" },
{ "skam̃biai", "²ˈskɐmʲˑbʲɛɪ", "am̃ (tilde)" },
{ "skambùmas", "skɐmˈbʊmɐs", "am (unstressed)" },
-- an
{ "ránkioja<base:rankioja>", "¹ˈrɑˑŋʲkʲo̟ːjɛ", "án (acute)" },
{ "rañkdarbis", "²ˈrɐŋˑɡdɐrʲbʲɪs", "añ (tilde)" },
{ "rankinùkas", "rɐŋʲkʲɪˈnʊkɐs", "an (unstressed)" },
-- ar
{ "sárgas", "¹ˈsɑˑrɡɐs", "ár (acute)" },
{ "sar̃giai", "²ˈsɐrʲˑɡʲɛɪ", "ar̃ (tilde)" },
{ "sargýba", "sɐrʲ¹ˈɡʲiːbɐ", "ar (unstressed)" },
}
for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end
end
-- C5: Mixed diphthongs of the e-series — el, em, en, er.
-- Foreign-word variants with grave (èl, èm, èn, èr) read tvirtapradiškai
-- are tested as alternates per VLKK §9.21, §9.23.
function tests:test_IPA_C5_mixed_e()
local examples = {
-- el
{ "kélmas", "¹ˈkʲæˑlmɐs", "él (acute)" },
{ "Kel̃mė", "²ˈkʲɛlʲˑmʲeː", "el̃ (tilde)" },
{ "kelmùtis", "kʲɛlˈmʊtʲɪs", "el (unstressed)" },
{ "èlfas", "¹ˈɛlfɐs", "èl (loanword, grave; tvirtapradiškai)" },
{ "el̃fas", "²ˈɛlˑfɐs", "el̃ (loanword, tilde)" },
-- em
{ "drémžti", "¹ˈdʲrʲæˑmʲʃʲtʲɪ", "ém (acute; ž → š before t)" },
{ "drem̃bti", "²ˈdʲrʲɛmʲˑpʲtʲɪ", "em̃ (tilde)" },
{ "Trempai̇̃", "tʲrʲɛm²ˈpɐɪˑ", "em (unstressed)" },
{ "Jaržèmskis", "jɛrʲ¹ˈʒʲɛmʲsʲkʲɪs", "èm (loanword, grave)" },
{ "kem̃pingas", "²ˈkʲɛmʲˑpʲɪŋɡɐs", "em̃ (loanword, tilde)" },
-- en
{ "véngia", "¹ˈʋʲæˑŋʲɡʲɛ", "én (acute)" },
{ "žeñgsena", "²ˈʒʲɛŋˑksʲɛnɐ", "eñ (tilde)" },
{ "vengi̇̀mas", "ʋʲɛŋʲˈɡʲɪmɐs", "en (unstressed)" },
{ "ménkė", "¹ˈmʲæˑŋʲkʲeː", "én (acute, before nk)" },
{ "meñkinti", "²ˈmʲɛŋʲˑkʲɪnʲtʲɪ", "eñ (tilde, before nk)" },
{ "menkystà", "mʲɛŋʲkʲiːˈstɐ", "en (unstressed, before nk)" },
{ "hènris", "¹ˈɣʲɛnʲrʲɪs", "èn (loanword, grave)" },
{ "ceñtas", "²ˈt͡sʲɛnˑtɐs", "eñ (loanword, tilde)" },
{ "ãmen", "²ˈɑːmʲɛn", "en (loanword, unstressed final)" },
-- er
{ "nérti", "¹ˈnʲæˑrʲtʲɪ", "ér (acute)" },
{ "ner̃šti", "²ˈnʲɛrʲˑʃʲtʲɪ", "er̃ (tilde)" },
{ "nerštãvietė", "nʲɛr²ˈʃtɑːʋʲiɛtʲeː", "er (unstressed)" },
{ "ko^ncèrtas", "kɔnʲ¹ˈt͡sʲɛrtɐs", "èr (loanword, grave)" },
{ "ko^ncer̃tas", "kɔnʲ²ˈt͡sʲɛrˑtɐs", "er̃ (loanword, tilde)" },
{ "ter̃minas", "²ˈtʲɛrʲˑmʲɪnɐs", "er̃ (loanword, tilde)" },
}
for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end
end
-- C6: Mixed diphthongs of the i-series — il, im, in, ir.
function tests:test_IPA_C6_mixed_i()
local examples = {
-- il
{ "pi̇̀lti", "¹ˈpʲɪlʲtʲɪ", "ìl (grave)" },
{ "pil̃vas", "²ˈpʲɪlˑʋɐs", "il̃ (tilde)" },
{ "pilti̇̀nis", "pʲɪlʲˈtʲɪnʲɪs", "il (unstressed)" },
-- im
{ "ti̇̀mptelėjimas<base:timptelėjimas>", "¹ˈtʲɪmʲpʲtʲɛlʲeːjɪmɐs", "ìm (grave)" },
{ "tim̃pinti", "²ˈtʲɪmʲˑpʲɪnʲtʲɪ", "im̃ (tilde)" },
{ "timpinė́ti", "tʲɪmʲpʲɪ¹ˈnʲeːtʲɪ", "im (unstressed)" },
-- in
{ "gi̇̀nti", "¹ˈɡʲɪnʲtʲɪ", "ìn (grave)" },
{ "giñklas", "²ˈɡʲɪŋˑklɐs", "iñ (tilde)" },
{ "ginkluõtė", "ɡʲɪŋ²ˈkluɔtʲeː", "in (unstressed)" },
-- ir
{ "di̇̀rti", "¹ˈdʲɪrʲtʲɪ", "ìr (grave)" },
{ "dir̃žas", "²ˈdʲɪrˑʒɐs", "ir̃ (tilde)" },
{ "dirži̇̀nis", "dʲɪrʲˈʒʲɪnʲɪs", "ir (unstressed)" },
}
for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end
end
-- C7: Mixed diphthongs of the u-series — ul, um, un, ur.
function tests:test_IPA_C7_mixed_u()
local examples = {
-- ul
{ "dùlkė", "¹ˈdʊlʲkʲeː", "ùl (grave)" },
{ "dul̃kti", "²ˈdʊlʲˑktʲɪ", "ul̃ (tilde)" },
{ "dulkė́tas", "dʊlʲ¹ˈkʲeːtɐs", "ul (unstressed)" },
-- um
{ "grùmtis", "¹ˈɡrʊmʲtʲɪs", "ùm (grave)" },
{ "grum̃ba", "²ˈɡrʊmˑbɐ", "um̃ (tilde)" },
{ "grumtỹnės", "ɡrʊmʲ²ˈtʲiːnʲeːs", "um (unstressed)" },
-- un
{ "skùndė", "¹ˈskʊnʲdʲeː", "ùn (grave)" },
{ "skuñdas", "²ˈskʊnˑdɐs", "uñ (tilde)" },
{ "skundi̇̀kas", "skʊnʲˈdʲɪkɐs", "un (unstressed)" },
-- ur
{ "gùrkšnis", "¹ˈɡʊrʲkʃʲnʲɪs", "ùr (grave)" },
{ "gur̃gti", "²ˈɡʊrʲˑktʲɪ", "ur̃ (tilde)" },
{ "gurkšnóti", "ɡʊrk¹ˈʃnoːtʲɪ", "ur (unstressed)" },
}
for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end
end
-- C8: Mixed diphthongs of the foreign o-series — ol, om, on, or
-- (per VLKK §9.22, default reading is tvirtapradiškai with grave).
function tests:test_IPA_C8_mixed_o_foreign()
local examples = {
-- ol
{ "kòlba", "¹ˈkɔlbɐ", "òl (grave; default reading)" },
{ "hòldingas", "¹ˈɣɔlʲdʲɪŋɡɐs", "òl (grave)" },
-- om
{ "do^mkrãtas", "dɔm²ˈkrɑːtɐs", "om (unstressed)" },
{ "pòmpa", "¹ˈpɔmpɐ", "òm (grave)" },
-- on
{ "po^ntònas", "pɔnˈtɔnɐs", "on (unstressed) / òn (grave)" },
{ "fòndas", "¹ˈfɔndɐs", "òn (grave)" },
-- or
{ "po^rtrètas", "pɔrʲˈtʲrʲɛtɐs", "or (unstressed)" },
{ "fòrma", "¹ˈfɔrmɐ", "òr (grave)" },
}
for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end
end
-- C9: Foreign diphthongs eu, oi, ou — three accent positions where attested.
function tests:test_IPA_C9_diphthongs_foreign_eu_oi_ou()
local examples = {
-- eu
{ "plèura", "¹ˈpʲlʲɛʊrɐ", "èu (grave; tvirtapradiškai)" },
{ "eũras", "²ˈɛʊˑrɐs", "eũ (tilde)" },
{ "Euro^pà", "ɛʊrɔˈpɐ", "eu (unstressed)" },
-- éu does not exist (é is long; éu would be long+long)
-- oi
{ "Kóiva", "¹ˈkoˑɪʋɐ", "ói (acute; rare)" },
{ "mòira", "¹ˈmɔɪrɐ", "òi (grave)" },
{ "sinusòidė", "sʲɪnʊ¹ˈsɔɪdʲeː", "òi (grave)" },
{ "bròileris", "¹ˈbrɔɪlʲɛrʲɪs", "òi (grave)" },
{ "oikumenà", "ɔɪkʊmʲɛˈnɐ", "oi (unstressed)" },
-- oi̇̃ does not exist (per VLKK: oi reads tvirtapradiškai only)
-- ou
{ "šòu", "¹ˈʃɔʊ", "òu (grave)" },
{ "klòunas", "¹ˈklɔʊnɐs", "òu (grave)" },
{ "klounadà", "klɔʊnɐˈdɐ", "ou (unstressed)" },
-- óu does not exist (ó is long)
-- oũ does not exist (per VLKK: ou reads tvirtapradiškai only)
}
for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end
end
-- D: Fake diphthongs — vowel sequences that LOOK like diphthongs but are
-- actually two separate vowels in adjacent syllables (hiatus). The module
-- must NOT collapse these into a single diphthong nucleus.
function tests:test_IPA_D_fake_diphthongs()
local examples = {
-- ai (fake)
{ "nebepàima", "nʲɛbʲɛˈpɐ.ɪmɐ", "ài" },
{ "archãika", "ɐr²ˈxɑː.ɪkɐ", "ãi" },
{ "pai̇́eško", "pɐ¹ˈjiɛʃkoː", "ai̇́" },
{ "betai̇̀nas", "bʲɛtɐ.ˈɪnɐs", "ai̇̀" },
-- au (fake)
{ "pàurzgė", "ˈpɐ.ʊrʲzʲɡʲeː", "àu" },
{ "šilãuogė", "ʃʲɪ²ˈlɑː.uɔɡʲeː", "ãu" },
{ "Naùmo^vas", "nɐ.ˈʊmɔʋɐs", "aù" },
{ "nepaúosto", "nʲɛpɐ.¹ˈuɔstoː", "aú" },
-- ei (fake)
{ "nebèima", "nʲɛˈbʲɛ.ɪmɐ", "èi" },
{ "nebei̇̀rti", "nʲɛbʲɛ.¹ˈɪrʲtʲɪ", "ei̇̀" },
{ "neji̇́eško<base:neieško>", "nʲɛ¹ˈjiɛʃkoː", "ei̇́ (no-j-insertion not found yet)" },
-- ẽi not found
-- ui (fake)
{ "sui̇̀ro", "sʊ.ˈɪroː", "ui̇̀" },
{ "sui̇́eško", "sʊ¹ˈjiɛʃkoː", "ui̇́" },
-- úi, ũi not found
-- ie (fake)
{ "besi̇̀elgė", "bʲɛˈsʲɪ.ɛlʲɡʲeː", "i̇̀e" },
{ "ˌpo^lièsteris", "ˌpɔlʲɪ.ˈɛsʲtʲɛrʲɪs", "i̇̀e" },
{ "įsiérzina", "iːsʲɪ.¹ˈæˑrʲzʲɪnɐ", "ié" },
-- i̇̃e not found
-- uo (fake)
{ "sùošė", "ˈsʊ.oːʃʲeː", "ùo" },
-- ũo, uó not found; for uò only /ʊˈɔ/ examples found
-- foreign: ao
{ "mao^ji̇̀zmas<base:maojizmas>", "mɐ.ɔˈjɪzmɐs", "ao (no-j-insertion variant)" },
-- foreign: oi (fake)
-- õi, oi̇́ not found
-- foreign: ou (fake)
{ "fino^ùgrai", "fʲɪnɔ.ˈʊɡrɐɪ", "où" },
-- õu, oú not exist
-- foreign: eu (fake)
{ "neúosti", "nʲɛ.¹ˈuɔsʲtʲɪ", "eú" },
{ "teùrginis", "tʲɛ.¹ˈʊrʲɡʲɪnʲɪs", "eù" },
-- ẽu not exist
}
for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end
end
-- E: Fronting of o/u after a palatalized consonant or j (VLKK IPA rec §4.4):
-- [oː → o̟ː], [ʊ → ʊ̟], [uː → u̟ː], [uɔ → u̟ɔ].
function tests:test_IPA_E_o_u_fronting_after_palatal()
local examples = {
-- After palatalized consonant + o/u
{ "sagióti", "sɐ¹ˈɡʲo̟ːtʲɪ", "Cʲ + o → o̟ː" },
{ "angijo^mà", "ɐŋʲɡʲɪjɔ̟ˈmɐ", "Cʲ + o^ → ɔ̟" },
{ "siuñčia", "²ˈsʲʊ̟nʲˑt͡ʃʲɛ", "Cʲ + u → ʊ̟" },
{ "sių̃sti", "²ˈsʲu̟ːsʲtʲɪ", "Cʲ + ų → u̟ː" },
{ "ãčiū", "²ˈɑːt͡ʃʲu̟ː", "Cʲ + ū → u̟ː" },
{ "liuobà", "lʲu̟ɔˈbɐ", "Cʲ + uo → u̟ɔ" },
-- After j + o/u (j inherently palatal, triggers fronting)
{ "at.jójo<base:atjojo>", "ɐtʲ¹ˈjo̟ːjo̟ː", "j + o → o̟ː" },
{ "Lo^jo^là<base:Lojola>", "lɔjɔ̟ˈlɐ", "j + o^ → ɔ̟" },
{ "Jùlė<base:Julė>", "ˈjʊ̟lʲeː", "j + u → ʊ̟" },
{ "ãkcijų<base:akcijų>", "²ˈɑːkt͡sʲɪju̟ː", "j + ų → u̟ː" },
{ "jū́ra<base:jūra>", "¹ˈju̟ːrɐ", "j + ū → u̟ː" },
{ "júodas<base:juodas>", "¹ˈju̟ɔdɐs", "j + uo → u̟ɔ" },
}
for _, ex in ipairs(examples) do self:check_IPA(unpack(ex)) end
end
-- F: Palatalization spreading — palatalization of a front vowel propagates
-- leftward through preceding consonants, including through k/g (which do
-- not directly palatalize but transmit the feature; VLKK §13).
function tests:test_IPA_F_palatalization_spreading()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		-- Spreading through obstruent clusters
		{ "skri̇́eti", "¹ˈsʲkrʲiɛtʲɪ", "Spreading left through r and k" },
		{ "displė̃jus<base:displėjus>", "dʲɪ²ˈsʲpʲlʲeːjʊ̟s", "Spreading left through cluster spl" },
		-- Spreading to next syllable's onset
		{ "pùlti", "¹ˈpʊlʲtʲɪ", "Palatalized l before t (softened by following i)" },
		{ "méilė", "¹ˈmʲæˑɪlʲeː", "Palatalized resonant cluster" },
		-- Secondary stress should not block spreading
		{ "išˌverstaãkis", "ɪʃʲˌʋʲɛrstɐ.²ˈɑːkʲɪs", "Secondary stress should not stop palatalization" },
		-- VLKK §13: l palatalizes through k/g before another soft consonant
		{ "al̃ksnis", "²ˈɐlʲˑksʲnʲɪs", "lʲ through k before sʲnʲ (alksnis)" },
		{ "álgebra", "¹ˈɑˑlʲɡʲɛbrɐ", "lʲ through gʲ before front vowel (álgebra)" },
		{ "buhálteris", "bʊ¹ˈɣɑˑlʲtʲɛrʲɪs", "lʲ before tʲ (buhálteris)" },
		{ "Báltija", "¹ˈbɑˑlʲtʲɪjɛ", "lʲ before tʲ (Báltija)" },
		{ "fakultètas", "fɐkʊlʲˈtʲɛtɐs", "lʲ before tʲ (fakultètas)" },
		{ "fi̇̀lme", "¹ˈfʲɪlʲmʲɛ", "lʲ before mʲ (fi̇̀lme)" },
		-- The expected IPA keeps k hard here (k only transmits the softness),
		-- so the note says "k", matching the al̃ksnis row.
		{ "smùlkmena", "¹ˈsmʊlʲkmʲɛnɐ", "lʲ through k before mʲ (smùlkmena)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- G: ng / nk reverse palatalization rule.
-- n + k/g normally velarizes to ŋ. The ŋ palatalizes only when the FOLLOWING
-- k/g itself palatalizes (i.e., when the cluster is directly followed by
-- a front vowel). When the k/g stays hard (because next is a consonant),
-- ŋ also stays hard.
function tests:test_IPA_G_ng_nk_reverse_palatalization()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		{ "žiñgsnis", "²ˈʒʲɪŋˑksʲnʲɪs", "indirect: ng + s → ŋ stays hard" },
		{ "plunksnẽlė", "plʊŋk²ˈsʲnʲæːlʲeː", "indirect: nk + s → ŋ stays hard" },
		{ "anketà", "ɐŋʲkʲɛˈtɐ", "direct: nk + e → ŋʲkʲ" },
		{ "Bangỹs", "bɐŋʲ²ˈɡʲiːs", "direct: ng + y → ŋʲɡʲ" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- G2: VLKK §6.3 extends the n → ŋ assimilation to ch [x] and h [ɣ] in
-- addition to k and g, because all four are velar/post-velar and pull n's
-- place of articulation backwards. ŋʲ surfaces when the following ch/h
-- is itself palatalized (front vowel triggers); a back vowel after ch/h
-- keeps the whole cluster hard.
function tests:test_IPA_G2_nasal_before_ch_h()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		-- VLKK §6.3 explicit example
		{ "brònchai", "¹ˈbrɔŋxɐɪ", "n + ch [x] → ŋ + x (VLKK §6.3)" },
		{ "mezenchimà", "mʲɛzʲɛŋʲxʲɪˈmɐ", "ŋʲ + xʲ" },
		-- Notes below follow the expected IPA: h before front y is soft
		-- (ŋʲɣʲ), h before back a stays hard (ŋɣ).
		{ "menhỹras", "mʲɛŋʲ²ˈɣʲiːrɐs", "ŋʲ + ɣʲ" },
		{ "inhaliãcija", "ɪŋɣɐ²ˈlʲæːt͡sʲɪjɛ", "ŋ + ɣ" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- H1: Voicing assimilation (regressive: a stop/fricative agrees in voicing
-- with the next obstruent). VLKK §16, §17.
function tests:test_IPA_H1_voicing_assimilation()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		{ "di̇̀rbti", "¹ˈdʲɪrʲpʲtʲɪ", "b → p before t (devoicing)" },
		{ "apgáuti", "ɐb¹ˈɡɑˑʊtʲɪ", "p → b before g (voicing)" },
		{ "už.trùkti", "ʊʃˈtrʊktʲɪ", "ž → š before t (devoicing)" },
		{ "li̇̀pdo", "ˈlʲɪbdoː", "p → b before d (voicing)" },
		{ "kàsdavo", "ˈkɐzdɐʋoː", "s → z before d (voicing)" },
		{ "iš.gir̃do", "ɪʒʲ²ˈɡʲɪrˑdoː", "š → ž before g (voicing)" },
		{ "iš.džiū́ti", "ɪʒʲ¹ˈd͡ʒʲu̟ːtʲɪ", "š → ž before dž (voicing)" },
		{ "degtùkas", "dʲɛkˈtʊkɐs", "g → k before t (devoicing)" },
		{ "žiebtùvas", "ʒʲiɛpˈtʊʋɐs", "b → p before t (devoicing)" },
		{ "grį̇̃žti", "²ˈɡrʲiːʃʲtʲɪ", "ž → š before t (devoicing)" },
		{ "už.púola", "ʊʃ¹ˈpuɔlɐ", "ž → š before p (devoicing)" },
		{ "už.króvė", "ʊʃ¹ˈkroːʋʲeː", "ž → š before k (devoicing)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- H2: Word-final devoicing (VLKK §18).
function tests:test_IPA_H2_word_final_devoicing()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		{ "juolàb<base:juolab>", "ju̟ɔˈlɐp", "b → p word-finally" },
		{ "visàd", "ʋʲɪˈsɐt", "d → t word-finally" },
		{ "jóg<base:jog>", "¹ˈjo̟ːk", "g → k word-finally" },
		{ "ùž", "ˈʊʃ", "ž → š word-finally" },
		{ "daũg", "²ˈdɒʊˑk", "final g → k" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- H3: Place assimilation between sibilants and affricates (VLKK §23):
-- s + č → š; z + dž → ž; š + c → s; ž + dz → z.
function tests:test_IPA_H3_place_assimilation()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		{ "mókesčiai", "¹ˈmoːkʲɛʃʲt͡ʃʲɛɪ", "s + č → š (place assim.)" },
		{ "kàsčiau", "ˈkɐʃʲt͡ʃʲɛʊ", "s + č → š (place assim.)" },
		{ "vabzdžiai̇̃", "ʋɐbʲ²ˈʒʲd͡ʒʲɛɪˑ", "z + dž → ž (place assim.)" },
		{ "išcukrúoti", "ɪst͡sʊ¹ˈkruɔtʲɪ", "š + c → s (place assim.)" },
		-- Devoicing ž yields š (not z); place assimilation then turns š into s.
		{ "už.cỹpti", "ʊsʲ²ˈt͡sʲiːpʲtʲɪ", "ž + c → š (devoicing) → s (place assim.)" },
		-- ž + dz not found
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- H4: Geminate simplification — two identical consonants reduce to one
-- (VLKK §21, plus the same effect on stops once they have been levelled
-- by voicing assimilation, e.g. d + t → t + t → t).
function tests:test_IPA_H4_geminate_simplification()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		-- Sibilants (identical pairs)
		{ "pùsseserė", "ˈpʊsʲɛsʲɛrʲeː", "ss → s" },
		{ "iššóko", "ɪ¹ˈʃoːkoː", "šš → š" },
		{ "užžiẽbti", "ʊ²ˈʒʲiɛpʲtʲɪ", "žž → ž (also b → p before t)" },
		-- zz not found
		-- Sonorants (Liquids and Nasals)
		-- The <base:…> spelling is given without accent marks, like every
		-- other <base:…> in this file.
		{ "so^ciˌjalliberãlas<base:socialliberalas>", "sɔt͡sʲɪˌjɛlʲɪbʲɛ²ˈrɑːlɐs", "ll → l" },
		{ "šė́mmargas", "¹ˈʃʲeːmɐrɡɐs", "mm → m" },
		{ "viennỹtis", "ʋʲiɛ²ˈnʲiːtʲɪs", "nn → n" },
		{ "pérrašo", "¹ˈpʲæːrɐʃoː", "rr → r" },
		-- Bilabial stops (after voicing assimilation)
		-- bb not found
		{ "tar̃ppievis", "²ˈtɐrʲˑpʲiɛʋʲɪs", "pp → p" },
		{ "bóbpalaikė", "¹ˈboːpɐlɐɪkʲeː", "bp → pp → p (devoicing + degemination)" },
		-- pb not found
		-- Alveolar stops (after voicing assimilation)
		-- dd not found
		{ "añttrobis", "²ˈɐnˑtroːbʲɪs", "tt → t" },
		{ "Šmi̇̀dtas", "ˈʃʲmʲɪtɐs", "dt → tt → t (devoicing + degemination)" },
		{ "atdarà", "ɐdɐˈrɐ", "td → dd → d (voicing + degemination)" },
		-- Velar stops (after voicing assimilation)
		-- gg not found (needs g + g)
		{ "kiekkar̃t", "kʲiɛ²ˈkɐrˑt", "kk → k" },
		{ "daugkar̃t", "dɒʊ²ˈkɐrˑt", "gk → kk → k (devoicing + degemination)" },
		{ "ki̇́ekgi", "¹ˈkʲiɛɡʲɪ", "kg → gg → g (voicing + degemination)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- H5: Sibilant simplification — when two DIFFERENT sibilants meet at a
-- morpheme boundary, only the second is pronounced (VLKK §22).
function tests:test_IPA_H5_sibilant_simplification()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		{ "išsprę́sti", "ɪ¹ˈsʲpʲrʲæːsʲtʲɪ", "šs → s" },
		{ "ùžsienis", "ˈʊsʲiɛnʲɪs", "žs → s (via šs)" },
		{ "pùsšimtis", "ˈpʊʃʲɪmʲtʲɪs", "sš → š" },
		{ "pùszuikis", "ˈpʊzʊɪkʲɪs", "sz → z" },
		{ "pùsžalis", "ˈpʊʒɐlʲɪs", "sž → ž" },
		{ "išžarà", "ɪʒɐˈrɐ", "šž → ž" },
		{ "ùžšovas", "ˈʊʃoːʋɐs", "žš → š" },
		-- zš not found
		-- šz not found
		-- zs not found
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- I: Word-final j and v become non-syllabic [ɪ̯], [ʊ̯] (VLKK IPA rec §7.5).
function tests:test_IPA_I_final_j_v_nonsyllabic()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		{ "rytój<base:rytoj>", "rʲiː¹ˈtoːɪ̯", "final j after long o → ɪ̯" },
		{ "tuõj<base:tuoj>", "²ˈtuɔɪ̯", "final j after uo → ɪ̯" },
		{ "viduj̃<base:viduj>", "ʋʲɪ²ˈdʊɪˑ", "final j with tilde after short u" },
		-- The base spelling keeps š, matching the respelling and the ʃ in the
		-- expected IPA.
		{ "viršuj̃<base:viršuj>", "ʋʲɪr²ˈʃʊɪˑ", "final j with tilde after short u" },
		{ "sudiẽv", "sʊ²ˈdʲiɛʊ̯", "final v after ie → ʊ̯" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- J1: Hiatus — vowel sequences pronounced as two separate syllables, marked
-- either by morpheme boundary (native: prefix `.`) or explicitly preserved
-- (foreign: user-marked `.`). VLKK §24, §25, §27.2 (i-second variant).
function tests:test_IPA_J1_hiatus()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		-- Native prefix boundaries
		{ "pa.upỹs", "pɐ.ʊ²ˈpʲiːs", "prefix pa- + u" },
		{ "priim̃ti", "pʲrʲɪ.²ˈɪmʲˑtʲɪ", "prefix pri- + i" },
		{ "pri̇̀ima", "ˈpʲrʲɪ.ɪmɐ", "prefix pri- + i" },
		{ "pri̇̀ėmė", "ˈpʲrʲɪ.eːmʲeː", "prefix pri- + ė" },
		{ "priei̇̃ti", "pʲrʲɪ.²ˈɛɪˑtʲɪ", "prefix pri- + ei" },
		{ "priė̃jo", "pʲrʲɪ.²ˈeːjo̟ː", "prefix pri- + ė" },
		{ "nù.imtas", "ˈnʊ.ɪmtɐs", "prefix nu- + i (user-marked)" },
		-- Foreign words: hiatus preserved between non-i vowels (VLKK §25)
		{ "di.acetãtas", "dʲɪ.ɐt͡sʲɛ²ˈtɑːtɐs", "foreign i.a (user-marked)" },
		{ "di.akrilãtas", "dʲɪ.ɐkrʲɪ²ˈlɑːtɐs", "foreign i.a (user-marked)" },
		{ "fino^ùgrai", "fʲɪnɔ.ˈʊɡrɐɪ", "foreign o.u" },
		{ "paleo^nto^lògas", "pɐlʲɛ.ɔntɔˈlɔɡɐs", "paleo- + onto-: only e.o is hiatus, last ɔ is in coda" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- J2: J-insertion — in foreign words, an epenthetic [j] is inserted between
-- vowel sequences containing i (VLKK §27). Input is a respelling that
-- spells out the inserted j, optionally with `(j)` for the variable
-- forms in §27.2.
function tests:test_IPA_J2_j_insertion()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		-- §27.1: i first → j obligatorily inserted
		{ "dijãkonas", "dʲɪ²ˈjæːkoːnɐs", "ia → ija (i first, accented vowel)" },
		{ "dijakonỹstė", "dʲɪjɛkoː²ˈnʲiːsʲtʲeː", "ia → ija (i first, unaccented)" },
		{ "dijalèktas", "dʲɪjɛˈlʲɛktɐs", "ia → ija (i first, unaccented)" },
		{ "pijani̇̀nas<base:pianinas>", "pʲɪjɛˈnʲɪnɐs", "ia → ija (respell j)" },
		{ "dijèzas<base:diezas>", "dʲɪˈjɛzɐs", "ie → ije (respell j)" },
		{ "audijo^fònas<base:audiofonas>", "ɒʊdʲɪjɔ̟ˈfɔnɐs", "io → ijo (respell j)" },
		-- §27.2: i second → j optional, written as `(j)` in respelling
		{ "teji̇̀stas<base:teistas>", "tʲɛˈjɪstɐs", "ei → eji (respell j)" },
		{ "stò(j)ikas<base:stoikas>", "ˈstɔ(j)ɪkɐs", "oi: variant with (j)" },
		{ "babu(j)i̇̀nai<base:babuinai>", "bɐbʊˈ(j)ɪnɐɪ", "ui: variant with (j)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- K: Word juncture — clitic liaison `‿` is inserted between an unstressed
-- word and a following stressed word; word-final voiced obstruents devoice.
-- Input uses a regular space; the module inserts `‿` automatically.
function tests:test_IPA_K_word_juncture()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		-- Basic liaison
		{ "be ãbejo", "bʲɛ‿²ˈɑːbʲɛjo̟ː", "clitic be + main word" },
		{ "kaip kàd", "kɐɪp‿ˈkɐt", "clitic kaip + main word" },
		{ "kadà ne kadà", "kɐˈdɐ nʲɛ‿kɐˈdɐ", "stressed + clitic + stressed" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- K2: VLKK §19 — when a clitic and its host are joined by ‿ in the IPA
-- output, the host-side word-final voiced stops devoice (default in our
-- spec) and a "soft" first phoneme of the next word (front V, j, or a
-- palatalized C) palatalizes the now-devoiced consonant via cross-word
-- palatalization spreading. Input uses a regular space; the module
-- inserts ‿ automatically based on stress / clitic grouping.
function tests:test_IPA_K2_crossword_devoicing_palatalization()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		-- VLKK §19 examples (devoiced variant — the one our spec produces)
		{ "kad àtima", "kɐt‿ˈɐtʲɪmɐ", "d → t (back V next, no palat.)" },
		{ "lyg jója<base:lyg joja>", "lʲiːkʲ‿¹ˈjo̟ːjɛ", "g → k → kʲ (j triggers palat.)" },
		{ "lig miẽsto", "lʲɪkʲ‿²ˈmʲiɛstoː", "g → k → kʲ (mʲ palatalizes back)" },
		{ "daug nẽša", "dɒʊkʲ‿²ˈnʲæːʃɐ", "g → k → kʲ (nʲ palatalizes back)" },
		{ "lyg ródo", "lʲiːk‿¹ˈroːdoː", "g → k stays hard (r before back o)" },
		{ "kad vẽža", "kɐtʲ‿²ˈʋʲæːʒɐ", "d → t → tʲ (ʋʲ palatalizes back)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- K3: VLKK §20 — the preposition už is the lone exception to word-final
-- devoicing: when it forms a clitic group with a following word starting
-- with a vowel or sonorant (j, n, m, l, r, v), its ž stays voiced.
-- Before a voiceless obstruent the regular devoicing applies. Input
-- uses a regular space.
function tests:test_IPA_K3_uz_exception_VLKK_20()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	-- NOTE(review): some expected values here differ from patterns used by
	-- other tests in this file — "juː" has no fronting mark (cf. "j + ų → u̟ː"),
	-- "lɑːŋɡoː" does not show the "ɐŋˑ" shape seen for tilde on an/in + C, and
	-- ʒ carries no ʲ before j / mʲ / rʲ (cf. the kʲ‿ rows in K2). Confirm
	-- against actual module output before relying on them.
	local cases = {
		-- VLKK §20 explicit examples
		{ "už akių̃", "ʊʒ‿ɐ²ˈkʲu̟ː", "už before vowel a (keep ž)" },
		{ "už jų̃", "ʊʒ‿²ˈjuː", "už before sonorant j (keep ž)" },
		{ "už lañgo", "ʊʒ‿²ˈlɑːŋɡoː", "už before sonorant l (keep ž)" },
		{ "už miẽsto", "ʊʒ‿²ˈmʲiɛstoː", "už before sonorant m (keep ž)" },
		{ "už nãmo", "ʊʒ‿²ˈnɑːmoː", "už before sonorant n (keep ž)" },
		{ "už rýto", "ʊʒ‿¹ˈrʲiːtoː", "už before sonorant r (keep ž)" },
		{ "už võko", "ʊʒ‿²ˈʋoːkoː", "už before sonorant v (keep ž)" },
		{ "už stálą", "ʊʃ‿¹ˈstɑːlɑː", "už before voiceless obstruent s (devoices)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- K4: VLKK §21b / §22b — when two identical consonants or two adjacent
-- sibilants meet across a liaison boundary in the IPA output, the first
-- drops out (only the second is pronounced). This is the formal variant;
-- VLKK also lists a colloquial variant that preserves both consonants,
-- but our spec follows §21b/§22b literally. Input uses a regular space.
function tests:test_IPA_K4_crossword_geminate_sibilant()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		-- VLKK §21b — identical consonants drop the first
		{ "iš šóno", "ɪ‿¹ˈʃoːnoː", "š + š → ∅ + š (identical sibilants)" },
		{ "už žolė̃s", "ʊ‿ʒoː²ˈlʲeːs", "ž → š → ∅ + ž (identical after devoicing)" },
		{ "ir rei̇̃kia", "ɪ‿²ˈrʲɛɪˑkʲɛ", "r + r → ∅ + r (identical sonorants)" },
		{ "ar ródo", "ɐ‿¹ˈroːdoː", "r + r → ∅ + r (identical sonorants)" },
		-- VLKK §22b — different sibilants also drop the first
		{ "iš sẽno", "ɪ‿²ˈsʲæːnoː", "š + s → ∅ + s (different sibilants)" },
		{ "už sõdo", "ʊ‿²ˈsoːdoː", "ž → š → ∅ + s (devoiced then dropped)" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- K5: VLKK §23b — across a liaison boundary, a fricative s/š/z/ž before an
-- affricate of the OTHER place of articulation assimilates: s + č → š,
-- ž + dz → z, š + c → s, z + dž → ž. Cross-word palatalization re-applies
-- after the place change, so a freshly assimilated s → ʃ still picks up
-- the ʲ from the palatalized affricate that follows. Input uses a
-- regular space.
function tests:test_IPA_K5_crossword_place_assim_VLKK_23b()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		-- VLKK §23b examples that match our "always devoice except už" spec.
		{ "vis čiùlba", "ʋʲɪʃʲ‿¹ˈt͡ʃʲʊ̟lbɐ", "s + č → š (then palatalized)" },
		{ "iš ceñtro", "ɪsʲ‿²ˈt͡sʲɛnˑtroː", "š + c → s (then palatalized)" },
		-- TODO: VLKK §23b also lists `už dzū̃ko → uz‿dzū̃ko [ʊz‿²ˈʣuːkoː]`,
		-- in which ž + dz → z (preserving voicing across the boundary).
		-- Our spec applies terminal devoicing unconditionally for non-už
		-- words and only skips it for už before vowels/sonorants, so the
		-- voiced obstruent dz does NOT trigger the už exception here and
		-- the module currently emits `ʊʃ‿²ˈd͡zuːkoː` instead. If we ever
		-- want to match VLKK §23b for this case, we would need to either
		--   (a) extend the už exception to voiced obstruents, or
		--   (b) run the cross-word place assimilation before terminal
		--       devoicing so that ʒ + d͡z → z + d͡z survives.
		-- Decide on a policy and add the test accordingly.
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- L: Secondary stress (`ˌ`) — different positions and interactions with
-- primary stress.
function tests:test_IPA_L_secondary_stress()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		{ "ˌho^mo^fòbė", "ˌɣɔmɔˈfɔbʲeː", "initial secondary stress" },
		{ "saˌvanoriáuti", "sɐˌʋɐnoː¹ˈrʲæˑʊtʲɪ", "medial secondary stress" },
		{ "nebekõneˌveikti", "nʲɛbʲɛ²ˈkoːnʲɛˌʋʲɛɪktʲɪ", "secondary AFTER primary stress" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- M: Optional soft l in loanwords (VLKK §15) — by default the module reads
-- l as hard before a hard consonant; the user marks softening explicitly
-- with U+02BC (the modifier letter apostrophe) after l, as in the examples below.
function tests:test_IPA_M_l_dual_reading()
	-- Every row is forwarded positionally to self:check_IPA; rows read as
	-- { respelling, expected IPA, note }.
	local cases = {
		{ "pòlka", "¹ˈpɔlkɐ", "default: hard l" },
		{ "pòlʼka", "¹ˈpɔlʲkɐ", "with U+02BC: soft lʲ" },
		{ "válsas", "¹ˈʋɑˑlsɐs", "default: hard l" },
		{ "válʼsas", "¹ˈʋɑˑlʲsɐs", "with U+02BC: soft lʲ" },
	}
	for i = 1, #cases do
		self:check_IPA(unpack(cases[i]))
	end
end
-- ════════════════════════════════════════════════════════════════════════════
-- SYLLABIFICATION TESTS (Phonotactic models)
-- ════════════════════════════════════════════════════════════════════════════
-- A: 2-consonant cluster models.
function tests:test_hyphen_A_models_2C()
	-- Every row is forwarded positionally to self:check_hyph; rows read as
	-- { respelling, expected syllable division, note }.
	local cases = {
		-- Onset patterns (V-CCV)
		{ "vèsti", "vè‧sti", "ST onset" },
		{ "dažnai̇̃", "da‧žnai̇̃", "SR onset" },
		{ "veiklõs", "vei‧klõs", "TR onset" },
		-- Split patterns (VC-CV)
		{ "kalbõs", "kal‧bõs", "RT split" },
		{ "ámžiaus", "ám‧žiaus", "RS split" },
	}
	for i = 1, #cases do
		self:check_hyph(unpack(cases[i]))
	end
end
-- B: 3-consonant cluster models.
function tests:test_hyphen_B_models_3C()
	-- Every row is forwarded positionally to self:check_hyph; rows read as
	-- { respelling, expected syllable division, note }.
	local cases = {
		-- Onset
		{ "displė̃jus<base:displėjus>", "di‧splė̃‧jus", "STR onset (V-CCCV)" },
		-- Splits
		{ "pýksta", "pýk‧sta", "T+ST split" },
		{ "mir̃šta", "mir̃‧šta", "R+ST split" },
		{ "mókslo", "mók‧slo", "T+SR split" },
		{ "lengvai̇̃", "len‧gvai̇̃", "R+TR split" },
		{ "atkrei̇̃pia", "at‧krei̇̃‧pia", "T+TR split" },
		{ "di̇̀rbti", "di̇̀rb‧ti", "RT+T split" },
		{ "elgsenõs", "elg‧se‧nõs", "RT+S split" },
		{ "piktžolė̃s", "pikt‧žo‧lė̃s", "TT+S split" },
		{ "Oksfòrdas", "Oks‧fòr‧das", "TS+S split (foreign)" },
		{ "transfòrmavo", "trans‧fòr‧ma‧vo", "RS+S split" },
	}
	for i = 1, #cases do
		self:check_hyph(unpack(cases[i]))
	end
end
-- C: 4-consonant cluster models.
function tests:test_hyphen_C_models_4C()
	-- Every row is forwarded positionally to self:check_hyph; rows read as
	-- { respelling, expected syllable division, note }.
	local cases = {
		{ "konstrùkcija<base:konstrukcija>", "kon‧strùk‧ci‧ja", "R+STR split" },
		{ "apskritai̇̃", "ap‧skri‧tai̇̃", "T+STR split" },
		{ "ankstà", "ank‧stà", "RT+ST split" },
		{ "ži̇̀ngsnis", "ži̇̀ng‧snis", "RT+SR split" },
		{ "ántplūdžio", "ánt‧plū‧džio", "RT+TR split" },
		{ "postprodùkcija<base:postprodukcija>", "post‧pro‧dùk‧ci‧ja", "ST+TR split" },
		{ "kontrmotỹvas", "kontr‧mo‧tỹ‧vas", "RTR+R split" },
		{ "Obstfelderis", "Obst‧fel‧de‧ris", "TST+S split" }, -- FIXME: need accentuation
		-- Hyphenation for theoretically-existing consonant clusters,
		-- per Bendrinės lietuvių kalbos skiemuo monografija:
		-- S-STR
		-- RS-SR, RR-ST, ST-SR, RR-TR
		-- RTR-T, RST-T, RTT-S, TST-T
	}
	for i = 1, #cases do
		self:check_hyph(unpack(cases[i]))
	end
end
-- D: Morphology-driven hyphenation — native prefixes vs. pseudo-prefixes,
-- and prefix boundaries that introduce hiatus.
function tests:test_hyphen_D_morphology()
	-- Every row is forwarded positionally to self:check_hyph; rows read as
	-- { respelling, expected syllable division, note }.
	local cases = {
		-- Native prefixes (user-marked with `.`)
		{ "ap.rašýti", "ap‧ra‧šý‧ti", "Native prefix ap-" },
		{ "at.nèšti", "at‧nè‧šti", "Native prefix at-" },
		{ "iš.mókyti", "iš‧mó‧ky‧ti", "Native prefix iš-" },
		-- Pseudo-prefixes (Internationalisms): no morphological boundary
		{ "atòmas", "a‧tò‧mas", "Pseudo-prefix" },
		{ "apãratas", "a‧pã‧ra‧tas", "Pseudo-prefix" },
		-- Prefix boundary with hiatus
		{ "pa.upỹs", "pa‧u‧pỹs", "Prefix boundary with hiatus" },
		{ "priim̃ti", "pri‧im̃‧ti", "Prefix boundary with hiatus" },
		-- User-marked hiatus and secondary-stress boundary
		{ "Kiurasã.o", "Kiu‧ra‧sã‧o", "User-marked hiatus" },
		{ "išˌverstaãkis", "iš‧ver‧sta‧ã‧kis", "Secondary stress also marks syllable boundary" },
		{ "jū́rų žvaigždė̃", "jū́‧rų žvaig‧ždė̃", "space should be kept" },
	}
	for i = 1, #cases do
		self:check_hyph(unpack(cases[i]))
	end
end
-- ════════════════════════════════════════════════════════════════════════════
-- RHYME TESTS
-- ════════════════════════════════════════════════════════════════════════════
-- A: Rhyme extraction for a spread of vowel types, diphthongs, and stress
-- patterns.
-- Every case is { input, expected rhyme, description } and is handed to
-- self:check_rhyme in that order.
function tests:test_rhyme_A_basic()
local cases = {
{ "nakti̇̀s", "ɪs", "Short i rhyme" },
{ "kalbà", "ɐ", "Short a rhyme" },
{ "homològas", "ɔɡɐs", "Loanword o rhyme" },
{ "naũjas", "ɒʊˑjɛs", "Diphthong rhyme" },
{ "var̃das", "ɐrˑdɐs", "Mixed diphthong rhyme" },
{ "mótina", "oːtʲɪnɐ", "Long o rhyme" },
{ "vil̃kas", "ɪlˑkɐs", "Mixed diphthong rhyme" },
-- NOTE(review): the description says the secondary stress is stripped,
-- yet the expected rhyme still contains "ˌ" — confirm which is intended.
{ "nebekõneˌveikti", "oːnʲɛˌʋʲɛɪktʲɪ", "Secondary stress should be stripped" },
}
for i = 1, #cases do
local case = cases[i]
self:check_rhyme(case[1], case[2], case[3])
end
end
-- Module value: the table carrying the test_* methods defined in this file.
return tests
q50s5g6upf7tf2tiuz3c1yjr610hs4a
ကဏ္ဍ:မဝ်ဂျူလေတ်တူယဵုနဳယျာဂမၠိုၚ်
14
295399
395935
2026-05-29T18:25:29Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » :ကဏ္ဍ:ဘာသာလေတ်တူယဵုနဳယျာ|လေတ်တူယဵု..."
395935
wikitext
text/x-wiki
[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာလေတ်တူယဵုနဳယျာ|လေတ်တူယဵုနဳယျာ]] » '''မဝ်ဂျူဂမၠိုၚ်'''
:[[:ကဏ္ဍ:မဝ်ဂျူဂမၠိုၚ်|မဝ်ဂျူ]]ဘာသာလေတ်တူယဵုနဳယျာ၊ မနွံကဵုလုပ်အဝေါၚ်ကုဒ် Lua နကဵုမကၠောန်ဗဒှ် ကဵု မစဳရေၚ်ယဵုဒုၚ်သ္ပမာန်ဂမၠိုၚ်။
[[ကဏ္ဍ:ဘာသာလေတ်တူယဵုနဳယျာ]][[ကဏ္ဍ:မဝ်ဂျူဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|လ]]
4f8tob05809agchx7g6o25tn27fbwsm
မဝ်ဂျူ:lt-common/doc
828
295400
395937
2026-05-29T18:28:26Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|lt}} </includeonly>"
395937
wikitext
text/x-wiki
{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. -->
<includeonly>
{{module cat|lt}}
</includeonly>
js7rgpxcn6jiutxja12panln1tuz2fd
ထာမ်ပလိက်:lt-pr/documentation
10
295401
395938
2026-05-29T18:30:15Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} {{status|beta}} {{uses lua|Module:lt-pron}} This template generates the pronunciation section for Lithuanian terms, including IPA, rhymes, and syllabification, as well as (if manually specified) audio files and homophones. Unlike {{tl|IPA}}, this template automatically handles: * Syllabification based on sonority hierarchy and morphological boundaries * Stress realization (different phonetic..."
395938
wikitext
text/x-wiki
{{documentation subpage}}
{{status|beta}}
{{uses lua|Module:lt-pron}}
This template generates the pronunciation section for Lithuanian terms, including IPA, rhymes, and syllabification, as well as (if manually specified) audio files and homophones.
Unlike {{tl|IPA}}, this template automatically handles:
* Syllabification based on sonority hierarchy and morphological boundaries
* Stress realization (different phonetic values under different stress patterns)
* Consonant palatalization before front vowels
* Voicing assimilation and final devoicing
* Nasal velarization and sibilant fusion
In most cases, you only need to provide the '''form with stress diacritics''' (acute ´, grave `, or tilde ~), and the module will generate the correct IPA, rhyme, and syllabification automatically. Respelling is only needed in specific cases described below.
==Usage==
===Quick reference table===
{|class="wikitable"
! Page
! Example
! Comment
|-
| rowspan=2 | {{m|lt|nãmas}}
| <code><nowiki>{{lt-pr|nãmas}}</nowiki></code>
| rowspan=2 | Most of the time, only the form with stress diacritics is needed.
|-
| {{lt-pr|nãmas}}
|-
| rowspan=2 | {{m|lt|aprašýti}}
| <code><nowiki>{{lt-pr|ap.rašýti}}</nowiki></code>
| rowspan=2 | Use a period (<code>.</code>) to mark syllable boundaries when morphology overrides phonology, such as prefix boundaries.
|-
| {{lt-pr|ap.rašýti}}
|-
| rowspan=2 | {{m|lt|paupỹs}}
| <code><nowiki>{{lt-pr|pa.upỹs}}</nowiki></code>
| rowspan=2 | Use a period (<code>.</code>) to prevent vowel sequences from being treated as diphthongs.
|-
| {{lt-pr|pa.upỹs}}
|-
| rowspan=2 | {{m|lt|Zojà}}
| <code><nowiki>{{lt-pr|Zo^jà}}</nowiki></code>
| rowspan=2 | Use a caret (<code>^</code>) after o to mark loanword short {{IPAchar|[ɔ]}} (native words and early loanwords have long {{IPAchar|[oː]}}).
|-
| {{lt-pr|Zo^jà}}
|-
| rowspan=2 | {{m|lt|homònimas}}
| <code><nowiki>{{lt-pr|ho^mo^ni̇̀mas}}</nowiki></code>
| rowspan=2 | The caret is also used in mixed diphthongs with short o: ol, om, on, or.
|-
| {{lt-pr|ho^mo^ni̇̀mas}}
|-
| rowspan=2 | {{m|lt|dièzas}}
| <code><nowiki>{{lt-pr|dijèzas}}</nowiki></code>
| rowspan=2 | Insert j between vowels to mark a glide (mainly in loanwords). Use <base:...> when the respelling differs from the original spelling.
|-
| {{lt-pr|dijèzas<base:diezas>}}
|-
| rowspan=2 | {{m|lt|savanoriáuti}}
| <code><nowiki>{{lt-pr|saˌvanoriáuti}}</nowiki></code>
| rowspan=2 | Use <code>ˌ</code> to mark secondary stress in long words.
|-
| {{lt-pr|saˌvanoriáuti}}
|-
| rowspan=2 | {{m|lt|atsaistyti}}
| <code><nowiki>{{lt-pr|atsáistyti,atsaistýti}}</nowiki></code>
| rowspan=2 | Multiple comma-separated pronunciations can be given.
|-
| {{lt-pr|atsáistyti,atsaistýti}}
|-
| rowspan=2 | {{m|lt|dešinė}}
| <code><nowiki>{{lt-pr|dešinė̃<audio:LL-Q9083 (lit)-Trimkev-dešinė.wav>}}</nowiki></code>
| rowspan=2 | Use the inline modifier syntax to add audio files and other properties.
|-
| {{lt-pr|dešinė̃<audio:LL-Q9083 (lit)-Trimkev-dešinė.wav>}}
|-
| rowspan=2 | {{m|lt|trauks}}
| <code><nowiki>{{lt-pr|traũks<q:standard>,tráuks<q:dialectal><audio:Example.ogg>}}</nowiki></code>
| rowspan=2 | Multiple modifiers can be stacked. Here we add qualifiers and an audio file to different pronunciations.
|-
| {{lt-pr|traũks<q:standard>,tráuks<q:dialectal><audio:Example.ogg>}}
|}
==When respelling is needed==
In most cases, simply entering the form with stress diacritics is sufficient. The module automatically handles syllabification, palatalization, assimilation, and other phonological processes. However, '''respelling is required''' in the following specific cases:
===Syllable boundaries===
Use a period (<code>.</code>) to mark syllable boundaries when '''morphology overrides phonology'''.
Native prefixes ({{m|lt|ap-}}, {{m|lt|at-}}, {{m|lt|iš-}}, {{m|lt|nu-}}, {{m|lt|pa-}}, {{m|lt|pri-}}, etc.) create morphological syllable boundaries that override default phonological syllabification. Mark these boundaries with a period when the prefix is followed by a vowel or when syllabification differs from the default.
Examples:
* {{m|lt|aprašýti}}: <code>ap.rašýti</code> → ap‧ra‧šý‧ti
* {{m|lt|atnèšti}}: <code>at.nèšti</code> → at‧nè‧šti
* {{m|lt|išmókyti}}: <code>iš.mókyti</code> → iš‧mó‧ky‧ti
* {{m|lt|paupỹs}}: <code>pa.upỹs</code> → pa‧u‧pỹs
* {{m|lt|nùimtas}}: <code>nù.imtas</code> → nù‧im‧tas
When two vowels/consonants should be in separate syllables but would normally be treated as a diphthong/consonant cluster, use a period to separate them too.
===Loanword short o===
Outside of “standard” diphthongs (like {{m|lt||uo}}, {{m|lt||oi}}, {{m|lt||ou}}), the letter '''{{m|lt||o}}''' has two main pronunciations in Lithuanian:
* Native words and early loanwords: long {{IPAchar|[oː]}}
* Modern loanwords: short {{IPAchar|[ɔ]}}
The module can automatically detect the short loanword {{IPAchar|[ɔ]}} when it is written with a grave accent ({{m|lt||ò}}) or in the diphthongs {{m|lt||uo}}, {{m|lt||oi}}, and {{m|lt||ou}}.
However, in unaccented positions or mixed diphthongs, plain '''o''' is ambiguous. In these cases, you must use a '''caret''' (<code>^</code>) after the '''o''' to explicitly mark the short loanword pronunciation.
====When to use the caret (<code>^</code>)====
Use the caret for '''unaccented monophthongs''' and for '''mixed diphthongs''' that carry either a tilde ({{m|lt||ol̃}}, {{m|lt||om̃}}, {{m|lt||oñ}}, {{m|lt||or̃}}) or no accent ({{m|lt||ol}}, {{m|lt||om}}, {{m|lt||on}}, {{m|lt||or}}):
* {{m|lt|Z'''o'''jà}}: <code>Z'''o^'''jà</code> → {{IPAchar|[zɔˈjɛ]}}
* {{m|lt|f'''o'''t'''o'''parodà}}: <code>f'''o^'''t'''o^'''parodà</code> → {{IPAchar|[fɔtɔpɐroːˈdɐ]}}
* {{m|lt|šlãkbet'''on'''is}}: <code>šlãkbeˌt'''o^n'''is</code> → {{IPAchar|[²ˈʃlɑːɡbʲɛˌtɔnʲɪs]}}
* {{m|lt|B'''or̃'''tnikas}}: <code>B'''o^r̃'''tnikas</code> → {{IPAchar|[²ˈbɔrʲˑtʲnʲɪkɐs]}}
====When NOT to use the caret====
Do not use the caret if the vowel has a grave accent ('''{{m|lt||ò}}'''), as the module already knows '''{{m|lt||ò}}''' is short. The foreign diphthongs '''{{m|lt||oi}}'''/'''{{m|lt||ou}}''' also automatically produce {{IPAchar|[ɔɪ]}}/{{IPAchar|[ɔʊ]}} (as well as the native diphthong '''{{m|lt||uo}}''' {{IPAchar|[uɔ]}}).
* {{m|lt|k'''òl'''ba}}: <code>k'''òl'''ba</code> → {{IPAchar|[¹ˈkɔlbɐ]}}
* {{m|lt|z'''òm'''ša}}: <code>z'''òm'''ša</code> → {{IPAchar|[¹ˈzɔmʃɐ]}}
* {{m|lt|m'''òi'''ra}}: <code>m'''òi'''ra</code> → {{IPAchar|[¹ˈmɔɪrɐ]}}
* {{m|lt|š'''òu'''}}: <code>š'''òu'''</code> → {{IPAchar|[¹ˈʃɔʊ]}}
* {{m|lt|'''oi'''kumenà}}: <code>'''oi'''kumenà</code> → {{IPAchar|[ɔɪkʊmʲɛˈnɐ]}}
* {{m|lt|kl'''ou'''nadà}}: <code>kl'''ou'''nadà</code> → {{IPAchar|[klɔʊnɐˈdɐ]}}
===Glide insertion===
In some words (mainly loanwords), a glide {{IPAchar|[j]}} appears between vowels even though it's not written in the standard spelling. Use <code>j</code> or <code>(j)</code> in the respelling to indicate this:
* <code>j</code>: Mandatory glide (always pronounced)
* <code>(j)</code>: Optional glide (some speakers pronounce it, others don't)
====How to determine which to use====
Check the VDU Phonetic Transcriber (listed under Resources below). If it shows:
* Only {{IPAchar|[j]}}: use <code>j</code> in respelling
* Both {{IPAchar|[j]}} and no {{IPAchar|[j]}} as variants: use <code>(j)</code> in respelling
;Examples:
* {{m|lt|dièzas}}: <code>di'''j'''èzas</code> → {{IPAchar|[dʲɪˈjɛzɐs]}}
* {{m|lt|pianinas}}: <code>pi'''j'''ani̇̀nas</code> → {{IPAchar|[pʲɪjɛˈnʲɪnɐs]}}
* {{m|lt|babuìnai}}: <code>babu'''(j)'''i̇̀nai</code> → {{IPAchar|[bɐbʊˈ(j)ɪnɐɪ]}}
===Secondary stress===
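In long words, compound words, or derived words, you can mark secondary stress by inserting <code>ˌ</code> at the start of the secondarily stressed syllable (before its onset consonants, not directly before the vowel); the mark also acts as a syllable boundary: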
* {{m|lt|savanoriáuti}}: <code>saˌvanoriáuti</code> → {{IPAchar|[sɐˌʋɐnoː¹ˈrʲæˑʊtʲɪ]}}
* {{m|lt|homofòbė}}: <code>ˌho^mo^fòbė</code> → {{IPAchar|[ˌɣɔmɔˈfɔbʲeː]}}
* {{m|lt|nebekõneveikti}}: <code>nebekõneˌveikti</code> → {{IPAchar|[nʲɛbʲɛ²ˈkoːnʲɛˌʋʲɛɪktʲɪ]}}
==Advanced features==
===Inline modifiers===
The template supports inline modifiers (using the same syntax as {{tl|affix}}, {{tl|desc}}, and similar templates) to add properties to specific pronunciations:
; <code><q:''qualifier''></code>, <code><qq:''qualifier''></code>
: Add a left qualifier (q) or right qualifier (qq) to the pronunciation.
: Example: <code>nãmas<q:standard></code>
; <code><a:''accent''></code>, <code><aa:''accent''></code>
: Add a left accent label (a) or right accent label (aa).
: Example: <code>nãmas<a:Northern></code>
; <code><ref:''reference''></code>
: Add a reference to the pronunciation.
: Example: <code><nowiki>nù.imtas<ref:{{R:lt:VLKK}}></nowiki></code>
; <code><audio:''filename''></code>
: Add an audio file. Nested modifiers can be used within the audio tag:
: Example: <code><audio:file.wav<a:Standard><text:full sentence>></code>
; <code><hmp:''homophone''></code>
: Specify a homophone. Multiple homophones can be comma-separated.
: Example: <code>tei̇̃gti<hmp:tei̇̃kti></code>
; <code><rhyme:''rhyme''></code>
: <!--Override the automatically generated rhyme. -->Use <code><rhyme:-></code> to suppress rhyme generation.
; <code><hyph:''syllabification''></code>
: <!--Override the automatically generated syllabification.-->Use <code><hyph:-></code> to suppress syllabification generation.
; <code><base:''name''></code>
: Explicitly specify the page name '''without stress diacritics or respelling'''. This is primarily used in template code, documentation pages, and test pages where the module cannot automatically determine the original spelling from the page title. <!--When glide insertion (j) is used, the module compares the respelling against this base spelling to identify which j's are original versus inserted, ensuring correct syllabification in the output.-->
: Example: <code>dijèzas<base:diezas></code>
===Multiple pronunciations===
Multiple pronunciations can be specified by separating them with commas (no spaces):
<code><nowiki>{{lt-pr|atsáistyti,atsaistýti}}</nowiki></code>
Each pronunciation can have its own inline modifiers:
<code><nowiki>{{lt-pr|nãmas<q:standard>,nãmas<q:dialectal><a:Northern>}}</nowiki></code>
==Resources==
; [https://kalbu.vdu.lt/en/resources/pronunciation/#dabartines-lietuviu-kalbos-tarties-zodynas VDU Pronunciation Dictionary]
: Search for words to find their accented forms and basic grammatical information (headwords only).
; [https://kalbu.vdu.lt/en/resources/pronunciation/#fonetinis-transkribuoklis VDU Phonetic Transcriber]
: Generate IPA transcriptions (including inflected forms).
; [https://kalbu.vdu.lt/mokymosi-priemones/kirciuoklis/ VDU Stress Marker]
: Generate forms with stress diacritics (including inflected forms).
; [https://kirtis.info/#/krc Online Accentuation]
: Generate forms with stress diacritics (including inflected forms).
==Parameters==
; {{para|1}}
: The form with stress diacritics, or comma-separated forms for multiple pronunciations. Respelling may be needed as described above. This parameter is '''required'''.
<includeonly>
{{tcat}}
</includeonly>
lqv96qlstv85ba4v1481dktahnk5o18