Wiktionary
tpiwiktionary
https://tpi.wiktionary.org/wiki/Fran_Pes
MediaWiki 1.39.0-wmf.23
case-sensitive
Media
Sipesol
Toktok
Yusa
Toktok bilong yusa
Wiktionary
Wiktionary toktok
Fail
Toktok bilong fail
MediaWiki
Toktok bilong mediawiki
Templet
Toktok bilong templet
Halivim
Toktok bilong halivim
Grup
Toktok bilong grup
TimedText
TimedText talk
Module
Module talk
Gadget
Gadget talk
Gadget definition
Gadget definition talk
Module:languages/data2
828
3647
13338
13209
2022-08-04T12:39:06Z
Asinis632
1829
Scribunto
text/plain
-- Shorthand used throughout this module to build a string from a Unicode
-- code point.
local u = mw.ustring.char

-- UTF-8 encoded strings for some commonly-used diacritics
-- (each constant holds one combining character).
local GRAVE          = u(0x0300) -- combining grave accent
local ACUTE          = u(0x0301) -- combining acute accent
local CIRC           = u(0x0302) -- combining circumflex accent
local TILDE          = u(0x0303) -- combining tilde
local MACRON         = u(0x0304) -- combining macron
local BREVE          = u(0x0306) -- combining breve
local DOTABOVE       = u(0x0307) -- combining dot above
local DIAER          = u(0x0308) -- combining diaeresis
local CARON          = u(0x030C) -- combining caron
local DGRAVE         = u(0x030F) -- combining double grave accent
local INVBREVE       = u(0x0311) -- combining inverted breve
local DOTBELOW       = u(0x0323) -- combining dot below
local RINGBELOW      = u(0x0325) -- combining ring below
local CEDILLA        = u(0x0327) -- combining cedilla
local OGONEK         = u(0x0328) -- combining ogonek
local CGJ            = u(0x034F) -- combining grapheme joiner
local DOUBLEINVBREVE = u(0x0361) -- combining double inverted breve

-- Punctuation to be used for standardChars field; entries append it to their
-- own character list with `..`.
-- NOTE(review): if the consumer splices standardChars into a Lua character
-- class, "%&" would be read as an escaped "&" (so a literal "%" would not be
-- covered) and ",-." as a range -- confirm this is intended.
local PUNCTUATION = ' !#%&*+,-./:;<=>?@^_`|~\'()'

-- Script lists shared by the entries that use exactly these scripts.
local Cyrl     = {"Cyrl"}
local Latn     = {"Latn"}
local LatnArab = {"Latn", "Arab"}

-- Table of language records keyed by language code; returned at the end of
-- the module.
local m = {}
-- Afar. Positional items: name, a numeric id, a family-style code and the
-- script list (presumably Wikidata id / family code -- confirm against the
-- consumer of this data).
m["aa"] = {
	"Afar",
	27811,
	"cus-eas",
	Latn,
	-- The acute accent is the only diacritic listed for removal.
	entry_name = {remove_diacritics = ACUTE},
}
-- Abkhaz.
m["ab"] = {
	"Abkhaz",
	5111,
	"cau-abz",
	{"Cyrl", "Geor", "Latn"},
	translit_module = "ab-translit",
	override_translit = true,
	-- `to` is empty, so neither the grave nor the acute accent has a
	-- replacement string.
	entry_name = {
		from = {GRAVE, ACUTE},
		to = {},
	},
}
m["ae"] = {
"Avestan",
29572,
"ira-cen",
{"Avst", "Gujr"},
translit_module = "Avst-translit",
wikipedia_article = "Avestan",
}
m["af"] = {
"Afrikaans",
14196,
"gmw",
LatnArab,
ancestors = {"nl"},
sort_key = {
from = {"[äáâà]", "[ëéêè]", "[ïíîì]", "[öóôò]", "[üúûù]", "[ÿýŷỳ]", "^-", "'"},
to = {"a" , "e" , "i" , "o" , "u" , "y" }} ,
}
m["ak"] = {
"Akan",
28026,
"alv-ctn",
Latn,
}
m["am"] = {
"Amharic",
28244,
"sem-eth",
{"Ethi"},
translit_module = "Ethi-translit",
}
m["an"] = {
"Aragonese",
8765,
"roa-ibe",
Latn,
ancestors = {"roa-oan"},
}
-- Arabic.
m["ar"] = {
	"Arabic",
	13955,
	"sem-arb",
	{"Arab", "Hebr", "Brai"},
	-- replace alif waṣl with alif
	-- remove tatweel and diacritics: fathatan, dammatan, kasratan, fatha,
	-- damma, kasra, shadda, sukun, superscript (dagger) alef
	entry_name = {
		from = {
			u(0x0671), -- alif waṣl: the only item with a counterpart in `to`
			u(0x0640), -- tatweel
			"[" .. u(0x064B) .. "-" .. u(0x0652) .. "]", -- fathatan through sukun
			u(0x0670), -- superscript (dagger) alef
		},
		to = {
			u(0x0627), -- plain alif
		},
	},
	-- put Judeo-Arabic (Hebrew-script Arabic) under the category header
	-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles
	sort_key = {
		-- The frontier pattern matches the empty position at the very start
		-- of a string whose first character lies in U+05D0..U+05EA.
		from = {"^%f[" .. u(0x5D0) .. "-" .. u(0x5EA) .. "]"},
		to = {u(0xFB21)},
	},
	translit_module = "ar-translit",
}
m["as"] = {
"Assamese",
29401,
"inc-eas",
{"as-Beng"},
ancestors = {"inc-mas"},
translit_module = "as-translit",
}
m["av"] = {
"Avar",
29561,
"cau-nec",
Cyrl,
ancestors = {"oav"},
translit_module = "av-translit",
override_translit = true,
entry_name = {
from = {GRAVE, ACUTE},
to = {}} ,
}
m["ay"] = {
"Aymara",
4627,
"sai-aym",
Latn,
}
m["az"] = {
"Azerbaijani",
9292,
"trk-ogz",
{"Latn", "Cyrl", "fa-Arab"},
ancestors = {"trk-oat"},
}
m["ba"] = {
"Bashkir",
13389,
"trk-kbu",
Cyrl,
translit_module = "ba-translit",
override_translit = true,
}
m["be"] = {
"Belarusian",
9091,
"zle",
Cyrl,
ancestors = {"orv"},
translit_module = "be-translit",
sort_key = {
from = {"Ё", "ё"},
to = {"Е" , "е"}},
entry_name = {
from = {"Ѐ", "ѐ", GRAVE, ACUTE},
to = {"Е", "е"}},
}
m["bg"] = {
"Bulgarian",
7918,
"zls",
{"Cyrl"},
ancestors = {"cu"},
translit_module = "bg-translit",
entry_name = {
from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE},
to = {"Е", "е", "И", "и"}},
}
m["bh"] = {
"Bihari",
135305,
"inc-eas",
{"Deva"},
ancestors = {"inc-mgd"},
}
m["bi"] = {
"Bislama",
35452,
"crp",
Latn,
ancestors = {"en"},
}
m["bm"] = {
"Bambara",
33243,
"dmn-emn",
Latn,
}
m["bn"] = {
"Bengali",
9610,
"inc-eas",
{"Beng", "Newa"},
ancestors = {"inc-mbn"},
translit_module = "bn-translit",
}
m["bo"] = {
"Tibetan",
34271,
"sit-tib",
{"Tibt"}, -- sometimes Deva?
ancestors = {"xct"},
translit_module = "bo-translit",
override_translit = true,
}
m["br"] = {
"Breton",
12107,
"cel-bry",
Latn,
ancestors = {"xbm"},
}
m["ca"] = {
"Catalan",
7026,
"roa",
Latn,
ancestors = {"roa-oca"},
sort_key = {
from = {"à", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "l·l"},
to = {"a", "e" , "i" , "o" , "u" , "c", "ll" }} ,
}
m["ce"] = {
"Chechen",
33350,
"cau-vay",
Cyrl,
translit_module = "ce-translit",
override_translit = true,
entry_name = {
from = {MACRON},
to = {}},
}
m["ch"] = {
"Chamorro",
33262,
"poz-sus",
Latn,
}
m["co"] = {
"Corsican",
33111,
"roa-itd",
Latn,
}
m["cr"] = {
"Cree",
33390,
"alg",
{"Cans", "Latn"},
translit_module = "translit-redirect",
}
m["cs"] = {
"Czech",
9056,
"zlw",
Latn,
ancestors = {"zlw-ocs"},
sort_key = {
from = {"á", "é", "í", "ó", "[úů]", "ý"},
to = {"a", "e", "i", "o", "u" , "y"}} ,
}
m["cu"] = {
"Old Church Slavonic",
35499,
"zls",
{"Cyrs", "Glag"},
translit_module = "Cyrs-Glag-translit",
entry_name = {
from = {u(0x0484)}, -- kamora
to = {}},
sort_key = {
from = {"оу", "є"},
to = {"у" , "е"}} ,
}
m["cv"] = {
"Chuvash",
33348,
"trk-ogr",
Cyrl,
ancestors = {"xbo"},
translit_module = "cv-translit",
override_translit = true,
}
-- Welsh.
m["cy"] = {
	"Welsh",
	9309,
	"cel-bry",
	Latn,
	ancestors = {"wlm"},
	sort_key = {
		-- Accented vowels map to the bare vowel, and the digraph/trigraph
		-- patterns map to strings containing "~". `from` has one more item
		-- than `to`: the final "'" has no replacement.
		from = {"[âáàä]", "ch", "dd", "[êéèë]", "ff", "ngh", "[îíìï]", "ll", "[ôóòö]", "ph", "rh", "th", "[ûúùü]", "[ŵẃẁẅ]", "[ŷýỳÿ]", "'"},
		to = {"a", "c~", "d~", "e", "f~", "g~h", "i", "l~", "o", "p~", "r~", "t~", "u", "w", "y"},
	},
	standardChars = "A-IL-PR-UWYa-il-pr-uwy0-9ÂâÊêÎîÔôÛûŴŵŶŷ" .. PUNCTUATION,
}
m["da"] = {
"Danish",
9035,
"gmq",
Latn,
ancestors = {"gmq-oda"},
}
m["de"] = {
"German",
188,
"gmw",
{"Latn", "Latf"},
ancestors = {"gmh"},
sort_key = {
from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]", "ß" },
to = {"a" , "e" , "i" , "o" , "u" , "ss"}} ,
standardChars = "A-Za-z0-9ÄäÖöÜüß" .. PUNCTUATION,
}
m["dv"] = {
"Dhivehi",
32656,
"inc-ins",
{"Thaa"},
ancestors = {"elu-prk"},
translit_module = "dv-translit",
override_translit = true,
}
m["dz"] = {
"Dzongkha",
33081,
"sit-tib",
{"Tibt"},
ancestors = {"xct"},
translit_module = "bo-translit",
override_translit = true,
}
m["ee"] = {
"Ewe",
30005,
"alv-gbe",
Latn,
}
m["el"] = {
"Greek",
9129,
"grk",
{"Grek", "Brai"},
ancestors = {"grc"},
translit_module = "el-translit",
override_translit = true,
sort_key = { -- Keep this synchronized with grc, cpg, pnt, tsd
from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"},
to = {"α" , "ε" , "η" , "ι" , "ο" , "υ" , "ω" , "ρ", "σ"}} ,
standardChars = "ͺ;΄-ώϜϝ" .. PUNCTUATION,
}
-- English (listed here under the name "Inglis").
m["en"] = {
	"Inglis",
	1860,
	"gmw",
	-- entries in Shaw or Dsrt might require prior discussion
	{"Latn", "Brai", "Shaw", "Dsrt"},
	ancestors = {"enm"},
	sort_key = {
		-- Accented letters and ligatures map to unaccented ASCII; the final
		-- "'" in `from` has no counterpart in `to`.
		from = {"[äàáâåā]", "[ëèéêē]", "[ïìíîī]", "[öòóôō]", "[üùúûū]", "æ", "œ", "[çč]", "ñ", "'"},
		to = {"a", "e", "i", "o", "u", "ae", "oe", "c", "n"},
	},
	wikimedia_codes = {"en", "simple"},
	-- ASCII letters and digits, the shared punctuation, and the range
	-- U+2800..U+28FF.
	standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF),
}
m["eo"] = {
"Esperanto",
143,
"art",
Latn,
sort_key = {
from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ĉ]", "[ĝ]", "[ĥ]", "[ĵ]", "[ŝ]", "[ŭ]"},
to = {"a" , "e" , "i" , "o" , "u", "cĉ", "gĉ", "hĉ", "jĉ", "sĉ", "uĉ"}} ,
standardChars = "A-PRSTUVZa-prstuvzĉĈĝĜĵĴŝŜŭŬ0-9" .. PUNCTUATION,
}
m["es"] = {
"Spanish",
1321,
"roa-ibe",
{"Latn", "Brai"},
ancestors = {"osp"},
sort_key = {
from = {"á", "é", "í", "ó", "[úü]", "ç", "ñ"},
to = {"a", "e", "i", "o", "u" , "c", "n~"}},
standardChars = "A-VXYZa-vxyz0-9ÁáÉéÍíÓóÚúÑñ¿¡" .. PUNCTUATION,
}
m["et"] = {
"Estonian",
9072,
"fiu-fin",
Latn,
}
m["eu"] = {
"Basque",
8752,
"euq",
Latn,
}
-- Persian.
m["fa"] = {
	"Persian",
	9168,
	"ira-swi",
	{"fa-Arab"},
	ancestors = {"pal"}, -- "ira-mid"
	-- None of the listed marks has a replacement (`to` is empty).
	entry_name = {
		from = {
			u(0x064E), -- fatha
			u(0x0640), -- tatweel
			u(0x064F), -- damma
			u(0x0650), -- kasra
			u(0x0651), -- shadda
			u(0x0652), -- sukun
		},
		to = {},
	},
}
m["ff"] = {
"Fula",
33454,
"alv-fwo",
{"Latn", "Adlm"},
}
-- Finnish.
m["fi"] = {
	"Finnish",
	1412,
	"fiu-fin",
	Latn,
	entry_name = {
		from = {"ˣ"}, -- Used to indicate gemination of the next consonant
		to = {},
	},
	sort_key = {
		-- The last item of `from`, "[':]", has no counterpart in `to`.
		-- NOTE(review): "õ" occurs both in "[óòôõ]" (-> "o") and in "[øõő]"
		-- (-> "ö"); if the rules are applied in list order the first one
		-- wins -- confirm which mapping is intended.
		from = {"[áàâã]", "[éèêẽ]", "[íìîĩ]", "[óòôõ]", "[úùûũ]", "[ýỳŷüű]", "[øõő]", "æ", "œ", "[čç]", "š", "ž", "ß", "[':]"},
		to = {"a", "e", "i", "o", "u", "y", "ö", "ae", "oe", "c", "s", "z", "ss"},
	},
}
m["fj"] = {
"Fijian",
33295,
"poz-occ",
Latn,
}
m["fo"] = {
"Faroese",
25258,
"gmq",
Latn,
ancestors = {"non"},
}
m["fr"] = {
"French",
150,
"roa-oil",
{"Latn", "Brai"},
ancestors = {"frm"},
sort_key = {
from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ" , "œ" , "'"},
to = {"a" , "e" , "i" , "o" , "u" , "y" , "c", "ae", "oe"}},
standardChars = "A-Za-z0-9ÀÂÇÉÈÊËÎÏÔŒÛÙÜàâçéèêëîïôœûùü«»" .. PUNCTUATION,
}
m["fy"] = {
"West Frisian",
27175,
"gmw-fri",
Latn,
ancestors = {"ofs"},
sort_key = {
from = {"[àáâä]", "[èéêë]", "[ìíîïyỳýŷÿ]", "[òóôö]", "[ùúûü]", "æ", "[ /.-]"},
to = {"a" , "e" , "i" , "o" , "u", "ae"}} ,
standardChars = "A-PR-WYZa-pr-wyz0-9Ææâäàéêëèïìôöòúûüùỳ" .. PUNCTUATION,
}
m["ga"] = {
"Irish",
9142,
"cel-gae",
Latn,
ancestors = {"mga"},
sort_key = {
from = {"á", "é", "í", "ó", "ú", "ý", "ḃ" , "ċ" , "ḋ" , "ḟ" , "ġ" , "ṁ" , "ṗ" , "ṡ" , "ṫ" },
to = {"a", "e", "i", "o", "u", "y", "bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}} ,
standardChars = "A-IL-PR-Ua-il-pr-u0-9ÁáÉéÍíÓóÚú" .. PUNCTUATION,
}
m["gd"] = {
"Scottish Gaelic",
9314,
"cel-gae",
Latn,
ancestors = {"mga"},
sort_key = {
from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ýỳ]"},
to = {"a" , "e" , "i" , "o" , "u" , "y" }} ,
standardChars = "A-IL-PR-Ua-il-pr-u0-9ÀàÈèÌìÒòÙù" .. PUNCTUATION,
}
m["gl"] = {
"Galician",
9307,
"roa-ibe",
Latn,
ancestors = {"roa-opt"},
sort_key = {
from = {"á", "é", "í", "ó", "ú"},
to = {"a", "e", "i", "o", "u"}} ,
}
m["gn"] = {
"Guaraní",
35876,
"tup-gua",
Latn,
}
m["gu"] = {
"Gujarati",
5137,
"inc-wes",
{"Gujr"},
ancestors = {"inc-mgu"},
translit_module = "gu-translit",
}
m["gv"] = {
"Manx",
12175,
"cel-gae",
Latn,
ancestors = {"mga"},
sort_key = {
from = {"ç", "-"},
to = {"c"}} ,
standardChars = "A-WYÇa-wyç0-9" .. PUNCTUATION,
}
m["ha"] = {
"Hausa",
56475,
"cdc-wst",
LatnArab,
sort_key = {
from = {"ɓ", "ɗ", "ƙ", "'y", "ƴ", "'" },
to = {"b~" , "d~" , "k~", "y~", "y~", "" }},
entry_name = {
from = {"R̃", "r̃", "À", "à", "È", "è", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Î", "î", "Ô", "ô", "Û", "û", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú", "Ā̀", "ā̀", "Ḕ", "ḕ", "Ī̀", "ī̀", "Ṑ", "ṑ", "Ū̀", "ū̀", GRAVE, ACUTE},
to = {"R", "r", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}},
}
-- Hebrew.
m["he"] = {
	"Hebrew",
	9288,
	"sem-can",
	{"Hebr", "Phnx", "Brai"},
	entry_name = {
		-- A single character class: the ranges U+0591..U+05BD and
		-- U+05BF..U+05C5, plus U+05C7 and the combining grapheme joiner.
		-- U+05BE and U+05C6 are not part of the class. Nothing is given as a
		-- replacement.
		from = {
			"[" .. u(0x0591) .. "-" .. u(0x05BD)
				.. u(0x05BF) .. "-" .. u(0x05C5)
				.. u(0x05C7)
				.. CGJ .. "]",
		},
		to = {},
	},
}
m["hi"] = {
"Hindi",
1568,
"inc-hnd",
{"Deva", "Kthi", "Newa"},
ancestors = {"inc-ohi"},
translit_module = "hi-translit",
standardChars = "ँंअ-ऊएऐओ-घच-झट-नप-रलवशसहा-ूेैो-◌्।-॰ड़ढ़" .. PUNCTUATION,
}
m["ho"] = {
"Hiri Motu",
33617,
"crp",
Latn,
ancestors = {"meu"},
}
m["ht"] = {
"Haitian Creole",
33491,
"crp",
Latn,
ancestors = {"fr"},
}
m["hu"] = {
"Hungarian",
9067,
"urj-ugr",
{"Latn", "Hung"},
ancestors = {"ohu"},
sort_key = {
from = {"á", "é", "í", "ó", "ú", "[öő]", "[üű]", "cs", "dzs", "gy", "ly", "ny", "zs"},
to = {"a", "e", "i", "o", "u", "o~", "u~", "c~", "dz~", "g~", "l~", "n~", "z~"},
},
}
-- Armenian.
m["hy"] = {
	"Armenian",
	8785,
	"hyx",
	{"Armn", "Brai"},
	ancestors = {"axm"},
	translit_module = "Armn-translit",
	override_translit = true,
	sort_key = {
		from = {"ու", "և", "եւ"},
		to = {"ւ", "եվ", "եվ"},
	},
	entry_name = {
		-- The first four items are mapped to the empty string, the ligature
		-- "և" to "եւ", and each <sup>…</sup> wrapper to the bare letter it
		-- encloses.
		from = {"՞", "՜", "՛", "՟", "և", "<sup>յ</sup>", "<sup>ի</sup>", "<sup>է</sup>"},
		to = {"", "", "", "", "եւ", "յ", "ի", "է"},
	},
}
m["hz"] = {
"Herero",
33315,
"bnt-swb",
Latn,
}
m["ia"] = {
"Interlingua",
35934,
"art",
Latn,
}
m["id"] = {
"Indonesian",
9240,
"poz-mly",
Latn,
ancestors = {"ms"},
}
m["ie"] = {
"Interlingue",
35850,
"art",
Latn,
type = "appendix-constructed",
}
m["ig"] = {
"Igbo",
33578,
"alv-igb",
Latn,
sort_key = {
from = {"ụ", "ị", "ọ", "gb", "gh", "gw", "kp", "kw", "ṅ", "nw", "ny", "sh"},
to = {"u~" , "i~", "o~", "gy", "gz", "g~", "kz", "k~", "ny", "nz", "n~", "s~"}},
entry_name = { remove_diacritics = ACUTE .. GRAVE .. MACRON },
}
m["ii"] = {
"Sichuan Yi",
34235,
"tbq-lol",
{"Yiii"},
translit_module = "ii-translit",
}
m["ik"] = {
"Inupiaq",
27183,
"esx-inu",
Latn,
}
m["io"] = {
"Ido",
35224,
"art",
Latn,
}
m["is"] = {
"Icelandic",
294,
"gmq",
Latn,
ancestors = {"non"},
}
m["it"] = {
"Italian",
652,
"roa-itd",
Latn,
sort_key = {
from = {"[àáâäå]", "[èéêë]", "[ìíîï]", "[òóôö]", "[ùúûü]"},
to = {"a" , "e" , "i" , "o" , "u" }} ,
standardChars = "A-IL-VZa-il-vz0-9" .. PUNCTUATION,
}
m["iu"] = {
"Inuktitut",
29921,
"esx-inu",
{"Cans", "Latn"},
translit_module = "translit-redirect",
override_translit = true,
}
m["ja"] = {
"Siapan",
5287,
"jpx",
{"Jpan", "Brai"},
ancestors = {"ojp"},
--[=[
-- Handled by jsort function in [[Module:ja]].
sort_key = {
from = {"[ぁァア]", "[ぃィイ]", "[ぅゔゥウヴ]", "[ぇェエ]", "[ぉォオ]", "[がゕカガヵ]", "[ぎキギ]", "[ぐクグㇰ]", "[げゖケゲヶ]", "[ごコゴ]", "[ざサザ]", "[じシジㇱ]", "[ずスズㇲ]", "[ぜセゼ]", "[ぞソゾ]", "[だタダ]", "[ぢチヂ]", "[っづッツヅ]", "[でテデ]", "[どトドㇳ]", "ナ", "ニ", "[ヌㇴ]", "ネ", "ノ", "[ばぱハバパㇵ]", "[びぴヒビピㇶ]", "[ぶぷフブプㇷ]", "[べぺヘベペㇸ]", "[ぼぽホボポㇹ]", "マ", "ミ", "[ムㇺ]", "メ", "モ", "[ゃャヤ]", "[ゅュユ]", "[ょョヨ]", "[ラㇻ]", "[リㇼ]", "[ルㇽ]", "[レㇾ]", "[ロㇿ]", "[ゎヮワヷ]", "[ヰヸ]", "[ヱヹ]", "[ヲヺ]", "ン", "[゙゚゛゜ゝゞ・ヽヾ]", "𛀀"},
to = {"あ", "い", "う", "え", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "や", "ゆ", "よ", "ら", "り", "る", "れ", "ろ", "わ", "ゐ", "ゑ", "を", "ん", "", "え"}},
--]=]
}
m["jv"] = {
"Javanese",
33549,
"poz-sus",
{"Latn", "Java"},
translit_module = "jv-translit",
ancestors = {"kaw"},
link_tr = true,
}
-- Georgian.
m["ka"] = {
	"Georgian",
	8108,
	"ccs-gzn",
	-- Hebr is used to write Judeo-Georgian
	{"Geor", "Geok", "Hebr"},
	ancestors = {"oge"},
	translit_module = "Geor-translit",
	override_translit = true,
	entry_name = {
		-- The single `from` item is a literal combining mark written inline
		-- (it appears to be the combining circumflex -- confirm); it is
		-- mapped to the empty string.
		from = {"̂"},
		to = {""},
	},
}
m["kg"] = {
"Kongo",
33702,
"bnt-kng",
Latn,
}
m["ki"] = {
"Kikuyu",
33587,
"bnt-kka",
Latn,
}
m["kj"] = {
"Kwanyama",
1405077,
"bnt-ova",
Latn,
}
m["kk"] = {
"Kazakh",
9252,
"trk-kno",
{"Cyrl", "Latn", "kk-Arab"},
translit_module = "kk-translit",
override_translit = true,
}
m["kl"] = {
"Greenlandic",
25355,
"esx-inu",
Latn,
}
m["km"] = {
"Khmer",
9205,
"mkh-kmr",
{"Khmr"},
ancestors = {"mkh-mkm"},
translit_module = "km-translit",
}
m["kn"] = {
"Kannada",
33673,
"dra",
{"Knda"},
ancestors = {"dra-mkn"},
translit_module = "kn-translit",
}
m["ko"] = {
"Korean",
9176,
"qfa-kor",
{"Kore", "Brai"},
ancestors = {"okm"},
-- 20210122 idea: strip parenthesized hanja from entry link
-- Hani regex is a reasonable subset of Hani from [[Module:scripts/data]],
-- last updated on 20210214.
entry_name = {
from = {
" *%([一-鿿㐀-䶿𠀀-𰀀-﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧﨨﨩]+%)",
},
to = {
"",
}},
display = {
from = {"%-"},
to = {},
},
translit_module = "ko-translit",
}
m["kr"] = {
"Kanuri",
36094,
"ssa-sah",
LatnArab,
sort_key = {
from = {"ny", "ǝ", "sh"},
to = {"n~", "e~", "s~"}} , -- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
entry_name = {
from = {"À", "à", "È", "è", "Ǝ̀", "ǝ̀", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Ǝ̂", "ǝ̂", "Î", "î", "Ô", "ô", "Û", "û", "Ă", "ă", "Ĕ", "ĕ", "Ǝ̆", "ǝ̆", "Ĭ", "ĭ", "Ŏ", "ŏ", "Ŭ", "ŭ", "Á", "á", "É", "é", "Ǝ́", "ǝ́", "Í", "í", "Ó", "ó", "Ú", "ú", GRAVE, ACUTE},
to = {"A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u"}},
}
m["ks"] = {
"Kashmiri",
33552,
"inc-dar",
{"ks-Arab", "Deva", "Shrd", "Latn"},
translit_module = "translit-redirect",
ancestors = {"sa"},
}
-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT
m["kw"] = {
"Cornish",
25289,
"cel-bry",
Latn,
ancestors = {"cnx"},
}
m["ky"] = {
"Kyrgyz",
9255,
"trk-kip",
{"Cyrl", "Latn", "Arab"},
translit_module = "ky-translit",
override_translit = true,
}
-- Latin.
m["la"] = {
	"Latin",
	397,
	"itc",
	Latn,
	ancestors = {"itc-ola"},
	-- Macron, breve, diaeresis and double inverted breve are the diacritics
	-- listed for removal.
	entry_name = {
		remove_diacritics = MACRON .. BREVE .. DIAER .. DOUBLEINVBREVE,
	},
	-- The standard set still contains the macron/breve letters and the
	-- combining macron and breve themselves.
	standardChars = "A-Za-z0-9ÆæŒœĀ-ăĒ-ĕĪ-ĭŌ-ŏŪ-ŭȲȳ" .. MACRON .. BREVE .. PUNCTUATION,
}
m["lb"] = {
"Luxembourgish",
9051,
"gmw",
Latn,
ancestors = {"gmh"},
}
m["lg"] = {
"Luganda",
33368,
"bnt-nyg",
Latn,
entry_name = {
from = {"á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ", "â", "Â", "ê", "Ê", "î", "Î", "ô", "Ô", "û", "Û" },
to = {"a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U",}},
sort_key = {
from = {"ŋ"},
to = {"n"}} ,
}
m["li"] = {
"Limburgish",
102172,
"gmw",
Latn,
ancestors = {"dum"},
}
m["ln"] = {
"Lingala",
36217,
"bnt-bmo",
Latn,
}
m["lo"] = {
"Lao",
9211,
"tai-swe",
{"Laoo"},
translit_module = "lo-translit",
sort_key = {
from = {"[%pໆ]", "[່-ໍ]", "ຼ", "ຽ", "ໜ", "ໝ", "([ເແໂໃໄ])([ກ-ຮ])"},
to = {"", "", "ລ", "ຍ", "ຫນ", "ຫມ", "%2%1"}},
standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. PUNCTUATION,
}
m["lt"] = {
"Lithuanian",
9083,
"bat",
Latn,
ancestors = {"olt"},
entry_name = {
from = {"[áãà]", "[ÁÃÀ]", "[éẽè]", "[ÉẼÈ]", "[íĩì]", "[ÍĨÌ]", "[ýỹ]", "[ÝỸ]", "ñ", "[óõò]", "[ÓÕÒ]", "[úũù]", "[ÚŨÙ]", ACUTE, GRAVE, TILDE},
to = {"a", "A", "e", "E", "i", "I", "y", "Y", "n", "o", "O", "u", "U"}} ,
}
m["lu"] = {
"Luba-Katanga",
36157,
"bnt-lub",
Latn,
}
-- Latvian.
m["lv"] = {
	"Latvian",
	9078,
	"bat",
	Latn,
	entry_name = {
		-- This attempts to convert vowels with tone marks to vowels either with
		-- or without macrons. Specifically, there should be no macrons if the
		-- vowel is part of a diphthong (including resonant diphthongs such as
		-- pìrksts -> pirksts not #pīrksts). What we do is first convert the
		-- vowel + tone mark to a vowel + tilde in a decomposed fashion,
		-- then remove the tilde in diphthongs, then convert the remaining
		-- vowel + tilde sequences to macroned vowels, then delete any other
		-- tilde. We leave already-macroned vowels alone: Both e.g. ar and ār
		-- occur before consonants. FIXME: This still might not be sufficient.
		--
		-- `from[i]` pairs with `to[i]`; the two lists below are laid out in
		-- the same stages so the pairing stays visible.
		from = {
			-- Stage 1: precomposed letters -> base letter (+ TILDE for
			-- a/e/i/u; o and n get no tilde), combining tone marks -> TILDE.
			"Ȩ", "ȩ",
			"[ÂÃÀ]", "[âãà]",
			"[ÊẼÈ]", "[êẽè]",
			"[ÎĨÌ]", "[îĩì]",
			"[ÔÕÒ]", "[ôõò]",
			"[ÛŨÙ]", "[ûũù]",
			"[ÑǸ]", "[ñǹ]",
			"[" .. CIRC .. TILDE .. GRAVE .. "]",
			-- Stage 2: drop the tilde inside diphthongs (before a non-vowel,
			-- at the end of the string, and in "ie").
			"([aAeEiIoOuU])" .. TILDE .. "?([lrnmuiLRNMUI])" .. TILDE .. "?([^aAeEiIoOuUāĀēĒīĪūŪ])",
			"([aAeEiIoOuU])" .. TILDE .. "?([lrnmuiLRNMUI])" .. TILDE .. "?$",
			"([iI])" .. TILDE .. "?([eE])" .. TILDE .. "?",
			-- Stage 3: remaining vowel + tilde -> macroned vowel.
			"A" .. TILDE, "a" .. TILDE,
			"E" .. TILDE, "e" .. TILDE,
			"I" .. TILDE, "i" .. TILDE,
			"U" .. TILDE, "u" .. TILDE,
			-- Stage 4: delete any tilde that is left.
			TILDE,
		},
		to = {
			-- Stage 1
			"E", "e",
			"A" .. TILDE, "a" .. TILDE,
			"E" .. TILDE, "e" .. TILDE,
			"I" .. TILDE, "i" .. TILDE,
			"O", "o",
			"U" .. TILDE, "u" .. TILDE,
			"N", "n",
			TILDE,
			-- Stage 2
			"%1%2%3",
			"%1%2",
			"%1%2",
			-- Stage 3
			"Ā", "ā",
			"Ē", "ē",
			"Ī", "ī",
			"Ū", "ū",
			-- Stage 4
			"",
		},
	},
}
m["mg"] = {
"Malagasy",
7930,
"poz-bre",
Latn,
}
m["mh"] = {
"Marshallese",
36280,
"poz-mic",
Latn,
sort_key = {
from = {"ā" , "ļ" , "m̧" , "ņ" , "n̄" , "o̧" , "ō" , "ū" },
to = {"a~", "l~", "m~", "n~", "n~~", "o~", "o~~", "u~"}} ,
}
m["mi"] = {
"Maori",
36451,
"poz-pep",
Latn,
}
m["mk"] = {
"Macedonian",
9296,
"zls",
Cyrl,
translit_module = "mk-translit",
entry_name = {
from = {ACUTE},
to = {}},
}
m["ml"] = {
"Malayalam",
36236,
"dra",
{"Mlym"},
translit_module = "ml-translit",
override_translit = true,
}
m["mn"] = {
"Mongolian",
9246,
"xgn",
{"Cyrl", "Mong", "Soyo", "Zanb"}, -- entries in Soyo or Zanb might require prior discussion
ancestors = {"cmg"},
translit_module = "mn-translit",
override_translit = true,
}
-- "mo" IS TREATED AS "ro", SEE WT:LT
m["mr"] = {
"Marathi",
1571,
"inc-sou",
{"Deva", "Modi"},
ancestors = {"omr"},
translit_module = "mr-translit",
entry_name = {
from = {"च़", "ज़", "झ़"},
to = {"च", "ज", "झ"}} ,
}
m["ms"] = {
"Malay",
9237,
"poz-mly",
{"Latn", "ms-Arab"},
}
-- Maltese.
m["mt"] = {
	"Maltese",
	9166,
	"sem-arb",
	Latn,
	ancestors = {"sqr"},
	-- The three dotted/barred letters map to their plain counterparts.
	sort_key = {
		from = {"ċ", "ġ", "ħ"},
		to = {"c", "g", "h"},
	},
}
m["my"] = {
"Burmese",
9228,
"tbq-brm",
{"Mymr"},
ancestors = {"obr"},
translit_module = "my-translit",
override_translit = true,
sort_key = {
from = {"ျ", "ြ", "ွ", "ှ", "ဿ"},
to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "သ္သ"}},
}
m["na"] = {
"Nauruan",
13307,
"poz-mic",
Latn,
}
m["nb"] = {
"Norwegian Bokmål",
25167,
"gmq",
Latn,
ancestors = {"gmq-mno"},
wikimedia_codes = {"no"},
}
m["nd"] = {
"Northern Ndebele",
35613,
"bnt-ngu",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["ne"] = {
"Nepali",
33823,
"inc-pah",
{"Deva", "Newa"},
translit_module = "ne-translit",
}
m["ng"] = {
"Ndonga",
33900,
"bnt-ova",
Latn,
}
m["nl"] = {
"Dutch",
7411,
"gmw",
Latn,
ancestors = {"dum"},
sort_key = {
from = {"[äáâå]", "[ëéê]", "[ïíî]", "[öóô]", "[üúû]", "ç", "ñ", "^-"},
to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} ,
standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF),
}
m["nn"] = {
"Norwegian Nynorsk",
25164,
"gmq",
Latn,
ancestors = {"gmq-mno"},
}
m["no"] = {
"Norwegian",
9043,
"gmq",
Latn,
ancestors = {"gmq-mno"},
}
m["nr"] = {
"Southern Ndebele",
36785,
"bnt-ngu",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["nv"] = {
"Navajo",
13310,
"apa",
Latn,
sort_key = {
from = {"[áą]", "[éę]", "[íį]", "[óǫ]", "ń", "^n([djlt])", "ł" , "[ʼ’']", ACUTE},
to = {"a" , "e" , "i" , "o" , "n", "ni%1" , "l~"}}, -- the tilde is used to guarantee that ł will always be sorted after all other words with l
}
m["ny"] = {
"Chichewa",
33273,
"bnt-nys",
Latn,
entry_name = {
from = {"ŵ", "Ŵ", "á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ" },
to = {"w", "W", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M"}},
sort_key = {
from = {"ng'"},
to = {"ng"}} ,
}
m["oc"] = {
"Occitan",
14185,
"roa",
{"Latn", "Hebr"},
ancestors = {"pro"},
sort_key = {
from = {"[àá]", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "([lns])·h"},
to = {"a" , "e" , "i" , "o" , "u" , "c", "%1h" }} ,
}
m["oj"] = {
"Ojibwe",
33875,
"alg",
{"Cans", "Latn"},
sort_key = {
from = {"aa", "ʼ", "ii", "oo", "sh", "zh"},
to = {"a~", "h~", "i~", "o~", "s~", "z~"}} ,
}
m["om"] = {
"Oromo",
33864,
"cus-eas",
{"Latn", "Ethi"},
}
m["or"] = {
"Oriya",
33810,
"inc-eas",
{"Orya"},
ancestors = {"inc-mor"},
translit_module = "or-translit",
}
m["os"] = {
"Ossetian",
33968,
"xsc",
{"Cyrl", "Geor", "Latn"},
ancestors = {"oos"},
translit_module = "os-translit",
override_translit = true,
entry_name = {
from = {GRAVE, ACUTE},
to = {}} ,
}
m["pa"] = {
"Punjabi",
58635,
"inc-pan",
{"Guru", "pa-Arab"},
ancestors = {"inc-opa"},
translit_module = "translit-redirect",
entry_name = {
from = {u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658), u(0x08C7), u(0x0768)},
to = {"", "", "", "", "", "", "", "", "", "ل", "ن"}} ,
}
m["pi"] = {
"Pali",
36727,
"inc-mid",
{"Latn", "Brah", "Deva", "Beng", "Sinh", "Mymr", "Thai", "Lana", "Laoo", "Khmr"},
ancestors = {"sa"},
translit_module = "translit-redirect",
sort_key = {
from = {"ā", "ī", "ū", "ḍ", "ḷ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṭ", "([เโ])([ก-ฮ])", "([ເໂ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
to = {"a~", "i~", "u~", "d~", "l~", "m~", "n~", "n~~", "n~~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} ,
entry_name = {
from = {u(0xFE00)},
to = {}},
}
-- Polish.
m["pl"] = {
	"Polish",
	809,
	"zlw-lch",
	Latn,
	ancestors = {"zlw-opl"},
	sort_key = {
		-- Each letter with a diacritic (either case) becomes its lowercase
		-- base letter followed by U+10FFFF. "[Źź]" is the exception and gets
		-- U+10FFFE, which gives it a key distinct from "[Żż]" (presumably so
		-- that ź orders immediately before ż -- confirm).
		from = {"[Ąą]", "[Ćć]", "[Ęę]", "[Łł]", "[Ńń]", "[Óó]", "[Śś]", "[Żż]", "[Źź]"},
		to = {
			"a" .. u(0x10FFFF),
			"c" .. u(0x10FFFF),
			"e" .. u(0x10FFFF),
			"l" .. u(0x10FFFF),
			"n" .. u(0x10FFFF),
			"o" .. u(0x10FFFF),
			"s" .. u(0x10FFFF),
			"z" .. u(0x10FFFF),
			"z" .. u(0x10FFFE),
		},
	},
}
m["ps"] = {
"Pashto",
58680,
"ira-pat",
{"ps-Arab"},
ancestors = {"ira-pat-pro"},
}
m["pt"] = {
"Portuguese",
5146,
"roa-ibe",
{"Latn", "Brai"},
ancestors = {"roa-opt"},
sort_key = {
from = {"[àãáâä]", "[èẽéêë]", "[ìĩíï]", "[òóôõö]", "[üúùũ]", "ç", "ñ"},
to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} ,
}
m["qu"] = {
"Quechua",
5218,
"qwe",
Latn,
}
m["rm"] = {
"Romansch",
13199,
"roa-rhe",
Latn,
}
m["ro"] = {
"Romanian",
7913,
"roa-eas",
{"Latn", "Cyrl"},
sort_key = {
from = {"ă" , "â" , "î" , "ș" , "ț" },
to = {"a~", "a~~", "i~", "s~", "t~"}},
}
-- Russian.
m["ru"] = {
	"Russian",
	7737,
	"zle",
	{"Cyrl", "Brai"},
	translit_module = "ru-translit",
	sort_key = {
		-- "ё" becomes "е" followed by U+10FFFF. `u` is this module's alias
		-- for mw.ustring.char.
		from = {"ё"},
		to = {"е" .. u(0x10FFFF)},
	},
	entry_name = {
		-- The four precomposed letters map to their plain forms; the
		-- combining grave, acute and diaeresis have no counterpart in `to`.
		from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE, DIAER},
		to = {"Е", "е", "И", "и"},
	},
	standardChars = "ЁА-яё0-9—" .. PUNCTUATION,
}
m["rw"] = {
"Rwanda-Rundi",
3217514,
"bnt-glb",
Latn,
entry_name = {
from = {"[áāâǎā́]", "[éēêěḗ]", "[íīîǐī́]", "[óōôǒṓ]", "[úūûǔū́]"},
to = {"a", "e" , "i", "o" , "u"} },
}
m["sa"] = {
"Sanskrit",
11059,
"inc-old",
{"Deva", "Bali", "as-Beng", "Beng", "Bhks", "Brah", "Gran", "Gujr", "Guru", "Java", "Khar", "Khmr", "Knda", "Lana", "Laoo", "Mlym", "Modi", "Mymr", "Nand", "Newa", "Orya", "Saur", "Shrd", "Sidd", "Sinh", "Taml", "Telu", "Thai", "Tibt", "Tirh"},
sort_key = {
from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} ,
entry_name = {
from = {u(0xFE00)},
to = {}},
translit_module = "translit-redirect",
}
m["sc"] = {
"Sardinian",
33976,
"roa",
Latn,
}
m["sd"] = {
"Sindhi",
33997,
"inc-snd",
{"sd-Arab", "Deva", "Sind", "Khoj"},
entry_name = {
from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)},
to = {u(0x0627)}},
ancestors = {"inc-vra"},
translit_module = "translit-redirect",
}
m["se"] = {
"Northern Sami",
33947,
"smi",
Latn,
entry_name = {
from = {"ạ", "[ēẹ]", "ī", "[ōọ]", "ū", "ˈ"},
to = {"a", "e" , "i", "o" , "u"} },
sort_key = {
from = {"á" , "č" , "đ" , "ŋ" , "š" , "ŧ" , "ž" },
to = {"a²", "c²", "d²", "n²", "s²", "t²", "z²"} },
standardChars = "A-PR-VZa-pr-vz0-9ÁáČčĐđŊŋŠšŦŧŽž" .. PUNCTUATION,
}
m["sg"] = {
"Sango",
33954,
"crp",
Latn,
ancestors = {"ngb"},
}
m["sh"] = {
"Serbo-Croatian",
9301,
"zls",
{"Latn", "Cyrl", "Glag"},
entry_name = {
from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "Е", "е", "И" , "и", "У", "у" }},
wikimedia_codes = {"sh", "bs", "hr", "sr"},
}
m["si"] = {
"Sinhalese",
13267,
"inc-ins",
{"Sinh"},
ancestors = {"elu-prk"},
translit_module = "si-translit",
override_translit = true,
}
m["sk"] = {
"Slovak",
9058,
"zlw",
Latn,
sort_key = {
from = {"[áä]", "é", "í", "[óô]", "ú", "ý", "ŕ", "ĺ", "[" .. DIAER .. ACUTE .. CIRC .. "]"},
to = {"a" , "e", "i", "o" , "u", "y", "r", "l", ""}} ,
}
m["sl"] = {
"Slovene",
9063,
"zls",
Latn,
entry_name = {
from = {"[ÁÀÂĀȂȀ]", "[áàâāȃȁ]", "[ÉÈÊĒȆȄỆẸ]", "[éèêēȇȅệẹə]", "[ÍÌÎĪȊȈ]", "[íìîīȋȉ]", "[ÓÒÔŌȎȌỘỌ]", "[óòôōȏȍộọ]", "[ŔȒȐ]", "[ŕȓȑ]", "[ÚÙÛŪȖȔ]", "[úùûūȗȕ]", "ł", GRAVE, ACUTE, CIRC, MACRON, DGRAVE, INVBREVE, DOTBELOW},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "l"},
},
sort_key = {
from = {"č" , "š" , "ž" },
to = {"c²", "s²", "z²"},
},
}
m["sm"] = {
"Samoan",
34011,
"poz-pnp",
Latn,
}
m["sn"] = {
"Shona",
34004,
"bnt-sho",
Latn,
entry_name = {remove_diacritics = ACUTE},
}
m["so"] = {
"Somali",
13275,
"cus-eas",
{"Latn", "Arab", "Osma"},
entry_name = {
from = {"[ÁÀÂ]", "[áàâ]", "[ÉÈÊ]", "[éèê]", "[ÍÌÎ]", "[íìî]", "[ÓÒÔ]", "[óòô]", "[ÚÙÛ]", "[úùû]", "[ÝỲ]", "[ýỳ]"},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "U" , "u", "Y", "y"}} ,
}
m["sq"] = {
"Albanian",
8748,
"sqj",
{"Latn", "Grek", "Elba"},
entry_name = {remove_diacritics = ACUTE},
sort_key = {
from = { '[âãä]', '[ÂÃÄ]', '[êẽë]', '[ÊẼË]', 'ĩ', 'Ĩ', 'õ', 'Õ', 'ũ', 'Ũ', 'ỹ', 'Ỹ', 'ç', 'Ç' },
to = { 'a', 'A', 'e', 'E', 'i', 'I', 'o', 'O', 'u', 'U', 'y', 'Y', 'c', 'C' } } ,
}
-- Serbian (listed here under the name "Sebian").
m["sr"] = {
	"Sebian",
	9299,
	"zls",
	-- The script list is the fourth positional item, exactly as in every
	-- other record of this table. Supplying it under a `scripts` key instead
	-- leaves position 4 empty, so a reader that looks up the scripts
	-- positionally finds none for this language.
	{"Latn", "Cyrl"},
	aliases = {"Српски", "Српски језик", "српски", "српски језик", "srpski", "srpski jezik", "Srpski", "Srpski jezik"},
	entry_name = {
		-- Same rule set as the "sh" record: precomposed accented Latin and
		-- Cyrillic letters map to their plain forms, and the trailing
		-- combining marks (grave, acute, double grave, inverted breve,
		-- macron, tilde) have no counterpart in `to`.
		from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE},
		to = {"A", "a", "E", "e", "I", "i", "O", "o", "R", "r", "U", "u", "Е", "е", "И", "и", "У", "у"},
	},
}
m["ss"] = {
"Swazi",
34014,
"bnt-ngu",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["st"] = {
"Sotho",
34340,
"bnt-sts",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["su"] = {
"Sundanese",
34002,
"poz-msa",
{"Latn", "Sund"},
translit_module = "su-translit",
}
m["sv"] = {
"Swedish",
9027,
"gmq",
Latn,
ancestors = {"gmq-osw"},
}
m["sw"] = {
"Swahili",
7838,
"bnt-swh",
LatnArab,
sort_key = {
from = {"ng'", "^-"},
to = {"ngz"}} ,
}
m["ta"] = {
"Tamil",
5885,
"dra",
{"Taml"},
ancestors = {"oty"},
translit_module = "ta-translit",
override_translit = true,
}
m["te"] = {
"Telugu",
8097,
"dra",
{"Telu"},
translit_module = "te-translit",
override_translit = true,
}
m["tg"] = {
"Tajik",
9260,
"ira-swi",
{"Cyrl", "fa-Arab", "Latn"},
ancestors = {"pal"}, -- same as "fa", see WT:T:AFA
translit_module = "tg-translit",
override_translit = true,
sort_key = {
from = {"Ё", "ё"},
to = {"Е" , "е"}} ,
entry_name = {
from = {ACUTE},
to = {}} ,
}
m["th"] = {
"Tai",
9217,
"tai-swe",
{"Thai", "Brai"},
translit_module = "th-translit",
sort_key = {
from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"},
to = {"", "", "%2%1"}},
}
m["ti"] = {
"Tigrinya",
34124,
"sem-eth",
{"Ethi"},
translit_module = "Ethi-translit",
}
m["tk"] = {
"Turkmen",
9267,
"trk-ogz",
{"Latn", "Cyrl", "Arab"},
entry_name = {
from = {"ā", "ē", "ī", "ō", "ū", "ȳ", "ȫ", "ǖ", MACRON},
to = {"a", "e", "i", "o", "u", "y", "ö", "ü", ""}},
ancestors = {"trk-ogz-pro"},
}
-- Tagalog.
m["tl"] = {
	"Tagalog",
	34057,
	"phi",
	{"Latn", "Tglg"},
	entry_name = {
		-- Accented vowels map to the bare vowel; the combining acute, grave
		-- and circumflex have no counterpart in `to`.
		from = {"[áàâ]", "[éèê]", "[íìî]", "[óòô]", "[úùû]", ACUTE, GRAVE, CIRC},
		to = {"a", "e", "i", "o", "u"},
	},
	translit_module = "tl-translit",
	override_translit = true,
}
m["tn"] = {
"Tswana",
34137,
"bnt-sts",
Latn,
}
-- Tongan.
m["to"] = {
	"Tongan",
	34094,
	"poz-pol",
	Latn,
	-- Macrons are handled in sort_key only: macroned vowels map to the bare
	-- vowel and a combining macron maps to the empty string.
	sort_key = {
		from = {"ā", "ē", "ī", "ō", "ū", MACRON},
		to = {"a", "e", "i", "o", "u", ""},
	},
	-- Acute accents are handled in entry_name in the same way.
	entry_name = {
		from = {"á", "é", "í", "ó", "ú", ACUTE},
		to = {"a", "e", "i", "o", "u", ""},
	},
}
m["tr"] = {
"Turkish",
256,
"trk-ogz",
Latn,
ancestors = {"ota"},
}
m["ts"] = {
"Tsonga",
34327,
"bnt-tsr",
Latn,
}
m["tt"] = {
"Tatar",
25285,
"trk-kbu",
{"Cyrl", "Latn", "tt-Arab"},
translit_module = "tt-translit",
override_translit = true,
}
-- "tw" IS TREATED AS "ak", SEE WT:LT
m["ty"] = {
"Tahitian",
34128,
"poz-pep",
Latn,
}
m["ug"] = {
"Uyghur",
13263,
"trk-kar",
{"ug-Arab", "Latn", "Cyrl"},
ancestors = {"chg"},
translit_module = "ug-translit",
override_translit = true,
}
m["uk"] = {
"Ukrainian",
8798,
"zle",
Cyrl,
ancestors = {"orv"},
translit_module = "uk-translit",
entry_name = {
from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE},
to = {"Е", "е", "И", "и"}},
standardChars = "ЄІЇА-ЩЬЮ-щьюяєії" .. PUNCTUATION,
}
m["ur"] = {
"Urdu",
1617,
"inc-hnd",
{"ur-Arab"},
ancestors = {"inc-ohi"},
entry_name = {
from = {u(0x0640), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658)},
to = {}} ,
}
m["uz"] = {
"Uzbek",
9264,
"trk-kar",
{"Latn", "Cyrl", "fa-Arab"},
ancestors = {"chg"},
}
m["ve"] = {
"Venda",
32704,
"bnt-bso",
Latn,
}
m["vi"] = {
"Vietnamese",
9199,
"mkh-vie",
{"Latn", "Hani"},
ancestors = {"mkh-mvi"},
sort_key = "vi-sortkey",
}
m["vo"] = {
"Volapük",
36986,
"art",
Latn,
}
m["wa"] = {
"Walloon",
34219,
"roa-oil",
Latn,
ancestors = {"fro"},
sort_key = {
from = {"[áàâäå]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"},
to = {"a" , "e" , "i" , "o" , "u" , "y" , "c"}} ,
}
m["wo"] = {
"Wolof",
34257,
"alv-fwo",
LatnArab,
}
m["xh"] = {
"Xhosa",
13218,
"bnt-ngu",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["yi"] = {
"Yiddish",
8641,
"gmw",
{"Hebr"},
ancestors = {"gmh"},
sort_key = {
from = {"[אַאָ]", "בּ", "[וֹוּ]", "יִ", "ײַ", "פֿ"},
to = {"א", "ב", "ו", "י", "יי", "פ"}} ,
translit_module = "yi-translit",
}
m["yo"] = {
"Yoruba",
34311,
"alv-yor",
Latn,
sort_key = {
from = {"ẹ", "ọ", "gb", "ṣ"},
to = {"e~" , "o~", "g~", "s~"}},
entry_name = { remove_diacritics = ACUTE .. GRAVE .. MACRON },
}
m["za"] = {
"Zhuang",
13216,
"tai",
{"Latn", "Hani"},
sort_key = {
from = {"%p"},
to = {""}},
}
m["zh"] = {
"Chinese",
7850,
"zhx",
{"Hani", "Brai", "Nshu"},
ancestors = {"ltc"},
sort_key = "zh-sortkey",
}
m["zu"] = {
"Zulu",
10179,
"bnt-ngu",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
return m
ic3npv3o3391rej1u5pwl6rmfk5b6y7
13339
13338
2022-08-04T12:40:06Z
Asinis632
1829
Scribunto
text/plain
-- Shorthand used throughout this module to build a string from Unicode code points.
local u = mw.ustring.char
-- UTF-8 encoded strings for some commonly-used diacritics
local GRAVE = u(0x0300) -- U+0300 COMBINING GRAVE ACCENT
local ACUTE = u(0x0301) -- U+0301 COMBINING ACUTE ACCENT
local CIRC = u(0x0302) -- U+0302 COMBINING CIRCUMFLEX ACCENT
local TILDE = u(0x0303) -- U+0303 COMBINING TILDE
local MACRON = u(0x0304) -- U+0304 COMBINING MACRON
local BREVE = u(0x0306) -- U+0306 COMBINING BREVE
local DOTABOVE = u(0x0307) -- U+0307 COMBINING DOT ABOVE
local DIAER = u(0x0308) -- U+0308 COMBINING DIAERESIS
local CARON = u(0x030C) -- U+030C COMBINING CARON
local DGRAVE = u(0x030F) -- U+030F COMBINING DOUBLE GRAVE ACCENT
local INVBREVE = u(0x0311) -- U+0311 COMBINING INVERTED BREVE
local DOTBELOW = u(0x0323) -- U+0323 COMBINING DOT BELOW
local RINGBELOW = u(0x0325) -- U+0325 COMBINING RING BELOW
local CEDILLA = u(0x0327) -- U+0327 COMBINING CEDILLA
local OGONEK = u(0x0328) -- U+0328 COMBINING OGONEK
local CGJ = u(0x034F) -- combining grapheme joiner
local DOUBLEINVBREVE = u(0x0361) -- U+0361 COMBINING DOUBLE INVERTED BREVE
-- Punctuation to be used for standardChars field
local PUNCTUATION = ' !#%&*+,-./:;<=>?@^_`|~\'()'
-- Script-code lists shared by the many entries below that use exactly these scripts.
local Cyrl = {"Cyrl"}
local Latn = {"Latn"}
local LatnArab = {"Latn", "Arab"}
-- Language data keyed by language code; filled in by the assignments below.
local m = {}
m["aa"] = {
"Afar",
27811,
"cus-eas",
Latn,
entry_name = { remove_diacritics = ACUTE},
}
m["ab"] = {
"Abkhaz",
5111,
"cau-abz",
{"Cyrl", "Geor", "Latn"},
translit_module = "ab-translit",
override_translit = true,
entry_name = {
from = {GRAVE, ACUTE},
to = {}} ,
}
m["ae"] = {
"Avestan",
29572,
"ira-cen",
{"Avst", "Gujr"},
translit_module = "Avst-translit",
wikipedia_article = "Avestan",
}
m["af"] = {
"Afrikaans",
14196,
"gmw",
LatnArab,
ancestors = {"nl"},
sort_key = {
from = {"[äáâà]", "[ëéêè]", "[ïíîì]", "[öóôò]", "[üúûù]", "[ÿýŷỳ]", "^-", "'"},
to = {"a" , "e" , "i" , "o" , "u" , "y" }} ,
}
m["ak"] = {
"Akan",
28026,
"alv-ctn",
Latn,
}
m["am"] = {
"Amharic",
28244,
"sem-eth",
{"Ethi"},
translit_module = "Ethi-translit",
}
m["an"] = {
"Aragonese",
8765,
"roa-ibe",
Latn,
ancestors = {"roa-oan"},
}
m["ar"] = {
"Arabic",
13955,
"sem-arb",
{"Arab", "Hebr", "Brai"},
-- replace alif waṣl with alif
-- remove tatweel and diacritics: fathatan, dammatan, kasratan, fatha,
-- damma, kasra, shadda, sukun, superscript (dagger) alef
entry_name = {
from = {u(0x0671), u(0x0640), "[" .. u(0x064B) .. "-" .. u(0x0652) .. "]", u(0x0670)},
to = {u(0x0627)}},
-- put Judeo-Arabic (Hebrew-script Arabic) under the category header
-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles
sort_key = {
from = {"^%f[" .. u(0x5D0) .. "-" .. u(0x5EA) .. "]"},
to = {u(0xFB21)},
},
translit_module = "ar-translit",
}
m["as"] = {
"Assamese",
29401,
"inc-eas",
{"as-Beng"},
ancestors = {"inc-mas"},
translit_module = "as-translit",
}
m["av"] = {
"Avar",
29561,
"cau-nec",
Cyrl,
ancestors = {"oav"},
translit_module = "av-translit",
override_translit = true,
entry_name = {
from = {GRAVE, ACUTE},
to = {}} ,
}
m["ay"] = {
"Aymara",
4627,
"sai-aym",
Latn,
}
m["az"] = {
"Azerbaijani",
9292,
"trk-ogz",
{"Latn", "Cyrl", "fa-Arab"},
ancestors = {"trk-oat"},
}
m["ba"] = {
"Bashkir",
13389,
"trk-kbu",
Cyrl,
translit_module = "ba-translit",
override_translit = true,
}
m["be"] = {
"Belarusian",
9091,
"zle",
Cyrl,
ancestors = {"orv"},
translit_module = "be-translit",
sort_key = {
from = {"Ё", "ё"},
to = {"Е" , "е"}},
entry_name = {
from = {"Ѐ", "ѐ", GRAVE, ACUTE},
to = {"Е", "е"}},
}
m["bg"] = {
"Bulgarian",
7918,
"zls",
{"Cyrl"},
ancestors = {"cu"},
translit_module = "bg-translit",
entry_name = {
from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE},
to = {"Е", "е", "И", "и"}},
}
m["bh"] = {
"Bihari",
135305,
"inc-eas",
{"Deva"},
ancestors = {"inc-mgd"},
}
m["bi"] = {
"Bislama",
35452,
"crp",
Latn,
ancestors = {"en"},
}
m["bm"] = {
"Bambara",
33243,
"dmn-emn",
Latn,
}
m["bn"] = {
"Bengali",
9610,
"inc-eas",
{"Beng", "Newa"},
ancestors = {"inc-mbn"},
translit_module = "bn-translit",
}
m["bo"] = {
"Tibetan",
34271,
"sit-tib",
{"Tibt"}, -- sometimes Deva?
ancestors = {"xct"},
translit_module = "bo-translit",
override_translit = true,
}
m["br"] = {
"Breton",
12107,
"cel-bry",
Latn,
ancestors = {"xbm"},
}
m["ca"] = {
"Catalan",
7026,
"roa",
Latn,
ancestors = {"roa-oca"},
sort_key = {
from = {"à", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "l·l"},
to = {"a", "e" , "i" , "o" , "u" , "c", "ll" }} ,
}
m["ce"] = {
"Chechen",
33350,
"cau-vay",
Cyrl,
translit_module = "ce-translit",
override_translit = true,
entry_name = {
from = {MACRON},
to = {}},
}
m["ch"] = {
"Chamorro",
33262,
"poz-sus",
Latn,
}
m["co"] = {
"Corsican",
33111,
"roa-itd",
Latn,
}
m["cr"] = {
"Cree",
33390,
"alg",
{"Cans", "Latn"},
translit_module = "translit-redirect",
}
m["cs"] = {
"Czech",
9056,
"zlw",
Latn,
ancestors = {"zlw-ocs"},
sort_key = {
from = {"á", "é", "í", "ó", "[úů]", "ý"},
to = {"a", "e", "i", "o", "u" , "y"}} ,
}
m["cu"] = {
"Old Church Slavonic",
35499,
"zls",
{"Cyrs", "Glag"},
translit_module = "Cyrs-Glag-translit",
entry_name = {
from = {u(0x0484)}, -- kamora
to = {}},
sort_key = {
from = {"оу", "є"},
to = {"у" , "е"}} ,
}
m["cv"] = {
"Chuvash",
33348,
"trk-ogr",
Cyrl,
ancestors = {"xbo"},
translit_module = "cv-translit",
override_translit = true,
}
m["cy"] = {
"Welsh",
9309,
"cel-bry",
Latn,
ancestors = {"wlm"},
sort_key = {
from = {"[âáàä]", "ch", "dd", "[êéèë]", "ff", "ngh", "[îíìï]", "ll", "[ôóòö]", "ph", "rh", "th", "[ûúùü]", "[ŵẃẁẅ]", "[ŷýỳÿ]", "'"},
to = {"a" , "c~", "d~", "e" , "f~", "g~h", "i" , "l~", "o" , "p~", "r~", "t~", "u" , "w" , "y" }} ,
standardChars = "A-IL-PR-UWYa-il-pr-uwy0-9ÂâÊêÎîÔôÛûŴŵŶŷ" .. PUNCTUATION,
}
m["da"] = {
"Danish",
9035,
"gmq",
Latn,
ancestors = {"gmq-oda"},
}
m["de"] = {
"Jeman",
188,
"gmw",
{"Latn", "Latf"},
ancestors = {"gmh"},
sort_key = {
from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]", "ß" },
to = {"a" , "e" , "i" , "o" , "u" , "ss"}} ,
standardChars = "A-Za-z0-9ÄäÖöÜüß" .. PUNCTUATION,
}
m["dv"] = {
"Dhivehi",
32656,
"inc-ins",
{"Thaa"},
ancestors = {"elu-prk"},
translit_module = "dv-translit",
override_translit = true,
}
m["dz"] = {
"Dzongkha",
33081,
"sit-tib",
{"Tibt"},
ancestors = {"xct"},
translit_module = "bo-translit",
override_translit = true,
}
m["ee"] = {
"Ewe",
30005,
"alv-gbe",
Latn,
}
m["el"] = {
"Greek",
9129,
"grk",
{"Grek", "Brai"},
ancestors = {"grc"},
translit_module = "el-translit",
override_translit = true,
sort_key = { -- Keep this synchronized with grc, cpg, pnt, tsd
from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"},
to = {"α" , "ε" , "η" , "ι" , "ο" , "υ" , "ω" , "ρ", "σ"}} ,
standardChars = "ͺ;΄-ώϜϝ" .. PUNCTUATION,
}
m["en"] = {
"Inglis",
1860,
"gmw",
{"Latn", "Brai", "Shaw", "Dsrt"}, -- entries in Shaw or Dsrt might require prior discussion
ancestors = {"enm"},
sort_key = {
from = {"[äàáâåā]", "[ëèéêē]", "[ïìíîī]", "[öòóôō]", "[üùúûū]", "æ" , "œ" , "[çč]", "ñ", "'"},
to = {"a" , "e" , "i" , "o" , "u" , "ae", "oe", "c" , "n"}},
wikimedia_codes = {"en", "simple"},
standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF),
}
m["eo"] = {
"Esperanto",
143,
"art",
Latn,
sort_key = {
from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ĉ]", "[ĝ]", "[ĥ]", "[ĵ]", "[ŝ]", "[ŭ]"},
to = {"a" , "e" , "i" , "o" , "u", "cĉ", "gĉ", "hĉ", "jĉ", "sĉ", "uĉ"}} ,
standardChars = "A-PRSTUVZa-prstuvzĉĈĝĜĵĴŝŜŭŬ0-9" .. PUNCTUATION,
}
m["es"] = {
"Spanish",
1321,
"roa-ibe",
{"Latn", "Brai"},
ancestors = {"osp"},
sort_key = {
from = {"á", "é", "í", "ó", "[úü]", "ç", "ñ"},
to = {"a", "e", "i", "o", "u" , "c", "n~"}},
standardChars = "A-VXYZa-vxyz0-9ÁáÉéÍíÓóÚúÑñ¿¡" .. PUNCTUATION,
}
m["et"] = {
"Estonian",
9072,
"fiu-fin",
Latn,
}
m["eu"] = {
"Basque",
8752,
"euq",
Latn,
}
m["fa"] = {
"Persian",
9168,
"ira-swi",
{"fa-Arab"},
ancestors = {"pal"}, -- "ira-mid"
entry_name = {
from = {u(0x064E), u(0x0640), u(0x064F), u(0x0650), u(0x0651), u(0x0652)},
to = {}} ,
}
m["ff"] = {
"Fula",
33454,
"alv-fwo",
{"Latn", "Adlm"},
}
-- Finnish.
m["fi"] = {
	"Finnish",
	1412,
	"fiu-fin",
	Latn,
	entry_name = {
		from = {"ˣ"}, -- Used to indicate gemination of the next consonant
		to = {},
	},
	sort_key = {
		-- Parallel pattern/replacement lists; the final "[':]" class has no
		-- counterpart in `to`.
		-- "õ" is listed only in the "ö" class (together with "ø" and "ő").
		-- It must not also appear in the earlier "o" class: assuming the
		-- pairs are applied in list order, the "o" class would consume it
		-- first and the "ö" mapping could never fire.
		from = {"[áàâã]", "[éèêẽ]", "[íìîĩ]", "[óòô]", "[úùûũ]", "[ýỳŷüű]", "[øõő]", "æ", "œ", "[čç]", "š", "ž", "ß", "[':]"},
		to = {"a", "e", "i", "o", "u", "y", "ö", "ae", "oe", "c", "s", "z", "ss"},
	},
}
m["fj"] = {
"Fijian",
33295,
"poz-occ",
Latn,
}
m["fo"] = {
"Faroese",
25258,
"gmq",
Latn,
ancestors = {"non"},
}
m["fr"] = {
"French",
150,
"roa-oil",
{"Latn", "Brai"},
ancestors = {"frm"},
sort_key = {
from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ" , "œ" , "'"},
to = {"a" , "e" , "i" , "o" , "u" , "y" , "c", "ae", "oe"}},
standardChars = "A-Za-z0-9ÀÂÇÉÈÊËÎÏÔŒÛÙÜàâçéèêëîïôœûùü«»" .. PUNCTUATION,
}
m["fy"] = {
"West Frisian",
27175,
"gmw-fri",
Latn,
ancestors = {"ofs"},
sort_key = {
from = {"[àáâä]", "[èéêë]", "[ìíîïyỳýŷÿ]", "[òóôö]", "[ùúûü]", "æ", "[ /.-]"},
to = {"a" , "e" , "i" , "o" , "u", "ae"}} ,
standardChars = "A-PR-WYZa-pr-wyz0-9Ææâäàéêëèïìôöòúûüùỳ" .. PUNCTUATION,
}
m["ga"] = {
"Irish",
9142,
"cel-gae",
Latn,
ancestors = {"mga"},
sort_key = {
from = {"á", "é", "í", "ó", "ú", "ý", "ḃ" , "ċ" , "ḋ" , "ḟ" , "ġ" , "ṁ" , "ṗ" , "ṡ" , "ṫ" },
to = {"a", "e", "i", "o", "u", "y", "bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}} ,
standardChars = "A-IL-PR-Ua-il-pr-u0-9ÁáÉéÍíÓóÚú" .. PUNCTUATION,
}
m["gd"] = {
"Scottish Gaelic",
9314,
"cel-gae",
Latn,
ancestors = {"mga"},
sort_key = {
from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ýỳ]"},
to = {"a" , "e" , "i" , "o" , "u" , "y" }} ,
standardChars = "A-IL-PR-Ua-il-pr-u0-9ÀàÈèÌìÒòÙù" .. PUNCTUATION,
}
m["gl"] = {
"Galician",
9307,
"roa-ibe",
Latn,
ancestors = {"roa-opt"},
sort_key = {
from = {"á", "é", "í", "ó", "ú"},
to = {"a", "e", "i", "o", "u"}} ,
}
m["gn"] = {
"Guaraní",
35876,
"tup-gua",
Latn,
}
m["gu"] = {
"Gujarati",
5137,
"inc-wes",
{"Gujr"},
ancestors = {"inc-mgu"},
translit_module = "gu-translit",
}
m["gv"] = {
"Manx",
12175,
"cel-gae",
Latn,
ancestors = {"mga"},
sort_key = {
from = {"ç", "-"},
to = {"c"}} ,
standardChars = "A-WYÇa-wyç0-9" .. PUNCTUATION,
}
m["ha"] = {
"Hausa",
56475,
"cdc-wst",
LatnArab,
sort_key = {
from = {"ɓ", "ɗ", "ƙ", "'y", "ƴ", "'" },
to = {"b~" , "d~" , "k~", "y~", "y~", "" }},
entry_name = {
from = {"R̃", "r̃", "À", "à", "È", "è", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Î", "î", "Ô", "ô", "Û", "û", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú", "Ā̀", "ā̀", "Ḕ", "ḕ", "Ī̀", "ī̀", "Ṑ", "ṑ", "Ū̀", "ū̀", GRAVE, ACUTE},
to = {"R", "r", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}},
}
m["he"] = {
"Hebrew",
9288,
"sem-can",
{"Hebr", "Phnx", "Brai"},
entry_name = {
from = {"[" .. u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. CGJ .. "]"},
to = {}} ,
}
m["hi"] = {
"Hindi",
1568,
"inc-hnd",
{"Deva", "Kthi", "Newa"},
ancestors = {"inc-ohi"},
translit_module = "hi-translit",
standardChars = "ँंअ-ऊएऐओ-घच-झट-नप-रलवशसहा-ूेैो-◌्।-॰ड़ढ़" .. PUNCTUATION,
}
m["ho"] = {
"Hiri Motu",
33617,
"crp",
Latn,
ancestors = {"meu"},
}
m["ht"] = {
"Haitian Creole",
33491,
"crp",
Latn,
ancestors = {"fr"},
}
m["hu"] = {
"Hungarian",
9067,
"urj-ugr",
{"Latn", "Hung"},
ancestors = {"ohu"},
sort_key = {
from = {"á", "é", "í", "ó", "ú", "[öő]", "[üű]", "cs", "dzs", "gy", "ly", "ny", "zs"},
to = {"a", "e", "i", "o", "u", "o~", "u~", "c~", "dz~", "g~", "l~", "n~", "z~"},
},
}
m["hy"] = {
"Armenian",
8785,
"hyx",
{"Armn", "Brai"},
ancestors = {"axm"},
translit_module = "Armn-translit",
override_translit = true,
sort_key = {
from = {"ու", "և", "եւ"},
to = {"ւ", "եվ", "եվ"}},
entry_name = {
from = {"՞", "՜", "՛", "՟", "և", "<sup>յ</sup>", "<sup>ի</sup>", "<sup>է</sup>"},
to = {"", "", "", "", "եւ", "յ", "ի", "է"}} ,
}
m["hz"] = {
"Herero",
33315,
"bnt-swb",
Latn,
}
m["ia"] = {
"Interlingua",
35934,
"art",
Latn,
}
m["id"] = {
"Indonesian",
9240,
"poz-mly",
Latn,
ancestors = {"ms"},
}
m["ie"] = {
"Interlingue",
35850,
"art",
Latn,
type = "appendix-constructed",
}
-- Igbo.
m["ig"] = {
	"Igbo",
	33578,
	"alv-igb",
	Latn,
	sort_key = {
		-- Parallel pattern/replacement lists. Dotted vowels and digraphs get
		-- a suffix ("y" < "z" < "~") that orders them after the plain letter.
		-- The digraph "ny" is rewritten BEFORE "ṅ": "ṅ" is replaced by the
		-- literal text "ny", so if the "ny" pair came later (and the pairs
		-- are applied in list order) it would rewrite that output again and
		-- give "ṅ" the same key as the digraph "ny".
		from = {"ụ", "ị", "ọ", "gb", "gh", "gw", "kp", "kw", "ny", "ṅ", "nw", "sh"},
		to = {"u~", "i~", "o~", "gy", "gz", "g~", "kz", "k~", "n~", "ny", "nz", "s~"},
	},
	entry_name = { remove_diacritics = ACUTE .. GRAVE .. MACRON },
}
m["ii"] = {
"Sichuan Yi",
34235,
"tbq-lol",
{"Yiii"},
translit_module = "ii-translit",
}
m["ik"] = {
"Inupiaq",
27183,
"esx-inu",
Latn,
}
m["io"] = {
"Ido",
35224,
"art",
Latn,
}
m["is"] = {
"Icelandic",
294,
"gmq",
Latn,
ancestors = {"non"},
}
m["it"] = {
"Italian",
652,
"roa-itd",
Latn,
sort_key = {
from = {"[àáâäå]", "[èéêë]", "[ìíîï]", "[òóôö]", "[ùúûü]"},
to = {"a" , "e" , "i" , "o" , "u" }} ,
standardChars = "A-IL-VZa-il-vz0-9" .. PUNCTUATION,
}
m["iu"] = {
"Inuktitut",
29921,
"esx-inu",
{"Cans", "Latn"},
translit_module = "translit-redirect",
override_translit = true,
}
m["ja"] = {
"Siapan",
5287,
"jpx",
{"Jpan", "Brai"},
ancestors = {"ojp"},
--[=[
-- Handled by jsort function in [[Module:ja]].
sort_key = {
from = {"[ぁァア]", "[ぃィイ]", "[ぅゔゥウヴ]", "[ぇェエ]", "[ぉォオ]", "[がゕカガヵ]", "[ぎキギ]", "[ぐクグㇰ]", "[げゖケゲヶ]", "[ごコゴ]", "[ざサザ]", "[じシジㇱ]", "[ずスズㇲ]", "[ぜセゼ]", "[ぞソゾ]", "[だタダ]", "[ぢチヂ]", "[っづッツヅ]", "[でテデ]", "[どトドㇳ]", "ナ", "ニ", "[ヌㇴ]", "ネ", "ノ", "[ばぱハバパㇵ]", "[びぴヒビピㇶ]", "[ぶぷフブプㇷ]", "[べぺヘベペㇸ]", "[ぼぽホボポㇹ]", "マ", "ミ", "[ムㇺ]", "メ", "モ", "[ゃャヤ]", "[ゅュユ]", "[ょョヨ]", "[ラㇻ]", "[リㇼ]", "[ルㇽ]", "[レㇾ]", "[ロㇿ]", "[ゎヮワヷ]", "[ヰヸ]", "[ヱヹ]", "[ヲヺ]", "ン", "[゙゚゛゜ゝゞ・ヽヾ]", "𛀀"},
to = {"あ", "い", "う", "え", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "や", "ゆ", "よ", "ら", "り", "る", "れ", "ろ", "わ", "ゐ", "ゑ", "を", "ん", "", "え"}},
--]=]
}
m["jv"] = {
"Javanese",
33549,
"poz-sus",
{"Latn", "Java"},
translit_module = "jv-translit",
ancestors = {"kaw"},
link_tr = true,
}
m["ka"] = {
"Georgian",
8108,
"ccs-gzn",
{"Geor", "Geok", "Hebr"}, -- Hebr is used to write Judeo-Georgian
ancestors = {"oge"},
translit_module = "Geor-translit",
override_translit = true,
entry_name = {
from = {"̂"},
to = {""}},
}
m["kg"] = {
"Kongo",
33702,
"bnt-kng",
Latn,
}
m["ki"] = {
"Kikuyu",
33587,
"bnt-kka",
Latn,
}
m["kj"] = {
"Kwanyama",
1405077,
"bnt-ova",
Latn,
}
m["kk"] = {
"Kazakh",
9252,
"trk-kno",
{"Cyrl", "Latn", "kk-Arab"},
translit_module = "kk-translit",
override_translit = true,
}
m["kl"] = {
"Greenlandic",
25355,
"esx-inu",
Latn,
}
m["km"] = {
"Khmer",
9205,
"mkh-kmr",
{"Khmr"},
ancestors = {"mkh-mkm"},
translit_module = "km-translit",
}
m["kn"] = {
"Kannada",
33673,
"dra",
{"Knda"},
ancestors = {"dra-mkn"},
translit_module = "kn-translit",
}
m["ko"] = {
"Korean",
9176,
"qfa-kor",
{"Kore", "Brai"},
ancestors = {"okm"},
-- 20210122 idea: strip parenthesized hanja from entry link
-- Hani regex is a reasonable subset of Hani from [[Module:scripts/data]],
-- last updated on 20210214.
entry_name = {
from = {
" *%([一-鿿㐀-䶿𠀀-𰀀-﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧﨨﨩]+%)",
},
to = {
"",
}},
display = {
from = {"%-"},
to = {},
},
translit_module = "ko-translit",
}
m["kr"] = {
"Kanuri",
36094,
"ssa-sah",
LatnArab,
sort_key = {
from = {"ny", "ǝ", "sh"},
to = {"n~", "e~", "s~"}} , -- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
entry_name = {
from = {"À", "à", "È", "è", "Ǝ̀", "ǝ̀", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Ǝ̂", "ǝ̂", "Î", "î", "Ô", "ô", "Û", "û", "Ă", "ă", "Ĕ", "ĕ", "Ǝ̆", "ǝ̆", "Ĭ", "ĭ", "Ŏ", "ŏ", "Ŭ", "ŭ", "Á", "á", "É", "é", "Ǝ́", "ǝ́", "Í", "í", "Ó", "ó", "Ú", "ú", GRAVE, ACUTE},
to = {"A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u"}},
}
m["ks"] = {
"Kashmiri",
33552,
"inc-dar",
{"ks-Arab", "Deva", "Shrd", "Latn"},
translit_module = "translit-redirect",
ancestors = {"sa"},
}
-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT
m["kw"] = {
"Cornish",
25289,
"cel-bry",
Latn,
ancestors = {"cnx"},
}
m["ky"] = {
"Kyrgyz",
9255,
"trk-kip",
{"Cyrl", "Latn", "Arab"},
translit_module = "ky-translit",
override_translit = true,
}
m["la"] = {
"Latin",
397,
"itc",
Latn,
ancestors = {"itc-ola"},
entry_name = {remove_diacritics = MACRON .. BREVE .. DIAER .. DOUBLEINVBREVE},
standardChars = "A-Za-z0-9ÆæŒœĀ-ăĒ-ĕĪ-ĭŌ-ŏŪ-ŭȲȳ" .. MACRON .. BREVE .. PUNCTUATION,
}
m["lb"] = {
"Luxembourgish",
9051,
"gmw",
Latn,
ancestors = {"gmh"},
}
-- Luganda.
m["lg"] = {
	"Luganda",
	33368,
	"bnt-nyg",
	Latn,
	sort_key = {
		from = {"ŋ"},
		to = {"n"},
	},
	entry_name = {
		-- Parallel lists: the accented letter at position i in `from` is
		-- paired with the bare letter at position i in `to`.
		from = {
			"á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", -- vowels with acute
			"ń", "Ń", "ḿ", "Ḿ", -- nasals with acute
			"â", "Â", "ê", "Ê", "î", "Î", "ô", "Ô", "û", "Û", -- vowels with circumflex
		},
		to = {
			"a", "A", "e", "E", "i", "I", "o", "O", "u", "U",
			"n", "N", "m", "M",
			"a", "A", "e", "E", "i", "I", "o", "O", "u", "U",
		},
	},
}
m["li"] = {
"Limburgish",
102172,
"gmw",
Latn,
ancestors = {"dum"},
}
m["ln"] = {
"Lingala",
36217,
"bnt-bmo",
Latn,
}
m["lo"] = {
"Lao",
9211,
"tai-swe",
{"Laoo"},
translit_module = "lo-translit",
sort_key = {
from = {"[%pໆ]", "[່-ໍ]", "ຼ", "ຽ", "ໜ", "ໝ", "([ເແໂໃໄ])([ກ-ຮ])"},
to = {"", "", "ລ", "ຍ", "ຫນ", "ຫມ", "%2%1"}},
standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. PUNCTUATION,
}
m["lt"] = {
"Lithuanian",
9083,
"bat",
Latn,
ancestors = {"olt"},
entry_name = {
from = {"[áãà]", "[ÁÃÀ]", "[éẽè]", "[ÉẼÈ]", "[íĩì]", "[ÍĨÌ]", "[ýỹ]", "[ÝỸ]", "ñ", "[óõò]", "[ÓÕÒ]", "[úũù]", "[ÚŨÙ]", ACUTE, GRAVE, TILDE},
to = {"a", "A", "e", "E", "i", "I", "y", "Y", "n", "o", "O", "u", "U"}} ,
}
m["lu"] = {
"Luba-Katanga",
36157,
"bnt-lub",
Latn,
}
-- Latvian. The entry_name lists below are parallel: the pattern at position i
-- in `from` is paired with the replacement at position i in `to`.
m["lv"] = {
"Latvian",
9078,
"bat",
Latn,
entry_name = {
-- This attempts to convert vowels with tone marks to vowels either with
-- or without macrons. Specifically, there should be no macrons if the
-- vowel is part of a diphthong (including resonant diphthongs such as
-- pìrksts -> pirksts, not #pīrksts). What we do is first convert the
-- vowel + tone mark to a vowel + tilde in a decomposed fashion,
-- then remove the tilde in diphthongs, then convert the remaining
-- vowel + tilde sequences to macroned vowels, then delete any other
-- tilde. We leave already-macroned vowels alone: Both e.g. ar and ār
-- occur before consonants. FIXME: This still might not be sufficient.
from = {"Ȩ", "ȩ", "[ÂÃÀ]", "[âãà]", "[ÊẼÈ]", "[êẽè]", "[ÎĨÌ]", "[îĩì]", "[ÔÕÒ]", "[ôõò]", "[ÛŨÙ]", "[ûũù]", "[ÑǸ]", "[ñǹ]", "[" .. CIRC .. TILDE ..GRAVE .."]", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .. "?([^aAeEiIoOuUāĀēĒīĪūŪ])", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .."?$", "([iI])" .. TILDE .. "?([eE])" .. TILDE .. "?", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "U" .. TILDE, "u" .. TILDE, TILDE},
to = {"E", "e", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "O", "o", "U" .. TILDE, "u" .. TILDE, "N", "n", TILDE, "%1%2%3", "%1%2", "%1%2", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ū", "ū", ""}},
}
m["mg"] = {
"Malagasy",
7930,
"poz-bre",
Latn,
}
m["mh"] = {
"Marshallese",
36280,
"poz-mic",
Latn,
sort_key = {
from = {"ā" , "ļ" , "m̧" , "ņ" , "n̄" , "o̧" , "ō" , "ū" },
to = {"a~", "l~", "m~", "n~", "n~~", "o~", "o~~", "u~"}} ,
}
m["mi"] = {
"Maori",
36451,
"poz-pep",
Latn,
}
m["mk"] = {
"Macedonian",
9296,
"zls",
Cyrl,
translit_module = "mk-translit",
entry_name = {
from = {ACUTE},
to = {}},
}
m["ml"] = {
"Malayalam",
36236,
"dra",
{"Mlym"},
translit_module = "ml-translit",
override_translit = true,
}
m["mn"] = {
"Mongolian",
9246,
"xgn",
{"Cyrl", "Mong", "Soyo", "Zanb"}, -- entries in Soyo or Zanb might require prior discussion
ancestors = {"cmg"},
translit_module = "mn-translit",
override_translit = true,
}
-- "mo" IS TREATED AS "ro", SEE WT:LT
m["mr"] = {
"Marathi",
1571,
"inc-sou",
{"Deva", "Modi"},
ancestors = {"omr"},
translit_module = "mr-translit",
entry_name = {
from = {"च़", "ज़", "झ़"},
to = {"च", "ज", "झ"}} ,
}
m["ms"] = {
"Malay",
9237,
"poz-mly",
{"Latn", "ms-Arab"},
}
-- Maltese.
m["mt"] = {
	"Maltese",
	9166,
	"sem-arb",
	Latn,
	ancestors = {"sqr"},
	sort_key = {
		-- Parallel lists: each dotted/barred consonant is paired with its
		-- plain counterpart.
		from = {"ċ", "ġ", "ħ"},
		to = {"c", "g", "h"},
	},
}
m["my"] = {
"Burmese",
9228,
"tbq-brm",
{"Mymr"},
ancestors = {"obr"},
translit_module = "my-translit",
override_translit = true,
sort_key = {
from = {"ျ", "ြ", "ွ", "ှ", "ဿ"},
to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "သ္သ"}},
}
m["na"] = {
"Nauruan",
13307,
"poz-mic",
Latn,
}
m["nb"] = {
"Norwegian Bokmål",
25167,
"gmq",
Latn,
ancestors = {"gmq-mno"},
wikimedia_codes = {"no"},
}
m["nd"] = {
"Northern Ndebele",
35613,
"bnt-ngu",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["ne"] = {
"Nepali",
33823,
"inc-pah",
{"Deva", "Newa"},
translit_module = "ne-translit",
}
m["ng"] = {
"Ndonga",
33900,
"bnt-ova",
Latn,
}
m["nl"] = {
"Dutch",
7411,
"gmw",
Latn,
ancestors = {"dum"},
sort_key = {
from = {"[äáâå]", "[ëéê]", "[ïíî]", "[öóô]", "[üúû]", "ç", "ñ", "^-"},
to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} ,
standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF),
}
m["nn"] = {
"Norwegian Nynorsk",
25164,
"gmq",
Latn,
ancestors = {"gmq-mno"},
}
m["no"] = {
"Norwegian",
9043,
"gmq",
Latn,
ancestors = {"gmq-mno"},
}
m["nr"] = {
"Southern Ndebele",
36785,
"bnt-ngu",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["nv"] = {
"Navajo",
13310,
"apa",
Latn,
sort_key = {
from = {"[áą]", "[éę]", "[íį]", "[óǫ]", "ń", "^n([djlt])", "ł" , "[ʼ’']", ACUTE},
to = {"a" , "e" , "i" , "o" , "n", "ni%1" , "l~"}}, -- the tilde is used to guarantee that ł will always be sorted after all other words with l
}
m["ny"] = {
"Chichewa",
33273,
"bnt-nys",
Latn,
entry_name = {
from = {"ŵ", "Ŵ", "á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ" },
to = {"w", "W", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M"}},
sort_key = {
from = {"ng'"},
to = {"ng"}} ,
}
m["oc"] = {
"Occitan",
14185,
"roa",
{"Latn", "Hebr"},
ancestors = {"pro"},
sort_key = {
from = {"[àá]", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "([lns])·h"},
to = {"a" , "e" , "i" , "o" , "u" , "c", "%1h" }} ,
}
m["oj"] = {
"Ojibwe",
33875,
"alg",
{"Cans", "Latn"},
sort_key = {
from = {"aa", "ʼ", "ii", "oo", "sh", "zh"},
to = {"a~", "h~", "i~", "o~", "s~", "z~"}} ,
}
m["om"] = {
"Oromo",
33864,
"cus-eas",
{"Latn", "Ethi"},
}
m["or"] = {
"Oriya",
33810,
"inc-eas",
{"Orya"},
ancestors = {"inc-mor"},
translit_module = "or-translit",
}
m["os"] = {
"Ossetian",
33968,
"xsc",
{"Cyrl", "Geor", "Latn"},
ancestors = {"oos"},
translit_module = "os-translit",
override_translit = true,
entry_name = {
from = {GRAVE, ACUTE},
to = {}} ,
}
m["pa"] = {
"Punjabi",
58635,
"inc-pan",
{"Guru", "pa-Arab"},
ancestors = {"inc-opa"},
translit_module = "translit-redirect",
entry_name = {
from = {u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658), u(0x08C7), u(0x0768)},
to = {"", "", "", "", "", "", "", "", "", "ل", "ن"}} ,
}
m["pi"] = {
"Pali",
36727,
"inc-mid",
{"Latn", "Brah", "Deva", "Beng", "Sinh", "Mymr", "Thai", "Lana", "Laoo", "Khmr"},
ancestors = {"sa"},
translit_module = "translit-redirect",
sort_key = {
from = {"ā", "ī", "ū", "ḍ", "ḷ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṭ", "([เโ])([ก-ฮ])", "([ເໂ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
to = {"a~", "i~", "u~", "d~", "l~", "m~", "n~", "n~~", "n~~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} ,
entry_name = {
from = {u(0xFE00)},
to = {}},
}
m["pl"] = {
"Polish",
809,
"zlw-lch",
Latn,
ancestors = {"zlw-opl"},
sort_key = {
from = {"[Ąą]", "[Ćć]", "[Ęę]", "[Łł]", "[Ńń]", "[Óó]", "[Śś]", "[Żż]", "[Źź]"},
to = {
"a" .. u(0x10FFFF),
"c" .. u(0x10FFFF),
"e" .. u(0x10FFFF),
"l" .. u(0x10FFFF),
"n" .. u(0x10FFFF),
"o" .. u(0x10FFFF),
"s" .. u(0x10FFFF),
"z" .. u(0x10FFFF),
"z" .. u(0x10FFFE)}} ,
}
m["ps"] = {
"Pashto",
58680,
"ira-pat",
{"ps-Arab"},
ancestors = {"ira-pat-pro"},
}
m["pt"] = {
"Portuguese",
5146,
"roa-ibe",
{"Latn", "Brai"},
ancestors = {"roa-opt"},
sort_key = {
from = {"[àãáâä]", "[èẽéêë]", "[ìĩíï]", "[òóôõö]", "[üúùũ]", "ç", "ñ"},
to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} ,
}
m["qu"] = {
"Quechua",
5218,
"qwe",
Latn,
}
m["rm"] = {
"Romansch",
13199,
"roa-rhe",
Latn,
}
m["ro"] = {
"Romanian",
7913,
"roa-eas",
{"Latn", "Cyrl"},
sort_key = {
from = {"ă" , "â" , "î" , "ș" , "ț" },
to = {"a~", "a~~", "i~", "s~", "t~"}},
}
-- Russian.
m["ru"] = {
	"Russian",
	7737,
	"zle",
	{"Cyrl", "Brai"},
	translit_module = "ru-translit",
	standardChars = "ЁА-яё0-9—" .. PUNCTUATION,
	entry_name = {
		-- Parallel lists: the four precomposed grave letters map to their
		-- base letters; the trailing combining marks have no counterpart
		-- in `to`.
		from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE, DIAER},
		to = {"Е", "е", "И", "и"},
	},
	sort_key = {
		-- "ё" becomes "е" followed by U+10FFFF, the highest code point.
		from = {"ё"},
		to = {"е" .. u(0x10FFFF)},
	},
}
m["rw"] = {
"Rwanda-Rundi",
3217514,
"bnt-glb",
Latn,
entry_name = {
from = {"[áāâǎā́]", "[éēêěḗ]", "[íīîǐī́]", "[óōôǒṓ]", "[úūûǔū́]"},
to = {"a", "e" , "i", "o" , "u"} },
}
m["sa"] = {
"Sanskrit",
11059,
"inc-old",
{"Deva", "Bali", "as-Beng", "Beng", "Bhks", "Brah", "Gran", "Gujr", "Guru", "Java", "Khar", "Khmr", "Knda", "Lana", "Laoo", "Mlym", "Modi", "Mymr", "Nand", "Newa", "Orya", "Saur", "Shrd", "Sidd", "Sinh", "Taml", "Telu", "Thai", "Tibt", "Tirh"},
sort_key = {
from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} ,
entry_name = {
from = {u(0xFE00)},
to = {}},
translit_module = "translit-redirect",
}
m["sc"] = {
"Sardinian",
33976,
"roa",
Latn,
}
m["sd"] = {
"Sindhi",
33997,
"inc-snd",
{"sd-Arab", "Deva", "Sind", "Khoj"},
entry_name = {
from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)},
to = {u(0x0627)}},
ancestors = {"inc-vra"},
translit_module = "translit-redirect",
}
m["se"] = {
"Northern Sami",
33947,
"smi",
Latn,
entry_name = {
from = {"ạ", "[ēẹ]", "ī", "[ōọ]", "ū", "ˈ"},
to = {"a", "e" , "i", "o" , "u"} },
sort_key = {
from = {"á" , "č" , "đ" , "ŋ" , "š" , "ŧ" , "ž" },
to = {"a²", "c²", "d²", "n²", "s²", "t²", "z²"} },
standardChars = "A-PR-VZa-pr-vz0-9ÁáČčĐđŊŋŠšŦŧŽž" .. PUNCTUATION,
}
m["sg"] = {
"Sango",
33954,
"crp",
Latn,
ancestors = {"ngb"},
}
m["sh"] = {
"Serbo-Croatian",
9301,
"zls",
{"Latn", "Cyrl", "Glag"},
entry_name = {
from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "Е", "е", "И" , "и", "У", "у" }},
wikimedia_codes = {"sh", "bs", "hr", "sr"},
}
m["si"] = {
"Sinhalese",
13267,
"inc-ins",
{"Sinh"},
ancestors = {"elu-prk"},
translit_module = "si-translit",
override_translit = true,
}
m["sk"] = {
"Slovak",
9058,
"zlw",
Latn,
sort_key = {
from = {"[áä]", "é", "í", "[óô]", "ú", "ý", "ŕ", "ĺ", "[" .. DIAER .. ACUTE .. CIRC .. "]"},
to = {"a" , "e", "i", "o" , "u", "y", "r", "l", ""}} ,
}
m["sl"] = {
"Slovene",
9063,
"zls",
Latn,
entry_name = {
from = {"[ÁÀÂĀȂȀ]", "[áàâāȃȁ]", "[ÉÈÊĒȆȄỆẸ]", "[éèêēȇȅệẹə]", "[ÍÌÎĪȊȈ]", "[íìîīȋȉ]", "[ÓÒÔŌȎȌỘỌ]", "[óòôōȏȍộọ]", "[ŔȒȐ]", "[ŕȓȑ]", "[ÚÙÛŪȖȔ]", "[úùûūȗȕ]", "ł", GRAVE, ACUTE, CIRC, MACRON, DGRAVE, INVBREVE, DOTBELOW},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "l"},
},
sort_key = {
from = {"č" , "š" , "ž" },
to = {"c²", "s²", "z²"},
},
}
m["sm"] = {
"Samoan",
34011,
"poz-pnp",
Latn,
}
m["sn"] = {
"Shona",
34004,
"bnt-sho",
Latn,
entry_name = {remove_diacritics = ACUTE},
}
m["so"] = {
"Somali",
13275,
"cus-eas",
{"Latn", "Arab", "Osma"},
entry_name = {
from = {"[ÁÀÂ]", "[áàâ]", "[ÉÈÊ]", "[éèê]", "[ÍÌÎ]", "[íìî]", "[ÓÒÔ]", "[óòô]", "[ÚÙÛ]", "[úùû]", "[ÝỲ]", "[ýỳ]"},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "U" , "u", "Y", "y"}} ,
}
m["sq"] = {
"Albanian",
8748,
"sqj",
{"Latn", "Grek", "Elba"},
entry_name = {remove_diacritics = ACUTE},
sort_key = {
from = { '[âãä]', '[ÂÃÄ]', '[êẽë]', '[ÊẼË]', 'ĩ', 'Ĩ', 'õ', 'Õ', 'ũ', 'Ũ', 'ỹ', 'Ỹ', 'ç', 'Ç' },
to = { 'a', 'A', 'e', 'E', 'i', 'I', 'o', 'O', 'u', 'U', 'y', 'Y', 'c', 'C' } } ,
}
-- Serbian.
m["sr"] = {
	"Sebian",
	9299,
	"zls",
	-- Script codes go in the 4th positional slot, as in every other entry of
	-- this table (this entry previously used a named `scripts` field).
	{"Latn", "Cyrl"},
	aliases = {"Српски", "Српски језик", "српски", "српски језик", "srpski", "srpski jezik", "Srpski", "Srpski jezik"},
	entry_name = {
		-- Parallel pattern/replacement lists, same data as the m["sh"] entry:
		-- accented Latin and Cyrillic letters map to their base letters; the
		-- trailing combining marks have no counterpart in `to`.
		from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE},
		to = {"A", "a", "E", "e", "I", "i", "O", "o", "R", "r", "U", "u", "Е", "е", "И", "и", "У", "у"},
	},
}
m["ss"] = {
"Swazi",
34014,
"bnt-ngu",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["st"] = {
"Sotho",
34340,
"bnt-sts",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["su"] = {
"Sundanese",
34002,
"poz-msa",
{"Latn", "Sund"},
translit_module = "su-translit",
}
m["sv"] = {
"Swedish",
9027,
"gmq",
Latn,
ancestors = {"gmq-osw"},
}
m["sw"] = {
"Swahili",
7838,
"bnt-swh",
LatnArab,
sort_key = {
from = {"ng'", "^-"},
to = {"ngz"}} ,
}
m["ta"] = {
"Tamil",
5885,
"dra",
{"Taml"},
ancestors = {"oty"},
translit_module = "ta-translit",
override_translit = true,
}
m["te"] = {
"Telugu",
8097,
"dra",
{"Telu"},
translit_module = "te-translit",
override_translit = true,
}
m["tg"] = {
"Tajik",
9260,
"ira-swi",
{"Cyrl", "fa-Arab", "Latn"},
ancestors = {"pal"}, -- same as "fa", see WT:T:AFA
translit_module = "tg-translit",
override_translit = true,
sort_key = {
from = {"Ё", "ё"},
to = {"Е" , "е"}} ,
entry_name = {
from = {ACUTE},
to = {}} ,
}
m["th"] = {
"Tai",
9217,
"tai-swe",
{"Thai", "Brai"},
translit_module = "th-translit",
sort_key = {
from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"},
to = {"", "", "%2%1"}},
}
m["ti"] = {
"Tigrinya",
34124,
"sem-eth",
{"Ethi"},
translit_module = "Ethi-translit",
}
m["tk"] = {
"Turkmen",
9267,
"trk-ogz",
{"Latn", "Cyrl", "Arab"},
entry_name = {
from = {"ā", "ē", "ī", "ō", "ū", "ȳ", "ȫ", "ǖ", MACRON},
to = {"a", "e", "i", "o", "u", "y", "ö", "ü", ""}},
ancestors = {"trk-ogz-pro"},
}
-- Record for language code "tl" (Tagalog), written in the "Latn" or "Tglg"
-- scripts and transliterated by the "tl-translit" module.
m["tl"] = {
"Tagalog",
34057,
"phi",
{"Latn", "Tglg"},
translit_module = "tl-translit",
override_translit = true,
-- Page-name normalisation: precomposed accented vowels become plain vowels.
-- The three combining marks at the end have no counterpart in `to`.
entry_name = {
from = {
"[áàâ]", "[éèê]", "[íìî]", "[óòô]", "[úùû]",
ACUTE, GRAVE, CIRC,
},
to = {"a", "e", "i", "o", "u"},
},
}
m["tn"] = {
"Tswana",
34137,
"bnt-sts",
Latn,
}
m["to"] = {
"Tongan",
34094,
"poz-pol",
Latn,
sort_key = {
from = {"ā", "ē", "ī", "ō", "ū", MACRON},
to = {"a", "e", "i", "o", "u", ""}},
entry_name = {
from = {"á", "é", "í", "ó", "ú", ACUTE},
to = {"a", "e", "i", "o", "u", ""}},
}
m["tr"] = {
"Turkish",
256,
"trk-ogz",
Latn,
ancestors = {"ota"},
}
m["ts"] = {
"Tsonga",
34327,
"bnt-tsr",
Latn,
}
m["tt"] = {
"Tatar",
25285,
"trk-kbu",
{"Cyrl", "Latn", "tt-Arab"},
translit_module = "tt-translit",
override_translit = true,
}
-- "tw" IS TREATED AS "ak", SEE WT:LT
m["ty"] = {
"Tahitian",
34128,
"poz-pep",
Latn,
}
m["ug"] = {
"Uyghur",
13263,
"trk-kar",
{"ug-Arab", "Latn", "Cyrl"},
ancestors = {"chg"},
translit_module = "ug-translit",
override_translit = true,
}
m["uk"] = {
"Ukrainian",
8798,
"zle",
Cyrl,
ancestors = {"orv"},
translit_module = "uk-translit",
entry_name = {
from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE},
to = {"Е", "е", "И", "и"}},
standardChars = "ЄІЇА-ЩЬЮ-щьюяєії" .. PUNCTUATION,
}
m["ur"] = {
"Urdu",
1617,
"inc-hnd",
{"ur-Arab"},
ancestors = {"inc-ohi"},
entry_name = {
from = {u(0x0640), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658)},
to = {}} ,
}
m["uz"] = {
"Uzbek",
9264,
"trk-kar",
{"Latn", "Cyrl", "fa-Arab"},
ancestors = {"chg"},
}
m["ve"] = {
"Venda",
32704,
"bnt-bso",
Latn,
}
m["vi"] = {
"Vietnamese",
9199,
"mkh-vie",
{"Latn", "Hani"},
ancestors = {"mkh-mvi"},
sort_key = "vi-sortkey",
}
m["vo"] = {
"Volapük",
36986,
"art",
Latn,
}
m["wa"] = {
"Walloon",
34219,
"roa-oil",
Latn,
ancestors = {"fro"},
sort_key = {
from = {"[áàâäå]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"},
to = {"a" , "e" , "i" , "o" , "u" , "y" , "c"}} ,
}
m["wo"] = {
"Wolof",
34257,
"alv-fwo",
LatnArab,
}
m["xh"] = {
"Xhosa",
13218,
"bnt-ngu",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
-- Record for language code "yi" (Yiddish, Hebrew script).
m["yi"] = {
"Yiddish",
8641,
"gmw",
{"Hebr"},
ancestors = {"gmh"},
-- Sort key: each pointed letter is folded onto its unpointed base letter.
-- Every letter+point pair is written as its own literal pattern. Putting
-- such pairs inside a bracket class makes the bare point a class member, so
-- e.g. every patah would be rewritten to alef and the double-yod + patah
-- rule further down could never match. Code points are spelled out with
-- u() so the combining marks stay unambiguous in the source.
sort_key = {
from = {
u(0x05D0) .. u(0x05B7), -- alef + patah
u(0x05D0) .. u(0x05B8), -- alef + qamats
u(0x05D1) .. u(0x05BC), -- bet + dagesh
u(0x05D5) .. u(0x05B9), -- vav + holam
u(0x05D5) .. u(0x05BC), -- vav + dagesh
u(0x05D9) .. u(0x05B4), -- yod + hiriq
u(0x05F2) .. u(0x05B7), -- double-yod ligature + patah
u(0x05E4) .. u(0x05BF), -- pe + rafe
},
to = {
u(0x05D0), -- alef
u(0x05D0), -- alef
u(0x05D1), -- bet
u(0x05D5), -- vav
u(0x05D5), -- vav
u(0x05D9), -- yod
u(0x05D9) .. u(0x05D9), -- two yods
u(0x05E4), -- pe
},
},
translit_module = "yi-translit",
}
m["yo"] = {
"Yoruba",
34311,
"alv-yor",
Latn,
sort_key = {
from = {"ẹ", "ọ", "gb", "ṣ"},
to = {"e~" , "o~", "g~", "s~"}},
entry_name = { remove_diacritics = ACUTE .. GRAVE .. MACRON },
}
m["za"] = {
"Zhuang",
13216,
"tai",
{"Latn", "Hani"},
sort_key = {
from = {"%p"},
to = {""}},
}
m["zh"] = {
"Chinese",
7850,
"zhx",
{"Hani", "Brai", "Nshu"},
ancestors = {"ltc"},
sort_key = "zh-sortkey",
}
m["zu"] = {
"Zulu",
10179,
"bnt-ngu",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
return m
rr08cwq52vf14ctn6kcpvqf40nplp9a
13340
13339
2022-08-04T12:41:04Z
Asinis632
1829
Scribunto
text/plain
-- Shorthand for turning Unicode code points into a string.
local u = mw.ustring.char
-- UTF-8 encoded strings for some commonly-used diacritics
local GRAVE = u(0x0300) -- U+0300 combining grave accent
local ACUTE = u(0x0301) -- U+0301 combining acute accent
local CIRC = u(0x0302) -- U+0302 combining circumflex accent
local TILDE = u(0x0303) -- U+0303 combining tilde
local MACRON = u(0x0304) -- U+0304 combining macron
local BREVE = u(0x0306) -- U+0306 combining breve
local DOTABOVE = u(0x0307) -- U+0307 combining dot above
local DIAER = u(0x0308) -- U+0308 combining diaeresis
local CARON = u(0x030C) -- U+030C combining caron
local DGRAVE = u(0x030F) -- U+030F combining double grave accent
local INVBREVE = u(0x0311) -- U+0311 combining inverted breve
local DOTBELOW = u(0x0323) -- U+0323 combining dot below
local RINGBELOW = u(0x0325) -- U+0325 combining ring below
local CEDILLA = u(0x0327) -- U+0327 combining cedilla
local OGONEK = u(0x0328) -- U+0328 combining ogonek
local CGJ = u(0x034F) -- combining grapheme joiner
local DOUBLEINVBREVE = u(0x0361) -- U+0361 combining double inverted breve
-- Punctuation to be used for standardChars field
local PUNCTUATION = ' !#%&*+,-./:;<=>?@^_`|~\'()'
-- Script-code lists shared by the records below that use exactly these scripts.
local Cyrl = {"Cyrl"}
local Latn = {"Latn"}
local LatnArab = {"Latn", "Arab"}
-- Table of language records, keyed by language code; filled in below.
local m = {}
m["aa"] = {
"Afar",
27811,
"cus-eas",
Latn,
entry_name = { remove_diacritics = ACUTE},
}
m["ab"] = {
"Abkhaz",
5111,
"cau-abz",
{"Cyrl", "Geor", "Latn"},
translit_module = "ab-translit",
override_translit = true,
entry_name = {
from = {GRAVE, ACUTE},
to = {}} ,
}
m["ae"] = {
"Avestan",
29572,
"ira-cen",
{"Avst", "Gujr"},
translit_module = "Avst-translit",
wikipedia_article = "Avestan",
}
m["af"] = {
"Afrikaans",
14196,
"gmw",
LatnArab,
ancestors = {"nl"},
sort_key = {
from = {"[äáâà]", "[ëéêè]", "[ïíîì]", "[öóôò]", "[üúûù]", "[ÿýŷỳ]", "^-", "'"},
to = {"a" , "e" , "i" , "o" , "u" , "y" }} ,
}
m["ak"] = {
"Akan",
28026,
"alv-ctn",
Latn,
}
m["am"] = {
"Amharic",
28244,
"sem-eth",
{"Ethi"},
translit_module = "Ethi-translit",
}
m["an"] = {
"Aragonese",
8765,
"roa-ibe",
Latn,
ancestors = {"roa-oan"},
}
m["ar"] = {
"Arabic",
13955,
"sem-arb",
{"Arab", "Hebr", "Brai"},
-- replace alif waṣl with alif
-- remove tatweel and diacritics: fathatan, dammatan, kasratan, fatha,
-- damma, kasra, shadda, sukun, superscript (dagger) alef
entry_name = {
from = {u(0x0671), u(0x0640), "[" .. u(0x064B) .. "-" .. u(0x0652) .. "]", u(0x0670)},
to = {u(0x0627)}},
-- put Judeo-Arabic (Hebrew-script Arabic) under the category header
-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles
sort_key = {
from = {"^%f[" .. u(0x5D0) .. "-" .. u(0x5EA) .. "]"},
to = {u(0xFB21)},
},
translit_module = "ar-translit",
}
m["as"] = {
"Assamese",
29401,
"inc-eas",
{"as-Beng"},
ancestors = {"inc-mas"},
translit_module = "as-translit",
}
m["av"] = {
"Avar",
29561,
"cau-nec",
Cyrl,
ancestors = {"oav"},
translit_module = "av-translit",
override_translit = true,
entry_name = {
from = {GRAVE, ACUTE},
to = {}} ,
}
m["ay"] = {
"Aymara",
4627,
"sai-aym",
Latn,
}
m["az"] = {
"Azerbaijani",
9292,
"trk-ogz",
{"Latn", "Cyrl", "fa-Arab"},
ancestors = {"trk-oat"},
}
m["ba"] = {
"Bashkir",
13389,
"trk-kbu",
Cyrl,
translit_module = "ba-translit",
override_translit = true,
}
m["be"] = {
"Belarusian",
9091,
"zle",
Cyrl,
ancestors = {"orv"},
translit_module = "be-translit",
sort_key = {
from = {"Ё", "ё"},
to = {"Е" , "е"}},
entry_name = {
from = {"Ѐ", "ѐ", GRAVE, ACUTE},
to = {"Е", "е"}},
}
-- Record for language code "bg" (Bulgarian, Cyrillic script).
-- Uses the shared `Cyrl` script list like the other Cyrillic-only records
-- instead of building a separate one-element table.
m["bg"] = {
"Bulgarian",
7918,
"zls",
Cyrl,
ancestors = {"cu"},
translit_module = "bg-translit",
-- Page-name normalisation: the precomposed grave-accented letters are mapped
-- to their bare letters; the combining GRAVE and ACUTE have no counterpart
-- in `to`.
entry_name = {
from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE},
to = {"Е", "е", "И", "и"}},
}
m["bh"] = {
"Bihari",
135305,
"inc-eas",
{"Deva"},
ancestors = {"inc-mgd"},
}
m["bi"] = {
"Bislama",
35452,
"crp",
Latn,
ancestors = {"en"},
}
m["bm"] = {
"Bambara",
33243,
"dmn-emn",
Latn,
}
m["bn"] = {
"Bengali",
9610,
"inc-eas",
{"Beng", "Newa"},
ancestors = {"inc-mbn"},
translit_module = "bn-translit",
}
m["bo"] = {
"Tibetan",
34271,
"sit-tib",
{"Tibt"}, -- sometimes Deva?
ancestors = {"xct"},
translit_module = "bo-translit",
override_translit = true,
}
m["br"] = {
"Breton",
12107,
"cel-bry",
Latn,
ancestors = {"xbm"},
}
m["ca"] = {
"Catalan",
7026,
"roa",
Latn,
ancestors = {"roa-oca"},
sort_key = {
from = {"à", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "l·l"},
to = {"a", "e" , "i" , "o" , "u" , "c", "ll" }} ,
}
m["ce"] = {
"Chechen",
33350,
"cau-vay",
Cyrl,
translit_module = "ce-translit",
override_translit = true,
entry_name = {
from = {MACRON},
to = {}},
}
m["ch"] = {
"Chamorro",
33262,
"poz-sus",
Latn,
}
m["co"] = {
"Corsican",
33111,
"roa-itd",
Latn,
}
m["cr"] = {
"Cree",
33390,
"alg",
{"Cans", "Latn"},
translit_module = "translit-redirect",
}
m["cs"] = {
"Czech",
9056,
"zlw",
Latn,
ancestors = {"zlw-ocs"},
sort_key = {
from = {"á", "é", "í", "ó", "[úů]", "ý"},
to = {"a", "e", "i", "o", "u" , "y"}} ,
}
m["cu"] = {
"Old Church Slavonic",
35499,
"zls",
{"Cyrs", "Glag"},
translit_module = "Cyrs-Glag-translit",
entry_name = {
from = {u(0x0484)}, -- kamora
to = {}},
sort_key = {
from = {"оу", "є"},
to = {"у" , "е"}} ,
}
m["cv"] = {
"Chuvash",
33348,
"trk-ogr",
Cyrl,
ancestors = {"xbo"},
translit_module = "cv-translit",
override_translit = true,
}
m["cy"] = {
"Welsh",
9309,
"cel-bry",
Latn,
ancestors = {"wlm"},
sort_key = {
from = {"[âáàä]", "ch", "dd", "[êéèë]", "ff", "ngh", "[îíìï]", "ll", "[ôóòö]", "ph", "rh", "th", "[ûúùü]", "[ŵẃẁẅ]", "[ŷýỳÿ]", "'"},
to = {"a" , "c~", "d~", "e" , "f~", "g~h", "i" , "l~", "o" , "p~", "r~", "t~", "u" , "w" , "y" }} ,
standardChars = "A-IL-PR-UWYa-il-pr-uwy0-9ÂâÊêÎîÔôÛûŴŵŶŷ" .. PUNCTUATION,
}
m["da"] = {
"Danish",
9035,
"gmq",
Latn,
ancestors = {"gmq-oda"},
}
m["de"] = {
"Jeman",
188,
"gmw",
{"Latn", "Latf"},
ancestors = {"gmh"},
sort_key = {
from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]", "ß" },
to = {"a" , "e" , "i" , "o" , "u" , "ss"}} ,
standardChars = "A-Za-z0-9ÄäÖöÜüß" .. PUNCTUATION,
}
m["dv"] = {
"Dhivehi",
32656,
"inc-ins",
{"Thaa"},
ancestors = {"elu-prk"},
translit_module = "dv-translit",
override_translit = true,
}
m["dz"] = {
"Dzongkha",
33081,
"sit-tib",
{"Tibt"},
ancestors = {"xct"},
translit_module = "bo-translit",
override_translit = true,
}
m["ee"] = {
"Ewe",
30005,
"alv-gbe",
Latn,
}
m["el"] = {
"Greek",
9129,
"grk",
{"Grek", "Brai"},
ancestors = {"grc"},
translit_module = "el-translit",
override_translit = true,
sort_key = { -- Keep this synchronized with grc, cpg, pnt, tsd
from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"},
to = {"α" , "ε" , "η" , "ι" , "ο" , "υ" , "ω" , "ρ", "σ"}} ,
standardChars = "ͺ;΄-ώϜϝ" .. PUNCTUATION,
}
m["en"] = {
"Inglis",
1860,
"gmw",
{"Latn", "Brai", "Shaw", "Dsrt"}, -- entries in Shaw or Dsrt might require prior discussion
ancestors = {"enm"},
sort_key = {
from = {"[äàáâåā]", "[ëèéêē]", "[ïìíîī]", "[öòóôō]", "[üùúûū]", "æ" , "œ" , "[çč]", "ñ", "'"},
to = {"a" , "e" , "i" , "o" , "u" , "ae", "oe", "c" , "n"}},
wikimedia_codes = {"en", "simple"},
standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF),
}
m["eo"] = {
"Esperanto",
143,
"art",
Latn,
sort_key = {
from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ĉ]", "[ĝ]", "[ĥ]", "[ĵ]", "[ŝ]", "[ŭ]"},
to = {"a" , "e" , "i" , "o" , "u", "cĉ", "gĉ", "hĉ", "jĉ", "sĉ", "uĉ"}} ,
standardChars = "A-PRSTUVZa-prstuvzĉĈĝĜĵĴŝŜŭŬ0-9" .. PUNCTUATION,
}
m["es"] = {
"Spanish",
1321,
"roa-ibe",
{"Latn", "Brai"},
ancestors = {"osp"},
sort_key = {
from = {"á", "é", "í", "ó", "[úü]", "ç", "ñ"},
to = {"a", "e", "i", "o", "u" , "c", "n~"}},
standardChars = "A-VXYZa-vxyz0-9ÁáÉéÍíÓóÚúÑñ¿¡" .. PUNCTUATION,
}
m["et"] = {
"Estonian",
9072,
"fiu-fin",
Latn,
}
m["eu"] = {
"Basque",
8752,
"euq",
Latn,
}
m["fa"] = {
"Persian",
9168,
"ira-swi",
{"fa-Arab"},
ancestors = {"pal"}, -- "ira-mid"
entry_name = {
from = {u(0x064E), u(0x0640), u(0x064F), u(0x0650), u(0x0651), u(0x0652)},
to = {}} ,
}
m["ff"] = {
"Fula",
33454,
"alv-fwo",
{"Latn", "Adlm"},
}
-- Record for language code "fi" (Finnish, Latin script).
m["fi"] = {
"Finnish",
1412,
"fiu-fin",
Latn,
-- Page-name normalisation: the gemination marker has no replacement listed.
entry_name = {
from = {"ˣ"}, -- Used to indicate gemination of the next consonant
to = {}},
-- Sort key: foreign accented letters are folded onto the nearest native
-- letter; the final class (apostrophe and colon) has no counterpart in `to`.
-- NOTE(review): "õ" is listed both in "[óòôõ]" (-> "o") and in "[øõő]"
-- (-> "ö"). If the consumer applies the rules in list order, the second
-- occurrence can never match -- confirm which mapping is intended.
sort_key = {
from = {"[áàâã]", "[éèêẽ]", "[íìîĩ]", "[óòôõ]", "[úùûũ]", "[ýỳŷüű]", "[øõő]", "æ" , "œ" , "[čç]", "š", "ž", "ß" , "[':]"},
to = {"a" , "e" , "i" , "o" , "u" , "y" , "ö" , "ae", "oe", "c" , "s", "z", "ss"}} ,
}
m["fj"] = {
"Fijian",
33295,
"poz-occ",
Latn,
}
m["fo"] = {
"Faroese",
25258,
"gmq",
Latn,
ancestors = {"non"},
}
m["fr"] = {
"Frens",
150,
"roa-oil",
{"Latn", "Brai"},
ancestors = {"frm"},
sort_key = {
from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ" , "œ" , "'"},
to = {"a" , "e" , "i" , "o" , "u" , "y" , "c", "ae", "oe"}},
standardChars = "A-Za-z0-9ÀÂÇÉÈÊËÎÏÔŒÛÙÜàâçéèêëîïôœûùü«»" .. PUNCTUATION,
}
m["fy"] = {
"West Frisian",
27175,
"gmw-fri",
Latn,
ancestors = {"ofs"},
sort_key = {
from = {"[àáâä]", "[èéêë]", "[ìíîïyỳýŷÿ]", "[òóôö]", "[ùúûü]", "æ", "[ /.-]"},
to = {"a" , "e" , "i" , "o" , "u", "ae"}} ,
standardChars = "A-PR-WYZa-pr-wyz0-9Ææâäàéêëèïìôöòúûüùỳ" .. PUNCTUATION,
}
m["ga"] = {
"Irish",
9142,
"cel-gae",
Latn,
ancestors = {"mga"},
sort_key = {
from = {"á", "é", "í", "ó", "ú", "ý", "ḃ" , "ċ" , "ḋ" , "ḟ" , "ġ" , "ṁ" , "ṗ" , "ṡ" , "ṫ" },
to = {"a", "e", "i", "o", "u", "y", "bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}} ,
standardChars = "A-IL-PR-Ua-il-pr-u0-9ÁáÉéÍíÓóÚú" .. PUNCTUATION,
}
m["gd"] = {
"Scottish Gaelic",
9314,
"cel-gae",
Latn,
ancestors = {"mga"},
sort_key = {
from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ýỳ]"},
to = {"a" , "e" , "i" , "o" , "u" , "y" }} ,
standardChars = "A-IL-PR-Ua-il-pr-u0-9ÀàÈèÌìÒòÙù" .. PUNCTUATION,
}
m["gl"] = {
"Galician",
9307,
"roa-ibe",
Latn,
ancestors = {"roa-opt"},
sort_key = {
from = {"á", "é", "í", "ó", "ú"},
to = {"a", "e", "i", "o", "u"}} ,
}
m["gn"] = {
"Guaraní",
35876,
"tup-gua",
Latn,
}
m["gu"] = {
"Gujarati",
5137,
"inc-wes",
{"Gujr"},
ancestors = {"inc-mgu"},
translit_module = "gu-translit",
}
m["gv"] = {
"Manx",
12175,
"cel-gae",
Latn,
ancestors = {"mga"},
sort_key = {
from = {"ç", "-"},
to = {"c"}} ,
standardChars = "A-WYÇa-wyç0-9" .. PUNCTUATION,
}
m["ha"] = {
"Hausa",
56475,
"cdc-wst",
LatnArab,
sort_key = {
from = {"ɓ", "ɗ", "ƙ", "'y", "ƴ", "'" },
to = {"b~" , "d~" , "k~", "y~", "y~", "" }},
entry_name = {
from = {"R̃", "r̃", "À", "à", "È", "è", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Î", "î", "Ô", "ô", "Û", "û", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú", "Ā̀", "ā̀", "Ḕ", "ḕ", "Ī̀", "ī̀", "Ṑ", "ṑ", "Ū̀", "ū̀", GRAVE, ACUTE},
to = {"R", "r", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}},
}
m["he"] = {
"Hebrew",
9288,
"sem-can",
{"Hebr", "Phnx", "Brai"},
entry_name = {
from = {"[" .. u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. CGJ .. "]"},
to = {}} ,
}
m["hi"] = {
"Hindi",
1568,
"inc-hnd",
{"Deva", "Kthi", "Newa"},
ancestors = {"inc-ohi"},
translit_module = "hi-translit",
standardChars = "ँंअ-ऊएऐओ-घच-झट-नप-रलवशसहा-ूेैो-◌्।-॰ड़ढ़" .. PUNCTUATION,
}
m["ho"] = {
"Hiri Motu",
33617,
"crp",
Latn,
ancestors = {"meu"},
}
m["ht"] = {
"Haitian Creole",
33491,
"crp",
Latn,
ancestors = {"fr"},
}
m["hu"] = {
"Hungarian",
9067,
"urj-ugr",
{"Latn", "Hung"},
ancestors = {"ohu"},
sort_key = {
from = {"á", "é", "í", "ó", "ú", "[öő]", "[üű]", "cs", "dzs", "gy", "ly", "ny", "zs"},
to = {"a", "e", "i", "o", "u", "o~", "u~", "c~", "dz~", "g~", "l~", "n~", "z~"},
},
}
m["hy"] = {
"Armenian",
8785,
"hyx",
{"Armn", "Brai"},
ancestors = {"axm"},
translit_module = "Armn-translit",
override_translit = true,
sort_key = {
from = {"ու", "և", "եւ"},
to = {"ւ", "եվ", "եվ"}},
entry_name = {
from = {"՞", "՜", "՛", "՟", "և", "<sup>յ</sup>", "<sup>ի</sup>", "<sup>է</sup>"},
to = {"", "", "", "", "եւ", "յ", "ի", "է"}} ,
}
m["hz"] = {
"Herero",
33315,
"bnt-swb",
Latn,
}
m["ia"] = {
"Interlingua",
35934,
"art",
Latn,
}
m["id"] = {
"Indonesian",
9240,
"poz-mly",
Latn,
ancestors = {"ms"},
}
m["ie"] = {
"Interlingue",
35850,
"art",
Latn,
type = "appendix-constructed",
}
-- Record for language code "ig" (Igbo, Latin script).
m["ig"] = {
"Igbo",
33578,
"alv-igb",
Latn,
-- Sort key: dotted vowels and digraphs get keys that sort after the plain
-- letter. The rules are order-sensitive when applied one after another:
-- the digraph "ny" must be rewritten (to "n~") BEFORE "ṅ" is rewritten to
-- "ny", otherwise the key produced for "ṅ" is itself rewritten to "n~" and
-- "ṅ" becomes indistinguishable from "ny".
sort_key = {
from = {"ụ", "ị", "ọ", "gb", "gh", "gw", "kp", "kw", "nw", "ny", "ṅ", "sh"},
to = {"u~" , "i~", "o~", "gy", "gz", "g~", "kz", "k~", "nz", "n~", "ny", "s~"}},
-- Tone marks are dropped from page names.
entry_name = { remove_diacritics = ACUTE .. GRAVE .. MACRON },
}
m["ii"] = {
"Sichuan Yi",
34235,
"tbq-lol",
{"Yiii"},
translit_module = "ii-translit",
}
m["ik"] = {
"Inupiaq",
27183,
"esx-inu",
Latn,
}
m["io"] = {
"Ido",
35224,
"art",
Latn,
}
m["is"] = {
"Icelandic",
294,
"gmq",
Latn,
ancestors = {"non"},
}
m["it"] = {
"Italian",
652,
"roa-itd",
Latn,
sort_key = {
from = {"[àáâäå]", "[èéêë]", "[ìíîï]", "[òóôö]", "[ùúûü]"},
to = {"a" , "e" , "i" , "o" , "u" }} ,
standardChars = "A-IL-VZa-il-vz0-9" .. PUNCTUATION,
}
m["iu"] = {
"Inuktitut",
29921,
"esx-inu",
{"Cans", "Latn"},
translit_module = "translit-redirect",
override_translit = true,
}
m["ja"] = {
"Siapan",
5287,
"jpx",
{"Jpan", "Brai"},
ancestors = {"ojp"},
--[=[
-- Handled by jsort function in [[Module:ja]].
sort_key = {
from = {"[ぁァア]", "[ぃィイ]", "[ぅゔゥウヴ]", "[ぇェエ]", "[ぉォオ]", "[がゕカガヵ]", "[ぎキギ]", "[ぐクグㇰ]", "[げゖケゲヶ]", "[ごコゴ]", "[ざサザ]", "[じシジㇱ]", "[ずスズㇲ]", "[ぜセゼ]", "[ぞソゾ]", "[だタダ]", "[ぢチヂ]", "[っづッツヅ]", "[でテデ]", "[どトドㇳ]", "ナ", "ニ", "[ヌㇴ]", "ネ", "ノ", "[ばぱハバパㇵ]", "[びぴヒビピㇶ]", "[ぶぷフブプㇷ]", "[べぺヘベペㇸ]", "[ぼぽホボポㇹ]", "マ", "ミ", "[ムㇺ]", "メ", "モ", "[ゃャヤ]", "[ゅュユ]", "[ょョヨ]", "[ラㇻ]", "[リㇼ]", "[ルㇽ]", "[レㇾ]", "[ロㇿ]", "[ゎヮワヷ]", "[ヰヸ]", "[ヱヹ]", "[ヲヺ]", "ン", "[゙゚゛゜ゝゞ・ヽヾ]", "𛀀"},
to = {"あ", "い", "う", "え", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "や", "ゆ", "よ", "ら", "り", "る", "れ", "ろ", "わ", "ゐ", "ゑ", "を", "ん", "", "え"}},
--]=]
}
m["jv"] = {
"Javanese",
33549,
"poz-sus",
{"Latn", "Java"},
translit_module = "jv-translit",
ancestors = {"kaw"},
link_tr = true,
}
m["ka"] = {
"Georgian",
8108,
"ccs-gzn",
{"Geor", "Geok", "Hebr"}, -- Hebr is used to write Judeo-Georgian
ancestors = {"oge"},
translit_module = "Geor-translit",
override_translit = true,
entry_name = {
from = {"̂"},
to = {""}},
}
m["kg"] = {
"Kongo",
33702,
"bnt-kng",
Latn,
}
m["ki"] = {
"Kikuyu",
33587,
"bnt-kka",
Latn,
}
m["kj"] = {
"Kwanyama",
1405077,
"bnt-ova",
Latn,
}
m["kk"] = {
"Kazakh",
9252,
"trk-kno",
{"Cyrl", "Latn", "kk-Arab"},
translit_module = "kk-translit",
override_translit = true,
}
m["kl"] = {
"Greenlandic",
25355,
"esx-inu",
Latn,
}
m["km"] = {
"Khmer",
9205,
"mkh-kmr",
{"Khmr"},
ancestors = {"mkh-mkm"},
translit_module = "km-translit",
}
m["kn"] = {
"Kannada",
33673,
"dra",
{"Knda"},
ancestors = {"dra-mkn"},
translit_module = "kn-translit",
}
m["ko"] = {
"Korean",
9176,
"qfa-kor",
{"Kore", "Brai"},
ancestors = {"okm"},
-- 20210122 idea: strip parenthesized hanja from entry link
-- Hani regex is a reasonable subset of Hani from [[Module:scripts/data]],
-- last updated on 20210214.
entry_name = {
from = {
" *%([一-鿿㐀-䶿𠀀-𰀀-﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧﨨﨩]+%)",
},
to = {
"",
}},
display = {
from = {"%-"},
to = {},
},
translit_module = "ko-translit",
}
m["kr"] = {
"Kanuri",
36094,
"ssa-sah",
LatnArab,
sort_key = {
from = {"ny", "ǝ", "sh"},
to = {"n~", "e~", "s~"}} , -- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
entry_name = {
from = {"À", "à", "È", "è", "Ǝ̀", "ǝ̀", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Ǝ̂", "ǝ̂", "Î", "î", "Ô", "ô", "Û", "û", "Ă", "ă", "Ĕ", "ĕ", "Ǝ̆", "ǝ̆", "Ĭ", "ĭ", "Ŏ", "ŏ", "Ŭ", "ŭ", "Á", "á", "É", "é", "Ǝ́", "ǝ́", "Í", "í", "Ó", "ó", "Ú", "ú", GRAVE, ACUTE},
to = {"A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u"}},
}
m["ks"] = {
"Kashmiri",
33552,
"inc-dar",
{"ks-Arab", "Deva", "Shrd", "Latn"},
translit_module = "translit-redirect",
ancestors = {"sa"},
}
-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT
m["kw"] = {
"Cornish",
25289,
"cel-bry",
Latn,
ancestors = {"cnx"},
}
m["ky"] = {
"Kyrgyz",
9255,
"trk-kip",
{"Cyrl", "Latn", "Arab"},
translit_module = "ky-translit",
override_translit = true,
}
m["la"] = {
"Latin",
397,
"itc",
Latn,
ancestors = {"itc-ola"},
entry_name = {remove_diacritics = MACRON .. BREVE .. DIAER .. DOUBLEINVBREVE},
standardChars = "A-Za-z0-9ÆæŒœĀ-ăĒ-ĕĪ-ĭŌ-ŏŪ-ŭȲȳ" .. MACRON .. BREVE .. PUNCTUATION,
}
m["lb"] = {
"Luxembourgish",
9051,
"gmw",
Latn,
ancestors = {"gmh"},
}
m["lg"] = {
"Luganda",
33368,
"bnt-nyg",
Latn,
entry_name = {
from = {"á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ", "â", "Â", "ê", "Ê", "î", "Î", "ô", "Ô", "û", "Û" },
to = {"a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U",}},
sort_key = {
from = {"ŋ"},
to = {"n"}} ,
}
m["li"] = {
"Limburgish",
102172,
"gmw",
Latn,
ancestors = {"dum"},
}
m["ln"] = {
"Lingala",
36217,
"bnt-bmo",
Latn,
}
m["lo"] = {
"Lao",
9211,
"tai-swe",
{"Laoo"},
translit_module = "lo-translit",
sort_key = {
from = {"[%pໆ]", "[່-ໍ]", "ຼ", "ຽ", "ໜ", "ໝ", "([ເແໂໃໄ])([ກ-ຮ])"},
to = {"", "", "ລ", "ຍ", "ຫນ", "ຫມ", "%2%1"}},
standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. PUNCTUATION,
}
m["lt"] = {
"Lithuanian",
9083,
"bat",
Latn,
ancestors = {"olt"},
entry_name = {
from = {"[áãà]", "[ÁÃÀ]", "[éẽè]", "[ÉẼÈ]", "[íĩì]", "[ÍĨÌ]", "[ýỹ]", "[ÝỸ]", "ñ", "[óõò]", "[ÓÕÒ]", "[úũù]", "[ÚŨÙ]", ACUTE, GRAVE, TILDE},
to = {"a", "A", "e", "E", "i", "I", "y", "Y", "n", "o", "O", "u", "U"}} ,
}
m["lu"] = {
"Luba-Katanga",
36157,
"bnt-lub",
Latn,
}
m["lv"] = {
"Latvian",
9078,
"bat",
Latn,
entry_name = {
-- This attempts to convert vowels with tone marks to vowels either with
-- or without macrons. Specifically, there should be no macrons if the
-- vowel is part of a diphthong (including resonant diphthongs, such as in
-- pìrksts -> pirksts, not #pīrksts). What we do is first convert the
-- vowel + tone mark to a vowel + tilde in a decomposed fashion,
-- then remove the tilde in diphthongs, then convert the remaining
-- vowel + tilde sequences to macroned vowels, then delete any other
-- tilde. We leave already-macroned vowels alone: Both e.g. ar and ār
-- occur before consonants. FIXME: This still might not be sufficient.
from = {"Ȩ", "ȩ", "[ÂÃÀ]", "[âãà]", "[ÊẼÈ]", "[êẽè]", "[ÎĨÌ]", "[îĩì]", "[ÔÕÒ]", "[ôõò]", "[ÛŨÙ]", "[ûũù]", "[ÑǸ]", "[ñǹ]", "[" .. CIRC .. TILDE ..GRAVE .."]", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .. "?([^aAeEiIoOuUāĀēĒīĪūŪ])", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .."?$", "([iI])" .. TILDE .. "?([eE])" .. TILDE .. "?", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "U" .. TILDE, "u" .. TILDE, TILDE},
to = {"E", "e", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "O", "o", "U" .. TILDE, "u" .. TILDE, "N", "n", TILDE, "%1%2%3", "%1%2", "%1%2", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ū", "ū", ""}},
}
m["mg"] = {
"Malagasy",
7930,
"poz-bre",
Latn,
}
m["mh"] = {
"Marshallese",
36280,
"poz-mic",
Latn,
sort_key = {
from = {"ā" , "ļ" , "m̧" , "ņ" , "n̄" , "o̧" , "ō" , "ū" },
to = {"a~", "l~", "m~", "n~", "n~~", "o~", "o~~", "u~"}} ,
}
m["mi"] = {
"Maori",
36451,
"poz-pep",
Latn,
}
m["mk"] = {
"Macedonian",
9296,
"zls",
Cyrl,
translit_module = "mk-translit",
entry_name = {
from = {ACUTE},
to = {}},
}
m["ml"] = {
"Malayalam",
36236,
"dra",
{"Mlym"},
translit_module = "ml-translit",
override_translit = true,
}
m["mn"] = {
"Mongolian",
9246,
"xgn",
{"Cyrl", "Mong", "Soyo", "Zanb"}, -- entries in Soyo or Zanb might require prior discussion
ancestors = {"cmg"},
translit_module = "mn-translit",
override_translit = true,
}
-- "mo" IS TREATED AS "ro", SEE WT:LT
m["mr"] = {
"Marathi",
1571,
"inc-sou",
{"Deva", "Modi"},
ancestors = {"omr"},
translit_module = "mr-translit",
entry_name = {
from = {"च़", "ज़", "झ़"},
to = {"च", "ज", "झ"}} ,
}
m["ms"] = {
"Malay",
9237,
"poz-mly",
{"Latn", "ms-Arab"},
}
-- Record for language code "mt" (Maltese, Latin script).
m["mt"] = {
"Maltese",
9166,
"sem-arb",
Latn,
-- Sort key: the dotted/barred consonants are keyed as their plain letters.
sort_key = {
to = {"c", "g", "h"},
from = {"ċ", "ġ", "ħ"},
},
ancestors = {"sqr"},
}
m["my"] = {
"Burmese",
9228,
"tbq-brm",
{"Mymr"},
ancestors = {"obr"},
translit_module = "my-translit",
override_translit = true,
sort_key = {
from = {"ျ", "ြ", "ွ", "ှ", "ဿ"},
to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "သ္သ"}},
}
m["na"] = {
"Nauruan",
13307,
"poz-mic",
Latn,
}
m["nb"] = {
"Norwegian Bokmål",
25167,
"gmq",
Latn,
ancestors = {"gmq-mno"},
wikimedia_codes = {"no"},
}
m["nd"] = {
"Northern Ndebele",
35613,
"bnt-ngu",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["ne"] = {
"Nepali",
33823,
"inc-pah",
{"Deva", "Newa"},
translit_module = "ne-translit",
}
m["ng"] = {
"Ndonga",
33900,
"bnt-ova",
Latn,
}
m["nl"] = {
"Dutch",
7411,
"gmw",
Latn,
ancestors = {"dum"},
sort_key = {
from = {"[äáâå]", "[ëéê]", "[ïíî]", "[öóô]", "[üúû]", "ç", "ñ", "^-"},
to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} ,
standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF),
}
m["nn"] = {
"Norwegian Nynorsk",
25164,
"gmq",
Latn,
ancestors = {"gmq-mno"},
}
m["no"] = {
"Norwegian",
9043,
"gmq",
Latn,
ancestors = {"gmq-mno"},
}
m["nr"] = {
"Southern Ndebele",
36785,
"bnt-ngu",
Latn,
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
m["nv"] = {
"Navajo",
13310,
"apa",
Latn,
sort_key = {
from = {"[áą]", "[éę]", "[íį]", "[óǫ]", "ń", "^n([djlt])", "ł" , "[ʼ’']", ACUTE},
to = {"a" , "e" , "i" , "o" , "n", "ni%1" , "l~"}}, -- the tilde is used to guarantee that ł will always be sorted after all other words with l
}
m["ny"] = {
"Chichewa",
33273,
"bnt-nys",
Latn,
entry_name = {
from = {"ŵ", "Ŵ", "á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ" },
to = {"w", "W", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M"}},
sort_key = {
from = {"ng'"},
to = {"ng"}} ,
}
m["oc"] = {
"Occitan",
14185,
"roa",
{"Latn", "Hebr"},
ancestors = {"pro"},
sort_key = {
from = {"[àá]", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "([lns])·h"},
to = {"a" , "e" , "i" , "o" , "u" , "c", "%1h" }} ,
}
m["oj"] = {
"Ojibwe",
33875,
"alg",
{"Cans", "Latn"},
sort_key = {
from = {"aa", "ʼ", "ii", "oo", "sh", "zh"},
to = {"a~", "h~", "i~", "o~", "s~", "z~"}} ,
}
m["om"] = {
"Oromo",
33864,
"cus-eas",
{"Latn", "Ethi"},
}
m["or"] = {
"Oriya",
33810,
"inc-eas",
{"Orya"},
ancestors = {"inc-mor"},
translit_module = "or-translit",
}
m["os"] = {
"Ossetian",
33968,
"xsc",
{"Cyrl", "Geor", "Latn"},
ancestors = {"oos"},
translit_module = "os-translit",
override_translit = true,
entry_name = {
from = {GRAVE, ACUTE},
to = {}} ,
}
m["pa"] = {
"Punjabi",
58635,
"inc-pan",
{"Guru", "pa-Arab"},
ancestors = {"inc-opa"},
translit_module = "translit-redirect",
entry_name = {
from = {u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658), u(0x08C7), u(0x0768)},
to = {"", "", "", "", "", "", "", "", "", "ل", "ن"}} ,
}
m["pi"] = {
"Pali",
36727,
"inc-mid",
{"Latn", "Brah", "Deva", "Beng", "Sinh", "Mymr", "Thai", "Lana", "Laoo", "Khmr"},
ancestors = {"sa"},
translit_module = "translit-redirect",
sort_key = {
from = {"ā", "ī", "ū", "ḍ", "ḷ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṭ", "([เโ])([ก-ฮ])", "([ເໂ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
to = {"a~", "i~", "u~", "d~", "l~", "m~", "n~", "n~~", "n~~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} ,
entry_name = {
from = {u(0xFE00)},
to = {}},
}
m["pl"] = {
"Polish",
809,
"zlw-lch",
Latn,
ancestors = {"zlw-opl"},
sort_key = {
from = {"[Ąą]", "[Ćć]", "[Ęę]", "[Łł]", "[Ńń]", "[Óó]", "[Śś]", "[Żż]", "[Źź]"},
to = {
"a" .. u(0x10FFFF),
"c" .. u(0x10FFFF),
"e" .. u(0x10FFFF),
"l" .. u(0x10FFFF),
"n" .. u(0x10FFFF),
"o" .. u(0x10FFFF),
"s" .. u(0x10FFFF),
"z" .. u(0x10FFFF),
"z" .. u(0x10FFFE)}} ,
}
m["ps"] = {
"Pashto",
58680,
"ira-pat",
{"ps-Arab"},
ancestors = {"ira-pat-pro"},
}
m["pt"] = {
"Portuguese",
5146,
"roa-ibe",
{"Latn", "Brai"},
ancestors = {"roa-opt"},
sort_key = {
from = {"[àãáâä]", "[èẽéêë]", "[ìĩíï]", "[òóôõö]", "[üúùũ]", "ç", "ñ"},
to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} ,
}
m["qu"] = {
"Quechua",
5218,
"qwe",
Latn,
}
m["rm"] = {
"Romansch",
13199,
"roa-rhe",
Latn,
}
m["ro"] = {
"Romanian",
7913,
"roa-eas",
{"Latn", "Cyrl"},
sort_key = {
from = {"ă" , "â" , "î" , "ș" , "ț" },
to = {"a~", "a~~", "i~", "s~", "t~"}},
}
-- Record for language code "ru" (Russian; "Cyrl" and "Brai" scripts).
m["ru"] = {
"Russian",
7737,
"zle",
{"Cyrl", "Brai"},
translit_module = "ru-translit",
standardChars = "ЁА-яё0-9—" .. PUNCTUATION,
-- Sort key: "ё" is keyed as "е" followed by the code point U+10FFFF.
sort_key = {
from = {"ё"},
to = {"е" .. u(0x10FFFF)},
},
-- Page-name normalisation: precomposed grave-accented letters become bare
-- letters; the three combining marks have no counterpart in `to`.
entry_name = {
from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE, DIAER},
to = {"Е", "е", "И", "и"},
},
}
-- Record for language code "rw" (Rwanda-Rundi, Latin script).
m["rw"] = {
"Rwanda-Rundi",
3217514,
"bnt-glb",
Latn,
-- Page-name normalisation: tone- and length-marked vowels become plain
-- vowels. Macron+acute on a/i/u has no precomposed code point, so writing
-- those inside a bracket class put a bare combining acute into the class
-- and turned every combining acute into a vowel letter. The bracket classes
-- now hold only single precomposed letters, and leftover combining marks are
-- listed separately with no counterpart in `to`, the same layout the other
-- Bantu records in this table use.
entry_name = {
from = {"[áāâǎ]", "[éēêěḗ]", "[íīîǐ]", "[óōôǒṓ]", "[úūûǔ]", MACRON, ACUTE, CIRC, CARON},
to = {"a", "e" , "i", "o" , "u"} },
}
m["sa"] = {
"Sanskrit",
11059,
"inc-old",
{"Deva", "Bali", "as-Beng", "Beng", "Bhks", "Brah", "Gran", "Gujr", "Guru", "Java", "Khar", "Khmr", "Knda", "Lana", "Laoo", "Mlym", "Modi", "Mymr", "Nand", "Newa", "Orya", "Saur", "Shrd", "Sidd", "Sinh", "Taml", "Telu", "Thai", "Tibt", "Tirh"},
sort_key = {
from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} ,
entry_name = {
from = {u(0xFE00)},
to = {}},
translit_module = "translit-redirect",
}
m["sc"] = {
"Sardinian",
33976,
"roa",
Latn,
}
m["sd"] = {
"Sindhi",
33997,
"inc-snd",
{"sd-Arab", "Deva", "Sind", "Khoj"},
entry_name = {
from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)},
to = {u(0x0627)}},
ancestors = {"inc-vra"},
translit_module = "translit-redirect",
}
m["se"] = {
"Northern Sami",
33947,
"smi",
Latn,
entry_name = {
from = {"ạ", "[ēẹ]", "ī", "[ōọ]", "ū", "ˈ"},
to = {"a", "e" , "i", "o" , "u"} },
sort_key = {
from = {"á" , "č" , "đ" , "ŋ" , "š" , "ŧ" , "ž" },
to = {"a²", "c²", "d²", "n²", "s²", "t²", "z²"} },
standardChars = "A-PR-VZa-pr-vz0-9ÁáČčĐđŊŋŠšŦŧŽž" .. PUNCTUATION,
}
m["sg"] = {
"Sango",
33954,
"crp",
Latn,
ancestors = {"ngb"},
}
m["sh"] = {
"Serbo-Croatian",
9301,
"zls",
{"Latn", "Cyrl", "Glag"},
entry_name = {
from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "Е", "е", "И" , "и", "У", "у" }},
wikimedia_codes = {"sh", "bs", "hr", "sr"},
}
m["si"] = {
"Sinhalese",
13267,
"inc-ins",
{"Sinh"},
ancestors = {"elu-prk"},
translit_module = "si-translit",
override_translit = true,
}
m["sk"] = {
"Slovak",
9058,
"zlw",
Latn,
sort_key = {
from = {"[áä]", "é", "í", "[óô]", "ú", "ý", "ŕ", "ĺ", "[" .. DIAER .. ACUTE .. CIRC .. "]"},
to = {"a" , "e", "i", "o" , "u", "y", "r", "l", ""}} ,
}
m["sl"] = {
"Slovene",
9063,
"zls",
Latn,
entry_name = {
from = {"[ÁÀÂĀȂȀ]", "[áàâāȃȁ]", "[ÉÈÊĒȆȄỆẸ]", "[éèêēȇȅệẹə]", "[ÍÌÎĪȊȈ]", "[íìîīȋȉ]", "[ÓÒÔŌȎȌỘỌ]", "[óòôōȏȍộọ]", "[ŔȒȐ]", "[ŕȓȑ]", "[ÚÙÛŪȖȔ]", "[úùûūȗȕ]", "ł", GRAVE, ACUTE, CIRC, MACRON, DGRAVE, INVBREVE, DOTBELOW},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "l"},
},
sort_key = {
from = {"č" , "š" , "ž" },
to = {"c²", "s²", "z²"},
},
}
m["sm"] = {
"Samoan",
34011,
"poz-pnp",
Latn,
}
m["sn"] = {
"Shona",
34004,
"bnt-sho",
Latn,
entry_name = {remove_diacritics = ACUTE},
}
m["so"] = {
"Somali",
13275,
"cus-eas",
{"Latn", "Arab", "Osma"},
entry_name = {
from = {"[ÁÀÂ]", "[áàâ]", "[ÉÈÊ]", "[éèê]", "[ÍÌÎ]", "[íìî]", "[ÓÒÔ]", "[óòô]", "[ÚÙÛ]", "[úùû]", "[ÝỲ]", "[ýỳ]"},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "U" , "u", "Y", "y"}} ,
}
-- Albanian: lists the Latn, Grek and Elba script codes.
m["sq"] = {
"Albanian",
8748,
"sqj",
{"Latn", "Grek", "Elba"},
entry_name = {remove_diacritics = ACUTE},
-- sort_key: fourteen patterns and fourteen targets; circumflex/tilde/diaeresis vowels
-- and ç go to the bare letter, upper and lower case handled separately.
sort_key = {
from = { '[âãä]', '[ÂÃÄ]', '[êẽë]', '[ÊẼË]', 'ĩ', 'Ĩ', 'õ', 'Õ', 'ũ', 'Ũ', 'ỹ', 'Ỹ', 'ç', 'Ç' },
to = { 'a', 'A', 'e', 'E', 'i', 'I', 'o', 'O', 'u', 'U', 'y', 'Y', 'c', 'C' } } ,
}
-- Entry for code "sr". Unlike its neighbours it has only three positional fields and
-- supplies its scripts through the named `scripts` key, plus an `aliases` list.
-- NOTE(review): whether the consumer reads `scripts`/`aliases` in this form is not
-- visible here -- confirm.
m["sr"] = {
"Sebian",
9299,
"zls",
aliases = {"Српски", "Српски језик", "српски", "српски језик", "srpski", "srpski jezik", "Srpski", "Srpski jezik"},
scripts = {"Latn", "Cyrl"},
-- entry_name: same `from`/`to` lists as the "sh" entry (18 letter patterns paired with
-- 18 plain letters, followed by six combining-mark constants with no `to` counterpart).
entry_name = {
from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE},
to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "Е", "е", "И" , "и", "У", "у" }},
}
-- Swazi: Latin script list.
m["ss"] = {
"Swazi",
34014,
"bnt-ngu",
Latn,
-- entry_name: seven letter patterns (tone-marked vowels, m and n) line up with seven
-- plain letters; the five trailing combining marks have no `to` counterpart.
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
-- Sotho: Latin script list.
m["st"] = {
"Sotho",
34340,
"bnt-sts",
Latn,
-- entry_name: seven letter patterns (tone-marked vowels, m and n) line up with seven
-- plain letters; the five trailing combining marks have no `to` counterpart.
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
-- Sundanese: lists the Latn and Sund script codes; transliteration module "su-translit".
m["su"] = {
"Sundanese",
34002,
"poz-msa",
{"Latn", "Sund"},
translit_module = "su-translit",
}
-- Swedish: Latin script list; "gmq-osw" is given as its only ancestor.
m["sv"] = {
"Swedish",
9027,
"gmq",
Latn,
ancestors = {"gmq-osw"},
}
-- Swahili: uses the shared LatnArab script list (Latn + Arab).
m["sw"] = {
"Swahili",
7838,
"bnt-swh",
LatnArab,
-- sort_key: "ng'" is paired with "ngz"; the second pattern, a leading hyphen ("^-"),
-- has no `to` counterpart (presumably it is dropped from the sort key -- confirm).
sort_key = {
from = {"ng'", "^-"},
to = {"ngz"}} ,
}
-- Tamil: Taml script; ancestor "oty"; transliteration from "ta-translit" with
-- override_translit switched on.
m["ta"] = {
"Tamil",
5885,
"dra",
{"Taml"},
ancestors = {"oty"},
translit_module = "ta-translit",
override_translit = true,
}
-- Telugu: Telu script; transliteration from "te-translit" with override_translit switched on.
m["te"] = {
"Telugu",
8097,
"dra",
{"Telu"},
translit_module = "te-translit",
override_translit = true,
}
-- Tajik: lists the Cyrl, fa-Arab and Latn script codes.
m["tg"] = {
"Tajik",
9260,
"ira-swi",
{"Cyrl", "fa-Arab", "Latn"},
ancestors = {"pal"}, -- same as "fa", see WT:T:AFA
translit_module = "tg-translit",
override_translit = true,
-- sort_key: Ё/ё are paired with Е/е.
sort_key = {
from = {"Ё", "ё"},
to = {"Е" , "е"}} ,
-- entry_name: the combining acute is listed with an empty `to`
-- (presumably it is removed from page names -- confirm).
entry_name = {
from = {ACUTE},
to = {}} ,
}
-- Entry for code "th": lists the Thai and Brai script codes; transliteration module "th-translit".
m["th"] = {
"Tai",
9217,
"tai-swe",
{"Thai", "Brai"},
translit_module = "th-translit",
-- sort_key, three rules:
--   1. punctuation (%p) and "ๆ" are replaced by "";
--   2. characters in the range "็"-"๎" are replaced by "";
--   3. one of เ แ โ ใ ไ followed by a character in ก-ฮ is rewritten as "%2%1",
--      i.e. the two captured characters swap places.
sort_key = {
from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"},
to = {"", "", "%2%1"}},
}
-- Tigrinya: Ethi script; shares the "Ethi-translit" transliteration module.
m["ti"] = {
"Tigrinya",
34124,
"sem-eth",
{"Ethi"},
translit_module = "Ethi-translit",
}
-- Turkmen: lists the Latn, Cyrl and Arab script codes; ancestor "trk-ogz-pro".
m["tk"] = {
"Turkmen",
9267,
"trk-ogz",
{"Latn", "Cyrl", "Arab"},
-- entry_name: nine patterns and nine targets; macron vowels go to the vowel without the
-- macron (ȫ -> ö, ǖ -> ü keep their diaeresis) and the bare combining macron goes to "".
entry_name = {
from = {"ā", "ē", "ī", "ō", "ū", "ȳ", "ȫ", "ǖ", MACRON},
to = {"a", "e", "i", "o", "u", "y", "ö", "ü", ""}},
ancestors = {"trk-ogz-pro"},
}
-- Tagalog: lists the Latn and Tglg script codes; transliteration from "tl-translit"
-- with override_translit switched on.
m["tl"] = {
"Tagalog",
34057,
"phi",
{"Latn", "Tglg"},
-- entry_name: five accented-vowel patterns line up with five plain vowels; the three
-- trailing combining marks have no `to` counterpart.
entry_name = {
from = {"[áàâ]", "[éèê]", "[íìî]", "[óòô]", "[úùû]", ACUTE, GRAVE, CIRC},
to = {"a" , "e" , "i" , "o" , "u" }},
translit_module = "tl-translit",
override_translit = true
}
-- Tswana: Latin script list; no entry_name, sort_key or transliteration settings.
m["tn"] = {
"Tswana",
34137,
"bnt-sts",
Latn,
}
-- Tongan: Latin script list. Macrons are handled in sort_key, acutes in entry_name.
m["to"] = {
"Tongan",
34094,
"poz-pol",
Latn,
-- sort_key: macron vowels go to plain vowels; the bare combining macron goes to "".
sort_key = {
from = {"ā", "ē", "ī", "ō", "ū", MACRON},
to = {"a", "e", "i", "o", "u", ""}},
-- entry_name: acute vowels go to plain vowels; the bare combining acute goes to "".
entry_name = {
from = {"á", "é", "í", "ó", "ú", ACUTE},
to = {"a", "e", "i", "o", "u", ""}},
}
-- Turkish: Latin script list; "ota" is given as its only ancestor.
m["tr"] = {
"Turkish",
256,
"trk-ogz",
Latn,
ancestors = {"ota"},
}
-- Tsonga: Latin script list; no entry_name, sort_key or transliteration settings.
m["ts"] = {
"Tsonga",
34327,
"bnt-tsr",
Latn,
}
-- Tatar: lists the Cyrl, Latn and tt-Arab script codes; transliteration from "tt-translit"
-- with override_translit switched on.
m["tt"] = {
"Tatar",
25285,
"trk-kbu",
{"Cyrl", "Latn", "tt-Arab"},
translit_module = "tt-translit",
override_translit = true,
}
-- "tw" IS TREATED AS "ak", SEE WT:LT
-- Tahitian: Latin script list; no entry_name, sort_key or transliteration settings.
m["ty"] = {
"Tahitian",
34128,
"poz-pep",
Latn,
}
-- Uyghur: lists the ug-Arab, Latn and Cyrl script codes; ancestor "chg"; transliteration
-- from "ug-translit" with override_translit switched on.
m["ug"] = {
"Uyghur",
13263,
"trk-kar",
{"ug-Arab", "Latn", "Cyrl"},
ancestors = {"chg"},
translit_module = "ug-translit",
override_translit = true,
}
-- Ukrainian: uses the shared Cyrl script list; ancestor "orv"; transliteration module "uk-translit".
m["uk"] = {
"Ukrainian",
8798,
"zle",
Cyrl,
ancestors = {"orv"},
translit_module = "uk-translit",
-- entry_name: the four grave-accented letters line up with Е/е/И/и; the two trailing
-- combining marks (GRAVE, ACUTE) have no `to` counterpart.
entry_name = {
from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE},
to = {"Е", "е", "И", "и"}},
-- standardChars: a set of Cyrillic letters and ranges followed by the shared PUNCTUATION string.
standardChars = "ЄІЇА-ЩЬЮ-щьюяєії" .. PUNCTUATION,
}
-- Urdu: ur-Arab script; ancestor "inc-ohi".
m["ur"] = {
"Urdu",
1617,
"inc-hnd",
{"ur-Arab"},
ancestors = {"inc-ohi"},
-- entry_name: ten code points (U+0640, U+064B through U+0652, and U+0658) are listed
-- with an empty `to` (presumably they are stripped from page names -- confirm).
entry_name = {
from = {u(0x0640), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658)},
to = {}} ,
}
-- Uzbek: lists the Latn, Cyrl and fa-Arab script codes; ancestor "chg".
m["uz"] = {
"Uzbek",
9264,
"trk-kar",
{"Latn", "Cyrl", "fa-Arab"},
ancestors = {"chg"},
}
-- Venda: Latin script list; no entry_name, sort_key or transliteration settings.
m["ve"] = {
"Venda",
32704,
"bnt-bso",
Latn,
}
-- Vietnamese: lists the Latn and Hani script codes; ancestor "mkh-mvi".
m["vi"] = {
"Vietnamese",
9199,
"mkh-vie",
{"Latn", "Hani"},
ancestors = {"mkh-mvi"},
-- Here sort_key is a string rather than a from/to table
-- (presumably the name of a module that computes the key -- confirm).
sort_key = "vi-sortkey",
}
-- Volapük: Latin script list; no entry_name, sort_key or transliteration settings.
m["vo"] = {
"Volapük",
36986,
"art",
Latn,
}
-- Walloon: Latin script list; ancestor "fro".
m["wa"] = {
"Walloon",
34219,
"roa-oil",
Latn,
ancestors = {"fro"},
-- sort_key: seven letter patterns (accented vowels, y and ç) line up with seven plain
-- letters; the trailing apostrophe pattern has no `to` counterpart.
sort_key = {
from = {"[áàâäå]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"},
to = {"a" , "e" , "i" , "o" , "u" , "y" , "c"}} ,
}
-- Wolof: uses the shared LatnArab script list (Latn + Arab).
m["wo"] = {
"Wolof",
34257,
"alv-fwo",
LatnArab,
}
-- Xhosa: Latin script list.
m["xh"] = {
"Xhosa",
13218,
"bnt-ngu",
Latn,
-- entry_name: seven letter patterns (tone-marked vowels, m and n) line up with seven
-- plain letters; the five trailing combining marks have no `to` counterpart.
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
-- Yiddish: Hebr script; ancestor "gmh"; transliteration module "yi-translit".
m["yi"] = {
"Yiddish",
8641,
"gmw",
{"Hebr"},
ancestors = {"gmh"},
-- sort_key: six patterns and six targets; each pointed letter or letter combination in
-- `from` has an unpointed replacement at the same index in `to`.
sort_key = {
from = {"[אַאָ]", "בּ", "[וֹוּ]", "יִ", "ײַ", "פֿ"},
to = {"א", "ב", "ו", "י", "יי", "פ"}} ,
translit_module = "yi-translit",
}
-- Yoruba: Latin script list.
m["yo"] = {
"Yoruba",
34311,
"alv-yor",
Latn,
-- sort_key: ẹ, ọ, gb and ṣ become the base letter followed by "~"
-- (presumably so they collate after e, o, g and s -- confirm).
sort_key = {
from = {"ẹ", "ọ", "gb", "ṣ"},
to = {"e~" , "o~", "g~", "s~"}},
-- remove_diacritics is the concatenation of the combining acute, grave and macron.
entry_name = { remove_diacritics = ACUTE .. GRAVE .. MACRON },
}
-- Zhuang: lists the Latn and Hani script codes.
m["za"] = {
"Zhuang",
13216,
"tai",
{"Latn", "Hani"},
-- sort_key: punctuation (%p) is replaced by the empty string.
sort_key = {
from = {"%p"},
to = {""}},
}
-- Chinese: lists the Hani, Brai and Nshu script codes; ancestor "ltc".
m["zh"] = {
"Chinese",
7850,
"zhx",
{"Hani", "Brai", "Nshu"},
ancestors = {"ltc"},
-- Here sort_key is a string rather than a from/to table
-- (presumably the name of a module that computes the key -- confirm).
sort_key = "zh-sortkey",
}
-- Zulu: Latin script list.
m["zu"] = {
"Zulu",
10179,
"bnt-ngu",
Latn,
-- entry_name: seven letter patterns (tone-marked vowels, m and n) line up with seven
-- plain letters; the five trailing combining marks have no `to` counterpart.
entry_name = {
from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON},
to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }},
}
return m
kkc3gpdrmedhehpi1ryz16nm6zndcie
Module:ja-headword
828
3880
13342
11272
2022-08-05T07:11:24Z
Asinis632
1829
Scribunto
text/plain
local m_ja = require("Module:ja")
local m_ja_ruby = require('Module:ja-ruby')
local find = mw.ustring.find
local export = {}
local pos_functions = {}
local lang = require("Module:languages").getByCode("ja")
local sc = require("Module:scripts").getByCode("Jpan")
local Latn = require("Module:scripts").getByCode("Latn")
-- Character-set fragments and the anchored patterns built from them.
-- The *_range strings are written to be spliced between "[" and "]"; the *_pattern
-- strings are complete patterns that match a whole string ("^[...]*$").

-- Punctuation/symbol characters accepted alongside kana.
local Japanese_symbols = '%ー・=?!。、'
-- Katakana block range plus the long-vowel mark and katakana iteration marks.
local katakana_range = 'ァ-ヺーヽヾ'
-- Hiragana block range plus the long-vowel mark and hiragana iteration marks.
local hiragana_range = 'ぁ-ゖーゞゝ'
-- Either kana script, plus the symbols above.
local kana_range = katakana_range .. hiragana_range .. Japanese_symbols
-- Kana, symbols, the 一-鿌 ideograph range, and the characters ・ and 々.
local Japanese_scripts_range = kana_range .. '一-鿌・々'
-- Whole string is katakana (and symbols) only; also matches the empty string because of "*".
local katakana_pattern = '^[' .. katakana_range .. Japanese_symbols .. ']*$'
-- Whole string is hiragana (and symbols) only.
local hiragana_pattern = '^[' .. hiragana_range .. Japanese_symbols .. ']*$'
-- Whole string is kana of either kind (and symbols).
local kana_pattern = '^[' .. kana_range .. ']*$'
-- Like kana_pattern but additionally tolerates "、", whitespace, ".", "-", "^" and "%",
-- which format_headword uses to pick kana readings out of the positional arguments.
local kana_pattern_full = '^[、' .. kana_range .. '%s%.%-%^%%]*$'
-- Strip wikilink markup from `text`, keeping only what would be displayed:
-- a "[[target|" prefix is dropped first, then any remaining "[[" and "]]".
-- Returns exactly one value (the cleaned string), never gsub's match count.
local function remove_links(text)
	local stripped = text:gsub("%[%[[^|%]]-|", "")
	stripped = stripped:gsub("%[%[", "")
	stripped = stripped:gsub("%]%]", "")
	return stripped
end
-- Classify a string by kana script (memoized through Module:fun):
--   'kata' when it matches katakana_pattern,
--   'hira' when it matches hiragana_pattern,
--   'both' when it matches kana_pattern,
--   nil    otherwise.
-- The checks run in that order, so a string matching several patterns gets the first label.
local detect_kana_script = require("Module:fun").memoize(function(kana)
	if find(kana, katakana_pattern) then
		return 'kata'
	end
	if find(kana, hiragana_pattern) then
		return 'hira'
	end
	if find(kana, kana_pattern) then
		return 'both'
	end
	return nil
end)
-- English number words indexed by value (1..15). categorize_by_kanji uses entries 2..15
-- to name the "written with N Han script characters" category; counts above 15 have no
-- entry and therefore get no such category.
local en_numerals = {
"one", "two", "three", "four", "five",
"six", "seven", "eight", "nine", "ten",
"eleven", "twelve", "thirteen", "fourteen", "fifteen"
}
-- Category name fragments indexed by the value that m_ja.kanji_grade returns for a kanji
-- (categorize_by_kanji looks up en_grades[m_ja.kanji_grade(ch)]).
local en_grades = {
"first grade", "second grade", "third grade",
"fourth grade", "fifth grade", "sixth grade",
"secondary school", "jinmeiyō", "hyōgaiji"
}
-- Long-form spellings accepted for the transitivity and inflection-type parameters,
-- mapped to the short codes that add_transitivity and add_inflections compare against.
local aliases = {
['transitive']='tr', ['trans']='tr',
['intransitive']='in', ['intrans']='in', ['intr']='in',
['godan']='1', ['ichidan']='2', ['irregular']='irr'
}
-- Romanize `kana` for the headword line after applying part-of-speech tweaks.
--   kana: the kana string to romanize
--   data: headword data; only data.pos_category is read
--   args: template arguments; only args["infl"] is read (and only for adjectives)
-- Returns the single value produced by m_ja.kana_to_romaji.
local function kana_to_romaji(kana, data, args)
	local pos = data.pos_category

	-- For verbs and -i adjectives, put a period before a final う/い so that the ending
	-- is not merged into a long vowel (written with a macron) during romanization.
	local protect_ending = pos == "verbs"
	if not protect_ending and pos == "adjectives" then
		local infl = args["infl"]
		protect_ending = infl == "i" or infl == "い" or infl == "is"
	end
	if protect_ending then
		kana = mw.ustring.gsub(kana, '([うい])$', '.%1')
	end

	-- Hyphenate affixes: trailing hyphen for prefixes, leading hyphen for suffixes,
	-- counters and classifiers.
	if pos == "prefixes" then
		kana = kana:gsub('%-?$', '-')
	elseif pos == "suffixes" or pos == "counters" or pos == "classifiers" then
		kana = kana:gsub('^%-?', '-')
	end

	-- Proper nouns: unless the caller already placed a "^" marker, add one at the start
	-- and after every whitespace character or hyphen.
	if pos == "proper nouns" and not find(kana, '%^') then
		kana = mw.ustring.gsub(kana, '^(.)', '^%1')
		kana = mw.ustring.gsub(kana, '([%s%-])(.)', '%1^%2')
	end

	-- Assign first so that exactly one value is returned to the caller.
	local romaji = m_ja.kana_to_romaji(kana)
	return romaji
end
-- Build the superscript "historical kana" annotation and add the matching categories.
--   args:   template arguments; reads args["hhira"], falling back to args["hkata"]
--   data:   headword data; appends to data.categories and reads data.kana
--   poscat: accepted but not used
-- Returns '' when no historical spelling was supplied, otherwise an HTML/wikitext snippet
-- linking the historical spelling through Module:ja-link.
local function historical_kana(args, data, poscat)
	local hk = args["hhira"] or args["hkata"]
	if not hk then
		return ''
	end

	local prefix = "Japanese terms historically spelled with "

	-- ゐ and ゑ are always categorized when present in the historical spelling.
	for _, ch in ipairs({'ゐ', 'ゑ'}) do
		if hk:match(ch) then
			table.insert(data.categories, prefix .. ch)
		end
	end

	-- を, ぢ and づ are categorized only when the modern kana does not contain them too.
	for _, ch in ipairs({'を', 'ぢ', 'づ'}) do
		if hk:match(ch) and not (data.kana and data.kana:match(ch)) then
			table.insert(data.categories, prefix .. ch)
		end
	end

	local link_data = { lemma = hk }
	local link_options = {
		hist = true,
		face = 'head',
		disableSelfLink = true,
	}
	local linked = require('Module:ja-link').link(link_data, link_options)
	return '<sup>←' .. linked .. '<sup>[[w:Historical kana orthography|?]]</sup></sup> '
end
-- Try to split `head` and `kana` into aligned "%"-separated segments by using the
-- readings found in {{ja-kanjitab}} calls in the wikitext of page `pagename`.
--   head:     headword text, possibly containing [[...]] links
--   kana:     full kana reading of the headword
--   pagename: title of the page whose content is scanned for ja-kanjitab
-- Returns (head, kana) unchanged when no kanjitab yields readings that reassemble to
-- `kana`; otherwise returns the two "%"-joined decompositions.
local function assign_kana_to_kanji(head, kana, pagename)
-- Character set treated as kanji: 々, 㐀-䶵, 一-鿌 and U+F900-U+FAFF.
local pat_k = '々㐀-䶵一-鿌' .. mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAFF)
-- Nothing to split for a one-character head, or when head contains anything other than
-- Japanese-script characters, square brackets, "|" and whitespace.
if mw.ustring.len(head) == 1 or mw.ustring.match(head, '[^' .. Japanese_scripts_range .. '%[%]|%s]') then
return head, kana
end
-- kanji_pos[i] = character position in `head` (link markup included) of the i-th kanji;
-- index 0 is a sentinel so that kanji_pos[i - 1] works for i == 1.
local kanji_pos = {[0] = 0}
-- Position in `head` up to which kanji have already been recorded.
local link_border = 0
-- head_nolink: `head` with every [[target|text]] / [[text]] replaced by its display text.
-- The callback also records kanji positions as a side effect.
local head_nolink = mw.ustring.gsub(head, '()(%b[])()', function(p1, w1, p2)
-- Only double-bracketed spans are links; leave single-bracket spans untouched.
if w1:sub(2, 2) ~= '[' or w1:sub(-2, -2) ~= ']' then return w1 end
-- Kanji in the plain text between the previous link and this one.
for pp1 in mw.ustring.gmatch(mw.ustring.sub(head, link_border + 1, p1 - 1), '()[' .. pat_k .. ']') do
table.insert(kanji_pos, pp1 + link_border)
end
-- With no pipe, p_pipe = 2 makes the sub() below start right after "[[".
local p_pipe = mw.ustring.find(w1, '|') or 2
w1 = mw.ustring.sub(w1, p_pipe + 1, -3)
link_border = p1 - 1 + p_pipe
-- Kanji inside the link's display text.
for pp1 in mw.ustring.gmatch(w1, '()[' .. pat_k .. ']') do
table.insert(kanji_pos, pp1 + link_border)
end
link_border = p2 - 1
return w1
end)
-- Kanji in the text after the last link (or in the whole head when it has no links).
for pp1 in mw.ustring.gmatch(mw.ustring.sub(head, link_border + 1), '()[' .. pat_k .. ']') do
table.insert(kanji_pos, pp1 + link_border)
end
local pagetext = mw.title.new(pagename):getContent()
if not pagetext then return head, kana end
-- non_kanji: the stretches of head_nolink before the first kanji, between consecutive
-- kanji, and after the last kanji (so it has one more element than there are kanji).
local non_kanji = {}
local last_kanji = 1
for p1 in mw.ustring.gmatch(head_nolink, '[' .. pat_k .. ']()') do
-- p1 is the position just after the kanji, hence p1 - 2 is the character before it.
table.insert(non_kanji, mw.ustring.sub(head_nolink, last_kanji, p1 - 2))
last_kanji = p1
end
table.insert(non_kanji, mw.ustring.sub(head_nolink, last_kanji))
-- Try each {{ja-kanjitab|...}} call on the page in turn.
for kanjitab_args in pagetext:gmatch'{{%s*ja%-kanjitab%s*(|.-)}}' do
local readings = {}     -- reading for the n-th reading slot
local readings_len = {} -- number of kanji covered by that reading (trailing digits)
local readings_o = {}   -- text from "o<n>=" parameters, appended after the reading
local id = 1
for ka in kanjitab_args:gmatch'|([^|]*)' do
if not ka:match'=' then
-- Positional parameter: non-digit reading optionally followed by a digit count.
local r_kana, r_len = ka:match'^%s*(%D*)(%d*)%s*$'
readings[id] = readings[id] or r_kana
readings_len[id] = tonumber(r_len)
id = id + 1
else
-- Named parameter of the form "<n>=", "k<n>=" or "o<n>="; anything else is ignored.
local id_t, id_n = ka:match'^%s*([ko]?)(%d+)%s*='
if id_t then
id_n = tonumber(id_n)
local r = ka:match'^.-=%s*(.-)%s*$'
if id_t == '' then
local r_kana, r_len = r:match'(%D*)(%d*)'
readings[id_n] = readings[id_n] or r_kana
readings_len[id_n] = tonumber(r_len)
elseif id_t == 'k' then
-- "k<n>=" overrides any reading already stored for that slot.
readings[id_n] = r
else
readings_o[id_n] = r
end
end
end
end
-- Interleave non-kanji text with readings; a reading whose length is N > 1 consumes
-- N kanji, so the following N - 1 iterations only count down.
local kana_decom = {}
local reading_id = 1
local reading_len = 1
for i = 1, #non_kanji - 1 do
if reading_len <= 1 then
reading_len = readings_len[reading_id] or 1
table.insert(kana_decom, non_kanji[i])
table.insert(kana_decom, (readings[reading_id] or '') .. (readings_o[reading_id] or ''))
reading_id = reading_id + 1
else
reading_len = reading_len - 1
end
end
table.insert(kana_decom, non_kanji[#non_kanji])
-- Accept this kanjitab only if its pieces reassemble to the supplied kana, ignoring
-- spaces on the left and ".", "-", " " and "^" on the right. Each gsub call is an
-- operand of "==", so only its first result (the string) takes part in the comparison.
if table.concat(kana_decom):gsub(' ', '') == kana:gsub('[%.%- ^]', '') then
-- Cut `head` at the same boundaries, using the recorded kanji positions.
local head_decom = {}
reading_id = 1
reading_len = 1
for i = 1, #non_kanji - 1 do
if reading_len <= 1 then
reading_len = readings_len[reading_id] or 1
table.insert(head_decom, mw.ustring.sub(head, kanji_pos[i - 1] + 1, kanji_pos[i] - 1))
table.insert(head_decom, mw.ustring.sub(head, kanji_pos[i], kanji_pos[i + reading_len - 1]))
reading_id = reading_id + 1
else
reading_len = reading_len - 1
end
end
table.insert(head_decom, mw.ustring.sub(head, kanji_pos[#non_kanji - 1] + 1))
return table.concat(head_decom, '%'), table.concat(kana_decom, '%')
end
end
-- No kanjitab produced a matching decomposition.
return head, kana
end
-- Fallback segmentation of a headword for ruby: every character in the set below becomes
-- its own segment and the text between such characters is kept together; the segments are
-- joined with "%". Each call is recorded via Module:debug tracking.
-- Links are unwrapped to their display text first.
local function default_seperator(text)
	require('Module:debug').track('ja-headword/default separator used')

	text = text:gsub('%[%[([^|]-)%]%]', '%1'):gsub('%[%[[^|]-|([^|]-)%]%]', '%1')

	-- NOTE(review): the last fragment contains "𠀀-0-9", i.e. a range running from 𠀀 down
	-- to "0". It looks as if the upper bound of the 𠀀-… range was lost -- confirm against
	-- the upstream module before relying on digits or that range matching.
	local single_char = table.concat{
		'()([々㐀-䶵一-鿌',
		mw.ustring.char(0xF900),
		"-",
		mw.ustring.char(0xFAD9),
		'𠀀-0-9A-Za-z〆〇0-9a-zA-Zα-ωΑ-Ω])',
	}

	local segments = {}
	local cursor = 1
	for pos, ch in mw.ustring.gmatch(text, single_char) do
		-- Flush the run of other characters that precedes this one, if any.
		if cursor < pos then
			segments[#segments + 1] = mw.ustring.sub(text, cursor, pos - 1)
		end
		segments[#segments + 1] = ch
		cursor = pos + 1
	end
	-- Trailing run after the last matched character.
	if cursor <= mw.ustring.len(text) then
		segments[#segments + 1] = mw.ustring.sub(text, cursor)
	end
	return table.concat(segments, '%')
end
-- Append kanji-based categories for page title PAGENAME to data.categories:
--   * one "spelled with <grade> kanji" category per kanji (characters in 一-鿌), with the
--     grade name looked up through m_ja.kanji_grade and en_grades;
--   * a category for the number of kanji, when en_numerals has a word for that count;
--   * for a title that is exactly one kanji, "spelled with <kanji>" and the
--     single-kanji-terms category.
-- Titles without kanji add nothing.
local function categorize_by_kanji(data, PAGENAME)
	local kanji_only = mw.ustring.gsub(PAGENAME, '[^一-鿌]', '')
	local kanji_count = mw.ustring.len(kanji_only)

	-- per-kanji grade categories (the loop is empty when there are no kanji)
	for idx = 1, kanji_count do
		local grade = m_ja.kanji_grade(mw.ustring.sub(kanji_only, idx, idx))
		table.insert(data.categories, ("Japanese terms spelled with %s kanji"):format(en_grades[grade]))
	end

	-- category by number of kanji
	if kanji_count == 1 then
		table.insert(data.categories, "Japanese terms written with one Han script character")
	elseif kanji_count > 0 and en_numerals[kanji_count] then
		table.insert(data.categories, ("Japanese terms written with %s Han script characters"):format(en_numerals[kanji_count]))
	end

	-- titles consisting of a single kanji
	local is_single_char = mw.ustring.len(PAGENAME) == 1
	if is_single_char and mw.ustring.match(PAGENAME, '[一-鿌]') then
		table.insert(data.categories, "Japanese terms spelled with " .. PAGENAME)
		table.insert(data.categories, "Japanese single-kanji terms")
	end
end
-- Append script- and character-based categories for page title PAGENAME.
--   data:              headword data; data.categories is appended to, data.pos_category is read
--   katakana_category: separate list that receives the katakana category, so the caller
--                      can decide where it finally goes
local function extra_categorization(data, PAGENAME, katakana_category)
	-- pure-hiragana or pure-katakana titles
	local script = detect_kana_script(PAGENAME)
	if script == 'hira' then
		table.insert(data.categories, "Siapan hiragana")
	elseif script == 'kata' then
		table.insert(katakana_category, "Siapan katakana")
	end

	-- titles mixing Japanese-script characters with anything else
	local has_other = find(PAGENAME, "[^" .. Japanese_scripts_range .. "]")
	if has_other and find(PAGENAME, '[' .. Japanese_scripts_range .. ']') then
		table.insert(data.categories, "Japanese terms written in multiple scripts")
	end

	-- titles containing one of these special characters
	local special_characters = {'々','〆','ヶ','ゝ','ゞ','ヽ','ヾ','ゐ','ヰ','ゑ','ヱ','ゔ','ヷ','ヸ','ヹ','ヺ','・','=','゠'}
	for _, character in ipairs(special_characters) do
		if mw.ustring.match(PAGENAME, character) then
			table.insert(data.categories, ("Japanese terms spelled with %s"):format(character))
		end
	end

	-- titles with both katakana and hiragana, except for proverbs and phrases
	local has_katakana = find(PAGENAME, "[ァ-ヺヽヾ]")
	local has_hiragana = has_katakana and find(PAGENAME, "[ぁ-ゖゞゝ]")
	if has_hiragana and data.pos_category ~= "proverbs" and data.pos_category ~= "phrases" then
		table.insert(data.categories, "Japanese terms spelled with mixed kana")
	end
end
-- Collect the kana readings supplied to the template, romanize them, and fill in
-- data.heads, data.heads_preserved, data.translits, data.kana and (for multiple
-- readings) data.categories.
--   args: processed template arguments (reads args[1], hira, kata, rom, pagename)
--   data: headword data table, modified in place
--   head: the headword text (the "head" argument or the page name)
-- Raises an error when no kana or no romaji could be determined.
local function format_headword(args, data, head)
-- Non-nil only when the headword itself is written purely in kana.
local headword_kana_type = detect_kana_script(remove_links(m_ja.remove_ruby_markup(head)))
local allkana, romajis = {}, {}
-- rep: keys already seen, used to drop duplicate readings.
local rep = {}
-- _insert_kana registers one reading; which variant is used depends on the headword.
local _insert_kana = headword_kana_type and function(k) -- pure-kana-title entry
if k == '' then return end
-- Duplicates are detected on the kana text itself (ruby markup and links removed).
local key = remove_links(m_ja.remove_ruby_markup(k))
-- Only one romanization is kept: each call overwrites slot 1.
romajis[1] = kana_to_romaji(remove_links(k), data, args)
if not rep[key] then
table.insert(allkana, k)
rep[key] = true
end
end or function(k) -- non-pure-kana-title entry
if k == '' then return end
-- Duplicates are detected on the plain romanization, so readings that romanize
-- identically are registered once.
local key = m_ja.kana_to_romaji(remove_links(m_ja.remove_ruby_markup(k)))
if not rep[key] then
table.insert(romajis, kana_to_romaji(remove_links(k), data, args))
table.insert(allkana, k)
rep[key] = true
end
end
-- A kana headword is its own first reading; slot 1 is then reset to the unmodified
-- head so that any links in it are kept for display.
if headword_kana_type then
_insert_kana(remove_links(head))
allkana[1] = head
end
for i, arg in ipairs(args[1]) do
-- test for kana: filter out POS designations
if find(arg, kana_pattern_full) then
_insert_kana(arg)
end
end
-- accept "hira" and "kata" but let Lua decide if they are really hiragana or katakana
if args["hira"] and args["hira"] ~= "" then _insert_kana(args["hira"]) end
if args["kata"] and args["kata"] ~= "" then _insert_kana(args["kata"]) end
-- An explicit romanization replaces the first automatic one.
if args["rom"] then romajis[1] = args["rom"] end
if #allkana == 0 then error('Kana form is required') end
if #romajis == 0 then error('Romaji is required') end
-- suru verbs get a linked する appended to every displayed head.
local suru_ending = data.pos_category == "suru verbs" and '[[する]]' or ''
for _, kana in ipairs(allkana) do
-- add everything to inflections, except historical hiragana which is next
-- local format_result = headword_kana_type and allkana[i] or format_ruby(PAGENAME, allkana[i], data)
local format_result, format_result_preserved --<ruby> form, []() form
if headword_kana_type then
-- Kana headword: show the kana itself, without ruby.
format_result = m_ja.remove_ruby_markup(kana)
format_result_preserved = remove_links(format_result) .. suru_ending
format_result = format_result .. suru_ending
else
-- Choose how head and kana are cut into matching "%"-separated segments:
--   * kana already segmented and head too: use both as given;
--   * kana segmented but head not: segment head with default_seperator;
--   * kana not segmented: try the page's ja-kanjitab via assign_kana_to_kanji.
local head_for_ruby, kana_for_ruby
if kana:match'%%' then
if head:match'%%' then
head_for_ruby, kana_for_ruby = head, kana
else
head_for_ruby, kana_for_ruby = default_seperator(head), kana
end
else
head_for_ruby, kana_for_ruby = assign_kana_to_kanji(head, kana, args.pagename)
end
local format_table = m_ja_ruby.parse_text(head_for_ruby, kana_for_ruby, {
try = 'force',
try_force_limit = 10000
})
-- Every non-empty <rt> is turned into a link to the full kana reading.
format_result = m_ja_ruby.to_wiki(format_table, {
break_link = true,
}):gsub('<rt>(..-)</rt>', "<rt>[[" .. remove_links(m_ja.remove_ruby_markup(kana)) .."|%1]]</rt>") .. suru_ending
format_result_preserved = remove_links(m_ja_ruby.to_markup(format_table)) .. suru_ending
end
table.insert(data.heads, format_result)
-- Only the first reading's markup form is kept; add_inflections reads it as the lemma.
data.heads_preserved = data.heads_preserved or format_result_preserved
end
-- For the romanizations the suru suffix is plain text.
suru_ending = data.pos_category == "suru verbs" and ' suru' or ''
for _, rom in ipairs(romajis) do
table.insert(data.translits, '[[' .. rom .. '#Japanese|' .. rom .. ']]' .. suru_ending)
end
if #romajis > 1 then
table.insert(data.categories, "Japanese words with multiple readings")
end
-- Plain first reading, read later by historical_kana.
data.kana = allkana[1] and remove_links(m_ja.remove_ruby_markup(allkana[1]))
end
-- Record a verb's transitivity.
--   data: headword data; appends to data.info_mid and data.categories
--   tr:   "tr", "in", "both", or one of the long forms listed in `aliases`
-- Any other value (including nil) only adds the "without transitivity" category.
local function add_transitivity(data, tr)
	tr = aliases[tr] or tr

	-- { info text, categories... } for each recognised code
	local known = {
		["tr"] = {'transitive', "Japanese transitive verbs"},
		["in"] = {'intransitive', "Japanese intransitive verbs"},
		["both"] = {'transitive or intransitive', "Japanese transitive verbs", "Japanese intransitive verbs"},
	}

	local spec = known[tr]
	if not spec then
		table.insert(data.categories, "Japanese verbs without transitivity")
		return
	end

	table.insert(data.info_mid, spec[1])
	for idx = 2, #spec do
		table.insert(data.categories, spec[idx])
	end
end
-- Add the inflection-type label, the matching category and, where they can be derived,
-- the principal inflected forms to the headword data.
--   data:            headword data; reads data.heads_preserved / data.heads[1] and
--                    data.translits[1], appends to data.info_mid, data.categories and
--                    data.inflections
--   inflection_type: type code ("1", "1s", "2", "suru", "kuru", "i", "is", "na", ...) or
--                    one of the long forms in `aliases`; unknown values add nothing
--   cat_suffix:      plural part-of-speech name used in category names (e.g. 'verbs');
--                    when nil no inflection category is added
local function add_inflections(data, inflection_type, cat_suffix)
	local lemma = data.heads_preserved or data.heads[1]
	local romaji = remove_links(data.translits[1])
	inflection_type = aliases[inflection_type] or inflection_type

	-- Replace the kana ending `lemma_from` of the lemma (and `romaji_from` of its
	-- romanization) by each ending in `lemma_to` / `romaji_to`.
	-- Returns a list of {lemma = ..., romaji = ...}, or nil when the lemma or the
	-- romanization does not end as expected.
	local function replace_suffix(lemma_from, lemma_to, romaji_from, romaji_to)
		-- e.g. 持って来る, lemma = "[持](も)って来(く)る"
		-- lemma_from = "くる", lemma_to = {"き","きた"}
		local p_kr = katakana_range .. hiragana_range
		local key_pos = {}

		romaji_from = romaji_from or m_ja.kana_to_romaji(lemma_from)
		if type(lemma_to) ~= 'table' then lemma_to = {lemma_to} end
		if type(romaji_to) ~= 'table' then romaji_to = {romaji_to} end
		for i, v in ipairs(lemma_to) do
			romaji_to[i] = romaji_to[i] or m_ja.kana_to_romaji(v)
		end

		local lemma_sub = lemma
		lemma_from = lemma_from ~= '' and mw.text.split(lemma_from, '') or {} -- lemma_from = {"く","る"}
		local len_lemma_from = #lemma_from -- find the last two kana in "[持](も)って来(く)る"
		key_pos[len_lemma_from + 1] = {-1}
		for i = len_lemma_from, 1, -1 do
			-- find() returns start, end and the position capture; the end index is not
			-- needed. All three are locals here: the placeholder `_` used to be assigned
			-- without a declaration, which wrote a global variable.
			local match_start, _, after_match = mw.ustring.find(lemma_sub, '[' .. m_ja.kata_to_hira(lemma_from[i]) .. m_ja.hira_to_kata(lemma_from[i]) .. ']()[^' .. p_kr .. ']-$')
			if not match_start then return nil end
			local before_match = match_start - 1
			key_pos[i] = {before_match, after_match}
			lemma_sub = mw.ustring.sub(lemma_sub, 1, before_match)
		end

		-- The romanization must end in romaji_from exactly once.
		local romaji_sub, replaced = romaji:gsub(romaji_from .. '%s*$', '')
		if replaced ~= 1 then return nil end

		local result = {}
		for i, v in ipairs(lemma_to) do
			local result_single = {lemma_sub}
			for j = 1, len_lemma_from do
				-- j-th new kana, followed by whatever markup sat between the old kana
				table.insert(result_single, mw.ustring.sub(v, j, j))
				table.insert(result_single, mw.ustring.sub(lemma, key_pos[j][2], key_pos[j + 1][1]))
			end
			-- any kana of the new ending beyond the length of the old one
			table.insert(result_single, mw.ustring.sub(v, len_lemma_from + 1))
			result[i] = {lemma = table.concat(result_single), romaji = romaji_sub .. romaji_to[i]}
			-- "[持](も)って来(" .. "き" .. ")" .. "" .. "" and "[持](も)って来(" .. "き" .. ")" .. "た" .. ""
		end
		return result -- {{lemma="[持](も)って来(き)",romaji="motteki"},{lemma="[持](も)って来(き)た",romaji="mottekita"}}
	end

	-- Append one labelled group of forms to data.inflections.
	local function insert_form(label, ...)
		-- label = "stem" or "past" etc.
		-- ... = {lemma=...,romaji=...},{lemma=...,romaji=...}
		local labeled_forms = {label = label}
		for _, v in ipairs{...} do
			local table_form = m_ja_ruby.parse_markup(v.lemma)
			local form_term = m_ja_ruby.to_wiki(table_form)
			-- Link the whole form unless its markup already contains a link.
			if not form_term:find'%[%[.+%]%]' then
				form_term = '[[' .. m_ja_ruby.to_text(table_form) .. '#Japanese|' .. form_term .. ']]'
			end
			table.insert(labeled_forms, {
				term = form_term,
				translit = v.romaji,
			})
		end
		table.insert(data.inflections, labeled_forms)
	end

	local inflected_forms
	if inflection_type == '1' or inflection_type == '1s' then
		table.insert(data.info_mid, '<abbr title="godan (type I) conjugation">godan</abbr>')
		if cat_suffix then
			table.insert(data.categories, "Japanese type 1 " .. cat_suffix)
			if cat_suffix == 'verbs' and data.translits[1] and mw.ustring.find(remove_links(data.translits[1]), '[ieIEīēĪĒ]ru$') then
				table.insert(data.categories, "Japanese type 1 verbs that end in -iru or -eru")
			end
		end
		if inflection_type == '1' then
			-- Regular godan: the first ending that fits the lemma wins.
			inflected_forms =
				replace_suffix('く', {'き', 'いた'}, 'ku', {'ki', 'ita'}) or
				replace_suffix('ぐ', {'ぎ', 'いだ'}, 'gu', {'gi', 'ida'}) or
				replace_suffix('す', {'し', 'した'}, 'su', {'shi', 'shita'}) or
				replace_suffix('つ', {'ち', 'った'}, 'tsu', {'chi', 'tta'}) or
				replace_suffix('ぬ', {'に', 'んだ'}, 'nu', {'ni', 'nda'}) or
				replace_suffix('ぶ', {'び', 'んだ'}, 'bu', {'bi', 'nda'}) or
				replace_suffix('む', {'み', 'んだ'}, 'mu', {'mi', 'nda'}) or
				replace_suffix('る', {'り', 'った'}, 'ru', {'ri', 'tta'}) or
				replace_suffix('う', {'い', 'った'}, 'u', {'i', 'tta'})
			if inflected_forms then
				insert_form('stem', inflected_forms[1])
				insert_form('past', inflected_forms[2])
			else
				require("Module:debug").track("ja-headword/godan conjugation failed")
			end
		else
			-- '1s': godan verbs with special forms.
			inflected_forms =
				replace_suffix('る', {'り', 'った', 'い'}, 'ru', {'ri', 'tta', 'i'}) or --くださる
				replace_suffix('いく', {'いき', 'いった'}, 'iku', {'iki', 'itta'}) or --行く
				replace_suffix('う', {'い', 'うた'}, 'ou', {'oi', 'ōta'}) --問う
			if inflected_forms then
				insert_form('stem', inflected_forms[1], inflected_forms[3])
				insert_form('past', inflected_forms[2])
			else
				require("Module:debug").track("ja-headword/godan conjugation special failed")
			end
		end
	elseif inflection_type == '2' then
		table.insert(data.info_mid, '<abbr title="ichidan (type II) conjugation">ichidan</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese type 2 " .. cat_suffix) end
		inflected_forms = replace_suffix('る', {'', 'た'}, 'ru', {'', 'ta'})
		if inflected_forms then
			insert_form('stem', inflected_forms[1])
			insert_form('past', inflected_forms[2])
		else
			require("Module:debug").track("ja-headword/ichidan conjugation failed")
		end
	elseif inflection_type == 'suru' then
		table.insert(data.info_mid, '<abbr title="suru (type III) conjugation">suru</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese suru " .. cat_suffix) end
		inflected_forms =
			replace_suffix('する', {'し', 'した'}, 'suru', {'shi', 'shita'}) or
			replace_suffix('ずる', {'じ', 'じた'}, 'zuru', {'ji', 'jita'})
		if inflected_forms then
			insert_form('stem', inflected_forms[1])
			insert_form('past', inflected_forms[2])
		else
			require("Module:debug").track("ja-headword/suru conjugation failed")
		end
	elseif inflection_type == 'kuru' then
		table.insert(data.info_mid, '<abbr title="kuru (type III) conjugation">kuru</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese kuru " .. cat_suffix) end
		inflected_forms = replace_suffix('くる', {'き', 'きた'}, 'kuru', {'ki', 'kita'})
		if inflected_forms then
			insert_form('stem', inflected_forms[1])
			insert_form('past', inflected_forms[2])
		else
			require("Module:debug").track("ja-headword/kuru conjugation failed")
		end
	elseif inflection_type == 'i' or inflection_type == 'い' then
		table.insert(data.info_mid, '<abbr title="-i (type I) inflection">-i</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese い-i " .. cat_suffix) end
		inflected_forms = replace_suffix('い', {'く'}, 'i', {'ku'})
		if inflected_forms then
			insert_form('adverbial', inflected_forms[1])
		else
			require("Module:debug").track("ja-headword/-i inflection failed")
		end
	elseif inflection_type == 'is' then
		table.insert(data.info_mid, '<abbr title="-i (type I) inflection">-i</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese い-i " .. cat_suffix) end
		inflected_forms = replace_suffix('いい', {'よく'}, 'ii', {'yoku'})
		if inflected_forms then
			insert_form('adverbial', inflected_forms[1])
		else
			require("Module:debug").track("ja-headword/slightly irregular -i inflection failed")
		end
	elseif inflection_type == 'na' or inflection_type == 'な' then
		table.insert(data.info_mid, '<abbr title="-na (type II) inflection">-na</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese な-na " .. cat_suffix) end
		-- Nothing is removed from the lemma; the particles are appended as links.
		inflected_forms = replace_suffix('', {'[[な]]', '[[に]]'}, '', {' na', ' ni'})
		insert_form('adnominal', inflected_forms[1])
		insert_form('adverbial', inflected_forms[2])
	elseif inflection_type == "yo" then
		table.insert(data.info_mid, '<abbr title="yodan conjugation (classical)"><sup><small>†</small></sup>yodan</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese yodan " .. cat_suffix) end
	elseif inflection_type == "kami ni" then
		table.insert(data.info_mid, '<abbr title="kami nidan conjugation (classical)"><sup><small>†</small></sup>nidan</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese kami nidan " .. cat_suffix) end
	elseif inflection_type == "shimo ni" then
		table.insert(data.info_mid, '<abbr title="shimo nidan conjugation (classical)"><sup><small>†</small></sup>nidan</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese shimo nidan " .. cat_suffix) end
	elseif inflection_type == "rahen" then
		table.insert(data.info_mid, '<abbr title="r-special conjugation (classical)"><sup><small>†</small></sup>-ri</abbr>')
	elseif inflection_type == "sahen" then
		table.insert(data.info_mid, '<abbr title="s-special conjugation (classical)"><sup><small>†</small></sup>-se</abbr>')
	elseif inflection_type == "kahen" then
		table.insert(data.info_mid, '<abbr title="k-special conjugation (classical)"><sup><small>†</small></sup>-ko</abbr>')
	elseif inflection_type == "nahen" then
		table.insert(data.info_mid, '<abbr title="n-special conjugation (classical)"><sup><small>†</small></sup>-n</abbr>')
	elseif inflection_type == "nari" or inflection_type == "なり" then
		table.insert(data.info_mid, '<abbr title="-nari inflection (classical)"><sup><small>†</small></sup>-nari</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese なり-nari " .. cat_suffix) end
	elseif inflection_type == 'tari' or inflection_type == 'たり' then
		table.insert(data.info_mid, '<abbr title="-tari inflection (classical)"><sup><small>†</small></sup>-tari</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese たり-tari " .. cat_suffix) end
		-- As with -na: nothing is removed, the particles are appended as links.
		inflected_forms = replace_suffix('', {'[[たる]]', '[[と]]', '[[として]]'}, '', {' taru', ' to', ' to shite'})
		insert_form('adnominal', inflected_forms[1])
		insert_form('adverbial', inflected_forms[2], inflected_forms[3])
	elseif inflection_type == "ku" or inflection_type == "く" then
		table.insert(data.info_mid, '<abbr title="-ku inflection (classical)"><sup><small>†</small></sup>-ku</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese く-ku " .. cat_suffix) end
	elseif inflection_type == "shiku" or inflection_type == "しく" then
		table.insert(data.info_mid, '<abbr title="-shiku inflection (classical)"><sup><small>†</small></sup>-shiku</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese しく-shiku " .. cat_suffix) end
	elseif inflection_type == "ka" or inflection_type == "か" then
		table.insert(data.info_mid, '<abbr title="-ka inflection (dialectal)"><sup><small>†</small></sup>-ka</abbr>')
		if cat_suffix then table.insert(data.categories, "Japanese か-ka " .. cat_suffix) end
	elseif inflection_type == 'irr' then
		table.insert(data.info_mid, 'irregular')
		if cat_suffix then table.insert(data.categories, "Japanese irregular " .. cat_suffix) end
	elseif inflection_type == '-' or inflection_type == 'un' then
		table.insert(data.info_mid, 'uninflectable')
	end
end
-- Verbs: transitivity from the "tr" argument, then conjugation info from "infl",
-- categorized under "verbs".
pos_functions["verbs"] = function(args, data)
	add_transitivity(data, args.tr)
	add_inflections(data, args.infl, "verbs")
end
-- Suffixes: inflection info from "infl"; no category suffix is passed, so no
-- inflection category is added.
pos_functions["suffixes"] = function(args, data)
	add_inflections(data, args.infl)
end
-- Auxiliary verbs: add their own category and the inflection info, then switch
-- data.pos_category to "verbs" for everything that runs afterwards.
pos_functions["auxiliary verbs"] = function(args, data)
	local categories = data.categories
	table.insert(categories, "Japanese auxiliary verbs")
	add_inflections(data, args.infl)
	data.pos_category = "verbs"
end
-- Suru verbs: transitivity from "tr", always the 'suru' conjugation (the "infl" argument
-- is not consulted), then switch data.pos_category to "verbs" for everything that runs
-- afterwards.
pos_functions["suru verbs"] = function(args, data)
	add_transitivity(data, args.tr)
	add_inflections(data, "suru", "verbs")
	data.pos_category = "verbs"
end
-- Verb forms: inflection info from "infl"; no category suffix is passed, so no
-- inflection category is added.
pos_functions["verb forms"] = function(args, data)
	add_inflections(data, args.infl)
end
-- Adjectives: inflection info from "infl", categorized under "adjectives".
pos_functions["adjectives"] = function(args, data)
	add_inflections(data, args.infl, "adjectives")
end
-- Adjective forms: inflection info from "infl"; no category suffix is passed, so no
-- inflection category is added.
pos_functions["adjective forms"] = function(args, data)
	add_inflections(data, args.infl)
end
-- Nouns: handle the counter (classifier) argument "count".
--   absent or ""  -> nothing is added
--   "-"           -> an "uncountable" label
--   anything else -> a "counter" label carrying the given counter
pos_functions["nouns"] = function(args, data)
	local counter = args.count
	if not counter or counter == "" then
		return
	end
	if counter == "-" then
		table.insert(data.inflections, {label = "uncountable"})
	else
		table.insert(data.inflections, {label = "counter", counter})
	end
end
-- For use in soft redirect pages.
-- Appends to `categories` both the kanji-based categories and the script/character-based
-- categories for `pagename`. Sortkey is not provided.
--   pagename:   page title to categorize
--   categories: list that receives the category names (modified in place)
function export.cat(pagename, categories)
	local data = {categories = categories}
	-- categorize by the kanji contained in the pagename
	categorize_by_kanji(data, pagename)
	-- categorize by the script of the pagename or specific characters contained in it.
	-- This used to call categorize_by_kanji a second time, which duplicated the kanji
	-- categories and never produced the script/character ones. `categories` is also
	-- passed as the katakana list so that the katakana category lands in the same output.
	extra_categorization(data, pagename, categories)
end
-- The main entry point.
-- This is the only function that can be invoked from a template.
--
-- frame.args[1] (from the #invoke itself) is the part-of-speech category;
-- an error is raised when it is missing. All other parameters are read from
-- the parent frame, i.e. the template call on the entry page.
--
-- Returns the wikitext of the headword line: an optional anchor span whose id
-- is the kana reading, the output of Module:headword with the gender span
-- replaced by custom information, and the formatted katakana categories.
function export.show(frame)
	local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")

	local args = require('Module:parameters').process(frame:getParent().args, {
		[1] = {list = true},
		['hira'] = {}, ['kata'] = {},
		['rom'] = {},
		['tr'] = {},
		['infl'] = {}, ['type'] = {alias_of = 'infl'}, ['decl'] = {alias_of = 'infl'},
		['count'] = {},
		['kyu'] = {}, ['shin'] = {},
		['hhira'] = {}, ['hkata'] = {},
		['head'] = {},
		['sort'] = {},
		['pagename'] = {},
	})
	-- "pagename" lets a caller override the title; otherwise use the current page
	args['pagename'] = args['pagename'] or mw.title.getCurrentTitle().text

	local data = {
		lang = lang,
		sc = sc,
		pos_category = poscat,
		categories = {},
		translits = {},
		heads = {},
		inflections = {},
		-- placeholder: presumably makes Module:headword emit the gender span
		-- that is replaced with custom text in the return statement below
		genders = {'m'},
		sort_key = nil,
		--custom info
		info_mid = {},
		heads_preserved = nil,
		kana = nil,
	}
	local katakana_category = {}

	-- The part-of-speech handlers may rewrite data.pos_category (the
	-- "suru verbs" handler sets it to "verbs"), so decide from the original
	-- invocation argument whether する has to be appended to the
	-- kyūjitai/shinjitai forms. Testing data.pos_category after the handler
	-- has run could never match "suru verbs".
	local is_suru_verb = poscat == "suru verbs"

	-- sort out all the kanas and do the romanization business
	format_headword(args, data, args["head"] or args['pagename'])

	-- add certain "inflections" and categories for adjectives, verbs, or nouns
	if pos_functions[poscat] then
		pos_functions[poscat](args, data)
	end

	-- the presence of kyūjitai param indicates that this is shinjitai kanji entry and vice versa
	if args["kyu"] then
		if is_suru_verb then
			table.insert(data.inflections, {label = "[[Appendix:Japanese_glossary#kyūjitai|kyūjitai]]", "[[" .. args["kyu"] .. "]][[する]]"})
		else
			table.insert(data.inflections, {label = "[[Appendix:Japanese_glossary#kyūjitai|kyūjitai]]", args["kyu"]})
		end
		require('Module:debug').track'ja-headword/kyu'
	end
	if args["shin"] then
		-- a bare label first: the entry itself is the kyūjitai form
		table.insert(data.inflections, {label = "[[Appendix:Japanese_glossary#kyūjitai|kyūjitai]]"})
		if is_suru_verb then
			table.insert(data.inflections, {label = "[[Appendix:Japanese_glossary#shinjitai|shinjitai]]", "[[" .. args["shin"] .. "]][[する]]"})
		else
			table.insert(data.inflections, {label = "[[Appendix:Japanese_glossary#shinjitai|shinjitai]]", args["shin"]})
		end
		require('Module:debug').track'ja-headword/shin'
	end

	local hist_info = historical_kana(args, data, poscat)

	-- categorize by joyo kanji and number of kanji
	categorize_by_kanji(data, args['pagename'])
	-- categorize by the script of the pagename or specific characters contained in it
	extra_categorization(data, args['pagename'], katakana_category)

	-- an explicit "sort" parameter wins; otherwise derive the key from the kana, if any
	data.sort_key = args['sort'] or data.kana and m_ja.jsort(data.kana) or nil

	return
		-- empty anchor span so the reading can be used as a link target
		(data.kana and '<span id="' .. data.kana .. '"></span>' or '') ..
		require('Module:headword').full_headword(data)
			-- swap the gender span for the historical kana info and the
			-- italicised custom info collected in data.info_mid
			:gsub('<span class="gender">.-</span>', hist_info .. '<i>'..table.concat(data.info_mid, ' ')..'</i>') ..
		require("Module:utilities").format_categories(katakana_category, lang, data.sort_key and m_ja.hira_to_kata(data.sort_key))
end
return export
7xgwv1htgdbkv0gnbkqo9w3qevgdw65
コンドーム
0
5310
13336
2022-08-04T12:37:21Z
Asinis632
1829
Created page with "==Japanese== {{DEFAULTSORT:こんどおむ}} {{swp|lang=ja}} ===Etymology=== [[Appendix:Glossary#loanword|Borrowed]] from {{bor|ja|de|Kondom}},<ref>{{R:Kojien}}</ref> from {{bor|ja|en|condom}},<ref>{{R:Daijisen}}</ref> or from {{bor|ja|fr|condom}}.<ref name="DJR"/> ===Pronunciation=== {{ja-pron|acc=3|acc_ref=DJR}} ===Noun=== {{ja-noun}} # a [[condom]] {{gloss|flexible sleeve worn on the penis}} ====Synonyms==== * {{ja-r|ゴム}} * {{ja-r|スキン}} * {{ja-r|ルー..."
wikitext
text/x-wiki
==Japanese==
{{DEFAULTSORT:こんどおむ}}
{{swp|lang=ja}}
===Etymology===
[[Appendix:Glossary#loanword|Borrowed]] from {{bor|ja|de|Kondom}},<ref>{{R:Kojien}}</ref> from {{bor|ja|en|condom}},<ref>{{R:Daijisen}}</ref> or from {{bor|ja|fr|condom}}.<ref name="DJR"/>
===Pronunciation===
{{ja-pron|acc=3|acc_ref=DJR}}
===Noun===
{{ja-noun}}
# a [[condom]] {{gloss|flexible sleeve worn on the penis}}
====Synonyms====
* {{ja-r|ゴム}}
* {{ja-r|スキン}}
* {{ja-r|ルーデサック}}; {{ja-r|サック}}
* {{ja-r|^フレンチ レター}}
===References===
<references/>
{{C|ja|Birth control}}
k1740sthgrno19cktxgcvpp5o2bs1vm
13337
13336
2022-08-04T12:37:43Z
Asinis632
1829
wikitext
text/x-wiki
{{-ja-}}
{{DEFAULTSORT:こんどおむ}}
{{swp|lang=ja}}
===Etymology===
[[Appendix:Glossary#loanword|Borrowed]] from {{bor|ja|de|Kondom}},<ref>{{R:Kojien}}</ref> from {{bor|ja|en|condom}},<ref>{{R:Daijisen}}</ref> or from {{bor|ja|fr|condom}}.<ref name="DJR"/>
===Pronunciation===
{{ja-pron|acc=3|acc_ref=DJR}}
===Noun===
{{ja-noun}}
# a [[condom]] {{gloss|flexible sleeve worn on the penis}}
====Synonyms====
* {{ja-r|ゴム}}
* {{ja-r|スキン}}
* {{ja-r|ルーデサック}}; {{ja-r|サック}}
* {{ja-r|^フレンチ レター}}
===References===
<references/>
{{C|ja|Birth control}}
a762ipybbmhyazvf3pz9kpi11bo7q9m
Templet:R:Kojien
10
5312
13341
2022-08-05T07:05:49Z
Asinis632
1829
Created page with "<span class="book">'''1998''', <cite>{{lang|ja|広辞苑}} (''[[w:Kōjien|Kōjien]]'')</cite>, Fifth Edition (in Japanese), [[w:Tōkyō|Tōkyō]]: [[w:Iwanami Shoten|Iwanami Shoten]], {{ISBN|4000801112}}</span><noinclude> [[Category:Japanese reference templates|Kojien]] </noinclude>"
wikitext
text/x-wiki
<span class="book">'''1998''', <cite>{{lang|ja|広辞苑}} (''[[w:Kōjien|Kōjien]]'')</cite>, Fifth Edition (in Japanese), [[w:Tōkyō|Tōkyō]]: [[w:Iwanami Shoten|Iwanami Shoten]], {{ISBN|4000801112}}</span><noinclude>
[[Category:Japanese reference templates|Kojien]]
</noinclude>
ct7k8vytcagqvik5tfcrspaub1wjetm
Templet:swp
10
5313
13343
2022-08-05T07:12:43Z
Asinis632
1829
Redirected page to [[Templet:slim-wikipedia]]
wikitext
text/x-wiki
#redirect [[Templet:slim-wikipedia]]
j6fazmke51gom3v238wcfs15jhr4mzc
Templet:slim-wikipedia
10
5315
13344
2022-08-05T07:13:26Z
Asinis632
1829
Created page with "{{#invoke:interproject|wikipedia_box|slim=1}}<noinclude>{{documentation}}</noinclude>"
wikitext
text/x-wiki
{{#invoke:interproject|wikipedia_box|slim=1}}<noinclude>{{documentation}}</noinclude>
blu8mkxfwci2w8rhvv7mfrb56n9l6ki
Module:interproject
828
5316
13345
2022-08-05T07:14:39Z
Asinis632
1829
Created page with "local export = {} local function track(page) require("Module:debug/track")("interproject/" .. page) return true end local function process_links(linkdata, prefix, name, wmlang, sc) prefix = prefix .. ":" .. (wmlang:getCode() == "en" and "" or wmlang:getCode() .. ":") local links = {} local iplinks = {} local m_links = require("Module:links") local lang = wmlang:getWiktionaryLanguage() local ipalt = name .. " " .. (wmlang:getCode() == "en" and "" or "<sup>..."
Scribunto
text/plain
local export = {}
-- Registers a tracking hit under "interproject/<page>" through
-- Module:debug/track. Always returns true.
local function track(page)
	local tracker = require("Module:debug/track")
	tracker("interproject/" .. page)
	return true
end
-- Turns a list of link descriptions into formatted sister-project links.
--
-- linkdata: list of tables with at least a `term` field; every entry is
--           modified in place (lang, sc, term and tr are overwritten)
-- prefix:   interwiki prefix of the project, without the trailing colon
-- name:     project name, used as the text of the hidden "interProject" links
-- wmlang:   Wikimedia language object
-- sc:       script object, or nil
--
-- Returns two parallel lists: the links formatted by Module:links, and the
-- "interProject" span links.
local function process_links(linkdata, prefix, name, wmlang, sc)
	local code = wmlang:getCode()
	local is_english = code == "en"

	-- English is the default wiki, so it needs no language segment in the
	-- interwiki prefix and no superscript code in the link text
	local full_prefix = prefix .. ":" .. (is_english and "" or code .. ":")
	local m_links = require("Module:links")
	local lang = wmlang:getWiktionaryLanguage()
	local ipalt = name .. " " .. (is_english and "" or "<sup>" .. code .. "</sup>")

	local links, iplinks = {}, {}
	for index, link in ipairs(linkdata) do
		link.lang = lang
		link.sc = sc
		link.term = full_prefix .. link.term
		link.tr = "-"
		iplinks[index] = "<span class=\"interProject\">[[" .. link.term .. "|" .. ipalt .. "]]</span>"
		links[index] = m_links.full_link(link, "bold")
	end
	return links, iplinks
end
-- Builds the floating "Wikipedia has an article on ..." box.
--
-- Template parameters (read from the parent frame):
--   1, 2          article title and its display text (title defaults to the
--                 current page name); when cat or portal is given, 1 is the
--                 display text instead
--   cat/category  link to a category instead of an article
--   portal        link to a portal instead of an article
--   mul, mullabel, mulcat, mulcatlabel
--                 an additional article or category link and its label
--   lang          Wikimedia language code (default "en")
--   sc            script code
--   i             accepted, but not used by this function
--
-- Invocation parameter (frame.args): slim - when set, the compact one-line
-- box is returned instead of the full one.
function export.wikipedia_box(frame)
	local params = {
		[1] = {},
		[2] = {},
		["cat"] = {},
		["category"] = {alias_of = "cat"},
		["i"] = {type = "boolean"},
		["lang"] = {default = "en"},
		["mul"] = {},
		["mullabel"] = {},
		["mulcat"] = {},
		["mulcatlabel"] = {},
		["portal"] = {},
		["sc"] = {},
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)

	if args.mul or args.mullabel or args.mulcat or args.mulcatlabel then
		track("wikipedia-box-mul")
	end

	local wmlang = require("Module:wikimedia languages").getByCodeWithFallback(args.lang)
	if not wmlang then
		error("The Wikimedia language code \"" .. args.lang .. "\" is not valid.")
	end

	local sc
	if args.sc then
		sc = require("Module:scripts").getByCode(args.sc, "sc") or nil
	end

	-- primary link: category, portal or plain article (in that order of precedence)
	local primary
	if args.cat then
		primary = {term = "Category:" .. args.cat, alt = args[1] or args.cat}
	elseif args.portal then
		primary = {term = "Portal:" .. args.portal, alt = args[1] or args.portal}
	else
		local term = args[1] or mw.title.getCurrentTitle().text
		primary = {term = term, alt = args[2] or term}
	end
	local linkdata = {primary}

	-- optional second link; a category takes precedence over an article
	if args.mulcat then
		linkdata[2] = {term = "Category:" .. args.mulcat, alt = args.mulcatlabel or args.mulcat}
	elseif args.mul then
		linkdata[2] = {term = args.mul, alt = args.mullabel or args.mul}
	end

	local links, iplinks = process_links(linkdata, "w", "Wikipedia", wmlang, sc)

	if frame.args.slim then
		local joined_links = table.concat(links, " and ")
		-- the language name is only spelled out for non-English Wikipedias
		local language_name = wmlang:getCode() == "en" and "" or wmlang:getCanonicalName() .. " "
		return
			"<div class=\"sister-wikipedia sister-project noprint floatright\" style=\"border: solid #aaa 1px; font-size: 90%; background: #f9f9f9; width: 250px; padding: 4px; text-align: left;\">" ..
			"<div style=\"float: left;\">[[File:Wikipedia-logo.png|14px|none| ]]</div>" ..
			"<div style=\"margin-left: 15px;\">" ..
			" " ..
			joined_links ..
			" on " ..
			language_name ..
			"Wikipedia" ..
			"</div>" ..
			"</div>"
	end

	-- wording for the kind of page(s) linked to
	local linktype = args.cat and "a category"
		or args.mul and "articles"
		or args.mulcat and "categories"
		or args.portal and "a portal"
		or "an article"

	local heading = wmlang:getCanonicalName() .. " [[Wikipedia]] has " .. linktype .. " on:"
	local link_list = "<div style=\"margin-left: 10px;\">" .. table.concat(links, " and ") .. "</div>"

	-- flag calls whose first parameter merely repeats the page title
	local redundant_first = args[1] == mw.title.getCurrentTitle().text and not args[2]
	local tracking_category = redundant_first and "[[Category:wikipedia with redundant first parameter]]" or ""

	return
		"<div class=\"sister-wikipedia sister-project noprint floatright\" style=\"border: 1px solid #aaa; font-size: 90%; background: #f9f9f9; width: 250px; padding: 4px; text-align: left;\">" ..
		"<div style=\"float: left;\">[[File:Wikipedia-logo-v2.svg|44px|none|link=|alt=]]</div>" ..
		"<div style=\"margin-left: 60px;\">" ..
		heading ..
		link_list ..
		"</div>" ..
		table.concat(iplinks) .. tracking_category ..
		"</div>"
end
-- Builds an inline link to a page on a sister project, e.g.
-- "<icon> <link> on the French Wikipedia."
--
-- Invocation parameters (frame.args):
--   prefix  interwiki prefix of the project (required)
--   name    display name of the project (required)
--   image   file name of the project icon (required)
--   compat  boolean; selects the legacy parameter layout (see below)
--
-- Template parameters (parent frame):
--   normal layout: 1 = language code, 2 = term, 3 = display text
--   compat layout: lang = language code, 1 = term, 2 = display text
--   i      boolean; italicise the display text
--   nodot  when set, omit the final full stop
--   sc     script code
--
-- The `compat` function argument, when truthy, takes precedence over the
-- `compat` invocation parameter.
function export.projectlink(frame, compat)
	local m_params = require("Module:parameters")

	local iparams = {
		["prefix"] = {required = true},
		["name"] = {required = true},
		["image"] = {required = true},
		["compat"] = {type = "boolean"},
	}
	-- declared local: this used to be assigned without `local`, which
	-- created a global variable
	local iargs = m_params.process(frame.args, iparams)
	compat = compat or iargs.compat

	local lang_param = compat and "lang" or 1
	local term_param = compat and 1 or 2
	local alt_param = compat and 2 or 3

	local params = {
		[lang_param] = {},
		[term_param] = {},
		[alt_param] = {},
		["i"] = {type = "boolean"},
		["nodot"] = {},
		["sc"] = {},
	}
	local args = m_params.process(frame:getParent().args, params)

	local wmlang_code = args[lang_param] or "en"
	local wmlang = require("Module:wikimedia languages").getByCodeWithFallback(wmlang_code) or error("The Wikimedia language code \"" .. wmlang_code .. "\" is not valid.")
	local sc = args["sc"] and require("Module:scripts").getByCode(args["sc"], "sc") or nil

	-- the term defaults to the current page title, the display text to the term
	local term = args[term_param] or mw.title.getCurrentTitle().text
	local linkdata = {term = term, alt = args[alt_param] or term}
	if args["i"] then
		linkdata.alt = "''" .. linkdata.alt .. "''"
	end

	-- process_links rewrites linkdata.term in place to include the interwiki
	-- prefix, so the icon's link= target below points at the sister project
	local links, iplinks = process_links({linkdata}, iargs["prefix"], iargs["name"], wmlang, sc)

	return
		"[[Image:" .. iargs["image"] .. "|15px|link=" .. linkdata.term .. "]] " ..
		table.concat(links, " and ") ..
		" on " ..
		(wmlang:getCode() == "en" and "" or "the " .. wmlang:getCanonicalName() .. " ") ..
		" " .. iargs["name"] .. (args["nodot"] and "" or ".") ..
		table.concat(iplinks)
end
return export
iqfi2fi38bam1z1ski9ljdunohbs1v9