Wiktionary tpiwiktionary https://tpi.wiktionary.org/wiki/Fran_Pes MediaWiki 1.39.0-wmf.23 case-sensitive Media Sipesol Toktok Yusa Toktok bilong yusa Wiktionary Wiktionary toktok Fail Toktok bilong fail MediaWiki Toktok bilong mediawiki Templet Toktok bilong templet Halivim Toktok bilong halivim Grup Toktok bilong grup TimedText TimedText talk Module Module talk Gadget Gadget talk Gadget definition Gadget definition talk Module:languages/data2 828 3647 13338 13209 2022-08-04T12:39:06Z Asinis632 1829 Scribunto text/plain local u = mw.ustring.char -- UTF-8 encoded strings for some commonly-used diacritics local GRAVE = u(0x0300) local ACUTE = u(0x0301) local CIRC = u(0x0302) local TILDE = u(0x0303) local MACRON = u(0x0304) local BREVE = u(0x0306) local DOTABOVE = u(0x0307) local DIAER = u(0x0308) local CARON = u(0x030C) local DGRAVE = u(0x030F) local INVBREVE = u(0x0311) local DOTBELOW = u(0x0323) local RINGBELOW = u(0x0325) local CEDILLA = u(0x0327) local OGONEK = u(0x0328) local CGJ = u(0x034F) -- combining grapheme joiner local DOUBLEINVBREVE = u(0x0361) -- Punctuation to be used for standardChars field local PUNCTUATION = ' !#%&*+,-./:;<=>?@^_`|~\'()' local Cyrl = {"Cyrl"} local Latn = {"Latn"} local LatnArab = {"Latn", "Arab"} local m = {} m["aa"] = { "Afar", 27811, "cus-eas", Latn, entry_name = { remove_diacritics = ACUTE}, } m["ab"] = { "Abkhaz", 5111, "cau-abz", {"Cyrl", "Geor", "Latn"}, translit_module = "ab-translit", override_translit = true, entry_name = { from = {GRAVE, ACUTE}, to = {}} , } m["ae"] = { "Avestan", 29572, "ira-cen", {"Avst", "Gujr"}, translit_module = "Avst-translit", wikipedia_article = "Avestan", } m["af"] = { "Afrikaans", 14196, "gmw", LatnArab, ancestors = {"nl"}, sort_key = { from = {"[äáâà]", "[ëéêè]", "[ïíîì]", "[öóôò]", "[üúûù]", "[ÿýŷỳ]", "^-", "'"}, to = {"a" , "e" , "i" , "o" , "u" , "y" }} , } m["ak"] = { "Akan", 28026, "alv-ctn", Latn, } m["am"] = { "Amharic", 28244, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit", } 
m["an"] = { "Aragonese", 8765, "roa-ibe", Latn, ancestors = {"roa-oan"}, } m["ar"] = { "Arabic", 13955, "sem-arb", {"Arab", "Hebr", "Brai"}, -- replace alif waṣl with alif -- remove tatweel and diacritics: fathatan, dammatan, kasratan, fatha, -- damma, kasra, shadda, sukun, superscript (dagger) alef entry_name = { from = {u(0x0671), u(0x0640), "[" .. u(0x064B) .. "-" .. u(0x0652) .. "]", u(0x0670)}, to = {u(0x0627)}}, -- put Judeo-Arabic (Hebrew-script Arabic) under the category header -- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles sort_key = { from = {"^%f[" .. u(0x5D0) .. "-" .. u(0x5EA) .. "]"}, to = {u(0xFB21)}, }, translit_module = "ar-translit", } m["as"] = { "Assamese", 29401, "inc-eas", {"as-Beng"}, ancestors = {"inc-mas"}, translit_module = "as-translit", } m["av"] = { "Avar", 29561, "cau-nec", Cyrl, ancestors = {"oav"}, translit_module = "av-translit", override_translit = true, entry_name = { from = {GRAVE, ACUTE}, to = {}} , } m["ay"] = { "Aymara", 4627, "sai-aym", Latn, } m["az"] = { "Azerbaijani", 9292, "trk-ogz", {"Latn", "Cyrl", "fa-Arab"}, ancestors = {"trk-oat"}, } m["ba"] = { "Bashkir", 13389, "trk-kbu", Cyrl, translit_module = "ba-translit", override_translit = true, } m["be"] = { "Belarusian", 9091, "zle", Cyrl, ancestors = {"orv"}, translit_module = "be-translit", sort_key = { from = {"Ё", "ё"}, to = {"Е" , "е"}}, entry_name = { from = {"Ѐ", "ѐ", GRAVE, ACUTE}, to = {"Е", "е"}}, } m["bg"] = { "Bulgarian", 7918, "zls", {"Cyrl"}, ancestors = {"cu"}, translit_module = "bg-translit", entry_name = { from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE}, to = {"Е", "е", "И", "и"}}, } m["bh"] = { "Bihari", 135305, "inc-eas", {"Deva"}, ancestors = {"inc-mgd"}, } m["bi"] = { "Bislama", 35452, "crp", Latn, ancestors = {"en"}, } m["bm"] = { "Bambara", 33243, "dmn-emn", Latn, } m["bn"] = { "Bengali", 9610, "inc-eas", {"Beng", "Newa"}, ancestors = {"inc-mbn"}, translit_module = "bn-translit", } m["bo"] = { "Tibetan", 34271, "sit-tib", 
{"Tibt"}, -- sometimes Deva? ancestors = {"xct"}, translit_module = "bo-translit", override_translit = true, } m["br"] = { "Breton", 12107, "cel-bry", Latn, ancestors = {"xbm"}, } m["ca"] = { "Catalan", 7026, "roa", Latn, ancestors = {"roa-oca"}, sort_key = { from = {"à", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "l·l"}, to = {"a", "e" , "i" , "o" , "u" , "c", "ll" }} , } m["ce"] = { "Chechen", 33350, "cau-vay", Cyrl, translit_module = "ce-translit", override_translit = true, entry_name = { from = {MACRON}, to = {}}, } m["ch"] = { "Chamorro", 33262, "poz-sus", Latn, } m["co"] = { "Corsican", 33111, "roa-itd", Latn, } m["cr"] = { "Cree", 33390, "alg", {"Cans", "Latn"}, translit_module = "translit-redirect", } m["cs"] = { "Czech", 9056, "zlw", Latn, ancestors = {"zlw-ocs"}, sort_key = { from = {"á", "é", "í", "ó", "[úů]", "ý"}, to = {"a", "e", "i", "o", "u" , "y"}} , } m["cu"] = { "Old Church Slavonic", 35499, "zls", {"Cyrs", "Glag"}, translit_module = "Cyrs-Glag-translit", entry_name = { from = {u(0x0484)}, -- kamora to = {}}, sort_key = { from = {"оу", "є"}, to = {"у" , "е"}} , } m["cv"] = { "Chuvash", 33348, "trk-ogr", Cyrl, ancestors = {"xbo"}, translit_module = "cv-translit", override_translit = true, } m["cy"] = { "Welsh", 9309, "cel-bry", Latn, ancestors = {"wlm"}, sort_key = { from = {"[âáàä]", "ch", "dd", "[êéèë]", "ff", "ngh", "[îíìï]", "ll", "[ôóòö]", "ph", "rh", "th", "[ûúùü]", "[ŵẃẁẅ]", "[ŷýỳÿ]", "'"}, to = {"a" , "c~", "d~", "e" , "f~", "g~h", "i" , "l~", "o" , "p~", "r~", "t~", "u" , "w" , "y" }} , standardChars = "A-IL-PR-UWYa-il-pr-uwy0-9ÂâÊêÎîÔôÛûŴŵŶŷ" .. PUNCTUATION, } m["da"] = { "Danish", 9035, "gmq", Latn, ancestors = {"gmq-oda"}, } m["de"] = { "German", 188, "gmw", {"Latn", "Latf"}, ancestors = {"gmh"}, sort_key = { from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]", "ß" }, to = {"a" , "e" , "i" , "o" , "u" , "ss"}} , standardChars = "A-Za-z0-9ÄäÖöÜüß" .. 
PUNCTUATION, } m["dv"] = { "Dhivehi", 32656, "inc-ins", {"Thaa"}, ancestors = {"elu-prk"}, translit_module = "dv-translit", override_translit = true, } m["dz"] = { "Dzongkha", 33081, "sit-tib", {"Tibt"}, ancestors = {"xct"}, translit_module = "bo-translit", override_translit = true, } m["ee"] = { "Ewe", 30005, "alv-gbe", Latn, } m["el"] = { "Greek", 9129, "grk", {"Grek", "Brai"}, ancestors = {"grc"}, translit_module = "el-translit", override_translit = true, sort_key = { -- Keep this synchronized with grc, cpg, pnt, tsd from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"}, to = {"α" , "ε" , "η" , "ι" , "ο" , "υ" , "ω" , "ρ", "σ"}} , standardChars = "ͺ;΄-ώϜϝ" .. PUNCTUATION, } m["en"] = { "Inglis", 1860, "gmw", {"Latn", "Brai", "Shaw", "Dsrt"}, -- entries in Shaw or Dsrt might require prior discussion ancestors = {"enm"}, sort_key = { from = {"[äàáâåā]", "[ëèéêē]", "[ïìíîī]", "[öòóôō]", "[üùúûū]", "æ" , "œ" , "[çč]", "ñ", "'"}, to = {"a" , "e" , "i" , "o" , "u" , "ae", "oe", "c" , "n"}}, wikimedia_codes = {"en", "simple"}, standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF), } m["eo"] = { "Esperanto", 143, "art", Latn, sort_key = { from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ĉ]", "[ĝ]", "[ĥ]", "[ĵ]", "[ŝ]", "[ŭ]"}, to = {"a" , "e" , "i" , "o" , "u", "cĉ", "gĉ", "hĉ", "jĉ", "sĉ", "uĉ"}} , standardChars = "A-PRSTUVZa-prstuvzĉĈĝĜĵĴŝŜŭŬ0-9" .. PUNCTUATION, } m["es"] = { "Spanish", 1321, "roa-ibe", {"Latn", "Brai"}, ancestors = {"osp"}, sort_key = { from = {"á", "é", "í", "ó", "[úü]", "ç", "ñ"}, to = {"a", "e", "i", "o", "u" , "c", "n~"}}, standardChars = "A-VXYZa-vxyz0-9ÁáÉéÍíÓóÚúÑñ¿¡" .. 
PUNCTUATION, } m["et"] = { "Estonian", 9072, "fiu-fin", Latn, } m["eu"] = { "Basque", 8752, "euq", Latn, } m["fa"] = { "Persian", 9168, "ira-swi", {"fa-Arab"}, ancestors = {"pal"}, -- "ira-mid" entry_name = { from = {u(0x064E), u(0x0640), u(0x064F), u(0x0650), u(0x0651), u(0x0652)}, to = {}} , } m["ff"] = { "Fula", 33454, "alv-fwo", {"Latn", "Adlm"}, } m["fi"] = { "Finnish", 1412, "fiu-fin", Latn, entry_name = { from = {"ˣ"}, -- Used to indicate gemination of the next consonant to = {}}, sort_key = { from = {"[áàâã]", "[éèêẽ]", "[íìîĩ]", "[óòôõ]", "[úùûũ]", "[ýỳŷüű]", "[øõő]", "æ" , "œ" , "[čç]", "š", "ž", "ß" , "[':]"}, to = {"a" , "e" , "i" , "o" , "u" , "y" , "ö" , "ae", "oe", "c" , "s", "z", "ss"}} , } m["fj"] = { "Fijian", 33295, "poz-occ", Latn, } m["fo"] = { "Faroese", 25258, "gmq", Latn, ancestors = {"non"}, } m["fr"] = { "French", 150, "roa-oil", {"Latn", "Brai"}, ancestors = {"frm"}, sort_key = { from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ" , "œ" , "'"}, to = {"a" , "e" , "i" , "o" , "u" , "y" , "c", "ae", "oe"}}, standardChars = "A-Za-z0-9ÀÂÇÉÈÊËÎÏÔŒÛÙÜàâçéèêëîïôœûùü«»" .. PUNCTUATION, } m["fy"] = { "West Frisian", 27175, "gmw-fri", Latn, ancestors = {"ofs"}, sort_key = { from = {"[àáâä]", "[èéêë]", "[ìíîïyỳýŷÿ]", "[òóôö]", "[ùúûü]", "æ", "[ /.-]"}, to = {"a" , "e" , "i" , "o" , "u", "ae"}} , standardChars = "A-PR-WYZa-pr-wyz0-9Ææâäàéêëèïìôöòúûüùỳ" .. PUNCTUATION, } m["ga"] = { "Irish", 9142, "cel-gae", Latn, ancestors = {"mga"}, sort_key = { from = {"á", "é", "í", "ó", "ú", "ý", "ḃ" , "ċ" , "ḋ" , "ḟ" , "ġ" , "ṁ" , "ṗ" , "ṡ" , "ṫ" }, to = {"a", "e", "i", "o", "u", "y", "bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}} , standardChars = "A-IL-PR-Ua-il-pr-u0-9ÁáÉéÍíÓóÚú" .. 
PUNCTUATION, } m["gd"] = { "Scottish Gaelic", 9314, "cel-gae", Latn, ancestors = {"mga"}, sort_key = { from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ýỳ]"}, to = {"a" , "e" , "i" , "o" , "u" , "y" }} , standardChars = "A-IL-PR-Ua-il-pr-u0-9ÀàÈèÌìÒòÙù" .. PUNCTUATION, } m["gl"] = { "Galician", 9307, "roa-ibe", Latn, ancestors = {"roa-opt"}, sort_key = { from = {"á", "é", "í", "ó", "ú"}, to = {"a", "e", "i", "o", "u"}} , } m["gn"] = { "Guaraní", 35876, "tup-gua", Latn, } m["gu"] = { "Gujarati", 5137, "inc-wes", {"Gujr"}, ancestors = {"inc-mgu"}, translit_module = "gu-translit", } m["gv"] = { "Manx", 12175, "cel-gae", Latn, ancestors = {"mga"}, sort_key = { from = {"ç", "-"}, to = {"c"}} , standardChars = "A-WYÇa-wyç0-9" .. PUNCTUATION, } m["ha"] = { "Hausa", 56475, "cdc-wst", LatnArab, sort_key = { from = {"ɓ", "ɗ", "ƙ", "'y", "ƴ", "'" }, to = {"b~" , "d~" , "k~", "y~", "y~", "" }}, entry_name = { from = {"R̃", "r̃", "À", "à", "È", "è", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Î", "î", "Ô", "ô", "Û", "û", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú", "Ā̀", "ā̀", "Ḕ", "ḕ", "Ī̀", "ī̀", "Ṑ", "ṑ", "Ū̀", "ū̀", GRAVE, ACUTE}, to = {"R", "r", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}, } m["he"] = { "Hebrew", 9288, "sem-can", {"Hebr", "Phnx", "Brai"}, entry_name = { from = {"[" .. u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. CGJ .. "]"}, to = {}} , } m["hi"] = { "Hindi", 1568, "inc-hnd", {"Deva", "Kthi", "Newa"}, ancestors = {"inc-ohi"}, translit_module = "hi-translit", standardChars = "ँंअ-ऊएऐओ-घच-झट-नप-रलवशसहा-ूेैो-◌्।-॰ड़ढ़" .. 
PUNCTUATION, } m["ho"] = { "Hiri Motu", 33617, "crp", Latn, ancestors = {"meu"}, } m["ht"] = { "Haitian Creole", 33491, "crp", Latn, ancestors = {"fr"}, } m["hu"] = { "Hungarian", 9067, "urj-ugr", {"Latn", "Hung"}, ancestors = {"ohu"}, sort_key = { from = {"á", "é", "í", "ó", "ú", "[öő]", "[üű]", "cs", "dzs", "gy", "ly", "ny", "zs"}, to = {"a", "e", "i", "o", "u", "o~", "u~", "c~", "dz~", "g~", "l~", "n~", "z~"}, }, } m["hy"] = { "Armenian", 8785, "hyx", {"Armn", "Brai"}, ancestors = {"axm"}, translit_module = "Armn-translit", override_translit = true, sort_key = { from = {"ու", "և", "եւ"}, to = {"ւ", "եվ", "եվ"}}, entry_name = { from = {"՞", "՜", "՛", "՟", "և", "<sup>յ</sup>", "<sup>ի</sup>", "<sup>է</sup>"}, to = {"", "", "", "", "եւ", "յ", "ի", "է"}} , } m["hz"] = { "Herero", 33315, "bnt-swb", Latn, } m["ia"] = { "Interlingua", 35934, "art", Latn, } m["id"] = { "Indonesian", 9240, "poz-mly", Latn, ancestors = {"ms"}, } m["ie"] = { "Interlingue", 35850, "art", Latn, type = "appendix-constructed", } m["ig"] = { "Igbo", 33578, "alv-igb", Latn, sort_key = { from = {"ụ", "ị", "ọ", "gb", "gh", "gw", "kp", "kw", "ṅ", "nw", "ny", "sh"}, to = {"u~" , "i~", "o~", "gy", "gz", "g~", "kz", "k~", "ny", "nz", "n~", "s~"}}, entry_name = { remove_diacritics = ACUTE .. GRAVE .. MACRON }, } m["ii"] = { "Sichuan Yi", 34235, "tbq-lol", {"Yiii"}, translit_module = "ii-translit", } m["ik"] = { "Inupiaq", 27183, "esx-inu", Latn, } m["io"] = { "Ido", 35224, "art", Latn, } m["is"] = { "Icelandic", 294, "gmq", Latn, ancestors = {"non"}, } m["it"] = { "Italian", 652, "roa-itd", Latn, sort_key = { from = {"[àáâäå]", "[èéêë]", "[ìíîï]", "[òóôö]", "[ùúûü]"}, to = {"a" , "e" , "i" , "o" , "u" }} , standardChars = "A-IL-VZa-il-vz0-9" .. 
PUNCTUATION, } m["iu"] = { "Inuktitut", 29921, "esx-inu", {"Cans", "Latn"}, translit_module = "translit-redirect", override_translit = true, } m["ja"] = { "Siapan", 5287, "jpx", {"Jpan", "Brai"}, ancestors = {"ojp"}, --[=[ -- Handled by jsort function in [[Module:ja]]. sort_key = { from = {"[ぁァア]", "[ぃィイ]", "[ぅゔゥウヴ]", "[ぇェエ]", "[ぉォオ]", "[がゕカガヵ]", "[ぎキギ]", "[ぐクグㇰ]", "[げゖケゲヶ]", "[ごコゴ]", "[ざサザ]", "[じシジㇱ]", "[ずスズㇲ]", "[ぜセゼ]", "[ぞソゾ]", "[だタダ]", "[ぢチヂ]", "[っづッツヅ]", "[でテデ]", "[どトドㇳ]", "ナ", "ニ", "[ヌㇴ]", "ネ", "ノ", "[ばぱハバパㇵ]", "[びぴヒビピㇶ]", "[ぶぷフブプㇷ]", "[べぺヘベペㇸ]", "[ぼぽホボポㇹ]", "マ", "ミ", "[ムㇺ]", "メ", "モ", "[ゃャヤ]", "[ゅュユ]", "[ょョヨ]", "[ラㇻ]", "[リㇼ]", "[ルㇽ]", "[レㇾ]", "[ロㇿ]", "[ゎヮワヷ]", "[ヰヸ]", "[ヱヹ]", "[ヲヺ]", "ン", "[゙゚゛゜ゝゞ・ヽヾ]", "𛀀"}, to = {"あ", "い", "う", "え", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "や", "ゆ", "よ", "ら", "り", "る", "れ", "ろ", "わ", "ゐ", "ゑ", "を", "ん", "", "え"}}, --]=] } m["jv"] = { "Javanese", 33549, "poz-sus", {"Latn", "Java"}, translit_module = "jv-translit", ancestors = {"kaw"}, link_tr = true, } m["ka"] = { "Georgian", 8108, "ccs-gzn", {"Geor", "Geok", "Hebr"}, -- Hebr is used to write Judeo-Georgian ancestors = {"oge"}, translit_module = "Geor-translit", override_translit = true, entry_name = { from = {"̂"}, to = {""}}, } m["kg"] = { "Kongo", 33702, "bnt-kng", Latn, } m["ki"] = { "Kikuyu", 33587, "bnt-kka", Latn, } m["kj"] = { "Kwanyama", 1405077, "bnt-ova", Latn, } m["kk"] = { "Kazakh", 9252, "trk-kno", {"Cyrl", "Latn", "kk-Arab"}, translit_module = "kk-translit", override_translit = true, } m["kl"] = { "Greenlandic", 25355, "esx-inu", Latn, } m["km"] = { "Khmer", 9205, "mkh-kmr", {"Khmr"}, ancestors = {"mkh-mkm"}, translit_module = "km-translit", } m["kn"] = { "Kannada", 33673, "dra", {"Knda"}, ancestors = {"dra-mkn"}, translit_module = "kn-translit", } m["ko"] = { "Korean", 9176, "qfa-kor", {"Kore", "Brai"}, ancestors = {"okm"}, -- 20210122 idea: 
strip parenthesized hanja from entry link -- Hani regex is a reasonable subset of Hani from [[Module:scripts/data]], -- last updated on 20210214. entry_name = { from = { " *%([一-鿿㐀-䶿𠀀-𮯯𰀀-𱍏﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧﨨﨩]+%)", }, to = { "", }}, display = { from = {"%-"}, to = {}, }, translit_module = "ko-translit", } m["kr"] = { "Kanuri", 36094, "ssa-sah", LatnArab, sort_key = { from = {"ny", "ǝ", "sh"}, to = {"n~", "e~", "s~"}} , -- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically entry_name = { from = {"À", "à", "È", "è", "Ǝ̀", "ǝ̀", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Ǝ̂", "ǝ̂", "Î", "î", "Ô", "ô", "Û", "û", "Ă", "ă", "Ĕ", "ĕ", "Ǝ̆", "ǝ̆", "Ĭ", "ĭ", "Ŏ", "ŏ", "Ŭ", "ŭ", "Á", "á", "É", "é", "Ǝ́", "ǝ́", "Í", "í", "Ó", "ó", "Ú", "ú", GRAVE, ACUTE}, to = {"A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u"}}, } m["ks"] = { "Kashmiri", 33552, "inc-dar", {"ks-Arab", "Deva", "Shrd", "Latn"}, translit_module = "translit-redirect", ancestors = {"sa"}, } -- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT m["kw"] = { "Cornish", 25289, "cel-bry", Latn, ancestors = {"cnx"}, } m["ky"] = { "Kyrgyz", 9255, "trk-kip", {"Cyrl", "Latn", "Arab"}, translit_module = "ky-translit", override_translit = true, } m["la"] = { "Latin", 397, "itc", Latn, ancestors = {"itc-ola"}, entry_name = {remove_diacritics = MACRON .. BREVE .. DIAER .. DOUBLEINVBREVE}, standardChars = "A-Za-z0-9ÆæŒœĀ-ăĒ-ĕĪ-ĭŌ-ŏŪ-ŭȲȳ" .. MACRON .. BREVE .. 
PUNCTUATION, } m["lb"] = { "Luxembourgish", 9051, "gmw", Latn, ancestors = {"gmh"}, } m["lg"] = { "Luganda", 33368, "bnt-nyg", Latn, entry_name = { from = {"á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ", "â", "Â", "ê", "Ê", "î", "Î", "ô", "Ô", "û", "Û" }, to = {"a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U",}}, sort_key = { from = {"ŋ"}, to = {"n"}} , } m["li"] = { "Limburgish", 102172, "gmw", Latn, ancestors = {"dum"}, } m["ln"] = { "Lingala", 36217, "bnt-bmo", Latn, } m["lo"] = { "Lao", 9211, "tai-swe", {"Laoo"}, translit_module = "lo-translit", sort_key = { from = {"[%pໆ]", "[່-ໍ]", "ຼ", "ຽ", "ໜ", "ໝ", "([ເແໂໃໄ])([ກ-ຮ])"}, to = {"", "", "ລ", "ຍ", "ຫນ", "ຫມ", "%2%1"}}, standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. PUNCTUATION, } m["lt"] = { "Lithuanian", 9083, "bat", Latn, ancestors = {"olt"}, entry_name = { from = {"[áãà]", "[ÁÃÀ]", "[éẽè]", "[ÉẼÈ]", "[íĩì]", "[ÍĨÌ]", "[ýỹ]", "[ÝỸ]", "ñ", "[óõò]", "[ÓÕÒ]", "[úũù]", "[ÚŨÙ]", ACUTE, GRAVE, TILDE}, to = {"a", "A", "e", "E", "i", "I", "y", "Y", "n", "o", "O", "u", "U"}} , } m["lu"] = { "Luba-Katanga", 36157, "bnt-lub", Latn, } m["lv"] = { "Latvian", 9078, "bat", Latn, entry_name = { -- This attempts to convert vowels with tone marks to vowels either with -- or without macrons. Specifically, there should be no macrons if the -- vowel is part of a diphthong (including resonant diphthongs such -- pìrksts -> pirksts not #pīrksts). What we do is first convert the -- vowel + tone mark to a vowel + tilde in a decomposed fashion, -- then remove the tilde in diphthongs, then convert the remaining -- vowel + tilde sequences to macroned vowels, then delete any other -- tilde. We leave already-macroned vowels alone: Both e.g. ar and ār -- occur before consonants. FIXME: This still might not be sufficient. 
from = {"Ȩ", "ȩ", "[ÂÃÀ]", "[âãà]", "[ÊẼÈ]", "[êẽè]", "[ÎĨÌ]", "[îĩì]", "[ÔÕÒ]", "[ôõò]", "[ÛŨÙ]", "[ûũù]", "[ÑǸ]", "[ñǹ]", "[" .. CIRC .. TILDE ..GRAVE .."]", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .. "?([^aAeEiIoOuUāĀēĒīĪūŪ])", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .."?$", "([iI])" .. TILDE .. "?([eE])" .. TILDE .. "?", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "U" .. TILDE, "u" .. TILDE, TILDE}, to = {"E", "e", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "O", "o", "U" .. TILDE, "u" .. TILDE, "N", "n", TILDE, "%1%2%3", "%1%2", "%1%2", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ū", "ū", ""}}, } m["mg"] = { "Malagasy", 7930, "poz-bre", Latn, } m["mh"] = { "Marshallese", 36280, "poz-mic", Latn, sort_key = { from = {"ā" , "ļ" , "m̧" , "ņ" , "n̄" , "o̧" , "ō" , "ū" }, to = {"a~", "l~", "m~", "n~", "n~~", "o~", "o~~", "u~"}} , } m["mi"] = { "Maori", 36451, "poz-pep", Latn, } m["mk"] = { "Macedonian", 9296, "zls", Cyrl, translit_module = "mk-translit", entry_name = { from = {ACUTE}, to = {}}, } m["ml"] = { "Malayalam", 36236, "dra", {"Mlym"}, translit_module = "ml-translit", override_translit = true, } m["mn"] = { "Mongolian", 9246, "xgn", {"Cyrl", "Mong", "Soyo", "Zanb"}, -- entries in Soyo or Zanb might require prior discussion ancestors = {"cmg"}, translit_module = "mn-translit", override_translit = true, } -- "mo" IS TREATED AS "ro", SEE WT:LT m["mr"] = { "Marathi", 1571, "inc-sou", {"Deva", "Modi"}, ancestors = {"omr"}, translit_module = "mr-translit", entry_name = { from = {"च़", "ज़", "झ़"}, to = {"च", "ज", "झ"}} , } m["ms"] = { "Malay", 9237, "poz-mly", {"Latn", "ms-Arab"}, } m["mt"] = { "Maltese", 9166, "sem-arb", Latn, ancestors = {"sqr"}, sort_key = { from = {"ċ", "ġ", "ħ"}, to = {"c", "g", "h"} } } m["my"] = { "Burmese", 9228, "tbq-brm", {"Mymr"}, ancestors = {"obr"}, translit_module = "my-translit", override_translit = true, sort_key = { from = {"ျ", 
"ြ", "ွ", "ှ", "ဿ"}, to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "သ္သ"}}, } m["na"] = { "Nauruan", 13307, "poz-mic", Latn, } m["nb"] = { "Norwegian Bokmål", 25167, "gmq", Latn, ancestors = {"gmq-mno"}, wikimedia_codes = {"no"}, } m["nd"] = { "Northern Ndebele", 35613, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["ne"] = { "Nepali", 33823, "inc-pah", {"Deva", "Newa"}, translit_module = "ne-translit", } m["ng"] = { "Ndonga", 33900, "bnt-ova", Latn, } m["nl"] = { "Dutch", 7411, "gmw", Latn, ancestors = {"dum"}, sort_key = { from = {"[äáâå]", "[ëéê]", "[ïíî]", "[öóô]", "[üúû]", "ç", "ñ", "^-"}, to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} , standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF), } m["nn"] = { "Norwegian Nynorsk", 25164, "gmq", Latn, ancestors = {"gmq-mno"}, } m["no"] = { "Norwegian", 9043, "gmq", Latn, ancestors = {"gmq-mno"}, } m["nr"] = { "Southern Ndebele", 36785, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["nv"] = { "Navajo", 13310, "apa", Latn, sort_key = { from = {"[áą]", "[éę]", "[íį]", "[óǫ]", "ń", "^n([djlt])", "ł" , "[ʼ’']", ACUTE}, to = {"a" , "e" , "i" , "o" , "n", "ni%1" , "l~"}}, -- the tilde is used to guarantee that ł will always be sorted after all other words with l } m["ny"] = { "Chichewa", 33273, "bnt-nys", Latn, entry_name = { from = {"ŵ", "Ŵ", "á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ" }, to = {"w", "W", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M"}}, sort_key = { from = {"ng'"}, to = {"ng"}} , } m["oc"] = { "Occitan", 14185, "roa", {"Latn", "Hebr"}, ancestors = {"pro"}, sort_key = { from = {"[àá]", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "([lns])·h"}, to = {"a" , "e" , "i" 
, "o" , "u" , "c", "%1h" }} , } m["oj"] = { "Ojibwe", 33875, "alg", {"Cans", "Latn"}, sort_key = { from = {"aa", "ʼ", "ii", "oo", "sh", "zh"}, to = {"a~", "h~", "i~", "o~", "s~", "z~"}} , } m["om"] = { "Oromo", 33864, "cus-eas", {"Latn", "Ethi"}, } m["or"] = { "Oriya", 33810, "inc-eas", {"Orya"}, ancestors = {"inc-mor"}, translit_module = "or-translit", } m["os"] = { "Ossetian", 33968, "xsc", {"Cyrl", "Geor", "Latn"}, ancestors = {"oos"}, translit_module = "os-translit", override_translit = true, entry_name = { from = {GRAVE, ACUTE}, to = {}} , } m["pa"] = { "Punjabi", 58635, "inc-pan", {"Guru", "pa-Arab"}, ancestors = {"inc-opa"}, translit_module = "translit-redirect", entry_name = { from = {u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658), u(0x08C7), u(0x0768)}, to = {"", "", "", "", "", "", "", "", "", "ل", "ن"}} , } m["pi"] = { "Pali", 36727, "inc-mid", {"Latn", "Brah", "Deva", "Beng", "Sinh", "Mymr", "Thai", "Lana", "Laoo", "Khmr"}, ancestors = {"sa"}, translit_module = "translit-redirect", sort_key = { from = {"ā", "ī", "ū", "ḍ", "ḷ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṭ", "([เโ])([ก-ฮ])", "([ເໂ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)}, to = {"a~", "i~", "u~", "d~", "l~", "m~", "n~", "n~~", "n~~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} , entry_name = { from = {u(0xFE00)}, to = {}}, } m["pl"] = { "Polish", 809, "zlw-lch", Latn, ancestors = {"zlw-opl"}, sort_key = { from = {"[Ąą]", "[Ćć]", "[Ęę]", "[Łł]", "[Ńń]", "[Óó]", "[Śś]", "[Żż]", "[Źź]"}, to = { "a" .. u(0x10FFFF), "c" .. u(0x10FFFF), "e" .. u(0x10FFFF), "l" .. u(0x10FFFF), "n" .. u(0x10FFFF), "o" .. u(0x10FFFF), "s" .. u(0x10FFFF), "z" .. u(0x10FFFF), "z" .. 
u(0x10FFFE)}} , } m["ps"] = { "Pashto", 58680, "ira-pat", {"ps-Arab"}, ancestors = {"ira-pat-pro"}, } m["pt"] = { "Portuguese", 5146, "roa-ibe", {"Latn", "Brai"}, ancestors = {"roa-opt"}, sort_key = { from = {"[àãáâä]", "[èẽéêë]", "[ìĩíï]", "[òóôõö]", "[üúùũ]", "ç", "ñ"}, to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} , } m["qu"] = { "Quechua", 5218, "qwe", Latn, } m["rm"] = { "Romansch", 13199, "roa-rhe", Latn, } m["ro"] = { "Romanian", 7913, "roa-eas", {"Latn", "Cyrl"}, sort_key = { from = {"ă" , "â" , "î" , "ș" , "ț" }, to = {"a~", "a~~", "i~", "s~", "t~"}}, } m["ru"] = { "Russian", 7737, "zle", {"Cyrl", "Brai"}, translit_module = "ru-translit", sort_key = { from = {"ё"}, to = {"е" .. mw.ustring.char(0x10FFFF)}}, entry_name = { from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE, DIAER}, to = {"Е", "е", "И", "и"}}, standardChars = "ЁА-яё0-9—" .. PUNCTUATION, } m["rw"] = { "Rwanda-Rundi", 3217514, "bnt-glb", Latn, entry_name = { from = {"[áāâǎā́]", "[éēêěḗ]", "[íīîǐī́]", "[óōôǒṓ]", "[úūûǔū́]"}, to = {"a", "e" , "i", "o" , "u"} }, } m["sa"] = { "Sanskrit", 11059, "inc-old", {"Deva", "Bali", "as-Beng", "Beng", "Bhks", "Brah", "Gran", "Gujr", "Guru", "Java", "Khar", "Khmr", "Knda", "Lana", "Laoo", "Mlym", "Modi", "Mymr", "Nand", "Newa", "Orya", "Saur", "Shrd", "Sidd", "Sinh", "Taml", "Telu", "Thai", "Tibt", "Tirh"}, sort_key = { from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)}, to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} , entry_name = { from = {u(0xFE00)}, to = {}}, translit_module = "translit-redirect", } m["sc"] = { "Sardinian", 33976, "roa", Latn, } m["sd"] = { "Sindhi", 33997, "inc-snd", {"sd-Arab", "Deva", "Sind", "Khoj"}, entry_name = { from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), 
u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}, ancestors = {"inc-vra"}, translit_module = "translit-redirect", } m["se"] = { "Northern Sami", 33947, "smi", Latn, entry_name = { from = {"ạ", "[ēẹ]", "ī", "[ōọ]", "ū", "ˈ"}, to = {"a", "e" , "i", "o" , "u"} }, sort_key = { from = {"á" , "č" , "đ" , "ŋ" , "š" , "ŧ" , "ž" }, to = {"a²", "c²", "d²", "n²", "s²", "t²", "z²"} }, standardChars = "A-PR-VZa-pr-vz0-9ÁáČčĐđŊŋŠšŦŧŽž" .. PUNCTUATION, } m["sg"] = { "Sango", 33954, "crp", Latn, ancestors = {"ngb"}, } m["sh"] = { "Serbo-Croatian", 9301, "zls", {"Latn", "Cyrl", "Glag"}, entry_name = { from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE}, to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "Е", "е", "И" , "и", "У", "у" }}, wikimedia_codes = {"sh", "bs", "hr", "sr"}, } m["si"] = { "Sinhalese", 13267, "inc-ins", {"Sinh"}, ancestors = {"elu-prk"}, translit_module = "si-translit", override_translit = true, } m["sk"] = { "Slovak", 9058, "zlw", Latn, sort_key = { from = {"[áä]", "é", "í", "[óô]", "ú", "ý", "ŕ", "ĺ", "[" .. DIAER .. ACUTE .. CIRC .. 
"]"}, to = {"a" , "e", "i", "o" , "u", "y", "r", "l", ""}} , } m["sl"] = { "Slovene", 9063, "zls", Latn, entry_name = { from = {"[ÁÀÂĀȂȀ]", "[áàâāȃȁ]", "[ÉÈÊĒȆȄỆẸ]", "[éèêēȇȅệẹə]", "[ÍÌÎĪȊȈ]", "[íìîīȋȉ]", "[ÓÒÔŌȎȌỘỌ]", "[óòôōȏȍộọ]", "[ŔȒȐ]", "[ŕȓȑ]", "[ÚÙÛŪȖȔ]", "[úùûūȗȕ]", "ł", GRAVE, ACUTE, CIRC, MACRON, DGRAVE, INVBREVE, DOTBELOW}, to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "l"}, }, sort_key = { from = {"č" , "š" , "ž" }, to = {"c²", "s²", "z²"}, }, } m["sm"] = { "Samoan", 34011, "poz-pnp", Latn, } m["sn"] = { "Shona", 34004, "bnt-sho", Latn, entry_name = {remove_diacritics = ACUTE}, } m["so"] = { "Somali", 13275, "cus-eas", {"Latn", "Arab", "Osma"}, entry_name = { from = {"[ÁÀÂ]", "[áàâ]", "[ÉÈÊ]", "[éèê]", "[ÍÌÎ]", "[íìî]", "[ÓÒÔ]", "[óòô]", "[ÚÙÛ]", "[úùû]", "[ÝỲ]", "[ýỳ]"}, to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "U" , "u", "Y", "y"}} , } m["sq"] = { "Albanian", 8748, "sqj", {"Latn", "Grek", "Elba"}, entry_name = {remove_diacritics = ACUTE}, sort_key = { from = { '[âãä]', '[ÂÃÄ]', '[êẽë]', '[ÊẼË]', 'ĩ', 'Ĩ', 'õ', 'Õ', 'ũ', 'Ũ', 'ỹ', 'Ỹ', 'ç', 'Ç' }, to = { 'a', 'A', 'e', 'E', 'i', 'I', 'o', 'O', 'u', 'U', 'y', 'Y', 'c', 'C' } } , } m["sr"] = { "Sebian", 9299, "zls", aliases = {"Српски", "Српски језик", "српски", "српски језик", "srpski", "srpski jezik", "Srpski", "Srpski jezik"}, scripts = {"Latn", "Cyrl"}, entry_name = { from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE}, to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "Е", "е", "И" , "и", "У", "у" }}, } m["ss"] = { "Swazi", 34014, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["st"] = { "Sotho", 34340, 
"bnt-sts", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["su"] = { "Sundanese", 34002, "poz-msa", {"Latn", "Sund"}, translit_module = "su-translit", } m["sv"] = { "Swedish", 9027, "gmq", Latn, ancestors = {"gmq-osw"}, } m["sw"] = { "Swahili", 7838, "bnt-swh", LatnArab, sort_key = { from = {"ng'", "^-"}, to = {"ngz"}} , } m["ta"] = { "Tamil", 5885, "dra", {"Taml"}, ancestors = {"oty"}, translit_module = "ta-translit", override_translit = true, } m["te"] = { "Telugu", 8097, "dra", {"Telu"}, translit_module = "te-translit", override_translit = true, } m["tg"] = { "Tajik", 9260, "ira-swi", {"Cyrl", "fa-Arab", "Latn"}, ancestors = {"pal"}, -- same as "fa", see WT:T:AFA translit_module = "tg-translit", override_translit = true, sort_key = { from = {"Ё", "ё"}, to = {"Е" , "е"}} , entry_name = { from = {ACUTE}, to = {}} , } m["th"] = { "Tai", 9217, "tai-swe", {"Thai", "Brai"}, translit_module = "th-translit", sort_key = { from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}, } m["ti"] = { "Tigrinya", 34124, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit", } m["tk"] = { "Turkmen", 9267, "trk-ogz", {"Latn", "Cyrl", "Arab"}, entry_name = { from = {"ā", "ē", "ī", "ō", "ū", "ȳ", "ȫ", "ǖ", MACRON}, to = {"a", "e", "i", "o", "u", "y", "ö", "ü", ""}}, ancestors = {"trk-ogz-pro"}, } m["tl"] = { "Tagalog", 34057, "phi", {"Latn", "Tglg"}, entry_name = { from = {"[áàâ]", "[éèê]", "[íìî]", "[óòô]", "[úùû]", ACUTE, GRAVE, CIRC}, to = {"a" , "e" , "i" , "o" , "u" }}, translit_module = "tl-translit", override_translit = true } m["tn"] = { "Tswana", 34137, "bnt-sts", Latn, } m["to"] = { "Tongan", 34094, "poz-pol", Latn, sort_key = { from = {"ā", "ē", "ī", "ō", "ū", MACRON}, to = {"a", "e", "i", "o", "u", ""}}, entry_name = { from = {"á", "é", "í", "ó", "ú", ACUTE}, to = {"a", "e", "i", "o", "u", ""}}, } m["tr"] = { "Turkish", 
256, "trk-ogz", Latn, ancestors = {"ota"}, } m["ts"] = { "Tsonga", 34327, "bnt-tsr", Latn, } m["tt"] = { "Tatar", 25285, "trk-kbu", {"Cyrl", "Latn", "tt-Arab"}, translit_module = "tt-translit", override_translit = true, } -- "tw" IS TREATED AS "ak", SEE WT:LT m["ty"] = { "Tahitian", 34128, "poz-pep", Latn, } m["ug"] = { "Uyghur", 13263, "trk-kar", {"ug-Arab", "Latn", "Cyrl"}, ancestors = {"chg"}, translit_module = "ug-translit", override_translit = true, } m["uk"] = { "Ukrainian", 8798, "zle", Cyrl, ancestors = {"orv"}, translit_module = "uk-translit", entry_name = { from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE}, to = {"Е", "е", "И", "и"}}, standardChars = "ЄІЇА-ЩЬЮ-щьюяєії" .. PUNCTUATION, } m["ur"] = { "Urdu", 1617, "inc-hnd", {"ur-Arab"}, ancestors = {"inc-ohi"}, entry_name = { from = {u(0x0640), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658)}, to = {}} , } m["uz"] = { "Uzbek", 9264, "trk-kar", {"Latn", "Cyrl", "fa-Arab"}, ancestors = {"chg"}, } m["ve"] = { "Venda", 32704, "bnt-bso", Latn, } m["vi"] = { "Vietnamese", 9199, "mkh-vie", {"Latn", "Hani"}, ancestors = {"mkh-mvi"}, sort_key = "vi-sortkey", } m["vo"] = { "Volapük", 36986, "art", Latn, } m["wa"] = { "Walloon", 34219, "roa-oil", Latn, ancestors = {"fro"}, sort_key = { from = {"[áàâäå]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"}, to = {"a" , "e" , "i" , "o" , "u" , "y" , "c"}} , } m["wo"] = { "Wolof", 34257, "alv-fwo", LatnArab, } m["xh"] = { "Xhosa", 13218, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["yi"] = { "Yiddish", 8641, "gmw", {"Hebr"}, ancestors = {"gmh"}, sort_key = { from = {"[אַאָ]", "בּ", "[וֹוּ]", "יִ", "ײַ", "פֿ"}, to = {"א", "ב", "ו", "י", "יי", "פ"}} , translit_module = "yi-translit", } m["yo"] = { "Yoruba", 34311, "alv-yor", Latn, sort_key = { from = {"ẹ", "ọ", "gb", "ṣ"}, 
to = {"e~" , "o~", "g~", "s~"}}, entry_name = { remove_diacritics = ACUTE .. GRAVE .. MACRON }, } m["za"] = { "Zhuang", 13216, "tai", {"Latn", "Hani"}, sort_key = { from = {"%p"}, to = {""}}, } m["zh"] = { "Chinese", 7850, "zhx", {"Hani", "Brai", "Nshu"}, ancestors = {"ltc"}, sort_key = "zh-sortkey", } m["zu"] = { "Zulu", 10179, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } return m ic3npv3o3391rej1u5pwl6rmfk5b6y7 13339 13338 2022-08-04T12:40:06Z Asinis632 1829 Scribunto text/plain local u = mw.ustring.char -- UTF-8 encoded strings for some commonly-used diacritics local GRAVE = u(0x0300) local ACUTE = u(0x0301) local CIRC = u(0x0302) local TILDE = u(0x0303) local MACRON = u(0x0304) local BREVE = u(0x0306) local DOTABOVE = u(0x0307) local DIAER = u(0x0308) local CARON = u(0x030C) local DGRAVE = u(0x030F) local INVBREVE = u(0x0311) local DOTBELOW = u(0x0323) local RINGBELOW = u(0x0325) local CEDILLA = u(0x0327) local OGONEK = u(0x0328) local CGJ = u(0x034F) -- combining grapheme joiner local DOUBLEINVBREVE = u(0x0361) -- Punctuation to be used for standardChars field local PUNCTUATION = ' !#%&*+,-./:;<=>?@^_`|~\'()' local Cyrl = {"Cyrl"} local Latn = {"Latn"} local LatnArab = {"Latn", "Arab"} local m = {} m["aa"] = { "Afar", 27811, "cus-eas", Latn, entry_name = { remove_diacritics = ACUTE}, } m["ab"] = { "Abkhaz", 5111, "cau-abz", {"Cyrl", "Geor", "Latn"}, translit_module = "ab-translit", override_translit = true, entry_name = { from = {GRAVE, ACUTE}, to = {}} , } m["ae"] = { "Avestan", 29572, "ira-cen", {"Avst", "Gujr"}, translit_module = "Avst-translit", wikipedia_article = "Avestan", } m["af"] = { "Afrikaans", 14196, "gmw", LatnArab, ancestors = {"nl"}, sort_key = { from = {"[äáâà]", "[ëéêè]", "[ïíîì]", "[öóôò]", "[üúûù]", "[ÿýŷỳ]", "^-", "'"}, to = {"a" , "e" , "i" , "o" , "u" , "y" }} , } m["ak"] = { "Akan", 
28026, "alv-ctn", Latn, } m["am"] = { "Amharic", 28244, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit", } m["an"] = { "Aragonese", 8765, "roa-ibe", Latn, ancestors = {"roa-oan"}, } m["ar"] = { "Arabic", 13955, "sem-arb", {"Arab", "Hebr", "Brai"}, -- replace alif waṣl with alif -- remove tatweel and diacritics: fathatan, dammatan, kasratan, fatha, -- damma, kasra, shadda, sukun, superscript (dagger) alef entry_name = { from = {u(0x0671), u(0x0640), "[" .. u(0x064B) .. "-" .. u(0x0652) .. "]", u(0x0670)}, to = {u(0x0627)}}, -- put Judeo-Arabic (Hebrew-script Arabic) under the category header -- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles sort_key = { from = {"^%f[" .. u(0x5D0) .. "-" .. u(0x5EA) .. "]"}, to = {u(0xFB21)}, }, translit_module = "ar-translit", } m["as"] = { "Assamese", 29401, "inc-eas", {"as-Beng"}, ancestors = {"inc-mas"}, translit_module = "as-translit", } m["av"] = { "Avar", 29561, "cau-nec", Cyrl, ancestors = {"oav"}, translit_module = "av-translit", override_translit = true, entry_name = { from = {GRAVE, ACUTE}, to = {}} , } m["ay"] = { "Aymara", 4627, "sai-aym", Latn, } m["az"] = { "Azerbaijani", 9292, "trk-ogz", {"Latn", "Cyrl", "fa-Arab"}, ancestors = {"trk-oat"}, } m["ba"] = { "Bashkir", 13389, "trk-kbu", Cyrl, translit_module = "ba-translit", override_translit = true, } m["be"] = { "Belarusian", 9091, "zle", Cyrl, ancestors = {"orv"}, translit_module = "be-translit", sort_key = { from = {"Ё", "ё"}, to = {"Е" , "е"}}, entry_name = { from = {"Ѐ", "ѐ", GRAVE, ACUTE}, to = {"Е", "е"}}, } m["bg"] = { "Bulgarian", 7918, "zls", {"Cyrl"}, ancestors = {"cu"}, translit_module = "bg-translit", entry_name = { from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE}, to = {"Е", "е", "И", "и"}}, } m["bh"] = { "Bihari", 135305, "inc-eas", {"Deva"}, ancestors = {"inc-mgd"}, } m["bi"] = { "Bislama", 35452, "crp", Latn, ancestors = {"en"}, } m["bm"] = { "Bambara", 33243, "dmn-emn", Latn, } m["bn"] = { "Bengali", 9610, "inc-eas", {"Beng", 
"Newa"}, ancestors = {"inc-mbn"}, translit_module = "bn-translit", } m["bo"] = { "Tibetan", 34271, "sit-tib", {"Tibt"}, -- sometimes Deva? ancestors = {"xct"}, translit_module = "bo-translit", override_translit = true, } m["br"] = { "Breton", 12107, "cel-bry", Latn, ancestors = {"xbm"}, } m["ca"] = { "Catalan", 7026, "roa", Latn, ancestors = {"roa-oca"}, sort_key = { from = {"à", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "l·l"}, to = {"a", "e" , "i" , "o" , "u" , "c", "ll" }} , } m["ce"] = { "Chechen", 33350, "cau-vay", Cyrl, translit_module = "ce-translit", override_translit = true, entry_name = { from = {MACRON}, to = {}}, } m["ch"] = { "Chamorro", 33262, "poz-sus", Latn, } m["co"] = { "Corsican", 33111, "roa-itd", Latn, } m["cr"] = { "Cree", 33390, "alg", {"Cans", "Latn"}, translit_module = "translit-redirect", } m["cs"] = { "Czech", 9056, "zlw", Latn, ancestors = {"zlw-ocs"}, sort_key = { from = {"á", "é", "í", "ó", "[úů]", "ý"}, to = {"a", "e", "i", "o", "u" , "y"}} , } m["cu"] = { "Old Church Slavonic", 35499, "zls", {"Cyrs", "Glag"}, translit_module = "Cyrs-Glag-translit", entry_name = { from = {u(0x0484)}, -- kamora to = {}}, sort_key = { from = {"оу", "є"}, to = {"у" , "е"}} , } m["cv"] = { "Chuvash", 33348, "trk-ogr", Cyrl, ancestors = {"xbo"}, translit_module = "cv-translit", override_translit = true, } m["cy"] = { "Welsh", 9309, "cel-bry", Latn, ancestors = {"wlm"}, sort_key = { from = {"[âáàä]", "ch", "dd", "[êéèë]", "ff", "ngh", "[îíìï]", "ll", "[ôóòö]", "ph", "rh", "th", "[ûúùü]", "[ŵẃẁẅ]", "[ŷýỳÿ]", "'"}, to = {"a" , "c~", "d~", "e" , "f~", "g~h", "i" , "l~", "o" , "p~", "r~", "t~", "u" , "w" , "y" }} , standardChars = "A-IL-PR-UWYa-il-pr-uwy0-9ÂâÊêÎîÔôÛûŴŵŶŷ" .. 
PUNCTUATION, } m["da"] = { "Danish", 9035, "gmq", Latn, ancestors = {"gmq-oda"}, } m["de"] = { "Jeman", 188, "gmw", {"Latn", "Latf"}, ancestors = {"gmh"}, sort_key = { from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]", "ß" }, to = {"a" , "e" , "i" , "o" , "u" , "ss"}} , standardChars = "A-Za-z0-9ÄäÖöÜüß" .. PUNCTUATION, } m["dv"] = { "Dhivehi", 32656, "inc-ins", {"Thaa"}, ancestors = {"elu-prk"}, translit_module = "dv-translit", override_translit = true, } m["dz"] = { "Dzongkha", 33081, "sit-tib", {"Tibt"}, ancestors = {"xct"}, translit_module = "bo-translit", override_translit = true, } m["ee"] = { "Ewe", 30005, "alv-gbe", Latn, } m["el"] = { "Greek", 9129, "grk", {"Grek", "Brai"}, ancestors = {"grc"}, translit_module = "el-translit", override_translit = true, sort_key = { -- Keep this synchronized with grc, cpg, pnt, tsd from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"}, to = {"α" , "ε" , "η" , "ι" , "ο" , "υ" , "ω" , "ρ", "σ"}} , standardChars = "ͺ;΄-ώϜϝ" .. PUNCTUATION, } m["en"] = { "Inglis", 1860, "gmw", {"Latn", "Brai", "Shaw", "Dsrt"}, -- entries in Shaw or Dsrt might require prior discussion ancestors = {"enm"}, sort_key = { from = {"[äàáâåā]", "[ëèéêē]", "[ïìíîī]", "[öòóôō]", "[üùúûū]", "æ" , "œ" , "[çč]", "ñ", "'"}, to = {"a" , "e" , "i" , "o" , "u" , "ae", "oe", "c" , "n"}}, wikimedia_codes = {"en", "simple"}, standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF), } m["eo"] = { "Esperanto", 143, "art", Latn, sort_key = { from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ĉ]", "[ĝ]", "[ĥ]", "[ĵ]", "[ŝ]", "[ŭ]"}, to = {"a" , "e" , "i" , "o" , "u", "cĉ", "gĉ", "hĉ", "jĉ", "sĉ", "uĉ"}} , standardChars = "A-PRSTUVZa-prstuvzĉĈĝĜĵĴŝŜŭŬ0-9" .. 
PUNCTUATION, } m["es"] = { "Spanish", 1321, "roa-ibe", {"Latn", "Brai"}, ancestors = {"osp"}, sort_key = { from = {"á", "é", "í", "ó", "[úü]", "ç", "ñ"}, to = {"a", "e", "i", "o", "u" , "c", "n~"}}, standardChars = "A-VXYZa-vxyz0-9ÁáÉéÍíÓóÚúÑñ¿¡" .. PUNCTUATION, } m["et"] = { "Estonian", 9072, "fiu-fin", Latn, } m["eu"] = { "Basque", 8752, "euq", Latn, } m["fa"] = { "Persian", 9168, "ira-swi", {"fa-Arab"}, ancestors = {"pal"}, -- "ira-mid" entry_name = { from = {u(0x064E), u(0x0640), u(0x064F), u(0x0650), u(0x0651), u(0x0652)}, to = {}} , } m["ff"] = { "Fula", 33454, "alv-fwo", {"Latn", "Adlm"}, } m["fi"] = { "Finnish", 1412, "fiu-fin", Latn, entry_name = { from = {"ˣ"}, -- Used to indicate gemination of the next consonant to = {}}, sort_key = { from = {"[áàâã]", "[éèêẽ]", "[íìîĩ]", "[óòôõ]", "[úùûũ]", "[ýỳŷüű]", "[øõő]", "æ" , "œ" , "[čç]", "š", "ž", "ß" , "[':]"}, to = {"a" , "e" , "i" , "o" , "u" , "y" , "ö" , "ae", "oe", "c" , "s", "z", "ss"}} , } m["fj"] = { "Fijian", 33295, "poz-occ", Latn, } m["fo"] = { "Faroese", 25258, "gmq", Latn, ancestors = {"non"}, } m["fr"] = { "French", 150, "roa-oil", {"Latn", "Brai"}, ancestors = {"frm"}, sort_key = { from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ" , "œ" , "'"}, to = {"a" , "e" , "i" , "o" , "u" , "y" , "c", "ae", "oe"}}, standardChars = "A-Za-z0-9ÀÂÇÉÈÊËÎÏÔŒÛÙÜàâçéèêëîïôœûùü«»" .. PUNCTUATION, } m["fy"] = { "West Frisian", 27175, "gmw-fri", Latn, ancestors = {"ofs"}, sort_key = { from = {"[àáâä]", "[èéêë]", "[ìíîïyỳýŷÿ]", "[òóôö]", "[ùúûü]", "æ", "[ /.-]"}, to = {"a" , "e" , "i" , "o" , "u", "ae"}} , standardChars = "A-PR-WYZa-pr-wyz0-9Ææâäàéêëèïìôöòúûüùỳ" .. PUNCTUATION, } m["ga"] = { "Irish", 9142, "cel-gae", Latn, ancestors = {"mga"}, sort_key = { from = {"á", "é", "í", "ó", "ú", "ý", "ḃ" , "ċ" , "ḋ" , "ḟ" , "ġ" , "ṁ" , "ṗ" , "ṡ" , "ṫ" }, to = {"a", "e", "i", "o", "u", "y", "bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}} , standardChars = "A-IL-PR-Ua-il-pr-u0-9ÁáÉéÍíÓóÚú" .. 
PUNCTUATION, } m["gd"] = { "Scottish Gaelic", 9314, "cel-gae", Latn, ancestors = {"mga"}, sort_key = { from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ýỳ]"}, to = {"a" , "e" , "i" , "o" , "u" , "y" }} , standardChars = "A-IL-PR-Ua-il-pr-u0-9ÀàÈèÌìÒòÙù" .. PUNCTUATION, } m["gl"] = { "Galician", 9307, "roa-ibe", Latn, ancestors = {"roa-opt"}, sort_key = { from = {"á", "é", "í", "ó", "ú"}, to = {"a", "e", "i", "o", "u"}} , } m["gn"] = { "Guaraní", 35876, "tup-gua", Latn, } m["gu"] = { "Gujarati", 5137, "inc-wes", {"Gujr"}, ancestors = {"inc-mgu"}, translit_module = "gu-translit", } m["gv"] = { "Manx", 12175, "cel-gae", Latn, ancestors = {"mga"}, sort_key = { from = {"ç", "-"}, to = {"c"}} , standardChars = "A-WYÇa-wyç0-9" .. PUNCTUATION, } m["ha"] = { "Hausa", 56475, "cdc-wst", LatnArab, sort_key = { from = {"ɓ", "ɗ", "ƙ", "'y", "ƴ", "'" }, to = {"b~" , "d~" , "k~", "y~", "y~", "" }}, entry_name = { from = {"R̃", "r̃", "À", "à", "È", "è", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Î", "î", "Ô", "ô", "Û", "û", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú", "Ā̀", "ā̀", "Ḕ", "ḕ", "Ī̀", "ī̀", "Ṑ", "ṑ", "Ū̀", "ū̀", GRAVE, ACUTE}, to = {"R", "r", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}, } m["he"] = { "Hebrew", 9288, "sem-can", {"Hebr", "Phnx", "Brai"}, entry_name = { from = {"[" .. u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. CGJ .. "]"}, to = {}} , } m["hi"] = { "Hindi", 1568, "inc-hnd", {"Deva", "Kthi", "Newa"}, ancestors = {"inc-ohi"}, translit_module = "hi-translit", standardChars = "ँंअ-ऊएऐओ-घच-झट-नप-रलवशसहा-ूेैो-◌्।-॰ड़ढ़" .. 
PUNCTUATION, } m["ho"] = { "Hiri Motu", 33617, "crp", Latn, ancestors = {"meu"}, } m["ht"] = { "Haitian Creole", 33491, "crp", Latn, ancestors = {"fr"}, } m["hu"] = { "Hungarian", 9067, "urj-ugr", {"Latn", "Hung"}, ancestors = {"ohu"}, sort_key = { from = {"á", "é", "í", "ó", "ú", "[öő]", "[üű]", "cs", "dzs", "gy", "ly", "ny", "zs"}, to = {"a", "e", "i", "o", "u", "o~", "u~", "c~", "dz~", "g~", "l~", "n~", "z~"}, }, } m["hy"] = { "Armenian", 8785, "hyx", {"Armn", "Brai"}, ancestors = {"axm"}, translit_module = "Armn-translit", override_translit = true, sort_key = { from = {"ու", "և", "եւ"}, to = {"ւ", "եվ", "եվ"}}, entry_name = { from = {"՞", "՜", "՛", "՟", "և", "<sup>յ</sup>", "<sup>ի</sup>", "<sup>է</sup>"}, to = {"", "", "", "", "եւ", "յ", "ի", "է"}} , } m["hz"] = { "Herero", 33315, "bnt-swb", Latn, } m["ia"] = { "Interlingua", 35934, "art", Latn, } m["id"] = { "Indonesian", 9240, "poz-mly", Latn, ancestors = {"ms"}, } m["ie"] = { "Interlingue", 35850, "art", Latn, type = "appendix-constructed", } m["ig"] = { "Igbo", 33578, "alv-igb", Latn, sort_key = { from = {"ụ", "ị", "ọ", "gb", "gh", "gw", "kp", "kw", "ṅ", "nw", "ny", "sh"}, to = {"u~" , "i~", "o~", "gy", "gz", "g~", "kz", "k~", "ny", "nz", "n~", "s~"}}, entry_name = { remove_diacritics = ACUTE .. GRAVE .. MACRON }, } m["ii"] = { "Sichuan Yi", 34235, "tbq-lol", {"Yiii"}, translit_module = "ii-translit", } m["ik"] = { "Inupiaq", 27183, "esx-inu", Latn, } m["io"] = { "Ido", 35224, "art", Latn, } m["is"] = { "Icelandic", 294, "gmq", Latn, ancestors = {"non"}, } m["it"] = { "Italian", 652, "roa-itd", Latn, sort_key = { from = {"[àáâäå]", "[èéêë]", "[ìíîï]", "[òóôö]", "[ùúûü]"}, to = {"a" , "e" , "i" , "o" , "u" }} , standardChars = "A-IL-VZa-il-vz0-9" .. 
PUNCTUATION, } m["iu"] = { "Inuktitut", 29921, "esx-inu", {"Cans", "Latn"}, translit_module = "translit-redirect", override_translit = true, } m["ja"] = { "Siapan", 5287, "jpx", {"Jpan", "Brai"}, ancestors = {"ojp"}, --[=[ -- Handled by jsort function in [[Module:ja]]. sort_key = { from = {"[ぁァア]", "[ぃィイ]", "[ぅゔゥウヴ]", "[ぇェエ]", "[ぉォオ]", "[がゕカガヵ]", "[ぎキギ]", "[ぐクグㇰ]", "[げゖケゲヶ]", "[ごコゴ]", "[ざサザ]", "[じシジㇱ]", "[ずスズㇲ]", "[ぜセゼ]", "[ぞソゾ]", "[だタダ]", "[ぢチヂ]", "[っづッツヅ]", "[でテデ]", "[どトドㇳ]", "ナ", "ニ", "[ヌㇴ]", "ネ", "ノ", "[ばぱハバパㇵ]", "[びぴヒビピㇶ]", "[ぶぷフブプㇷ]", "[べぺヘベペㇸ]", "[ぼぽホボポㇹ]", "マ", "ミ", "[ムㇺ]", "メ", "モ", "[ゃャヤ]", "[ゅュユ]", "[ょョヨ]", "[ラㇻ]", "[リㇼ]", "[ルㇽ]", "[レㇾ]", "[ロㇿ]", "[ゎヮワヷ]", "[ヰヸ]", "[ヱヹ]", "[ヲヺ]", "ン", "[゙゚゛゜ゝゞ・ヽヾ]", "𛀀"}, to = {"あ", "い", "う", "え", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "や", "ゆ", "よ", "ら", "り", "る", "れ", "ろ", "わ", "ゐ", "ゑ", "を", "ん", "", "え"}}, --]=] } m["jv"] = { "Javanese", 33549, "poz-sus", {"Latn", "Java"}, translit_module = "jv-translit", ancestors = {"kaw"}, link_tr = true, } m["ka"] = { "Georgian", 8108, "ccs-gzn", {"Geor", "Geok", "Hebr"}, -- Hebr is used to write Judeo-Georgian ancestors = {"oge"}, translit_module = "Geor-translit", override_translit = true, entry_name = { from = {"̂"}, to = {""}}, } m["kg"] = { "Kongo", 33702, "bnt-kng", Latn, } m["ki"] = { "Kikuyu", 33587, "bnt-kka", Latn, } m["kj"] = { "Kwanyama", 1405077, "bnt-ova", Latn, } m["kk"] = { "Kazakh", 9252, "trk-kno", {"Cyrl", "Latn", "kk-Arab"}, translit_module = "kk-translit", override_translit = true, } m["kl"] = { "Greenlandic", 25355, "esx-inu", Latn, } m["km"] = { "Khmer", 9205, "mkh-kmr", {"Khmr"}, ancestors = {"mkh-mkm"}, translit_module = "km-translit", } m["kn"] = { "Kannada", 33673, "dra", {"Knda"}, ancestors = {"dra-mkn"}, translit_module = "kn-translit", } m["ko"] = { "Korean", 9176, "qfa-kor", {"Kore", "Brai"}, ancestors = {"okm"}, -- 20210122 idea: 
strip parenthesized hanja from entry link -- Hani regex is a reasonable subset of Hani from [[Module:scripts/data]], -- last updated on 20210214. entry_name = { from = { " *%([一-鿿㐀-䶿𠀀-𮯯𰀀-𱍏﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧﨨﨩]+%)", }, to = { "", }}, display = { from = {"%-"}, to = {}, }, translit_module = "ko-translit", } m["kr"] = { "Kanuri", 36094, "ssa-sah", LatnArab, sort_key = { from = {"ny", "ǝ", "sh"}, to = {"n~", "e~", "s~"}} , -- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically entry_name = { from = {"À", "à", "È", "è", "Ǝ̀", "ǝ̀", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Ǝ̂", "ǝ̂", "Î", "î", "Ô", "ô", "Û", "û", "Ă", "ă", "Ĕ", "ĕ", "Ǝ̆", "ǝ̆", "Ĭ", "ĭ", "Ŏ", "ŏ", "Ŭ", "ŭ", "Á", "á", "É", "é", "Ǝ́", "ǝ́", "Í", "í", "Ó", "ó", "Ú", "ú", GRAVE, ACUTE}, to = {"A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u"}}, } m["ks"] = { "Kashmiri", 33552, "inc-dar", {"ks-Arab", "Deva", "Shrd", "Latn"}, translit_module = "translit-redirect", ancestors = {"sa"}, } -- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT m["kw"] = { "Cornish", 25289, "cel-bry", Latn, ancestors = {"cnx"}, } m["ky"] = { "Kyrgyz", 9255, "trk-kip", {"Cyrl", "Latn", "Arab"}, translit_module = "ky-translit", override_translit = true, } m["la"] = { "Latin", 397, "itc", Latn, ancestors = {"itc-ola"}, entry_name = {remove_diacritics = MACRON .. BREVE .. DIAER .. DOUBLEINVBREVE}, standardChars = "A-Za-z0-9ÆæŒœĀ-ăĒ-ĕĪ-ĭŌ-ŏŪ-ŭȲȳ" .. MACRON .. BREVE .. 
PUNCTUATION, } m["lb"] = { "Luxembourgish", 9051, "gmw", Latn, ancestors = {"gmh"}, } m["lg"] = { "Luganda", 33368, "bnt-nyg", Latn, entry_name = { from = {"á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ", "â", "Â", "ê", "Ê", "î", "Î", "ô", "Ô", "û", "Û" }, to = {"a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U",}}, sort_key = { from = {"ŋ"}, to = {"n"}} , } m["li"] = { "Limburgish", 102172, "gmw", Latn, ancestors = {"dum"}, } m["ln"] = { "Lingala", 36217, "bnt-bmo", Latn, } m["lo"] = { "Lao", 9211, "tai-swe", {"Laoo"}, translit_module = "lo-translit", sort_key = { from = {"[%pໆ]", "[່-ໍ]", "ຼ", "ຽ", "ໜ", "ໝ", "([ເແໂໃໄ])([ກ-ຮ])"}, to = {"", "", "ລ", "ຍ", "ຫນ", "ຫມ", "%2%1"}}, standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. PUNCTUATION, } m["lt"] = { "Lithuanian", 9083, "bat", Latn, ancestors = {"olt"}, entry_name = { from = {"[áãà]", "[ÁÃÀ]", "[éẽè]", "[ÉẼÈ]", "[íĩì]", "[ÍĨÌ]", "[ýỹ]", "[ÝỸ]", "ñ", "[óõò]", "[ÓÕÒ]", "[úũù]", "[ÚŨÙ]", ACUTE, GRAVE, TILDE}, to = {"a", "A", "e", "E", "i", "I", "y", "Y", "n", "o", "O", "u", "U"}} , } m["lu"] = { "Luba-Katanga", 36157, "bnt-lub", Latn, } m["lv"] = { "Latvian", 9078, "bat", Latn, entry_name = { -- This attempts to convert vowels with tone marks to vowels either with -- or without macrons. Specifically, there should be no macrons if the -- vowel is part of a diphthong (including resonant diphthongs such -- pìrksts -> pirksts not #pīrksts). What we do is first convert the -- vowel + tone mark to a vowel + tilde in a decomposed fashion, -- then remove the tilde in diphthongs, then convert the remaining -- vowel + tilde sequences to macroned vowels, then delete any other -- tilde. We leave already-macroned vowels alone: Both e.g. ar and ār -- occur before consonants. FIXME: This still might not be sufficient. 
from = {"Ȩ", "ȩ", "[ÂÃÀ]", "[âãà]", "[ÊẼÈ]", "[êẽè]", "[ÎĨÌ]", "[îĩì]", "[ÔÕÒ]", "[ôõò]", "[ÛŨÙ]", "[ûũù]", "[ÑǸ]", "[ñǹ]", "[" .. CIRC .. TILDE ..GRAVE .."]", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .. "?([^aAeEiIoOuUāĀēĒīĪūŪ])", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .."?$", "([iI])" .. TILDE .. "?([eE])" .. TILDE .. "?", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "U" .. TILDE, "u" .. TILDE, TILDE}, to = {"E", "e", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "O", "o", "U" .. TILDE, "u" .. TILDE, "N", "n", TILDE, "%1%2%3", "%1%2", "%1%2", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ū", "ū", ""}}, } m["mg"] = { "Malagasy", 7930, "poz-bre", Latn, } m["mh"] = { "Marshallese", 36280, "poz-mic", Latn, sort_key = { from = {"ā" , "ļ" , "m̧" , "ņ" , "n̄" , "o̧" , "ō" , "ū" }, to = {"a~", "l~", "m~", "n~", "n~~", "o~", "o~~", "u~"}} , } m["mi"] = { "Maori", 36451, "poz-pep", Latn, } m["mk"] = { "Macedonian", 9296, "zls", Cyrl, translit_module = "mk-translit", entry_name = { from = {ACUTE}, to = {}}, } m["ml"] = { "Malayalam", 36236, "dra", {"Mlym"}, translit_module = "ml-translit", override_translit = true, } m["mn"] = { "Mongolian", 9246, "xgn", {"Cyrl", "Mong", "Soyo", "Zanb"}, -- entries in Soyo or Zanb might require prior discussion ancestors = {"cmg"}, translit_module = "mn-translit", override_translit = true, } -- "mo" IS TREATED AS "ro", SEE WT:LT m["mr"] = { "Marathi", 1571, "inc-sou", {"Deva", "Modi"}, ancestors = {"omr"}, translit_module = "mr-translit", entry_name = { from = {"च़", "ज़", "झ़"}, to = {"च", "ज", "झ"}} , } m["ms"] = { "Malay", 9237, "poz-mly", {"Latn", "ms-Arab"}, } m["mt"] = { "Maltese", 9166, "sem-arb", Latn, ancestors = {"sqr"}, sort_key = { from = {"ċ", "ġ", "ħ"}, to = {"c", "g", "h"} } } m["my"] = { "Burmese", 9228, "tbq-brm", {"Mymr"}, ancestors = {"obr"}, translit_module = "my-translit", override_translit = true, sort_key = { from = {"ျ", 
"ြ", "ွ", "ှ", "ဿ"}, to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "သ္သ"}}, } m["na"] = { "Nauruan", 13307, "poz-mic", Latn, } m["nb"] = { "Norwegian Bokmål", 25167, "gmq", Latn, ancestors = {"gmq-mno"}, wikimedia_codes = {"no"}, } m["nd"] = { "Northern Ndebele", 35613, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["ne"] = { "Nepali", 33823, "inc-pah", {"Deva", "Newa"}, translit_module = "ne-translit", } m["ng"] = { "Ndonga", 33900, "bnt-ova", Latn, } m["nl"] = { "Dutch", 7411, "gmw", Latn, ancestors = {"dum"}, sort_key = { from = {"[äáâå]", "[ëéê]", "[ïíî]", "[öóô]", "[üúû]", "ç", "ñ", "^-"}, to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} , standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF), } m["nn"] = { "Norwegian Nynorsk", 25164, "gmq", Latn, ancestors = {"gmq-mno"}, } m["no"] = { "Norwegian", 9043, "gmq", Latn, ancestors = {"gmq-mno"}, } m["nr"] = { "Southern Ndebele", 36785, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["nv"] = { "Navajo", 13310, "apa", Latn, sort_key = { from = {"[áą]", "[éę]", "[íį]", "[óǫ]", "ń", "^n([djlt])", "ł" , "[ʼ’']", ACUTE}, to = {"a" , "e" , "i" , "o" , "n", "ni%1" , "l~"}}, -- the tilde is used to guarantee that ł will always be sorted after all other words with l } m["ny"] = { "Chichewa", 33273, "bnt-nys", Latn, entry_name = { from = {"ŵ", "Ŵ", "á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ" }, to = {"w", "W", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M"}}, sort_key = { from = {"ng'"}, to = {"ng"}} , } m["oc"] = { "Occitan", 14185, "roa", {"Latn", "Hebr"}, ancestors = {"pro"}, sort_key = { from = {"[àá]", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "([lns])·h"}, to = {"a" , "e" , "i" 
, "o" , "u" , "c", "%1h" }} , } m["oj"] = { "Ojibwe", 33875, "alg", {"Cans", "Latn"}, sort_key = { from = {"aa", "ʼ", "ii", "oo", "sh", "zh"}, to = {"a~", "h~", "i~", "o~", "s~", "z~"}} , } m["om"] = { "Oromo", 33864, "cus-eas", {"Latn", "Ethi"}, } m["or"] = { "Oriya", 33810, "inc-eas", {"Orya"}, ancestors = {"inc-mor"}, translit_module = "or-translit", } m["os"] = { "Ossetian", 33968, "xsc", {"Cyrl", "Geor", "Latn"}, ancestors = {"oos"}, translit_module = "os-translit", override_translit = true, entry_name = { from = {GRAVE, ACUTE}, to = {}} , } m["pa"] = { "Punjabi", 58635, "inc-pan", {"Guru", "pa-Arab"}, ancestors = {"inc-opa"}, translit_module = "translit-redirect", entry_name = { from = {u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658), u(0x08C7), u(0x0768)}, to = {"", "", "", "", "", "", "", "", "", "ل", "ن"}} , } m["pi"] = { "Pali", 36727, "inc-mid", {"Latn", "Brah", "Deva", "Beng", "Sinh", "Mymr", "Thai", "Lana", "Laoo", "Khmr"}, ancestors = {"sa"}, translit_module = "translit-redirect", sort_key = { from = {"ā", "ī", "ū", "ḍ", "ḷ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṭ", "([เโ])([ก-ฮ])", "([ເໂ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)}, to = {"a~", "i~", "u~", "d~", "l~", "m~", "n~", "n~~", "n~~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} , entry_name = { from = {u(0xFE00)}, to = {}}, } m["pl"] = { "Polish", 809, "zlw-lch", Latn, ancestors = {"zlw-opl"}, sort_key = { from = {"[Ąą]", "[Ćć]", "[Ęę]", "[Łł]", "[Ńń]", "[Óó]", "[Śś]", "[Żż]", "[Źź]"}, to = { "a" .. u(0x10FFFF), "c" .. u(0x10FFFF), "e" .. u(0x10FFFF), "l" .. u(0x10FFFF), "n" .. u(0x10FFFF), "o" .. u(0x10FFFF), "s" .. u(0x10FFFF), "z" .. u(0x10FFFF), "z" .. 
u(0x10FFFE)}} , } m["ps"] = { "Pashto", 58680, "ira-pat", {"ps-Arab"}, ancestors = {"ira-pat-pro"}, } m["pt"] = { "Portuguese", 5146, "roa-ibe", {"Latn", "Brai"}, ancestors = {"roa-opt"}, sort_key = { from = {"[àãáâä]", "[èẽéêë]", "[ìĩíï]", "[òóôõö]", "[üúùũ]", "ç", "ñ"}, to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} , } m["qu"] = { "Quechua", 5218, "qwe", Latn, } m["rm"] = { "Romansch", 13199, "roa-rhe", Latn, } m["ro"] = { "Romanian", 7913, "roa-eas", {"Latn", "Cyrl"}, sort_key = { from = {"ă" , "â" , "î" , "ș" , "ț" }, to = {"a~", "a~~", "i~", "s~", "t~"}}, } m["ru"] = { "Russian", 7737, "zle", {"Cyrl", "Brai"}, translit_module = "ru-translit", sort_key = { from = {"ё"}, to = {"е" .. mw.ustring.char(0x10FFFF)}}, entry_name = { from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE, DIAER}, to = {"Е", "е", "И", "и"}}, standardChars = "ЁА-яё0-9—" .. PUNCTUATION, } m["rw"] = { "Rwanda-Rundi", 3217514, "bnt-glb", Latn, entry_name = { from = {"[áāâǎā́]", "[éēêěḗ]", "[íīîǐī́]", "[óōôǒṓ]", "[úūûǔū́]"}, to = {"a", "e" , "i", "o" , "u"} }, } m["sa"] = { "Sanskrit", 11059, "inc-old", {"Deva", "Bali", "as-Beng", "Beng", "Bhks", "Brah", "Gran", "Gujr", "Guru", "Java", "Khar", "Khmr", "Knda", "Lana", "Laoo", "Mlym", "Modi", "Mymr", "Nand", "Newa", "Orya", "Saur", "Shrd", "Sidd", "Sinh", "Taml", "Telu", "Thai", "Tibt", "Tirh"}, sort_key = { from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)}, to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} , entry_name = { from = {u(0xFE00)}, to = {}}, translit_module = "translit-redirect", } m["sc"] = { "Sardinian", 33976, "roa", Latn, } m["sd"] = { "Sindhi", 33997, "inc-snd", {"sd-Arab", "Deva", "Sind", "Khoj"}, entry_name = { from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), 
u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}, ancestors = {"inc-vra"}, translit_module = "translit-redirect", } m["se"] = { "Northern Sami", 33947, "smi", Latn, entry_name = { from = {"ạ", "[ēẹ]", "ī", "[ōọ]", "ū", "ˈ"}, to = {"a", "e" , "i", "o" , "u"} }, sort_key = { from = {"á" , "č" , "đ" , "ŋ" , "š" , "ŧ" , "ž" }, to = {"a²", "c²", "d²", "n²", "s²", "t²", "z²"} }, standardChars = "A-PR-VZa-pr-vz0-9ÁáČčĐđŊŋŠšŦŧŽž" .. PUNCTUATION, } m["sg"] = { "Sango", 33954, "crp", Latn, ancestors = {"ngb"}, } m["sh"] = { "Serbo-Croatian", 9301, "zls", {"Latn", "Cyrl", "Glag"}, entry_name = { from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE}, to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "Е", "е", "И" , "и", "У", "у" }}, wikimedia_codes = {"sh", "bs", "hr", "sr"}, } m["si"] = { "Sinhalese", 13267, "inc-ins", {"Sinh"}, ancestors = {"elu-prk"}, translit_module = "si-translit", override_translit = true, } m["sk"] = { "Slovak", 9058, "zlw", Latn, sort_key = { from = {"[áä]", "é", "í", "[óô]", "ú", "ý", "ŕ", "ĺ", "[" .. DIAER .. ACUTE .. CIRC .. 
"]"}, to = {"a" , "e", "i", "o" , "u", "y", "r", "l", ""}} , } m["sl"] = { "Slovene", 9063, "zls", Latn, entry_name = { from = {"[ÁÀÂĀȂȀ]", "[áàâāȃȁ]", "[ÉÈÊĒȆȄỆẸ]", "[éèêēȇȅệẹə]", "[ÍÌÎĪȊȈ]", "[íìîīȋȉ]", "[ÓÒÔŌȎȌỘỌ]", "[óòôōȏȍộọ]", "[ŔȒȐ]", "[ŕȓȑ]", "[ÚÙÛŪȖȔ]", "[úùûūȗȕ]", "ł", GRAVE, ACUTE, CIRC, MACRON, DGRAVE, INVBREVE, DOTBELOW}, to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "l"}, }, sort_key = { from = {"č" , "š" , "ž" }, to = {"c²", "s²", "z²"}, }, } m["sm"] = { "Samoan", 34011, "poz-pnp", Latn, } m["sn"] = { "Shona", 34004, "bnt-sho", Latn, entry_name = {remove_diacritics = ACUTE}, } m["so"] = { "Somali", 13275, "cus-eas", {"Latn", "Arab", "Osma"}, entry_name = { from = {"[ÁÀÂ]", "[áàâ]", "[ÉÈÊ]", "[éèê]", "[ÍÌÎ]", "[íìî]", "[ÓÒÔ]", "[óòô]", "[ÚÙÛ]", "[úùû]", "[ÝỲ]", "[ýỳ]"}, to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "U" , "u", "Y", "y"}} , } m["sq"] = { "Albanian", 8748, "sqj", {"Latn", "Grek", "Elba"}, entry_name = {remove_diacritics = ACUTE}, sort_key = { from = { '[âãä]', '[ÂÃÄ]', '[êẽë]', '[ÊẼË]', 'ĩ', 'Ĩ', 'õ', 'Õ', 'ũ', 'Ũ', 'ỹ', 'Ỹ', 'ç', 'Ç' }, to = { 'a', 'A', 'e', 'E', 'i', 'I', 'o', 'O', 'u', 'U', 'y', 'Y', 'c', 'C' } } , } m["sr"] = { "Sebian", 9299, "zls", aliases = {"Српски", "Српски језик", "српски", "српски језик", "srpski", "srpski jezik", "Srpski", "Srpski jezik"}, scripts = {"Latn", "Cyrl"}, entry_name = { from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE}, to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "Е", "е", "И" , "и", "У", "у" }}, } m["ss"] = { "Swazi", 34014, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["st"] = { "Sotho", 34340, 
"bnt-sts", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["su"] = { "Sundanese", 34002, "poz-msa", {"Latn", "Sund"}, translit_module = "su-translit", } m["sv"] = { "Swedish", 9027, "gmq", Latn, ancestors = {"gmq-osw"}, } m["sw"] = { "Swahili", 7838, "bnt-swh", LatnArab, sort_key = { from = {"ng'", "^-"}, to = {"ngz"}} , } m["ta"] = { "Tamil", 5885, "dra", {"Taml"}, ancestors = {"oty"}, translit_module = "ta-translit", override_translit = true, } m["te"] = { "Telugu", 8097, "dra", {"Telu"}, translit_module = "te-translit", override_translit = true, } m["tg"] = { "Tajik", 9260, "ira-swi", {"Cyrl", "fa-Arab", "Latn"}, ancestors = {"pal"}, -- same as "fa", see WT:T:AFA translit_module = "tg-translit", override_translit = true, sort_key = { from = {"Ё", "ё"}, to = {"Е" , "е"}} , entry_name = { from = {ACUTE}, to = {}} , } m["th"] = { "Tai", 9217, "tai-swe", {"Thai", "Brai"}, translit_module = "th-translit", sort_key = { from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}, } m["ti"] = { "Tigrinya", 34124, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit", } m["tk"] = { "Turkmen", 9267, "trk-ogz", {"Latn", "Cyrl", "Arab"}, entry_name = { from = {"ā", "ē", "ī", "ō", "ū", "ȳ", "ȫ", "ǖ", MACRON}, to = {"a", "e", "i", "o", "u", "y", "ö", "ü", ""}}, ancestors = {"trk-ogz-pro"}, } m["tl"] = { "Tagalog", 34057, "phi", {"Latn", "Tglg"}, entry_name = { from = {"[áàâ]", "[éèê]", "[íìî]", "[óòô]", "[úùû]", ACUTE, GRAVE, CIRC}, to = {"a" , "e" , "i" , "o" , "u" }}, translit_module = "tl-translit", override_translit = true } m["tn"] = { "Tswana", 34137, "bnt-sts", Latn, } m["to"] = { "Tongan", 34094, "poz-pol", Latn, sort_key = { from = {"ā", "ē", "ī", "ō", "ū", MACRON}, to = {"a", "e", "i", "o", "u", ""}}, entry_name = { from = {"á", "é", "í", "ó", "ú", ACUTE}, to = {"a", "e", "i", "o", "u", ""}}, } m["tr"] = { "Turkish", 
256, "trk-ogz", Latn, ancestors = {"ota"}, } m["ts"] = { "Tsonga", 34327, "bnt-tsr", Latn, } m["tt"] = { "Tatar", 25285, "trk-kbu", {"Cyrl", "Latn", "tt-Arab"}, translit_module = "tt-translit", override_translit = true, } -- "tw" IS TREATED AS "ak", SEE WT:LT m["ty"] = { "Tahitian", 34128, "poz-pep", Latn, } m["ug"] = { "Uyghur", 13263, "trk-kar", {"ug-Arab", "Latn", "Cyrl"}, ancestors = {"chg"}, translit_module = "ug-translit", override_translit = true, } m["uk"] = { "Ukrainian", 8798, "zle", Cyrl, ancestors = {"orv"}, translit_module = "uk-translit", entry_name = { from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE}, to = {"Е", "е", "И", "и"}}, standardChars = "ЄІЇА-ЩЬЮ-щьюяєії" .. PUNCTUATION, } m["ur"] = { "Urdu", 1617, "inc-hnd", {"ur-Arab"}, ancestors = {"inc-ohi"}, entry_name = { from = {u(0x0640), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658)}, to = {}} , } m["uz"] = { "Uzbek", 9264, "trk-kar", {"Latn", "Cyrl", "fa-Arab"}, ancestors = {"chg"}, } m["ve"] = { "Venda", 32704, "bnt-bso", Latn, } m["vi"] = { "Vietnamese", 9199, "mkh-vie", {"Latn", "Hani"}, ancestors = {"mkh-mvi"}, sort_key = "vi-sortkey", } m["vo"] = { "Volapük", 36986, "art", Latn, } m["wa"] = { "Walloon", 34219, "roa-oil", Latn, ancestors = {"fro"}, sort_key = { from = {"[áàâäå]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"}, to = {"a" , "e" , "i" , "o" , "u" , "y" , "c"}} , } m["wo"] = { "Wolof", 34257, "alv-fwo", LatnArab, } m["xh"] = { "Xhosa", 13218, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["yi"] = { "Yiddish", 8641, "gmw", {"Hebr"}, ancestors = {"gmh"}, sort_key = { from = {"[אַאָ]", "בּ", "[וֹוּ]", "יִ", "ײַ", "פֿ"}, to = {"א", "ב", "ו", "י", "יי", "פ"}} , translit_module = "yi-translit", } m["yo"] = { "Yoruba", 34311, "alv-yor", Latn, sort_key = { from = {"ẹ", "ọ", "gb", "ṣ"}, 
to = {"e~" , "o~", "g~", "s~"}}, entry_name = { remove_diacritics = ACUTE .. GRAVE .. MACRON }, } m["za"] = { "Zhuang", 13216, "tai", {"Latn", "Hani"}, sort_key = { from = {"%p"}, to = {""}}, } m["zh"] = { "Chinese", 7850, "zhx", {"Hani", "Brai", "Nshu"}, ancestors = {"ltc"}, sort_key = "zh-sortkey", } m["zu"] = { "Zulu", 10179, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } return m rr08cwq52vf14ctn6kcpvqf40nplp9a 13340 13339 2022-08-04T12:41:04Z Asinis632 1829 Scribunto text/plain local u = mw.ustring.char -- UTF-8 encoded strings for some commonly-used diacritics local GRAVE = u(0x0300) local ACUTE = u(0x0301) local CIRC = u(0x0302) local TILDE = u(0x0303) local MACRON = u(0x0304) local BREVE = u(0x0306) local DOTABOVE = u(0x0307) local DIAER = u(0x0308) local CARON = u(0x030C) local DGRAVE = u(0x030F) local INVBREVE = u(0x0311) local DOTBELOW = u(0x0323) local RINGBELOW = u(0x0325) local CEDILLA = u(0x0327) local OGONEK = u(0x0328) local CGJ = u(0x034F) -- combining grapheme joiner local DOUBLEINVBREVE = u(0x0361) -- Punctuation to be used for standardChars field local PUNCTUATION = ' !#%&*+,-./:;<=>?@^_`|~\'()' local Cyrl = {"Cyrl"} local Latn = {"Latn"} local LatnArab = {"Latn", "Arab"} local m = {} m["aa"] = { "Afar", 27811, "cus-eas", Latn, entry_name = { remove_diacritics = ACUTE}, } m["ab"] = { "Abkhaz", 5111, "cau-abz", {"Cyrl", "Geor", "Latn"}, translit_module = "ab-translit", override_translit = true, entry_name = { from = {GRAVE, ACUTE}, to = {}} , } m["ae"] = { "Avestan", 29572, "ira-cen", {"Avst", "Gujr"}, translit_module = "Avst-translit", wikipedia_article = "Avestan", } m["af"] = { "Afrikaans", 14196, "gmw", LatnArab, ancestors = {"nl"}, sort_key = { from = {"[äáâà]", "[ëéêè]", "[ïíîì]", "[öóôò]", "[üúûù]", "[ÿýŷỳ]", "^-", "'"}, to = {"a" , "e" , "i" , "o" , "u" , "y" }} , } m["ak"] = { "Akan", 
28026, "alv-ctn", Latn, } m["am"] = { "Amharic", 28244, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit", } m["an"] = { "Aragonese", 8765, "roa-ibe", Latn, ancestors = {"roa-oan"}, } m["ar"] = { "Arabic", 13955, "sem-arb", {"Arab", "Hebr", "Brai"}, -- replace alif waṣl with alif -- remove tatweel and diacritics: fathatan, dammatan, kasratan, fatha, -- damma, kasra, shadda, sukun, superscript (dagger) alef entry_name = { from = {u(0x0671), u(0x0640), "[" .. u(0x064B) .. "-" .. u(0x0652) .. "]", u(0x0670)}, to = {u(0x0627)}}, -- put Judeo-Arabic (Hebrew-script Arabic) under the category header -- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles sort_key = { from = {"^%f[" .. u(0x5D0) .. "-" .. u(0x5EA) .. "]"}, to = {u(0xFB21)}, }, translit_module = "ar-translit", } m["as"] = { "Assamese", 29401, "inc-eas", {"as-Beng"}, ancestors = {"inc-mas"}, translit_module = "as-translit", } m["av"] = { "Avar", 29561, "cau-nec", Cyrl, ancestors = {"oav"}, translit_module = "av-translit", override_translit = true, entry_name = { from = {GRAVE, ACUTE}, to = {}} , } m["ay"] = { "Aymara", 4627, "sai-aym", Latn, } m["az"] = { "Azerbaijani", 9292, "trk-ogz", {"Latn", "Cyrl", "fa-Arab"}, ancestors = {"trk-oat"}, } m["ba"] = { "Bashkir", 13389, "trk-kbu", Cyrl, translit_module = "ba-translit", override_translit = true, } m["be"] = { "Belarusian", 9091, "zle", Cyrl, ancestors = {"orv"}, translit_module = "be-translit", sort_key = { from = {"Ё", "ё"}, to = {"Е" , "е"}}, entry_name = { from = {"Ѐ", "ѐ", GRAVE, ACUTE}, to = {"Е", "е"}}, } m["bg"] = { "Bulgarian", 7918, "zls", {"Cyrl"}, ancestors = {"cu"}, translit_module = "bg-translit", entry_name = { from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE}, to = {"Е", "е", "И", "и"}}, } m["bh"] = { "Bihari", 135305, "inc-eas", {"Deva"}, ancestors = {"inc-mgd"}, } m["bi"] = { "Bislama", 35452, "crp", Latn, ancestors = {"en"}, } m["bm"] = { "Bambara", 33243, "dmn-emn", Latn, } m["bn"] = { "Bengali", 9610, "inc-eas", {"Beng", 
"Newa"}, ancestors = {"inc-mbn"}, translit_module = "bn-translit", } m["bo"] = { "Tibetan", 34271, "sit-tib", {"Tibt"}, -- sometimes Deva? ancestors = {"xct"}, translit_module = "bo-translit", override_translit = true, } m["br"] = { "Breton", 12107, "cel-bry", Latn, ancestors = {"xbm"}, } m["ca"] = { "Catalan", 7026, "roa", Latn, ancestors = {"roa-oca"}, sort_key = { from = {"à", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "l·l"}, to = {"a", "e" , "i" , "o" , "u" , "c", "ll" }} , } m["ce"] = { "Chechen", 33350, "cau-vay", Cyrl, translit_module = "ce-translit", override_translit = true, entry_name = { from = {MACRON}, to = {}}, } m["ch"] = { "Chamorro", 33262, "poz-sus", Latn, } m["co"] = { "Corsican", 33111, "roa-itd", Latn, } m["cr"] = { "Cree", 33390, "alg", {"Cans", "Latn"}, translit_module = "translit-redirect", } m["cs"] = { "Czech", 9056, "zlw", Latn, ancestors = {"zlw-ocs"}, sort_key = { from = {"á", "é", "í", "ó", "[úů]", "ý"}, to = {"a", "e", "i", "o", "u" , "y"}} , } m["cu"] = { "Old Church Slavonic", 35499, "zls", {"Cyrs", "Glag"}, translit_module = "Cyrs-Glag-translit", entry_name = { from = {u(0x0484)}, -- kamora to = {}}, sort_key = { from = {"оу", "є"}, to = {"у" , "е"}} , } m["cv"] = { "Chuvash", 33348, "trk-ogr", Cyrl, ancestors = {"xbo"}, translit_module = "cv-translit", override_translit = true, } m["cy"] = { "Welsh", 9309, "cel-bry", Latn, ancestors = {"wlm"}, sort_key = { from = {"[âáàä]", "ch", "dd", "[êéèë]", "ff", "ngh", "[îíìï]", "ll", "[ôóòö]", "ph", "rh", "th", "[ûúùü]", "[ŵẃẁẅ]", "[ŷýỳÿ]", "'"}, to = {"a" , "c~", "d~", "e" , "f~", "g~h", "i" , "l~", "o" , "p~", "r~", "t~", "u" , "w" , "y" }} , standardChars = "A-IL-PR-UWYa-il-pr-uwy0-9ÂâÊêÎîÔôÛûŴŵŶŷ" .. 
PUNCTUATION, } m["da"] = { "Danish", 9035, "gmq", Latn, ancestors = {"gmq-oda"}, } m["de"] = { "Jeman", 188, "gmw", {"Latn", "Latf"}, ancestors = {"gmh"}, sort_key = { from = {"[äàáâå]", "[ëèéê]", "[ïìíî]", "[öòóô]", "[üùúû]", "ß" }, to = {"a" , "e" , "i" , "o" , "u" , "ss"}} , standardChars = "A-Za-z0-9ÄäÖöÜüß" .. PUNCTUATION, } m["dv"] = { "Dhivehi", 32656, "inc-ins", {"Thaa"}, ancestors = {"elu-prk"}, translit_module = "dv-translit", override_translit = true, } m["dz"] = { "Dzongkha", 33081, "sit-tib", {"Tibt"}, ancestors = {"xct"}, translit_module = "bo-translit", override_translit = true, } m["ee"] = { "Ewe", 30005, "alv-gbe", Latn, } m["el"] = { "Greek", 9129, "grk", {"Grek", "Brai"}, ancestors = {"grc"}, translit_module = "el-translit", override_translit = true, sort_key = { -- Keep this synchronized with grc, cpg, pnt, tsd from = {"[ᾳάᾴὰᾲᾶᾷἀᾀἄᾄἂᾂἆᾆἁᾁἅᾅἃᾃἇᾇ]", "[έὲἐἔἒἑἕἓ]", "[ῃήῄὴῂῆῇἠᾐἤᾔἢᾒἦᾖἡᾑἥᾕἣᾓἧᾗ]", "[ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗ]", "[όὸὀὄὂὁὅὃ]", "[ύὺῦὐὔὒὖὑὕὓὗϋΰῢῧ]", "[ῳώῴὼῲῶῷὠᾠὤᾤὢᾢὦᾦὡᾡὥᾥὣᾣὧᾧ]", "ῥ", "ς"}, to = {"α" , "ε" , "η" , "ι" , "ο" , "υ" , "ω" , "ρ", "σ"}} , standardChars = "ͺ;΄-ώϜϝ" .. PUNCTUATION, } m["en"] = { "Inglis", 1860, "gmw", {"Latn", "Brai", "Shaw", "Dsrt"}, -- entries in Shaw or Dsrt might require prior discussion ancestors = {"enm"}, sort_key = { from = {"[äàáâåā]", "[ëèéêē]", "[ïìíîī]", "[öòóôō]", "[üùúûū]", "æ" , "œ" , "[çč]", "ñ", "'"}, to = {"a" , "e" , "i" , "o" , "u" , "ae", "oe", "c" , "n"}}, wikimedia_codes = {"en", "simple"}, standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF), } m["eo"] = { "Esperanto", 143, "art", Latn, sort_key = { from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ĉ]", "[ĝ]", "[ĥ]", "[ĵ]", "[ŝ]", "[ŭ]"}, to = {"a" , "e" , "i" , "o" , "u", "cĉ", "gĉ", "hĉ", "jĉ", "sĉ", "uĉ"}} , standardChars = "A-PRSTUVZa-prstuvzĉĈĝĜĵĴŝŜŭŬ0-9" .. 
PUNCTUATION, } m["es"] = { "Spanish", 1321, "roa-ibe", {"Latn", "Brai"}, ancestors = {"osp"}, sort_key = { from = {"á", "é", "í", "ó", "[úü]", "ç", "ñ"}, to = {"a", "e", "i", "o", "u" , "c", "n~"}}, standardChars = "A-VXYZa-vxyz0-9ÁáÉéÍíÓóÚúÑñ¿¡" .. PUNCTUATION, } m["et"] = { "Estonian", 9072, "fiu-fin", Latn, } m["eu"] = { "Basque", 8752, "euq", Latn, } m["fa"] = { "Persian", 9168, "ira-swi", {"fa-Arab"}, ancestors = {"pal"}, -- "ira-mid" entry_name = { from = {u(0x064E), u(0x0640), u(0x064F), u(0x0650), u(0x0651), u(0x0652)}, to = {}} , } m["ff"] = { "Fula", 33454, "alv-fwo", {"Latn", "Adlm"}, } m["fi"] = { "Finnish", 1412, "fiu-fin", Latn, entry_name = { from = {"ˣ"}, -- Used to indicate gemination of the next consonant to = {}}, sort_key = { from = {"[áàâã]", "[éèêẽ]", "[íìîĩ]", "[óòôõ]", "[úùûũ]", "[ýỳŷüű]", "[øõő]", "æ" , "œ" , "[čç]", "š", "ž", "ß" , "[':]"}, to = {"a" , "e" , "i" , "o" , "u" , "y" , "ö" , "ae", "oe", "c" , "s", "z", "ss"}} , } m["fj"] = { "Fijian", 33295, "poz-occ", Latn, } m["fo"] = { "Faroese", 25258, "gmq", Latn, ancestors = {"non"}, } m["fr"] = { "Frens", 150, "roa-oil", {"Latn", "Brai"}, ancestors = {"frm"}, sort_key = { from = {"[áàâä]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "æ" , "œ" , "'"}, to = {"a" , "e" , "i" , "o" , "u" , "y" , "c", "ae", "oe"}}, standardChars = "A-Za-z0-9ÀÂÇÉÈÊËÎÏÔŒÛÙÜàâçéèêëîïôœûùü«»" .. PUNCTUATION, } m["fy"] = { "West Frisian", 27175, "gmw-fri", Latn, ancestors = {"ofs"}, sort_key = { from = {"[àáâä]", "[èéêë]", "[ìíîïyỳýŷÿ]", "[òóôö]", "[ùúûü]", "æ", "[ /.-]"}, to = {"a" , "e" , "i" , "o" , "u", "ae"}} , standardChars = "A-PR-WYZa-pr-wyz0-9Ææâäàéêëèïìôöòúûüùỳ" .. PUNCTUATION, } m["ga"] = { "Irish", 9142, "cel-gae", Latn, ancestors = {"mga"}, sort_key = { from = {"á", "é", "í", "ó", "ú", "ý", "ḃ" , "ċ" , "ḋ" , "ḟ" , "ġ" , "ṁ" , "ṗ" , "ṡ" , "ṫ" }, to = {"a", "e", "i", "o", "u", "y", "bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}} , standardChars = "A-IL-PR-Ua-il-pr-u0-9ÁáÉéÍíÓóÚú" .. 
PUNCTUATION, } m["gd"] = { "Scottish Gaelic", 9314, "cel-gae", Latn, ancestors = {"mga"}, sort_key = { from = {"[áà]", "[éè]", "[íì]", "[óò]", "[úù]", "[ýỳ]"}, to = {"a" , "e" , "i" , "o" , "u" , "y" }} , standardChars = "A-IL-PR-Ua-il-pr-u0-9ÀàÈèÌìÒòÙù" .. PUNCTUATION, } m["gl"] = { "Galician", 9307, "roa-ibe", Latn, ancestors = {"roa-opt"}, sort_key = { from = {"á", "é", "í", "ó", "ú"}, to = {"a", "e", "i", "o", "u"}} , } m["gn"] = { "Guaraní", 35876, "tup-gua", Latn, } m["gu"] = { "Gujarati", 5137, "inc-wes", {"Gujr"}, ancestors = {"inc-mgu"}, translit_module = "gu-translit", } m["gv"] = { "Manx", 12175, "cel-gae", Latn, ancestors = {"mga"}, sort_key = { from = {"ç", "-"}, to = {"c"}} , standardChars = "A-WYÇa-wyç0-9" .. PUNCTUATION, } m["ha"] = { "Hausa", 56475, "cdc-wst", LatnArab, sort_key = { from = {"ɓ", "ɗ", "ƙ", "'y", "ƴ", "'" }, to = {"b~" , "d~" , "k~", "y~", "y~", "" }}, entry_name = { from = {"R̃", "r̃", "À", "à", "È", "è", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Î", "î", "Ô", "ô", "Û", "û", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ō", "ō", "Ū", "ū", "Á", "á", "É", "é", "Í", "í", "Ó", "ó", "Ú", "ú", "Ā̀", "ā̀", "Ḕ", "ḕ", "Ī̀", "ī̀", "Ṑ", "ṑ", "Ū̀", "ū̀", GRAVE, ACUTE}, to = {"R", "r", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "I", "i", "O", "o", "U", "u"}}, } m["he"] = { "Hebrew", 9288, "sem-can", {"Hebr", "Phnx", "Brai"}, entry_name = { from = {"[" .. u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. CGJ .. "]"}, to = {}} , } m["hi"] = { "Hindi", 1568, "inc-hnd", {"Deva", "Kthi", "Newa"}, ancestors = {"inc-ohi"}, translit_module = "hi-translit", standardChars = "ँंअ-ऊएऐओ-घच-झट-नप-रलवशसहा-ूेैो-◌्।-॰ड़ढ़" .. 
PUNCTUATION, } m["ho"] = { "Hiri Motu", 33617, "crp", Latn, ancestors = {"meu"}, } m["ht"] = { "Haitian Creole", 33491, "crp", Latn, ancestors = {"fr"}, } m["hu"] = { "Hungarian", 9067, "urj-ugr", {"Latn", "Hung"}, ancestors = {"ohu"}, sort_key = { from = {"á", "é", "í", "ó", "ú", "[öő]", "[üű]", "cs", "dzs", "gy", "ly", "ny", "zs"}, to = {"a", "e", "i", "o", "u", "o~", "u~", "c~", "dz~", "g~", "l~", "n~", "z~"}, }, } m["hy"] = { "Armenian", 8785, "hyx", {"Armn", "Brai"}, ancestors = {"axm"}, translit_module = "Armn-translit", override_translit = true, sort_key = { from = {"ու", "և", "եւ"}, to = {"ւ", "եվ", "եվ"}}, entry_name = { from = {"՞", "՜", "՛", "՟", "և", "<sup>յ</sup>", "<sup>ի</sup>", "<sup>է</sup>"}, to = {"", "", "", "", "եւ", "յ", "ի", "է"}} , } m["hz"] = { "Herero", 33315, "bnt-swb", Latn, } m["ia"] = { "Interlingua", 35934, "art", Latn, } m["id"] = { "Indonesian", 9240, "poz-mly", Latn, ancestors = {"ms"}, } m["ie"] = { "Interlingue", 35850, "art", Latn, type = "appendix-constructed", } m["ig"] = { "Igbo", 33578, "alv-igb", Latn, sort_key = { from = {"ụ", "ị", "ọ", "gb", "gh", "gw", "kp", "kw", "ṅ", "nw", "ny", "sh"}, to = {"u~" , "i~", "o~", "gy", "gz", "g~", "kz", "k~", "ny", "nz", "n~", "s~"}}, entry_name = { remove_diacritics = ACUTE .. GRAVE .. MACRON }, } m["ii"] = { "Sichuan Yi", 34235, "tbq-lol", {"Yiii"}, translit_module = "ii-translit", } m["ik"] = { "Inupiaq", 27183, "esx-inu", Latn, } m["io"] = { "Ido", 35224, "art", Latn, } m["is"] = { "Icelandic", 294, "gmq", Latn, ancestors = {"non"}, } m["it"] = { "Italian", 652, "roa-itd", Latn, sort_key = { from = {"[àáâäå]", "[èéêë]", "[ìíîï]", "[òóôö]", "[ùúûü]"}, to = {"a" , "e" , "i" , "o" , "u" }} , standardChars = "A-IL-VZa-il-vz0-9" .. 
PUNCTUATION, } m["iu"] = { "Inuktitut", 29921, "esx-inu", {"Cans", "Latn"}, translit_module = "translit-redirect", override_translit = true, } m["ja"] = { "Siapan", 5287, "jpx", {"Jpan", "Brai"}, ancestors = {"ojp"}, --[=[ -- Handled by jsort function in [[Module:ja]]. sort_key = { from = {"[ぁァア]", "[ぃィイ]", "[ぅゔゥウヴ]", "[ぇェエ]", "[ぉォオ]", "[がゕカガヵ]", "[ぎキギ]", "[ぐクグㇰ]", "[げゖケゲヶ]", "[ごコゴ]", "[ざサザ]", "[じシジㇱ]", "[ずスズㇲ]", "[ぜセゼ]", "[ぞソゾ]", "[だタダ]", "[ぢチヂ]", "[っづッツヅ]", "[でテデ]", "[どトドㇳ]", "ナ", "ニ", "[ヌㇴ]", "ネ", "ノ", "[ばぱハバパㇵ]", "[びぴヒビピㇶ]", "[ぶぷフブプㇷ]", "[べぺヘベペㇸ]", "[ぼぽホボポㇹ]", "マ", "ミ", "[ムㇺ]", "メ", "モ", "[ゃャヤ]", "[ゅュユ]", "[ょョヨ]", "[ラㇻ]", "[リㇼ]", "[ルㇽ]", "[レㇾ]", "[ロㇿ]", "[ゎヮワヷ]", "[ヰヸ]", "[ヱヹ]", "[ヲヺ]", "ン", "[゙゚゛゜ゝゞ・ヽヾ]", "𛀀"}, to = {"あ", "い", "う", "え", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "や", "ゆ", "よ", "ら", "り", "る", "れ", "ろ", "わ", "ゐ", "ゑ", "を", "ん", "", "え"}}, --]=] } m["jv"] = { "Javanese", 33549, "poz-sus", {"Latn", "Java"}, translit_module = "jv-translit", ancestors = {"kaw"}, link_tr = true, } m["ka"] = { "Georgian", 8108, "ccs-gzn", {"Geor", "Geok", "Hebr"}, -- Hebr is used to write Judeo-Georgian ancestors = {"oge"}, translit_module = "Geor-translit", override_translit = true, entry_name = { from = {"̂"}, to = {""}}, } m["kg"] = { "Kongo", 33702, "bnt-kng", Latn, } m["ki"] = { "Kikuyu", 33587, "bnt-kka", Latn, } m["kj"] = { "Kwanyama", 1405077, "bnt-ova", Latn, } m["kk"] = { "Kazakh", 9252, "trk-kno", {"Cyrl", "Latn", "kk-Arab"}, translit_module = "kk-translit", override_translit = true, } m["kl"] = { "Greenlandic", 25355, "esx-inu", Latn, } m["km"] = { "Khmer", 9205, "mkh-kmr", {"Khmr"}, ancestors = {"mkh-mkm"}, translit_module = "km-translit", } m["kn"] = { "Kannada", 33673, "dra", {"Knda"}, ancestors = {"dra-mkn"}, translit_module = "kn-translit", } m["ko"] = { "Korean", 9176, "qfa-kor", {"Kore", "Brai"}, ancestors = {"okm"}, -- 20210122 idea: 
strip parenthesized hanja from entry link -- Hani regex is a reasonable subset of Hani from [[Module:scripts/data]], -- last updated on 20210214. entry_name = { from = { " *%([一-鿿㐀-䶿𠀀-𮯯𰀀-𱍏﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧﨨﨩]+%)", }, to = { "", }}, display = { from = {"%-"}, to = {}, }, translit_module = "ko-translit", } m["kr"] = { "Kanuri", 36094, "ssa-sah", LatnArab, sort_key = { from = {"ny", "ǝ", "sh"}, to = {"n~", "e~", "s~"}} , -- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically entry_name = { from = {"À", "à", "È", "è", "Ǝ̀", "ǝ̀", "Ì", "ì", "Ò", "ò", "Ù", "ù", "Â", "â", "Ê", "ê", "Ǝ̂", "ǝ̂", "Î", "î", "Ô", "ô", "Û", "û", "Ă", "ă", "Ĕ", "ĕ", "Ǝ̆", "ǝ̆", "Ĭ", "ĭ", "Ŏ", "ŏ", "Ŭ", "ŭ", "Á", "á", "É", "é", "Ǝ́", "ǝ́", "Í", "í", "Ó", "ó", "Ú", "ú", GRAVE, ACUTE}, to = {"A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u", "A", "a", "E", "e", "Ǝ", "ǝ", "I", "i", "O", "o", "U", "u"}}, } m["ks"] = { "Kashmiri", 33552, "inc-dar", {"ks-Arab", "Deva", "Shrd", "Latn"}, translit_module = "translit-redirect", ancestors = {"sa"}, } -- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT m["kw"] = { "Cornish", 25289, "cel-bry", Latn, ancestors = {"cnx"}, } m["ky"] = { "Kyrgyz", 9255, "trk-kip", {"Cyrl", "Latn", "Arab"}, translit_module = "ky-translit", override_translit = true, } m["la"] = { "Latin", 397, "itc", Latn, ancestors = {"itc-ola"}, entry_name = {remove_diacritics = MACRON .. BREVE .. DIAER .. DOUBLEINVBREVE}, standardChars = "A-Za-z0-9ÆæŒœĀ-ăĒ-ĕĪ-ĭŌ-ŏŪ-ŭȲȳ" .. MACRON .. BREVE .. 
PUNCTUATION, } m["lb"] = { "Luxembourgish", 9051, "gmw", Latn, ancestors = {"gmh"}, } m["lg"] = { "Luganda", 33368, "bnt-nyg", Latn, entry_name = { from = {"á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ", "â", "Â", "ê", "Ê", "î", "Î", "ô", "Ô", "û", "Û" }, to = {"a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U",}}, sort_key = { from = {"ŋ"}, to = {"n"}} , } m["li"] = { "Limburgish", 102172, "gmw", Latn, ancestors = {"dum"}, } m["ln"] = { "Lingala", 36217, "bnt-bmo", Latn, } m["lo"] = { "Lao", 9211, "tai-swe", {"Laoo"}, translit_module = "lo-translit", sort_key = { from = {"[%pໆ]", "[່-ໍ]", "ຼ", "ຽ", "ໜ", "ໝ", "([ເແໂໃໄ])([ກ-ຮ])"}, to = {"", "", "ລ", "ຍ", "ຫນ", "ຫມ", "%2%1"}}, standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. PUNCTUATION, } m["lt"] = { "Lithuanian", 9083, "bat", Latn, ancestors = {"olt"}, entry_name = { from = {"[áãà]", "[ÁÃÀ]", "[éẽè]", "[ÉẼÈ]", "[íĩì]", "[ÍĨÌ]", "[ýỹ]", "[ÝỸ]", "ñ", "[óõò]", "[ÓÕÒ]", "[úũù]", "[ÚŨÙ]", ACUTE, GRAVE, TILDE}, to = {"a", "A", "e", "E", "i", "I", "y", "Y", "n", "o", "O", "u", "U"}} , } m["lu"] = { "Luba-Katanga", 36157, "bnt-lub", Latn, } m["lv"] = { "Latvian", 9078, "bat", Latn, entry_name = { -- This attempts to convert vowels with tone marks to vowels either with -- or without macrons. Specifically, there should be no macrons if the -- vowel is part of a diphthong (including resonant diphthongs such -- pìrksts -> pirksts not #pīrksts). What we do is first convert the -- vowel + tone mark to a vowel + tilde in a decomposed fashion, -- then remove the tilde in diphthongs, then convert the remaining -- vowel + tilde sequences to macroned vowels, then delete any other -- tilde. We leave already-macroned vowels alone: Both e.g. ar and ār -- occur before consonants. FIXME: This still might not be sufficient. 
from = {"Ȩ", "ȩ", "[ÂÃÀ]", "[âãà]", "[ÊẼÈ]", "[êẽè]", "[ÎĨÌ]", "[îĩì]", "[ÔÕÒ]", "[ôõò]", "[ÛŨÙ]", "[ûũù]", "[ÑǸ]", "[ñǹ]", "[" .. CIRC .. TILDE ..GRAVE .."]", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .. "?([^aAeEiIoOuUāĀēĒīĪūŪ])", "([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .."?$", "([iI])" .. TILDE .. "?([eE])" .. TILDE .. "?", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "U" .. TILDE, "u" .. TILDE, TILDE}, to = {"E", "e", "A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "O", "o", "U" .. TILDE, "u" .. TILDE, "N", "n", TILDE, "%1%2%3", "%1%2", "%1%2", "Ā", "ā", "Ē", "ē", "Ī", "ī", "Ū", "ū", ""}}, } m["mg"] = { "Malagasy", 7930, "poz-bre", Latn, } m["mh"] = { "Marshallese", 36280, "poz-mic", Latn, sort_key = { from = {"ā" , "ļ" , "m̧" , "ņ" , "n̄" , "o̧" , "ō" , "ū" }, to = {"a~", "l~", "m~", "n~", "n~~", "o~", "o~~", "u~"}} , } m["mi"] = { "Maori", 36451, "poz-pep", Latn, } m["mk"] = { "Macedonian", 9296, "zls", Cyrl, translit_module = "mk-translit", entry_name = { from = {ACUTE}, to = {}}, } m["ml"] = { "Malayalam", 36236, "dra", {"Mlym"}, translit_module = "ml-translit", override_translit = true, } m["mn"] = { "Mongolian", 9246, "xgn", {"Cyrl", "Mong", "Soyo", "Zanb"}, -- entries in Soyo or Zanb might require prior discussion ancestors = {"cmg"}, translit_module = "mn-translit", override_translit = true, } -- "mo" IS TREATED AS "ro", SEE WT:LT m["mr"] = { "Marathi", 1571, "inc-sou", {"Deva", "Modi"}, ancestors = {"omr"}, translit_module = "mr-translit", entry_name = { from = {"च़", "ज़", "झ़"}, to = {"च", "ज", "झ"}} , } m["ms"] = { "Malay", 9237, "poz-mly", {"Latn", "ms-Arab"}, } m["mt"] = { "Maltese", 9166, "sem-arb", Latn, ancestors = {"sqr"}, sort_key = { from = {"ċ", "ġ", "ħ"}, to = {"c", "g", "h"} } } m["my"] = { "Burmese", 9228, "tbq-brm", {"Mymr"}, ancestors = {"obr"}, translit_module = "my-translit", override_translit = true, sort_key = { from = {"ျ", 
"ြ", "ွ", "ှ", "ဿ"}, to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "သ္သ"}}, } m["na"] = { "Nauruan", 13307, "poz-mic", Latn, } m["nb"] = { "Norwegian Bokmål", 25167, "gmq", Latn, ancestors = {"gmq-mno"}, wikimedia_codes = {"no"}, } m["nd"] = { "Northern Ndebele", 35613, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["ne"] = { "Nepali", 33823, "inc-pah", {"Deva", "Newa"}, translit_module = "ne-translit", } m["ng"] = { "Ndonga", 33900, "bnt-ova", Latn, } m["nl"] = { "Dutch", 7411, "gmw", Latn, ancestors = {"dum"}, sort_key = { from = {"[äáâå]", "[ëéê]", "[ïíî]", "[öóô]", "[üúû]", "ç", "ñ", "^-"}, to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} , standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF), } m["nn"] = { "Norwegian Nynorsk", 25164, "gmq", Latn, ancestors = {"gmq-mno"}, } m["no"] = { "Norwegian", 9043, "gmq", Latn, ancestors = {"gmq-mno"}, } m["nr"] = { "Southern Ndebele", 36785, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["nv"] = { "Navajo", 13310, "apa", Latn, sort_key = { from = {"[áą]", "[éę]", "[íį]", "[óǫ]", "ń", "^n([djlt])", "ł" , "[ʼ’']", ACUTE}, to = {"a" , "e" , "i" , "o" , "n", "ni%1" , "l~"}}, -- the tilde is used to guarantee that ł will always be sorted after all other words with l } m["ny"] = { "Chichewa", 33273, "bnt-nys", Latn, entry_name = { from = {"ŵ", "Ŵ", "á", "Á", "é", "É", "í", "Í", "ó", "Ó", "ú", "Ú", "ń", "Ń", "ḿ", "Ḿ" }, to = {"w", "W", "a", "A", "e", "E", "i", "I", "o", "O", "u", "U", "n", "N", "m", "M"}}, sort_key = { from = {"ng'"}, to = {"ng"}} , } m["oc"] = { "Occitan", 14185, "roa", {"Latn", "Hebr"}, ancestors = {"pro"}, sort_key = { from = {"[àá]", "[èé]", "[íï]", "[òó]", "[úü]", "ç", "([lns])·h"}, to = {"a" , "e" , "i" 
, "o" , "u" , "c", "%1h" }} , } m["oj"] = { "Ojibwe", 33875, "alg", {"Cans", "Latn"}, sort_key = { from = {"aa", "ʼ", "ii", "oo", "sh", "zh"}, to = {"a~", "h~", "i~", "o~", "s~", "z~"}} , } m["om"] = { "Oromo", 33864, "cus-eas", {"Latn", "Ethi"}, } m["or"] = { "Oriya", 33810, "inc-eas", {"Orya"}, ancestors = {"inc-mor"}, translit_module = "or-translit", } m["os"] = { "Ossetian", 33968, "xsc", {"Cyrl", "Geor", "Latn"}, ancestors = {"oos"}, translit_module = "os-translit", override_translit = true, entry_name = { from = {GRAVE, ACUTE}, to = {}} , } m["pa"] = { "Punjabi", 58635, "inc-pan", {"Guru", "pa-Arab"}, ancestors = {"inc-opa"}, translit_module = "translit-redirect", entry_name = { from = {u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658), u(0x08C7), u(0x0768)}, to = {"", "", "", "", "", "", "", "", "", "ل", "ن"}} , } m["pi"] = { "Pali", 36727, "inc-mid", {"Latn", "Brah", "Deva", "Beng", "Sinh", "Mymr", "Thai", "Lana", "Laoo", "Khmr"}, ancestors = {"sa"}, translit_module = "translit-redirect", sort_key = { from = {"ā", "ī", "ū", "ḍ", "ḷ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṭ", "([เโ])([ก-ฮ])", "([ເໂ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)}, to = {"a~", "i~", "u~", "d~", "l~", "m~", "n~", "n~~", "n~~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} , entry_name = { from = {u(0xFE00)}, to = {}}, } m["pl"] = { "Polish", 809, "zlw-lch", Latn, ancestors = {"zlw-opl"}, sort_key = { from = {"[Ąą]", "[Ćć]", "[Ęę]", "[Łł]", "[Ńń]", "[Óó]", "[Śś]", "[Żż]", "[Źź]"}, to = { "a" .. u(0x10FFFF), "c" .. u(0x10FFFF), "e" .. u(0x10FFFF), "l" .. u(0x10FFFF), "n" .. u(0x10FFFF), "o" .. u(0x10FFFF), "s" .. u(0x10FFFF), "z" .. u(0x10FFFF), "z" .. 
u(0x10FFFE)}} , } m["ps"] = { "Pashto", 58680, "ira-pat", {"ps-Arab"}, ancestors = {"ira-pat-pro"}, } m["pt"] = { "Portuguese", 5146, "roa-ibe", {"Latn", "Brai"}, ancestors = {"roa-opt"}, sort_key = { from = {"[àãáâä]", "[èẽéêë]", "[ìĩíï]", "[òóôõö]", "[üúùũ]", "ç", "ñ"}, to = {"a" , "e" , "i" , "o" , "u" , "c", "n"}} , } m["qu"] = { "Quechua", 5218, "qwe", Latn, } m["rm"] = { "Romansch", 13199, "roa-rhe", Latn, } m["ro"] = { "Romanian", 7913, "roa-eas", {"Latn", "Cyrl"}, sort_key = { from = {"ă" , "â" , "î" , "ș" , "ț" }, to = {"a~", "a~~", "i~", "s~", "t~"}}, } m["ru"] = { "Russian", 7737, "zle", {"Cyrl", "Brai"}, translit_module = "ru-translit", sort_key = { from = {"ё"}, to = {"е" .. mw.ustring.char(0x10FFFF)}}, entry_name = { from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE, DIAER}, to = {"Е", "е", "И", "и"}}, standardChars = "ЁА-яё0-9—" .. PUNCTUATION, } m["rw"] = { "Rwanda-Rundi", 3217514, "bnt-glb", Latn, entry_name = { from = {"[áāâǎā́]", "[éēêěḗ]", "[íīîǐī́]", "[óōôǒṓ]", "[úūûǔū́]"}, to = {"a", "e" , "i", "o" , "u"} }, } m["sa"] = { "Sanskrit", 11059, "inc-old", {"Deva", "Bali", "as-Beng", "Beng", "Bhks", "Brah", "Gran", "Gujr", "Guru", "Java", "Khar", "Khmr", "Knda", "Lana", "Laoo", "Mlym", "Modi", "Mymr", "Nand", "Newa", "Orya", "Saur", "Shrd", "Sidd", "Sinh", "Taml", "Telu", "Thai", "Tibt", "Tirh"}, sort_key = { from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)}, to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}} , entry_name = { from = {u(0xFE00)}, to = {}}, translit_module = "translit-redirect", } m["sc"] = { "Sardinian", 33976, "roa", Latn, } m["sd"] = { "Sindhi", 33997, "inc-snd", {"sd-Arab", "Deva", "Sind", "Khoj"}, entry_name = { from = {u(0x0671), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), 
u(0x0650), u(0x0651), u(0x0652), u(0x0670), u(0x0640)}, to = {u(0x0627)}}, ancestors = {"inc-vra"}, translit_module = "translit-redirect", } m["se"] = { "Northern Sami", 33947, "smi", Latn, entry_name = { from = {"ạ", "[ēẹ]", "ī", "[ōọ]", "ū", "ˈ"}, to = {"a", "e" , "i", "o" , "u"} }, sort_key = { from = {"á" , "č" , "đ" , "ŋ" , "š" , "ŧ" , "ž" }, to = {"a²", "c²", "d²", "n²", "s²", "t²", "z²"} }, standardChars = "A-PR-VZa-pr-vz0-9ÁáČčĐđŊŋŠšŦŧŽž" .. PUNCTUATION, } m["sg"] = { "Sango", 33954, "crp", Latn, ancestors = {"ngb"}, } m["sh"] = { "Serbo-Croatian", 9301, "zls", {"Latn", "Cyrl", "Glag"}, entry_name = { from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE}, to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "Е", "е", "И" , "и", "У", "у" }}, wikimedia_codes = {"sh", "bs", "hr", "sr"}, } m["si"] = { "Sinhalese", 13267, "inc-ins", {"Sinh"}, ancestors = {"elu-prk"}, translit_module = "si-translit", override_translit = true, } m["sk"] = { "Slovak", 9058, "zlw", Latn, sort_key = { from = {"[áä]", "é", "í", "[óô]", "ú", "ý", "ŕ", "ĺ", "[" .. DIAER .. ACUTE .. CIRC .. 
"]"}, to = {"a" , "e", "i", "o" , "u", "y", "r", "l", ""}} , } m["sl"] = { "Slovene", 9063, "zls", Latn, entry_name = { from = {"[ÁÀÂĀȂȀ]", "[áàâāȃȁ]", "[ÉÈÊĒȆȄỆẸ]", "[éèêēȇȅệẹə]", "[ÍÌÎĪȊȈ]", "[íìîīȋȉ]", "[ÓÒÔŌȎȌỘỌ]", "[óòôōȏȍộọ]", "[ŔȒȐ]", "[ŕȓȑ]", "[ÚÙÛŪȖȔ]", "[úùûūȗȕ]", "ł", GRAVE, ACUTE, CIRC, MACRON, DGRAVE, INVBREVE, DOTBELOW}, to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "l"}, }, sort_key = { from = {"č" , "š" , "ž" }, to = {"c²", "s²", "z²"}, }, } m["sm"] = { "Samoan", 34011, "poz-pnp", Latn, } m["sn"] = { "Shona", 34004, "bnt-sho", Latn, entry_name = {remove_diacritics = ACUTE}, } m["so"] = { "Somali", 13275, "cus-eas", {"Latn", "Arab", "Osma"}, entry_name = { from = {"[ÁÀÂ]", "[áàâ]", "[ÉÈÊ]", "[éèê]", "[ÍÌÎ]", "[íìî]", "[ÓÒÔ]", "[óòô]", "[ÚÙÛ]", "[úùû]", "[ÝỲ]", "[ýỳ]"}, to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "U" , "u", "Y", "y"}} , } m["sq"] = { "Albanian", 8748, "sqj", {"Latn", "Grek", "Elba"}, entry_name = {remove_diacritics = ACUTE}, sort_key = { from = { '[âãä]', '[ÂÃÄ]', '[êẽë]', '[ÊẼË]', 'ĩ', 'Ĩ', 'õ', 'Õ', 'ũ', 'Ũ', 'ỹ', 'Ỹ', 'ç', 'Ç' }, to = { 'a', 'A', 'e', 'E', 'i', 'I', 'o', 'O', 'u', 'U', 'y', 'Y', 'c', 'C' } } , } m["sr"] = { "Sebian", 9299, "zls", aliases = {"Српски", "Српски језик", "српски", "српски језик", "srpski", "srpski jezik", "Srpski", "Srpski jezik"}, scripts = {"Latn", "Cyrl"}, entry_name = { from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE}, to = {"A" , "a" , "E" , "e" , "I" , "i" , "O" , "o" , "R" , "r" , "U" , "u" , "Е", "е", "И" , "и", "У", "у" }}, } m["ss"] = { "Swazi", 34014, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["st"] = { "Sotho", 34340, 
"bnt-sts", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["su"] = { "Sundanese", 34002, "poz-msa", {"Latn", "Sund"}, translit_module = "su-translit", } m["sv"] = { "Swedish", 9027, "gmq", Latn, ancestors = {"gmq-osw"}, } m["sw"] = { "Swahili", 7838, "bnt-swh", LatnArab, sort_key = { from = {"ng'", "^-"}, to = {"ngz"}} , } m["ta"] = { "Tamil", 5885, "dra", {"Taml"}, ancestors = {"oty"}, translit_module = "ta-translit", override_translit = true, } m["te"] = { "Telugu", 8097, "dra", {"Telu"}, translit_module = "te-translit", override_translit = true, } m["tg"] = { "Tajik", 9260, "ira-swi", {"Cyrl", "fa-Arab", "Latn"}, ancestors = {"pal"}, -- same as "fa", see WT:T:AFA translit_module = "tg-translit", override_translit = true, sort_key = { from = {"Ё", "ё"}, to = {"Е" , "е"}} , entry_name = { from = {ACUTE}, to = {}} , } m["th"] = { "Tai", 9217, "tai-swe", {"Thai", "Brai"}, translit_module = "th-translit", sort_key = { from = {"[%pๆ]", "[็-๎]", "([เแโใไ])([ก-ฮ])"}, to = {"", "", "%2%1"}}, } m["ti"] = { "Tigrinya", 34124, "sem-eth", {"Ethi"}, translit_module = "Ethi-translit", } m["tk"] = { "Turkmen", 9267, "trk-ogz", {"Latn", "Cyrl", "Arab"}, entry_name = { from = {"ā", "ē", "ī", "ō", "ū", "ȳ", "ȫ", "ǖ", MACRON}, to = {"a", "e", "i", "o", "u", "y", "ö", "ü", ""}}, ancestors = {"trk-ogz-pro"}, } m["tl"] = { "Tagalog", 34057, "phi", {"Latn", "Tglg"}, entry_name = { from = {"[áàâ]", "[éèê]", "[íìî]", "[óòô]", "[úùû]", ACUTE, GRAVE, CIRC}, to = {"a" , "e" , "i" , "o" , "u" }}, translit_module = "tl-translit", override_translit = true } m["tn"] = { "Tswana", 34137, "bnt-sts", Latn, } m["to"] = { "Tongan", 34094, "poz-pol", Latn, sort_key = { from = {"ā", "ē", "ī", "ō", "ū", MACRON}, to = {"a", "e", "i", "o", "u", ""}}, entry_name = { from = {"á", "é", "í", "ó", "ú", ACUTE}, to = {"a", "e", "i", "o", "u", ""}}, } m["tr"] = { "Turkish", 
256, "trk-ogz", Latn, ancestors = {"ota"}, } m["ts"] = { "Tsonga", 34327, "bnt-tsr", Latn, } m["tt"] = { "Tatar", 25285, "trk-kbu", {"Cyrl", "Latn", "tt-Arab"}, translit_module = "tt-translit", override_translit = true, } -- "tw" IS TREATED AS "ak", SEE WT:LT m["ty"] = { "Tahitian", 34128, "poz-pep", Latn, } m["ug"] = { "Uyghur", 13263, "trk-kar", {"ug-Arab", "Latn", "Cyrl"}, ancestors = {"chg"}, translit_module = "ug-translit", override_translit = true, } m["uk"] = { "Ukrainian", 8798, "zle", Cyrl, ancestors = {"orv"}, translit_module = "uk-translit", entry_name = { from = {"Ѐ", "ѐ", "Ѝ", "ѝ", GRAVE, ACUTE}, to = {"Е", "е", "И", "и"}}, standardChars = "ЄІЇА-ЩЬЮ-щьюяєії" .. PUNCTUATION, } m["ur"] = { "Urdu", 1617, "inc-hnd", {"ur-Arab"}, ancestors = {"inc-ohi"}, entry_name = { from = {u(0x0640), u(0x064B), u(0x064C), u(0x064D), u(0x064E), u(0x064F), u(0x0650), u(0x0651), u(0x0652), u(0x0658)}, to = {}} , } m["uz"] = { "Uzbek", 9264, "trk-kar", {"Latn", "Cyrl", "fa-Arab"}, ancestors = {"chg"}, } m["ve"] = { "Venda", 32704, "bnt-bso", Latn, } m["vi"] = { "Vietnamese", 9199, "mkh-vie", {"Latn", "Hani"}, ancestors = {"mkh-mvi"}, sort_key = "vi-sortkey", } m["vo"] = { "Volapük", 36986, "art", Latn, } m["wa"] = { "Walloon", 34219, "roa-oil", Latn, ancestors = {"fro"}, sort_key = { from = {"[áàâäå]", "[éèêë]", "[íìîï]", "[óòôö]", "[úùûü]", "[ýỳŷÿ]", "ç", "'"}, to = {"a" , "e" , "i" , "o" , "u" , "y" , "c"}} , } m["wo"] = { "Wolof", 34257, "alv-fwo", LatnArab, } m["xh"] = { "Xhosa", 13218, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } m["yi"] = { "Yiddish", 8641, "gmw", {"Hebr"}, ancestors = {"gmh"}, sort_key = { from = {"[אַאָ]", "בּ", "[וֹוּ]", "יִ", "ײַ", "פֿ"}, to = {"א", "ב", "ו", "י", "יי", "פ"}} , translit_module = "yi-translit", } m["yo"] = { "Yoruba", 34311, "alv-yor", Latn, sort_key = { from = {"ẹ", "ọ", "gb", "ṣ"}, 
to = {"e~" , "o~", "g~", "s~"}}, entry_name = { remove_diacritics = ACUTE .. GRAVE .. MACRON }, } m["za"] = { "Zhuang", 13216, "tai", {"Latn", "Hani"}, sort_key = { from = {"%p"}, to = {""}}, } m["zh"] = { "Chinese", 7850, "zhx", {"Hani", "Brai", "Nshu"}, ancestors = {"ltc"}, sort_key = "zh-sortkey", } m["zu"] = { "Zulu", 10179, "bnt-ngu", Latn, entry_name = { from = {"[āàáâǎ]", "[ēèéêě]", "[īìíîǐ]", "[ōòóôǒ]", "[ūùúûǔ]", "ḿ", "[ǹńň]", MACRON, ACUTE, GRAVE, CIRC, CARON}, to = {"a" , "e" , "i" , "o" , "u" , "m", "n" }}, } return m kkc3gpdrmedhehpi1ryz16nm6zndcie Module:ja-headword 828 3880 13342 11272 2022-08-05T07:11:24Z Asinis632 1829 Scribunto text/plain local m_ja = require("Module:ja") local m_ja_ruby = require('Module:ja-ruby') local find = mw.ustring.find local export = {} local pos_functions = {} local lang = require("Module:languages").getByCode("ja") local sc = require("Module:scripts").getByCode("Jpan") local Latn = require("Module:scripts").getByCode("Latn") local Japanese_symbols = '%ー・=?!。、' local katakana_range = 'ァ-ヺーヽヾ' local hiragana_range = 'ぁ-ゖーゞゝ' local kana_range = katakana_range .. hiragana_range .. Japanese_symbols local Japanese_scripts_range = kana_range .. '一-鿌・々' local katakana_pattern = '^[' .. katakana_range .. Japanese_symbols .. ']*$' local hiragana_pattern = '^[' .. hiragana_range .. Japanese_symbols .. ']*$' local kana_pattern = '^[' .. kana_range .. ']*$' local kana_pattern_full = '^[、' .. kana_range .. 
'%s%.%-%^%%]*$' local function remove_links(text) return (text:gsub("%[%[[^|%]]-|", ""):gsub("%[%[", ""):gsub("%]%]", "")) end local detect_kana_script = require("Module:fun").memoize(function(kana) if find(kana, katakana_pattern) then return 'kata' elseif find(kana, hiragana_pattern) then return 'hira' elseif find(kana, kana_pattern) then return 'both' else return nil end end) local en_numerals = { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen" } local en_grades = { "first grade", "second grade", "third grade", "fourth grade", "fifth grade", "sixth grade", "secondary school", "jinmeiyō", "hyōgaiji" } local aliases = { ['transitive']='tr', ['trans']='tr', ['intransitive']='in', ['intrans']='in', ['intr']='in', ['godan']='1', ['ichidan']='2', ['irregular']='irr' } local function kana_to_romaji(kana, data, args) -- make adjustments for -u verbs and -i adjectives by placing a period before the last character -- to prevent romanizing long vowels with macrons if (data.pos_category == "verbs") or (data.pos_category == "adjectives" and (args["infl"] == "i" or args["infl"] == "い" or args["infl"] == "is")) then kana = mw.ustring.gsub(kana,'([うい])$','.%1') end -- hyphens for prefixes, suffixes, and counters (classifiers) if data.pos_category == "prefixes" then kana = kana:gsub('%-?$', '-') elseif data.pos_category == "suffixes" or data.pos_category == "counters" or data.pos_category == "classifiers" then kana = kana:gsub('^%-?', '-') end -- automatic caps for proper nouns, if not already specified if data.pos_category == "proper nouns" then if not find(kana, '%^') then kana = mw.ustring.gsub(kana, '^(.)', '^%1') kana = mw.ustring.gsub(kana, '([%s%-])(.)', '%1^%2') end end kana = m_ja.kana_to_romaji(kana) return kana end local function historical_kana(args, data, poscat) local hk = args["hhira"] or args["hkata"] if hk then if hk:match'ゐ' then table.insert(data.categories, "Japanese terms 
historically spelled with ゐ") end if hk:match'ゑ' then table.insert(data.categories, "Japanese terms historically spelled with ゑ") end if hk:match'を' and not (data.kana and data.kana:match'を') then table.insert(data.categories, "Japanese terms historically spelled with を") end if hk:match'ぢ' and not (data.kana and data.kana:match'ぢ') then table.insert(data.categories, "Japanese terms historically spelled with ぢ") end if hk:match'づ' and not (data.kana and data.kana:match'づ') then table.insert(data.categories, "Japanese terms historically spelled with づ") end return '<sup>←' .. require('Module:ja-link').link({ lemma = hk, }, { hist = true, face = 'head', disableSelfLink = true, }) .. '<sup>[[w:Historical kana orthography|?]]</sup></sup> ' else return '' end end local function assign_kana_to_kanji(head, kana, pagename) local pat_k = '々㐀-䶵一-鿌' .. mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAFF) if mw.ustring.len(head) == 1 or mw.ustring.match(head, '[^' .. Japanese_scripts_range .. '%[%]|%s]') then return head, kana end local kanji_pos = {[0] = 0} local link_border = 0 local head_nolink = mw.ustring.gsub(head, '()(%b[])()', function(p1, w1, p2) if w1:sub(2, 2) ~= '[' or w1:sub(-2, -2) ~= ']' then return w1 end for pp1 in mw.ustring.gmatch(mw.ustring.sub(head, link_border + 1, p1 - 1), '()[' .. pat_k .. ']') do table.insert(kanji_pos, pp1 + link_border) end local p_pipe = mw.ustring.find(w1, '|') or 2 w1 = mw.ustring.sub(w1, p_pipe + 1, -3) link_border = p1 - 1 + p_pipe for pp1 in mw.ustring.gmatch(w1, '()[' .. pat_k .. ']') do table.insert(kanji_pos, pp1 + link_border) end link_border = p2 - 1 return w1 end) for pp1 in mw.ustring.gmatch(mw.ustring.sub(head, link_border + 1), '()[' .. pat_k .. ']') do table.insert(kanji_pos, pp1 + link_border) end local pagetext = mw.title.new(pagename):getContent() if not pagetext then return head, kana end local non_kanji = {} local last_kanji = 1 for p1 in mw.ustring.gmatch(head_nolink, '[' .. pat_k .. 
']()') do table.insert(non_kanji, mw.ustring.sub(head_nolink, last_kanji, p1 - 2)) last_kanji = p1 end table.insert(non_kanji, mw.ustring.sub(head_nolink, last_kanji)) for kanjitab_args in pagetext:gmatch'{{%s*ja%-kanjitab%s*(|.-)}}' do local readings = {} local readings_len = {} local readings_o = {} local id = 1 for ka in kanjitab_args:gmatch'|([^|]*)' do if not ka:match'=' then local r_kana, r_len = ka:match'^%s*(%D*)(%d*)%s*$' readings[id] = readings[id] or r_kana readings_len[id] = tonumber(r_len) id = id + 1 else local id_t, id_n = ka:match'^%s*([ko]?)(%d+)%s*=' if id_t then id_n = tonumber(id_n) local r = ka:match'^.-=%s*(.-)%s*$' if id_t == '' then local r_kana, r_len = r:match'(%D*)(%d*)' readings[id_n] = readings[id_n] or r_kana readings_len[id_n] = tonumber(r_len) elseif id_t == 'k' then readings[id_n] = r else readings_o[id_n] = r end end end end local kana_decom = {} local reading_id = 1 local reading_len = 1 for i = 1, #non_kanji - 1 do if reading_len <= 1 then reading_len = readings_len[reading_id] or 1 table.insert(kana_decom, non_kanji[i]) table.insert(kana_decom, (readings[reading_id] or '') .. 
(readings_o[reading_id] or '')) reading_id = reading_id + 1 else reading_len = reading_len - 1 end end table.insert(kana_decom, non_kanji[#non_kanji]) if table.concat(kana_decom):gsub(' ', '') == kana:gsub('[%.%- ^]', '') then local head_decom = {} reading_id = 1 reading_len = 1 for i = 1, #non_kanji - 1 do if reading_len <= 1 then reading_len = readings_len[reading_id] or 1 table.insert(head_decom, mw.ustring.sub(head, kanji_pos[i - 1] + 1, kanji_pos[i] - 1)) table.insert(head_decom, mw.ustring.sub(head, kanji_pos[i], kanji_pos[i + reading_len - 1])) reading_id = reading_id + 1 else reading_len = reading_len - 1 end end table.insert(head_decom, mw.ustring.sub(head, kanji_pos[#non_kanji - 1] + 1)) return table.concat(head_decom, '%'), table.concat(kana_decom, '%') end end return head, kana end local function default_seperator(text) require('Module:debug').track('ja-headword/default separator used') local result = {} local p0 = 1 text = text:gsub('%[%[([^|]-)%]%]', '%1'):gsub('%[%[[^|]-|([^|]-)%]%]', '%1') for p1, w1 in mw.ustring.gmatch(text, table.concat{ '()([々㐀-䶵一-鿌', mw.ustring.char(0xF900), "-", mw.ustring.char(0xFAD9), '𠀀-𯨟0-9A-Za-z〆〇0-9a-zA-Zα-ωΑ-Ω])', }) do if p0 < p1 then table.insert(result, mw.ustring.sub(text, p0, p1 - 1)) end table.insert(result, w1) p0 = p1 + 1 end if p0 <= mw.ustring.len(text) then table.insert(result, mw.ustring.sub(text, p0)) end return table.concat(result, '%') end -- adds category Japanese terms spelled with jōyō kanji or Japanese terms spelled with non-jōyō kanji -- (if it contains any kanji) local function categorize_by_kanji(data, PAGENAME) -- remove non-kanji characters local onlykanji = mw.ustring.gsub(PAGENAME, '[^一-鿌]', '') local number_of_kanji = mw.ustring.len(onlykanji) if number_of_kanji > 0 then for i=1,mw.ustring.len(onlykanji) do table.insert(data.categories, ("Japanese terms spelled with %s kanji"):format(en_grades[m_ja.kanji_grade(mw.ustring.sub(onlykanji,i,i))])) end -- categorize by number of kanji if 
number_of_kanji == 1 then table.insert(data.categories, "Japanese terms written with one Han script character") elseif en_numerals[number_of_kanji] then table.insert(data.categories, ("Japanese terms written with %s Han script characters"):format(en_numerals[number_of_kanji])) end end -- single-kanji terms if mw.ustring.len(PAGENAME) == 1 and mw.ustring.match(PAGENAME, '[一-鿌]') then table.insert(data.categories, "Japanese terms spelled with " .. PAGENAME) table.insert(data.categories, "Japanese single-kanji terms") end end -- categorize by the script of the pagename or specific characters contained in it local function extra_categorization(data, PAGENAME, katakana_category) -- if PAGENAME is hiragana, put in that category, same for katakana (but do it at the end) if detect_kana_script(PAGENAME) == 'hira' then table.insert(data.categories, "Siapan hiragana") end if detect_kana_script(PAGENAME) == 'kata' then table.insert(katakana_category, "Siapan katakana") end if find(PAGENAME, "[^" .. Japanese_scripts_range .. "]") and find(PAGENAME, '[' .. Japanese_scripts_range .. 
']') then table.insert(data.categories, "Japanese terms written in multiple scripts") end for _,character in ipairs({'々','〆','ヶ','ゝ','ゞ','ヽ','ヾ','ゐ','ヰ','ゑ','ヱ','ゔ','ヷ','ヸ','ヹ','ヺ','・','=','゠'}) do if mw.ustring.match(PAGENAME,character) then table.insert(data.categories, ("Japanese terms spelled with %s"):format(character)) end end if find(PAGENAME, "[ァ-ヺヽヾ]") and find(PAGENAME, "[ぁ-ゖゞゝ]") and data.pos_category ~= "proverbs" and data.pos_category ~= "phrases" then table.insert(data.categories, "Japanese terms spelled with mixed kana") end end -- go through args and build inflections by finding whatever kanas were given to us local function format_headword(args, data, head) local headword_kana_type = detect_kana_script(remove_links(m_ja.remove_ruby_markup(head))) local allkana, romajis = {}, {} local rep = {} local _insert_kana = headword_kana_type and function(k) -- pure-kana-title entry if k == '' then return end local key = remove_links(m_ja.remove_ruby_markup(k)) romajis[1] = kana_to_romaji(remove_links(k), data, args) if not rep[key] then table.insert(allkana, k) rep[key] = true end end or function(k) -- non-pure-kana-title entry if k == '' then return end local key = m_ja.kana_to_romaji(remove_links(m_ja.remove_ruby_markup(k))) if not rep[key] then table.insert(romajis, kana_to_romaji(remove_links(k), data, args)) table.insert(allkana, k) rep[key] = true end end if headword_kana_type then _insert_kana(remove_links(head)) allkana[1] = head end for i, arg in ipairs(args[1]) do -- test for kana: filter out POS designations if find(arg, kana_pattern_full) then _insert_kana(arg) end end -- accept "hira" and "kata" but let Lua decide if they are really hiragana or katakana if args["hira"] and args["hira"] ~= "" then _insert_kana(args["hira"]) end if args["kata"] and args["kata"] ~= "" then _insert_kana(args["kata"]) end if args["rom"] then romajis[1] = args["rom"] end if #allkana == 0 then error('Kana form is required') end if #romajis == 0 then error('Romaji is 
required') end local suru_ending = data.pos_category == "suru verbs" and '[[する]]' or '' for _, kana in ipairs(allkana) do -- add everything to inflections, except historical hiragana which is next -- local format_result = headword_kana_type and allkana[i] or format_ruby(PAGENAME, allkana[i], data) local format_result, format_result_preserved --<ruby> form, []() form if headword_kana_type then format_result = m_ja.remove_ruby_markup(kana) format_result_preserved = remove_links(format_result) .. suru_ending format_result = format_result .. suru_ending else local head_for_ruby, kana_for_ruby if kana:match'%%' then if head:match'%%' then head_for_ruby, kana_for_ruby = head, kana else head_for_ruby, kana_for_ruby = default_seperator(head), kana end else head_for_ruby, kana_for_ruby = assign_kana_to_kanji(head, kana, args.pagename) end local format_table = m_ja_ruby.parse_text(head_for_ruby, kana_for_ruby, { try = 'force', try_force_limit = 10000 }) format_result = m_ja_ruby.to_wiki(format_table, { break_link = true, }):gsub('<rt>(..-)</rt>', "<rt>[[" .. remove_links(m_ja.remove_ruby_markup(kana)) .."|%1]]</rt>") .. suru_ending format_result_preserved = remove_links(m_ja_ruby.to_markup(format_table)) .. suru_ending end table.insert(data.heads, format_result) data.heads_preserved = data.heads_preserved or format_result_preserved end suru_ending = data.pos_category == "suru verbs" and ' suru' or '' for _, rom in ipairs(romajis) do table.insert(data.translits, '[[' .. rom .. '#Japanese|' .. rom .. ']]' .. 
suru_ending) end if #romajis > 1 then table.insert(data.categories, "Japanese words with multiple readings") end data.kana = allkana[1] and remove_links(m_ja.remove_ruby_markup(allkana[1])) end local function add_transitivity(data, tr) tr = aliases[tr] or tr if tr == "tr" then table.insert(data.info_mid, 'transitive') table.insert(data.categories, "Japanese transitive verbs") elseif tr == "in" then table.insert(data.info_mid, 'intransitive') table.insert(data.categories, "Japanese intransitive verbs") elseif tr == "both" then table.insert(data.info_mid, 'transitive or intransitive') table.insert(data.categories, "Japanese transitive verbs") table.insert(data.categories, "Japanese intransitive verbs") else table.insert(data.categories, "Japanese verbs without transitivity") end end local function add_inflections(data, inflection_type, cat_suffix) local lemma = data.heads_preserved or data.heads[1] local romaji = remove_links(data.translits[1]) inflection_type = aliases[inflection_type] or inflection_type local function replace_suffix(lemma_from, lemma_to, romaji_from, romaji_to) -- e.g. 持って来る, lemma = "[持](も)って来(く)る" -- lemma_from = "くる", lemma_to = {"き","きた"} local p_kr = katakana_range .. hiragana_range local lemma_sub local romaji_sub local key_pos = {} local i1, i2 romaji_from = romaji_from or m_ja.kana_to_romaji(lemma_from) if type(lemma_to) ~= 'table' then lemma_to = {lemma_to} end if type(romaji_to) ~= 'table' then romaji_to = {romaji_to} end for i, v in ipairs(lemma_to) do romaji_to[i] = romaji_to[i] or m_ja.kana_to_romaji(v) end lemma_sub = lemma lemma_from = lemma_from ~= '' and mw.text.split(lemma_from, '') or {} -- lemma_from = {"く","る"} local len_lemma_from = #lemma_from -- find the last two kana in "[持](も)って来(く)る" key_pos[len_lemma_from + 1] = {-1} for i = len_lemma_from, 1, -1 do i1, _, i2 = mw.ustring.find(lemma_sub, '[' .. m_ja.kata_to_hira(lemma_from[i]) .. m_ja.hira_to_kata(lemma_from[i]) .. ']()[^' .. p_kr .. 
']-$') if not i1 then return nil end i1 = i1 - 1 key_pos[i] = {i1, i2} lemma_sub = mw.ustring.sub(lemma_sub, 1, i1) end romaji_sub, i1 = romaji:gsub(romaji_from .. '%s*$', '') if i1 ~= 1 then return nil end local result = {} for i, v in ipairs(lemma_to) do local result_single = {lemma_sub} for j = 1, len_lemma_from do table.insert(result_single, mw.ustring.sub(v, j, j)) table.insert(result_single, mw.ustring.sub(lemma, key_pos[j][2], key_pos[j + 1][1])) end table.insert(result_single, mw.ustring.sub(v, len_lemma_from + 1)) result[i] = {lemma = table.concat(result_single), romaji = romaji_sub .. romaji_to[i]} -- "[持](も)って来(" .. "き" .. ")" .. "" .. "" and "[持](も)って来(" .. "き" .. ")" .. "た" .. "" end return result -- {{lemma="[持](も)って来(き)",romaji="motteki"},{lemma="[持](も)って来(き)た",romaji="mottekita"}} end local function insert_form(label, ...) -- label = "stem" or "past" etc. -- ... = {lemma=...,romaji=...},{lemma=...,romaji=...} local labeled_forms = {label = label} for _, v in ipairs{...} do local table_form = m_ja_ruby.parse_markup(v.lemma) local form_term = m_ja_ruby.to_wiki(table_form) if not form_term:find'%[%[.+%]%]' then form_term = '[[' .. m_ja_ruby.to_text(table_form) .. '#Japanese|' .. form_term .. ']]' end table.insert(labeled_forms, { term = form_term, translit = v.romaji, }) end table.insert(data.inflections, labeled_forms) end local inflected_forms if inflection_type == '1' or inflection_type == '1s' then table.insert(data.info_mid, '<abbr title="godan (type I) conjugation">godan</abbr>') if cat_suffix then table.insert(data.categories, "Japanese type 1 " .. 
cat_suffix) if cat_suffix == 'verbs' and data.translits[1] and mw.ustring.find(remove_links(data.translits[1]), '[ieIEīēĪĒ]ru$') then table.insert(data.categories, "Japanese type 1 verbs that end in -iru or -eru") end end if inflection_type == '1' then inflected_forms = replace_suffix('く', {'き', 'いた'}, 'ku', {'ki', 'ita'}) or replace_suffix('ぐ', {'ぎ', 'いだ'}, 'gu', {'gi', 'ida'}) or replace_suffix('す', {'し', 'した'}, 'su', {'shi', 'shita'}) or replace_suffix('つ', {'ち', 'った'}, 'tsu', {'chi', 'tta'}) or replace_suffix('ぬ', {'に', 'んだ'}, 'nu', {'ni', 'nda'}) or replace_suffix('ぶ', {'び', 'んだ'}, 'bu', {'bi', 'nda'}) or replace_suffix('む', {'み', 'んだ'}, 'mu', {'mi', 'nda'}) or replace_suffix('る', {'り', 'った'}, 'ru', {'ri', 'tta'}) or replace_suffix('う', {'い', 'った'}, 'u', {'i', 'tta'}) if inflected_forms then insert_form('stem', inflected_forms[1]) insert_form('past', inflected_forms[2]) else require("Module:debug").track("ja-headword/godan conjugation failed") end else inflected_forms = replace_suffix('る', {'り', 'った', 'い'}, 'ru', {'ri', 'tta', 'i'}) or --くださる replace_suffix('いく', {'いき', 'いった'}, 'iku', {'iki', 'itta'}) or --行く replace_suffix('う', {'い', 'うた'}, 'ou', {'oi', 'ōta'}) --問う if inflected_forms then insert_form('stem', inflected_forms[1], inflected_forms[3]) insert_form('past', inflected_forms[2]) else require("Module:debug").track("ja-headword/godan conjugation special failed") end end elseif inflection_type == '2' then table.insert(data.info_mid, '<abbr title="ichidan (type II) conjugation">ichidan</abbr>') if cat_suffix then table.insert(data.categories, "Japanese type 2 " .. 
cat_suffix) end inflected_forms = replace_suffix('る', {'', 'た'}, 'ru', {'', 'ta'}) if inflected_forms then insert_form('stem', inflected_forms[1]) insert_form('past', inflected_forms[2]) else require("Module:debug").track("ja-headword/ichidan conjugation failed") end elseif inflection_type == 'suru' then table.insert(data.info_mid, '<abbr title="suru (type III) conjugation">suru</abbr>') if cat_suffix then table.insert(data.categories, "Japanese suru " .. cat_suffix) end inflected_forms = replace_suffix('する', {'し', 'した'}, 'suru', {'shi', 'shita'}) or replace_suffix('ずる', {'じ', 'じた'}, 'zuru', {'ji', 'jita'}) if inflected_forms then insert_form('stem', inflected_forms[1]) insert_form('past', inflected_forms[2]) else require("Module:debug").track("ja-headword/suru conjugation failed") end elseif inflection_type == 'kuru' then table.insert(data.info_mid, '<abbr title="kuru (type III) conjugation">kuru</abbr>') if cat_suffix then table.insert(data.categories, "Japanese kuru " .. cat_suffix) end inflected_forms = replace_suffix('くる', {'き', 'きた'}, 'kuru', {'ki', 'kita'}) if inflected_forms then insert_form('stem', inflected_forms[1]) insert_form('past', inflected_forms[2]) else require("Module:debug").track("ja-headword/kuru conjugation failed") end elseif inflection_type == 'i' or inflection_type == 'い' then table.insert(data.info_mid, '<abbr title="-i (type I) inflection">-i</abbr>') if cat_suffix then table.insert(data.categories, "Japanese い-i " .. cat_suffix) end inflected_forms = replace_suffix('い', {'く'}, 'i', {'ku'}) if inflected_forms then insert_form('adverbial', inflected_forms[1]) else require("Module:debug").track("ja-headword/-i inflection failed") end elseif inflection_type == 'is' then table.insert(data.info_mid, '<abbr title="-i (type I) inflection">-i</abbr>') if cat_suffix then table.insert(data.categories, "Japanese い-i " .. 
cat_suffix) end inflected_forms = replace_suffix('いい', {'よく'}, 'ii', {'yoku'}) if inflected_forms then insert_form('adverbial', inflected_forms[1]) else require("Module:debug").track("ja-headword/slightly irregular -i inflection failed") end elseif inflection_type == 'na' or inflection_type == 'な' then table.insert(data.info_mid, '<abbr title="-na (type II) inflection">-na</abbr>') if cat_suffix then table.insert(data.categories, "Japanese な-na " .. cat_suffix) end inflected_forms = replace_suffix('', {'[[な]]', '[[に]]'}, '', {' na', ' ni'}) insert_form('adnominal', inflected_forms[1]) insert_form('adverbial', inflected_forms[2]) elseif inflection_type == "yo" then table.insert(data.info_mid, '<abbr title="yodan conjugation (classical)"><sup><small>†</small></sup>yodan</abbr>') if cat_suffix then table.insert(data.categories, "Japanese yodan " .. cat_suffix) end elseif inflection_type == "kami ni" then table.insert(data.info_mid, '<abbr title="kami nidan conjugation (classical)"><sup><small>†</small></sup>nidan</abbr>') if cat_suffix then table.insert(data.categories, "Japanese kami nidan " .. cat_suffix) end elseif inflection_type == "shimo ni" then table.insert(data.info_mid, '<abbr title="shimo nidan conjugation (classical)"><sup><small>†</small></sup>nidan</abbr>') if cat_suffix then table.insert(data.categories, "Japanese shimo nidan " .. 
cat_suffix) end elseif inflection_type == "rahen" then table.insert(data.info_mid, '<abbr title="r-special conjugation (classical)"><sup><small>†</small></sup>-ri</abbr>') elseif inflection_type == "sahen" then table.insert(data.info_mid, '<abbr title="s-special conjugation (classical)"><sup><small>†</small></sup>-se</abbr>') elseif inflection_type == "kahen" then table.insert(data.info_mid, '<abbr title="k-special conjugation (classical)"><sup><small>†</small></sup>-ko</abbr>') elseif inflection_type == "nahen" then table.insert(data.info_mid, '<abbr title="n-special conjugation (classical)"><sup><small>†</small></sup>-n</abbr>') elseif inflection_type == "nari" or inflection_type == "なり" then table.insert(data.info_mid, '<abbr title="-nari inflection (classical)"><sup><small>†</small></sup>-nari</abbr>') if cat_suffix then table.insert(data.categories, "Japanese なり-nari " .. cat_suffix) end elseif inflection_type == 'tari' or inflection_type == 'たり' then table.insert(data.info_mid, '<abbr title="-tari inflection (classical)"><sup><small>†</small></sup>-tari</abbr>') if cat_suffix then table.insert(data.categories, "Japanese たり-tari " .. cat_suffix) end inflected_forms = replace_suffix('', {'[[たる]]', '[[と]]', '[[として]]'}, '', {' taru', ' to', ' to shite'}) insert_form('adnominal', inflected_forms[1]) insert_form('adverbial', inflected_forms[2], inflected_forms[3]) elseif inflection_type == "ku" or inflection_type == "く" then table.insert(data.info_mid, '<abbr title="-ku inflection (classical)"><sup><small>†</small></sup>-ku</abbr>') if cat_suffix then table.insert(data.categories, "Japanese く-ku " .. cat_suffix) end elseif inflection_type == "shiku" or inflection_type == "しく" then table.insert(data.info_mid, '<abbr title="-shiku inflection (classical)"><sup><small>†</small></sup>-shiku</abbr>') if cat_suffix then table.insert(data.categories, "Japanese しく-shiku " .. 
cat_suffix)
		end
	elseif inflection_type == "ka" or inflection_type == "か" then
		table.insert(data.info_mid, '<abbr title="-ka inflection (dialectal)"><sup><small>†</small></sup>-ka</abbr>')
		if cat_suffix then
			table.insert(data.categories, "Japanese か-ka " .. cat_suffix)
		end
	elseif inflection_type == 'irr' then
		table.insert(data.info_mid, 'irregular')
		if cat_suffix then
			table.insert(data.categories, "Japanese irregular " .. cat_suffix)
		end
	elseif inflection_type == '-' or inflection_type == 'un' then
		table.insert(data.info_mid, 'uninflectable')
	end
end

-- Per-part-of-speech handlers, keyed by the part-of-speech name passed to
-- export.show. Each handler receives the processed template arguments and the
-- headword data table and mutates the latter in place.

-- Verbs: transitivity from |tr=, conjugation class from |infl=.
pos_functions["verbs"] = function(args, data)
	add_transitivity(data, args["tr"])
	add_inflections(data, args["infl"], 'verbs')
end

-- Suffixes: inflection info only; no category suffix is passed.
pos_functions["suffixes"] = function(args, data)
	add_inflections(data, args["infl"])
end

-- Auxiliary verbs: categorized explicitly, then reported as plain "verbs".
pos_functions["auxiliary verbs"] = function(args, data)
	table.insert(data.categories, "Japanese auxiliary verbs")
	add_inflections(data, args["infl"])
	data.pos_category = "verbs"
end

-- Suru verbs: always use the 'suru' conjugation, then reported as plain "verbs".
-- NOTE: data.pos_category is overwritten here, so code that runs after this
-- handler must look at the original part-of-speech argument to detect suru verbs.
pos_functions["suru verbs"] = function(args, data)
	add_transitivity(data, args["tr"])
	add_inflections(data, 'suru', 'verbs')
	data.pos_category = "verbs"
end

-- Verb forms: inflection info only; no category suffix is passed.
pos_functions["verb forms"] = function(args, data)
	add_inflections(data, args["infl"])
end

-- Adjectives: inflection class from |infl=, categorized as adjectives.
pos_functions["adjectives"] = function(args, data)
	add_inflections(data, args["infl"], 'adjectives')
end

-- Adjective forms: inflection info only; no category suffix is passed.
pos_functions["adjective forms"] = function(args, data)
	add_inflections(data, args["infl"])
end

-- Nouns: handle the counter (classifier) parameter |count=.
pos_functions["nouns"] = function(args, data)
	-- the counter (classifier) parameter, only relevant for nouns
	local counter = args["count"] or ""
	if counter == "-" then
		table.insert(data.inflections, {label = "uncountable"})
	elseif counter ~= "" then
		table.insert(data.inflections, {label = "counter", counter})
	end
end

-- For use in soft redirect pages
-- Sortkey is not provided
--
-- Appends to `categories` (a list, modified in place) the kanji-based and
-- script-based categories derived from `pagename`.
function export.cat(pagename, categories)
	categorize_by_kanji({categories = categories}, pagename)
	-- categorize by the script of the pagename or specific characters contained in it
	-- (the same list is passed as the katakana category list, so everything
	-- ends up in `categories`)
	extra_categorization({categories = categories}, pagename, categories)
end

-- The main entry point.
-- This is the only function that can be invoked from a template.
--
-- frame.args[1] is the part of speech (required); the remaining parameters
-- are read from the parent (template) frame.
function export.show(frame)
	local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")

	local args = require('Module:parameters').process(frame:getParent().args, {
		[1] = {list = true},
		['hira'] = {},
		['kata'] = {},
		['rom'] = {},
		['tr'] = {},
		['infl'] = {},
		['type'] = {alias_of = 'infl'},
		['decl'] = {alias_of = 'infl'},
		['count'] = {},
		['kyu'] = {},
		['shin'] = {},
		['hhira'] = {},
		['hkata'] = {},
		['head'] = {},
		['sort'] = {},
		['pagename'] = {},
	})
	args['pagename'] = args['pagename'] or mw.title.getCurrentTitle().text

	local data = {
		lang = lang,
		sc = sc,
		pos_category = poscat,
		categories = {},
		translits = {},
		heads = {},
		inflections = {},
		genders = {'m'}, -- placeholder

		sort_key = nil,
		--custom info
		info_mid = {},
		heads_preserved = nil,
		kana = nil,
	}
	local katakana_category = {}

	-- sort out all the kanas and do the romanization business
	format_headword(args, data, args["head"] or args['pagename'])

	-- add certain "inflections" and categories for adjectives, verbs, or nouns
	if pos_functions[poscat] then
		pos_functions[poscat](args, data)
	end

	-- the presence of kyūjitai param indicates that this is shinjitai kanji entry and vice versa
	-- Suru verbs are detected via `poscat`: the "suru verbs" handler above has
	-- already rewritten data.pos_category to "verbs" by this point.
	if args["kyu"] then
		if poscat == "suru verbs" then
			table.insert(data.inflections, {label = "[[Appendix:Japanese_glossary#kyūjitai|kyūjitai]]", "[[" .. args["kyu"] .. "]][[する]]"})
		else
			table.insert(data.inflections, {label = "[[Appendix:Japanese_glossary#kyūjitai|kyūjitai]]", args["kyu"]})
		end
		require('Module:debug').track'ja-headword/kyu'
	end
	if args["shin"] then
		table.insert(data.inflections, {label = "[[Appendix:Japanese_glossary#kyūjitai|kyūjitai]]"})
		if poscat == "suru verbs" then
			table.insert(data.inflections, {label = "[[Appendix:Japanese_glossary#shinjitai|shinjitai]]", "[[" .. args["shin"] ..
"]][[する]]"}) else table.insert(data.inflections, {label = "[[Appendix:Japanese_glossary#shinjitai|shinjitai]]", args["shin"]}) end require('Module:debug').track'ja-headword/shin' end local hist_info = historical_kana(args, data, poscat) -- categorize by joyo kanji and number of kanji categorize_by_kanji(data, args['pagename']) -- categorize by the script of the pagename or specific characters contained in it extra_categorization(data, args['pagename'], katakana_category) data.sort_key = args['sort'] or data.kana and m_ja.jsort(data.kana) or nil return (data.kana and '<span id="' .. data.kana .. '"></span>' or '') .. require('Module:headword').full_headword(data) :gsub('<span class="gender">.-</span>', hist_info .. '<i>'..table.concat(data.info_mid, '&nbsp;')..'</i>') .. require("Module:utilities").format_categories(katakana_category, lang, data.sort_key and m_ja.hira_to_kata(data.sort_key)) end return export 7xgwv1htgdbkv0gnbkqo9w3qevgdw65 コンドーム 0 5310 13336 2022-08-04T12:37:21Z Asinis632 1829 Created page with "==Japanese== {{DEFAULTSORT:こんどおむ}} {{swp|lang=ja}} ===Etymology=== [[Appendix:Glossary#loanword|Borrowed]] from {{bor|ja|de|Kondom}},<ref>{{R:Kojien}}</ref> from {{bor|ja|en|condom}},<ref>{{R:Daijisen}}</ref> or from {{bor|ja|fr|condom}}.<ref name="DJR"/> ===Pronunciation=== {{ja-pron|acc=3|acc_ref=DJR}} ===Noun=== {{ja-noun}} # a [[condom]] {{gloss|flexible sleeve worn on the penis}} ====Synonyms==== * {{ja-r|ゴム}} * {{ja-r|スキン}} * {{ja-r|ルー..." 
wikitext text/x-wiki ==Japanese== {{DEFAULTSORT:こんどおむ}} {{swp|lang=ja}} ===Etymology=== [[Appendix:Glossary#loanword|Borrowed]] from {{bor|ja|de|Kondom}},<ref>{{R:Kojien}}</ref> from {{bor|ja|en|condom}},<ref>{{R:Daijisen}}</ref> or from {{bor|ja|fr|condom}}.<ref name="DJR"/> ===Pronunciation=== {{ja-pron|acc=3|acc_ref=DJR}} ===Noun=== {{ja-noun}} # a [[condom]] {{gloss|flexible sleeve worn on the penis}} ====Synonyms==== * {{ja-r|ゴム}} * {{ja-r|スキン}} * {{ja-r|ルーデサック}}; {{ja-r|サック}} * {{ja-r|^フレンチ レター}} ===References=== <references/> {{C|ja|Birth control}} k1740sthgrno19cktxgcvpp5o2bs1vm 13337 13336 2022-08-04T12:37:43Z Asinis632 1829 wikitext text/x-wiki {{-ja-}} {{DEFAULTSORT:こんどおむ}} {{swp|lang=ja}} ===Etymology=== [[Appendix:Glossary#loanword|Borrowed]] from {{bor|ja|de|Kondom}},<ref>{{R:Kojien}}</ref> from {{bor|ja|en|condom}},<ref>{{R:Daijisen}}</ref> or from {{bor|ja|fr|condom}}.<ref name="DJR"/> ===Pronunciation=== {{ja-pron|acc=3|acc_ref=DJR}} ===Noun=== {{ja-noun}} # a [[condom]] {{gloss|flexible sleeve worn on the penis}} ====Synonyms==== * {{ja-r|ゴム}} * {{ja-r|スキン}} * {{ja-r|ルーデサック}}; {{ja-r|サック}} * {{ja-r|^フレンチ レター}} ===References=== <references/> {{C|ja|Birth control}} a762ipybbmhyazvf3pz9kpi11bo7q9m Templet:R:Kojien 10 5312 13341 2022-08-05T07:05:49Z Asinis632 1829 Created page with "<span class="book">'''1998''', <cite>{{lang|ja|広辞苑}} (''[[w:Kōjien|Kōjien]]'')</cite>, Fifth Edition (in Japanese), [[w:Tōkyō|Tōkyō]]: [[w:Iwanami Shoten|Iwanami Shoten]], {{ISBN|4000801112}}</span><noinclude> [[Category:Japanese reference templates|Kojien]] </noinclude>" wikitext text/x-wiki <span class="book">'''1998''', <cite>{{lang|ja|広辞苑}} (''[[w:Kōjien|Kōjien]]'')</cite>, Fifth Edition (in Japanese), [[w:Tōkyō|Tōkyō]]: [[w:Iwanami Shoten|Iwanami Shoten]], {{ISBN|4000801112}}</span><noinclude> [[Category:Japanese reference templates|Kojien]] </noinclude> ct7k8vytcagqvik5tfcrspaub1wjetm Templet:swp 10 5313 13343 2022-08-05T07:12:43Z Asinis632 1829 Redirected page to 
[[Templet:slim-wikipedia]] wikitext text/x-wiki #redirect [[Templet:slim-wikipedia]] j6fazmke51gom3v238wcfs15jhr4mzc Templet:slim-wikipedia 10 5315 13344 2022-08-05T07:13:26Z Asinis632 1829 Created page with "{{#invoke:interproject|wikipedia_box|slim=1}}<noinclude>{{documentation}}</noinclude>" wikitext text/x-wiki {{#invoke:interproject|wikipedia_box|slim=1}}<noinclude>{{documentation}}</noinclude> blu8mkxfwci2w8rhvv7mfrb56n9l6ki Module:interproject 828 5316 13345 2022-08-05T07:14:39Z Asinis632 1829 Created page with "local export = {} local function track(page) require("Module:debug/track")("interproject/" .. page) return true end local function process_links(linkdata, prefix, name, wmlang, sc) prefix = prefix .. ":" .. (wmlang:getCode() == "en" and "" or wmlang:getCode() .. ":") local links = {} local iplinks = {} local m_links = require("Module:links") local lang = wmlang:getWiktionaryLanguage() local ipalt = name .. " " .. (wmlang:getCode() == "en" and "" or "<sup>..." Scribunto text/plain
-- Module:interproject
-- Builds links and boxes pointing at sister Wikimedia projects.
-- Entry points (invoked from templates): export.wikipedia_box, export.projectlink.
local export = {}

-- Calls Module:debug/track with the key "interproject/<page>".
-- Always returns true.
-- NOTE(review): presumably this records a tracking-page transclusion; the
-- tracking module itself is not visible here.
local function track(page)
	require("Module:debug/track")("interproject/" .. page)
	return true
end

-- Turns a list of link descriptors into rendered links.
--
-- Parameters:
--   linkdata  array of tables with at least a `term` field; each table is
--             MODIFIED IN PLACE (lang, sc, tr are set and `term` gets the
--             interwiki prefix prepended). Callers rely on that mutation.
--   prefix    interwiki prefix without the trailing colon (e.g. "w")
--   name      project display name used in the interProject link label
--   wmlang    Wikimedia language object (getCode, getWiktionaryLanguage)
--   sc        script object or nil, copied onto every link
--
-- Returns two arrays of strings: the results of Module:links full_link for
-- each entry, and the matching <span class="interProject"> wikilinks.
local function process_links(linkdata, prefix, name, wmlang, sc)
	local code = wmlang:getCode()
	local is_english = code == "en"

	-- English needs no language infix: "w:" versus "w:fr:".
	prefix = prefix .. ":" .. (is_english and "" or code .. ":")

	local links = {}
	local iplinks = {}
	local m_links = require("Module:links")
	local lang = wmlang:getWiktionaryLanguage()
	local ipalt = name .. " " .. (is_english and "" or "<sup>" .. code .. "</sup>")

	for _, link in ipairs(linkdata) do
		link.lang = lang
		link.sc = sc
		link.term = prefix .. link.term
		link.tr = "-"
		table.insert(iplinks, "<span class=\"interProject\">[[" .. link.term .. "|" .. ipalt .. "]]</span>")
		table.insert(links, m_links.full_link(link, "bold"))
	end

	return links, iplinks
end

-- Renders a floating box linking to Wikipedia.
--
-- Template arguments are read from the parent frame: 1, 2, cat/category,
-- i, lang (default "en"), mul, mullabel, mulcat, mulcatlabel, portal, sc.
-- The invocation argument `slim` (frame.args) selects the compact layout
-- whenever it is present.
-- Raises an error when `lang` is not resolved by Module:wikimedia languages.
function export.wikipedia_box(frame)
	local params = {
		[1] = {},
		[2] = {},
		["cat"] = {},
		["category"] = {alias_of = "cat"},
		["i"] = {type = "boolean"},
		["lang"] = {default = "en"},
		["mul"] = {},
		["mullabel"] = {},
		["mulcat"] = {},
		["mulcatlabel"] = {},
		["portal"] = {},
		["sc"] = {},
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)

	if args.mul or args.mullabel or args.mulcat or args.mulcatlabel then
		track("wikipedia-box-mul")
	end

	local wmlang = require("Module:wikimedia languages").getByCodeWithFallback(args["lang"])
		or error("The Wikimedia language code \"" .. args["lang"] .. "\" is not valid.")
	local sc = args["sc"] and require("Module:scripts").getByCode(args["sc"], "sc") or nil

	-- Primary link: category, portal, or plain article (defaults to the current page title).
	local linkdata = {}
	if args["cat"] then
		table.insert(linkdata, {term = "Category:" .. args["cat"], alt = args[1] or args["cat"]})
	elseif args["portal"] then
		table.insert(linkdata, {term = "Portal:" .. args["portal"], alt = args[1] or args["portal"]})
	else
		local term = args[1] or mw.title.getCurrentTitle().text
		table.insert(linkdata, {term = term, alt = args[2] or term})
	end

	-- Optional second link; mulcat takes precedence over mul.
	if args["mul"] or args["mulcat"] then
		if args["mulcat"] then
			table.insert(linkdata, {term = "Category:" .. args["mulcat"], alt = args["mulcatlabel"] or args["mulcat"]})
		else
			table.insert(linkdata, {term = args["mul"], alt = args["mullabel"] or args["mul"]})
		end
	end

	local links, iplinks = process_links(linkdata, "w", "Wikipedia", wmlang, sc)

	if frame.args["slim"] then
		return "<div class=\"sister-wikipedia sister-project noprint floatright\" style=\"border: solid #aaa 1px; font-size: 90%; background: #f9f9f9; width: 250px; padding: 4px; text-align: left;\">" ..
			"<div style=\"float: left;\">[[File:Wikipedia-logo.png|14px|none| ]]</div>" ..
			"<div style=\"margin-left: 15px;\">" ..
			" &nbsp;" .. table.concat(links, " and ") .. " on " ..
			(wmlang:getCode() == "en" and "" or wmlang:getCanonicalName() .. "&nbsp;") ..
			"Wikipedia" ..
			"</div>" ..
			"</div>"
	else
		local linktype
		if args["cat"] then
			linktype = "a category"
		elseif args["mul"] then
			linktype = "articles"
		elseif args["mulcat"] then
			linktype = "categories"
		elseif args["portal"] then
			linktype = "a portal"
		else
			linktype = "an article"
		end

		return "<div class=\"sister-wikipedia sister-project noprint floatright\" style=\"border: 1px solid #aaa; font-size: 90%; background: #f9f9f9; width: 250px; padding: 4px; text-align: left;\">" ..
			"<div style=\"float: left;\">[[File:Wikipedia-logo-v2.svg|44px|none|link=|alt=]]</div>" ..
			"<div style=\"margin-left: 60px;\">" ..
			wmlang:getCanonicalName() .. " [[Wikipedia]] has " .. linktype .. " on:" ..
			"<div style=\"margin-left: 10px;\">" ..
			table.concat(links, " and ") ..
			"</div>" ..
			"</div>" ..
			table.concat(iplinks) ..
			-- Flag pages that pass their own title as an explicit, unlabelled first parameter.
			((args[1] == mw.title.getCurrentTitle().text and not args[2]) and "[[Category:wikipedia with redundant first parameter]]" or "") ..
			"</div>"
	end
end

-- Renders a one-line "<icon> <link> on <project>." pointer to a sister project.
--
-- Invocation arguments (frame.args): prefix, name, image (all required) and
-- compat (boolean). Template arguments come from the parent frame; their
-- positions depend on compat mode:
--   compat:      lang = named "lang", term = 1, alt = 2
--   otherwise:   lang = 1,            term = 2, alt = 3
-- plus i (italicise the label), nodot (omit the final full stop) and sc.
-- `compat` may also be forced by a Lua caller through the second parameter.
-- Raises an error when the language code is not resolved.
function export.projectlink(frame, compat)
	local m_params = require("Module:parameters")

	local iparams = {
		["prefix"] = {required = true},
		["name"] = {required = true},
		["image"] = {required = true},
		["compat"] = {type = "boolean"},
	}

	-- Must be local: the original assigned to an undeclared name, creating a global.
	local iargs = m_params.process(frame.args, iparams)

	compat = compat or iargs.compat
	local lang_param = compat and "lang" or 1
	local term_param = compat and 1 or 2
	local alt_param = compat and 2 or 3

	local params = {
		[lang_param] = {},
		[term_param] = {},
		[alt_param] = {},
		["i"] = {type = "boolean"},
		["nodot"] = {},
		["sc"] = {},
	}

	local args = m_params.process(frame:getParent().args, params)

	local wmcode = args[lang_param] or "en"
	local wmlang = require("Module:wikimedia languages").getByCodeWithFallback(wmcode)
		or error("The Wikimedia language code \"" .. wmcode .. "\" is not valid.")
	local sc = args["sc"] and require("Module:scripts").getByCode(args["sc"], "sc") or nil

	local term = args[term_param] or mw.title.getCurrentTitle().text
	local linkdata = {term = term, alt = args[alt_param] or term}
	if args["i"] then
		linkdata.alt = "''" .. linkdata.alt .. "''"
	end

	-- process_links prefixes linkdata.term in place, so it has to run before
	-- the image's link= target is assembled below.
	local links, iplinks = process_links({linkdata}, iargs["prefix"], iargs["name"], wmlang, sc)

	-- NOTE(review): the separate " " before the project name produces two
	-- consecutive spaces in the output; kept unchanged to preserve the
	-- emitted wikitext exactly.
	return "[[Image:" .. iargs["image"] .. "|15px|link=" .. linkdata.term .. "]] " ..
		table.concat(links, " and ") .. " on " ..
		(wmlang:getCode() == "en" and "" or "the " .. wmlang:getCanonicalName() .. " ") ..
		" " .. iargs["name"] ..
		(args["nodot"] and "" or ".") ..
		table.concat(iplinks)
end

return export
iqfi2fi38bam1z1ski9ljdunohbs1v9