ဝိက်ရှေန်နရဳ
mnwwiktionary
https://mnw.wiktionary.org/wiki/%E1%80%9D%E1%80%AD%E1%80%80%E1%80%BA%E1%80%9B%E1%80%BE%E1%80%B1%E1%80%94%E1%80%BA%E1%80%94%E1%80%9B%E1%80%B3:%E1%80%99%E1%80%AF%E1%80%80%E1%80%BA%E1%80%9C%E1%80%AD%E1%80%80%E1%80%BA%E1%80%90%E1%80%99%E1%80%BA
MediaWiki 1.47.0-wmf.3
case-sensitive
မဳဒဳယာ
တၟေင်
ဓရီုကျာ
ညးလွပ်
ညးလွပ် ဓရီုကျာ
ဝိက်ရှေန်နရဳ
ဝိက်ရှေန်နရဳ ဓရီုကျာ
ဝှာင်
ဝှာင် ဓရီုကျာ
မဳဒဳယာဝဳကဳ
မဳဒဳယာဝဳကဳ ဓရီုကျာ
ထာမ်ပလိက်
ထာမ်ပလိက် ဓရီုကျာ
ရီု
ရီု ဓရီုကျာ
ကဏ္ဍ
ကဏ္ဍ ဓရီုကျာ
အဆက်လက္ကရဴ
အဆက်လက္ကရဴ ဓရီုကျာ
ကာရန်
ကာရန် ဓရီုကျာ
အဘိဓာန်
အဘိဓာန် ဓရီုကျာ
ဗီုပြၚ်သိုၚ်တၟိ
ဗီုပြၚ်သိုၚ်တၟိ ဓရီုကျာ
TimedText
TimedText talk
မဝ်ဂျူ
မဝ်ဂျူ ဓရီုကျာ
Event
Event talk
ထာမ်ပလိက်:rfdef
10
873
395135
395130
2026-05-19T12:10:29Z
咽頭べさ
33
395135
wikitext
text/x-wiki
{{#invoke:checkparams|error}}<!-- Validate template parameters
-->{{ {{#if:{{{lang|}}}|deprecated lang param usage|no deprecated lang param usage}}|lang={{{lang|}}}|<!--
-->{{#switch:{{{lang|{{{1|}}}}}}<!--
-->|en|mul=<!--
-->''ဝေါဟာတဏအ်ဝွံမၞုံပၟိက်မပွံၚ်အဓိပ္ပါဲ {{#if:{{{2|}}}| (''{{{2}}})''}}။ သ္ပဂုန်တုဲရီုဗၚ်ချူပ္တိတ် ကဵု '''မချူဗပေၚ်စုတ်မပွံၚ်အဓိပ္ပါဲ'''၊ မဆုဲလ္ပာ်တေံနကဵုမလိက်{{tl|rfdef}}''။<!--
-->|#default=<!--
-->''ဝေါဟာတဏအ်ဝွံမၞုံပၟိက်ချူကၠာဲစုတ်နကဵုဘာသာမန် This term needs a translation to Mon{{#if:{{{2|}}}| (''{{{2}}})''}}။ သ္ပဂုန်တုဲရီုဗၚ်ချူပ္တိတ် ကဵု '''မချူကၠာဲဗပေၚ်စုတ်'''၊ မဆုဲလ္ပာ်တေံနကဵုမလိက် {{tl|rfdef}}။''<!--
-->}}<!--
-->}}<!--
--><includeonly><!--
-->{{#if:{{{nocat|}}}||<!--
-->{{#if:{{{langname|}}}| |<!--
--><!--
-->}}<!--
-->}}<!--
--></includeonly><!--
--><noinclude>{{documentation}}</noinclude>
dgd21fnoo0l4orvq0e0tco4ow5x22j0
ထာမ်ပလိက်:pi-decl-noun
10
1036
395176
158937
2026-05-20T09:55:26Z
咽頭べさ
33
395176
wikitext
text/x-wiki
<includeonly>{{#invoke:pi-decl/noun|show}}</includeonly><noinclude>
{{documentation}}
{{tcat|ndecl}}</noinclude>
1xy3fg0b9g0axpo40vuq9ts53h0kc9w
ထာမ်ပလိက်:pi-decl-noun/test1
10
1039
395181
158938
2026-05-20T10:29:47Z
咽頭べさ
33
395181
wikitext
text/x-wiki
<includeonly>{{#invoke:pi-decl/noun/testcases|show}}</includeonly><noinclude>{{tcat|ndecl}}</noinclude>
4m6jw78bv007e5cyzeuf97x0s8b3p2u
မဝ်ဂျူ:pi-decl/noun
828
1040
395183
158297
2026-05-20T10:42:38Z
咽頭べさ
33
395183
Scribunto
text/plain
local export = {}
-- require("Module:log globals") -- Examine Lua logs at end of preview for results.
local links = require("Module:links")
local lang = require("Module:languages").getByCode("pi")
local m_parameters = require("Module:parameters")
local m_str_utils = require("Module:string utilities")
local m_translit
local to_script
local find = m_str_utils.find
local gsub = m_str_utils.gsub
local match = m_str_utils.match
local sub = m_str_utils.sub
local u = m_str_utils.char -- For readability.
local load = mw.loadData
local ti = table.insert
local currentScript
local scriptCode
-- Display names (in Mon) of the grammatical genders, keyed by the gender code
-- ("m", "f", "n") that export.present() receives.
local genders = {
["m"] = "ပုလ္လိၚ်", ["f"] = "ဣတ္တိလိၚ်", ["n"] = "နပုလ္လိၚ်",
}
-- Row labels of the declension table, one per case, in the order in which the
-- form sets are stored (row i owns forms[2*i-1] = singular, forms[2*i] = plural).
-- modify() lower-cases the first three characters of each label to build the
-- template parameter names (nom, acc, ins, ...), so these labels are not free text.
local rows = {
"Nominative (first)", "Accusative (second)", "Instrumental (third)", "Dative (fourth)",
"Ablative (fifth)", "Genitive (sixth)", "Locative (seventh)", "Vocative (calling)",
}
-- Stem endings recognised by export.detectEnding().
-- The outer key ("one" .. "six") is the number of trailing characters of the stem
-- that detectEnding() compares against the listed values; the inner key is the
-- ending name that is returned; the value is the list of spellings of that ending
-- in the supported non-Latin scripts.
-- NOTE(review): the entry " ႃ" under ["ā"] appears to begin with a space. The
-- values of "one" are compared with a single character (sub(stem, -1)), so an
-- entry of two characters could never match - confirm whether the space is intended.
local endings = {
["one"] = {
-- key(Ln) Thai Deva Beng Mymr Lana Laoo Khmr
-- Sinh Brah Cakm
["a"] = {},
["ā"] = { "า", "ा", "आ", "া", "আ", "ါ", "ာ", " ႃ", "ᩣ", "ᩤ", "າ", "ា", u(0x17A4),
"ා", "ආ", "𑀸", "𑀆", "𑄂" },
["i"] = { "ิ", "ि", "इ", "ি", "ই", "ိ", "ဣ", "ᩥ", "ᩍ", "ິ", "ិ", "ឥ",
"ි", "ඉ", "𑀺", "𑀇", "𑄨" },
["ī"] = { "ี", "ी", "ई", "ী", "ঈ", "ီ", "ဳ", "ဤ", "ᩦ", "ᩎ", "ີ", "ី", "ឦ",
"ී", "ඊ", "𑀻", "𑀈", "𑄩" },
["u"] = { "ุ", "ु", "उ", "ু", "উ", "ု", "ဥ", "ᩩ", "ᩏ", "ຸ", "ុ", "ឧ",
"ු", "උ", "𑀼", "𑀉", "𑄪" },
["ū"] = { "ู", "ू", "ऊ", "ূ", "ঊ", "ူ", "ဦ", "ᩪ", "ᩐ", "ູ", "ូ", "ឨ", "ឩ",
"ූ", "ඌ", "𑀽", "𑀊", "𑄫" },
["ah"] = { "ะ", "ະ"},
},
["two"] = {
-- key(Ln) Thai Deva Beng Mymr Lana Laoo Khmr
-- Sinh Brah Cakm
["ar"] = { "รฺ", "ัร", "र्", "র্", "ရ်", "ᩁ᩺", "ᩁ᩼", "ຣ໌", "ຣ຺", "ັຣ", "រ៑",
"ර්", "𑀭𑁆", "𑄢𑄴"},
["as"] = { "สฺ", "ัส", "स्", "স্", "သ်", "ᩈ᩺", "ᩈ᩼", "ສ໌", "ສ຺", "ັສ", "ស៑",
"ස්", "𑀲𑁆", "𑄥𑄴" },
["an"] = { "นฺ", "ัน", "न्", "ন্", "န်", "ᨶ᩺", "ᨶ᩼", "ນ໌", "ນ຺", "ັນ", "ន៑",
"න්", "𑀦𑁆", "𑄚𑄴"},
ent = { "นต", "ນຕ"},
["in"] = { "ิน", "ິນ"},
},
three = {
-- key(Ln) Thai Deva Beng Mymr Lana Laoo Khmr
-- Sinh Brah Cakm
ant = { "ันต" , "ັນຕ"},
ent = {},
ont = {},
["in"] = { "ินฺ", "िन्", "িন্", "ိန်", "ᩥᨶ᩺", "ິນ຺", "ិន៑",
"ින්", "𑀺𑀦𑁆", "𑄨𑄚𑄴" },
},
four = {
-- key(Ln) Thai Deva Beng Mymr Lana Laoo Khmr
-- Sinh Brah Cakm
ant = { "นฺตฺ", "न्त्", "ন্ত্", "န္တ်", "ᨶ᩠ᨲ᩺", "ᨶ᩠ᨲ᩼", "ນ຺ຕ໌", "ນ຺ຕ຺", "ន្ត៑",
"න්ත්", "𑀦𑁆𑀢𑁆", "𑄚𑄴𑄖𑄴" },
vant = { "วันต", "ວັນຕ" },
mant = { "มันต", "ມັນຕ" },
},
five = { -- 'ent' and 'ont' are discontiguous for Thai and Lao. Assume NFC (as above).
-- key(Ln) Thai Deva Beng Mymr Lana Laoo Khmr
-- Sinh Brah Cakm
antT = { "න්ත්" },
vant = { "วนฺตฺ", "वन्त्", "ৱন্ত্","ৰন্ত্", "ွန္တ်", "ဝန္တ်", "ᩅᨶ᩠ᨲ᩺", "ᩅᨶ᩠ᨲ᩼", "ວນ຺ຕ຺", "ວນ຺ຕ໌", "វន្ត៑",
"වන්ත්", "𑀯𑀦𑁆𑀢𑁆", "𑅇𑄚𑄴𑄖𑄴" },
mant = { "มนฺตฺ", "मन्त्", "মন্ত্", "မန္တ်", "ᨾᨶ᩠ᨲ᩺", "ᨾᨶ᩠ᨲ᩼", "ມນ຺ຕ຺", "ມນ຺ຕ໌", "មន្ត៑",
"ᩜᨶ᩠ᨲ᩺", "ᩜᨶ᩠ᨲ᩼",
"මන්ත්", "𑀫𑀦𑁆𑀢𑁆", "𑄟𑄚𑄴𑄖𑄴"},
ent = { "ेन्त्", "েন্ত্", "ေန္တ်", "ᩮᨶ᩠ᨲ᩺", "ᩮᨶ᩠ᨲ᩼", "េន្ត៑",
"एन्त्", "এন্ত্", "ဧန္တ်", "ᩑᨶ᩠ᨲ᩺", "ᩑᨶ᩠ᨲ᩼", "ឯន្ត៑",
"ෙන්ත්", "𑁂𑀦𑁆𑀢𑁆", "𑄬𑄚𑄴𑄖𑄴" ,
"එන්ත්", "𑀏𑀦𑁆𑀢𑁆" },
ont = { "ोन्त्", "োন্ত্", "ာန္တ်", "ါန္တ်", "ᩣᨶ᩠ᨲ᩺", "ᩣᨶ᩠ᨲ᩼", "ោន្ត៑",
"ᩤᨶ᩠ᨲ᩺", "ᩤᨶ᩠ᨲ᩼",
"ओन्त्", "ওন্ত্", "ဩန္တ်", "ᩰᨶ᩠ᨲ᩺", "ᩰᨶ᩠ᨲ᩼", "ឲន្ត៑",
"ᩒᨶ᩠ᨲ᩺", "ᩒᨶ᩠ᨲ᩼",
"ොන්ත්", "𑁄𑀦𑁆𑀢𑁆", "𑄮𑄚𑄴𑄖𑄴",
"ඔන්ත්", "𑀑𑀦𑁆𑀢𑁆"},
},
six = {
-- key(Ln) Thai Deva Beng Mymr Lana Laoo Khmr
-- Sinh Brah
vantT = {"වන්ත්" },
mantT = {"මන්ත්" },
entT = {"ෙන්ත්",
"එන්ත්" },
ontT = {"ොන්ත්",
"ඔන්ත්" },
},
}
-- Guess the declension ending of `stem` from its final characters.
-- stem:    the stem (or headword) in any supported script.
-- options: optional table; only options.impl is read ('yes' when absent). It
--          decides how a Thai/Lao stem written with final "ิน"/"ິນ" is treated.
-- Returns the ending name (a key of one of the `endings` sub-tables, or the
-- Latin 'mant'/'vant'), falling back to "a" when nothing matches.
-- Raises an error when options.impl == 'both' and a Thai/Lao stem ends in
-- "ิน"/"ິນ", because the ending is then ambiguous.
function export.detectEnding(stem, options)
-- Correct checking order is last 6, last 5, last 4, last 3, last 2, last 1, but we
-- Can do slightly better by knowing the data.
-- Single-character endings: compare against both the Latin key and every
-- listed non-Latin spelling.
local oneLetter = sub(stem, -1)
for key, arr in pairs(endings.one) do
if oneLetter == key then
return key
end
for _, val in ipairs(arr) do
if oneLetter == val then
return key
end
end
end
-- Check Latin script first
local fourLetters = sub(stem, -4)
if 'mant' == fourLetters or 'vant' == fourLetters then
return fourLetters
end
-- Six- and five-character endings are matched on the listed spellings only;
-- the comparison with the key itself is commented out.
local wordEnd = sub(stem, -6)
for key, arr in pairs(endings.six) do
-- if wordEnd == key then
-- return key
-- end
for _, val in ipairs(arr) do
if wordEnd == val then
return key
end
end
end
wordEnd = sub(stem, -5)
for key, arr in pairs(endings.five) do
-- if wordEnd == key then
-- return key
-- end
for _, val in ipairs(arr) do
if wordEnd == val then
return key
end
end
end
for key, arr in pairs(endings.four) do
if fourLetters == key then return key end
for _, val in ipairs(arr) do
if fourLetters == val then
-- Scripts with visually ordered preposed vowels have not been checked thoroughly
-- When an 'ant' spelling ends in U+0E3A, U+0ECC or U+0EBA (Thai/Lao marks),
-- look further back for a preposed vowel, which turns the ending into 'ent'.
if key == 'ant' and
(oneLetter == u(0x0E3A) or oneLetter == u(0xECC) or
oneLetter == u(0x0EBA)) then
local pm6 = sub(stem, -6, -6)
if match(pm6, '[เโເໂ]') then -- 1 char onset
return 'ent' -- 'ent' for 'ont' matters not.
elseif match(pm6, '['..u(0x0E3A)..u(0x0EBA)..']')
and match(sub(stem, -8, -8), '[เโເໂ]') then -- 2 char onset
return 'ent' -- 'ent' for 'ont' matters not.
else
return key
end
else
return key
end
end
end
end
local threeLetters = sub(stem, -3)
for key, arr in pairs(endings.three) do
if threeLetters == key then
return key
end
for _, val in ipairs(arr) do
if threeLetters == val then return key; end
end
end
local impl = options and options.impl or 'yes' -- Fudge to pass old tests.
wordEnd = sub(stem, -2)
for key, arr in pairs(endings.two) do
if wordEnd == key then
return key
end
for _, val in ipairs(arr) do
if wordEnd == val then
if key == 'ent' then
-- Thai/Lao "นต"/"ນຕ": the character before it decides between 'ant'
-- (U+0E31 / U+0EB1 present) and 'ent' (preposed vowel plus a one- or
-- two-consonant onset). If neither applies nothing is returned here and
-- the search simply continues.
local pm3 = sub(stem, -3, -3)
if match(pm3, '['..u(0x0e31)..u(0xeb1)..']') then
-- Recognise below
return 'ant'
elseif match(sub(stem, -4, -3), '[เโເໂ][ก-ฮກ-ຮ]') then -- 1 char onset
return 'ent'
elseif match(sub(stem, -5, -3), '[เโເໂ][ก-ฮກ-ຮ][ก-ฮກ-ຮ]') then -- 2 char onset
return 'ent'
end
elseif wordEnd == "ิน" or wordEnd == "ິນ" then
-- With impl 'yes' this spelling is reported as an a-stem; with any value
-- other than 'yes' or 'both' it is reported as the 'in' ending.
if impl == 'yes' then
return 'a'
elseif impl == 'both' then
error("Does "..stem.." end in -in or -ina?")
else
return key
end
else
return key
end
end
end
end
-- Nothing matched: treat the stem as an a-stem.
return "a"
end
-- Selectively converts touching to conjoining.
-- Lookup table used by export.joinSuffix() for Sinhala: each key is a cluster
-- that still contains the internal join marker Ⓙ, each value is the spelling
-- that replaces it. Clusters that are not listed are left unchanged by the
-- gsub() that consults this table.
local sinh_flip = {["කⒿ්ව"]="ක්ව",
["තⒿ්ථ"]="ත්ථ", ["තⒿ්ව"]="ත්ව",
["නⒿ්ථ"]="න්ථ", ["නⒿ්ද"]="න්ද", ["නⒿ්ධ"]="න්ධ", ["නⒿ්ව"]="න්ව",
}
-- Attach every suffix in `suffixes` to `stem` and apply the script-specific
-- spelling adjustments.
-- scriptCode: script code of the stem ('Thai', 'Mymr', 'Lana', 'Beng', 'Laoo',
--             'Sinh', 'Latn', ...).
-- stem:       the stem; must not be nil (an error describing the call is raised).
-- suffixes:   sequence of suffix strings. Each leading "⌫" (up to five) deletes
--             one trailing character of the stem before the rest is appended;
--             "↶" asks for the following preposed Thai/Lao vowel to be moved in
--             front of the preceding consonant.
-- option:     optional table; only option.aa is read (Lana only): "default",
--             "tall", "round" or "both". Any other value raises an error.
-- Returns a sequence of the resulting forms. For Lana an extra variant is
-- emitted in front of a form whenever the tall-aa and round-aa spellings differ.
function export.joinSuffix(scriptCode, stem, suffixes, option)
	if stem == nil then
		-- Build the diagnostic in a local table (it used to leak as a global) and
		-- stringify scriptCode so that a nil script cannot mask the real problem.
		local errmes = {}
		table.insert(errmes, 'joinSuffix('..tostring(scriptCode))
		table.insert(errmes, tostring(stem))
		table.insert(errmes, tostring(suffixes))
		table.insert(errmes, tostring(option)..')')
		error(table.concat(errmes, ','))
	end
	local output = {}
	local term
	local aa = option and option.aa or "default"
	local join, term2
	-- Ⓙ marks the stem/suffix boundary for the scripts whose fix-ups need it;
	-- it is always removed again before the form is emitted.
	if scriptCode == 'Lana' or scriptCode == 'Mymr' or scriptCode == 'Sinh' then
		join = 'Ⓙ'
	else
		join = ""
	end
	for _,suffix in ipairs(suffixes) do
		if match(suffix, "^⌫⌫⌫⌫⌫") then --backspace
			term = sub(stem, 1, -6) .. join .. sub(suffix, 6, -1)
		elseif match(suffix, "^⌫⌫⌫⌫") then --backspace
			term = sub(stem, 1, -5) .. join .. sub(suffix, 5, -1)
		elseif match(suffix, "^⌫⌫⌫") then --backspace
			term = sub(stem, 1, -4) .. join .. sub(suffix, 4, -1)
		elseif match(suffix, "^⌫⌫") then --backspace
			term = sub(stem, 1, -3) .. join .. sub(suffix, 3, -1)
		elseif match(suffix, "^⌫") then --backspace
			term = sub(stem, 1, -2) .. join .. sub(suffix, 2, -1)
		else
			term = stem .. join .. suffix
		end
		--note: Sinh conjuncts are already ready.
		if scriptCode == "Thai" then
			term = gsub(term, "(.)↶([เโ])", "%2%1") --swap
		elseif scriptCode == "Mymr" then
			-- term = gsub(term, "င္", "င်္", "ၚ္", "ၚ်္") -- Pali doesn't have -Vr mid-word like Sanskrit, so no need to include repha.
			-- NOTE(review): the pattern below has four captures but the replacement
			-- uses only %1..%3, so a matched ေ would be dropped. Left untouched
			-- because the original marks the line as redundant - confirm.
			term = gsub(term, "(င်္)(ၚ်္)([ခဂငဒပဝ])(ေ?)Ⓙာ", "%1%2%3ါ") -- redundant!
			-- term = gsub(term, "္[ယရ]", { ["္ယ"] = "ျ", ["္ရ"] = "ြ" }) --these not need tall aa
			term = gsub(term, "Ⓙ္[ယရ]", { ["Ⓙ္ယ"] = "ျ", ["Ⓙ္ရ"] = "ြ" }) --these not need tall aa
			-- Replace round aa by tall aa after the listed consonants.
			term = gsub(term, "^([ခဂငဒပဝ])Ⓙ(ေ?)ာ", "%1%2ါ")
			term = gsub(term, "([^္])([ခဂငဒပဝ])Ⓙ(ေ?)ာ", "%1%2%3ါ")
			term = gsub(term, "([^္])Ⓙ([ခဂငဒပဝ])(ေ?)ာ", "%1%2%3ါ")
			term = gsub(term, "([ခဂငဒပဝ])(္[က-အဿ])Ⓙ(ေ?)ာ", "%1%2%3ါ")
			term = gsub(term, "([ခဂငဒပဝ])Ⓙ(္[က-အဿ])(ေ?)ာ", "%1%2%3ါ")
			-- term = gsub(term, "္[ဝဟ]", { ["္ဝ"] = "ွ", ["္ဟ"] = "ှ" })
			-- term = gsub(term, "ဉ္ဉ", "ည")
			-- term = gsub(term, "သ္သ", "ဿ")
			term = gsub(term, 'Ⓙ', '')
		elseif scriptCode == "Lana" then
			-- term2 holds the alternative spelling; it is emitted only when it
			-- differs from term (see the comparison after this chain).
			if aa == "both" then
				term2 = gsub(term, 'Ⓙ', '')
			end
			if aa == "tall" or aa == "both" then
				term = gsub(term, "^([ᨣᨴᨵᨷᩅ])Ⓙ(ᩮ?)ᩣ", "%1%2ᩤ")
				term = gsub(term, "([^᩠])([ᨣᨴᨵᨷᩅ])Ⓙ(ᩮ?)ᩣ", "%1%2%3ᩤ")
				term = gsub(term, "([^᩠])Ⓙ([ᨣᨴᨵᨷᩅ])(ᩮ?)ᩣ", "%1%2%3ᩤ")
				term = gsub(term, "([ᨣᨴᨵᨷᩅ])(᩠[ᨠ-ᩌᩔ])Ⓙ(ᩮ?)ᩣ", "%1%2%3ᩤ")
				term = gsub(term, "([ᨣᨴᨵᨷᩅ])Ⓙ(᩠[ᨠ-ᩌᩔ])(ᩮ?)ᩣ", "%1%2%3ᩤ")
				term = gsub(term, "(ᨻᩛ)Ⓙ(ᩮ?)ᩣ", "%1%2ᩤ")
				term = gsub(term, 'Ⓙ', '')
				if aa == "tall" then
					term2 = term
				end
			elseif aa == "round" then
				term = gsub(term, 'Ⓙ', '')
				term2 = term
			elseif aa == "default" then
				-- term = gsub(term, "ᨦ᩠", "ᩘ")
				term = gsub(term, "^([ᨣᨴᨵᨷᩅ])Ⓙ(ᩮ?)ᩣ", "%1%2ᩤ")
				term = gsub(term, "([^᩠])([ᨣᨴᨵᨷᩅ])Ⓙ(ᩮ?)ᩣ", "%1%2%3ᩤ")
				term = gsub(term, "([^᩠])Ⓙ([ᨣᨴᨵᨷᩅ])(ᩮ?)ᩣ", "%1%2%3ᩤ")
				term = gsub(term, "([ᨣᨴᨵᨷᩅ])(᩠[ᨠ-ᩌᩔ])Ⓙ(ᩮ?)ᩣ", "%1%2%3ᩤ")
				term = gsub(term, "([ᨣᨴᨵᨷᩅ])Ⓙ(᩠[ᨠ-ᩌᩔ])(ᩮ?)ᩣ", "%1%2%3ᩤ")
				-- term = gsub(term, "᩠[ᩁᩃ]", { ["᩠ᩁ"] = "ᩕ", ["᩠ᩃ"] = "ᩖ" })
				-- term = gsub(term, "([ᨭ-ᨱ])᩠ᨮ", "%1ᩛ")
				-- term = gsub(term, "([ᨷ-ᨾ])᩠ᨻ", "%1ᩛ")
				-- term = gsub(term, "ᩈ᩠ᩈ", "ᩔ")
				term = gsub(term, 'Ⓙ', '')
				term2 = term
			else
				error('Parameter aa has undefined value "'..aa..'".')
			end
			if term ~= term2 then table.insert(output, term2) end
		elseif scriptCode == "Beng" then
			term = gsub(term, "ৰ্", "ৰ"..u(0x200d).."্") -- ৰ্(v-) needs ZWJ to display correctly
		elseif scriptCode == "Laoo" then
			term = gsub(term, "(.຺?)↶([ເໂ])", "%2%1")
		elseif scriptCode == "Sinh" then
			-- Assume cluster formation appends the joiner.
			term = gsub(term, "[කතන]Ⓙ..[ථදධව]", sinh_flip)
			term = gsub(term, 'Ⓙ', '')
		end
		table.insert(output, term)
	end
	return output
end
-- Build all form sets of a declension table.
-- `pattern` holds two suffix lists per row of `rows` (odd index = singular,
-- even index = plural); each list is joined onto `stem` with
-- export.joinSuffix(). Returns the sequence of joined form lists, indexed the
-- same way as `pattern`.
function export.joinSuffixes(scriptCode, stem, pattern, option)
	local joined = {}
	-- Two slots per row, visited in ascending order.
	for slot = 1, 2 * #rows do
		joined[slot] = export.joinSuffix(scriptCode, stem, pattern[slot], option)
	end
	return joined
end
-- Render a list of alternative forms as links separated by a small "or".
-- script:  script object of the forms (its code selects the transliteration path).
-- list:    sequence of terms.
-- options: optional table; reads showtr ('plain' by default, 'link' or 'none'),
--          subst (comma-separated "from/to" or "from//to" replacements applied
--          before transliterating) and impl.
-- Returns the concatenated wikitext. Raises an error for an unknown showtr value.
function export.orJoin(script, list, options) -- options is optional!
	local code = script:getCode()
	local showtr = options and options.showtr or 'plain'
	-- Latin-script forms never get a transliteration.
	if code == 'Latn' then showtr = 'none' end
	local pieces = {}
	for position, term in ipairs(list) do
		-- links.full_link() is at liberty to trash this table.
		local item = {sc = script, lang = lang, term = term}
		if position > 1 then
			ti(pieces, " <small style=\"color:var(--wikt-palette-grey-8,#888)\">or</small> ")
		end
		if showtr == 'none' then
			item.tr = '-'
		else
			-- Text that is transliterated; it differs from the displayed term
			-- only when substitutions apply.
			local shown = term
			if options and options.subst then
				-- Legal stuff:
				-- The contents of this block were lifted from English Wiktionary Module:usex lines 97 to 101
				-- of 3 July 2021, which see for attribution, and then localised.
				for _, pair in ipairs(mw.text.split(options.subst, ",")) do
					local delimiter = find(pair, "//") and "//" or "/"
					local parts = mw.text.split(pair, delimiter)
					shown = gsub(shown, parts[1], parts[2])
				end
			end
			local aslat = nil
			local explicit_thai = code == 'Thai' and options and options.impl == 'no'
			if explicit_thai or code == 'Laoo' then
				m_translit = m_translit or require("Module:pi-translit")
				aslat = m_translit.trwo(shown, 'pi', code, options)
			elseif shown ~= item.term then -- Must complete transliteration
				aslat = (lang:transliterate(shown, script))
			end
			if showtr == 'link' then
				aslat = aslat or (lang:transliterate(shown, script))
				item.tr = links.full_link({term = aslat, lang = lang})
			elseif showtr == 'plain' then
				item.tr = aslat
			else
				item.tr = '-'
				error('Bad value for option showtr.')
			end
		end
		ti(pieces, links.full_link(item))
	end
	return table.concat(pieces)
end
-- Convert one list of Latin-script suffixes into suffixes for script `sc`.
-- The suffixes are first joined to the Latin pseudo-stem `stem`, the result is
-- transliterated, and the part belonging to the pseudo-stem is cut off again.
-- Every result is prefixed with `nstrip` backspace markers ("⌫").
-- c2 is the second character of the transliterated pseudo-stem; ignored if nil.
local function convert_one_set(stem, nstrip, suffixes, sc, impl, c2)
	local backspaces = string.rep("⌫", nstrip)
	local tr_options = {impl = impl}
	local converted = {}
	for index, latin_form in pairs(export.joinSuffix('Latn', stem, suffixes)) do
		-- A '#' is appended for the transliteration and removed afterwards.
		local native = sub(to_script(latin_form..'#', sc, tr_options), 1, -2)
		local preposed = match(native, "^[เโເໂ]")
		if preposed then
			-- Preposed Thai/Lao vowel: keep it, flagged with ↶ so that
			-- joinSuffix() can put it before the consonant.
			converted[index] = backspaces .. "↶" .. preposed .. sub(native, 3)
		elseif c2 and c2 == sub(native, 2, 2) then
			-- Quick cheat for Myanmar script variants: the second character is
			-- shared with the pseudo-stem, so delete one character fewer.
			converted[index] = sub(backspaces, 2) .. sub(native, 3)
		else
			-- Normal case: drop the pseudo-stem's first character.
			converted[index] = backspaces .. sub(native, 2)
		end
	end
	return converted
end
-- Convert a complete Latin-script suffix pattern (16 suffix lists) to script `sc`.
-- stem:     Latin pseudo-stem the suffixes are attached to for transliteration.
-- nstrip:   number of trailing stem characters each suffix has to delete.
-- suffixes: the 16 suffix lists of the Latin pattern.
-- impl:     implicit-vowel setting passed on to the transliterator.
-- Returns a table of 16 converted suffix lists. Raises an error when the
-- pattern does not contain exactly 16 lists.
local convert_suffixes = function(stem, nstrip, suffixes, sc, impl)
	local xlitend = {}
	to_script = to_script or require("Module:pi-Latn-translit").tr
	local c2
	if nstrip > 0 and sc == 'Mymr' then
		-- Second character of the transliterated pseudo-stem. This used to read
		-- an undefined global `option` (i.e. nil); pass the same options that
		-- convert_one_set() uses so both transliterations are comparable.
		c2 = sub(to_script(stem, sc, {impl = impl}), 2, 2)
	end
	-- Seemingly #suffixes doesn't work because the module is loaded!
	-- Testing didn't reveal a problem, but avoiding it solved the problem!
	-- for k = 1, #suffixes do
	if #suffixes ~= 16 then error('#suffixes = '..tostring(#suffixes)) end
	for k, _ in ipairs(suffixes) do
		xlitend[k] = convert_one_set(stem, nstrip, suffixes[k], sc, impl, c2)
	end
	return xlitend
end
-- Rewrite the Lao plural forms in slots 6 and 10 that end in 'ຠິ'.
-- retval: sequence of form lists; it is shallow-copied, never modified.
-- liap:   which spellings to emit for such a form, in this order:
--         'b' -> ...ພິ, 'bh' -> the form unchanged, 'b.' -> ...ພ຺ິ; the codes
--         may be combined ('bbh', 'bb.', 'bhb.', 'bbhb.' or 'all'); 'none'
--         drops the form. Any other value raises an error.
-- Returns the modified copy.
local liapise = function(retval, liap) -- Change Lao abl/ins plural
	-- Copy list to avoid changing data from data module.
	local copy = {}
	for _, forms in ipairs(retval) do table.insert(copy, forms) end
	local everything = {b = true, bh = true, dot = true}
	local selections = {
		['b'] = {b = true},
		['bh'] = {bh = true},
		['b.'] = {dot = true},
		['bbh'] = {b = true, bh = true},
		['bb.'] = {b = true, dot = true},
		['bhb.'] = {bh = true, dot = true},
		['none'] = {},
		['all'] = everything,
		['bbhb.'] = everything,
	}
	local wanted = selections[liap]
	if not wanted then
		error('Value "'..liap..'" of liap is not understood.')
	end
	for _, caseno in ipairs({6, 10}) do
		local rewritten = {}
		for _, form in ipairs(copy[caseno]) do
			if sub(form, -2, -1) ~= 'ຠິ' then
				table.insert(rewritten, form)
			else
				local base = sub(form, 1, -3)
				if wanted.b then table.insert(rewritten, base..'ພິ') end
				if wanted.bh then table.insert(rewritten, form) end
				if wanted.dot then table.insert(rewritten, base..'ພ຺ິ') end
			end
		end
		copy[caseno] = rewritten
	end
	return copy
end
-- Choose the Lao letter used for y in the first `nvals` form lists.
-- retval: sequence of form lists; it is shallow-copied, never modified.
-- yval:   'ຍ' or 'yung' -> use ຍ; 'ຢ' or 'yaa' -> use ຢ; 'both' -> emit
--         each form twice, first with ຍ, then with ຢ. Any other value raises
--         an error.
-- nvals:  number of leading lists to process.
-- Returns the modified copy.
local yselect = function(retval, yval, nvals) -- Change Lao case ending
	-- Copy list to avoid changing data from data module.
	local copy = {}
	for _, forms in ipairs(retval) do table.insert(copy, forms) end
	local letters
	if yval == 'both' then
		letters = {'ຍ', 'ຢ'}
	elseif yval == 'ຍ' or yval == 'yung' then
		letters = {'ຍ'}
	elseif yval == 'ຢ' or yval == 'yaa' then
		letters = {'ຢ'}
	else
		error('Value "'..yval..'" of argument y is not understood.')
	end
	for caseno = 1, nvals do
		local rewritten = {}
		for _, form in ipairs(copy[caseno]) do
			for _, letter in ipairs(letters) do
				-- gsub() returns several values; the parentheses keep only the string.
				rewritten[#rewritten + 1] = (gsub(form, '[ຍຢ]', letter))
			end
		end
		copy[caseno] = rewritten
	end
	return copy
end
-- Concatenate two sequences into a new one, skipping every element of `a2`
-- that is already in the result (elements of `a1` are copied as they are).
-- One of the arrays may have been 'loaded', so the # operator is avoided and
-- the length is tracked by hand.
function export.arrcat_nodup(a1, a2)
	local merged, size = {}, 0
	local function already_present(value)
		for slot = 1, size do
			if merged[slot] == value then return true end
		end
		return false
	end
	for _, value in ipairs(a1) do
		size = size + 1
		merged[size] = value
	end
	for _, value in ipairs(a2) do
		if not already_present(value) then
			size = size + 1
			merged[size] = value
		end
	end
	return merged
end
local arrcat = export.arrcat_nodup
-- Build the suffix pattern for impl == 'both': the union, slot by slot, of the
-- patterns for implicit ('yes') and explicit ('no') vowels, implicit forms first.
-- `option` must be a table; its impl field is switched temporarily and put
-- back before returning. Returns a table of 16 suffix lists.
local both_sets = function(scriptCode, ending, g, option)
	-- Remember the caller's setting: `option` is the caller's table and was
	-- previously left with impl == 'no'.
	local requested = option.impl
	option.impl = 'yes'
	local iset = export.getSuffixes(scriptCode, ending, g, option)
	option.impl = 'no'
	local eset = export.getSuffixes(scriptCode, ending, g, option)
	option.impl = requested
	-- iset, eset and retval used to be accidental globals.
	local retval = {}
	for ic = 1, 16 do
		retval[ic] = arrcat(iset[ic], eset[ic])
	end
	return retval
end
-- Describe how Latin suffixes of a given ending are converted to another script.
-- ending: ending name as returned by export.detectEnding().
-- impl:   'yes' (implicit vowels) or 'no' (explicit vowels).
-- Returns a fresh table {pseudoStem = <Latin stem used for transliteration>,
-- ndel = <number of trailing characters the suffix must delete>}, or nil when
-- the ending is not listed. For endings whose length depends on `impl`, ndel is
-- nil when impl is neither 'yes' nor 'no'.
local function wayToConvert(ending, impl)
	local explicit = impl == 'no'
	local function entry(pseudo, count)
		return {pseudoStem = pseudo, ndel = count}
	end
	-- Length by implicitness.
	local ant_len = ({yes = 4, no = 3})[impl]
	local in_len = ({yes = 3, no = 2})[impl]
	local ways = {}
	ways.a = entry('ka', explicit and 1 or 0)
	for _, name in ipairs({'ar', 'as', 'an'}) do
		ways[name] = entry('k' .. name, 2)
	end
	for _, name in ipairs({'ant', 'mant', 'vant'}) do
		ways[name] = entry('kant', ant_len)
	end
	for _, name in ipairs({'ent', 'ont'}) do
		if explicit then
			ways[name] = entry('knt', 2)
		else
			ways[name] = entry('kant', ant_len)
		end
	end
	for _, name in ipairs({'antT', 'entT', 'ontT', 'mantT', 'vantT'}) do
		ways[name] = entry('kant', 5)
	end
	for _, vowel in ipairs({'ā', 'i', 'ī', 'u', 'ū'}) do
		ways[vowel] = entry('k' .. vowel, 1)
	end
	ways['in'] = entry('kin', in_len)
	return ways[ending]
end
-- Return the suffix pattern (16 suffix lists) for an ending and gender.
-- scriptCode: script the suffixes are wanted in.
-- ending:     ending name as returned by export.detectEnding().
-- g:          gender key used in the data modules.
-- option:     optional table; option.impl is 'yes' (default), 'no' or 'both'.
-- A pattern tabulated in "Module:pi-decl/noun/<scriptCode>" is returned as it
-- is when impl is 'yes' or the ending is 'ah'; otherwise the Latin pattern is
-- converted to the requested script.
-- Returns nil when the script is Latin and has no such pattern, or when no
-- conversion is known for the ending. Raises an error when the Latin data
-- module has no pattern for the ending and gender.
function export.getSuffixes(scriptCode, ending, g, option)
	local impl = option and option.impl or 'yes'
	if (impl == 'both') then
		return both_sets(scriptCode, ending, g, option)
	end
	local pattern = load("Module:pi-decl/noun/" .. scriptCode)
	local applicable = pattern and pattern[ending] and pattern[ending][g]
	if applicable then
		if impl == 'yes' or ending == 'ah' then
			return applicable
		end
	elseif 'Latn' == scriptCode then
		return nil
	elseif 'ah' == ending then
		-- No tabulated 'ah' pattern: fall back to explicit-vowel a-stem forms.
		ending = 'a'
		impl = 'no'
	end
	pattern = require("Module:pi-decl/noun/Latn") -- Why doesn't load work with testcases?
	-- Endings with a trailing 'T' share the Latin table of the plain ending.
	local tabulated_ending = ending
	if 'T' == sub(ending, -1) then
		tabulated_ending = sub(ending, 1, -2)
	end
	applicable = pattern and pattern[tabulated_ending] and
		pattern[tabulated_ending][g]
	if not applicable then
		error('Not even Latin script has ' .. g .. ' -'..tabulated_ending..
			' endings.')
	end
	-- `way` used to be an accidental global.
	local way = wayToConvert(ending, impl)
	if not way then return nil end
	return convert_suffixes(way.pseudoStem, way.ndel, applicable,
		scriptCode, impl)
end
-- Render the declension table as HTML.
-- stem:    text shown in the table heading.
-- g:       gender code looked up in `genders`, or 'no' to omit the gender.
-- forms:   form lists, two per row of `rows` (odd = singular, even = plural).
-- number:  nil or 'both' for both columns, 's' for singular only, 'p' for
--          plural only; anything else raises an error.
-- options: optional table passed on to export.orJoin().
-- Rows whose singular and plural lists are both empty are left out.
-- Returns the HTML as one string.
function export.present(stem, g, forms, number, options) -- options is optional
	local gmark, dos, dop
	if 'no' == g then
		gmark = ''
	else
		gmark = ' (' .. genders[g] .. ')'
	end
	if not number or number == 'both' then
		dos = 1; dop = 1
	elseif number == 's' then
		dos = 1; dop = nil;
	elseif number == 'p' then
		dos = nil; dop = 1;
	else
		error('Parameter "number" has meaningless value "'..number..'".' )
	end
	local output = {}
	table.insert(output, '<div class="NavFrame" style="min-width:30%"><div class="NavHead" style="background:var(--wikt-palette-lightblue,#d9ebff)">Declension table of "' .. stem .. '"' .. gmark..'</div><div class="NavContent">')
	table.insert(output, '<table class="inflection-table" style="background:var(--wikt-palette-paleblue,#f8f9fa);text-align:center;width:100%"><tr><th style="background:var(--wikt-palette-cyan,#eaffff)">Case / Number</th>')
	if dos then
		table.insert(output, '<th style="background:var(--wikt-palette-cyan,#eaffff)">Singular</th>')
	end
	if dop then
		table.insert(output, '<th style="background:var(--wikt-palette-cyan,#eaffff)">Plural</th>')
	end
	-- Close the header row in every case; it used to be closed only together
	-- with the Plural heading, leaving singular-only tables with an open <tr>.
	table.insert(output, '</tr>')
	for i,v in ipairs(rows) do
		if #forms[2*i-1] > 0 or #forms[2*i] > 0 then
			table.insert(output, "<tr><td style=\"background:var(--wikt-palette-cyan,#eaffff)\">" .. v .. "</td>")
			if dos then
				table.insert(output, "<td>")
				table.insert(output, export.orJoin(currentScript, forms[2 * i - 1], options))
				table.insert(output, "</td>")
			end
			if dop then
				table.insert(output, "<td>")
				table.insert(output, export.orJoin(currentScript, forms[2 * i], options))
				table.insert(output, "</td>")
			end
			table.insert(output, "</tr>")
		end
	end
	table.insert(output, "</table></div></div>")
	return table.concat(output)
end
-- Placeholder for functionality that has not been implemented: always raises
-- the error 'Code missing.'.
local function unwritten() error('Code missing.') end
-- Apply liapise() to a single form list.
-- liapise() works on slots 6 and 10 of a pattern, so the list is placed in
-- slot 6 of a dummy pattern of 16 otherwise empty lists and read back from there.
local function liapise_one_set(set, liap)
	local dummy = {}
	for slot = 1, 16 do
		dummy[slot] = {}
	end
	dummy[6] = set
	return liapise(dummy, liap)[6]
end
-- Apply the user-supplied alternatives for one case/number slot.
-- stem, ending: stem and ending name of the declined word.
-- name:         parameter name of the slot (e.g. 'noms'); at[name] is the list
--               of alternatives and at[name..'_mod'] says how to combine them
--               with the generated forms: 'after', 'before', 'replace' or 'blank'.
-- caseno:       index of the slot in `forms`.
-- forms:        table of form lists; modified in place.
-- at:           processed template arguments.
-- An alternative starting with '+' is a suffix that is joined to the stem;
-- anything else is a complete word. Latin-script values are converted to the
-- script of the table. Raises an error for an unknown *_mod value.
local function modify_form_set(stem, ending, name, caseno, forms, at)
	local ipalts = at[name]
	local way = at[name..'_mod']
	to_script = to_script or require("Module:pi-Latn-translit").tr
	-- Slots 6 and 10 are the ones liapise() rewrites for Lao.
	local lao_liap_slot = scriptCode == 'Laoo' and (caseno == 6 or caseno == 10)
	-- 'default' is the "not supplied" value of liap and y; liapise() and
	-- yselect() do not accept it, so those steps are skipped then, just as
	-- export.show() skips them when the arguments are absent.
	local use_liap = at.liap ~= 'default'
	local use_y = at.y ~= 'default'
	if ipalts and #ipalts > 0 then
		local alts = {}
		for _, v in ipairs(ipalts) do
			local c1 = string.sub(v,1,1)
			local vsc = lang:findBestScript(v):getCode()
			if vsc == 'None' then
				-- Fall back to the sc argument. This used to read an undefined
				-- global `sc`, which made the fallback a no-op.
				vsc = at.sc or vsc
			end
			if '+' == c1 then
				local vext
				v = string.sub(v,2)
				if vsc ~= 'Latn' then
					-- The original referred to an undefined `dc` hook guarded by
					-- at.dc, which is not a declared parameter; the suffix is
					-- used as given.
					vext = {v}
				elseif scriptCode ~= 'Latn' then
					local impls
					if at.impl == 'both' then
						impls = {'yes', 'no'}
					else
						impls = {at.impl}
					end
					vext = {}
					for _, impl in ipairs(impls) do
						local cvtway = wayToConvert(ending, impl)
						local vset = convert_one_set(cvtway.pseudoStem, cvtway.ndel,
							{v}, scriptCode, impl, nil)
						vext = arrcat(vext, vset)
					end
					if scriptCode == 'Laoo' and use_y then
						-- yselect() returns a table of lists; index it (it was
						-- called like a function, which always raised an error).
						local vexset = yselect({vext}, at.y, 1)
						vext = vexset[1]
					end
				else
					vext = {v}
				end
				if lao_liap_slot and vsc == 'Latn' and use_liap then
					vext = liapise_one_set(vext, at.liap)
				end
				for _, vv in ipairs(export.joinSuffix(scriptCode, stem, vext, at)) do
					ti(alts, vv)
				end
			elseif vsc == scriptCode then
				ti(alts, v)
			elseif vsc == 'Latn' then
				-- TODO: Sane Myanmar and Lao script support.
				local options = {}
				local vext = {}
				if at.impl and at.impl == 'both' then
					options.y = at.y -- Probably ineffective
					options.impl = 'yes'
					ti(vext, to_script(v, scriptCode, options))
					options.impl = 'no'
					ti(vext, to_script(v, scriptCode, options))
				else
					ti(vext, to_script(v, scriptCode, options))
				end
				if lao_liap_slot then
					if use_liap then
						vext = liapise_one_set(vext, at.liap)
					end
					if use_y then
						local vexset = yselect({vext}, at.y, 1)
						vext = vexset[1]
					end
				end
				for _, vv in ipairs(vext) do ti(alts, vv) end
			else
				ti(alts, v) -- Go ahead anyway
			end
		end
		if 'after' == way then
			forms[caseno] = arrcat(forms[caseno], alts)
		elseif 'before' == way then
			forms[caseno] = arrcat(alts, forms[caseno])
		elseif 'replace' == way then
			forms[caseno] = alts;
		elseif 'blank' == way then
			-- Issue warning about alts?
			forms[caseno] = {}
		else
			error('Bad value for parameter '..name..'_mod')
		end
	elseif 'blank' == way then
		forms[caseno] = {}
	end
end
-- Apply the per-case template arguments to the generated forms.
-- stem, ending: stem and ending name of the declined word.
-- forms:        table of 16 form lists (two per case); modified in place.
-- args:         raw template arguments; validated with Module:parameters.
-- For every case <c> in nom, acc, ins, dat, abl, gen, loc, voc:
--   no<c>              blanks both numbers of the case;
--   <c>s / <c>p        list alternatives for singular / plural;
--   <c>s_mod / <c>p_mod say how they are combined (default 'after').
-- Returns `forms`.
local function modify(stem, ending, forms, args)
	local mod_default = 'after'
	-- Case codes in table-row order. They are listed explicitly instead of
	-- being cut out of the display labels in `rows`, so that the labels can be
	-- reworded or localized without changing the parameter names.
	local case_codes = {'nom', 'acc', 'ins', 'dat', 'abl', 'gen', 'loc', 'voc'}
	local params = {
		[1] = {alias_of = 'stem'},
		[2] = {alias_of = 'ending'},
		[3] = {alias_of = 'g'},
		stem = {},
		ending = {},
		g = {required = true},
		gender = {alias_of = 'g'},
		v = {},
		variation = {alias_of = 'v'},
		label = {},
		number = {},
		showtr = {},
		subst = {},
		sc = {},
		aa = {default = 'default'},
		liap = {default = 'default'},
		impl = {default = 'yes'},
		y = {default = 'default'},
	}
	-- Each parameter gets its own specification table, as in a literal list.
	for _, code in ipairs(case_codes) do
		params['no'..code] = {type = 'boolean'}
		params[code..'s'] = {list = true}
		params[code..'s_mod'] = {default = mod_default}
		params[code..'p'] = {list = true}
		params[code..'p_mod'] = {default = mod_default}
	end
	local at = m_parameters.process(args, params)
	if ending == 'ah' then
		at.impl = 'no'
	end
	for i, name in ipairs(case_codes) do
		if at['no'..name] then
			forms[2*i] = {}
			forms[2*i-1] = {}
		else
			modify_form_set(stem, ending, name..'s', 2*i-1, forms, at)
			modify_form_set(stem, ending, name..'p', 2*i, forms, at)
		end
	end
	return forms;
end
-- Template entry point: build the declension table for the invoking page.
-- Reads the arguments of the parent frame: the stem (1 / stem, defaulting to
-- the page name), the ending (2 / ending, detected when absent), the gender
-- (3 / g / gender, required) and the options sc, impl, showtr, liap, y, aa,
-- subst, label and number. Sets the module-level currentScript and scriptCode.
-- Returns the HTML of the table; raises an error when no gender is given.
function export.show(frame)
	local args = frame:getParent().args
	local PAGENAME = mw.loadData("Module:headword/data").pagename
	local stem = args[1] or args["stem"] or PAGENAME
	currentScript = lang:findBestScript(stem)
	scriptCode = currentScript:getCode()
	-- An undetectable script may be supplied explicitly.
	if scriptCode == "None" and args["sc"] then
		scriptCode = args["sc"]
		currentScript = require("Module:scripts").getByCode(scriptCode, "No such script as "..scriptCode)
	end
	local g = args[3] or args["g"] or args["gender"] -- for each gender only
	local variation = args["v"] or args["variation"] -- for some scripts
	if not g then
		error("A gender is required to display proper declensions.")
	end
	-- Gender 'no' still needs a pattern; masculine is used. Arbitrary!
	local lookup_g = ('no' == g) and 'm' or g
	local option = {impl = args["impl"] or 'yes'}
	local xlit_options = {impl = option.impl, showtr = args.showtr}
	local ending = args[2] or args["ending"] or export.detectEnding(stem, option)
	if ending == 'ah' then xlit_options.impl = 'no' end
	local is_lao = scriptCode == 'Laoo'
	local selectedPattern =
		export.getSuffixes(scriptCode, ending, lookup_g, option)
	if is_lao and args["liap"] then
		selectedPattern = liapise(selectedPattern, args["liap"])
	end
	if is_lao and args.y then
		selectedPattern = yselect(selectedPattern, args.y, 16)
		xlit_options.y = args.y
	end
	option.aa = args["aa"] -- Reusable!
	local forms = export.joinSuffixes(scriptCode, stem, selectedPattern, option)
	modify(stem, ending, forms, args)
	-- Remove duplicates.
	for ic = 1, 16 do
		forms[ic] = arrcat({}, forms[ic])
	end
	xlit_options.subst = args["subst"]
	-- for name, _ in pairs(_G) do mw.addWarning('Global '..name) end
	return export.present(args["label"] or stem, g, forms, args["number"], xlit_options)
end
return export
ls5qhy7nsv4q1mwqha17s14wlyc07vj
395200
395183
2026-05-20T11:37:17Z
咽頭べさ
33
395200
Scribunto
text/plain
local export = {}
-- require("Module:log globals") -- Examine Lua logs at end of preview for results.
local links = require("Module:links")
local lang = require("Module:languages").getByCode("pi")
local m_parameters = require("Module:parameters")
local m_str_utils = require("Module:string utilities")
local m_translit
local to_script
local find = m_str_utils.find
local gsub = m_str_utils.gsub
local match = m_str_utils.match
local sub = m_str_utils.sub
local u = m_str_utils.char -- For readability.
local load = mw.loadData
local ti = table.insert
local currentScript
local scriptCode
-- Display names (in Mon) of the grammatical genders, keyed by the gender code
-- ("m", "f", "n").
local genders = {
["m"] = "ပုလ္လိၚ်", ["f"] = "ဣတ္တိလိၚ်", ["n"] = "နပုလ္လိၚ်",
}
-- Row labels of the declension table (in Mon), one per case, in case order.
-- NOTE(review): the previous revision of this module derived the template
-- parameter names (nom, acc, ...) from the first three characters of these
-- labels. If that code is unchanged in this revision, the localized labels no
-- longer yield those names - verify against modify().
local rows = {
"မဒုၚ်ယၟု (ပထမ)", "ကမ္မကာရက (ဒုတိယ)", "တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက် (တတိယ)", "ပြကမ္မကာရက (စတုတ္ထ)",
"ပရေၚ်မလၚ် (ပဉ္စမ)", "ဗဳဇဂကူ (ဆဋ္ဌမ)", "ခၞံဗဒှ်ဌာန်မတန်တဴ (သတ္တမ)", "ပရေၚ်ဂယိုၚ်လမျီု (ပွမခုတ်ယၟု)",
}
-- Stem endings recognised by export.detectEnding().
-- The outer key ("one" .. "six") is the number of trailing characters of the stem
-- that detectEnding() compares against the listed values; the inner key is the
-- ending name that is returned; the value is the list of spellings of that ending
-- in the supported non-Latin scripts.
-- NOTE(review): the entry " ႃ" under ["ā"] appears to begin with a space. The
-- values of "one" are compared with a single character (sub(stem, -1)), so an
-- entry of two characters could never match - confirm whether the space is intended.
local endings = {
["one"] = {
-- key(Ln) Thai Deva Beng Mymr Lana Laoo Khmr
-- Sinh Brah Cakm
["a"] = {},
["ā"] = { "า", "ा", "आ", "া", "আ", "ါ", "ာ", " ႃ", "ᩣ", "ᩤ", "າ", "ា", u(0x17A4),
"ා", "ආ", "𑀸", "𑀆", "𑄂" },
["i"] = { "ิ", "ि", "इ", "ি", "ই", "ိ", "ဣ", "ᩥ", "ᩍ", "ິ", "ិ", "ឥ",
"ි", "ඉ", "𑀺", "𑀇", "𑄨" },
["ī"] = { "ี", "ी", "ई", "ী", "ঈ", "ီ", "ဳ", "ဤ", "ᩦ", "ᩎ", "ີ", "ី", "ឦ",
"ී", "ඊ", "𑀻", "𑀈", "𑄩" },
["u"] = { "ุ", "ु", "उ", "ু", "উ", "ု", "ဥ", "ᩩ", "ᩏ", "ຸ", "ុ", "ឧ",
"ු", "උ", "𑀼", "𑀉", "𑄪" },
["ū"] = { "ู", "ू", "ऊ", "ূ", "ঊ", "ူ", "ဦ", "ᩪ", "ᩐ", "ູ", "ូ", "ឨ", "ឩ",
"ූ", "ඌ", "𑀽", "𑀊", "𑄫" },
["ah"] = { "ะ", "ະ"},
},
["two"] = {
-- key(Ln) Thai Deva Beng Mymr Lana Laoo Khmr
-- Sinh Brah Cakm
["ar"] = { "รฺ", "ัร", "र्", "র্", "ရ်", "ᩁ᩺", "ᩁ᩼", "ຣ໌", "ຣ຺", "ັຣ", "រ៑",
"ර්", "𑀭𑁆", "𑄢𑄴"},
["as"] = { "สฺ", "ัส", "स्", "স্", "သ်", "ᩈ᩺", "ᩈ᩼", "ສ໌", "ສ຺", "ັສ", "ស៑",
"ස්", "𑀲𑁆", "𑄥𑄴" },
["an"] = { "นฺ", "ัน", "न्", "ন্", "န်", "ᨶ᩺", "ᨶ᩼", "ນ໌", "ນ຺", "ັນ", "ន៑",
"න්", "𑀦𑁆", "𑄚𑄴"},
ent = { "นต", "ນຕ"},
["in"] = { "ิน", "ິນ"},
},
three = {
-- key(Ln) Thai Deva Beng Mymr Lana Laoo Khmr
-- Sinh Brah Cakm
ant = { "ันต" , "ັນຕ"},
ent = {},
ont = {},
["in"] = { "ินฺ", "िन्", "িন্", "ိန်", "ᩥᨶ᩺", "ິນ຺", "ិន៑",
"ින්", "𑀺𑀦𑁆", "𑄨𑄚𑄴" },
},
four = {
-- key(Ln) Thai Deva Beng Mymr Lana Laoo Khmr
-- Sinh Brah Cakm
ant = { "นฺตฺ", "न्त्", "ন্ত্", "န္တ်", "ᨶ᩠ᨲ᩺", "ᨶ᩠ᨲ᩼", "ນ຺ຕ໌", "ນ຺ຕ຺", "ន្ត៑",
"න්ත්", "𑀦𑁆𑀢𑁆", "𑄚𑄴𑄖𑄴" },
vant = { "วันต", "ວັນຕ" },
mant = { "มันต", "ມັນຕ" },
},
five = { -- 'ent' and 'ont' are discontiguous for Thai and Lao. Assume NFC (as above).
-- key(Ln) Thai Deva Beng Mymr Lana Laoo Khmr
-- Sinh Brah Cakm
antT = { "න්ත්" },
vant = { "วนฺตฺ", "वन्त्", "ৱন্ত্","ৰন্ত্", "ွန္တ်", "ဝန္တ်", "ᩅᨶ᩠ᨲ᩺", "ᩅᨶ᩠ᨲ᩼", "ວນ຺ຕ຺", "ວນ຺ຕ໌", "វន្ត៑",
"වන්ත්", "𑀯𑀦𑁆𑀢𑁆", "𑅇𑄚𑄴𑄖𑄴" },
mant = { "มนฺตฺ", "मन्त्", "মন্ত্", "မန္တ်", "ᨾᨶ᩠ᨲ᩺", "ᨾᨶ᩠ᨲ᩼", "ມນ຺ຕ຺", "ມນ຺ຕ໌", "មន្ត៑",
"ᩜᨶ᩠ᨲ᩺", "ᩜᨶ᩠ᨲ᩼",
"මන්ත්", "𑀫𑀦𑁆𑀢𑁆", "𑄟𑄚𑄴𑄖𑄴"},
ent = { "ेन्त्", "েন্ত্", "ေန္တ်", "ᩮᨶ᩠ᨲ᩺", "ᩮᨶ᩠ᨲ᩼", "េន្ត៑",
"एन्त्", "এন্ত্", "ဧန္တ်", "ᩑᨶ᩠ᨲ᩺", "ᩑᨶ᩠ᨲ᩼", "ឯន្ត៑",
"ෙන්ත්", "𑁂𑀦𑁆𑀢𑁆", "𑄬𑄚𑄴𑄖𑄴" ,
"එන්ත්", "𑀏𑀦𑁆𑀢𑁆" },
ont = { "ोन्त्", "োন্ত্", "ာန္တ်", "ါန္တ်", "ᩣᨶ᩠ᨲ᩺", "ᩣᨶ᩠ᨲ᩼", "ោន្ត៑",
"ᩤᨶ᩠ᨲ᩺", "ᩤᨶ᩠ᨲ᩼",
"ओन्त्", "ওন্ত্", "ဩန္တ်", "ᩰᨶ᩠ᨲ᩺", "ᩰᨶ᩠ᨲ᩼", "ឲន្ត៑",
"ᩒᨶ᩠ᨲ᩺", "ᩒᨶ᩠ᨲ᩼",
"ොන්ත්", "𑁄𑀦𑁆𑀢𑁆", "𑄮𑄚𑄴𑄖𑄴",
"ඔන්ත්", "𑀑𑀦𑁆𑀢𑁆"},
},
six = {
-- key(Ln) Thai Deva Beng Mymr Lana Laoo Khmr
-- Sinh Brah
vantT = {"වන්ත්" },
mantT = {"මන්ත්" },
entT = {"ෙන්ත්",
"එන්ත්" },
ontT = {"ොන්ත්",
"ඔන්ත්" },
},
}
-- Work out which declension-ending key applies to `stem`.
-- The stem is compared against the file-level `endings` tables, which map an
-- ending key (the Latin-script spelling, e.g. 'a', 'ar', 'ant', 'vantT') to the
-- spellings of that ending in other scripts, grouped by length in characters.
-- `options` is optional; only `options.impl` is read ('yes' when absent).
-- Returns the matching key, or "a" when nothing else matches.
-- Raises an error when options.impl == 'both' and a Thai/Lao stem could end in
-- either -in or -ina.
function export.detectEnding(stem, options)
-- Correct checking order is last 6, last 5, last 4, last 3, last 2, last 1, but we
-- can do slightly better by knowing the data.
-- Single-character endings are tried first, matching either the key itself or
-- any of its listed spellings.
local oneLetter = sub(stem, -1)
for key, arr in pairs(endings.one) do
if oneLetter == key then
return key
end
for _, val in ipairs(arr) do
if oneLetter == val then
return key
end
end
end
-- Check Latin script first
local fourLetters = sub(stem, -4)
if 'mant' == fourLetters or 'vant' == fourLetters then
return fourLetters
end
-- Six-character endings: only the listed spellings are compared, not the keys.
local wordEnd = sub(stem, -6)
for key, arr in pairs(endings.six) do
-- if wordEnd == key then
-- return key
-- end
for _, val in ipairs(arr) do
if wordEnd == val then
return key
end
end
end
-- Five-character endings, again compared against the spellings only.
wordEnd = sub(stem, -5)
for key, arr in pairs(endings.five) do
-- if wordEnd == key then
-- return key
-- end
for _, val in ipairs(arr) do
if wordEnd == val then
return key
end
end
end
-- Four-character endings: key or spelling.
for key, arr in pairs(endings.four) do
if fourLetters == key then return key end
for _, val in ipairs(arr) do
if fourLetters == val then
-- Scripts with visually ordered preposed vowels have not been checked thoroughly
-- A stem ending in U+0E3A, U+0ECC or U+0EBA may really be an -ent/-ont stem
-- whose vowel is written before the onset, so look further back for it.
if key == 'ant' and
(oneLetter == u(0x0E3A) or oneLetter == u(0xECC) or
oneLetter == u(0x0EBA)) then
local pm6 = sub(stem, -6, -6)
if match(pm6, '[เโເໂ]') then -- 1 char onset
return 'ent' -- 'ent' for 'ont' matters not.
elseif match(pm6, '['..u(0x0E3A)..u(0x0EBA)..']')
and match(sub(stem, -8, -8), '[เโເໂ]') then -- 2 char onset
return 'ent' -- 'ent' for 'ont' matters not.
else
return key
end
else
return key
end
end
end
end
-- Three-character endings: key or spelling.
local threeLetters = sub(stem, -3)
for key, arr in pairs(endings.three) do
if threeLetters == key then
return key
end
for _, val in ipairs(arr) do
if threeLetters == val then return key; end
end
end
local impl = options and options.impl or 'yes' -- Fudge to pass old tests.
-- Two-character endings: key or spelling, with special cases for Thai and Lao.
wordEnd = sub(stem, -2)
for key, arr in pairs(endings.two) do
if wordEnd == key then
return key
end
for _, val in ipairs(arr) do
if wordEnd == val then
if key == 'ent' then
-- The two-character 'ent' spellings are also the tail of -ant stems; the
-- character before them (U+0E31 or U+0EB1) or a preposed vowel decides.
-- If none of the tests below succeeds, the search simply continues.
local pm3 = sub(stem, -3, -3)
if match(pm3, '['..u(0x0e31)..u(0xeb1)..']') then
-- Recognise below
return 'ant'
elseif match(sub(stem, -4, -3), '[เโເໂ][ก-ฮກ-ຮ]') then -- 1 char onset
return 'ent'
elseif match(sub(stem, -5, -3), '[เโເໂ][ก-ฮກ-ຮ][ก-ฮກ-ຮ]') then -- 2 char onset
return 'ent'
end
elseif wordEnd == "ิน" or wordEnd == "ິນ" then
-- Ambiguous between -in and -ina: resolved by the impl option.
if impl == 'yes' then
return 'a'
elseif impl == 'both' then
error("Does "..stem.." end in -in or -ina?")
else
return key
end
else
return key
end
end
end
end
-- Nothing matched: default to the -a declension.
return "a"
end
-- Selectively converts touching to conjoining.
local sinh_flip = {["කⒿ්ව"]="ක්ව",
["තⒿ්ථ"]="ත්ථ", ["තⒿ්ව"]="ත්ව",
["නⒿ්ථ"]="න්ථ", ["නⒿ්ද"]="න්ද", ["නⒿ්ධ"]="න්ධ", ["නⒿ්ව"]="න්ව",
}
-- Argument option is optional.
-- Apply the Lanna "tall aa" substitutions to `term`: after the listed
-- consonants, the vowel sign following the join marker Ⓙ is replaced by its
-- tall form. Shared by the "tall"/"both" and "default" settings of option.aa.
local function lana_tall_aa(term)
	term = gsub(term, "^([ᨣᨴᨵᨷᩅ])Ⓙ(ᩮ?)ᩣ", "%1%2ᩤ")
	term = gsub(term, "([^᩠])([ᨣᨴᨵᨷᩅ])Ⓙ(ᩮ?)ᩣ", "%1%2%3ᩤ")
	term = gsub(term, "([^᩠])Ⓙ([ᨣᨴᨵᨷᩅ])(ᩮ?)ᩣ", "%1%2%3ᩤ")
	term = gsub(term, "([ᨣᨴᨵᨷᩅ])(᩠[ᨠ-ᩌᩔ])Ⓙ(ᩮ?)ᩣ", "%1%2%3ᩤ")
	term = gsub(term, "([ᨣᨴᨵᨷᩅ])Ⓙ(᩠[ᨠ-ᩌᩔ])(ᩮ?)ᩣ", "%1%2%3ᩤ")
	return term
end

-- Join `stem` to each suffix in `suffixes` and return the list of resulting
-- forms.
--   scriptCode: script code of the stem ('Thai', 'Mymr', 'Lana', 'Beng',
--               'Laoo', 'Sinh', ...); selects the script-specific clean-up.
--   stem:       the stem; a nil stem raises an error describing the call.
--   suffixes:   list of suffixes. Each leading "⌫" (up to five) deletes one
--               character from the end of the stem before the suffix is added.
--   option:     optional table; only option.aa is read ('default' if absent).
--               For Lana it may be "tall", "round", "both" or "default"; any
--               other value raises an error.
-- For Lana, a suffix can contribute two forms (variant first) when the tall
-- and round spellings differ.
function export.joinSuffix(scriptCode, stem, suffixes, option)
	if stem == nil then
		-- Was an accidental global; tostring() keeps the report usable even
		-- when scriptCode is nil as well.
		local errmes = {}
		table.insert(errmes, 'joinSuffix('..tostring(scriptCode))
		table.insert(errmes, tostring(stem))
		table.insert(errmes, tostring(suffixes))
		table.insert(errmes, tostring(option)..')')
		error(table.concat(errmes, ','))
	end
	local output = {}
	local term
	local aa = option and option.aa or "default"
	local join, term2
	-- Ⓙ temporarily marks the stem/suffix boundary for the scripts whose
	-- clean-up rules need to know where it was.
	if scriptCode == 'Lana' or scriptCode == 'Mymr' or scriptCode == 'Sinh' then
		join = 'Ⓙ'
	else
		join = ""
	end
	for _,suffix in ipairs(suffixes) do
		-- Longest backspace prefix first, so that each "⌫" is consumed.
		if match(suffix, "^⌫⌫⌫⌫⌫") then --backspace
			term = sub(stem, 1, -6) .. join .. sub(suffix, 6, -1)
		elseif match(suffix, "^⌫⌫⌫⌫") then --backspace
			term = sub(stem, 1, -5) .. join .. sub(suffix, 5, -1)
		elseif match(suffix, "^⌫⌫⌫") then --backspace
			term = sub(stem, 1, -4) .. join .. sub(suffix, 4, -1)
		elseif match(suffix, "^⌫⌫") then --backspace
			term = sub(stem, 1, -3) .. join .. sub(suffix, 3, -1)
		elseif match(suffix, "^⌫") then --backspace
			term = sub(stem, 1, -2) .. join .. sub(suffix, 2, -1)
		else
			term = stem .. join .. suffix
		end
		--note: Sinh conjuncts are already ready.
		if scriptCode == "Thai" then
			-- "↶" asks for the preposed vowel to be moved before the preceding character.
			term = gsub(term, "(.)↶([เโ])", "%2%1") --swap
		elseif scriptCode == "Mymr" then
--			term = gsub(term, "င္", "င်္", "ၚ္", "ၚ်္") -- Pali doesn't have -Vr mid-word like Sanskrit, so no need to include repha.
			term = gsub(term, "(င်္)(ၚ်္)([ခဂငဒပဝ])(ေ?)Ⓙာ", "%1%2%3ါ") -- redundant!
--			term = gsub(term, "္[ယရ]", { ["္ယ"] = "ျ", ["္ရ"] = "ြ" }) --these not need tall aa
			term = gsub(term, "Ⓙ္[ယရ]", { ["Ⓙ္ယ"] = "ျ", ["Ⓙ္ရ"] = "ြ" }) --these not need tall aa
			term = gsub(term, "^([ခဂငဒပဝ])Ⓙ(ေ?)ာ", "%1%2ါ")
			term = gsub(term, "([^္])([ခဂငဒပဝ])Ⓙ(ေ?)ာ", "%1%2%3ါ")
			term = gsub(term, "([^္])Ⓙ([ခဂငဒပဝ])(ေ?)ာ", "%1%2%3ါ")
			term = gsub(term, "([ခဂငဒပဝ])(္[က-အဿ])Ⓙ(ေ?)ာ", "%1%2%3ါ")
			term = gsub(term, "([ခဂငဒပဝ])Ⓙ(္[က-အဿ])(ေ?)ာ", "%1%2%3ါ")
--			term = gsub(term, "္[ဝဟ]", { ["္ဝ"] = "ွ", ["္ဟ"] = "ှ" })
--			term = gsub(term, "ဉ္ဉ", "ည")
--			term = gsub(term, "သ္သ", "ဿ")
			term = gsub(term, 'Ⓙ', '')
		elseif scriptCode == "Lana" then
			if aa == "both" then
				-- Keep the untouched (round) spelling as the second variant.
				term2 = gsub(term, 'Ⓙ', '')
			end
			if aa == "tall" or aa == "both" then
				term = lana_tall_aa(term)
				term = gsub(term, "(ᨻᩛ)Ⓙ(ᩮ?)ᩣ", "%1%2ᩤ")
				term = gsub(term, 'Ⓙ', '')
				if aa == "tall" then
					term2 = term
				end
			elseif aa == "round" then
				term = gsub(term, 'Ⓙ', '')
				term2 = term
			elseif aa == "default" then
--				term = gsub(term, "ᨦ᩠", "ᩘ")
				term = lana_tall_aa(term)
--				term = gsub(term, "᩠[ᩁᩃ]", { ["᩠ᩁ"] = "ᩕ", ["᩠ᩃ"] = "ᩖ" })
--				term = gsub(term, "([ᨭ-ᨱ])᩠ᨮ", "%1ᩛ")
--				term = gsub(term, "([ᨷ-ᨾ])᩠ᨻ", "%1ᩛ")
--				term = gsub(term, "ᩈ᩠ᩈ", "ᩔ")
				term = gsub(term, 'Ⓙ', '')
				term2 = term
			else
				error('Parameter aa has undefined value "'..aa..'".')
			end
			-- Only emit the variant when it really differs from the main form.
			if term ~= term2 then table.insert(output, term2) end
		elseif scriptCode == "Beng" then
			term = gsub(term, "ৰ্", "ৰ"..u(0x200d).."্") -- ৰ্(v-) needs ZWJ to display correctly
		elseif scriptCode == "Laoo" then
			term = gsub(term, "(.຺?)↶([ເໂ])", "%2%1")
		elseif scriptCode == "Sinh" then
			-- Assume cluster formation appends the joiner.
			term = gsub(term, "[කතන]Ⓙ..[ථදධව]", sinh_flip)
			term = gsub(term, 'Ⓙ', '')
		end
		table.insert(output, term)
	end
	return output
end
-- Build the table of declined forms for a stem: for every entry of the
-- file-level `rows` list, the singular suffix set (odd index of `pattern`) and
-- the plural suffix set (even index) are each joined to the stem.
-- Returns a list in which forms[2*i-1] / forms[2*i] hold the singular / plural
-- forms for row i. `option` is passed through to export.joinSuffix.
function export.joinSuffixes(scriptCode, stem, pattern, option)
	local forms = {}
	for row in ipairs(rows) do
		local singular, plural = 2 * row - 1, 2 * row
		forms[singular] = export.joinSuffix(scriptCode, stem, pattern[singular], option)
		forms[plural] = export.joinSuffix(scriptCode, stem, pattern[plural], option)
	end
	return forms
end
-- Format a list of alternative forms as linked terms separated by a small "or".
--   script:  script object; its code decides how transliteration is obtained.
--   list:    list of terms to link.
--   options: optional table. Fields read here: showtr ('plain' by default,
--            'link' or 'none'), subst (comma-separated "from/to" or "from//to"
--            substitutions applied before transliterating) and impl; the whole
--            table is also handed to the pi-translit module for Thai and Lao.
-- Returns the concatenated wikitext. Raises an error for an unknown showtr.
function export.orJoin(script, list, options) -- options is optional!
local output = {};
local scriptCode = script:getCode()
local showtr = options and options.showtr or 'plain'
local sep = ''
-- Latin-script forms need no transliteration.
if 'Latn' == scriptCode then showtr = 'none' end
for _,term in ipairs(list) do
local item = {sc = script, lang = lang, term = term} -- links.full_link() is at liberty to trash this table.
-- The separator is empty before the first term only.
ti(output, sep)
sep = " <small style=\"color:var(--wikt-palette-grey-8,#888)\">or</small> "
if showtr == 'none' then
item.tr = '-'
else
if options and options.subst then
-- Legal stuff:
-- The contents of this block were lifted from English Wiktionary Module:usex lines 97 to 101
-- of 3 July 2021, which see for attribution, and then localised.
-- Note: from here on `term` is the substituted text, while item.term still
-- holds the original spelling that will be displayed.
local substs = mw.text.split(options.subst, ",")
for _, subpair in ipairs(substs) do
local subsplit =
mw.text.split(subpair, find(subpair, "//") and "//" or "/")
term = gsub(term, subsplit[1], subsplit[2])
end
end
local aslat = nil
-- Thai with explicit vowels and all Lao go through the pi-translit module,
-- which is loaded on first use.
if (scriptCode == 'Thai' and options and options.impl == 'no' or
scriptCode == 'Laoo') then
m_translit = m_translit or require("Module:pi-translit")
aslat = m_translit.trwo(term, 'pi', scriptCode, options)
elseif term ~= item.term then -- Must complete transliteration
aslat = (lang:transliterate(term, script))
end
-- aslat may still be nil here, in which case 'plain' leaves item.tr unset.
if showtr == 'plain' then
item.tr = aslat
elseif showtr == 'link' then
aslat = aslat or (lang:transliterate(term, script))
item.tr = links.full_link({term = aslat, lang = lang})
else
item.tr = '-'
error('Bad value for option showtr.')
end
end
ti(output, links.full_link(item))
end
return table.concat(output)
end
-- convert Latin script inflections to another script
-- C2 is second character of pseudostem. Ignored if NIL.
-- Convert one set of Latin-script suffixes into suffixes for script `sc`.
-- Each suffix is joined to the Latin pseudo-stem `stem`, the result is
-- converted with `to_script` (with '#' appended for the conversion and removed
-- again afterwards), and the converted pseudo-stem's first character is dropped.
-- Every result is prefixed with `nstrip` backspace markers ("⌫").
--   impl: passed to the converter as options.impl.
--   c2:   second character of the converted pseudo-stem, or nil to ignore.
-- Returns a table with the same keys as the joined Latin forms.
local function convert_one_set(stem, nstrip, suffixes, sc, impl, c2)
	local backspaces = string.rep("⌫", nstrip)
	local tr_options = {impl = impl}
	local converted = {}
	for index, latin_form in pairs(export.joinSuffix('Latn', stem, suffixes)) do
		local native = sub(to_script(latin_form..'#', sc, tr_options), 1, -2)
		-- Special handling is needed for a preposed vowel.
		local preposed = match(native, "^[เโເໂ]")
		if preposed then
			-- Vowel written before the onset: flag it with "↶" for joinSuffix.
			converted[index] = backspaces .. "↶" .. preposed .. sub(native, 3)
		elseif c2 and c2 == sub(native, 2, 2) then
			-- Quick cheat for Myanmar script variants: the second character is
			-- unchanged, so keep it and delete one character fewer.
			converted[index] = sub(backspaces, 2) .. sub(native, 3)
		else
			-- Back to the normal case.
			converted[index] = backspaces .. sub(native, 2)
		end
	end
	return converted
end
-- Convert a full table of Latin-script suffix sets to script `sc`.
--   stem:     Latin pseudo-stem the suffixes are attached to for conversion.
--   nstrip:   number of stem characters each converted suffix deletes.
--   suffixes: list of exactly 16 suffix sets; anything else raises an error.
--   impl:     passed to the converter as options.impl.
-- Returns a list of 16 converted suffix sets.
local convert_suffixes = function(stem, nstrip, suffixes, sc, impl)
	local xlitend = {}
	-- The converter module is loaded on first use.
	to_script = to_script or require("Module:pi-Latn-translit").tr
	local c2
	if nstrip > 0 and sc == 'Mymr' then
		-- This used to pass `option`, which is not defined in this function;
		-- pass the same options that convert_one_set() builds instead.
		c2 = sub(to_script(stem, sc, {impl = impl}), 2, 2)
	end
	-- Seemingly #suffixes doesn't work because the module is loaded!
	-- Testing didn't reveal a problem, but avoiding it solved the problem!
	-- for k = 1, #suffixes do
	if #suffixes ~= 16 then error('#suffixes = '..tostring(#suffixes)) end
	for k, _ in ipairs(suffixes) do
		xlitend[k] = convert_one_set(stem, nstrip, suffixes[k], sc, impl, c2)
	end
	return xlitend
end
-- Change Lao abl/ins plural.
-- Returns a shallow copy of the suffix table `retval` in which sets 6 and 10
-- have every suffix ending in 'ຠິ' replaced by the spellings selected by
-- `liap`: 'b', 'bh', 'b.' or a combination ('bbh', 'bb.', 'bhb.', 'bbhb.' or
-- 'all'); 'none' drops such suffixes. Any other value raises an error.
local liapise = function(retval, liap)
	-- Copy list to avoid changing data from data module.
	local copy = {}
	for _, forms in ipairs(retval) do table.insert(copy, forms) end
	-- Which spellings each accepted value of liap switches on.
	local selections = {
		['b']     = {b = true},
		['bh']    = {bh = true},
		['b.']    = {dotted = true},
		['bbh']   = {b = true, bh = true},
		['bb.']   = {b = true, dotted = true},
		['bhb.']  = {bh = true, dotted = true},
		['none']  = {},
		['all']   = {b = true, bh = true, dotted = true},
		['bbhb.'] = {b = true, bh = true, dotted = true},
	}
	local wanted = selections[liap]
	if not wanted then
		error('Value "'..liap..'" of liap is not understood.')
	end
	for _, caseno in ipairs({6, 10}) do
		local replaced = {}
		for _, form in ipairs(copy[caseno]) do
			if sub(form, -2, -1) ~= 'ຠິ' then
				table.insert(replaced, form)
			else
				if wanted.b then table.insert(replaced, sub(form,1,-3)..'ພິ') end
				if wanted.bh then table.insert(replaced, form) end
				if wanted.dotted then table.insert(replaced, sub(form,1,-3)..'ພ຺ິ') end
			end
		end
		copy[caseno] = replaced
	end
	return copy
end
-- Change Lao case ending.
-- Returns a shallow copy of `retval` in which, for the first `nvals` sets,
-- every form is rewritten with all of its 'ຍ'/'ຢ' letters set to 'ຍ', to 'ຢ',
-- or to each in turn (two forms, 'ຍ' first).
--   yval: 'ຍ' or 'yung', 'ຢ' or 'yaa', or 'both'; anything else raises an error.
local yselect = function(retval, yval, nvals)
	-- Copy list to avoid changing data from data module.
	local copy = {}
	for _, forms in ipairs(retval) do table.insert(copy, forms) end
	local want_yung = (yval == 'both' or yval == 'ຍ' or yval == 'yung')
	local want_yaa = (yval == 'both' or yval == 'ຢ' or yval == 'yaa')
	if not (want_yung or want_yaa) then
		error('Value "'..yval..'" of argument y is not understood.')
	end
	for caseno = 1, nvals do
		local rewritten = {}
		for _, form in ipairs(copy[caseno]) do
			-- gsub() returns two values, so its result goes through a local
			-- before being handed to table.insert().
			if want_yung then
				local with_yung = gsub(form, '[ຍຢ]', 'ຍ')
				table.insert(rewritten, with_yung)
			end
			if want_yaa then
				local with_yaa = gsub(form, '[ຍຢ]', 'ຢ')
				table.insert(rewritten, with_yaa)
			end
		end
		copy[caseno] = rewritten
	end
	return copy
end
-- Concatenate two arrays without duplication.
-- Returns a new array holding every element of `a1` (kept as they are, even
-- if `a1` repeats a value) followed by those elements of `a2` that are not
-- already in the result. Neither input is modified.
function export.arrcat_nodup(a1, a2)
	-- One of the arrays may have been 'loaded', so cannot use the # operator;
	-- the length of the result is tracked by hand instead.
	local merged, count = {}, 0
	for _, item in ipairs(a1) do
		count = count + 1
		merged[count] = item
	end
	for _, candidate in ipairs(a2) do
		local index, duplicate = 1, false
		while index <= count and not duplicate do
			duplicate = (candidate == merged[index])
			index = index + 1
		end
		if not duplicate then
			count = count + 1
			merged[count] = candidate
		end
	end
	return merged
end
local arrcat = export.arrcat_nodup
-- Build the suffix table for impl == 'both': fetch the suffixes once with
-- implicit vowels (impl = 'yes') and once with explicit vowels (impl = 'no'),
-- then merge the 16 sets pairwise without duplicates, implicit forms first.
-- `option` is modified only for the duration of the two look-ups.
local both_sets = function(scriptCode, ending, g, option)
	-- Remember the caller's setting so that it can be put back afterwards;
	-- previously the caller's table was left with impl == 'no'.
	local saved_impl = option.impl
	option.impl = 'yes'
	local iset = export.getSuffixes(scriptCode, ending, g, option)
	option.impl = 'no'
	local eset = export.getSuffixes(scriptCode, ending, g, option)
	option.impl = saved_impl
	-- iset, eset and retval used to leak into the global environment.
	local retval = {}
--	error('i='..iset[3][1]..' e='..eset[3][1])
	for ic = 1, 16 do
		retval[ic] = arrcat(iset[ic], eset[ic])
	end
--	error('m1='..'<'..tostring(retval[1][1])..'>'..' m2='..'<'..tostring(retval[1][2])..'>')
	return retval
end
-- Describe how Latin-script suffixes for `ending` are converted to another
-- script: returns {pseudoStem = <Latin stem to attach suffixes to>,
-- ndel = <number of characters the converted suffix deletes>}, or nil for an
-- ending that is not listed.
--   impl: 'yes' (implicit vowels) or 'no' (explicit vowels). For any other
--         value the impl-dependent entries come back without an ndel field.
local function wayToConvert(ending, impl)
	local explicit = (impl == 'no')
	-- Length by implicitness.
	local ant_ndel = ({yes = 4, no = 3})[impl]
	local in_ndel = ({yes = 3, no = 2})[impl]
	-- Endings whose conversion does not depend on the lengths above.
	local fixed = {
		a = {'ka', explicit and 1 or 0},
		ar = {'kar', 2},
		as = {'kas', 2},
		an = {'kan', 2},
		antT = {'kant', 5},
		entT = {'kant', 5},
		ontT = {'kant', 5},
		mantT = {'kant', 5},
		vantT = {'kant', 5},
		["ā"] = {'kā', 1},
		i = {'ki', 1},
		["ī"] = {'kī', 1},
		u = {'ku', 1},
		["ū"] = {'kū', 1},
	}
	local entry = fixed[ending]
	if entry then
		return {pseudoStem = entry[1], ndel = entry[2]}
	end
	if ending == 'in' then
		return {pseudoStem = 'kin', ndel = in_ndel}
	end
	if ending == 'ent' or ending == 'ont' then
		-- With explicit vowels these use a shorter pseudo-stem.
		if explicit then
			return {pseudoStem = 'knt', ndel = 2}
		end
		return {pseudoStem = 'kant', ndel = ant_ndel}
	end
	if ending == 'ant' or ending == 'mant' or ending == 'vant' then
		return {pseudoStem = 'kant', ndel = ant_ndel}
	end
	return nil
end
-- Return the table of suffix sets for the given script, ending and gender.
--   scriptCode: script code; the data module "Module:pi-decl/noun/<code>" is
--               consulted first.
--   ending:     ending key as returned by export.detectEnding.
--   g:          gender key used to index the pattern data.
--   option:     optional table; option.impl ('yes' by default) selects
--               implicit or explicit vowels, 'both' merges the two.
-- When the script's own data cannot be used, the Latin-script suffixes are
-- converted instead. Returns nil for Latin script without data and for
-- endings that wayToConvert() does not know; raises an error if not even the
-- Latin data has the ending for this gender.
function export.getSuffixes(scriptCode, ending, g, option)
	local impl = option and option.impl or 'yes'
	if (impl == 'both') then
		return both_sets(scriptCode, ending, g, option)
	end
	local pattern = load("Module:pi-decl/noun/" .. scriptCode)
	local applicable = pattern and pattern[ending] and pattern[ending][g]
	if applicable then
		-- Tabulated data is used as it stands for implicit vowels and for -ah;
		-- otherwise fall through to conversion from Latin.
		if impl == 'yes' or ending == 'ah' then
			return applicable
		end
	elseif 'Latn' == scriptCode then
		return nil
	elseif 'ah' == ending then
		-- No data for -ah in this script: treat it as -a with explicit vowels.
		ending = 'a'
		impl = 'no'
	end
	pattern = require("Module:pi-decl/noun/Latn") -- Why doesn't load work with testcases?
	-- A trailing 'T' on the ending key is dropped to find the Latin data.
	local tabulated_ending = ending
	if 'T' == sub(ending, -1) then
		tabulated_ending = sub(ending, 1, -2)
	end
	applicable = pattern and pattern[tabulated_ending] and
		pattern[tabulated_ending][g]
	if not applicable then
		error('Not even Latin script has ' .. g .. ' -'..tabulated_ending..
			' endings.')
		return nil -- If you don't like the message above!
	end
	-- `way` used to leak into the global environment.
	local way = wayToConvert(ending, impl)
	if not way then return nil end
	return convert_suffixes(way.pseudoStem, way.ndel, applicable,
		scriptCode, impl)
end
-- Render the declension table as HTML.
--   stem:    text shown in the table heading.
--   g:       gender key; 'no' suppresses the gender note, otherwise the
--            file-level `genders` table supplies its label.
--   forms:   list where forms[2*i-1] / forms[2*i] are the singular / plural
--            forms for row i of the file-level `rows` list.
--   number:  nil or 'both' for both columns, 's' for singular only, 'p' for
--            plural only; anything else raises an error.
--   options: optional; passed through to export.orJoin.
-- Rows whose singular and plural form lists are both empty are omitted.
function export.present(stem, g, forms, number, options) -- options is optional
	local gmark, dos, dop
	if 'no' == g then
		gmark = ''
	else
		gmark = ' (' .. genders[g] .. ')'
	end
	if not number or number == 'both'then
		dos = 1; dop = 1
	elseif number == 's' then
		dos = 1; dop = nil;
	elseif number == 'p' then
		dos = nil; dop = 1;
	else
		error('Parameter "number" has meaningless value "'..number..'".' )
	end
	local output = {}
	table.insert(output, '<div class="NavFrame" style="min-width:30%"><div class="NavHead" style="background:var(--wikt-palette-lightblue,#d9ebff)">မလဟုတ်စှ်ေဆေၚ်စပ်ကဵု "' .. stem .. '"' .. gmark..'</div><div class="NavContent">')
	table.insert(output, '<table class="inflection-table" style="background:var(--wikt-palette-paleblue,#f8f9fa);text-align:center;width:100%"><tr><th style="background:var(--wikt-palette-cyan,#eaffff)">ကိစ္စ / လိက်ဂၞန်</th>')
	if dos then
		table.insert(output, '<th style="background:var(--wikt-palette-cyan,#eaffff)">ကိုန်ဨကဝုစ်</th>')
	end
	if dop then
		table.insert(output, '<th style="background:var(--wikt-palette-cyan,#eaffff)">ကိုန်ဗဟုဝစ်</th>')
	end
	-- Close the header row unconditionally. It used to be closed only as part
	-- of the plural heading, leaving the row open in singular-only tables.
	table.insert(output, '</tr>')
	for i,v in ipairs(rows) do
		if #forms[2*i-1] > 0 or #forms[2*i] > 0 then
			table.insert(output, "<tr><td style=\"background:var(--wikt-palette-cyan,#eaffff)\">" .. v .. "</td>")
			if dos then
				table.insert(output, "<td>")
				table.insert(output, export.orJoin(currentScript, forms[2 * i - 1], options))
				table.insert(output, "</td>")
			end
			if dop then
				table.insert(output, "<td>")
				table.insert(output, export.orJoin(currentScript, forms[2 * i], options))
				table.insert(output, "</td>")
			end
			table.insert(output, "</tr>")
		end
	end
	table.insert(output, "</table></div></div>")
	return table.concat(output)
end
-- Placeholder for functionality that has not been implemented: always raises
-- an error.
local function unwritten()
	error('Code missing.')
end
-- Apply liapise() to a single suffix set: the set is placed in position 6 of
-- an otherwise empty 16-entry table (one of the two positions that liapise()
-- rewrites) and the rewritten set is returned.
local function liapise_one_set(set, liap)
	local padded = {}
	for slot = 1, 16 do
		if slot == 6 then
			padded[slot] = set
		else
			padded[slot] = {}
		end
	end
	return liapise(padded, liap)[6]
end
-- Apply the user-supplied alternatives for one case/number slot.
--   stem, ending: the stem being declined and its ending key.
--   name:         parameter name for the slot (e.g. 'noms'); at[name] is the
--                 list of alternatives and at[name..'_mod'] says how to use
--                 them: 'after', 'before', 'replace' or 'blank'.
--   caseno:       index of the slot in `forms`, which is modified in place.
--   at:           processed template arguments.
-- An alternative starting with '+' is a suffix to be joined to the stem;
-- anything else is a complete form. Latin-script alternatives are converted
-- to the script of the table (file-level `scriptCode`).
-- Raises an error for an unrecognised *_mod value.
local function modify_form_set(stem, ending, name, caseno, forms, at)
	local ipalts = at[name]
	local way = at[name..'_mod']
	-- The converter module is loaded on first use.
	to_script = to_script or require("Module:pi-Latn-translit").tr
	if ipalts and #ipalts > 0 then
		local alts = {}
		for _, v in ipairs(ipalts) do
			local c1 = string.sub(v,1,1)
			local vsc = lang:findBestScript(v):getCode()
			if vsc == 'None' then
				-- Fall back on the sc= argument. This used to read
				-- `at.sc and sc or vsc`, where `sc` is not defined here.
				vsc = at.sc or vsc
			end
			if '+' == c1 then
				local vext
				v = string.sub(v,2)
				if vsc ~= 'Latn' then
					-- NOTE(review): neither a `dc` parameter nor a `dc` function
					-- is visible in this part of the file - confirm.
					vext = {at.dc and dc(v) or v}
				elseif scriptCode ~= 'Latn' then
					local impls
					if at.impl == 'both' then
						impls = {'yes', 'no'}
					else
						impls = {at.impl}
					end
					vext = {}
					for _, impl in ipairs(impls) do
						local cvtway = wayToConvert(ending, impl)
						local vset = convert_one_set(cvtway.pseudoStem, cvtway.ndel,
							{v}, scriptCode, impl, nil)
						vext = arrcat(vext, vset)
					end
					if scriptCode == 'Laoo' then
						local vexset = yselect({vext}, at.y, 1)
						-- yselect() returns a table; it was being called like a function.
						vext = vexset[1]
					end
				else
					vext = {v}
				end
				if scriptCode == 'Laoo' and vsc == 'Latn'
				and (caseno == 6 or caseno == 10) then
					vext = liapise_one_set(vext, at.liap)
				end
				local joined = export.joinSuffix(scriptCode, stem, vext, at)
				for _, vv in ipairs(joined) do ti(alts, vv) end
			elseif vsc == scriptCode then
				ti(alts, v)
			elseif vsc == 'Latn' then
				-- TODO: Sane Myanmar and Lao script support.
				local options = {}
				local vext = {}
				if at.impl and at.impl == 'both' then
					options.y = at.y -- Probably ineffective
					options.impl = 'yes'
					ti(vext, to_script(v, scriptCode, options))
					options.impl = 'no'
					ti(vext, to_script(v, scriptCode, options))
				else
					ti(vext, to_script(v, scriptCode, options))
				end
				if scriptCode == 'Laoo' and vsc == 'Latn'
				and (caseno == 6 or caseno == 10) then
					vext = liapise_one_set(vext, at.liap)
					local vexset = yselect({vext}, at.y, 1)
					-- Same table-versus-call correction as above.
					vext = vexset[1]
				end
				for _, vv in ipairs(vext) do ti(alts, vv) end
			else
				ti(alts, v) -- Go ahead anyway
			end
		end
		if 'after' == way then
			forms[caseno] = arrcat(forms[caseno], alts)
		elseif 'before' == way then
			forms[caseno] = arrcat(alts, forms[caseno])
		elseif 'replace' == way then
			forms[caseno] = alts;
		elseif 'blank' == way then
			-- Issue warning about alts?
			forms[caseno] = {}
		else
			error('Bad value for parameter '..name..'_mod')
		end
	elseif 'blank' == way then
		forms[caseno] = {}
	end
end
-- Apply the per-case template arguments to the table of forms.
-- The arguments are validated with m_parameters.process(); then, for every
-- row of the file-level `rows` list, either both slots are emptied (when the
-- matching "no<case>" flag is set) or modify_form_set() is applied to the
-- singular and the plural slot. The case name is the lower-cased first three
-- bytes of the row label. `forms` is modified in place and also returned.
local function modify(stem, ending, forms, args)
	local mod_default = 'after'
	-- General parameters.
	local params = {
		[1] = {alias_of = 'stem'},
		[2] = {alias_of = 'ending'},
		[3] = {alias_of = 'g'},
		stem = {},
		ending = {},
		g = {required = true},
		gender = {alias_of = 'g'},
		v = {},
		variation = {alias_of = 'v'},
		label = {},
		number = {},
		showtr = {},
		subst = {},
		sc = {},
		aa = {default = 'default'},
		liap = {default = 'default'},
		impl = {default = 'yes'},
		y = {default = 'default'},
	}
	-- Per-case parameters: a suppression flag, plus a list of alternatives and
	-- a placement mode for each number.
	for _, case in ipairs({'nom', 'acc', 'ins', 'dat', 'abl', 'gen', 'loc', 'voc'}) do
		params['no'..case] = {type = 'boolean'}
		for _, num in ipairs({'s', 'p'}) do
			params[case..num] = {list = true}
			params[case..num..'_mod'] = {default = mod_default}
		end
	end
	local at = m_parameters.process(args, params)
	if ending == 'ah' then
		at.impl = 'no'
	end
	for row, label in ipairs(rows) do
		local case = string.lower(string.sub(label,1,3))
		local singular, plural = 2*row - 1, 2*row
		if at['no'..case] then
			forms[plural] = {}
			forms[singular] = {}
		else
			modify_form_set(stem, ending, case..'s', singular, forms, at)
			modify_form_set(stem, ending, case..'p', plural, forms, at)
		end
	end
	return forms
end
-- Template entry point: build and return the declension table for the
-- arguments of the calling template.
-- Arguments read from the parent frame: 1/stem (defaults to the page name),
-- 2/ending (detected from the stem when absent), 3/g/gender (required), sc,
-- impl, showtr, liap, y, aa, subst, label and number, plus the per-case
-- arguments handled by modify().
-- Sets the file-level `currentScript` and `scriptCode`, which other functions
-- in this module read.
-- Raises an error when no gender is given.
function export.show(frame)
	local args = frame:getParent().args
	local PAGENAME = mw.loadData("Module:headword/data").pagename
	local stem = args[1] or args["stem"] or PAGENAME
	currentScript = lang:findBestScript(stem)
	scriptCode = currentScript:getCode()
	-- Script detection failed: fall back on an explicit sc= argument.
	if scriptCode == "None" and args["sc"] then
		scriptCode = args["sc"]
		currentScript = require("Module:scripts").getByCode(scriptCode, "No such script as "..scriptCode)
	end
	local g = args[3] or args["g"] or args["gender"] -- for each gender only
	-- (The v/variation argument used to be read into an unused local here.)
	if not g then
		error("A gender is required to display proper declensions.")
	end
	local lookup_g = g
	if 'no' == lookup_g then lookup_g = 'm' end -- Arbitrary!
	local option = {impl = args["impl"] or 'yes'}
	local xlit_options = {}
	xlit_options.impl = option.impl
	xlit_options.showtr = args.showtr
	local ending = args[2] or args["ending"] or export.detectEnding(stem, option)
	if ending == 'ah' then xlit_options.impl = 'no' end
	local selectedPattern =
		export.getSuffixes(scriptCode, ending, lookup_g, option)
	-- Lao-only adjustments of the suffix table.
	if args["liap"] and (scriptCode == 'Laoo') then
		selectedPattern = liapise(selectedPattern, args["liap"])
	end
	if args.y and (scriptCode == 'Laoo') then
		selectedPattern = yselect(selectedPattern, args.y, 16)
		xlit_options.y = args.y
	end
	option.aa = args["aa"] -- Reusable!
	local forms = export.joinSuffixes(scriptCode, stem, selectedPattern, option)
	modify(stem, ending, forms, args)
	for ic = 1, 16 do forms[ic] = arrcat({}, forms[ic]) end -- Remove duplicates.
	xlit_options.subst = args["subst"]
--	for name, _ in pairs(_G) do mw.addWarning('Global '..name) end
	return export.present(args["label"] or stem, g, forms, args["number"], xlit_options)
end
return export
80puq0uo9b6g7vnvbt42w4j9028x2ey
မဝ်ဂျူ:utilities/format categories/data
828
13352
395134
158380
2026-05-19T12:06:50Z
咽頭べさ
33
395134
Scribunto
text/plain
local data = {
	-- Namespaces in which format_categories will add categories.
	allowedNamespaces = {
		[""] = true,
		["အဆက်လက္ကရဴ"] = true,
		["ဗီုပြၚ်သိုၚ်တၟိ"] = true,
		["Citations"] = true,
		["Thesaurus"] = true,
	},
	-- Pages outside those namespaces for which an exception is made, keyed by
	-- their full prefixed title.
	allowedPrefixedPages = {
		["ဝိက်ရှေန်နရဳ:Sandbox"] = true,
	},
}
return data
2211zt0se7ajrafwd26r4t5hggxp306
မဝ်ဂျူ:de-noun
828
16685
395140
394904
2026-05-19T14:19:56Z
咽頭べさ
33
395140
Scribunto
text/plain
local export = {}
--[=[
Authorship: <benwing2>
]=]
--[=[
TERMINOLOGY:
-- "slot" = A particular combination of case/number.
Example slot names for nouns are "voc_s" (vocative singular) and
"gen_p" (genitive plural). Each slot is filled with zero or more forms.
-- "form" = The declined German form representing the value of a given slot.
-- "lemma" = The dictionary form of a given German term. Generally the nominative
masculine singular, but may occasionally be another form if the nominative
masculine singular is missing.
]=]
--[=[
FIXME:
1. Qualifiers in genders should appear as footnotes on the articles.
2. Support notation like <g:f> on feminine/diminutive/masculine, e.g. used for [[Gespons]] (neuter with the meaning
"wife", masculine with the meaning "husband").
3. Fix CSS gender-specific class in table.
4. Support adjectival nouns and adjective-noun combinations. (DONE)
5. Allow period and comma in forms e.g. for [[Eigent.-Whg.]], [[Eigt.-Whg.]] (using a backslash). (DONE)
6. Allow embedded links in genitive/plural/feminine/diminutive/masculine specs, e.g. 'f=![[weiblich]]er Geschäftspartner'.
7. Add 'prop' indicator to indicate proper nouns and suppress the indefinite article.
8. Add 'surname' indicator to indicate surnames, decline appropriately and include both masc and fem variants in the table. (DONE)
9. Add 'langname' indicator to indicate langnames and decline appropriately with its own table with two alternatives. (DONE)
]=]
local lang = require("Module:languages").getByCode("de")
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local m_links = require("Module:links")
local m_string_utilities = require("Module:string utilities")
local iut = require("Module:inflection utilities")
local put = require("Module:parse utilities")
local com = require("Module:de-common")
-- Debugging switches; both are off in normal operation.
local pretend_from_headword = false -- may be set during debugging
local force_cat = false -- may be set during debugging
-- Short local names for the functions taken from [[Module:string utilities]].
local u = m_str_utils.char
local rfind = m_str_utils.find
local rmatch = m_str_utils.match
local rsubn = m_str_utils.gsub
local unpack = unpack or table.unpack -- Lua 5.2 compatibility
local usub = m_str_utils.sub
-- Two characters reserved as stand-ins; presumably for a backslash-escaped
-- period and comma in forms (see FIXME item 5 above) - confirm at the use sites.
local SUB_ESCAPED_PERIOD = u(0xFFF0)
local SUB_ESCAPED_COMMA = u(0xFFF1)
-- Footnote text for the archaic dative singular in -e.
local archaic_dative_note = "[now rare, [[Wiktionary:German entry guidelines#Dative_singular_-e_in_noun_declension|see notes]]]"
-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar)
	-- The parentheses truncate rsubn()'s results to the substituted string.
	return (rsubn(term, foo, bar))
end
-- Record a tracking entry named "de-noun/<page>" through [[Module:debug]].
-- Always returns true.
local function track(page)
	local debug_module = require("Module:debug")
	debug_module.track("de-noun/" .. page)
	return true
end
-- Name components from which the slot names below are assembled.
-- Adjectival states, used as the prefix of adjectival slot names.
local states = { "str", "wk", "mix" }
-- Article definiteness values, used in the names of article slots.
local definitenesses = { "ind", "def" }
-- Cases of ordinary noun slots in the singular (including "abl" and "voc").
local cases_with_abl_voc = { "nom", "gen", "dat", "acc", "abl", "voc" }
-- Cases used everywhere else.
local basic_cases = { "nom", "gen", "dat", "acc" }
local numbers = { "s", "p" }
-- Gender codes and their full names.
local gender_spec_to_full_gender = {
m = "masculine",
f = "feminine",
n = "neuter",
}
-- Set form of cases_with_abl_voc, as built by m_table.listToSet.
local case_set_with_abl_voc = m_table.listToSet(cases_with_abl_voc)
-- Append the three "equivalent" slots to `slot_list`, each with accelerator
-- "-":
--   m_equiv: masculine equivalent of a feminine or neuter noun
--   f_equiv: feminine equivalent of a masculine or neuter noun
--   n_equiv: neuter equivalent of a masculine or feminine noun
local function add_equiv(slot_list)
	for _, gender in ipairs({"m", "f", "n"}) do
		slot_list[#slot_list + 1] = {gender .. "_equiv", "-"}
	end
end
-- Construct noun slots: the three *_equiv slots, then one slot per case and
-- number (singular with all six cases, plural with the four basic ones).
-- noun_slot_list holds {slot, accelerator} pairs in order; noun_slot_set
-- holds the case/number slot names as keys.
local noun_slot_list = {}
add_equiv(noun_slot_list)
local noun_slot_set = {}
for _, number in ipairs(numbers) do
	local cases = basic_cases
	if number == "s" then
		cases = cases_with_abl_voc
	end
	for _, case in ipairs(cases) do
		local slot, accel = case .. "_" .. number, case .. "|" .. number
		noun_slot_set[slot] = true
		table.insert(noun_slot_list, {slot, accel})
	end
end
-- Construct noun surname slots: for each basic case, a masculine singular, a
-- feminine singular and a plural slot.
local surname_slot_list = {}
local surname_slot_set = {}
-- Slot-name suffix and the matching accelerator suffix.
local surname_endings = {
	{"m_s", "m|s"},
	{"f_s", "f|s"},
	{"p", "p"},
}
for _, case in ipairs(basic_cases) do
	for _, ending_and_accel in ipairs(surname_endings) do
		local slot = case .. "_" .. ending_and_accel[1]
		local accel = case .. "|" .. ending_and_accel[2]
		surname_slot_set[slot] = true
		table.insert(surname_slot_list, {slot, accel})
	end
end
-- Construct noun langname slots: for each basic case and number, the regular
-- slot followed by its "_alt" counterpart.
local langname_slot_list = {}
local langname_slot_set = {}
for _, case in ipairs(basic_cases) do
	for _, number in ipairs(numbers) do
		local regular_slot = case .. "_" .. number
		local alt_slot = regular_slot .. "_alt"
		table.insert(langname_slot_list, {regular_slot, case .. "|" .. number})
		langname_slot_set[regular_slot] = true
		-- FIXME: We should add accelerators for the alternative forms, but this requires hacking the accelerator
		-- code in [[Module:inflection utilities]] to specify the alternative lemma; e.g. genitive singular
		-- ''Deutschen'' needs to have lemma [[Deutsche]] not [[Deutsch]].
		table.insert(langname_slot_list, {alt_slot, "-"})
		langname_slot_set[alt_slot] = true
	end
end
-- Construct adjectival slots: the three *_equiv slots, then one slot per
-- state, basic case and number.
local adjectival_slot_list = {}
add_equiv(adjectival_slot_list)
local adjectival_slot_set = {}
for _, state in ipairs(states) do
	for _, case in ipairs(basic_cases) do
		for _, number in ipairs(numbers) do
			local parts = {state, case, number}
			local slot = table.concat(parts, "_")
			adjectival_slot_set[slot] = true
			table.insert(adjectival_slot_list, {slot, table.concat(parts, "|")})
		end
	end
end
-- Construct expanded slot lists including linked variants.
-- Each list is a shallow copy of the corresponding basic list with "*_linked"
-- slots appended; a linked slot carries the same accelerator as the slot it
-- is named after.
local noun_slot_list_with_linked = m_table.shallowCopy(noun_slot_list)
table.insert(noun_slot_list_with_linked, {"nom_s_linked", "nom|s"})
table.insert(noun_slot_list_with_linked, {"nom_p_linked", "nom|p"})
-- Surnames: linked variant of the masculine nominative singular only.
local surname_slot_list_with_linked = m_table.shallowCopy(surname_slot_list)
table.insert(surname_slot_list_with_linked, {"nom_m_s_linked", "nom|m|s"})
-- Language names: linked variant of the nominative singular only.
local langname_slot_list_with_linked = m_table.shallowCopy(langname_slot_list)
table.insert(langname_slot_list_with_linked, {"nom_s_linked", "nom|s"})
-- Adjectival nouns: linked variants of the strong nominative, both numbers.
local adjectival_slot_list_with_linked = m_table.shallowCopy(adjectival_slot_list)
table.insert(adjectival_slot_list_with_linked, {"str_nom_s_linked", "str|nom|s"})
table.insert(adjectival_slot_list_with_linked, {"str_nom_p_linked", "str|nom|p"})
-- Construct expanded slot lists including linked variants and articles.
-- Append article slots to `slot_list`: for every combination of the given
-- cases and numbers, one slot per definiteness, named
-- "art_<definiteness>_<case>_<number>" and carrying the accelerator "-".
local function add_slot_articles(slot_list, cases, numbers)
	for _, case in ipairs(cases) do
		for _, number in ipairs(numbers) do
			local suffix = "_" .. case .. "_" .. number
			for _, def in ipairs(definitenesses) do
				slot_list[#slot_list + 1] = {"art_" .. def .. suffix, "-"}
			end
		end
	end
end
-- Each list below is a shallow copy of the matching "*_with_linked" list with
-- article slots appended by add_slot_articles().
-- Nouns: all six cases, both numbers.
local noun_slot_list_with_linked_and_articles = m_table.shallowCopy(noun_slot_list_with_linked)
add_slot_articles(noun_slot_list_with_linked_and_articles, cases_with_abl_voc, numbers)
-- Surnames: the gender/number combinations take the place of plain numbers.
local surname_slot_list_with_linked_and_articles = m_table.shallowCopy(surname_slot_list_with_linked)
add_slot_articles(surname_slot_list_with_linked_and_articles, basic_cases, {"m_s", "f_s", "p"})
-- Language names: singular articles only.
local langname_slot_list_with_linked_and_articles = m_table.shallowCopy(langname_slot_list_with_linked)
add_slot_articles(langname_slot_list_with_linked_and_articles, basic_cases, {"s"})
-- Adjectival nouns: basic cases, both numbers.
local adjectival_slot_list_with_linked_and_articles = m_table.shallowCopy(adjectival_slot_list_with_linked)
add_slot_articles(adjectival_slot_list_with_linked_and_articles, basic_cases, numbers)
-- Return true if `prop` is a recognized indicator that can be specified on adjectives in [[Module:de-adjective]].
local function is_adjectival_decl_indicator(prop)
	-- The recognized indicators, as a set.
	local recognized = {ss = true, sync_n = true, sync_mn = true, sync_mns = true}
	return recognized[prop] == true
end
-- Tell whether `slot` should be left empty for a noun restricted to one
-- number: plural slots (ending in "_p") when `number` is "sg", singular slots
-- (ending in "_s") when it is "pl". The result is truthy or falsy rather than
-- a strict boolean.
local function skip_slot(number, slot)
	if number == "sg" then
		return rfind(slot, "_p$") or false
	elseif number == "pl" then
		return (rfind(slot, "_s$"))
	end
	return false
end
-- Attach `ending` to `stem` and return the result.
-- A leading "^" on the ending requests umlaut: it is removed from the ending
-- and com.apply_umlaut() is applied to the stem. When props.ss is set, a
-- stem-final "ß" becomes "ss" before an ending that matches com.V at its start.
local function combine_stem_ending(props, stem, ending)
	local umlaut_requested = ending:find("^%^")
	if umlaut_requested then
		ending = rsub(ending, "^%^", "")
		stem = com.apply_umlaut(stem)
	end
	local respell_eszett = props.ss and stem:find("ß$") and rfind(ending, "^" .. com.V)
	if respell_eszett then
		stem = rsub(stem, "ß$", "ss")
	end
	return stem .. ending
end
-- Insert into slot `slot` of `base.forms` the form(s) made by combining `stem` with `ending`. Either of the two
-- may be a single string, a list of strings, or a list of objects {form=FORM, footnotes=FOOTNOTES} (FOOTNOTES
-- being nil or a list of strings). A nil `stem` means "use `base.lemma`". Nothing is added when `ending` is nil
-- or when the slot is excluded by the number restriction in `base.number`.
--
-- `gender` ("m", "f", "n" or nil) is the gender of the resulting form. It exists so that the right article can be
-- attached when forms of differing genders coexist; nil means articles of all relevant genders apply and should
-- only be used for plural slots or when the gender is unknown (e.g. overrides). It is currently accepted but not
-- encoded into the form (see the note in the body).
-- `footnotes` (nil or a list of strings) are extra footnotes, combined with `base.footnotes`.
-- `process_combined_stem_ending`, if given, is applied to each combined form before insertion (currently used to
-- add -n in the dative plural).
local function add(base, slot, stem, ending, gender, footnotes, process_combined_stem_ending)
	if not ending then
		return
	end
	if skip_slot(base.number, slot) then
		return
	end

	-- Combine one stem with one ending, then run the optional post-processing hook.
	local function join(one_stem, one_ending)
		local combined = combine_stem_ending(base.props, one_stem, one_ending)
		if process_combined_stem_ending then
			combined = process_combined_stem_ending(combined)
		end
		-- Disabled for now: prefixing the value with a gender-specific character (gender_to_gender_char[gender])
		-- so that the gender propagates to [[Module:de-headword]].
		return combined
	end

	local all_footnotes = iut.combine_footnotes(base.footnotes, footnotes)
	local ending_obj = iut.combine_form_and_footnotes(ending, all_footnotes)
	local stem_or_lemma = stem or base.lemma

	-- When declining an adjectival noun or adjective-noun combination, a noun slot is fanned out to the equivalent
	-- adjective slots (e.g. gen_s -> str_gen_s/wk_gen_s/mix_gen_s). The *_equiv slots are the same for nouns and
	-- adjectives and are therefore left alone.
	local fan_out = base.props.overall_adj and noun_slot_set[slot] and not rfind(slot, "equiv$")
	if not fan_out then
		iut.add_forms(base.forms, slot, stem_or_lemma, ending_obj, join)
		return
	end
	for _, state in ipairs(states) do
		iut.add_forms(base.forms, state .. "_" .. slot, stem_or_lemma, ending_obj, join)
	end
end
-- Walk a list of ending specs ("s", "(e)s", "^er", "^lein", "!Pizzen", ...) as found in the genitive, the plural,
-- an override, the value of dim=/m=/f=/n=, etc., and call `process` for every form they describe.
--
-- `endings`: list of {form=FORM, footnotes=FOOTNOTES}, FOOTNOTES being nil or {FOOTNOTE, FOOTNOTE, ...}.
-- `literal_endings`: if truthy, every FORM is taken literally (as a full form) instead of as an ending spec.
-- `default`: what to substitute for the spec "+"; either in the same format as `endings` or something that can be
--   converted to it (e.g. a string). An error is raised if "+" is seen and no default exists.
-- `literal_default`: like `literal_endings`, but for the FORM values of `default`.
-- `desc`: English description of what is being processed, used in error messages.
-- `process`: function of two arguments, STEM and ENDING. For a full form, STEM is the form object
--   {form=FORM, footnotes=FOOTNOTES} and ENDING is the empty string; otherwise STEM is nil and ENDING is the
--   ending to process, either a single {form=FORM, footnotes=FOOTNOTES} object or (for the optional-ending
--   shorthands) a list of such objects.
--
-- Umlauts are not resolved here: a spec such as "^chen" reaches `process` with a FORM still starting with "^",
-- and `process` must handle it (add() does so).
local function process_spec(endings, literal_endings, default, literal_default, desc, process)
	-- Optional-ending shorthands, tried in this order. Each pattern captures an optional leading "^" (umlaut
	-- marker), which is re-attached to every expanded variant.
	local optional_shorthands = {
		{pattern = "^(%^?)%(e%)s$", variants = {"es", "s"}},
		{pattern = "^(%^?)%(s%)$", variants = {"s", ""}},
		{pattern = "^(%^?)%(es%)$", variants = {"es", ""}},
	}
	for _, ending in ipairs(endings) do
		local spec = ending.form
		local spec_footnotes = ending.footnotes
		-- Build a form object that carries over the footnotes of the current spec.
		local function with_footnotes(form)
			return {form = form, footnotes = spec_footnotes}
		end
		if spec == "--" then
			-- Slot explicitly suppressed: emit nothing.
		elseif spec == "+" then
			if not default then
				-- Could happen if e.g. gen is given as -- and then a gen_s override with + is specified, or with
				-- n= for neuter, where no default is available.
				error("Form '+' found for " .. desc .. " but no default is available")
			end
			process_spec(iut.convert_to_general_list_form(default, spec_footnotes), literal_default, nil, nil, desc, process)
		elseif literal_endings or rfind(spec, "^" .. com.CAP) then
			-- Literal mode, or a spec starting with a capital letter: a full form, passed through unchanged.
			process(ending, "")
		elseif rfind(spec, "^!") then
			-- "!" marks a full form explicitly; drop the marker.
			process(with_footnotes(rsub(spec, "^!", "")), "")
		else
			local umlaut, variants
			for _, shorthand in ipairs(optional_shorthands) do
				umlaut = rmatch(spec, shorthand.pattern)
				if umlaut then
					variants = shorthand.variants
					break
				end
			end
			if variants then
				local expanded = {}
				for _, variant in ipairs(variants) do
					table.insert(expanded, with_footnotes(umlaut .. variant))
				end
				process(nil, expanded)
			elseif spec == "-" then
				-- "-" stands for the null ending.
				process(nil, with_footnotes(""))
			else
				process(nil, ending)
			end
		end
	end
end
-- Add the forms described by a list of ending specs ("s", "(e)s", "^er", "^lein", "!Pizzen", ...) to slot `slot`
-- (e.g. "gen_s"). `endings` is a list of {form=FORM, footnotes=FOOTNOTES} objects, FOOTNOTES being nil or
-- {FOOTNOTE, FOOTNOTE, ...}. `gender`, `footnotes` and `process_combined_stem_ending` are forwarded to add();
-- `default` and `literal_default` are forwarded to process_spec(). The specs themselves are never treated as
-- literal (process_spec() is called with a nil `literal_endings`).
local function add_spec(base, slot, endings, gender, default, literal_default, footnotes, process_combined_stem_ending)
	local desc = "slot '" .. slot .. "'"
	process_spec(endings, nil, default, literal_default, desc, function(stem, ending)
		add(base, slot, stem, ending, gender, footnotes, process_combined_stem_ending)
	end)
end
-- Apply the user-specified slot overrides in `base.overrides`. For each overridden slot the previously generated
-- forms are removed and re-added from the override specs; those previous forms serve as the (literal) default
-- that a "+" in the override expands to. Raises an error if an override targets a slot that is excluded by the
-- number restriction in `base.number`.
local function process_slot_overrides(base)
	for slot, override_specs in pairs(base.overrides) do
		if skip_slot(base.number, slot) then
			error("Override specified for invalid slot '" .. slot .. "' due to '" .. base.number .. "' number restriction")
		end
		local previous_forms = base.forms[slot]
		base.forms[slot] = nil
		-- The user does not supply a gender with an override, hence the nil.
		add_spec(base, slot, override_specs, nil, previous_forms, "literal default")
	end
end
-- Add archaic dative singular forms to the "dat_s" slot, derived from the genitive specs in `base.gens`:
-- genitive "es" or "(e)s" gives dative "e", genitive "ses" gives "se", and any other genitive gives "e" only if
-- the `dat_with_e` property is set. A genitive spec of "+" is looked up as `def_gen`. Each added form carries the
-- footnotes of its genitive spec plus `archaic_dative_note`.
local function add_archaic_dative_singular(base, gender, def_gen)
	local dative_for_genitive = {
		["es"] = "e",
		["(e)s"] = "e",
		["ses"] = "se",
	}
	for _, gen_spec in ipairs(base.gens) do
		local gen_ending = gen_spec.form
		if gen_ending == "+" then
			gen_ending = def_gen
		end
		local dat_ending = dative_for_genitive[gen_ending]
		if not dat_ending and base.props.dat_with_e then
			dat_ending = "e"
		end
		if dat_ending then
			local notes = iut.combine_footnotes(gen_spec.footnotes, {archaic_dative_note})
			add(base, "dat_s", nil, dat_ending, gender, notes)
		end
	end
end
-- Return the n-type ending ("n", "nen" or "en") appropriate for `stem`. `is_sg` is accepted for the benefit of
-- callers but is not consulted here.
local function get_n_ending(base, stem, is_sg)
	-- Typical feminine or weak masculine in -e.
	if rfind(stem, "e$") then
		return "n"
	end
	-- [[Kammer]], [[Feier]], [[Leier]], but not [[Spur]], [[Beer]], [[Manier]], [[Schmier]] or [[Vier]];
	-- similarly [[Achsel]], [[Gabel]], [[Tafel]], etc. but not [[Ziel]].
	if rfind(stem, "e[lr]$") and not rfind(stem, com.NV .. "[ei]e[lr]$") then
		return "n"
	end
	-- ''des Nachbarn'', ''des Herrn'', ''des Satyrn'', etc.
	if base.props.weak_n then
		return "n"
	end
	-- [[Chinesin]], [[Doktorin]], etc.; but not words in -ein or -ain such as [[Pein]].
	if rfind(stem, "[^aeAE]in$") then
		return "nen"
	end
	return "en"
end
-- Return the default genitive singular ending for `base.lemma` given `gender`. Feminines take no ending; weak
-- nouns take the n-type ending; otherwise the ending depends on how the lemma ends.
local function get_default_gen(base, gender)
	if gender == "f" then
		return ""
	end
	local lemma = base.lemma
	if base.props.weak then
		return get_n_ending(base, lemma, "is singular")
	end
	-- Neuter like [[Erlebnis]], [[Geheimnis]] or occasional masculine like [[Firnis]], [[Penis]].
	if rfind(lemma, "nis$") then
		return "ses"
	end
	-- [[Euphemismus]], [[Exitus]], [[Exodus]], etc.
	if rfind(lemma, com.NV .. "us$") then
		return ""
	end
	if rfind(lemma, "[sßxz]$") then
		return "es"
	end
	return "s"
end
-- Return the default plural spec for `base.lemma` given `gender`. The result is either an ending (possibly the
-- empty string) or, when prefixed with "!", a full plural form.
local function get_default_pl(base, gender)
	local lemma = base.lemma
	-- Neuter like [[Erlebnis]], [[Geheimnis]], feminine like [[Kenntnis]], [[Wildnis]], or occasional masculine
	-- like [[Firnis]], [[Penis]].
	if rfind(lemma, "nis$") then
		return "se"
	end
	if gender == "f" or base.props.weak then
		return get_n_ending(base, lemma)
	end
	if rfind(lemma, "e$") then
		track("default-pl-e-not-f-or-weak")
		-- FIXME: This should return "s"
		return get_n_ending(base, lemma)
	end
	if gender == "n" then
		-- Diminutives in -lein (those in -chen automatically get a null ending from the -en check further down).
		if rfind(lemma, "lein$") then
			return ""
		end
		-- [[Museum]] -> [[Museen]], [[Vakuum]] -> [[Vakuen]]; not masculine [[Baum]] (plural [[Bäume]]) or
		-- [[Reichtum]] (plural [[Reichtümer]]).
		if rfind(lemma, "um$") then
			return "!" .. rsub(lemma, "um$", "en")
		end
	end
	-- Algorithmus -> Algorithmen, Aphorismus -> Aphorismen.
	if rfind(lemma, "mus$") then
		return "!" .. rsub(lemma, "us$", "en")
	end
	-- [[Abakus]] -> [[Abakusse]], [[Zirkus]] -> [[Zirkusse]], [[Autobus]] -> [[Autobusse]]; not [[Applaus]]
	-- (plural [[Applause]]).
	if rfind(lemma, com.NV .. "us$") then
		return "se"
	end
	-- Weak ending -el, -em, -en, -er, e.g. [[Adler]], [[Meier]], [[Riedel]]; but exclude [[Heer]], [[Bier]],
	-- [[Ziel]], which take -e by default.
	if rfind(lemma, "e[lmnr]$") and not rfind(lemma, com.NV .. "[ei]e[lnmr]$") then
		return ""
	end
	return "e"
end
-- Fill the four singular slots of an ordinary noun for one `gender`. `def_gen` is the default genitive ending
-- that a "+" genitive spec expands to. Weak nouns take the n-type ending in the dative, and also in the
-- accusative when masculine; other nouns get endingless dative/accusative plus any archaic dative in -e.
local function decline_singular(base, gender, def_gen)
	add(base, "nom_s", nil, "", gender)
	add_spec(base, "gen_s", base.gens, gender, def_gen)
	if not base.props.weak then
		add(base, "dat_s", nil, "", gender)
		add_archaic_dative_singular(base, gender, def_gen)
		add(base, "acc_s", nil, "", gender)
		return
	end
	local n_ending = get_n_ending(base, base.lemma, "is singular")
	add(base, "dat_s", nil, n_ending, gender)
	local acc_ending = ""
	if gender == "m" then
		acc_ending = n_ending
	end
	add(base, "acc_s", nil, acc_ending, gender)
end
-- Fill the four plural slots from the plural specs in `base.pls`; `def_pl` is what a "+" spec expands to.
-- Two slots get post-processing:
-- * nom_p: for masculine/neuter nouns that are not plural-only, the declension type ("weak", "mixed" or
--   "strong") is recorded in `base.decl_type`, based on the `weak` property and on whether the nominative plural
--   is the lemma plus -n/-en.
-- * dat_p: unless the `nodatpln` property is set, -n is appended to forms ending in -e, -el, -er or -erl.
local function decline_plural(base, def_pl)
	local function record_decl_type(nom_pl)
		if base.props.saw_mn and base.number ~= "pl" then
			local decl_type
			if base.props.weak then
				decl_type = "weak"
			elseif nom_pl == base.lemma .. "n" or nom_pl == base.lemma .. "en" then
				decl_type = "mixed"
			else
				decl_type = "strong"
			end
			m_table.insertIfNot(base.decl_type, decl_type)
		end
		return nom_pl
	end

	local function add_dative_n(form)
		if not base.props.nodatpln and (rfind(form, "e[lr]?$") or rfind(form, "erl$")) then
			return form .. "n"
		end
		return form
	end

	local post_process = {
		nom_p = record_decl_type,
		dat_p = add_dative_n,
	}
	for _, slot in ipairs({"nom_p", "gen_p", "dat_p", "acc_p"}) do
		add_spec(base, slot, base.pls, nil, def_pl, nil, nil, post_process[slot])
	end
end
-- Decline an ordinary (non-adjectival, non-special) noun, and guess its masculine/feminine equivalent.
-- Plural-only nouns get just the plural slots (with a null default ending); all other nouns are declined once
-- per gender in `base.genders`, with `base.decl_type` reset to an empty list beforehand.
local function decline_noun(base)
	if base.number == "pl" then
		decline_plural(base, "")
		local lemma = base.lemma
		-- No need to specify gender for *_equiv; will be handled correctly in [[Module:de-headword]].
		if rfind(lemma, "innen$") then
			-- Ends in -innen, likely feminine. Chop off, and convert e.g. Chinesinnen -> Chinesen.
			local masc = rsub(lemma, "innen$", "")
			if rfind(masc, "es$") then
				masc = masc .. "en"
			end
			add(base, "m_equiv", masc, "")
		else
			-- Likely masculine. Try to convert Chinesen -> Chinesinnen, and -er -> -erinnen.
			add(base, "f_equiv", rsub(lemma, "en$", ""), "innen")
		end
		return
	end

	base.decl_type = {}
	for _, genderspec in ipairs(base.genders) do
		local gender = genderspec.form
		decline_singular(base, gender, get_default_gen(base, gender))
		decline_plural(base, get_default_pl(base, gender))
		if gender == "m" then
			-- Feminine equivalent: drop a final -e and add -in.
			add(base, "f_equiv", rsub(base.lemma, "e$", ""), "in")
		elseif gender == "f" then
			-- Try (sort of) to get the masculine: remove final -in, and if the result ends in -es, convert to
			-- -ese (e.g. Chinesin -> Chinese).
			local masc = rsub(base.lemma, "in$", "")
			if rfind(masc, "es$") then
				masc = masc .. "e"
			end
			add(base, "m_equiv", masc, "")
		end
		-- Nothing to do for neuter.
	end
end
-- Decline a surname. Surnames have per-gender singular slots (*_m_s and *_f_s) and shared plural slots. No
-- gender is passed to add(); there are always two genders, m and f, which are handled correctly in
-- [[Module:de-headword]].
local function decline_surname(base)
	local lemma = base.lemma
	local blank_slots_after_genitive = {"gen_f_s", "dat_m_s", "dat_f_s", "acc_m_s", "acc_f_s"}
	local plural_slots = {"nom_p", "gen_p", "dat_p", "acc_p"}

	add(base, "nom_m_s", nil, "")
	add(base, "nom_f_s", nil, "")

	-- Masculine genitive: an apostrophe after a sibilant ([[Marx]], [[Engels]], [[Weiß]], [[Schulz]]; also names
	-- with silent -s or -x like [[Delacroix]]) and after -ce, otherwise -s.
	local gen_m_s = "s"
	if rfind(lemma, "[sxzß]$") or rfind(lemma, "ce$") then
		gen_m_s = "'"
	end
	add_spec(base, "gen_m_s", base.gens, nil, gen_m_s)
	add(base, "gen_m_s", nil, "", nil, {"[with an article]"})
	for _, slot in ipairs(blank_slots_after_genitive) do
		add(base, slot, nil, "")
	end

	local pl_ending
	if rfind(lemma, "[sxß]$") then
		-- [[Marx]], [[Engels]], [[Weiß]]
		pl_ending = {"", "ens"}
	elseif rfind(lemma, "z$") then
		-- [[Schulz]], [[Schmitz]]
		pl_ending = {"", "es", "ens"}
	elseif rfind(lemma, "ce$") then
		pl_ending = {"", "ns"}
	elseif rfind(lemma, "e[nlr]?$") then
		-- [[Müller]], [[Goethe]], [[Dürer]], [[Schlegel]], [[Münchhausen]]
		pl_ending = {"s", ""}
	else
		-- [[Schmidt]], [[Bergmann]], [[Brentano]]
		pl_ending = {"s"}
	end
	for _, slot in ipairs(plural_slots) do
		add_spec(base, slot, base.pls, nil, pl_ending)
	end
end
-- Decline a toponym. No gender is passed to add(); it is always neuter. The genitive singular receives both the
-- user-specified/default ending and an endingless variant with a footnote about article use. Plural slots are
-- filled only when the number is "both", and then only from explicitly given plural specs (no default is
-- supplied).
local function decline_toponym(base)
	add(base, "nom_s", nil, "")

	-- After a sibilant the default genitive is an apostrophe and the endingless form needs an article; otherwise
	-- the default is -s and the article is optional.
	local ends_in_sibilant = rfind(base.lemma, "[sxzß]$")
	local default_gen = "s"
	local endingless_note = "[optionally with an article]"
	if ends_in_sibilant then
		default_gen = "'"
		endingless_note = "[with an article]"
	end
	add_spec(base, "gen_s", base.gens, nil, default_gen)
	add(base, "gen_s", nil, "", nil, {endingless_note})
	add(base, "dat_s", nil, "")
	add(base, "acc_s", nil, "")

	if base.number ~= "both" then
		return
	end
	for _, slot in ipairs({"nom_p", "gen_p", "dat_p", "acc_p"}) do
		add_spec(base, slot, base.pls)
	end
end
-- Decline a language name. No gender is passed to add(); it is always neuter. Besides the regular singular slots
-- there is a parallel set of "*_s_alt" slots with the endings -e/-en/-en/-e.
local function decline_langname(base)
	add(base, "nom_s", nil, "")
	-- The genitive always includes the endingless form; on top of that comes the explicit genitive if one was
	-- given, otherwise the default -s.
	add(base, "gen_s", nil, "")
	add_spec(base, "gen_s", base.gens, nil, "s")

	local remaining = {
		{"dat_s", ""},
		{"acc_s", ""},
		{"nom_s_alt", "e"},
		{"gen_s_alt", "en"},
		{"dat_s_alt", "en"},
		{"acc_s_alt", "e"},
	}
	for _, slot_and_ending in ipairs(remaining) do
		add(base, slot_and_ending[1], nil, slot_and_ending[2])
	end
end
-- Decline an adjectival noun by delegating to [[Module:de-adjective]]: an indicator spec equivalent to a
-- {{de-adecl}} call is assembled from the adjective indicators collected on `base`, the adjective forms are
-- generated, and the forms of the relevant gender (and/or the plural) are copied into the noun's per-state slots.
-- The m_equiv/f_equiv/n_equiv slots are filled in as well.
local function decline_adjective(base)
	-- Pieces of the indicator spec; concatenated without any separator once complete.
	local spec_pieces = {}
	local function push(piece)
		table.insert(spec_pieces, piece)
	end
	-- Emit the "." that separates indicators, unless nothing has been emitted yet.
	local function push_separator()
		if #spec_pieces > 0 then
			push(".")
		end
	end
	local function push_footnotes(footnotes)
		if not footnotes then
			return
		end
		for _, footnote in ipairs(footnotes) do
			push(footnote)
		end
	end

	if base.adj_stem then
		push("stem")
		for _, stem in ipairs(base.adj_stem) do
			push(":")
			push(stem.form)
			push_footnotes(stem.footnotes)
		end
	end
	if base.adj_suppress then
		push_separator()
		push("suppress:")
		push(base.adj_suppress)
	end
	if base.footnotes then
		push_separator()
		push_footnotes(base.footnotes)
	end
	-- pairs() order is unspecified, so the relative order of these indicators in the spec may vary.
	for prop in pairs(base.props) do
		if is_adjectival_decl_indicator(prop) then
			push_separator()
			push(prop)
		end
	end

	local adj_alternant_multiword_spec = require("Module:de-adjective").do_generate_forms(
		{base.lemma .. "<" .. table.concat(spec_pieces) .. ">"}
	)

	-- Copy the adjective forms of `gender` ("m", "f", "n" or "p") into the noun slots; every gender other than
	-- "p" lands in the singular ("_s") slots.
	local function copy_gender_forms(gender)
		local number = "s"
		if gender == "p" then
			number = "p"
		end
		for _, state in ipairs(states) do
			for _, case in ipairs(basic_cases) do
				local prefix = state .. "_" .. case .. "_"
				base.forms[prefix .. number] = adj_alternant_multiword_spec.forms[prefix .. gender]
			end
		end
	end

	-- No need to specify gender for *_equiv; will be handled correctly in [[Module:de-headword]].
	if base.number == "pl" then
		copy_gender_forms("p")
		add(base, "m_equiv", base.lemma, "e")
		add(base, "f_equiv", base.lemma, "e")
		add(base, "n_equiv", base.lemma, "e")
		return
	end

	-- Normally there should be only one gender.
	for _, genderspec in ipairs(base.genders) do
		copy_gender_forms(genderspec.form)
		add(base, "m_equiv", base.lemma, "er") -- masculine
		add(base, "f_equiv", base.lemma, "e") -- feminine
		add(base, "n_equiv", base.lemma, "es") -- neuter
	end
	if base.number ~= "sg" then
		copy_gender_forms("p")
	end
end
-- List the slots in which a lemma may be found, in the order they should be checked. `props` is a property
-- table, coming either from `base` or from `alternant_multiword_spec`. Surnames take precedence over adjectival
-- declension. A fresh list is returned on every call.
local function get_lemma_slots(props)
	if props.surname then
		return {"nom_m_s"}
	end
	if props.overall_adj then
		return {"str_nom_s", "str_nom_p"}
	end
	return {"nom_s", "nom_p"}
end
-- Return the lemmas for this term as a list of {form = FORM, footnotes = FOOTNOTES}. The candidate lemma slots
-- are checked in order and the contents of the first one that exists are returned; if none exists, the result is
-- an empty list.
-- If `linked_variant` is given, return the linked variants (with embedded links if specified that way by the
-- user), otherwise return variants with any embedded links removed. If `remove_footnotes` is given, the result
-- is a new list whose entries carry only the `form` field.
function export.get_lemmas(alternant_multiword_spec, linked_variant, remove_footnotes)
	local candidate_slots = get_lemma_slots(alternant_multiword_spec.props)
	local suffix = ""
	if linked_variant then
		suffix = "_linked"
	end
	for _, slot in ipairs(candidate_slots) do
		local lemmas = alternant_multiword_spec.forms[slot .. suffix]
		if lemmas then
			if not remove_footnotes then
				return lemmas
			end
			local stripped = {}
			for i, lemma in ipairs(lemmas) do
				stripped[i] = {form = lemma.form}
			end
			return stripped
		end
	end
	return {}
end
-- Apply slot overrides, then compute the "_linked" versions of the potential lemma slots, for use in {{de-noun}}.
-- In the linked version, a form identical to the link-free lemma is replaced by the original lemma as given
-- (before removing links), provided the original lemma actually contains links.
local function handle_derived_slots_and_overrides(base)
	process_slot_overrides(base)

	local function relink(form)
		if form ~= base.orig_lemma_no_links then
			return form
		end
		if rfind(base.orig_lemma, "%[%[") then
			return base.orig_lemma
		end
		return form
	end

	for _, slot in ipairs(get_lemma_slots(base.props)) do
		iut.insert_forms(base.forms, slot .. "_linked", iut.map_forms(base.forms[slot], relink))
	end
end
-- Like put.split_alternating_runs_and_strip_spaces(), but ensure that backslash-escaped commas and periods are not
-- treated as separators.
--
-- `segments` is a list of strings (modified in place) and `splitchar` is the separator passed through to
-- put.split_alternating_runs_and_strip_spaces(). Before splitting, each "\," and "\." is replaced by the
-- placeholder SUB_ESCAPED_COMMA / SUB_ESCAPED_PERIOD; after splitting, the placeholders are turned back into a
-- plain "," / "." (i.e. the backslash is dropped). Returns the list of separated groups.
local function split_alternating_runs_with_escapes(segments, splitchar)
	for i, segment in ipairs(segments) do
		-- Chain both substitutions on the same value. Re-running the second substitution on the untouched loop
		-- variable would discard the comma substitution.
		segment = rsub(segment, "\\,", SUB_ESCAPED_COMMA)
		segment = rsub(segment, "\\%.", SUB_ESCAPED_PERIOD)
		segments[i] = segment
	end
	local separated_groups = put.split_alternating_runs_and_strip_spaces(segments, splitchar)
	for _, separated_group in ipairs(separated_groups) do
		for i, segment in ipairs(separated_group) do
			-- Likewise, both placeholders must be restored on the same value.
			segment = rsub(segment, SUB_ESCAPED_COMMA, ",")
			segment = rsub(segment, SUB_ESCAPED_PERIOD, ".")
			separated_group[i] = segment
		end
	end
	return separated_groups
end
--[=[
Parse an indicator spec (text consisting of angle brackets and zero or more dot-separated indicators within them).
Return value is an object of the form
{
overrides = {
SLOT = {OVERRIDE, OVERRIDE, ...},
...
}, -- where OVERRIDE is {form = FORM, footnotes = FOOTNOTES}; same as `forms` table; FORM can be a full form (only if
beginning with a capital letter or !), otherwise an ending; "-" for an ending means a null ending, while
"--" suppresses the slot entirely, i.e. it is defective
gens = {GEN_SG_SPEC, GEN_SG_SPEC, ...}, same form as OVERRIDE above
pls = {PL_SPEC, PL_SPEC, ...}, same form as OVERRIDE above
forms = {}, -- forms for a single spec alternant; see `forms` below
props = {
PROP = true,
PROP = true,
...
}, -- misc Boolean properties: "weak" (weak noun); "adj" (adjectival noun; set using "+");
"ss" (lemma in -ß changes to -ss- before endings beginning with a vowel; pre-1996 spelling);
"nodatpln" (suppress automatic addition of 'n' in the dative plural after '-e', '-er', '-el')
number = "NUMBER", -- "sg", "pl", "both"; may be missing
adj = true, -- may be missing
-- The following additional fields are added by other functions:
orig_lemma = "ORIGINAL-LEMMA", -- as given by the user or taken from pagename
orig_lemma_no_links = "ORIGINAL-LEMMA-NO-LINKS", -- links removed
lemma = "LEMMA", -- `orig_lemma_no_links`,
forms = {
SLOT = {
{
form = "FORM",
footnotes = {"FOOTNOTE", "FOOTNOTE", ...} -- may be missing
},
...
},
...
},
}
]=]
-- Parse `angle_bracket_spec` (the indicator spec including its enclosing angle brackets) for the word `lemma` and
-- return the resulting `base` object. An empty `lemma` is replaced by `pagename`. `proper_noun` is stored as
-- `base.props.prop`. User errors are raised through parse_err(), which appends the text inside the angle brackets
-- to the message.
local function parse_indicator_spec(angle_bracket_spec, lemma, pagename, proper_noun)
if lemma == "" then
lemma = pagename
end
local base = {forms = {}, overrides = {}, props = {prop = proper_noun}}
base.orig_lemma = lemma
base.orig_lemma_no_links = m_links.remove_links(lemma)
base.lemma = base.orig_lemma_no_links
-- `inside` is the text between the angle brackets; the assertion fails if the brackets are missing.
local inside = rmatch(angle_bracket_spec, "^<(.*)>$")
assert(inside)
local function parse_err(msg)
error(msg .. ": <" .. inside .. ">")
end
--[=[
Parse a single override spec and return three values: the slot the override applies to, the original indicator
spec used to specify the slot, and the override specs. The input is a list where the footnotes have been separated
out. For example, given the spec 'dat:-[referring to a card suit, as a term of endearment, and generally in speech]:en[in most cases in writing]',
the input will be a list {"dat:-", "[referring to a card suit, as a term of endearment, and generally in speech]", ":en",
"[in most cases in writing]", ""}
]=]
local function parse_override(segments)
local part = segments[1]
-- The case name occupies the first three characters, so whatever follows it starts at character 4.
local offset = 4
local case = usub(part, 1, 3)
if not case_set_with_abl_voc[case] then
parse_err("Internal error: unrecognized case in override: '" .. table.concat(segments) .. "'")
end
local indicator = case
local rest = usub(part, offset)
local slot
-- An optional "pl" after the case selects the plural slot; otherwise the singular slot is meant.
if rfind(rest, "^pl") then
rest = rsub(rest, "^pl", "")
slot = case .. "_p"
indicator = indicator .. "pl"
else
slot = case .. "_s"
end
if rfind(rest, "^:") then
rest = rsub(rest, "^:", "")
else
parse_err("Slot indicator '" .. indicator .. "' must be followed by a colon: '" .. table.concat(segments) .. "'")
end
if not noun_slot_set[slot] then
parse_err("Unrecognized slot indicator '" .. indicator .. "': '" .. table.concat(segments) .. "'")
end
-- Replace the first segment by what follows the colon, so that only the override specs remain to be fetched.
segments[1] = rest
return slot, indicator, com.fetch_specs(segments, ":", "override", nil, parse_err)
end
-- With an empty spec ("<>") nothing below runs and `base` is returned with only the fields set above.
if inside ~= "" then
local segments = put.parse_balanced_segment_run(inside, "[", "]")
local dot_separated_groups = split_alternating_runs_with_escapes(segments, "%.")
for i, dot_separated_group in ipairs(dot_separated_groups) do
local part = dot_separated_group[1]
if i == 1 then
-- The first dot-separated group holds the comma-separated specs: gender first, then (depending on the
-- gender or variant) genitive and/or plural.
local comma_separated_groups = split_alternating_runs_with_escapes(dot_separated_group, ",")
base.genders = com.fetch_specs(comma_separated_groups[1], ":", "gender", nil, parse_err)
local saw_sg = false
local saw_pl = false
local saw_gendered_pl = false
local saw_non_gendered_pl = false
local saw_adj = false
local special_variant = nil
for _, genderspec in ipairs(base.genders) do
local g = genderspec.form
if g == "m" or g == "n" then
-- Set this on `base.props` as it's used in various other places.
base.props.saw_mn = true
saw_sg = true
elseif g == "f" then
saw_sg = true
elseif g == "p" then
saw_pl = true
saw_non_gendered_pl = true
elseif rfind(g, "^[mfn]p$") then
saw_pl = true
saw_gendered_pl = true
elseif g == "+" or g == "p+" or g == "+p" then
if #base.genders > 1 then
parse_err("Can't specify multiple genders with adjectival declension")
end
saw_adj = true
if g ~= "+" then
saw_pl = true
end
elseif g == "surname" or g == "toponym" or g == "langname" then
if #base.genders > 1 then
parse_err("Can't specify multiple genders with " .. g .. " declension")
end
special_variant = g
else
parse_err("Unrecognized gender spec '" .. g .. "'")
end
end
if saw_sg and saw_pl then
parse_err("Can't specify both singular and plural gender specs")
end
if saw_gendered_pl and saw_non_gendered_pl then
parse_err("Can't specify both 'p' and gendered plural specs")
end
-- Positions of the genitive and plural specs among the comma-separated groups. A genitive spec is only read
-- when gen_index > 1 (masculine/neuter or a special variant), and a plural spec only when
-- pl_index > gen_index; hence adjectival and plural-only nouns take neither, and 'langname' takes no plural.
local gen_index = (base.props.saw_mn or special_variant) and 2 or 1
local pl_index =
(saw_adj or saw_pl) and 1 or
(base.props.saw_mn or special_variant == "surname" or special_variant == "toponym") and 3 or
2
-- pl_index doubles as the maximum number of comma-separated groups allowed.
if #comma_separated_groups > pl_index then
if saw_adj then
parse_err("Can't specify plurals or genitives with adjectival declension")
elseif saw_pl then
parse_err("Can't specify plurals or genitives with plural-only nouns")
elseif base.props.saw_mn then
parse_err("Can specify at most three comma-separated specs when the gender is masculine or "
.. "neuter (gender, genitive, plural)")
elseif special_variant == "surname" or special_variant == "toponym" then
parse_err("Can specify at most three comma-separated specs with '" .. special_variant .. "' "
.. "nouns ('" .. special_variant .. "', genitive, plural)")
elseif special_variant == "langname" then
parse_err("Can specify at most two comma-separated specs with 'langname' "
.. " ('langname', genitive)")
else
parse_err("Can specify at most two comma-separated specs when the gender is feminine "
.. "(gender, plural)")
end
end
if #comma_separated_groups >= gen_index and gen_index > 1 then
base.gens = com.fetch_specs(comma_separated_groups[gen_index], ":", "genitive", "allow blank", parse_err)
end
if #comma_separated_groups >= pl_index and pl_index > gen_index then
base.pls = com.fetch_specs(comma_separated_groups[pl_index], ":", "plural", "allow blank", parse_err)
end
if special_variant then
if #base.genders > 1 then
parse_err("Internal error: More than one gender spec for '" .. special_variant .. "'")
else
-- Record the variant as a property and replace the pseudo-gender by real genders: m and f for surnames,
-- n for toponyms and language names.
base.props[special_variant] = true
if special_variant == "surname" then
-- FIXME, does it make sense to put the footnotes on the feminine gender (they appear after the gender)?
base.genders = {{form = "m"}, {form = "f", footnotes = base.genders[1].footnotes}}
else
base.genders = {{form = "n", footnotes = base.genders[1].footnotes}}
end
end
elseif saw_adj then
if #base.genders > 1 then
parse_err("Internal error: More than one gender spec for adjectival declension")
else
base.props.adj = true
if saw_pl then
base.number = "pl"
base.genders = {{form = "p", footnotes = base.genders[1].footnotes}}
else
-- Stash the footnotes into `adj_footnotes`; we will put them onto the autodetected gender
-- in determine_adjectival_genders(), which will set base.genders appropriately.
base.adj_footnotes = base.genders[1].footnotes
base.genders = {}
end
end
elseif saw_pl then
-- Convert 'mp' to 'm-p', 'fp' to 'f-p', etc. as that's what [[Module:gender and number]] expects.
for _, genderspec in ipairs(base.genders) do
local gender = rmatch(genderspec.form, "^([mfn])p$")
if gender then
genderspec.form = gender .. "-p"
end
end
base.number = "pl"
end
-- All remaining dot-separated groups are individual indicators. The stem:/suppress: indicators are only
-- recognized for adjectival declension.
elseif base.props.adj and part:find("^stem:") then
dot_separated_group[1] = rsub(part, "^stem:", "")
base.adj_stem = com.fetch_specs(dot_separated_group, ":", "adjectival stem", nil, parse_err)
elseif base.props.adj and part:find("^suppress:") then
if #dot_separated_group > 1 then
parse_err("Can't specify footnotes with suppress: '" .. table.concat(dot_separated_group) .. "'")
end
-- No need to parse or validate more. Will happen in [[Module:de-adjective]].
-- NOTE(review): this pattern is not anchored with "^" (unlike the check above), so every occurrence of
-- "suppress:" in `part` is removed, not just the leading one -- confirm that this is intended.
base.adj_suppress = rsub(part, "suppress:", "")
elseif part == "" then
-- A group with an empty first segment is either a bare footnote list or, with nothing else, a blank indicator.
if #dot_separated_group == 1 then
parse_err("Blank indicator")
end
base.footnotes = com.fetch_footnotes(dot_separated_group, parse_err)
elseif part:find(":") then
-- override
-- FIXME: Handle adjectival overrides
local case_prefix = usub(part, 1, 3)
if case_set_with_abl_voc[case_prefix] then
local slot, slot_indicator, override = parse_override(dot_separated_group)
if base.overrides[slot] then
parse_err("Can't specify override twice for slot '" .. slot_indicator .. "'")
else
base.overrides[slot] = override
end
else
parse_err("Unrecognized indicator '" .. part .. "'")
end
elseif #dot_separated_group > 1 then
-- Every indicator handled below is a bare keyword, so attached footnotes are an error.
local errmsg
if base.props.adj then
errmsg = "Footnotes only allowed with slot overrides, 'stem:' or by themselves"
else
errmsg = "Footnotes only allowed with genitive, plural, slot overrides or by themselves"
end
parse_err(errmsg .. ": '" .. table.concat(dot_separated_group) .. "'")
elseif part == "sg" or part == "both" then
if base.number then
if base.number ~= part then
parse_err("Can't specify '" .. part .. "' along with '" .. base.number .. "'")
else
parse_err("Can't specify '" .. part .. "' twice")
end
end
base.number = part
elseif not base.props.adj and (part == "weak" or part == "weak_n" or part == "ss" or part == "nodatpln" or part == "article" or part == "dat_with_e") then
if base.props[part] then
parse_err("Can't specify '" .. part .. "' twice")
end
base.props[part] = true
if part == "weak_n" then
-- weak_n implies weak
base.props.weak = true
end
elseif base.props.adj and (part == "article" or is_adjectival_decl_indicator(part)) then
if base.props[part] then
parse_err("Can't specify '" .. part .. "' twice")
end
base.props[part] = true
else
parse_err("Unrecognized indicator '" .. part .. "'")
end
end
end
return base
end
-- For an adjectival lemma, synthesize the predicative (lemma) form by stripping the final -er, -e or -es, and
-- record the gender implied by that ending in `base.autodetected_gender` (-er: "m", -e: "f", -es: "n"). The
-- result doesn't have to be perfect because the predicative form itself isn't used, so there is no attempt to
-- convert -abler -> -abel or anything like that. Raises an error if the lemma has none of the three endings.
local function synthesize_adj_lemma(base)
	local stem, ending = rmatch(base.lemma, "^(.*)(e[rs]?)$")
	if not stem then
		error("Unrecognized adjectival lemma, should end in '-er', '-e' or '-es': '" .. base.lemma .. "'")
	end
	base.lemma = stem
	-- The autodetected gender will be ignored if number == "pl".
	local gender_for_ending = {er = "m", e = "f"}
	base.autodetected_gender = gender_for_ending[ending] or "n"
end
-- Post-process one parsed `base`: copy the `article` property and the special-variant properties up to
-- `alternant_multiword_spec.props`, and either prepare an adjectival lemma (for adjectival declension) or fill in
-- the defaults for `base.number`, `base.gens` and `base.pls` (for everything else).
local function detect_indicator_spec(alternant_multiword_spec, base)
	local overall_props = alternant_multiword_spec.props
	local props = base.props

	if props.article then
		overall_props.article = true
	end

	for _, variant in ipairs {"surname", "toponym", "langname"} do
		if overall_props[variant] == nil then
			overall_props[variant] = props[variant]
		elseif overall_props[variant] ~= props[variant] then
			-- We do this because we have a special table with its own slots for each of these special variants.
			-- FIXME: Consider supporting adjectives with these variants. That requires that we copy the adjectival
			-- declensions to the appropriate per-variant slots.
			error("If some alternants set '" .. variant .. "', all must do so")
		end
	end

	if props.adj then
		overall_props.overall_adj = true
		synthesize_adj_lemma(base)
		return
	end

	-- Default number: surnames and nouns with explicit plural specs have both numbers; proper nouns, toponyms and
	-- language names are singular-only; everything else has both numbers.
	if not base.number then
		if props.surname or base.pls then
			base.number = "both"
		elseif overall_props.is_proper or props.toponym or props.langname then
			base.number = "sg"
		else
			base.number = "both"
		end
	end

	if not props.surname then
		if base.number == "pl" then
			if base.gens then
				error("Internal error: With plural-only noun, no genitive singular specs should be allowed")
			end
			if base.pls then
				error("Internal error: With plural-only noun, no plural specs should be allowed")
			end
		end
		if base.pls and base.number == "sg" then
			error("Can't specify explicit plural specs along with explicit '.sg'")
		end
	end

	-- A missing genitive or plural spec defaults to "+", i.e. "use the default ending".
	if not base.gens then
		base.gens = {{form = "+"}}
	end
	if not base.pls then
		base.pls = {{form = "+"}}
	end
end
-- Run detect_indicator_spec() over every word spec of `alternant_multiword_spec`, then, in a second pass, copy
-- the resulting overall `overall_adj` property down into the properties of every word spec.
local function detect_all_indicator_specs(alternant_multiword_spec)
	local function detect(base)
		detect_indicator_spec(alternant_multiword_spec, base)
	end
	local function push_down_overall_adj(base)
		base.props.overall_adj = alternant_multiword_spec.props.overall_adj
	end
	iut.map_word_specs(alternant_multiword_spec, detect)
	-- Now propagate some properties downwards.
	iut.map_word_specs(alternant_multiword_spec, push_down_overall_adj)
end
-- Forward declaration: propagate_alternant_properties() and propagate_multiword_properties() call each other.
local propagate_multiword_properties

-- Propagate `property` within each alternant of `alternant_spec`, then set `alternant_spec[property]` to the value
-- shared by the alternants, or to `mixed_value` if they disagree. An alternant whose value is unset (falsy) does
-- not count as a disagreement once some value has been seen.
local function propagate_alternant_properties(alternant_spec, property, mixed_value, nouns_only)
	local combined
	for _, alternant in ipairs(alternant_spec.alternants) do
		propagate_multiword_properties(alternant, property, mixed_value, nouns_only)
		local value = alternant[property]
		if combined == nil then
			combined = value
		elseif value and combined ~= value then
			combined = mixed_value
		end
	end
	alternant_spec[property] = combined
end
-- Propagate `property` across the words of `multiword_spec`. Each "nounal" word passes its value to the preceding
-- words back to the previous nounal word, and the last nounal word passes its value to all following words;
-- words that already have a value keep it. `multiword_spec[property]` becomes the value shared by the nounal
-- words, or `mixed_value` if they disagree.
-- A word counts as nounal if it is an alternant spec that ends up with the property set, or, when `nouns_only` is
-- given, if it is not adjectival (`props.adj` unset), or otherwise if it has the property set. A nounal word
-- without the property is an internal error.
propagate_multiword_properties = function(multiword_spec, property, mixed_value, nouns_only)
	local word_specs = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs
	local combined = nil
	local last_nounal_index = 0
	for i = 1, #word_specs do
		local word_spec = word_specs[i]
		local is_nounal
		if word_spec.alternants then
			-- `nouns_only` is not passed on to nested alternants.
			propagate_alternant_properties(word_spec, property, mixed_value)
			is_nounal = word_spec[property] and true or false
		elseif nouns_only then
			is_nounal = not word_spec.props.adj
		else
			is_nounal = word_spec[property] and true or false
		end
		if is_nounal then
			local value = word_spec[property]
			if not value then
				error("Internal error: noun-type word spec without " .. property .. " set")
			end
			-- Fill in the words between the previous nounal word and this one.
			for j = last_nounal_index + 1, i - 1 do
				local earlier = word_specs[j]
				if not earlier[property] then
					earlier[property] = value
				end
			end
			last_nounal_index = i
			if combined == nil then
				combined = value
			elseif combined ~= value then
				combined = mixed_value
			end
		end
	end
	-- Words after the last nounal word inherit from it.
	if last_nounal_index > 0 then
		local last_value = word_specs[last_nounal_index][property]
		for i = last_nounal_index + 1, #word_specs do
			local later = word_specs[i]
			if not later[property] then
				later[property] = last_value
			end
		end
	end
	multiword_spec[property] = combined
end
-- Push `property` from the top-level spec down to every word spec. Each level falls back to the value of its parent
-- (ultimately `default_propval`) when it has no value of its own. Throws an error if a word would end up with the
-- value "mixed".
local function propagate_properties_downward(alternant_multiword_spec, property, default_propval)
	-- Give `word_spec` its own value if it has one, else `inherited`; "mixed" is never allowed on a word.
	local function assign(word_spec, inherited)
		local value = word_spec[property] or inherited
		if value == "mixed" then
			error("Attempt to assign mixed " .. property .. " to word")
		end
		word_spec[property] = value
	end

	local top_value = alternant_multiword_spec[property] or default_propval
	alternant_multiword_spec[property] = top_value
	for _, spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if spec.alternants then
			-- The alternant spec and its multiword specs only contribute fallback values; they are not modified.
			local spec_value = spec[property] or top_value
			for _, multiword_spec in ipairs(spec.alternants) do
				local multiword_value = multiword_spec[property] or spec_value
				for _, word_spec in ipairs(multiword_spec.word_specs) do
					assign(word_spec, multiword_value)
				end
			end
		else
			assign(spec, top_value)
		end
	end
end
--[=[
Propagate `property` ("genders" or "number") from nouns to adjacent adjectives. We proceed as follows:
1. We assume the properties in question are already set on all nouns. This should happen in parse_indicator_spec().
2. We first propagate properties upwards and sideways. We recurse downwards from the top. When we encounter a
multiword spec, we proceed left to right looking for a noun. When we find a noun, we fetch its property
(recursing if the noun is an alternant), and propagate it to any adjectives to its left, up to the next noun
to the left. When we have processed the last noun, we also propagate its property value to any adjectives to the
right. Finally, we set the property value for the multiword spec itself by combining all the non-nil properties of
the individual elements. If all non-nil properties have the same value, the result is that value, otherwise it is
`mixed_value` (which is "mixed" gender, but "both" for number).
3. When we encounter an alternant spec in this process, we recursively process each alternant (which is a multiword
spec) using the previous step, and combine any non-nil properties we encounter the same way as for multiword specs.
4. The effect of steps 2 and 3 is to set the property of each alternant and multiword spec based on its children or its
neighbors.
]=]
local function propagate_properties(alternant_multiword_spec, property, default_propval, mixed_value)
	-- Any truthy value enables noun-only mode; the string just documents the intent at the call site.
	local nouns_only = "nouns only"
	-- Pass 1: treat every non-adjective word as a noun and spread its value to neighbouring elements.
	propagate_multiword_properties(alternant_multiword_spec, property, mixed_value, nouns_only)
	-- Pass 2: treat any element that now has a value as a donor.
	propagate_multiword_properties(alternant_multiword_spec, property, mixed_value, false)
	-- Pass 3: fill in whatever is still unset from the parent levels (ultimately `default_propval`).
	propagate_properties_downward(alternant_multiword_spec, property, default_propval)
end
-- Set the gender of adjectives and adjectival nouns to the gender autodetected during synthesize_adj_lemma(),
-- unless the form is plural. We don't just set the gender directly in synthesize_adj_lemma() because we don't know
-- until later (i.e. when propagate_properties() is called) whether an adjectival form in -e is feminine or plural.
-- We set the footnotes (i.e. qualifiers) of the gender to the footnotes (if any) specified directly after '+'.
local function determine_adjectival_genders(alternant_multiword_spec)
	-- Assign a single gender spec to an adjectival word that has no genders yet.
	local function set_gender(base)
		if not base.props.adj or #base.genders ~= 0 then
			return
		end
		local gender
		if base.number == "pl" then
			-- Plural forms get the pseudo-gender "p" instead of the autodetected one.
			gender = "p"
		else
			gender = base.autodetected_gender
		end
		base.genders = {{form = gender, footnotes = base.adj_footnotes}}
	end

	iut.map_word_specs(alternant_multiword_spec, set_gender)
end
-- Find the first noun in a multiword expression and set alternant_multiword_spec.first_noun
-- to the index of that noun. Also find the first adjective and set alternant_multiword_spec.first_adj
-- similarly. If there is a first noun, we use its properties to determine the overall expression's
-- properties; otherwise we use the first adjective's properties, otherwise the first word's properties.
-- If the "word" located this way is not an alternant spec, we just use its properties directly, otherwise
-- we use the properties of the first noun (or failing that the first adjective, or failing that the
-- first word) in each alternative alternant in the alternant spec. For this reason, we need to set the
-- .first_noun and .first_adj of each multiword expression embedded in the first noun alternant spec,
-- and the .first_adj of each multiword expression in each adjective alternant spec leading up to the
-- first noun alternant spec.
-- Sets `first_noun` and/or `first_adj` (1-based indices) on `alternant_multiword_spec`, and on each multiword spec
-- inside the alternant specs visited before (and including) the first noun. Scanning stops at the first noun.
local function determine_noun_status(alternant_multiword_spec)
	for i, alternant_or_word_spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if alternant_or_word_spec.alternants then
			-- "noun" if any alternant contains a noun, else "adj" if any contains an adjective, else nil.
			local alternant_type
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				for j, word_spec in ipairs(multiword_spec.word_specs) do
					if not word_spec.props.adj then
						multiword_spec.first_noun = j
						-- This is an internal tag compared against "noun" below; it must not be localised,
						-- otherwise the first noun of an alternant spec is never recorded.
						alternant_type = "noun"
						break
					elseif not multiword_spec.first_adj then
						multiword_spec.first_adj = j
						if not alternant_type then
							alternant_type = "adj"
						end
					end
				end
			end
			if alternant_type == "noun" then
				alternant_multiword_spec.first_noun = i
				return
			elseif alternant_type == "adj" and not alternant_multiword_spec.first_adj then
				alternant_multiword_spec.first_adj = i
			end
		else
			if not alternant_or_word_spec.props.adj then
				alternant_multiword_spec.first_noun = i
				return
			elseif not alternant_multiword_spec.first_adj then
				alternant_multiword_spec.first_adj = i
			end
		end
	end
end
-- Decline a single word spec with the routine matching its type (surname, toponym, language name, adjective or
-- ordinary noun, checked in that order), then apply derived slots and user overrides.
local function decline_noun_or_adjective(base)
	local props = base.props
	local decline
	if props.surname then
		decline = decline_surname
	elseif props.toponym then
		decline = decline_toponym
	elseif props.langname then
		decline = decline_langname
	elseif props.adj then
		decline = decline_adjective
	else
		decline = decline_noun
	end
	decline(base)
	handle_derived_slots_and_overrides(base)
end
-- Set the overall articles. We can't do this using the normal inflection code as it will produce e.g.
-- '[[der]] [[und]] [[der]]' for conjoined nouns.
local function compute_non_surname_articles(alternant_multiword_spec)
	local forms = alternant_multiword_spec.forms

	-- Insert one article form into slot `slot` of the overall forms table.
	local function add_article(slot, article)
		iut.insert_form(forms, slot, {form = article})
	end

	-- Singular articles depend on the gender(s) of each word; skipped entirely for plural-only expressions.
	if alternant_multiword_spec.number ~= "pl" then
		iut.map_word_specs(alternant_multiword_spec, function(base)
			for _, genderspec in ipairs(base.genders) do
				local gender_articles = com.articles[genderspec.form]
				for _, case in ipairs(cases_with_abl_voc) do
					for _, def in ipairs(definitenesses) do
						local key = def .. "_" .. case
						add_article("art_" .. key .. "_s", gender_articles[key])
					end
				end
			end
		end)
	end

	-- Plural articles are always added, whatever the value of `number`.
	local plural_articles = com.articles.p
	for _, case in ipairs(basic_cases) do
		for _, def in ipairs(definitenesses) do
			local key = def .. "_" .. case
			add_article("art_" .. key .. "_p", plural_articles[key])
		end
	end
end
-- Set the overall surname articles. We can't do this using the normal inflection code as it will produce e.g.
-- '[[der]] [[und]] [[der]]' for conjoined nouns.
local function compute_surname_articles(alternant_multiword_spec)
	local forms = alternant_multiword_spec.forms

	-- Insert `article` into `slot` as a linked form wrapped in parentheses.
	local function add_bracketed(slot, article)
		iut.insert_form(forms, slot, {form = "([[" .. article .. "]])"})
	end

	-- Singular: one set of articles for masculine and one for feminine bearers of the surname.
	for _, gender in ipairs {"m", "f"} do
		local gender_articles = com.articles[gender]
		for _, case in ipairs(basic_cases) do
			for _, def in ipairs(definitenesses) do
				local key = def .. "_" .. case
				add_bracketed("art_" .. key .. "_" .. gender .. "_s", gender_articles[key])
			end
		end
	end

	-- Plural: definite articles only.
	for _, case in ipairs(basic_cases) do
		add_bracketed("art_def_" .. case .. "_p", com.articles.p["def_" .. case])
	end
end
-- Fill in the article slots of the overall forms table, using the surname-specific layout when the expression is
-- a surname and the ordinary layout otherwise.
local function compute_articles(alternant_multiword_spec)
	local compute = compute_non_surname_articles
	if alternant_multiword_spec.props.surname then
		compute = compute_surname_articles
	end
	compute(alternant_multiword_spec)
end
-- Call a function `fun` over the first noun in the `alternant_multiword_spec`, or over the first noun in each
-- alternant if there is more than one alternant. If there are no nouns, use the first adjective (in the case of an
-- adjectival noun).
local function map_first_noun(alternant_multiword_spec, fun)
	local specs = alternant_multiword_spec.alternant_or_word_specs
	local top_index = alternant_multiword_spec.first_noun or alternant_multiword_spec.first_adj or 1
	-- Nothing to do if the key position does not exist (e.g. there are no words at all).
	if #specs < top_index then
		return
	end

	local key_spec = specs[top_index]
	if not key_spec.alternants then
		fun(key_spec)
		return
	end

	-- The key element is an alternant spec: visit the key word of each alternant in turn.
	for _, multiword_spec in ipairs(key_spec.alternants) do
		local word_index = multiword_spec.first_noun or multiword_spec.first_adj or 1
		if #multiword_spec.word_specs >= word_index then
			fun(multiword_spec.word_specs[word_index])
		end
	end
end
-- Compute the categories to add the noun to, as well as the annotation to display in the
-- declension title bar. We combine the code to do these functions as both categories and
-- title bar contain similar information.
local function compute_categories_and_annotation(alternant_multiword_spec)
	local spec = alternant_multiword_spec
	spec.categories = {}
	spec.decl_type = {}

	-- Add a category; "~" in `cattype` is replaced by the part of speech, and the language name is prefixed.
	local function add_category(cattype)
		cattype = rsub(cattype, "~", spec.pos)
		m_table.insertIfNot(spec.categories, "ဂျာမာန်" .. cattype .. "")
	end

	local number = spec.number
	local annparts = {}
	if number == "sg" then
		if not spec.props.is_proper then
			add_category("~မတော်ဟွံဂွံဂမၠိုၚ်")
		end
		table.insert(annparts, "sg-only")
	elseif number == "pl" then
		-- Category currently disabled: insert("pluralia tantum")
		-- If the gender is just 'p', "pl-only" is emitted in the gender loop below as a substitute for the gender,
		-- with any qualifiers hooked onto it. When 'p' is the gender, there can be only one gender.
		if spec.genders[1].spec ~= "p" then
			table.insert(annparts, "pl-only")
		end
	end

	-- Build one description per gender, e.g. the full gender name followed by italicised qualifiers.
	local genderdescs = {}
	for _, genderspec in ipairs(spec.genders) do
		local parts = {}
		if genderspec.spec == "p" then
			table.insert(parts, "pl-only")
		else
			local gender = rsub(genderspec.spec, "%-p$", "")
			table.insert(parts, gender_spec_to_full_gender[gender])
		end
		if genderspec.qualifiers then
			table.insert(parts, " ''(" .. table.concat(genderspec.qualifiers, ", ") .. ")''")
		end
		table.insert(genderdescs, table.concat(parts))
	end

	local decldescs = {}

	-- Record a declension type both for the title-bar annotation and on the spec itself.
	local function add_decl_type(decl_type)
		m_table.insertIfNot(decldescs, decl_type)
		m_table.insertIfNot(spec.decl_type, decl_type)
	end

	-- Classify one key word: special noun type, explicit declension type(s), or weak/strong for words flagged
	-- with `saw_mn`.
	local function classify(base)
		local props = base.props
		local special = props.surname and "surname" or props.toponym and "toponym" or props.langname and "langname"
		if special then
			m_table.insertIfNot(decldescs, special)
		elseif base.decl_type and #base.decl_type > 0 then
			-- strong/weak/mixed declension type; should only be present on masculine or neuter nouns with a plural
			for _, decl_type in ipairs(base.decl_type) do
				if decl_type == "weak" then
					add_category("weak ~")
				end
				-- Category for "mixed" currently disabled: insert("~မပံၚ်ဖနှဴလဝ်ဂမၠိုၚ်")
				add_decl_type(decl_type)
			end
		elseif props.saw_mn then
			-- For singular-only masculine or neuter nouns, we can still classify as strong or weak.
			-- Plural-only nouns are not classified: even for nouns in -n or -en, there is no way to tell whether
			-- they are strong (-en is part of the stem), mixed or weak.
			if props.weak then
				add_category("weak ~")
				add_decl_type("weak")
			else
				add_decl_type("strong")
			end
		end
	end

	-- Use the surname/toponym/langname/weak/strong properties of the noun(s).
	map_first_noun(spec, classify)

	if #genderdescs > 0 then
		table.insert(annparts, table.concat(genderdescs, " // "))
	end
	if #decldescs > 0 then
		table.insert(annparts, table.concat(decldescs, " // "))
	end
	if not spec.first_noun and spec.first_adj then
		add_category("adjectival ~")
		table.insert(annparts, "adjectival")
	end
	-- Category for language names currently disabled: insert("specially-declined language names")
	spec.annotation = table.concat(annparts, ", ")
end
-- Compute `alternant_multiword_spec.genders`, a list of {spec = GENDER, qualifiers = LIST_OR_NIL}, from the genders
-- of the key noun(s) as selected by map_first_noun().
local function compute_headword_genders(alternant_multiword_spec)
	local genders = {}
	alternant_multiword_spec.genders = genders

	-- Return the already-collected entry whose gender is `form`, if there is one.
	local function find_existing(form)
		for _, existing in ipairs(genders) do
			if existing.spec == form then
				return existing
			end
		end
		return nil
	end

	-- Only the nouns are consulted. Using the adjectives in adjective-noun combinations would cause issues in
	-- plural-only expressions like [[Kanarische Inseln]], where ''Inseln'' may be 'f-p' but ''Kanarische'' will be
	-- just 'p', and both genders would end up in the result.
	map_first_noun(alternant_multiword_spec, function(base)
		for _, genderspec in ipairs(base.genders) do
			local existing = find_existing(genderspec.form)
			if existing then
				-- Same gender seen before: merge the footnotes instead of adding a duplicate.
				existing.qualifiers = iut.combine_footnotes(existing.qualifiers, genderspec.footnotes)
			else
				table.insert(genders, {spec = genderspec.form, qualifiers = genderspec.footnotes})
			end
		end
	end)

	-- Convert the collected footnotes into qualifiers via iut.expand_footnote_or_references(), dropping duplicates.
	for _, genderspec in ipairs(genders) do
		local footnotes = genderspec.qualifiers
		if footnotes then
			local qualifiers = {}
			for _, footnote in ipairs(footnotes) do
				m_table.insertIfNot(qualifiers,
					iut.expand_footnote_or_references(footnote, "return raw", "no parse refs"))
			end
			genderspec.qualifiers = qualifiers
		end
	end
end
-- Process the specs in `arg_specs` given for dim=, m=, f=, n= or sg= and store the results in `slot` in
-- `alternant_multiword_spec.forms`. `arg_specs` is a list of specs, each of which is a comma-separated or
-- colon-separated string of specs, where each spec may be a suffix like "in", or a suffix with umlaut like
-- "^chen", or a full form beginning with a capital letter or exclamation point. Suffixes are added onto the lemma
-- with -e removed if present. `default` is the default value to use if "+" is given as a spec, and `literal_default`,
-- if given, indicates that `default` is always a literal (full) form; otherwise, if `default` begins with a
-- lowercase letter, it is taken as a suffix. (This is used in cases like the feminine of [[ordentlicher Professor]],
-- which is generated as "ordentliche Professorin"; we don't want this interpreted as a suffix.) `desc` is an English
-- description of the form whose specs are being processed, for display in error messages.
local function process_dim_m_f_n(alternant_multiword_spec, arg_specs, default, literal_default, slot, desc)
	-- Stems to attach suffixes to: the lemmas with a final -e removed. Footnotes are stripped because footnotes
	-- attached to a lemma should not end up in the output; they typically get there when the syntax `.[footnote]`
	-- is used, which attaches a footnote to every form.
	local stems = iut.map_forms(
		export.get_lemmas(alternant_multiword_spec, nil, "remove footnotes"),
		function(form)
			return rsub(form, "e$", "")
		end
	)

	for _, spec in ipairs(arg_specs) do
		-- Error reporter that appends the offending spec to the message.
		local function parse_err(msg)
			error(msg .. ": " .. spec)
		end

		-- Allow comma (preferred) or colon as separator.
		local ending_specs = com.fetch_specs(put.parse_balanced_segment_run(spec, "[", "]"), "[,:]", desc, nil,
			parse_err)

		-- FIXME, this should propagate the 'ss' property upwards
		local props = {}
		local function combine(stem, ending)
			return combine_stem_ending(props, stem, ending)
		end
		-- Add the forms for one ending; a nil `stem` means "use the lemma-derived stems".
		local function add(stem, ending)
			iut.add_forms(alternant_multiword_spec.forms, slot, stem or stems, ending, combine)
		end

		process_spec(ending_specs, nil, default, literal_default, desc, add)
	end
end
-- Convert the forms in `alternant_multiword_spec.forms` to displayable strings via iut.show_forms(), using the
-- slot list that matches the kind of expression (surname, language name, adjectival, or ordinary noun).
local function show_forms(alternant_multiword_spec)
	local lemmas = export.get_lemmas(alternant_multiword_spec)
	local spec_props = alternant_multiword_spec.props
	local slot_list = spec_props.surname and surname_slot_list_with_linked_and_articles
		or spec_props.langname and langname_slot_list_with_linked_and_articles
		or spec_props.overall_adj and adjectival_slot_list_with_linked_and_articles
		or noun_slot_list_with_linked_and_articles
	iut.show_forms(alternant_multiword_spec.forms, {
		lang = lang,
		lemmas = lemmas,
		slot_list = slot_list,
	})
end
-- Table template for ordinary nouns with both singular and plural, including the indefinite-article column.
-- Selected by make_table() when `number` is neither "sg" nor "pl" and neither props.toponym nor props.article is
-- set. The {...} fields are filled in from the forms table by m_string_utilities.format().
local noun_template_both = [=[
<div class="NavFrame">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-{decl_type}"
! style="{BG2};width:15%" |
! colspan="3" style="{BG2};width:46%" | ကိုန်ဨကဝုစ်
! colspan="2" style="{BG2};width:39%" | ကိုန်ဗဟုဝစ်
|-
! style="{BG3}" |
! style="{BG3};width:7%" | [[indefinite article|indef.]]
! style="{BG3};width:7%" | [[definite article|def.]]
! style="{BG3};width:32%" | နာမ်
! style="{BG3};width:7%" | [[definite article|def.]]
! style="{BG3};width:32%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | {art_ind_nom_s}
| style="{BG4}" | {art_def_nom_s}
| {nom_s}
| style="{BG4}" | {art_def_nom_p}
| {nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | {art_ind_gen_s}
| style="{BG4}" | {art_def_gen_s}
| {gen_s}
| style="{BG4}" | {art_def_gen_p}
| {gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | {art_ind_dat_s}
| style="{BG4}" | {art_def_dat_s}
| {dat_s}
| style="{BG4}" | {art_def_dat_p}
| {dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | {art_ind_acc_s}
| style="{BG4}" | {art_def_acc_s}
| {acc_s}
| style="{BG4}" | {art_def_acc_p}
| {acc_p}
|{\cl}{notes_clause}</div></div>]=]
-- Table template for nouns with both singular and plural but without the indefinite-article column. Selected by
-- make_table() when `number` is neither "sg" nor "pl" and props.toponym or props.article is set.
local noun_template_both_no_indef = [=[
<div class="NavFrame" style="width:93%">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-{decl_type}"
! style="{BG2};width:15%" |
! colspan="2" style="{BG2};width:39%" | ကိုန်ဨကဝုစ်
! colspan="2" style="{BG2};width:39%" | ကိုန်ဗဟုဝစ်
|-
! style="{BG3}" |
! style="{BG3};width:7%" | [[definite article|def.]]
! style="{BG3};width:32%" | နာမ်
! style="{BG3};width:7%" | [[definite article|def.]]
! style="{BG3};width:32%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | {art_def_nom_s}
| {nom_s}
| style="{BG4}" | {art_def_nom_p}
| {nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | {art_def_gen_s}
| {gen_s}
| style="{BG4}" | {art_def_gen_p}
| {gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | {art_def_dat_s}
| {dat_s}
| style="{BG4}" | {art_def_dat_p}
| {dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | {art_def_acc_s}
| {acc_s}
| style="{BG4}" | {art_def_acc_p}
| {acc_p}
|{\cl}{notes_clause}</div></div>]=]
-- Extra table rows for the `abl_s` and `voc_s` slots. make_table() formats this into {abl_voc_clause}, which the
-- singular-only noun templates splice in after the last regular row; it is only used when at least one of the two
-- slots is not the "—" placeholder.
local noun_template_abl_voc = [=[
|-
! style="{BG3}" | ပရေၚ်မလၚ်
| style="{BG4}" | {art_ind_abl_s}
| style="{BG4}" | {art_def_abl_s}
| {abl_s}
|-
! style="{BG3}" | ပရေၚ်ဂယိုၚ်လမျီု
| style="{BG4}" | {art_ind_voc_s}
| style="{BG4}" | {art_def_voc_s}
| {voc_s}]=]
-- Table template for singular-only nouns, including the indefinite-article column. Selected by make_table() when
-- `number` is "sg" and neither props.toponym nor props.article is set. {abl_voc_clause} holds the optional extra
-- rows built from noun_template_abl_voc (or the empty string).
local noun_template_sg = [=[
<div class="NavFrame" style="width:61%">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-{decl_type}"
! style="{BG2};width:24.6%" |
! colspan="3" style="{BG2};" | ကိုန်ဨကဝုစ်
|-
! style="{BG3}" |
! style="{BG3};width:11.5%" | [[indefinite article|indef.]]
! style="{BG3};width:11.5%" | [[definite article|def.]]
! style="{BG3};width:52.5%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | {art_ind_nom_s}
| style="{BG4}" | {art_def_nom_s}
| {nom_s}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | {art_ind_gen_s}
| style="{BG4}" | {art_def_gen_s}
| {gen_s}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | {art_ind_dat_s}
| style="{BG4}" | {art_def_dat_s}
| {dat_s}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | {art_ind_acc_s}
| style="{BG4}" | {art_def_acc_s}
| {acc_s}{abl_voc_clause}
|{\cl}{notes_clause}</div></div>]=]
-- Table template for singular-only nouns without the indefinite-article column. Selected by make_table() when
-- `number` is "sg" and props.toponym or props.article is set.
-- NOTE(review): {abl_voc_clause} is built from noun_template_abl_voc, whose rows contain an indefinite-article
-- cell that this two-column layout does not have — confirm the rendering when ablative/vocative forms exist.
local noun_template_sg_no_indef = [=[
<div class="NavFrame" style="width:50%">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-{decl_type}"
! style="{BG2};width:24.6%" |
! colspan="2" style="{BG2};" | ကိုန်ဨကဝုစ်
|-
! style="{BG3}" |
! style="{BG3};width:11.5%" | [[definite article|def.]]
! style="{BG3};width:52.5%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | {art_def_nom_s}
| {nom_s}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | {art_def_gen_s}
| {gen_s}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | {art_def_dat_s}
| {dat_s}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | {art_def_acc_s}
| {acc_s}{abl_voc_clause}
|{\cl}{notes_clause}</div></div>]=]
-- Table template for plural-only nouns (definite article and noun columns only). Selected by make_table() when
-- `number` is "pl" for ordinary (non-surname, non-langname, non-adjectival) nouns.
local noun_template_pl = [=[
<div class="NavFrame" style="width:61%">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-{decl_type}"
! style="{BG2};width:24.6%" |
! colspan="2" style="{BG2};" | ကိုန်ဗဟုဝစ်
|-
! style="{BG3}" |
! style="{BG3};width:11.5%" | [[definite article|def.]]
! style="{BG3};width:52.5%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | {art_def_nom_p}
| {nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | {art_def_gen_p}
| {gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | {art_def_dat_p}
| {dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | {art_def_acc_p}
| {acc_p}
|{\cl}{notes_clause}</div></div>]=]
-- Table template for surnames: separate singular column groups for the `_m_s` and `_f_s` slots plus a shared
-- plural column group. Selected by make_table() when props.surname is set.
local noun_template_surname = [=[
<div class="NavFrame">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-{decl_type}"
! rowspan="2" style="{BG2};width:11%" |
! colspan="6" style="{BG2}" | ကိုန်ဨကဝုစ်
! colspan="2" rowspan="2" style="{BG2}" | ကိုန်ဗဟုဝစ်
|-
! colspan="3" style="{BG2}" | ပုလ္လိၚ်
! colspan="3" style="{BG2}" | ဣတ္တိလိၚ်
|-
! style="{BG3}" |
! style="{BG3};width:4%" | [[indefinite article|indef.]]
! style="{BG3};width:4%" | [[definite article|def.]]
! style="{BG3};width:23%" | နာမ်
! style="{BG3};width:4%" | [[indefinite article|indef.]]
! style="{BG3};width:4%" | [[definite article|def.]]
! style="{BG3};width:23%" | နာမ်
! style="{BG3};width:4%" | [[definite article|def.]]
! style="{BG3};width:23%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | {art_ind_nom_m_s}
| style="{BG4}" | {art_def_nom_m_s}
| {nom_m_s}
| style="{BG4}" | {art_ind_nom_f_s}
| style="{BG4}" | {art_def_nom_f_s}
| {nom_f_s}
| style="{BG4}" | {art_def_nom_p}
| {nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | {art_ind_gen_m_s}
| style="{BG4}" | {art_def_gen_m_s}
| {gen_m_s}
| style="{BG4}" | {art_ind_gen_f_s}
| style="{BG4}" | {art_def_gen_f_s}
| {gen_f_s}
| style="{BG4}" | {art_def_gen_p}
| {gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | {art_ind_dat_m_s}
| style="{BG4}" | {art_def_dat_m_s}
| {dat_m_s}
| style="{BG4}" | {art_ind_dat_f_s}
| style="{BG4}" | {art_def_dat_f_s}
| {dat_f_s}
| style="{BG4}" | {art_def_dat_p}
| {dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | {art_ind_acc_m_s}
| style="{BG4}" | {art_def_acc_m_s}
| {acc_m_s}
| style="{BG4}" | {art_ind_acc_f_s}
| style="{BG4}" | {art_def_acc_f_s}
| {acc_f_s}
| style="{BG4}" | {art_def_acc_p}
| {acc_p}
|{\cl}{notes_clause}</div></div>]=]
-- Table template for language names: singular only, showing each case twice — the regular slot with the definite
-- article in parentheses, and the corresponding `_alt` slot with the plain definite article. Selected by
-- make_table() when props.langname is set (and props.surname is not).
local noun_template_langname = [=[
<div class="NavFrame" style="width:100%">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-langname"
! style="{BG2};width:15%" |
! colspan="5" style="{BG2};width:85%" | singular ''([[Wiktionary:German entry guidelines#Declension of language names|explanation of the use and meaning of the forms]])''
|-
! style="{BG3}" |
! style="{BG3};width:14%" | (ဗွဲတၟေၚ်လ္ပာ်မ္ၚးပစ္စဲ)
! style="{BG3};width:32%" | နာမ်
! style="{BG3};width:7%" | [[definite article|def.]]
! style="{BG3};width:32%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | ({art_def_nom_s})
| {nom_s}
| style="{BG4}" | {art_def_nom_s}
| {nom_s_alt}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | ({art_def_gen_s})
| {gen_s}
| style="{BG4}" | {art_def_gen_s}
| {gen_s_alt}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | ({art_def_dat_s})
| {dat_s}
| style="{BG4}" | {art_def_dat_s}
| {dat_s_alt}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | ({art_def_acc_s})
| {acc_s}
| style="{BG4}" | {art_def_acc_s}
| {acc_s_alt}
|{\cl}{notes_clause}</div></div>]=]
-- Table template for adjectival nouns with both singular and plural, showing the `str_`, `wk_` and `mix_` slot
-- sets in three sections. Selected by make_table() when props.overall_adj is set and `number` is neither "sg"
-- nor "pl". The {gender} field is filled from `forms.gender`, which make_table() computes; the field name is a
-- lookup key and must stay in English even though the visible labels are localised.
local adjectival_template_both = [=[
<div class="NavFrame">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table"
! style="{BG3};width:15%" |
! colspan="2" style="{BG3}" | ကိုန်ဨကဝုစ်
! colspan="2" style="{BG3}" | ကိုန်ဗဟုဝစ်
|-
! style="{BG2}" | {gender}
! colspan="4" style="{BG2}" | သ္ကာတ်မလဟုတ်စှ်ေ
|-
! style="{BG3}" | မဒုၚ်ယၟု
| colspan="2" | {str_nom_s}
| colspan="2" | {str_nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| colspan="2" | {str_gen_s}
| colspan="2" | {str_gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| colspan="2" | {str_dat_s}
| colspan="2" | {str_dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| colspan="2" | {str_acc_s}
| colspan="2" | {str_acc_p}
|-
! style="{BG2}" |
! colspan="4" style="{BG2}" | ဇြဟတ်ဍိုန်မလဟုတ်စှ်ေ
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4};width:5em" | {art_def_nom_s}
| {wk_nom_s}
| style="{BG4};width:5em" | {art_def_nom_p}
| {wk_nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4};width:5em" | {art_def_gen_s}
| {wk_gen_s}
| style="{BG4};width:5em" | {art_def_gen_p}
| {wk_gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4};width:5em" | {art_def_dat_s}
| {wk_dat_s}
| style="{BG4};width:5em" | {art_def_dat_p}
| {wk_dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4};width:5em" | {art_def_acc_s}
| {wk_acc_s}
| style="{BG4};width:5em" | {art_def_acc_p}
| {wk_acc_p}
|-
! style="{BG2}" |
! colspan="4" style="{BG2}" | ဖက်ဖနှဴလဝ်မလဟုတ်စှ်ေ
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4};width:5em" | {art_ind_nom_s}
| {mix_nom_s}
| style="{BG4};width:5em" | {art_ind_nom_p}
| {mix_nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4};width:5em" | {art_ind_gen_s}
| {mix_gen_s}
| style="{BG4};width:5em" | {art_ind_gen_p}
| {mix_gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4};width:5em" | {art_ind_dat_s}
| {mix_dat_s}
| style="{BG4};width:5em" | {art_ind_dat_p}
| {mix_dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4};width:5em" | {art_ind_acc_s}
| {mix_acc_s}
| style="{BG4};width:5em" | {art_ind_acc_p}
| {mix_acc_p}
|{\cl}{notes_clause}</div></div>]=]
-- Table template for singular-only adjectival nouns. make_table() also derives the plural-only layout from it by
-- replacing the singular heading with the plural one and every "_s}" with "_p}". The {gender} field is filled
-- from `forms.gender`, which make_table() computes; the field name is a lookup key and must stay in English even
-- though the visible labels are localised. The table opener uses {\op} like all the other templates here.
local adjectival_template_sg = [=[
<div class="NavFrame" style="width:500px">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table"
! style="{BG3};width:15%" |
! colspan="2" style="{BG3}" | ကိုန်ဨကဝုစ်
|-
! style="{BG2}" | {gender}
! colspan="2" style="{BG2}" | သ္ကာတ်မလဟုတ်စှ်ေ
|-
! style="{BG3}" | မဒုၚ်ယၟု
| colspan="2" | {str_nom_s}
|-
! style="{BG3}" | ဗဳဇဂကူ
| colspan="2" | {str_gen_s}
|-
! style="{BG3}" | ပြကမ္မကာရက
| colspan="2" | {str_dat_s}
|-
! style="{BG3}" | ကမ္မကာရက
| colspan="2" | {str_acc_s}
|-
! style="{BG2}" |
! colspan="2" style="{BG2}" | ဇြဟတ်ဍိုန်မလဟုတ်စှ်ေ
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4};width:5em" | {art_def_nom_s}
| {wk_nom_s}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4};width:5em" | {art_def_gen_s}
| {wk_gen_s}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4};width:5em" | {art_def_dat_s}
| {wk_dat_s}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4};width:5em" | {art_def_acc_s}
| {wk_acc_s}
|-
! style="{BG2}" |
! colspan="2" style="{BG2}" | ဖက်ဖနှဴလဝ်မလဟုတ်စှ်ေ
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4};width:5em" | {art_ind_nom_s}
| {mix_nom_s}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4};width:5em" | {art_ind_gen_s}
| {mix_gen_s}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4};width:5em" | {art_ind_dat_s}
| {mix_dat_s}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4};width:5em" | {art_ind_acc_s}
| {mix_acc_s}
|{\cl}{notes_clause}</div></div>]=]
-- Wrapper for the footnote text below a table. make_table() formats it into {notes_clause} when `forms.footnote`
-- is not the empty string.
local notes_template = [===[
<div style="width:100%;text-align:left;background:var(--wikt-palette-lightblue,#d9ebff);color:inherit">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]===]
-- Build the wikitext of the declension table for `alternant_multiword_spec`. Fills the auxiliary fields of the
-- forms table (colours, title, annotation, gender, optional clauses), picks the table template matching the kind
-- of expression and its number, and returns the formatted template.
local function make_table(alternant_multiword_spec)
	local spec = alternant_multiword_spec
	local forms = spec.forms
	local props = spec.props
	local number = spec.number

	-- Colours go through CSS variables with fallbacks, for dark mode support.
	forms.BG1 = "background:var(--wikt-palette-white,#ffffff);color:inherit"
	forms.BG2 = "background:var(--wikt-palette-grey,#9e9e9e);color:inherit"
	forms.BG3 = "background:var(--wikt-palette-lightgrey,#cccccc);color:inherit"
	forms.BG4 = "background:var(--wikt-palette-lightergrey,#eeeeee);color:inherit"

	-- An explicit title wins; otherwise build the default title from the lemma.
	forms.title = spec.title or ('မလဟုတ်စှ်ေဆေၚ်စပ်ကဵု <i lang="de" class="Latn">' .. forms.lemma .. '</i>')

	local annotation = spec.annotation
	if annotation ~= "" then
		annotation = " [<span style=\"font-size: smaller;\">" .. annotation .. "</span>]"
	end
	forms.annotation = annotation

	local table_spec
	if props.surname then
		table_spec = noun_template_surname
	elseif props.langname then
		table_spec = noun_template_langname
	elseif props.overall_adj then
		if number == "pl" then
			-- The plural-only layout is the singular-only one with the heading and slot suffixes swapped.
			table_spec = rsub(rsub(adjectival_template_sg, "ကိုန်ဨကဝုစ်", "ကိုန်ဗဟုဝစ်"), "_s}", "_p}")
			forms.gender = ""
		else
			if number == "sg" then
				table_spec = adjectival_template_sg
			else
				table_spec = adjectival_template_both
			end
			local gender_names = {}
			for _, gender in ipairs(spec.genders) do
				table.insert(gender_names, gender_spec_to_full_gender[gender.spec])
			end
			forms.gender = "''" .. table.concat(gender_names, " or ") .. " gender ''"
		end
	else
		-- Toponyms and nouns with props.article set get the layouts without the indefinite-article column.
		local no_indef = props.toponym or props.article
		if number == "sg" then
			table_spec = no_indef and noun_template_sg_no_indef or noun_template_sg
		elseif number == "pl" then
			table_spec = noun_template_pl
		else
			table_spec = no_indef and noun_template_both_no_indef or noun_template_both
		end
		-- Only add the extra rows when at least one of the two slots is not the "—" placeholder.
		if forms.abl_s ~= "—" or forms.voc_s ~= "—" then
			forms.abl_voc_clause = m_string_utilities.format(noun_template_abl_voc, forms)
		else
			forms.abl_voc_clause = ""
		end
	end

	forms.notes_clause = forms.footnote ~= "" and
		m_string_utilities.format(notes_template, forms) or ""
	return m_string_utilities.format(table_spec, forms)
end
-- Externally callable function to parse and decline a noun given user-specified arguments. Return value is
-- ALTERNANT_MULTIWORD_SPEC, an object where the declined forms are in `ALTERNANT_MULTIWORD_SPEC.forms` for each slot.
-- If there are no values for a slot, the slot key will be missing. The value for a given slot is a list of objects
-- {form=FORM, footnotes=FOOTNOTES}.
--
-- Parameters:
--   `parent_args`: raw template arguments; validated through [[Module:parameters]].
--   `pos`: part of speech to store in the returned spec; if nil/false it is derived from the first lemma below.
--   `from_headword`: if true, the extra headword-only parameters (head=, f=, m=, n=, dim=, sg=, ...) are accepted
--      and the dim/f/m/n/sg values are processed after declension.
--   `is_proper`: stored as `props.is_proper`; makes the default number "sg" instead of "both".
--   `def`: not referenced anywhere in this function body.
function export.do_generate_forms(parent_args, pos, from_headword, is_proper, def)
-- Parameters accepted in every context.
local params = {
[1] = {required = true, default = "Haus<n,es,^er>"},
pagename = {},
}
-- Parameters accepted only when invoked from the headword (or when the debugging flag pretends so).
if from_headword or pretend_from_headword then
params["head"] = {list = true}
params["f"] = {list = true}
params["m"] = {list = true}
params["n"] = {list = true}
params["dim"] = {list = true}
params["sg"] = {list = true}
params["id"] = {}
params["sort"] = {}
params["splithyph"] = {type = "boolean"}
params["nolinkhead"] = {type = "boolean"}
end
local args = require("Module:parameters").process(parent_args, params)
local arg1 = args[1]
local need_surrounding_angle_brackets = true
-- Check whether we need to add <...> around the argument. If the
-- argument has no < in it, we definitely do. Otherwise, we need to
-- parse the balanced [...] and <...> and add <...> only if there isn't
-- a top-level <...>. We check for [...] because there might be angle
-- brackets inside of them (HTML tags in qualifiers or <<name:...>> and
-- such in references).
if arg1:find("<") then
local segments = put.parse_multi_delimiter_balanced_segment_run(arg1, {{"<", ">"}, {"[", "]"}})
-- Only the even-numbered segments are inspected for a complete <...> run.
for i = 2, #segments, 2 do
if segments[i]:find("^<.*>$") then
need_surrounding_angle_brackets = false
break
end
end
end
if need_surrounding_angle_brackets then
arg1 = "<" .. arg1 .. ">"
end
-- An explicit pagename= overrides the title of the current page.
local pagename = args.pagename or mw.title.getCurrentTitle().text
-- Adapter that supplies `pagename` to parse_indicator_spec() in addition to the two arguments passed in.
local function do_parse_indicator_spec(angle_bracket_spec, lemma)
return parse_indicator_spec(angle_bracket_spec, lemma, pagename)
end
local parse_props = {
parse_indicator_spec = do_parse_indicator_spec,
allow_default_indicator = true,
allow_blank_lemma = true,
}
local alternant_multiword_spec = iut.parse_inflected_text(arg1, parse_props)
alternant_multiword_spec.args = args
alternant_multiword_spec.props = {}
alternant_multiword_spec.props.is_proper = is_proper
detect_all_indicator_specs(alternant_multiword_spec)
-- Proper nouns and toponyms default to singular-only; everything else defaults to both numbers.
local default_number =
(alternant_multiword_spec.props.is_proper or alternant_multiword_spec.props.toponym) and "sg" or "both"
propagate_properties(alternant_multiword_spec, "number", default_number, "both")
-- FIXME, maybe should check that noun genders match adjective genders
determine_adjectival_genders(alternant_multiword_spec)
determine_noun_status(alternant_multiword_spec)
local inflect_props = {
-- Slots excluded by the overall number restriction are skipped during inflection.
skip_slot = function(slot)
return skip_slot(alternant_multiword_spec.number, slot)
end,
-- Slot list precedence: surname, then langname, then adjectival, else the plain noun list.
slot_list = alternant_multiword_spec.props.surname and surname_slot_list_with_linked
or alternant_multiword_spec.props.langname and langname_slot_list_with_linked
or alternant_multiword_spec.props.overall_adj and adjectival_slot_list_with_linked
or noun_slot_list_with_linked,
inflect_word_spec = decline_noun_or_adjective,
}
iut.inflect_multiword_or_alternant_multiword_spec(alternant_multiword_spec, inflect_props)
compute_articles(alternant_multiword_spec)
compute_headword_genders(alternant_multiword_spec)
if not pos then
-- Compute part of speech for categories. Fetch the first lemma, or failing that (which would only happen
-- if the user overrides the nom_sg and nom_p to be missing) the pagename. If it begins with a hyphen,
-- it's a suffix, else a noun (proper nouns get categorized like nouns).
local lemmas = export.get_lemmas(alternant_multiword_spec)
local first_lemma = #lemmas > 0 and lemmas[1].form or pagename
pos = rfind(first_lemma, "^%-") and "အဆက်လက္ကရဴ" or "နာမ်"
end
alternant_multiword_spec.pos = pos
compute_categories_and_annotation(alternant_multiword_spec)
-- Headword-only extras: diminutive, gender equivalents and singular, taken from dim=/f=/m=/n=/sg=.
if from_headword or pretend_from_headword then
process_dim_m_f_n(alternant_multiword_spec, args.dim, "^chen", nil, "dim", "diminutive")
process_dim_m_f_n(alternant_multiword_spec, args.f, alternant_multiword_spec.forms.f_equiv,
"literal default", "f", "feminine equivalent")
process_dim_m_f_n(alternant_multiword_spec, args.m, alternant_multiword_spec.forms.m_equiv,
"literal default", "m", "masculine equivalent")
process_dim_m_f_n(alternant_multiword_spec, args.n, alternant_multiword_spec.forms.n_equiv,
"literal default", "n", "neuter equivalent")
process_dim_m_f_n(alternant_multiword_spec, args.sg, nil, nil, "sg", "singular")
end
return alternant_multiword_spec
end
-- Entry point for {{de-ndecl}}. Template-callable: reads the arguments of the calling template, declines the
-- noun, and returns the declension table followed by the formatted categories.
function export.show(frame)
	local spec = export.do_generate_forms(frame:getParent().args)
	show_forms(spec)
	-- FIXME! Placeholder value for the declension-type field.
	spec.forms.decl_type = "foo"
	local table_text = make_table(spec)
	local category_text = require("Module:utilities").format_categories(
		spec.categories, lang, nil, nil, force_cat)
	return table_text .. category_text
end
-- Concatenate all forms of all slots into a single string of the form "SLOT=FORM,FORM,...|SLOT=FORM,FORM,...|...".
-- The per-slot text comes from iut.concat_forms_in_slot(); slots for which it returns nothing are omitted.
-- If INCLUDE_PROPS is given, also include additional properties (currently, g= for headword genders).
-- This is for use by bots.
--
-- The slot list used follows the same precedence as declension: surname, langname, adjectival, else plain noun.
local function concat_forms(alternant_multiword_spec, include_props)
	local props = alternant_multiword_spec.props
	local slot_list =
		props.surname and surname_slot_list_with_linked or
		props.langname and langname_slot_list_with_linked or
		props.overall_adj and adjectival_slot_list_with_linked or
		noun_slot_list_with_linked
	local ins_text = {}
	for _, slotaccel in ipairs(slot_list) do
		-- Each entry is {SLOT, ACCEL}; only the slot name is needed here.
		local slot = slotaccel[1]
		local formtext = iut.concat_forms_in_slot(alternant_multiword_spec.forms[slot])
		if formtext then
			table.insert(ins_text, slot .. "=" .. formtext)
		end
	end
	if include_props then
		-- The overall genders are objects whose gender code lives in `.spec` (the table-generation code reads
		-- `gender.spec`), so they cannot be handed to table.concat() directly; extract the codes first.
		-- Plain strings are passed through unchanged for backward compatibility.
		local gender_codes = {}
		for _, gender in ipairs(alternant_multiword_spec.genders) do
			if type(gender) == "table" then
				table.insert(gender_codes, gender.spec)
			else
				table.insert(gender_codes, gender)
			end
		end
		table.insert(ins_text, "g=" .. table.concat(gender_codes, ","))
	end
	return table.concat(ins_text, "|")
end
-- Template-callable function: declines the noun described by the calling template's arguments and returns the
-- forms as a single string in the format produced by concat_forms(). The invocation argument `include_props`
-- is forwarded to concat_forms() as its second argument.
function export.generate_forms(frame)
	local want_props = frame.args.include_props
	local spec = export.do_generate_forms(frame:getParent().args)
	return concat_forms(spec, want_props)
end
return export
6fiz3stlf08polvfqxl52qgqq0foazd
395142
395140
2026-05-19T14:31:20Z
咽頭べさ
33
395142
Scribunto
text/plain
local export = {}
--[=[
Authorship: <benwing2>
]=]
--[=[
TERMINOLOGY:
-- "slot" = A particular combination of case/number.
Example slot names for nouns are "voc_s" (vocative singular) and
"gen_p" (genitive plural). Each slot is filled with zero or more forms.
-- "form" = The declined German form representing the value of a given slot.
-- "lemma" = The dictionary form of a given German term. Generally the nominative
masculine singular, but may occasionally be another form if the nominative
masculine singular is missing.
]=]
--[=[
FIXME:
1. Qualifiers in genders should appear as footnotes on the articles.
2. Support notation like <g:f> on feminine/diminutive/masculine, e.g. used for [[Gespons]] (neuter with the meaning
"wife", masculine with the meaning "husband").
3. Fix CSS gender-specific class in table.
4. Support adjectival nouns and adjective-noun combinations. (DONE)
5. Allow period and comma in forms e.g. for [[Eigent.-Whg.]], [[Eigt.-Whg.]] (using a backslash). (DONE)
6. Allow embedded links in genitive/plural/feminine/diminutive/masculine specs, e.g. 'f=![[weiblich]]er Geschäftspartner'.
7. Add 'prop' indicator to indicate proper nouns and suppress the indefinite article.
8. Add 'surname' indicator to indicate surnames, decline appropriately and include both masc and fem variants in the table. (DONE)
9. Add 'langname' indicator to indicate langnames and decline appropriately with its own table with two alternatives. (DONE)
]=]
local lang = require("Module:languages").getByCode("de")
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local m_links = require("Module:links")
local m_string_utilities = require("Module:string utilities")
local iut = require("Module:inflection utilities")
local put = require("Module:parse utilities")
local com = require("Module:de-common")
local pretend_from_headword = false -- may be set during debugging
local force_cat = false -- may be set during debugging
local u = m_str_utils.char
local rfind = m_str_utils.find
local rmatch = m_str_utils.match
local rsubn = m_str_utils.gsub
local unpack = unpack or table.unpack -- Lua 5.2 compatibility
local usub = m_str_utils.sub
local SUB_ESCAPED_PERIOD = u(0xFFF0)
local SUB_ESCAPED_COMMA = u(0xFFF1)
local archaic_dative_note = "[now rare, [[Wiktionary:German entry guidelines#Dative_singular_-e_in_noun_declension|see notes]]]"
-- Wrapper around rsubn() that returns only its first result and drops the rest.
local function rsub(term, foo, bar)
	-- The parentheses truncate the multiple return values of rsubn() to exactly one.
	return (rsubn(term, foo, bar))
end
-- Record a tracking entry named "de-noun/PAGE" via [[Module:debug]]. Always returns true.
local function track(page)
	local debug_track = require("Module:debug").track
	debug_track("de-noun/" .. page)
	return true
end
-- Prefixes of the adjectival slots (e.g. "str_nom_s"); see the adjectival slot construction below.
local states = { "str", "wk", "mix" }
-- Infixes of the article slots ("art_ind_...", "art_def_..."); see add_slot_articles().
local definitenesses = { "ind", "def" }
-- Cases used for singular noun slots (includes "abl" and "voc").
local cases_with_abl_voc = { "nom", "gen", "dat", "acc", "abl", "voc" }
-- Cases used for all other slot families.
local basic_cases = { "nom", "gen", "dat", "acc" }
-- Number suffixes used in slot names.
local numbers = { "s", "p" }
-- Maps a one-letter gender code to its English name.
local gender_spec_to_full_gender = {
m = "masculine",
f = "feminine",
n = "neuter",
}
-- Set form of `cases_with_abl_voc`, built with m_table.listToSet().
local case_set_with_abl_voc = m_table.listToSet(cases_with_abl_voc)
-- Append the gender-equivalent slots "m_equiv", "f_equiv" and "n_equiv" (in that order) to `slot_list`, each
-- paired with "-" in the second position:
--   m_equiv: masculine equivalent of a feminine or neuter noun
--   f_equiv: feminine equivalent of a masculine or neuter noun
--   n_equiv: neuter equivalent of a masculine or feminine noun
local function add_equiv(slot_list)
	for _, gender_letter in ipairs { "m", "f", "n" } do
		table.insert(slot_list, {gender_letter .. "_equiv", "-"})
	end
end
-- Construct noun slots: the three *_equiv slots first, then CASE_s for every case in `cases_with_abl_voc`,
-- then CASE_p for every case in `basic_cases`. `noun_slot_set` holds only the case/number slots.
local noun_slot_list = {}
local noun_slot_set = {}
add_equiv(noun_slot_list)
for _, number in ipairs(numbers) do
	local cases_for_number = basic_cases
	if number == "s" then
		cases_for_number = cases_with_abl_voc
	end
	for _, case in ipairs(cases_for_number) do
		local slot = case .. "_" .. number
		noun_slot_list[#noun_slot_list + 1] = {slot, case .. "|" .. number}
		noun_slot_set[slot] = true
	end
end
-- Construct noun surname slots: for each basic case, a masculine singular, feminine singular and plural slot
-- (e.g. "nom_m_s", "nom_f_s", "nom_p"), each paired with its accelerator tag string.
local surname_slot_list = {}
local surname_slot_set = {}
local surname_endings = {
	{"m_s", "m|s"},
	{"f_s", "f|s"},
	{"p", "p"},
}
for _, case in ipairs(basic_cases) do
	for _, ending_and_accel in ipairs(surname_endings) do
		local slot = case .. "_" .. ending_and_accel[1]
		surname_slot_list[#surname_slot_list + 1] = {slot, case .. "|" .. ending_and_accel[2]}
		surname_slot_set[slot] = true
	end
end
-- Construct noun langname slots: for each basic case and number, the regular slot (e.g. "gen_s") immediately
-- followed by its alternative slot (e.g. "gen_s_alt").
local langname_slot_list = {}
local langname_slot_set = {}
for _, case in ipairs(basic_cases) do
	for _, number in ipairs(numbers) do
		local slot = case .. "_" .. number
		local alt_slot = slot .. "_alt"
		table.insert(langname_slot_list, {slot, case .. "|" .. number})
		langname_slot_set[slot] = true
		-- FIXME: We should add accelerators for the alternative forms, but this requires hacking the accelerator
		-- code in [[Module:inflection utilities]] to specify the alternative lemma; e.g. genitive singular
		-- ''Deutschen'' needs to have lemma [[Deutsche]] not [[Deutsch]]. Until then they get "-".
		table.insert(langname_slot_list, {alt_slot, "-"})
		langname_slot_set[alt_slot] = true
	end
end
-- Construct adjectival slots: the three *_equiv slots first, then STATE_CASE_NUMBER for every state, basic case
-- and number (e.g. "str_nom_s"). `adjectival_slot_set` holds only the state/case/number slots.
local adjectival_slot_list = {}
local adjectival_slot_set = {}
add_equiv(adjectival_slot_list)
for _, state in ipairs(states) do
	for _, case in ipairs(basic_cases) do
		local slot_prefix = state .. "_" .. case .. "_"
		local accel_prefix = state .. "|" .. case .. "|"
		for _, number in ipairs(numbers) do
			local slot = slot_prefix .. number
			adjectival_slot_list[#adjectival_slot_list + 1] = {slot, accel_prefix .. number}
			adjectival_slot_set[slot] = true
		end
	end
end
-- Construct expanded slot lists including linked variants. Each list is a shallow copy of the corresponding basic
-- slot list with "*_linked" slots appended for the slots that may hold a lemma (compare get_lemma_slots()).
-- The linked slots reuse the accelerator tags of their unlinked counterparts.
local noun_slot_list_with_linked = m_table.shallowCopy(noun_slot_list)
table.insert(noun_slot_list_with_linked, {"nom_s_linked", "nom|s"})
table.insert(noun_slot_list_with_linked, {"nom_p_linked", "nom|p"})
-- Surnames: only the nominative masculine singular gets a linked variant.
local surname_slot_list_with_linked = m_table.shallowCopy(surname_slot_list)
table.insert(surname_slot_list_with_linked, {"nom_m_s_linked", "nom|m|s"})
-- Language names: only the nominative singular gets a linked variant.
local langname_slot_list_with_linked = m_table.shallowCopy(langname_slot_list)
table.insert(langname_slot_list_with_linked, {"nom_s_linked", "nom|s"})
-- Adjectival nouns: the strong nominative singular and plural get linked variants.
local adjectival_slot_list_with_linked = m_table.shallowCopy(adjectival_slot_list)
table.insert(adjectival_slot_list_with_linked, {"str_nom_s_linked", "str|nom|s"})
table.insert(adjectival_slot_list_with_linked, {"str_nom_p_linked", "str|nom|p"})
-- Append article slots to `slot_list`: for every case in `cases`, every number in `numbers` and every
-- definiteness ("ind", "def"), a slot named "art_DEF_CASE_NUMBER" paired with "-" in the second position.
-- Used to construct expanded slot lists including linked variants and articles.
local function add_slot_articles(slot_list, cases, numbers)
	for _, case in ipairs(cases) do
		for _, number in ipairs(numbers) do
			local suffix = "_" .. case .. "_" .. number
			for _, def in ipairs(definitenesses) do
				slot_list[#slot_list + 1] = {"art_" .. def .. suffix, "-"}
			end
		end
	end
end
-- Slot lists with linked variants plus article slots. Each is a shallow copy of the "*_with_linked" list, extended
-- by add_slot_articles() with the cases and number suffixes appropriate to that slot family.
local noun_slot_list_with_linked_and_articles = m_table.shallowCopy(noun_slot_list_with_linked)
add_slot_articles(noun_slot_list_with_linked_and_articles, cases_with_abl_voc, numbers)
-- Surnames use the suffixes "m_s", "f_s" and "p" in place of plain numbers.
local surname_slot_list_with_linked_and_articles = m_table.shallowCopy(surname_slot_list_with_linked)
add_slot_articles(surname_slot_list_with_linked_and_articles, basic_cases, {"m_s", "f_s", "p"})
-- Language names get article slots for the singular only.
local langname_slot_list_with_linked_and_articles = m_table.shallowCopy(langname_slot_list_with_linked)
add_slot_articles(langname_slot_list_with_linked_and_articles, basic_cases, {"s"})
local adjectival_slot_list_with_linked_and_articles = m_table.shallowCopy(adjectival_slot_list_with_linked)
add_slot_articles(adjectival_slot_list_with_linked_and_articles, basic_cases, numbers)
-- Return true if `prop` is a recognized indicator that can be specified on adjectives in [[Module:de-adjective]]
-- (one of "ss", "sync_n", "sync_mn", "sync_mns"), otherwise false.
local function is_adjectival_decl_indicator(prop)
	local recognized = {
		ss = true,
		sync_n = true,
		sync_mn = true,
		sync_mns = true,
	}
	return recognized[prop] == true
end
-- Return a truthy value if `slot` must be skipped under the number restriction `number`: slots ending in "_p"
-- when `number` is "sg", slots ending in "_s" when `number` is "pl". Any other `number` never skips.
local function skip_slot(number, slot)
	if number == "sg" then
		-- A non-match yields false rather than nil here.
		return rfind(slot, "_p$") or false
	end
	if number == "pl" then
		-- Parentheses keep this to a single return value.
		return (rfind(slot, "_s$"))
	end
	return false
end
-- Join `stem` and `ending` into a single form. A leading "^" on `ending` requests umlaut: the marker is removed
-- and com.apply_umlaut() is applied to the stem. If `props.ss` is set, a stem-final "ß" becomes "ss" when the
-- (marker-less) ending begins with a match for com.V.
local function combine_stem_ending(props, stem, ending)
	local umlaut_requested = ending:find("^%^")
	if umlaut_requested then
		ending = rsub(ending, "^%^", "")
		stem = com.apply_umlaut(stem)
	end
	local needs_ss = props.ss and stem:find("ß$") and rfind(ending, "^" .. com.V)
	if needs_ss then
		stem = rsub(stem, "ß$", "ss")
	end
	return stem .. ending
end
-- Add a form to slot `slot` of `base`. The form is the combination of `stem` and `ending`, either of which may be
-- a single string, a list of strings, or a list of objects of the form {form=FORM, footnotes=FOOTNOTES}, where
-- FOOTNOTES can be nil or a list of strings. A nil `stem` means "use base.lemma". Nothing is added when `ending`
-- is nil/false or when the slot is excluded by the number restriction of `base`.
--
-- `gender` is the gender of the resulting form ("m", "f" or "n") or nil; it is meant to ensure that the correct
-- article is attached when there are multiple forms with differing genders, and should only be nil when the slot
-- is plural or the gender cannot be determined (e.g. in overrides). It is currently not used by this function
-- (prefixing the form with a gender marker for [[Module:de-headword]] is disabled for now).
-- `footnotes` specifies extra footnotes to add to the resulting form (nil or a list of strings); they are
-- combined with base.footnotes.
-- `process_combined_stem_ending`, if given, post-processes each combined form before it is inserted. (This is
-- used currently to add an -n to the dative plural.)
local function add(base, slot, stem, ending, gender, footnotes, process_combined_stem_ending)
	if not ending then
		return
	end
	if skip_slot(base.number, slot) then
		return
	end

	local function combine(stem_part, ending_part)
		local combined = combine_stem_ending(base.props, stem_part, ending_part)
		if process_combined_stem_ending then
			combined = process_combined_stem_ending(combined)
		end
		return combined
	end

	local all_footnotes = iut.combine_footnotes(base.footnotes, footnotes)
	local ending_obj = iut.combine_form_and_footnotes(ending, all_footnotes)

	-- If we're declining an adjectival noun or adjective-noun combination, and the slot is a noun slot, the form
	-- goes into the equivalent adjective slots (e.g. gen_s -> str_gen_s/wk_gen_s/mix_gen_s). That is not done for
	-- "m_equiv", "f_equiv", "n_equiv", which are the same in nouns and adjectives.
	local is_adj_noun_slot = base.props.overall_adj and noun_slot_set[slot] and not rfind(slot, "equiv$")
	if not is_adj_noun_slot then
		iut.add_forms(base.forms, slot, stem or base.lemma, ending_obj, combine)
		return
	end
	for _, state in ipairs(states) do
		iut.add_forms(base.forms, state .. "_" .. slot, stem or base.lemma, ending_obj, combine)
	end
end
-- Process an ending spec such as "s", "(e)s", "^er", "^lein", "!Pizzen", etc. as might be found in the genitive,
-- plural, an override, the value of dim=/m=/f=/n=, etc. `endings` is a list of such specs, where each entry of the
-- list is of the form {form=FORM, footnotes=FOOTNOTES} where FOOTNOTES is either nil or {FOOTNOTE, FOOTNOTE, ...}. If
-- `literal_endings` is given, the FORM values should be interpreted literally (i.e. as full forms) rather than as
-- ending specs. `default` is what to substitute if an ending spec is "+", and should be either in the same format as
-- `endings` or something that can be converted to that format, e.g. a string. `literal_default`, if given, indicates
-- that the FORM values in `default` should be interpreted literally, similar to `literal_endings`. `desc` is an
-- English description of what kind of spec is being processed, for error messages. `process` is called for each
-- generated form and is a function of two arguments, STEM and ENDING. If the spec is a full form, STEM will be that
-- form (in the form of an object {form=FORM, footnotes=FOOTNOTES}) and ENDING will be an empty string; otherwise, STEM
-- will be nil and ENDING will be the ending to process in the form {form=FORM, footnotes=FOOTNOTES} (or, for the
-- optional-ending specs "(e)s", "(s)" and "(es)", a list of such objects). Note that
-- umlauts are not handled in process_spec(); if the spec passed in specifies an umlaut, e.g. "^chen", process()
-- will be called with a FORM beginning with "^", and must handle the umlaut itself. (Umlauts are properly handled
-- inside of add().)
local function process_spec(endings, literal_endings, default, literal_default, desc, process)
for _, ending in ipairs(endings) do
-- Build a new form object carrying the footnotes of the spec currently being processed.
local function sub_form(form)
return {form = form, footnotes = ending.footnotes}
end
if ending.form == "--" then
-- do nothing
elseif ending.form == "+" then
if not default then
-- Could happen if e.g. gen is given as -- and then a gen_s override with + is specified, or with n= for neuter,
-- where no default is available.
error("Form '+' found for " .. desc .. " but no default is available")
end
-- Recurse on the default; no further default is passed, so a "+" inside the default raises the error above.
process_spec(iut.convert_to_general_list_form(default, ending.footnotes), literal_default, nil, nil, desc, process)
else
local full_eform
-- A spec is a full form if literal interpretation was requested, if it matches "^" .. com.CAP, or if
-- it begins with "!" (which is stripped).
if literal_endings or rfind(ending.form, "^" .. com.CAP) then
full_eform = true
elseif rfind(ending.form, "^!") then
full_eform = true
ending = sub_form(rsub(ending.form, "^!", ""))
end
if full_eform then
process(ending, "")
else
-- Expand the optional-ending notations. `umlaut` captures an optional leading "^" (possibly the empty
-- string, which is still truthy) and is re-attached to each expanded ending.
local expanded_endings
local umlaut = rmatch(ending.form, "^(%^?)%(e%)s$" )
if umlaut then
expanded_endings = {"es", "s"}
end
if not umlaut then
umlaut = rmatch(ending.form, "^(%^?)%(s%)$")
if umlaut then
expanded_endings = {"s", ""}
end
end
if not umlaut then
umlaut = rmatch(ending.form, "^(%^?)%(es%)$")
if umlaut then
expanded_endings = {"es", ""}
end
end
if expanded_endings then
local new_endings = {}
for _, expanded_ending in ipairs(expanded_endings) do
table.insert(new_endings, sub_form(umlaut .. expanded_ending))
end
process(nil, new_endings)
else
-- A bare "-" stands for the empty ending.
if ending.form == "-" then
ending = sub_form("")
end
process(nil, ending)
end
end
end
end
end
-- Add an ending spec such as "s", "(e)s", "^er", "^lein", "!Pizzen", etc. (as might be found in the genitive,
-- plural, an override, the value of dim=/m=/f=/n=, etc.) to the slot `slot` (e.g. "gen_s"). `endings` is a list of
-- such specs, each of the form {form=FORM, footnotes=FOOTNOTES} where FOOTNOTES is either nil or
-- {FOOTNOTE, FOOTNOTE, ...}. The specs are expanded by process_spec() and every resulting stem/ending pair is
-- handed to add(). For the meaning of `gender`, `footnotes` and `process_combined_stem_ending`, see add().
-- For the meaning of `default` and `literal_default`, see process_spec().
local function add_spec(base, slot, endings, gender, default, literal_default, footnotes, process_combined_stem_ending)
	local desc = "slot '" .. slot .. "'"
	process_spec(endings, nil, default, literal_default, desc, function(stem, ending)
		add(base, slot, stem, ending, gender, footnotes, process_combined_stem_ending)
	end)
end
-- Apply the user-specified slot overrides in `base.overrides`. For each overridden slot the previously generated
-- forms are removed and re-supplied as the literal default, so that "+" in an override stands for them.
-- Throws an error if an override targets a slot excluded by the number restriction of `base`.
local function process_slot_overrides(base)
	for slot, override_specs in pairs(base.overrides) do
		local slot_is_excluded = skip_slot(base.number, slot)
		if slot_is_excluded then
			error("Override specified for invalid slot '" .. slot .. "' due to '" .. base.number .. "' number restriction")
		end
		local previous_forms = base.forms[slot]
		base.forms[slot] = nil
		-- Gender is not given by the user, hence nil.
		add_spec(base, slot, override_specs, nil, previous_forms, "literal default")
	end
end
-- Add the dative singular in -e (tagged with `archaic_dative_note`) for each genitive spec in `base.gens` that
-- calls for one: genitive "es" or "(e)s" gives "e", genitive "ses" gives "se", and any other genitive gives "e"
-- only if `base.props.dat_with_e` is set. A genitive spec of "+" is judged by `def_gen` instead.
local function add_archaic_dative_singular(base, gender, def_gen)
	for _, gen in ipairs(base.gens) do
		local gen_form = gen.form
		if gen_form == "+" then
			gen_form = def_gen
		end
		local dat_ending
		if gen_form == "ses" then
			dat_ending = "se"
		elseif gen_form == "es" or gen_form == "(e)s" or base.props.dat_with_e then
			dat_ending = "e"
		end
		if dat_ending then
			local footnotes = iut.combine_footnotes(gen.footnotes, {archaic_dative_note})
			add(base, "dat_s", nil, dat_ending, gender, footnotes)
		end
	end
end
-- Return the n-type ending ("n", "nen" or "en") appropriate for `stem`. `is_sg` is accepted but not consulted.
local function get_n_ending(base, stem, is_sg)
	-- typical feminine or weak masculine in -e
	if rfind(stem, "e$") then
		return "n"
	end
	-- [[Kammer]], [[Feier]], [[Leier]], but not [[Spur]], [[Beer]], [[Manier]], [[Schmier]] or [[Vier]]
	-- similarly, [[Achsel]], [[Gabel]], [[Tafel]], etc. but not [[Ziel]]
	if rfind(stem, "e[lr]$") and not rfind(stem, com.NV .. "[ei]e[lr]$") then
		return "n"
	end
	-- ''des Nachbarn'', ''des Herrn'', ''des Satyrn'', etc.
	if base.props.weak_n then
		return "n"
	end
	-- [[Chinesin]], [[Doktorin]], etc.; but not words in -ein or -ain such as [[Pein]]
	if rfind(stem, "[^aeAE]in$") then
		return "nen"
	end
	return "en"
end
-- Return the default genitive singular ending for `base.lemma` given `gender`.
local function get_default_gen(base, gender)
	local lemma = base.lemma
	if gender == "f" then
		return ""
	end
	if base.props.weak then
		return get_n_ending(base, lemma, "is singular")
	end
	-- neuter like [[Erlebnis]], [[Geheimnis]] or occasional masculine like [[Firnis]], [[Penis]]
	if rfind(lemma, "nis$") then
		return "ses"
	end
	-- [[Euphemismus]], [[Exitus]], [[Exodus]], etc.
	if rfind(lemma, com.NV .. "us$") then
		return ""
	end
	if rfind(lemma, "[sßxz]$") then
		return "es"
	end
	return "s"
end
-- Return the default plural spec for `base.lemma` given `gender`. The result is either an ending or, when
-- prefixed with "!", a full plural form. The checks are ordered; the first that applies wins.
local function get_default_pl(base, gender)
	local lemma = base.lemma
	-- neuter like [[Erlebnis]], [[Geheimnis]] or feminine like [[Kenntnis]], [[Wildnis]],
	-- or occasional masculine like [[Firnis]], [[Penis]]
	if rfind(lemma, "nis$") then
		return "se"
	end
	if gender == "f" or base.props.weak then
		return get_n_ending(base, lemma)
	end
	if rfind(lemma, "e$") then
		track("default-pl-e-not-f-or-weak")
		-- FIXME: This should return "s"
		return get_n_ending(base, lemma)
	end
	if gender == "n" then
		-- Diminutives in -lein (those in -chen will automatically get a null ending from -en below)
		if rfind(lemma, "lein$") then
			return ""
		end
		-- [[Museum]] -> [[Museen]], [[Vakuum]] -> [[Vakuen]]; not masculine [[Baum]] (plural [[Bäume]])
		-- or [[Reichtum]] (plural [[Reichtümer]])
		if rfind(lemma, "um$") then
			return "!" .. rsub(lemma, "um$", "en")
		end
	end
	-- Algorithmus -> Algorithmen, Aphorismus -> Aphorismen
	if rfind(lemma, "mus$") then
		return "!" .. rsub(lemma, "us$", "en")
	end
	-- [[Abakus]] -> [[Abakusse]], [[Zirkus]] -> [[Zirkusse]], [[Autobus]] -> [[Autobusse]];
	-- not [[Applaus]] (plural [[Applause]])
	if rfind(lemma, com.NV .. "us$") then
		return "se"
	end
	-- check for weak ending -el, -em, -en, -er, e.g. [[Adler]], [[Meier]], [[Riedel]]; but exclude [[Heer]],
	-- [[Bier]], [[Ziel]], which take -e by default
	if rfind(lemma, "e[lmnr]$") and not rfind(lemma, com.NV .. "[ei]e[lnmr]$") then
		return ""
	end
	return "e"
end
-- Generate the four basic singular forms of `base` for `gender`. `def_gen` is the default genitive ending,
-- substituted for "+" in the genitive specs. Weak nouns take the n-type ending in the dative, and in the
-- accusative only when masculine; other nouns have null dative/accusative plus the archaic dative in -e where
-- applicable.
local function decline_singular(base, gender, def_gen)
	add(base, "nom_s", nil, "", gender)
	add_spec(base, "gen_s", base.gens, gender, def_gen)
	if not base.props.weak then
		add(base, "dat_s", nil, "", gender)
		add_archaic_dative_singular(base, gender, def_gen)
		add(base, "acc_s", nil, "", gender)
		return
	end
	local weak_ending = get_n_ending(base, base.lemma, "is singular")
	local acc_ending = ""
	if gender == "m" then
		acc_ending = weak_ending
	end
	add(base, "dat_s", nil, weak_ending, gender)
	add(base, "acc_s", nil, acc_ending, gender)
end
-- Generate the four plural forms of `base` from the plural specs in `base.pls`; `def_pl` is substituted for "+".
-- While generating the nominative plural, the declension type ("weak", "mixed" or "strong") is recorded in
-- `base.decl_type` when `base.props.saw_mn` is set and the noun is not plural-only. The dative plural gets an
-- extra -n after certain final sequences unless `base.props.nodatpln` is set.
local function decline_plural(base, def_pl)
	-- Records the declension type as a side effect; returns the form unchanged.
	local function record_decl_type(nom_pl)
		if base.props.saw_mn and base.number ~= "pl" then
			local decl_type
			if base.props.weak then
				decl_type = "weak"
			elseif nom_pl == base.lemma .. "n" or nom_pl == base.lemma .. "en" then
				decl_type = "mixed"
			else
				decl_type = "strong"
			end
			m_table.insertIfNot(base.decl_type, decl_type)
		end
		return nom_pl
	end

	-- Appends -n to forms ending in -e, -el, -er or -erl, unless suppressed.
	local function add_dat_pl_n(dat_pl)
		local takes_n = not base.props.nodatpln and
			(rfind(dat_pl, "e[lr]?$") or rfind(dat_pl, "erl$"))
		if takes_n then
			return dat_pl .. "n"
		end
		return dat_pl
	end

	local slot_processors = {
		{"nom_p", record_decl_type},
		{"gen_p"},
		{"dat_p", add_dat_pl_n},
		{"acc_p"},
	}
	for _, slot_processor in ipairs(slot_processors) do
		add_spec(base, slot_processor[1], base.pls, nil, def_pl, nil, nil, slot_processor[2])
	end
end
-- Decline an ordinary noun. Plural-only nouns get just the plural forms (with a null default ending); other nouns
-- are declined in singular and plural once per gender in `base.genders`. In both cases a guess at the
-- masculine/feminine equivalent is added to the "m_equiv"/"f_equiv" slots. No gender is specified for *_equiv;
-- that will be handled correctly in [[Module:de-headword]].
local function decline_noun(base)
	if base.number == "pl" then
		decline_plural(base, "")
		if rfind(base.lemma, "innen$") then
			-- Ends in -innen, likely feminine. Chop off, and convert e.g. Chinesinnen -> Chinesen.
			local masc_pl = rsub(base.lemma, "innen$", "")
			if rfind(masc_pl, "es$") then
				masc_pl = masc_pl .. "en"
			end
			add(base, "m_equiv", masc_pl, "")
		else
			-- Likely masculine. Try to convert Chinesen -> Chinesinnen, and -er -> -erinnen.
			add(base, "f_equiv", rsub(base.lemma, "en$", ""), "innen")
		end
		return
	end

	base.decl_type = {}
	for _, genderspec in ipairs(base.genders) do
		local gender = genderspec.form
		decline_singular(base, gender, get_default_gen(base, gender))
		decline_plural(base, get_default_pl(base, gender))
		if gender == "m" then
			-- Feminine equivalent: drop a final -e and add -in.
			add(base, "f_equiv", rsub(base.lemma, "e$", ""), "in")
		elseif gender == "f" then
			-- Try (sort of) to get the masculine. Remove final -in, and if the result ends in -es, convert to -ese
			-- (e.g. Chinesin -> Chinese).
			local masc_sg = rsub(base.lemma, "in$", "")
			if rfind(masc_sg, "es$") then
				masc_sg = masc_sg .. "e"
			end
			add(base, "m_equiv", masc_sg, "")
		end
		-- Nothing is added for neuter.
	end
end
-- Decline a surname into masculine singular, feminine singular and plural slots. Gender is not specified here;
-- there are always two genders, m and f, which will be handled correctly in [[Module:de-headword]].
local function decline_surname(base)
	local lemma = base.lemma

	-- Default plural endings, chosen by the final letters of the lemma.
	local function default_plural_endings()
		if rfind(lemma, "[sxß]$") then
			-- [[Marx]], [[Engels]], [[Weiß]]
			return {"", "ens"}
		end
		if rfind(lemma, "z$") then
			-- [[Schulz]], [[Schmitz]]
			return {"", "es", "ens"}
		end
		if rfind(lemma, "ce$") then
			return {"", "ns"}
		end
		if rfind(lemma, "e[nlr]?$") then
			-- [[Müller]], [[Goethe]], [[Dürer]], [[Schlegel]], [[Münchhausen]]
			return {"s", ""}
		end
		-- [[Schmidt]], [[Bergmann]], [[Brentano]]
		return {"s"}
	end

	for _, suffix in ipairs {"m_s", "f_s"} do
		add(base, "nom_" .. suffix, nil, "")
	end

	-- [[Marx]], [[Engels]], [[Weiß]], [[Schulz]]; also names with silent -s or -x like [[Delacroix]]
	local takes_apostrophe = rfind(lemma, "[sxzß]$") or rfind(lemma, "ce$")
	local gen_m_s = takes_apostrophe and "'" or "s"
	add_spec(base, "gen_m_s", base.gens, nil, gen_m_s)
	add(base, "gen_m_s", nil, "", nil, {"[with an article]"})
	add(base, "gen_f_s", nil, "")

	for _, case in ipairs {"dat", "acc"} do
		for _, suffix in ipairs {"m_s", "f_s"} do
			add(base, case .. "_" .. suffix, nil, "")
		end
	end

	local pl_ending = default_plural_endings()
	for _, case in ipairs {"nom", "gen", "dat", "acc"} do
		add_spec(base, case .. "_p", base.pls, nil, pl_ending)
	end
end
-- Decline a toponym. Gender is not specified here (it is always neuter). The genitive singular gets both the
-- specified/default ending and a null-ending variant carrying an article footnote. Plural forms are generated
-- only when the number is "both", and only from explicitly given plural specs (no default is supplied).
local function decline_toponym(base)
	add(base, "nom_s", nil, "")

	local ends_in_sibilant = rfind(base.lemma, "[sxzß]$")
	local gen_s = ends_in_sibilant and "'" or "s"
	local null_footnote = ends_in_sibilant and "[with an article]" or "[optionally with an article]"
	add_spec(base, "gen_s", base.gens, nil, gen_s)
	add(base, "gen_s", nil, "", nil, {null_footnote})

	add(base, "dat_s", nil, "")
	add(base, "acc_s", nil, "")

	if base.number ~= "both" then
		return
	end
	for _, plural_slot in ipairs {"nom_p", "gen_p", "dat_p", "acc_p"} do
		add_spec(base, plural_slot, base.pls)
	end
end
-- Decline a language name. Gender is not specified here (it is always neuter). Besides the regular singular
-- slots, the alternative slots (*_s_alt) are filled with the endings -e/-en/-en/-e.
local function decline_langname(base)
	add(base, "nom_s", nil, "")
	-- The null genitive singular is always present; an explicit genitive singular is added in addition to it,
	-- otherwise the default -s is.
	add(base, "gen_s", nil, "")
	add_spec(base, "gen_s", base.gens, nil, "s")

	local fixed_endings = {
		{"dat_s", ""},
		{"acc_s", ""},
		{"nom_s_alt", "e"},
		{"gen_s_alt", "en"},
		{"dat_s_alt", "en"},
		{"acc_s_alt", "e"},
	}
	for _, slot_and_ending in ipairs(fixed_endings) do
		add(base, slot_and_ending[1], nil, slot_and_ending[2])
	end
end
-- Decline an adjectival noun by delegating to [[Module:de-adjective]]: an indicator spec is assembled from the
-- adjective-related settings of `base`, the adjective module generates the forms, and the forms for the relevant
-- gender(s) and/or the plural are copied into the STATE_CASE_NUMBER slots of `base.forms`. The *_equiv slots are
-- then filled from the lemma plus a fixed ending.
local function decline_adjective(base)
-- Construct an equivalent call to {{de-adecl}} based on the adjective indicators we fetched.
local adj_spec_parts = {}
local function ins(val)
table.insert(adj_spec_parts, val)
end
-- Insert a "." separator, but only if something has already been inserted.
local function ins_dot()
if #adj_spec_parts > 0 then
ins(".")
end
end
local function insert_footnotes(footnotes)
if footnotes then
for _, footnote in ipairs(footnotes) do
ins(footnote)
end
end
end
-- "stem:FORM[FOOTNOTES]:FORM[FOOTNOTES]..." for each entry in base.adj_stem.
if base.adj_stem then
ins("stem")
for _, stem in ipairs(base.adj_stem) do
ins(":")
ins(stem.form)
insert_footnotes(stem.footnotes)
end
end
if base.adj_suppress then
ins_dot()
ins("suppress:")
ins(base.adj_suppress)
end
if base.footnotes then
ins_dot()
insert_footnotes(base.footnotes)
end
-- Pass through the properties recognized as adjectival indicators. pairs() order is unspecified, so the order
-- of these indicators within the spec is too.
for prop, _ in pairs(base.props) do
if is_adjectival_decl_indicator(prop) then
ins_dot()
ins(prop)
end
end
local adj_alternant_multiword_spec = require("Module:de-adjective").do_generate_forms(
{base.lemma .. "<" .. table.concat(adj_spec_parts) .. ">"}
)
-- Copy one slot of the adjective result into `base.forms` (the list is shared, not duplicated).
local function copy(from_slot, to_slot)
base.forms[to_slot] = adj_alternant_multiword_spec.forms[from_slot]
end
-- Copy all state/case forms for `gender` ("m", "f", "n" or "p") into the singular slots, or into the plural
-- slots when `gender` is "p".
local function copy_gender_forms(gender)
local number = gender == "p" and "p" or "s"
for _, state in ipairs(states) do
for _, case in ipairs(basic_cases) do
copy(state .. "_" .. case .. "_" .. gender, state .. "_" .. case .. "_" .. number)
end
end
end
if base.number == "pl" then
copy_gender_forms("p")
-- No need to specify gender for *_equiv; will be handled correctly in [[Module:de-headword]].
add(base, "m_equiv", base.lemma, "e")
add(base, "f_equiv", base.lemma, "e")
add(base, "n_equiv", base.lemma, "e")
else
-- Normally there should be only one gender. With several genders, each iteration overwrites the singular
-- slots copied by the previous one.
for _, genderspec in ipairs(base.genders) do
local gender = genderspec.form
copy_gender_forms(gender)
-- No need to specify gender for *_equiv; will be handled correctly in [[Module:de-headword]].
add(base, "m_equiv", base.lemma, "er") -- masculine
add(base, "f_equiv", base.lemma, "e") -- feminine
add(base, "n_equiv", base.lemma, "es") -- neuter
end
-- The plural forms are copied as well unless the noun is singular-only.
if base.number ~= "sg" then
copy_gender_forms("p")
end
end
end
-- Return the slots that may contain a lemma, in the order they should be checked. `props` is a property table,
-- coming either from `base` or `alternant_multiword_spec`. Surnames take precedence over adjectival nouns;
-- everything else uses the plain nominative slots. A fresh list is returned on every call.
local function get_lemma_slots(props)
	return props.surname and {"nom_m_s"}
		or props.overall_adj and {"str_nom_s", "str_nom_p"}
		or {"nom_s", "nom_p"}
end
-- Return the lemmas for this term. The return value is a list of {form = FORM, footnotes = FOOTNOTES}, taken
-- from the first lemma slot that has any forms; if none has, an empty list is returned.
-- If `linked_variant` is given, return the linked variants (with embedded links if specified that way by the user),
-- otherwise return variants with any embedded links removed. If `remove_footnotes` is given, return new objects
-- containing only the form, without any footnotes attached to the lemmas.
function export.get_lemmas(alternant_multiword_spec, linked_variant, remove_footnotes)
	local slot_suffix = linked_variant and "_linked" or ""
	for _, slot in ipairs(get_lemma_slots(alternant_multiword_spec.props)) do
		local lemmas = alternant_multiword_spec.forms[slot .. slot_suffix]
		if lemmas then
			if not remove_footnotes then
				return lemmas
			end
			local stripped = {}
			for i, lemma in ipairs(lemmas) do
				stripped[i] = {form = lemma.form}
			end
			return stripped
		end
	end
	return {}
end
-- Apply the user's slot overrides to `base`, then derive the "<slot>_linked" variants of every potential
-- lemma slot (these are used by {{de-noun}}).
local function handle_derived_slots_and_overrides(base)
	process_slot_overrides(base)

	-- A form identical to the link-free lemma is replaced by the original lemma when the original
	-- actually contains links ("[["); every other form is passed through unchanged.
	local function relink(form)
		if form == base.orig_lemma_no_links and rfind(base.orig_lemma, "%[%[") then
			return base.orig_lemma
		end
		return form
	end

	for _, slot in ipairs(get_lemma_slots(base.props)) do
		local linked_forms = iut.map_forms(base.forms[slot], relink)
		iut.insert_forms(base.forms, slot .. "_linked", linked_forms)
	end
end
-- Like put.split_alternating_runs_and_strip_spaces(), but ensure that backslash-escaped commas and periods are not
-- treated as separators.
--
-- `segments` is the list of alternating text/footnote runs to split and `splitchar` is the separator pattern
-- handed to put.split_alternating_runs_and_strip_spaces(). Returns the list of separated groups.
-- Note that `segments` is modified in place (escaped characters are replaced by placeholders).
--
-- Implementation: "\," and "\." are temporarily replaced by the placeholder strings SUB_ESCAPED_COMMA and
-- SUB_ESCAPED_PERIOD before splitting, and the placeholders are turned back into a bare "," and "." afterwards.
-- Both substitutions must be chained on the same string; assigning each result separately from the original
-- segment would let the second substitution overwrite (and thereby lose) the first.
local function split_alternating_runs_with_escapes(segments, splitchar)
	for i, segment in ipairs(segments) do
		segment = rsub(segment, "\\,", SUB_ESCAPED_COMMA)
		segment = rsub(segment, "\\%.", SUB_ESCAPED_PERIOD)
		segments[i] = segment
	end
	local separated_groups = put.split_alternating_runs_and_strip_spaces(segments, splitchar)
	for _, separated_group in ipairs(separated_groups) do
		for i, segment in ipairs(separated_group) do
			segment = rsub(segment, SUB_ESCAPED_COMMA, ",")
			segment = rsub(segment, SUB_ESCAPED_PERIOD, ".")
			separated_group[i] = segment
		end
	end
	return separated_groups
end
--[=[
Parse an indicator spec (text consisting of angle brackets and zero or more dot-separated indicators within them).

Parameters:
* `angle_bracket_spec`: the spec including the surrounding angle brackets; it must match "^<(.*)>$" (asserted).
* `lemma`: the lemma the spec is attached to; if it is the empty string, `pagename` is used instead.
* `pagename`: fallback lemma, used only when `lemma` is empty.
* `proper_noun`: stored unchanged as `props.prop` of the returned object.

All parse errors are raised through error(), with the text inside the angle brackets appended to the message.

Return value is an object of the form
{
overrides = {
SLOT = {OVERRIDE, OVERRIDE, ...},
...
}, -- where OVERRIDE is {form = FORM, footnotes = FOOTNOTES}; same as `forms` table; FORM can be a full form (only if
beginning with a capital letter or !), otherwise an ending; "-" for an ending means a null ending, while
"--" suppresses the slot entirely, i.e. it is defective
gens = {GEN_SG_SPEC, GEN_SG_SPEC, ...}, same form as OVERRIDE above
pls = {PL_SPEC, PL_SPEC, ...}, same form as OVERRIDE above
forms = {}, -- forms for a single spec alternant; see `forms` below
props = {
PROP = true,
PROP = true,
...
}, -- misc Boolean properties: "weak" (weak noun); "adj" (adjectival noun; set using "+");
"ss" (lemma in -ß changes to -ss- before endings beginning with a vowel; pre-1996 spelling);
"nodatpln" (suppress automatic addition of 'n' in the dative plural after '-e', '-er', '-el')
number = "NUMBER", -- "sg", "pl", "both"; may be missing
adj = true, -- may be missing
-- The following additional fields are added by other functions:
orig_lemma = "ORIGINAL-LEMMA", -- as given by the user or taken from pagename
orig_lemma_no_links = "ORIGINAL-LEMMA-NO-LINKS", -- links removed
lemma = "LEMMA", -- `orig_lemma_no_links`,
forms = {
SLOT = {
{
form = "FORM",
footnotes = {"FOOTNOTE", "FOOTNOTE", ...} -- may be missing
},
...
},
...
},
}
]=]
local function parse_indicator_spec(angle_bracket_spec, lemma, pagename, proper_noun)
if lemma == "" then
lemma = pagename
end
local base = {forms = {}, overrides = {}, props = {prop = proper_noun}}
base.orig_lemma = lemma
base.orig_lemma_no_links = m_links.remove_links(lemma)
base.lemma = base.orig_lemma_no_links
local inside = rmatch(angle_bracket_spec, "^<(.*)>$")
assert(inside)
-- Raise an error whose message ends with the contents of the angle brackets, for context.
local function parse_err(msg)
error(msg .. ": <" .. inside .. ">")
end
--[=[
Parse a single override spec and return three values: the slot the override applies to, the original indicator
spec used to specify the slot, and the override specs. The input is a list where the footnotes have been separated
out. For example, given the spec 'dat:-[referring to a card suit, as a term of endearment, and generally in speech]:en[in most cases in writing]',
the input will be a list {"dat:-", "[referring to a card suit, as a term of endearment, and generally in speech]", ":en",
"[in most cases in writing]", ""}
]=]
local function parse_override(segments)
local part = segments[1]
-- The case name occupies the first three characters, so the remainder starts at character 4.
local offset = 4
local case = usub(part, 1, 3)
if not case_set_with_abl_voc[case] then
parse_err("Internal error: unrecognized case in override: '" .. table.concat(segments) .. "'")
end
local indicator = case
local rest = usub(part, offset)
local slot
-- An optional "pl" after the case selects the plural slot; otherwise the singular slot is used.
if rfind(rest, "^pl") then
rest = rsub(rest, "^pl", "")
slot = case .. "_p"
indicator = indicator .. "pl"
else
slot = case .. "_s"
end
if rfind(rest, "^:") then
rest = rsub(rest, "^:", "")
else
parse_err("Slot indicator '" .. indicator .. "' must be followed by a colon: '" .. table.concat(segments) .. "'")
end
if not noun_slot_set[slot] then
parse_err("Unrecognized slot indicator '" .. indicator .. "': '" .. table.concat(segments) .. "'")
end
-- Replace the first segment with what follows the "CASE[pl]:" prefix before fetching the colon-separated specs.
segments[1] = rest
return slot, indicator, com.fetch_specs(segments, ":", "override", nil, parse_err)
end
if inside ~= "" then
local segments = put.parse_balanced_segment_run(inside, "[", "]")
local dot_separated_groups = split_alternating_runs_with_escapes(segments, "%.")
for i, dot_separated_group in ipairs(dot_separated_groups) do
local part = dot_separated_group[1]
if i == 1 then
-- The first dot-separated group holds the comma-separated gender / genitive / plural specs.
local comma_separated_groups = split_alternating_runs_with_escapes(dot_separated_group, ",")
base.genders = com.fetch_specs(comma_separated_groups[1], ":", "gender", nil, parse_err)
local saw_sg = false
local saw_pl = false
local saw_gendered_pl = false
local saw_non_gendered_pl = false
local saw_adj = false
local special_variant = nil
for _, genderspec in ipairs(base.genders) do
local g = genderspec.form
if g == "m" or g == "n" then
-- Set this on `base.props` as it's used in various other places.
base.props.saw_mn = true
saw_sg = true
elseif g == "f" then
saw_sg = true
elseif g == "p" then
saw_pl = true
saw_non_gendered_pl = true
elseif rfind(g, "^[mfn]p$") then
saw_pl = true
saw_gendered_pl = true
elseif g == "+" or g == "p+" or g == "+p" then
if #base.genders > 1 then
parse_err("Can't specify multiple genders with adjectival declension")
end
saw_adj = true
if g ~= "+" then
saw_pl = true
end
elseif g == "surname" or g == "toponym" or g == "langname" then
if #base.genders > 1 then
parse_err("Can't specify multiple genders with " .. g .. " declension")
end
special_variant = g
else
parse_err("Unrecognized gender spec '" .. g .. "'")
end
end
if saw_sg and saw_pl then
parse_err("Can't specify both singular and plural gender specs")
end
if saw_gendered_pl and saw_non_gendered_pl then
parse_err("Can't specify both 'p' and gendered plural specs")
end
-- Positions of the genitive and plural specs within the comma-separated groups (position 1 is the gender).
-- `gen_index` is 2 for masculine/neuter nouns and special variants, otherwise 1, which means "no genitive spec"
-- (see the `gen_index > 1` check below). `pl_index` is 1 (no plural spec) for adjectival and plural-only
-- nouns, 3 for masculine/neuter nouns, surnames and toponyms, and 2 otherwise; for 'langname' it thus equals
-- `gen_index`, so the `pl_index > gen_index` check below never reads a plural spec for it.
local gen_index = (base.props.saw_mn or special_variant) and 2 or 1
local pl_index =
(saw_adj or saw_pl) and 1 or
(base.props.saw_mn or special_variant == "surname" or special_variant == "toponym") and 3 or
2
-- `pl_index` is also the largest permitted number of comma-separated specs.
if #comma_separated_groups > pl_index then
if saw_adj then
parse_err("Can't specify plurals or genitives with adjectival declension")
elseif saw_pl then
parse_err("Can't specify plurals or genitives with plural-only nouns")
elseif base.props.saw_mn then
parse_err("Can specify at most three comma-separated specs when the gender is masculine or "
.. "neuter (gender, genitive, plural)")
elseif special_variant == "surname" or special_variant == "toponym" then
parse_err("Can specify at most three comma-separated specs with '" .. special_variant .. "' "
.. "nouns ('" .. special_variant .. "', genitive, plural)")
elseif special_variant == "langname" then
parse_err("Can specify at most two comma-separated specs with 'langname' "
.. " ('langname', genitive)")
else
parse_err("Can specify at most two comma-separated specs when the gender is feminine "
.. "(gender, plural)")
end
end
if #comma_separated_groups >= gen_index and gen_index > 1 then
base.gens = com.fetch_specs(comma_separated_groups[gen_index], ":", "genitive", "allow blank", parse_err)
end
if #comma_separated_groups >= pl_index and pl_index > gen_index then
base.pls = com.fetch_specs(comma_separated_groups[pl_index], ":", "plural", "allow blank", parse_err)
end
if special_variant then
if #base.genders > 1 then
parse_err("Internal error: More than one gender spec for '" .. special_variant .. "'")
else
base.props[special_variant] = true
if special_variant == "surname" then
-- FIXME, does it make sense to put the footnotes on the feminine gender (they appear after the gender)?
base.genders = {{form = "m"}, {form = "f", footnotes = base.genders[1].footnotes}}
else
-- Toponyms and language names are given neuter gender.
base.genders = {{form = "n", footnotes = base.genders[1].footnotes}}
end
end
elseif saw_adj then
if #base.genders > 1 then
parse_err("Internal error: More than one gender spec for adjectival declension")
else
base.props.adj = true
if saw_pl then
base.number = "pl"
base.genders = {{form = "p", footnotes = base.genders[1].footnotes}}
else
-- Stash the footnotes into `adj_footnotes`; we will put them onto the autodetected gender
-- in determine_adjectival_genders(), which will set base.genders appropriately.
base.adj_footnotes = base.genders[1].footnotes
base.genders = {}
end
end
elseif saw_pl then
-- Convert 'mp' to 'm-p', 'fp' to 'f-p', etc. as that's what [[Module:gender and number]] expects.
for _, genderspec in ipairs(base.genders) do
local gender = rmatch(genderspec.form, "^([mfn])p$")
if gender then
genderspec.form = gender .. "-p"
end
end
base.number = "pl"
end
elseif base.props.adj and part:find("^stem:") then
dot_separated_group[1] = rsub(part, "^stem:", "")
base.adj_stem = com.fetch_specs(dot_separated_group, ":", "adjectival stem", nil, parse_err)
elseif base.props.adj and part:find("^suppress:") then
if #dot_separated_group > 1 then
parse_err("Can't specify footnotes with suppress: '" .. table.concat(dot_separated_group) .. "'")
end
-- No need to parse or validate more. Will happen in [[Module:de-adjective]].
base.adj_suppress = rsub(part, "suppress:", "")
elseif part == "" then
-- A group with no indicator text must consist of footnotes only; these are stored on `base.footnotes`.
if #dot_separated_group == 1 then
parse_err("Blank indicator")
end
base.footnotes = com.fetch_footnotes(dot_separated_group, parse_err)
elseif part:find(":") then
-- override
-- FIXME: Handle adjectival overrides
local case_prefix = usub(part, 1, 3)
if case_set_with_abl_voc[case_prefix] then
local slot, slot_indicator, override = parse_override(dot_separated_group)
if base.overrides[slot] then
parse_err("Can't specify override twice for slot '" .. slot_indicator .. "'")
else
base.overrides[slot] = override
end
else
parse_err("Unrecognized indicator '" .. part .. "'")
end
elseif #dot_separated_group > 1 then
-- All remaining indicators are bare keywords, which cannot carry footnotes.
local errmsg
if base.props.adj then
errmsg = "Footnotes only allowed with slot overrides, 'stem:' or by themselves"
else
errmsg = "Footnotes only allowed with genitive, plural, slot overrides or by themselves"
end
parse_err(errmsg .. ": '" .. table.concat(dot_separated_group) .. "'")
elseif part == "sg" or part == "both" then
if base.number then
if base.number ~= part then
parse_err("Can't specify '" .. part .. "' along with '" .. base.number .. "'")
else
parse_err("Can't specify '" .. part .. "' twice")
end
end
base.number = part
elseif not base.props.adj and (part == "weak" or part == "weak_n" or part == "ss" or part == "nodatpln" or part == "article" or part == "dat_with_e") then
if base.props[part] then
parse_err("Can't specify '" .. part .. "' twice")
end
base.props[part] = true
if part == "weak_n" then
-- weak_n implies weak
base.props.weak = true
end
elseif base.props.adj and (part == "article" or is_adjectival_decl_indicator(part)) then
if base.props[part] then
parse_err("Can't specify '" .. part .. "' twice")
end
base.props[part] = true
else
parse_err("Unrecognized indicator '" .. part .. "'")
end
end
end
return base
end
-- Turn an adjectival lemma into its (approximate) predicative form by chopping off the final '-er', '-e' or
-- '-es', and record the gender implied by that ending in `base.autodetected_gender`. The predicative form
-- itself is never displayed, so no attempt is made at fix-ups such as -abler -> -abel.
-- Raises an error if the lemma has none of the three endings.
local function synthesize_adj_lemma(base)
	local stem, ending = rmatch(base.lemma, "^(.*)(e[rs]?)$")
	if not stem then
		error("Unrecognized adjectival lemma, should end in '-er', '-e' or '-es': '" .. base.lemma .. "'")
	end
	base.lemma = stem
	-- The autodetected gender will be ignored if number == "pl". An ending of '-es' maps to neuter.
	local gender_of_ending = {er = "m", e = "f"}
	base.autodetected_gender = gender_of_ending[ending] or "n"
end
-- Post-process a single parsed `base`: copy expression-wide properties up to `alternant_multiword_spec`,
-- and either synthesize the adjectival lemma (adjectival nouns) or fill in the default number, genitive and
-- plural specs (all other nouns).
local function detect_indicator_spec(alternant_multiword_spec, base)
	local overall_props = alternant_multiword_spec.props
	local props = base.props

	if props.article then
		overall_props.article = true
	end

	for _, variant in ipairs {"surname", "toponym", "langname"} do
		if overall_props[variant] == nil then
			overall_props[variant] = props[variant]
		elseif overall_props[variant] ~= props[variant] then
			-- Each special variant has a table of its own with its own slots, so a mixture cannot be displayed.
			-- FIXME: Consider supporting adjectives with these variants. That requires that we copy the adjectival
			-- declensions to the appropriate per-variant slots.
			error("If some alternants set '" .. variant .. "', all must do so")
		end
	end

	if props.adj then
		overall_props.overall_adj = true
		synthesize_adj_lemma(base)
		return
	end

	-- Default the number: surnames and nouns with explicit plural specs get "both"; proper nouns, toponyms
	-- and language names get "sg"; everything else gets "both".
	if not base.number then
		if props.surname or base.pls then
			base.number = "both"
		elseif overall_props.is_proper or props.toponym or props.langname then
			base.number = "sg"
		else
			base.number = "both"
		end
	end

	if not props.surname then
		if base.number == "pl" then
			if base.gens then
				error("Internal error: With plural-only noun, no genitive singular specs should be allowed")
			end
			if base.pls then
				error("Internal error: With plural-only noun, no plural specs should be allowed")
			end
		end
		if base.pls and base.number == "sg" then
			error("Can't specify explicit plural specs along with explicit '.sg'")
		end
	end

	-- "+" is used as the form when no genitive or plural spec was given.
	if not base.gens then
		base.gens = {{form = "+"}}
	end
	if not base.pls then
		base.pls = {{form = "+"}}
	end
end
-- Run detect_indicator_spec() over every word spec, then copy the expression-wide `overall_adj` flag back
-- down onto each word spec's `props`. The second pass must come after the first has finished, because the
-- flag is only final once every word spec has been inspected.
local function detect_all_indicator_specs(alternant_multiword_spec)
	local function detect(base)
		detect_indicator_spec(alternant_multiword_spec, base)
	end
	local function push_down_overall_adj(base)
		base.props.overall_adj = alternant_multiword_spec.props.overall_adj
	end
	iut.map_word_specs(alternant_multiword_spec, detect)
	iut.map_word_specs(alternant_multiword_spec, push_down_overall_adj)
end
-- Forward declaration; propagate_alternant_properties() and propagate_multiword_properties() call each other.
local propagate_multiword_properties

-- Propagate `property` within each alternant (a multiword spec) of `alternant_spec`, then set the property
-- on `alternant_spec` itself by combining the alternants' values: the first value seen is kept as long as
-- every later non-nil value agrees with it, otherwise the result is `mixed_value`.
local function propagate_alternant_properties(alternant_spec, property, mixed_value, nouns_only)
	local combined
	for _, multiword_spec in ipairs(alternant_spec.alternants) do
		propagate_multiword_properties(multiword_spec, property, mixed_value, nouns_only)
		local value = multiword_spec[property]
		if combined == nil then
			combined = value
		elseif value and combined ~= value then
			combined = mixed_value
		end
	end
	alternant_spec[property] = combined
end
-- Propagate `property` sideways within a multiword spec and compute the spec's own value of the property.
-- Walking left to right, each "nounal" element donates its value to the preceding elements (back to the
-- previous nounal element) that lack one; the last nounal element also donates to everything after it.
-- The spec's own value is the common value of all nounal elements, or `mixed_value` if they disagree.
-- An element counts as nounal if
--   * it is an alternant spec and ends up with a value after recursive processing, or
--   * `nouns_only` is set and it is not adjectival (`props.adj` unset), or
--   * `nouns_only` is unset and it already has a value.
-- Note that `nouns_only` is not forwarded to the recursive call on alternant specs.
propagate_multiword_properties = function(multiword_spec, property, mixed_value, nouns_only)
	local combined = nil
	local last_nounal = 0
	local specs = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs
	for i = 1, #specs do
		local spec = specs[i]
		local is_nounal
		if spec.alternants then
			propagate_alternant_properties(spec, property, mixed_value)
			is_nounal = spec[property] and true or false
		elseif nouns_only then
			is_nounal = not spec.props.adj
		else
			is_nounal = spec[property] and true or false
		end
		if is_nounal then
			local value = spec[property]
			if not value then
				error("Internal error: noun-type word spec without " .. property .. " set")
			end
			-- Fill in the elements between the previous nounal element and this one.
			for j = last_nounal + 1, i - 1 do
				specs[j][property] = specs[j][property] or value
			end
			last_nounal = i
			if combined == nil then
				combined = value
			elseif combined ~= value then
				combined = mixed_value
			end
		end
	end
	if last_nounal > 0 then
		-- Fill in the elements after the last nounal element.
		local trailing_value = specs[last_nounal][property]
		for i = last_nounal + 1, #specs do
			specs[i][property] = specs[i][property] or trailing_value
		end
	end
	multiword_spec[property] = combined
end
-- Push `property` down from the top-level spec to every word spec that does not yet have a value.
-- Each level falls back to the value of the level above it: the top-level spec (which itself falls back to
-- `default_propval`), then the alternant-or-word spec, then the multiword spec inside an alternant, then the
-- word spec. Raises an error if a word would end up with the value "mixed". Alternant specs and the
-- multiword specs inside them are only read, not assigned.
local function propagate_properties_downward(alternant_multiword_spec, property, default_propval)
	local function reject_mixed(value)
		if value == "mixed" then
			error("Attempt to assign mixed " .. property .. " to word")
		end
	end

	local top_value = alternant_multiword_spec[property] or default_propval
	alternant_multiword_spec[property] = top_value
	for _, spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		local spec_value = spec[property] or top_value
		if not spec.alternants then
			reject_mixed(spec_value)
			spec[property] = spec_value
		else
			for _, multiword_spec in ipairs(spec.alternants) do
				local multiword_value = multiword_spec[property] or spec_value
				for _, word_spec in ipairs(multiword_spec.word_specs) do
					local word_value = word_spec[property] or multiword_value
					reject_mixed(word_value)
					word_spec[property] = word_value
				end
			end
		end
	end
end
--[=[
Propagate `property` ("genders" or "number") from nouns to the adjectives adjacent to them.

The property is assumed to be already set on all nouns; this should happen in parse_indicator_spec().
The work is done in three passes:
1. Upward/sideways propagation treating every non-adjectival word as a noun. Within each multiword spec we go
   left to right; each noun's value (obtained recursively if the noun is an alternant) is copied to the
   value-less words to its left, back to the previous noun, and the last noun's value is also copied to the
   words to its right. The multiword spec itself receives the common value of its nouns, or `mixed_value` if
   they disagree ("mixed" for gender, "both" for number). Alternant specs combine the values of their
   alternants in the same way.
2. The same propagation again, this time treating any element that already has a value as a source.
3. Downward propagation: anything still lacking a value inherits it from the enclosing level, with
   `default_propval` as the fallback at the top level.
]=]
local function propagate_properties(alternant_multiword_spec, property, default_propval, mixed_value)
	local spec = alternant_multiword_spec
	-- Passes 1 and 2: upward and sideways.
	propagate_multiword_properties(spec, property, mixed_value, "nouns only")
	propagate_multiword_properties(spec, property, mixed_value, false)
	-- Pass 3: downward.
	propagate_properties_downward(spec, property, default_propval)
end
-- Give every adjectival word spec that has no gender yet a single gender: "p" if its number is "pl",
-- otherwise the gender autodetected by synthesize_adj_lemma(). This cannot be done in
-- synthesize_adj_lemma() itself because it is only known later (once propagate_properties() has run)
-- whether an adjectival form in -e is feminine or plural. The footnotes (i.e. qualifiers) given directly
-- after '+' are attached to the gender.
local function determine_adjectival_genders(alternant_multiword_spec)
	local function assign_gender(base)
		if not base.props.adj or #base.genders ~= 0 then
			return
		end
		local gender
		if base.number == "pl" then
			gender = "p"
		else
			gender = base.autodetected_gender
		end
		base.genders = {{form = gender, footnotes = base.adj_footnotes}}
	end
	iut.map_word_specs(alternant_multiword_spec, assign_gender)
end
-- Find the first noun in a multiword expression and set alternant_multiword_spec.first_noun
-- to the index of that noun. Also find the first adjective and set alternant_multiword_spec.first_adj
-- similarly. If there is a first noun, we use its properties to determine the overall expression's
-- properties; otherwise we use the first adjective's properties, otherwise the first word's properties.
-- If the "word" located this way is not an alternant spec, we just use its properties directly, otherwise
-- we use the properties of the first noun (or failing that the first adjective, or failing that the
-- first word) in each alternative alternant in the alternant spec. For this reason, we need to set the
-- .first_noun and .first_adj of each multiword expression embedded in the first noun alternant spec,
-- and the .first_adj of each multiword expression in each adjective alternant spec leading up to the
-- first noun alternant spec.
--
-- A word counts as a noun when its `props.adj` is not set. Scanning stops at the first top-level element that
-- is (or contains) a noun. Nothing is returned; the results are stored in the `first_noun` / `first_adj`
-- fields described above.
local function determine_noun_status(alternant_multiword_spec)
	for i, alternant_or_word_spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if alternant_or_word_spec.alternants then
			-- Internal tag, either "noun" or "adj"; it is only ever compared against these literals below and
			-- is never displayed, so it must not be translated.
			local alternant_type
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				for j, word_spec in ipairs(multiword_spec.word_specs) do
					if not word_spec.props.adj then
						multiword_spec.first_noun = j
						alternant_type = "noun"
						break
					elseif not multiword_spec.first_adj then
						multiword_spec.first_adj = j
						if not alternant_type then
							alternant_type = "adj"
						end
					end
				end
			end
			if alternant_type == "noun" then
				alternant_multiword_spec.first_noun = i
				return
			elseif alternant_type == "adj" and not alternant_multiword_spec.first_adj then
				alternant_multiword_spec.first_adj = i
			end
		else
			if not alternant_or_word_spec.props.adj then
				alternant_multiword_spec.first_noun = i
				return
			elseif not alternant_multiword_spec.first_adj then
				alternant_multiword_spec.first_adj = i
			end
		end
	end
end
-- Decline a single word spec with the routine matching its kind, then compute the derived ("_linked")
-- slots and apply overrides. The kinds are checked in this order: surname, toponym, language name,
-- adjectival noun; anything else is declined as an ordinary noun.
local function decline_noun_or_adjective(base)
	local props = base.props
	local decline = decline_noun
	if props.surname then
		decline = decline_surname
	elseif props.toponym then
		decline = decline_toponym
	elseif props.langname then
		decline = decline_langname
	elseif props.adj then
		decline = decline_adjective
	end
	decline(base)
	handle_derived_slots_and_overrides(base)
end
-- Fill in the "art_<definiteness>_<case>_s" and "art_<definiteness>_<case>_p" slots of the overall
-- expression. This is done separately from the normal inflection code, which would produce e.g.
-- '[[der]] [[und]] [[der]]' for conjoined nouns.
-- Singular articles are added for every gender of every word spec (skipped entirely when the overall
-- number is "pl"); plural articles are always added.
local function compute_non_surname_articles(alternant_multiword_spec)
	local function add_article(slot, article)
		iut.insert_form(alternant_multiword_spec.forms, slot, {form = article})
	end

	if alternant_multiword_spec.number ~= "pl" then
		local function add_singular_articles(base)
			for _, genderspec in ipairs(base.genders) do
				for _, case in ipairs(cases_with_abl_voc) do
					for _, def in ipairs(definitenesses) do
						local key = def .. "_" .. case
						add_article("art_" .. key .. "_s", com.articles[genderspec.form][key])
					end
				end
			end
		end
		iut.map_word_specs(alternant_multiword_spec, add_singular_articles)
	end

	for _, case in ipairs(basic_cases) do
		for _, def in ipairs(definitenesses) do
			local key = def .. "_" .. case
			add_article("art_" .. key .. "_p", com.articles.p[key])
		end
	end
end
-- Fill in the article slots for surnames: "art_<definiteness>_<case>_<gender>_s" for masculine and
-- feminine singular, and "art_def_<case>_p" for the plural (definite only). Each article is stored as a
-- parenthesized link, i.e. "([[ARTICLE]])". This is done separately from the normal inflection code, which
-- would produce e.g. '[[der]] [[und]] [[der]]' for conjoined nouns.
local function compute_surname_articles(alternant_multiword_spec)
	local function add_article(slot, article)
		iut.insert_form(alternant_multiword_spec.forms, slot, {form = "([[" .. article .. "]])"})
	end

	for _, gender in ipairs {"m", "f"} do
		for _, case in ipairs(basic_cases) do
			for _, def in ipairs(definitenesses) do
				local key = def .. "_" .. case
				add_article("art_" .. key .. "_" .. gender .. "_s", com.articles[gender][key])
			end
		end
	end

	for _, case in ipairs(basic_cases) do
		add_article("art_def_" .. case .. "_p", com.articles.p["def_" .. case])
	end
end
-- Compute the article slots of the overall expression, using the surname-specific slot layout when the
-- expression is a surname and the regular layout otherwise.
local function compute_articles(alternant_multiword_spec)
	if not alternant_multiword_spec.props.surname then
		compute_non_surname_articles(alternant_multiword_spec)
		return
	end
	compute_surname_articles(alternant_multiword_spec)
end
-- Call `fun` on the key word of `alternant_multiword_spec`. The key word is the first noun, or failing that
-- the first adjective (as with an adjectival noun), or failing that the first word. If the key element is
-- an alternant spec, `fun` is instead called on the key word of each of its alternants, chosen the same
-- way. Nothing is called for an index that lies beyond the end of the respective word list.
local function map_first_noun(alternant_multiword_spec, fun)
	local specs = alternant_multiword_spec.alternant_or_word_specs
	local key_entry = alternant_multiword_spec.first_noun or alternant_multiword_spec.first_adj or 1
	if #specs < key_entry then
		return
	end
	local key_spec = specs[key_entry]
	if not key_spec.alternants then
		fun(key_spec)
		return
	end
	for _, multiword_spec in ipairs(key_spec.alternants) do
		local inner_key = multiword_spec.first_noun or multiword_spec.first_adj or 1
		local word_specs = multiword_spec.word_specs
		if #word_specs >= inner_key then
			fun(word_specs[inner_key])
		end
	end
end
-- Compute the categories to add the noun to, as well as the annotation to display in the
-- declension title bar. We combine the code to do these functions as both categories and
-- title bar contain similar information.
--
-- Reads `number`, `genders`, `pos`, `props`, `first_noun` and `first_adj` from `alternant_multiword_spec`
-- and sets three fields on it:
-- * `categories`: list of category names, without duplicates;
-- * `decl_type`: list of declension types taken from the key noun(s), without duplicates;
-- * `annotation`: comma-separated string built from the number restriction, genders and declension types.
-- Several category insertions below are commented out, which leaves the corresponding branches empty.
local function compute_categories_and_annotation(alternant_multiword_spec)
alternant_multiword_spec.categories = {}
alternant_multiword_spec.decl_type = {}
-- Add a category: "~" in `cattype` is replaced by the part of speech, and the result is prefixed with
-- the literal "ဂျာမာန်". Duplicates are not added.
local function insert(cattype)
cattype = rsub(cattype, "~", alternant_multiword_spec.pos)
m_table.insertIfNot(alternant_multiword_spec.categories, "ဂျာမာန်" .. cattype .. "")
end
if not alternant_multiword_spec.props.is_proper and alternant_multiword_spec.number == "sg" then
insert("~မတော်ဟွံဂွံဂမၠိုၚ်")
elseif alternant_multiword_spec.number == "pl" then
-- insert("pluralia tantum")
end
local annotation
local annparts = {}
local genderdescs = {}
local decldescs = {}
if alternant_multiword_spec.number == "sg" then
table.insert(annparts, "sg-only")
elseif alternant_multiword_spec.number == "pl" and alternant_multiword_spec.genders[1].spec ~= "p" then
-- If the gender is just 'p', we use "pl-only" below as a substitute for the gender and hook any qualifiers
-- onto it. Note that when 'p' is the gender, there can be only one gender.
table.insert(annparts, "pl-only")
end
-- Build one description per gender: the full gender name (or "pl-only"), followed by any qualifiers in
-- italicized parentheses.
for i, genderspec in ipairs(alternant_multiword_spec.genders) do
local genderdesc_parts = {}
local gender = genderspec.spec
if gender == "p" then
table.insert(genderdesc_parts, "pl-only")
else
-- Strip a trailing "-p" (as in 'f-p') before looking up the full gender name.
gender = rsub(gender, "%-p$", "")
table.insert(genderdesc_parts, gender_spec_to_full_gender[gender])
end
if genderspec.qualifiers then
table.insert(genderdesc_parts, " ''(")
table.insert(genderdesc_parts, table.concat(genderspec.qualifiers, ", "))
table.insert(genderdesc_parts, ")''")
end
table.insert(genderdescs, table.concat(genderdesc_parts))
end
-- Record a declension type both for the annotation and in `alternant_multiword_spec.decl_type`.
local function insert_decl_type(decl_type)
m_table.insertIfNot(decldescs, decl_type)
m_table.insertIfNot(alternant_multiword_spec.decl_type, decl_type)
end
-- Collect declension descriptions (and categories) from a single key word spec.
local function do_word_spec(base)
if base.props.surname then
m_table.insertIfNot(decldescs, "surname")
elseif base.props.toponym then
m_table.insertIfNot(decldescs, "toponym")
elseif base.props.langname then
m_table.insertIfNot(decldescs, "langname")
elseif base.decl_type and #base.decl_type > 0 then
-- strong/weak/mixed declension type; should only be present on masculine or neuter nouns with a plural
for _, decl_type in ipairs(base.decl_type) do
if decl_type == "weak" then
insert("weak ~")
elseif decl_type == "mixed" then
-- insert("~မပံၚ်ဖနှဴလဝ်ဂမၠိုၚ်")
end
insert_decl_type(decl_type)
end
elseif base.props.saw_mn then
-- For singular-only masculine or neuter nouns, we can still classify as strong or weak.
-- We don't try to classify plural-only nouns. Even for nouns in -n or -en, we have no idea if they are
-- strong (-en is part of the stem), mixed or weak.
if base.props.weak then
insert("weak ~")
insert_decl_type("weak")
else
insert_decl_type("strong")
end
end
end
-- Use the surname/toponym/langname/weak/strong properties of the noun(s).
map_first_noun(alternant_multiword_spec, do_word_spec)
if #genderdescs > 0 then
table.insert(annparts, table.concat(genderdescs, " // "))
end
if #decldescs > 0 then
table.insert(annparts, table.concat(decldescs, " // "))
end
-- An expression with adjectives but no noun is an adjectival noun.
if not alternant_multiword_spec.first_noun and alternant_multiword_spec.first_adj then
insert("adjectival ~")
table.insert(annparts, "adjectival")
end
if alternant_multiword_spec.props.langname then
-- insert("specially-declined language names")
end
alternant_multiword_spec.annotation = table.concat(annparts, ", ")
end
-- Compute `alternant_multiword_spec.genders`, a list of {spec = GENDER, qualifiers = QUALIFIERS} objects
-- for display in the headword.
local function compute_headword_genders(alternant_multiword_spec)
	alternant_multiword_spec.genders = {}

	-- Only the key noun(s) contribute genders. Adjectives in adjective-noun combinations are deliberately
	-- left out, as they would cause issues in plural-only expressions like [[Kanarische Inseln]], where
	-- ''Inseln'' may be 'f-p' but ''Kanarische'' will be just 'p', and we'd end up with both genders.
	local function collect_genders(base)
		for _, genderspec in ipairs(base.genders) do
			-- Look for an entry that already has this gender.
			local existing
			for _, candidate in ipairs(alternant_multiword_spec.genders) do
				if candidate.spec == genderspec.form then
					existing = candidate
					break
				end
			end
			if existing then
				-- Same gender seen before: merge the footnotes instead of adding a duplicate.
				existing.qualifiers = iut.combine_footnotes(existing.qualifiers, genderspec.footnotes)
			else
				table.insert(alternant_multiword_spec.genders,
					{spec = genderspec.form, qualifiers = genderspec.footnotes})
			end
		end
	end
	map_first_noun(alternant_multiword_spec, collect_genders)

	-- Up to here the qualifiers are raw footnotes. Convert them to real qualifiers, which involves removing
	-- brackets and expanding any footnote abbreviations; duplicates are dropped.
	for _, genderspec in ipairs(alternant_multiword_spec.genders) do
		local raw_qualifiers = genderspec.qualifiers
		if raw_qualifiers then
			local processed = {}
			for _, qualifier in ipairs(raw_qualifiers) do
				m_table.insertIfNot(processed,
					iut.expand_footnote_or_references(qualifier, "return raw", "no parse refs"))
			end
			genderspec.qualifiers = processed
		end
	end
end
-- Process the specs in `arg_specs` given for dim=, m=, f=, n= or sg= and store the results in `slot` in
-- `alternant_multiword_spec.forms`.
-- * `arg_specs` is a list of strings, each of which is a comma-separated or colon-separated list of specs.
--   A spec may be a suffix like "in", a suffix with umlaut like "^chen", or a full form beginning with a
--   capital letter or exclamation point. Suffixes are added onto the lemma with a final -e removed if present.
-- * `default` is the value to use if "+" is given as a spec.
-- * `literal_default`, if given, indicates that `default` is always a literal (full) form; otherwise, if
--   `default` begins with a lowercase letter, it is taken as a suffix. (This is used in cases like the
--   feminine of [[ordentlicher Professor]], which is generated as "ordentliche Professorin"; we don't want
--   this interpreted as a suffix.)
-- * `desc` is an English description of the form whose specs are being processed, for display in error
--   messages.
local function process_dim_m_f_n(alternant_multiword_spec, arg_specs, default, literal_default, slot, desc)
	local function strip_final_e(form)
		return rsub(form, "e$", "")
	end
	-- Footnotes attached to a lemma must not end up in the output, so they are removed here. They typically
	-- get onto the lemma when the syntax `.[footnote]` is used, which attaches a footnote to every form.
	local lemmas = iut.map_forms(export.get_lemmas(alternant_multiword_spec, nil, "remove footnotes"),
		strip_final_e)

	for _, spec in ipairs(arg_specs) do
		local function parse_err(msg)
			error(msg .. ": " .. spec)
		end
		-- FIXME, this should propagate the 'ss' property upwards
		local props = {}
		local function do_combine_stem_ending(stem, ending)
			return combine_stem_ending(props, stem, ending)
		end
		-- When no explicit stem is supplied, the endings are attached to the e-less lemmas.
		local function process(stem, ending)
			iut.add_forms(alternant_multiword_spec.forms, slot, stem or lemmas, ending, do_combine_stem_ending)
		end
		local segments = put.parse_balanced_segment_run(spec, "[", "]")
		-- Allow comma (preferred) or colon as separator.
		local ending_specs = com.fetch_specs(segments, "[,:]", desc, nil, parse_err)
		process_spec(ending_specs, nil, default, literal_default, desc, process)
	end
end
-- Convert the forms in `alternant_multiword_spec.forms` into their displayable versions by delegating to
-- iut.show_forms(). The slot list passed along depends on the kind of table: surname, language name,
-- adjectival noun or (the default) ordinary noun, checked in that order.
local function show_forms(alternant_multiword_spec)
	local lemmas = export.get_lemmas(alternant_multiword_spec)
	local spec_props = alternant_multiword_spec.props
	local slot_list =
		spec_props.surname and surname_slot_list_with_linked_and_articles
		or spec_props.langname and langname_slot_list_with_linked_and_articles
		or spec_props.overall_adj and adjectival_slot_list_with_linked_and_articles
		or noun_slot_list_with_linked_and_articles
	iut.show_forms(alternant_multiword_spec.forms, {
		lang = lang,
		lemmas = lemmas,
		slot_list = slot_list,
	})
end
local noun_template_both = [=[
<div class="NavFrame">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-{decl_type}"
! style="{BG2};width:15%" |
! colspan="3" style="{BG2};width:46%" | ကိုန်ဨကဝုစ်
! colspan="2" style="{BG2};width:39%" | ကိုန်ဗဟုဝစ်
|-
! style="{BG3}" |
! style="{BG3};width:7%" | [[indefinite article|indef.]]
! style="{BG3};width:7%" | [[definite article|def.]]
! style="{BG3};width:32%" | နာမ်
! style="{BG3};width:7%" | [[definite article|def.]]
! style="{BG3};width:32%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | {art_ind_nom_s}
| style="{BG4}" | {art_def_nom_s}
| {nom_s}
| style="{BG4}" | {art_def_nom_p}
| {nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | {art_ind_gen_s}
| style="{BG4}" | {art_def_gen_s}
| {gen_s}
| style="{BG4}" | {art_def_gen_p}
| {gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | {art_ind_dat_s}
| style="{BG4}" | {art_def_dat_s}
| {dat_s}
| style="{BG4}" | {art_def_dat_p}
| {dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | {art_ind_acc_s}
| style="{BG4}" | {art_def_acc_s}
| {acc_s}
| style="{BG4}" | {art_def_acc_p}
| {acc_p}
|{\cl}{notes_clause}</div></div>]=]
local noun_template_both_no_indef = [=[
<div class="NavFrame" style="width:93%">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-{decl_type}"
! style="{BG2};width:15%" |
! colspan="2" style="{BG2};width:39%" | ကိုန်ဨကဝုစ်
! colspan="2" style="{BG2};width:39%" | ကိုန်ဗဟုဝစ်
|-
! style="{BG3}" |
! style="{BG3};width:7%" | [[definite article|def.]]
! style="{BG3};width:32%" | နာမ်
! style="{BG3};width:7%" | [[definite article|def.]]
! style="{BG3};width:32%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | {art_def_nom_s}
| {nom_s}
| style="{BG4}" | {art_def_nom_p}
| {nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | {art_def_gen_s}
| {gen_s}
| style="{BG4}" | {art_def_gen_p}
| {gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | {art_def_dat_s}
| {dat_s}
| style="{BG4}" | {art_def_dat_p}
| {dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | {art_def_acc_s}
| {acc_s}
| style="{BG4}" | {art_def_acc_p}
| {acc_p}
|{\cl}{notes_clause}</div></div>]=]
-- Extra table rows for the ablative and vocative singular. make_table() formats this template and stores the
-- result in `forms.abl_voc_clause`, which the singular-only noun templates splice in after the accusative row.
-- NOTE(review): each row here has three data cells (indefinite article, definite article, form), but
-- `noun_template_sg_no_indef` has only two data columns and also embeds {abl_voc_clause}; the rows look one
-- cell too wide in that case -- confirm and adjust if such nouns can have ablative/vocative forms.
local noun_template_abl_voc = [=[
|-
! style="{BG3}" | ပရေၚ်မလၚ်
| style="{BG4}" | {art_ind_abl_s}
| style="{BG4}" | {art_def_abl_s}
| {abl_s}
|-
! style="{BG3}" | ပရေၚ်ဂယိုၚ်လမျီု
| style="{BG4}" | {art_ind_voc_s}
| style="{BG4}" | {art_def_voc_s}
| {voc_s}]=]
local noun_template_sg = [=[
<div class="NavFrame" style="width:61%">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-{decl_type}"
! style="{BG2};width:24.6%" |
! colspan="3" style="{BG2};" | ကိုန်ဨကဝုစ်
|-
! style="{BG3}" |
! style="{BG3};width:11.5%" | [[indefinite article|indef.]]
! style="{BG3};width:11.5%" | [[definite article|def.]]
! style="{BG3};width:52.5%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | {art_ind_nom_s}
| style="{BG4}" | {art_def_nom_s}
| {nom_s}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | {art_ind_gen_s}
| style="{BG4}" | {art_def_gen_s}
| {gen_s}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | {art_ind_dat_s}
| style="{BG4}" | {art_def_dat_s}
| {dat_s}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | {art_ind_acc_s}
| style="{BG4}" | {art_def_acc_s}
| {acc_s}{abl_voc_clause}
|{\cl}{notes_clause}</div></div>]=]
local noun_template_sg_no_indef = [=[
<div class="NavFrame" style="width:50%">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-{decl_type}"
! style="{BG2};width:24.6%" |
! colspan="2" style="{BG2};" | ကိုန်ဨကဝုစ်
|-
! style="{BG3}" |
! style="{BG3};width:11.5%" | [[definite article|def.]]
! style="{BG3};width:52.5%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | {art_def_nom_s}
| {nom_s}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | {art_def_gen_s}
| {gen_s}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | {art_def_dat_s}
| {dat_s}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | {art_def_acc_s}
| {acc_s}{abl_voc_clause}
|{\cl}{notes_clause}</div></div>]=]
local noun_template_pl = [=[
<div class="NavFrame" style="width:61%">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-{decl_type}"
! style="{BG2};width:24.6%" |
! colspan="2" style="{BG2};" | ကိုန်ဗဟုဝစ်
|-
! style="{BG3}" |
! style="{BG3};width:11.5%" | [[definite article|def.]]
! style="{BG3};width:52.5%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | {art_def_nom_p}
| {nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | {art_def_gen_p}
| {gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | {art_def_dat_p}
| {dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | {art_def_acc_p}
| {acc_p}
|{\cl}{notes_clause}</div></div>]=]
local noun_template_surname = [=[
<div class="NavFrame">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-{decl_type}"
! rowspan="2" style="{BG2};width:11%" |
! colspan="6" style="{BG2}" | ကိုန်ဨကဝုစ်
! colspan="2" rowspan="2" style="{BG2}" | ကိုန်ဗဟုဝစ်
|-
! colspan="3" style="{BG2}" | ပုလ္လိၚ်
! colspan="3" style="{BG2}" | ဣတ္တိလိၚ်
|-
! style="{BG3}" |
! style="{BG3};width:4%" | [[indefinite article|indef.]]
! style="{BG3};width:4%" | [[definite article|def.]]
! style="{BG3};width:23%" | နာမ်
! style="{BG3};width:4%" | [[indefinite article|indef.]]
! style="{BG3};width:4%" | [[definite article|def.]]
! style="{BG3};width:23%" | နာမ်
! style="{BG3};width:4%" | [[definite article|def.]]
! style="{BG3};width:23%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | {art_ind_nom_m_s}
| style="{BG4}" | {art_def_nom_m_s}
| {nom_m_s}
| style="{BG4}" | {art_ind_nom_f_s}
| style="{BG4}" | {art_def_nom_f_s}
| {nom_f_s}
| style="{BG4}" | {art_def_nom_p}
| {nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | {art_ind_gen_m_s}
| style="{BG4}" | {art_def_gen_m_s}
| {gen_m_s}
| style="{BG4}" | {art_ind_gen_f_s}
| style="{BG4}" | {art_def_gen_f_s}
| {gen_f_s}
| style="{BG4}" | {art_def_gen_p}
| {gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | {art_ind_dat_m_s}
| style="{BG4}" | {art_def_dat_m_s}
| {dat_m_s}
| style="{BG4}" | {art_ind_dat_f_s}
| style="{BG4}" | {art_def_dat_f_s}
| {dat_f_s}
| style="{BG4}" | {art_def_dat_p}
| {dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | {art_ind_acc_m_s}
| style="{BG4}" | {art_def_acc_m_s}
| {acc_m_s}
| style="{BG4}" | {art_ind_acc_f_s}
| style="{BG4}" | {art_def_acc_f_s}
| {acc_f_s}
| style="{BG4}" | {art_def_acc_p}
| {acc_p}
|{\cl}{notes_clause}</div></div>]=]
local noun_template_langname = [=[
<div class="NavFrame" style="width:100%">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table inflection-table-de inflection-table-de-langname"
! style="{BG2};width:15%" |
! colspan="5" style="{BG2};width:85%" | singular ''([[Wiktionary:German entry guidelines#Declension of language names|explanation of the use and meaning of the forms]])''
|-
! style="{BG3}" |
! style="{BG3};width:14%" | (ဗွဲတၟေၚ်လ္ပာ်မ္ၚးပစ္စဲ)
! style="{BG3};width:32%" | နာမ်
! style="{BG3};width:7%" | [[definite article|def.]]
! style="{BG3};width:32%" | နာမ်
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4}" | ({art_def_nom_s})
| {nom_s}
| style="{BG4}" | {art_def_nom_s}
| {nom_s_alt}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4}" | ({art_def_gen_s})
| {gen_s}
| style="{BG4}" | {art_def_gen_s}
| {gen_s_alt}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4}" | ({art_def_dat_s})
| {dat_s}
| style="{BG4}" | {art_def_dat_s}
| {dat_s_alt}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4}" | ({art_def_acc_s})
| {acc_s}
| style="{BG4}" | {art_def_acc_s}
| {acc_s_alt}
|{\cl}{notes_clause}</div></div>]=]
local adjectival_template_both = [=[
<div class="NavFrame">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table"
! style="{BG3};width:15%" |
! colspan="2" style="{BG3}" | ကိုန်ဨကဝုစ်
! colspan="2" style="{BG3}" | ကိုန်ဗဟုဝစ်
|-
! style="{BG2}" | {gender}
! colspan="4" style="{BG2}" | သ္ကာတ်မလဟုတ်စှ်ေ
|-
! style="{BG3}" | မဒုၚ်ယၟု
| colspan="2" | {str_nom_s}
| colspan="2" | {str_nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| colspan="2" | {str_gen_s}
| colspan="2" | {str_gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| colspan="2" | {str_dat_s}
| colspan="2" | {str_dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| colspan="2" | {str_acc_s}
| colspan="2" | {str_acc_p}
|-
! style="{BG2}" |
! colspan="4" style="{BG2}" | ဇြဟတ်ဍိုန်မလဟုတ်စှ်ေ
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4};width:5em" | {art_def_nom_s}
| {wk_nom_s}
| style="{BG4};width:5em" | {art_def_nom_p}
| {wk_nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4};width:5em" | {art_def_gen_s}
| {wk_gen_s}
| style="{BG4};width:5em" | {art_def_gen_p}
| {wk_gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4};width:5em" | {art_def_dat_s}
| {wk_dat_s}
| style="{BG4};width:5em" | {art_def_dat_p}
| {wk_dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4};width:5em" | {art_def_acc_s}
| {wk_acc_s}
| style="{BG4};width:5em" | {art_def_acc_p}
| {wk_acc_p}
|-
! style="{BG2}" |
! colspan="4" style="{BG2}" | ဖက်ဖနှဴလဝ်မလဟုတ်စှ်ေ
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4};width:5em" | {art_ind_nom_s}
| {mix_nom_s}
| style="{BG4};width:5em" | {art_ind_nom_p}
| {mix_nom_p}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4};width:5em" | {art_ind_gen_s}
| {mix_gen_s}
| style="{BG4};width:5em" | {art_ind_gen_p}
| {mix_gen_p}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4};width:5em" | {art_ind_dat_s}
| {mix_dat_s}
| style="{BG4};width:5em" | {art_ind_dat_p}
| {mix_dat_p}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4};width:5em" | {art_ind_acc_s}
| {mix_acc_s}
| style="{BG4};width:5em" | {art_ind_acc_p}
| {mix_acc_p}
|{\cl}{notes_clause}</div></div>]=]
local adjectival_template_sg = [=[
<div class="NavFrame" style="width:500px">
<div class="NavHead">{title}{annotation}</div>
<div class="NavContent">
{| style="border: 1px solid var(--wikt-palette-darkgrey,#505050); border-collapse:collapse; {BG1}; text-align:center; width:100%" class="inflection-table"
! style="{BG3};width:15%" |
! colspan="2" style="{BG3}" | ကိုန်ဨကဝုစ်
|-
! style="{BG2}" | {gender}
! colspan="2" style="{BG2}" | သ္ကာတ်မလဟုတ်စှ်ေ
|-
! style="{BG3}" | မဒုၚ်ယၟု
| colspan="2" | {str_nom_s}
|-
! style="{BG3}" | ဗဳဇဂကူ
| colspan="2" | {str_gen_s}
|-
! style="{BG3}" | ပြကမ္မကာရက
| colspan="2" | {str_dat_s}
|-
! style="{BG3}" | ကမ္မကာရက
| colspan="2" | {str_acc_s}
|-
! style="{BG2}" |
! colspan="2" style="{BG2}" | ဇြဟတ်ဍိုန်မလဟုတ်စှ်ေ
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4};width:5em" | {art_def_nom_s}
| {wk_nom_s}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4};width:5em" | {art_def_gen_s}
| {wk_gen_s}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4};width:5em" | {art_def_dat_s}
| {wk_dat_s}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4};width:5em" | {art_def_acc_s}
| {wk_acc_s}
|-
! style="{BG2}" |
! colspan="2" style="{BG2}" | ဖက်ဖနှဴလဝ်မလဟုတ်စှ်ေ
|-
! style="{BG3}" | မဒုၚ်ယၟု
| style="{BG4};width:5em" | {art_ind_nom_s}
| {mix_nom_s}
|-
! style="{BG3}" | ဗဳဇဂကူ
| style="{BG4};width:5em" | {art_ind_gen_s}
| {mix_gen_s}
|-
! style="{BG3}" | ပြကမ္မကာရက
| style="{BG4};width:5em" | {art_ind_dat_s}
| {mix_dat_s}
|-
! style="{BG3}" | ကမ္မကာရက
| style="{BG4};width:5em" | {art_ind_acc_s}
| {mix_acc_s}
|{\cl}{notes_clause}</div></div>]=]
-- Footnote box shown below a declension table. make_table() formats it with the forms table (so {footnote}
-- is replaced by `forms.footnote`) and stores the result in `forms.notes_clause`; when `forms.footnote` is
-- the empty string the clause is left empty instead.
local notes_template = [===[
<div style="width:100%;text-align:left;background:var(--wikt-palette-lightblue,#d9ebff);color:inherit">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]===]
-- Build the wikitext of the declension table for an already-declined and display-formatted spec.
-- Fills in the auxiliary template fields on `alternant_multiword_spec.forms` (colours, title, annotation,
-- gender description, ablative/vocative rows, footnote box), picks the table template that matches the
-- kind of term and its number restriction, and returns the formatted template.
local function make_table(alternant_multiword_spec)
	local spec = alternant_multiword_spec
	local forms = spec.forms
	local props = spec.props
	local number = spec.number

	-- dark mode support
	forms.BG1 = "background:var(--wikt-palette-white,#ffffff);color:inherit"
	forms.BG2 = "background:var(--wikt-palette-grey,#9e9e9e);color:inherit"
	forms.BG3 = "background:var(--wikt-palette-lightgrey,#cccccc);color:inherit"
	forms.BG4 = "background:var(--wikt-palette-lightergrey,#eeeeee);color:inherit"

	-- An explicit title wins; otherwise generate one from the lemma.
	forms.title = spec.title or ('မလဟုတ်စှ်ေဆေၚ်စပ်ကဵု <i lang="de" class="Latn">' .. forms.lemma .. '</i>')

	local annotation = spec.annotation
	if annotation ~= "" then
		forms.annotation = " [<span style=\"font-size: smaller;\">" .. annotation .. "</span>]"
	else
		forms.annotation = ""
	end

	local table_spec
	if props.surname then
		table_spec = noun_template_surname
	elseif props.langname then
		table_spec = noun_template_langname
	elseif props.overall_adj then
		if number == "sg" then
			table_spec = adjectival_template_sg
		elseif number == "pl" then
			-- The plural-only table is derived from the singular-only one by swapping the number header
			-- and turning every singular slot reference into the corresponding plural one.
			table_spec = rsub(rsub(adjectival_template_sg, "ကိုန်ဨကဝုစ်", "ကိုန်ဗဟုဝစ်"), "_s}", "_p}")
		else
			table_spec = adjectival_template_both
		end
		if number == "pl" then
			-- No gender is shown for plural-only adjectival nouns.
			forms.gender = ""
		else
			local gender_names = {}
			for _, gender in ipairs(spec.genders) do
				gender_names[#gender_names + 1] = gender_spec_to_full_gender[gender.spec]
			end
			forms.gender = "''" .. table.concat(gender_names, " or ") .. " gender ''"
		end
	else
		-- Toponyms and nouns with a fixed article use the variants without the indefinite-article column.
		local no_indef = props.toponym or props.article
		if number == "sg" then
			table_spec = no_indef and noun_template_sg_no_indef or noun_template_sg
		elseif number == "pl" then
			table_spec = noun_template_pl
		else
			table_spec = no_indef and noun_template_both_no_indef or noun_template_both
		end
		-- "—" is the placeholder for a slot without forms; only emit the extra rows if either slot has one.
		if forms.abl_s == "—" and forms.voc_s == "—" then
			forms.abl_voc_clause = ""
		else
			forms.abl_voc_clause = m_string_utilities.format(noun_template_abl_voc, forms)
		end
	end

	if forms.footnote == "" then
		forms.notes_clause = ""
	else
		forms.notes_clause = m_string_utilities.format(notes_template, forms)
	end

	return m_string_utilities.format(table_spec, forms)
end
-- Externally callable function to parse and decline a noun given user-specified arguments. Return value is
-- ALTERNANT_MULTIWORD_SPEC, an object where the declined forms are in `ALTERNANT_MULTIWORD_SPEC.forms` for each slot.
-- If there are no values for a slot, the slot key will be missing. The value for a given slot is a list of objects
-- {form=FORM, footnotes=FOOTNOTES}.
--
-- Parameters:
-- * `parent_args`: the raw template arguments; validated with [[Module:parameters]].
-- * `pos`: part of speech used for categories; if nil/false it is derived below from the first lemma (or the
--   pagename).
-- * `from_headword`: if set, the headword-only parameters (head=, f=, m=, n=, dim=, sg=, id=, sort=, splithyph=,
--   nolinkhead=) are accepted, and the dim/f/m/n/sg specs are turned into forms at the end.
-- * `is_proper`: stored as `props.is_proper`; when set, the default number is singular-only instead of both.
-- * `def`: not referenced anywhere in this function body.
--
-- Besides `forms`, the returned object also has `args`, `props` and `pos` set here.
function export.do_generate_forms(parent_args, pos, from_headword, is_proper, def)
local params = {
[1] = {required = true, default = "Haus<n,es,^er>"},
pagename = {},
}
-- `pretend_from_headword` is not defined in this function; NOTE(review): presumably a module-level debugging
-- switch declared earlier in the file -- confirm.
if from_headword or pretend_from_headword then
params["head"] = {list = true}
params["f"] = {list = true}
params["m"] = {list = true}
params["n"] = {list = true}
params["dim"] = {list = true}
params["sg"] = {list = true}
params["id"] = {}
params["sort"] = {}
params["splithyph"] = {type = "boolean"}
params["nolinkhead"] = {type = "boolean"}
end
local args = require("Module:parameters").process(parent_args, params)
local arg1 = args[1]
local need_surrounding_angle_brackets = true
-- Check whether we need to add <...> around the argument. If the
-- argument has no < in it, we definitely do. Otherwise, we need to
-- parse the balanced [...] and <...> and add <...> only if there isn't
-- a top-level <...>. We check for [...] because there might be angle
-- brackets inside of them (HTML tags in qualifiers or <<name:...>> and
-- such in references).
if arg1:find("<") then
local segments = put.parse_multi_delimiter_balanced_segment_run(arg1, {{"<", ">"}, {"[", "]"}})
-- Only the even-numbered segments are inspected for a top-level <...>.
for i = 2, #segments, 2 do
if segments[i]:find("^<.*>$") then
need_surrounding_angle_brackets = false
break
end
end
end
if need_surrounding_angle_brackets then
arg1 = "<" .. arg1 .. ">"
end
-- pagename= overrides the title of the current page.
local pagename = args.pagename or mw.title.getCurrentTitle().text
-- Adapter so that the generic parser can call parse_indicator_spec() with the pagename bound.
local function do_parse_indicator_spec(angle_bracket_spec, lemma)
return parse_indicator_spec(angle_bracket_spec, lemma, pagename)
end
local parse_props = {
parse_indicator_spec = do_parse_indicator_spec,
allow_default_indicator = true,
allow_blank_lemma = true,
}
local alternant_multiword_spec = iut.parse_inflected_text(arg1, parse_props)
alternant_multiword_spec.args = args
alternant_multiword_spec.props = {}
alternant_multiword_spec.props.is_proper = is_proper
detect_all_indicator_specs(alternant_multiword_spec)
-- Proper nouns and toponyms default to singular-only; everything else defaults to both numbers.
local default_number =
(alternant_multiword_spec.props.is_proper or alternant_multiword_spec.props.toponym) and "sg" or "both"
propagate_properties(alternant_multiword_spec, "number", default_number, "both")
-- FIXME, maybe should check that noun genders match adjective genders
determine_adjectival_genders(alternant_multiword_spec)
determine_noun_status(alternant_multiword_spec)
local inflect_props = {
skip_slot = function(slot)
return skip_slot(alternant_multiword_spec.number, slot)
end,
-- Slot list depends on the kind of term: surname, language name, adjectival noun or plain noun.
slot_list = alternant_multiword_spec.props.surname and surname_slot_list_with_linked
or alternant_multiword_spec.props.langname and langname_slot_list_with_linked
or alternant_multiword_spec.props.overall_adj and adjectival_slot_list_with_linked
or noun_slot_list_with_linked,
inflect_word_spec = decline_noun_or_adjective,
}
iut.inflect_multiword_or_alternant_multiword_spec(alternant_multiword_spec, inflect_props)
compute_articles(alternant_multiword_spec)
compute_headword_genders(alternant_multiword_spec)
if not pos then
-- Compute part of speech for categories. Fetch the first lemma, or failing that (which would only happen
-- if the user overrides the nom_sg and nom_p to be missing) the pagename. If it begins with a hyphen,
-- it's a suffix, else a noun (proper nouns get categorized like nouns).
local lemmas = export.get_lemmas(alternant_multiword_spec)
local first_lemma = #lemmas > 0 and lemmas[1].form or pagename
pos = rfind(first_lemma, "^%-") and "အဆက်လက္ကရဴ" or "နာမ်"
end
alternant_multiword_spec.pos = pos
compute_categories_and_annotation(alternant_multiword_spec)
if from_headword or pretend_from_headword then
-- Headword-only extras: diminutives, feminine/masculine/neuter equivalents and singulars. The gender
-- equivalents use the already-computed *_equiv forms as literal (full-form) defaults.
process_dim_m_f_n(alternant_multiword_spec, args.dim, "^chen", nil, "dim", "diminutive")
process_dim_m_f_n(alternant_multiword_spec, args.f, alternant_multiword_spec.forms.f_equiv,
"literal default", "f", "feminine equivalent")
process_dim_m_f_n(alternant_multiword_spec, args.m, alternant_multiword_spec.forms.m_equiv,
"literal default", "m", "masculine equivalent")
process_dim_m_f_n(alternant_multiword_spec, args.n, alternant_multiword_spec.forms.n_equiv,
"literal default", "n", "neuter equivalent")
process_dim_m_f_n(alternant_multiword_spec, args.sg, nil, nil, "sg", "singular")
end
return alternant_multiword_spec
end
-- Entry point for {{de-ndecl}}. Template-callable: reads the arguments of the calling template, declines the
-- noun, and returns the wikitext of the declension table followed by the formatted categories.
function export.show(frame)
	local spec = export.do_generate_forms(frame:getParent().args)
	show_forms(spec)
	-- FIXME!
	spec.forms.decl_type = "foo"
	local table_text = make_table(spec)
	local category_text = require("Module:utilities").format_categories(
		spec.categories, lang, nil, nil, force_cat)
	return table_text .. category_text
end
-- Concatenate all forms of all slots into a single string of the form "SLOT=FORM,FORM,...|SLOT=FORM,FORM,...|...".
-- Embedded pipe symbols (as might occur in embedded links) are converted to <!>. If INCLUDE_PROPS is given, also
-- include additional properties (currently, g= for headword genders). This is for use by bots.
--
-- Slots without forms are omitted. The g= property lists the gender specs separated by commas. The entries of
-- `alternant_multiword_spec.genders` are objects carrying the spec in their `spec` field (that is how
-- make_table() reads them), so the specs are extracted before joining; passing the objects straight to
-- table.concat() would raise an error. Plain-string entries are accepted as well.
local function concat_forms(alternant_multiword_spec, include_props)
	local props = alternant_multiword_spec.props
	local slot_list = props.surname and surname_slot_list_with_linked
		or props.langname and langname_slot_list_with_linked
		or props.overall_adj and adjectival_slot_list_with_linked
		or noun_slot_list_with_linked

	local ins_text = {}
	for _, slotaccel in ipairs(slot_list) do
		-- Each entry is a {SLOT, ACCEL} pair; only the slot name is needed here.
		local slot = slotaccel[1]
		local formtext = iut.concat_forms_in_slot(alternant_multiword_spec.forms[slot])
		if formtext then
			table.insert(ins_text, slot .. "=" .. formtext)
		end
	end

	if include_props then
		local gender_specs = {}
		for _, gender in ipairs(alternant_multiword_spec.genders or {}) do
			local gender_spec = gender
			if type(gender) == "table" then
				gender_spec = gender.spec
			end
			if type(gender_spec) == "string" then
				table.insert(gender_specs, gender_spec)
			end
		end
		table.insert(ins_text, "g=" .. table.concat(gender_specs, ","))
	end

	return table.concat(ins_text, "|")
end
-- Template-callable function for bots: declines the noun described by the calling template's arguments and
-- returns every slot as one string in the format produced by concat_forms(). The invocation argument
-- include_props= is passed through to request the additional properties.
function export.generate_forms(frame)
	local want_props = frame.args["include_props"]
	local template_args = frame:getParent().args
	local spec = export.do_generate_forms(template_args)
	return concat_forms(spec, want_props)
end
return export
aoz3juao3gphtir1h0yyds7kbgdor9z
မဝ်ဂျူ:utilities/templates
828
41618
395132
371600
2026-05-19T12:01:13Z
咽頭べさ
33
395132
Scribunto
text/plain
local export = {}
local debug_track_module = "Module:debug/track"
local parameters_module = "Module:parameters"
local utilities_module = "Module:utilities"
local utilities_format_categories_with_sort_keys_module = "Module:utilities/format_categories_with_sort_keys"
local concat = table.concat
local insert = table.insert
local require = require
-- Lazy loader: the first call loads [[Module:utilities]], rebinds this local to its `format_categories`
-- function and forwards the arguments; later calls go directly to the loaded function.
local function format_categories(...)
format_categories = require(utilities_module).format_categories
return format_categories(...)
end
-- Lazy loader: the first call loads [[Module:utilities/format_categories_with_sort_keys]] (whose return value
-- is itself the function), rebinds this local to it and forwards the arguments.
local function format_categories_with_sort_keys(...)
format_categories_with_sort_keys = require(utilities_format_categories_with_sort_keys_module)
return format_categories_with_sort_keys(...)
end
-- Lazy loader: the first call loads [[Module:parameters]], rebinds this local to its `process` function and
-- forwards the arguments.
local function process_params(...)
process_params = require(parameters_module).process
return process_params(...)
end
-- Lazy loader: the first call loads [[Module:debug/track]] (whose return value is used as the tracking
-- function), rebinds this local to it and forwards the arguments.
local function track(...)
track = require(debug_track_module)
return track(...)
end
-- Used by {{catfix}}.
-- Reads the calling template's arguments: 1= is the language (required), sc= is an optional script, and 2= is
-- accepted as an alias of sc=. Returns whatever `catfix` in [[Module:utilities]] returns for that language and
-- script. The module is referenced through the file-level `utilities_module` constant, like the other loaders
-- in this file, rather than through a second hard-coded copy of the module name.
function export.catfix(frame)
	local args = process_params(frame:getParent().args, {
		[1] = {type = "language", required = true},
		[2] = {alias_of = "sc"},
		["sc"] = {type = "script"},
	})
	return require(utilities_module).catfix(args[1], args.sc)
end
-- Used by {{categorize}}, {{catlangname}} and {{topics}}.
-- Template arguments: 1= one or more languages, 2=, 3=, ... the category names (holes allowed), sortN= optional
-- per-category sort keys, sort= the default sort key, force= to force output. The invocation argument format=
-- selects the category prefix: "pos" prepends the language's full name and a space, "topic" prepends the full
-- language code and a colon, anything else prepends nothing. Returns the concatenated category wikitext for
-- all languages, or "" if no language was given.
function export.categorize(frame)
	local args = process_params(frame:getParent().args, {
		[1] = {required = true, type = "language", default = "und", sublist = true},
		[2] = {required = true, list = true, allow_holes = true},
		sort = {list = true, separate_no_index = true, allow_holes = true},
		force = {type = "boolean"},
	})
	local languages = args[1]
	if not languages[1] then
		return ""
	end

	local output = {}
	for _, lang in ipairs(languages) do
		local full_code = lang:getFullCode()
		if lang:getCode() ~= full_code then
			track("Module:utilities/templates/categorize called with variant langcode")
		end

		local raw_cats, sort_keys, format = args[2], args.sort, frame.args["format"]
		local default_sort = sort_keys.default

		local prefix
		if format == "pos" then
			prefix = lang:getFullName() .. " "
		elseif format == "topic" then
			prefix = full_code .. ":"
		else
			prefix = ""
		end

		-- Collect the prefixed category names and their individual sort keys side by side, skipping holes, and
		-- remember whether any category actually has its own sort key.
		local names, keys, count, any_individual_key = {}, {}, 0, false
		for i = 1, raw_cats.maxindex do
			local raw = raw_cats[i]
			if raw ~= nil then
				count = count + 1
				names[count] = prefix .. raw
				local key = sort_keys[i]
				keys[count] = key
				if key ~= nil then
					any_individual_key = true
				end
			end
		end

		if any_individual_key then
			-- [[Module:utilities/format_categories_with_sort_keys]] wants every category as a table with the
			-- category name and its (possibly missing) sort key.
			local cats = {}
			for j = 1, count do
				cats[j] = {category = names[j], sort_key = keys[j]}
			end
			insert(output, format_categories_with_sort_keys(cats, lang, default_sort, nil, args.force))
		else
			-- No individual sort keys: the plain list of names is enough.
			insert(output, format_categories(names, lang, default_sort, nil, args.force))
		end
	end
	return concat(output)
end
return export
s31ss6vfmg5spqyg1qbe9bav60gd1kv
မဝ်ဂျူ:utilities/format categories with sort keys
828
41619
395133
151025
2026-05-19T12:03:13Z
咽頭べさ
33
395133
Scribunto
text/plain
local data = mw.loadData("Module:utilities/format_categories/data")
--[[
Format a list of categories as category links, where each category may carry its
own sort key. CATEGORIES is a list of objects of the form
	{
		category = "Category name",
		sort_key = "Sort key" or nil
	}
A category's own sort_key applies to that category only; categories without one
use SORT_KEY, or, if that is missing or empty, a sort key generated by LANG from
SORT_BASE (defaulting to the page name) and the script SC. If LANG is missing,
the language with code "und" is used. Nothing is output (an empty string is
returned) unless FORCE_OUTPUT is set or the current page is in one of the
namespaces or pages allowed by [[Module:utilities/format_categories/data]].
]]
return function(categories, lang, sort_key, sort_base, force_output, sc)
	if type(lang) == "table" and not lang.getCode then
		error("The second argument to format_categories_with_sort_keys should be a language object.")
	end

	local current_title = mw.title.getCurrentTitle()
	local may_categorize = force_output
		or data.allowedNamespaces[current_title.nsText]
		or data.allowedPrefixedPages[current_title.prefixedText]
	if not may_categorize then
		return ""
	end

	local PAGENAME = mw.loadData("Module:headword/data").pagename
	lang = lang or require("Module:languages").getByCode("und")

	-- Generate a default sort key
	sort_base = lang:makeSortKey(sort_base or PAGENAME, sc)
	local default_key = (sort_key and sort_key ~= "") and sort_key or sort_base
	-- If the sortkey is empty, remove it.
	if default_key == "" then
		default_key = nil
	end

	local links = {}
	for index, entry in ipairs(categories) do
		local key = entry.sort_key or default_key
		-- An explicitly empty individual sort key is emitted as a single space.
		if key == "" then
			key = " "
		end
		local key_part = key and "|" .. key or ""
		links[index] = "[[ကဏ္ဍ:" .. entry.category .. key_part .. "]]"
	end
	return table.concat(links, "")
end
osfkkix8oetdulmbzf7h8p4uesmjsgn
ထာမ်ပလိက်:list:Latin script letter names/ko
10
79467
395167
104164
2026-05-20T03:19:41Z
YeBoy371
1415
395167
wikitext
text/x-wiki
{{list helper 2
|title=ယၟုဗျဉ်အက္ခရ်လပ်တေန်ဂမၠိုၚ်
|cat=
|list=<!--
-->{{ko-l|에이}}, <!--
-->{{ko-l|비}}, <!--
-->{{ko-l|시}}/{{ko-l|씨}}, <!--
-->{{ko-l|디}}, <!--
-->{{ko-l|이}}, <!--
-->{{ko-l|에프}}, <!--
-->{{ko-l|지}}, <!--
-->{{ko-l|에이치}}, <!--
-->{{ko-l|아이}}, <!--
-->{{ko-l|제이}}, <!--
-->{{ko-l|케이}}, <!--
-->{{ko-l|엘}}, <!--
-->{{ko-l|엠}}, <!--
-->{{ko-l|엔}}, <!--
-->{{ko-l|오}}, <!--
-->{{ko-l|피}}, <!--
-->{{ko-l|큐}}, <!--
-->{{ko-l|알}}/{{ko-l|아르}}, <!--
-->{{ko-l|에스}}, <!--
-->{{ko-l|티}}, <!--
-->{{ko-l|유}}, <!--
-->{{ko-l|브이}}, <!--
-->{{ko-l|더블유}}, <!--
-->{{ko-l|엑스}}, <!--
-->{{ko-l|와이}}, <!--
-->{{ko-l|제트}}/{{ko-l|지}}<!--
-->}}<!--
--><noinclude>{{list doc}}</noinclude>
5p3uslzx9rlxi20k4cjuxtrl33r6gmx
hamer
0
294749
395136
395129
2026-05-19T12:13:57Z
咽頭べさ
33
395136
wikitext
text/x-wiki
{{also|Hamer}}
==အေက်ဖရိကာန်==
===ဗွဟ်ရမ္သာၚ်===
* {{IPA|af|/ˈɦɑː.mər/|/ˈɦa.mər/}}
* {{audio|af|LL-Q14196 (afr)-Oesjaar-hamer.wav}}
* {{hyphenation|af|ha|mer}}
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{inh|af|nl|hamer}}၊ နကဵုအဆက်နူ {{inh|af|dum|hāmer}}၊ နကဵုမဆေၚ်စပ်ကဵုနူ {{inh|af|odt|*hamar}}၊ နူအဆက်နကဵု {{inh|af|gem-pro|*hamaraz}}
===နာမ်===
{{af-noun|s|hamertjie}}
# တ္ၚီ၊ ဝပ်တဲမရပ်စပ်သွက်ပွမၜုၚ်တက်ထတ်ထတ်။
# တ္ၚီအပ္ဍဲဗ္ဂပ်စုတ်တၞးကေက်ကေက်ဆေၚ်စပ်ကဵုစက်ယာန်နကဵုပရေၚ်သ္ဒးဒုၚ်ပန်သၞာတ်။
# တ္ၚီမွဲတၞုၚ်၊ တဝ်မှၚ်မသ္ကာတ်ဇြိုၚ်လ္တူဇမၠိၚ်မွဲသာ်နကဵုဗ္ဂပ်ပၠိုဟ်စုတ်ဇုက်လောတ်၊ မရပ်စပ်သွက်ပွမလဗိုတ်။
# တ္ၚီမွဲသာ်၊ ဒကုတ်ဒကာတ်မဆေၚ်စပ်ကဵုထၞမံၚ်၊ ဗာတ်ကေဲ ကဵု ဇုက်မတွိၚ်တက်ညံၚ်ရဴတူရိယာဂမၠိုၚ်။
# လေံဍံက်၊ မာလဳယာပ်သ်၊ ဇြေဟ်ဇုတ်မဇၞော်အိုတ်အပ္ဍဲသၠက္တောဝ်လဒေါဝ်။
#: {{syn|af|malleus|hamerbeentjie}}
#: {{hyper|af|gehoorbeentjie}}
===နိရုတ် ၂ ===
ဝေါဟာကၠုၚ်နူ {{inh|af|nl|hameren}}
===ကြိယာ===
{{af-verb}}
# သကဵုဂဒု။
==ဒါတ်==
===ဗွဟ်ရမ္သာၚ်===
* {{IPA|nl|/ˈɦaː.mər/}}
* {{audio|nl|Nl-hamer.ogg}}
* {{hyphenation|nl|ha|mer}}
* {{rhymes|nl|aːmər|s=2}}
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{inh|nl|dum|hāmer}}၊ နကဵုအဆက်နူ {{inh|nl|odt|*hamar}}၊ နကဵုမဆေၚ်စပ်ကဵုနူ {{inh|nl|gmw-pro|*hamar}}၊ နူအဆက်နကဵု {{inh|nl|gem-pro|*hamaraz}}
===နာမ်===
{{nl-noun|m|-s|+}}
# တ္ၚီ။
===မဒုၚ်လွဳစ===
* {{desc|af|hamer}}
* {{desc|brc|hambru}}
* {{desc|dcr|hammer|hambu}}
* {{desc|hns|hameri|bor=1}}
* {{desc|bor=1|jvn|amer|hamer}}
* {{desc|bor=1|jv|ꦲꦩꦼꦂ}}
* {{desc|umu|hámul|bor=1}}
* {{desc|srn|amra|bor=1|unc=1}}
** {{desc|djk|ambaa|bor=1}}
** {{desc|bor=1|srm|hám|áma}}
===ကြိယာ===
{{head|nl|verb form}}
# {{infl of|nl|hameren||bare-verb}}
==ဒါတ်အဒေါဝ်==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{inh|dum|odt|*hamar}}၊ နကဵုအဆက်နူ {{inh|dum|gem-pro|*hamaraz}}
===နာမ်===
{{dum-noun|head=hāmer|m}}
# တ္ၚီ။
===မဒုၚ်လွဳစ===
* {{desc|nl|hamer}}
* {{desc|li|hamer|hammer}}
==အၚ်္ဂလိက် အဒေါဝ်==
===ပွံၚ်နဲတၞဟ်===
* {{alter|enm|hamber|hamere|hammer|hammyr|hamyr}}
* {{alter|enm|hambir|hambyr|hamowre|hamur||EA}}; {{alter|enm|homer|homur||West Midlands}}
* {{alter|enm|hemmyre||Sc}}
===နိရုတ်===
{{inh+|enm|ang|hamor}}၊ နကဵုအဆက်နူ {{inh|enm|gmw-pro|*hamar}}၊ နကဵုမဆေၚ်စပ်ကဵုနူ {{inh|enm|gem-pro|*hamaraz}}၊ နူအဆက်နကဵု {{inh|enm|ine-pro|*h₂eḱmoros}}
===ဗွဟ်ရမ္သာၚ်===
* {{IPA|enm|/ˈhamər/|/ˈhaːmər/}}
* {{IPA|enm|/ˈhambər/|aa=EA}}
* {{IPA|enm|/ˈhɔmər/|aa=WM}}
===နာမ်===
{{enm-noun|hamers}}
# တ္ၚီ။
# မသ္ကာတ် ဝါ ပူဂဵုမဖျေံလဝ်ကဵုဒုဟ်ဒန်။
# ပရေၚ်မခ္ဍံက်လဝ်တရၚ်၊ ကပေါတ်ကရိယာသွက်ပွမခ္ဍံက်တရၚ်ဂမၠိုၚ်။
# ပသဲကရိယာသွက်ခၞိက်ပွမက္တဴ။
===မဒုၚ်လွဳစ===
* {{desc|en|hammer}}
** {{desc|hif|haamaa}}
* {{desc|gmw-msc|hamer|hammer|hemmer}}
** {{desc|sco|hammer|hemmer|haimer|haumer}}
==သာဗ်ခြဝ်ဨရှဳယာန်==
===နိရုတ်===
{{bor+|sh|de|Hammer}}
===ဗွဟ်ရမ္သာၚ်===
* {{IPA|sh|/xâmer/}}
* {{hyphenation|sh|ha|mer}}
===နာမ်===
{{sh-noun|hȁmer|m-in}}
# တ္ၚီ။
9ya4lx8bozaofoyy3mkw83egqhs6y6m
хамер
0
294751
395137
2026-05-19T12:16:03Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "==သာဗ်ခြဝ်ဨရှဳယာန်== ===နိရုတ်=== {{bor+|sh|de|Hammer}} ===ဗွဟ်ရမ္သာၚ်=== * {{IPA|sh|/xâmer/}} * {{hyphenation|sh|ха|мер}} ===နာမ်=== {{sh-noun|ха̏мер|m}} # တ္ၚီ။"
395137
wikitext
text/x-wiki
==သာဗ်ခြဝ်ဨရှဳယာန်==
===နိရုတ်===
{{bor+|sh|de|Hammer}}
===ဗွဟ်ရမ္သာၚ်===
* {{IPA|sh|/xâmer/}}
* {{hyphenation|sh|ха|мер}}
===နာမ်===
{{sh-noun|ха̏мер|m}}
# တ္ၚီ။
ba7jseja5xhyrhkfjb35j4w96z84nig
Hammer
0
294752
395138
2026-05-19T13:42:53Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{also|hammer|hämmer|Hämmer}} =={{=en=}}== ===နာမ်=== {{en-noun}} # ညးမဆက်စပ်ဒၟံၚ်ကဵုဂကောံဗဝ်ဝှက်သ်ဟာမ်ဂမၠိုၚ်။ ===နာမ်မကိတ်ညဳ=== {{en-proper noun}} # {{surname|en}} ==ဒိန်နေတ်== ===နာမ်မကိတ်ညဳ=== {{head|da|proper noun}} # {{surname|da|from=English}} ==ဂ..."
395138
wikitext
text/x-wiki
{{also|hammer|hämmer|Hämmer}}
=={{=en=}}==
===နာမ်===
{{en-noun}}
# ညးမဆက်စပ်ဒၟံၚ်ကဵုဂကောံဗဝ်ဝှက်သ်ဟာမ်ဂမၠိုၚ်။
===နာမ်မကိတ်ညဳ===
{{en-proper noun}}
# {{surname|en}}
==ဒိန်နေတ်==
===နာမ်မကိတ်ညဳ===
{{head|da|proper noun}}
# {{surname|da|from=English}}
==ဂျမာန်==
===ဗွဟ်ရမ္သာၚ်===
* {{IPA|de|/ˈhamɐ/}}
* {{rhymes|de|amɐ|s=2}}
* {{audio|de|De-Hammer.ogg|a=<<Germany>> (<<Berlin>>)}}
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{inh|de|gmh|hamer}}၊ နကဵုအဆက်နူ {{inh|de|goh|hamar}}၊ နကဵုမဆေၚ်စပ်ကဵုနူ {{inh|de|gmw-pro|*hamar}}၊ နူအဆက်နကဵု {{inh|de|gem-pro|*hamaraz}}၊ မဆက်ဆေန်နူ {{inh|de|ine-pro|*h₂eḱmoros}}
===နာမ်===
{{de-noun|m,,^:-|dim=^chen,^lein}}
# တ္ၚီ၊ ခတေက်။
#: {{syn|de|Mottek}}
# ပရေၚ်သ္ဂောံဒုၚ်စသိုၚ်အရီု၊ ညးမတၟေၚ်တၟဟ်မွဲမွဲဇကု။
# ပရေၚ်မပန်သၞာတ်အမၠံက်ထတ်ထတ်၊ ခ္ဍံက်မွဲဇြဟတ်။
=====မလဟုတ်စှ်ေ=====
{{de-ndecl|m,,^:-}}
===နာမ် ၂ ===
{{de-noun|m|f=in}}
# ဗီုပြၚ်အရာမွဲမွဲဆေၚ်စပ်ကဵုဍုၚ်နကဵုဝေါဟာ [[Hamm]]
====နာမဝိသေသန====
{{de-adj|indecl.pred:-}}
# အဆက်အစပ်နူဝေါဟာ [[Hamm]]
==ဂျာမာန် ပေန်သဲဗေနဳယျာ==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{inh|pdc|gmh|hamer}}၊ နကဵုအဆက်နူ {{inh|pdc|goh|hamar}}
===နာမ်===
{{head|pdc|noun|g=m|ကိုန်ဗဟုဝစ်|Hammer|ကိုန်ဗဟုဝစ်ဒုတိယ|Hemmer}}
# တ္ၚီ။
soy8n7lq8tdcgslbpeg3y4ohvjw1ucy
ထာမ်ပလိက်:de-ndecl
10
294753
395139
2026-05-19T13:44:08Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:de-noun|show}}<!-- --><noinclude>{{documentation}}</noinclude>"
395139
wikitext
text/x-wiki
{{#invoke:de-noun|show}}<!--
--><noinclude>{{documentation}}</noinclude>
r6aasf6ar29gmshejgtvg1yef8w8nh9
ထာမ်ပလိက်:de-ndecl/documentation
10
294754
395141
2026-05-19T14:26:30Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} {{uses lua|Module:de-noun}} ==Introduction== This template should be used to decline all German nouns and proper noun, in preference to any other, older template that may still exist. (Eventually these will all be eliminated.) ===Masculine and neuter nouns=== Generally, a single argument is supplied to the template, which specifies the gender and optionally the genitive singular and/or plura..."
395141
wikitext
text/x-wiki
{{documentation subpage}}
{{uses lua|Module:de-noun}}
==Introduction==
This template should be used to decline all German nouns and proper nouns, in preference to any other, older template that may still exist. (Eventually these will all be eliminated.)
===Masculine and neuter nouns===
Generally, a single argument is supplied to the template, which specifies the gender and optionally the genitive singular and/or plural. For example, for the noun {{m|de|Alarm}}, use:
{{temp|de-ndecl|m,s,e}}
which produces
{{de-ndecl|m,s,e|pagename=Alarm}}
Here, <code>m</code> specifies the gender, <code>s</code> the genitive singular ending, and <code>e</code> the plural ending. Note that the module is smart enough to add ''-n'' onto the dative plural automatically.
Sensible defaults are provided for the genitive singular and plural. In this case, the default genitive singular for most nouns, including {{m|de|Alarm}}, adds an ''-s'', and the default plural adds an ''-e'', so both the genitive singular and plural could be omitted:
{{temp|de-ndecl|m}}
which has the same result as above.
If there is more than one possibility for a given ending, separate the possibilities with a colon, as with {{m|de|Fisch||fish}}:
{{temp|de-ndecl|m,es:s,e}}
which produces
{{de-ndecl|m,es:s,e|pagename=Fisch}}
Because genitive singulars in either ''-es'' or ''-s'' are so common, a shortcut <code>(e)s</code> is provided. Along with the default plural, the above could be equivalently written:
{{temp|de-ndecl|m,(e)s}}
Other such shortcuts are <code>(s)</code> (either ''-s'' or no ending) and <code>(es)</code> (either ''-es'' or no ending).
Use <code>-</code> to specify a null ending, e.g. for {{m|de|Mädchen||girl}}, plural ''Mädchen'':
{{temp|de-ndecl|n,s,-}}
which produces
{{de-ndecl|n,s,-|pagename=Mädchen}}
===Feminine nouns===
Feminine nouns normally have a null ending in the genitive singular. As a result, the syntax for feminine nouns omits the genitive singular and specifies only the plural, as for {{m|de|Ordnung||arrangement, order, rank}}, plural ''Ordnungen'':
{{temp|de-ndecl|f,en}}
which produces
{{de-ndecl|f,en|pagename=Ordnung}}
Feminine nouns default to the plural ''-en'' (or just ''-n'' after ''e''), so this could equivalently be written:
{{temp|de-ndecl|f}}
===Umlaut===
To include umlaut, precede the ending with <code>^</code>. For example, for {{m|de|Haus||house}}, with plural ''Häuser'', use the following:
{{temp|de-ndecl|n,es,^er}}
which produces
{{de-ndecl|n,es,^er|pagename=Haus}}
The module knows the correct rules for umlaut, e.g. {{m|de|Apfel||apple}} umlauts in the plural as ''Äpfel'':
{{temp|de-ndecl|m,s,^}}
which produces
{{de-ndecl|m,s,^|pagename=Apfel}}
Since ''-s'' is the default genitive ending in most cases, this could equivalently be written:
{{temp|de-ndecl|m,,^}}
===Weak nouns===
Additional modifiers beyond the gender, genitive singular and plural are known as ''indicators''. These are specified following a period (<code>.</code>). One common indicator is <code>weak</code>, indicating a weak noun, e.g. {{m|de|Präsident||president}}:
{{temp|de-ndecl|m.weak}}
which produces
{{de-ndecl|m.weak|pagename=Präsident}}
The code will automatically add ''-n'' in place of ''-en'' if the noun ends in ''-e''. However, if the noun has ''-n'' in place of ''-en'' after a consonant, as with {{m|de|Herr}}, {{m|de|Nachbar}}, {{m|de|Satyr}} and their compounds, use <code>weak_n</code>. For example, one of the declensions of {{m|de|Nachbar}} could be indicated as follows:
{{temp|de-ndecl|m.weak_n}}
which produces
{{de-ndecl|m.weak_n|pagename=Nachbar}}
If the noun has ''-n'' in the singular but ''-en'' in the plural, as is typical for {{m|de|Herr}} and its compounds, specify the plural explicitly, as follows:
{{temp|de-ndecl|m,,en.weak_n}}
which produces
{{de-ndecl|m,,en.weak_n|pagename=Herr}}
For nouns with multiple possible declensions, e.g. weak or strong, use the [[#Alternants|alternant]] notation, as described below. For example, {{m|de|Diakon||deacon}}, which can be declined either strong or weak, might be indicated as follows:
{{temp|de-ndecl|((<m>,<m.weak>))}}
which produces
{{de-ndecl|((<m>,<m.weak>))|pagename=Diakon}}
The general syntax here is to surround each declension with angle brackets, separate them with commas and put double parens around the whole thing. More than two alternants are possible; e.g. for {{m|de|Satyr}}, which can be either strong, weak with genitive in ''-n'' or weak with genitive in ''-en'', use the following:
{{temp|de-ndecl|((<m,s,n>,<m.weak_n>,<m,,n.weak>))}}
which produces
{{de-ndecl|((<m,s,n>,<m.weak_n>,<m,,n.weak>))|pagename=Satyr}}
In this case, the plural is always in ''-n'', so the alternants that would by default produce other plurals must have the plural given explicitly.
===Additional indicators===
Besides <code>weak</code> and <code>weak_n</code>, other supported indicators are as follows:
* <code>sg</code>: singular-only
* <code>article</code>: indicate that the term is normally used with the definite article
* <code>nodatpln</code>: don't add ''-n'' onto the dative plural when it normally would be added
* <code>ss</code>: change ''-ß'' to ''-ss'' before an ending beginning with a vowel (as is common in pre-1996 spellings)
* <code>dat_with_e</code>: include dative variant in ''-e'' with footnote even if no genitive with ''-e-'' exists; see below
Examples:
1. <code>sg</code> for singular-only, e.g. {{m|de|Butter||butter}}:
{{temp|de-ndecl|f.sg}}
which produces
{{de-ndecl|f.sg|pagename=Butter}}
2. <code>article</code> for nouns normally used with the definite article, e.g. {{m|de|Sternenmeer||the starry night sky {{q|poetic}}}}:
{{temp|de-ndecl|n,s:es.sg.article}}
which produces
{{de-ndecl|n,s:es.sg.article|pagename=Sternenmeer}}
This indicator is especially useful in the headword template {{temp|de-noun}}, e.g.:
{{temp|de-noun|n,s:es.sg.article}}
which produces
{{de-noun|n,s:es.sg.article|pagename=Sternenmeer}}
3. <code>nodatpln</code> to suppress the final ''-n'' in the dative plural after a final ''-e'', ''-er'' or ''-el'', e.g. {{m|de|Retina||retina}}:
{{temp|de-ndecl|f,e.nodatpln}}
which produces
{{de-ndecl|f,e.nodatpln|pagename=Retina}}
4. <code>ss</code> to indicate that a final ''-ß'' changes to ''-ss'' before an ending beginning with a vowel (for pre-1996 spellings), e.g. {{m|de|Schluß||end, conclusion}} (superseded spelling):
{{temp|de-ndecl|m,es,^e.ss}}
which produces
{{de-ndecl|m,es,^e.ss|pagename=Schluß}}
5. <code>dat_with_e</code> to explicitly include the dative variant in ''-e'' with footnote even if no genitive with ''-e-'' exists, e.g. for {{m|de|Admiral||admiral}}:
{{temp|de-ndecl|m,,e:^e.dat_with_e}}
which produces
{{de-ndecl|m,,e:^e.dat_with_e|pagename=Admiral}}
Note that by default the variant in ''-e'' with associated footnote appears whenever a genitive form includes an ''-e-'' in it (genitive is specified as <code>es</code>, <code>(e)s</code> or <code>ses</code>). To suppress its appearance, as in recently-coined words, use the override <code>dat:-</code> (see [[#Overrides|Overrides]] below).
===Irregular genitives and plurals===
If the genitive or plural are sufficiently irregular, they cannot be specified using an ending and/or the use of <code>^</code> to indicate umlaut. In those cases, specify the full form, e.g. for {{m|de|Vakuum||vacuum}} with plural ''Vakua'' or ''Vakuen'':
{{temp|de-ndecl|n,s,Vakua:Vakuen}}
which produces
{{de-ndecl|n,s,Vakua:Vakuen|pagename=Vakuum}}
Note that the default plural of neuter nouns in ''-um'' changes the ''-um'' to ''-en'' (see below), so the following would equivalently work:
{{temp|de-ndecl|n,s,Vakua:+}}
Here, <code>+</code> explicitly requests the default.
A full form is recognized as such by beginning with a capital letter. In the rare case where a full form must be given and begins with a lowercase letter, precede the full form with <code>!</code>. For example, for {{m|de|deus ex machina}} (alternative letter-case form of {{m|de|Deus ex Machina}}), use:
{{temp|de-ndecl|m,-,!dei ex machina}}
which produces
{{de-ndecl|m,-,!dei ex machina|pagename=deus ex machina}}
===Angle-bracket notation===
An alternative notation is available, which specifies the lemma explicitly along with the declension. E.g. for {{m|de|Haus}}, the following could be used:
{{temp|de-ndecl|Haus<n,es,^er>}}
Since the lemma here is the same as the page name, it can be omitted, leaving only the angle brackets:
{{temp|de-ndecl|<n,es,^er>}}
Both notations are equivalent to the following:
{{temp|de-ndecl|n,es,^er}}
For single-word terms, angle brackets aren't usually needed, but they become necessary with [[#Multiword expressions|multiword expressions]] and [[#Alternants|alternants]]; see below.
===Default genitive and plural algorithms===
The following algorithm produces the default genitive singular:
# If the noun is feminine, use a null ending.
# Otherwise, if the noun is weak, use ''-n'' after ''-e'', ''-nen'' after consonant + ''-in'', and otherwise ''-en''.
# Otherwise, if the noun ends in ''-nis'' (neuter like {{m|de|Erlebnis}}, {{m|de|Geheimnis}}, etc. or occasional masculine like {{m|de|Firnis}}), use ''-ses''.
# Otherwise, if the noun ends in a consonant + ''-us'', use a null ending, as in {{m|de|Euphemismus}}, {{m|de|Exitus}}, {{m|de|Exodus}}, etc.
# Otherwise, if the noun ends in ''s/ß/x/z'', use ''-es''.
# Otherwise, use ''-s''.
The following algorithm produces the default plural:
# If the noun ends in ''-nis'' (neuter like {{m|de|Erlebnis}} or {{m|de|Geheimnis}}, feminine like {{m|de|Kenntnis}} or {{m|de|Wildnis}}, or occasional masculine like {{m|de|Firnis}}), use ''-se''.
# Otherwise, if the noun is feminine or weak, use ''-n'' after ''-e'', ''-nen'' after consonant + ''-in'', and otherwise ''-en''.
# Otherwise, if the noun ends in ''-e'', use ''-n''.
# Otherwise, if the noun is neuter and ends in ''-lein'', use a null ending.
# Otherwise, if the noun is neuter and ends in ''-um'' (e.g. {{m|de|Museum}} or {{m|de|Vakuum}}), replace the ''-um'' with ''-en''.
# Otherwise, if the noun ends in ''-mus'' (e.g. {{m|de|Algorithmus}} or {{m|de|Aphorismus}}), replace the ''-mus'' with ''-men''.
# Otherwise, if the noun ends in a consonant + ''-us'' (e.g. {{m|de|Abakus}}, {{m|de|Zirkus}}, {{m|de|Autobus}}), use ''-se''.
# Otherwise, if the noun ends in ''-el'', ''-em'', ''-en'' or ''-er'' (e.g. {{m|de|Adler}}, {{m|de|Meier}}, {{m|de|Riedel}}), use a null ending. (But this does not apply to nouns in a consonant + ''-eer/-ier'', ''-eel/-iel'', etc., as in {{m|de|Heer}}, {{m|de|Bier}}, {{m|de|Ziel}}, which default to ''-e'' as below.)
# Otherwise, use ''-e''.
===Nouns with multiple genders===
If a noun has more than one possible gender, separate the genders with a colon, e.g. {{m|de|Abszess||abscess}}:
{{temp|de-ndecl|m:n}}
which produces
{{de-ndecl|m:n|pagename=Abszess}}
You can also place a footnote/qualifier in brackets after a given gender. For example, {{m|de|Abszess}} is neuter mainly in Austria; to note this, use the following:
{{temp|de-ndecl|m:n[mainly in Austria]}}
Gender qualifiers do not appear in the declension table, but they do appear in the headword, which is specified like this:
{{temp|de-noun|m:n[mainly in Austria]}}
which produces
{{de-noun|m:n[mainly in Austria]|pagename=Abszess}}
===Plural-only nouns===
To specify a plural-only noun, use <code>p</code> in place of the gender, e.g. for {{m|de|Achtzigerjahre||the eighties {{q|decade}}}}:
{{temp|de-ndecl|p}}
which produces
{{de-ndecl|p|pagename=Achtzigerjahre}}
With plural-only nouns, you cannot specify a genitive or plural form, and if you do so, an error will result.
===Footnotes===
Footnotes can be indicated by placing text in brackets after a given form. For example, {{m|de|Geschlecht||gender, type}} has a modern plural ''Geschlechter'' as well as an obsolete plural ''Geschlechte''. To indicate this, use the following:
{{temp|de-ndecl|n,s:es,er:e[obsolete]}}
which produces
{{de-ndecl|n,s:es,er:e[obsolete]|pagename=Geschlecht}}
Such footnotes show up as qualifiers in the headword:
{{de-noun|n,s:es,er:e[obsolete]|pagename=Geschlecht}}
You can also footnote an individual gender, an override, or an entire declension. For example, {{m|de|Hanf}} is normally masculine but rarely neuter. Indicate as follows:
<code><nowiki>{{de-ndecl|m:n[rare],(e)s.sg}}</nowiki></code>
which produces
{{de-ndecl|m:n[rare],(e)s.sg|pagename=Hanf}}
The footnote shows up as a qualifier in the headword:
{{de-noun|m:n[rare],(e)s.sg|pagename=Hanf}}
To footnote an entire declension, place the footnote in brackets directly after a period. For examples, see [[#Alternants|Alternants]] below.
===Overrides===
You can override a particular case/number combination using the form <code>SLOT:VALUE:VALUE:...</code>, where <code>SLOT</code> specifies the particular slot to override, e.g. <code>acc</code> for accusative singular, <code>datpl</code> for dative plural. For example, for the noun {{m|de|Häusle||house {{q|diminutive}}}}, whose plural is ''Häusle'' and whose dative plural can be either ''Häuslen'' or ''Häusle'', use the following:
{{temp|de-ndecl|n,-:s,-.datpl:n:-}}
which produces
{{de-ndecl|n,-:s,-.datpl:n:-|pagename=Häusle}}
Another use of overrides is specifying a distinct genitive for feminine nouns. An example is {{m|de|Quinquagesima||[[Quinquagesima]]}}, which has an alternative Latinate genitive singular ''Quinquagesimä'', used mostly when no article is present. Specify as follows:
{{temp|de-ndecl|f.gen:-:Quinquagesimä[especially without an article].sg}}
which produces
{{de-ndecl|f.gen:-:Quinquagesimä[especially without an article].sg|pagename=Quinquagesima}}
Here, we specify a footnote in brackets, as described in the [[#Footnotes|Footnotes]] section above. For another example of using a footnote with an override, see the declension of {{m|de|Spätzle}} [[#Defective forms|below]].
Recognized slot names are as follows:
{|class="wikitable"
! case !! singular slot !! plural slot
|-
| nominative || <code>nom</code> || <code>nompl</code>
|-
| genitive || <code>gen</code> || <code>genpl</code>
|-
| accusative || <code>acc</code> || <code>accpl</code>
|-
| dative || <code>dat</code> || <code>datpl</code>
|-
| ablative || <code>abl</code> || —
|-
| vocative || <code>voc</code> || —
|}
The ablative and vocative cases are used only in certain terms with special Latinate declensions, e.g. {{m|de|Jesus Christus}}.
===Defective forms===
If a given form is missing entirely, use <code>--</code> to indicate this. An example is {{m|de|Spätzle||spätzle}}, which is missing the genitive singular. Indicate as follows:
{{temp|de-ndecl|n,--,-.datpl:-:n[uncommon]}}
which produces
{{de-ndecl|n,--,-.datpl:-:n[uncommon]|pagename=Spätzle}}
When used in the headword, it displays as follows:
{{de-noun|n,--,-.datpl:-:n[uncommon]|pagename=Spätzle}}
However, for nouns without a plural form, <code>.sg</code> should be used instead of <code>--</code>.
===Embedded commas and periods===
If you need to include a form that contains an embedded comma or period, precede the comma or period with a backslash. For example, for {{m|de|Vf.}} (abbreviation of {{m|de|Verfasser||author}}), with plural either ''Vf.'' or ''Vff.'', use the following:
{{temp|de-ndecl|m,-,-:Vff\.}}
which produces
{{de-ndecl|m,-,-:Vff\.|pagename=Vf.}}
===Multiword expressions===
To decline a multiword expression, include the text of the expression in {{para|1}} and put the spec for each word after the word, surrounded by angle brackets. For example, for {{m|de|Gesellschaft mit beschränkter Haftung||limited liability company}}:
<code><nowiki>{{de-ndecl|[[Gesellschaft]]<f> [[mit]] [[beschränkt|beschränkter]] [[Haftung]]}}</nowiki></code>
which produces
{{de-ndecl|[[Gesellschaft]]<f> [[mit]] [[beschränkt|beschränkter]] [[Haftung]]}}
Another example, for {{m|de|Jäger und Sammler||hunter-gatherer}}:
<code><nowiki>{{de-ndecl|[[Jäger]]<m> [[und]] [[Sammler]]<m>}}</nowiki></code>
which produces
{{de-ndecl|[[Jäger]]<m> [[und]] [[Sammler]]<m>}}
It is not necessary to surround each word with brackets, but when used in {{temp|de-noun}} it ensures that the words are individually linked in the headword.
Arbitrary specs can be included inside of angle brackets, e.g. for {{m|de|Mittelwort der Gegenwart||present participle}}:
<code><nowiki>{{de-ndecl|[[Mittelwort]]<n,(e)s,^er> [[der]] [[Gegenwart]]}}</nowiki></code>
which produces
{{de-ndecl|[[Mittelwort]]<n,(e)s,^er> [[der]] [[Gegenwart]]}}
For singular-only nouns, include the indicator <code>.sg</code> inside the angle brackets, as for {{m|de|Kreuz des Südens||[[Southern Cross]]}}:
<code><nowiki>{{de-ndecl|[[Kreuz]]<n.sg> [[des]] [[Süden]]s}}</nowiki></code>
which produces
{{de-ndecl|[[Kreuz]]<n.sg> [[des]] [[Süden]]s}}
Any number of declined components can be included, and will be handled correctly. An example with three is {{m|de|Heiliges Römisches Reich||[[Holy Roman Empire]]}}:
<code><nowiki>{{de-ndecl|[[heilig|Heiliges]]<+> [[römisch|Römisches]]<+> [[Reich]]<n,s:es.sg>}}</nowiki></code>
which produces
{{de-ndecl|[[heilig|Heiliges]]<+> [[römisch|Römisches]]<+> [[Reich]]<n,s:es.sg>}}
===Adjectival nouns and adjective-noun combinations===
Some nouns are declined like adjectives. An example is {{m|de|Erwachsener||adult}} (definite nominative ''der Erwachsene'', indefinite nominative ''ein Erwachsener'', definite genitive ''des Erwachsenen'', bare plural ''Erwachsene'', definite plural ''die Erwachsenen'', etc.). To indicate this, put a <code>+</code> in place of the declension:
{{temp|de-ndecl|+}}
which produces
{{de-ndecl|+|pagename=Erwachsener}}
The headword appears as follows:
{{de-noun|+|pagename=Erwachsener}}
Note how the declension table includes strong (i.e. bare), weak (i.e. definite) and mixed (i.e. indefinite) forms, and the headword includes both bare and definite versions of the nominative singular, genitive singular and nominative plural. For masculine and neuter nouns like this, the bare and definite genitive have the same ending and so the headword inflection combines the two, but for feminine nouns, they will be split. An example is {{m|de|Erwachsene||female adult}}:
{{temp|de-ndecl|+}}
which produces
{{de-ndecl|+|pagename=Erwachsene}}
The headword appears as follows:
{{de-noun|+|pagename=Erwachsene}}
You can likewise decline adjective-noun combinations using <code>+</code>, e.g. {{m|de|schwarzes Loch||black hole}}:
<code><nowiki>{{de-ndecl|[[schwarz]]es<+> [[Loch]]<n,(e)s,^er>}}</nowiki></code>
which produces
{{de-ndecl|[[schwarz]]es<+> [[Loch]]<n,(e)s,^er>}}
The headword appears as follows:
{{de-noun|[[schwarz]]es<+> [[Loch]]<n,(e)s,^er>}}
Here, the alternative notation using angle brackets must be used to indicate the declensions of the individual words. Links must be included in order for there to be links to individual words in the headword.
Note that the gender does not need to be specified in conjunction with <code>+</code>, because it can automatically be inferred from the ending. When used in conjunction with a noun, the plural status of the adjective does not need to be given either, as with {{m|de|Kanarische Inseln}}:
<code><nowiki>{{de-ndecl|[[kanarisch|Kanarische]]<+> [[Insel]]n<p>}}</nowiki></code>
which produces
{{de-ndecl|[[kanarisch|Kanarische]]<+> [[Insel]]n<p>}}
The headword appears as follows:
{{de-noun|[[kanarisch|Kanarische]]<+> [[Insel]]n<p>}}
However, this won't work for a plural-only adjectival noun, because the ''-e'' ending will be inferred as feminine singular. If for some reason you need to specify such a noun, use <code>p+</code> or <code>+p</code>, e.g. for {{m|de|Miese}}:
{{temp|de-ndecl|p+}}
which produces
{{de-ndecl|p+|pagename=Miese}}
The headword appears as follows:
{{de-noun|p+|pagename=Miese}}
===Alternants===
Sometimes a given expression has multiple possible declensions, in a way that can't easily be specified using the available possibilities for specifying multiple genders or alternative genitive or plural endings. For example, some nouns can be declined either strong or weak, and in some multiword expressions, one word may or may not be declined. An example is {{m|de|Hirsch}}, normally strong but sometimes weak (especially in Southern Germany and Austria). To indicate this, use a syntax like this: <code>((ALTERNANT1,ALTERNANT2,...))</code> where each alternant must have angle brackets in it:
{{temp|de-ndecl|((<m,(e)s>,<m.weak.[also in Southern Germany and Austria]>))}}
which produces
{{de-ndecl|((<m,(e)s>,<m.weak.[also in Southern Germany and Austria]>))|pagename=Hirsch}}
Here the first alternant is strong while the second is weak and includes an entire-declension footnote (see [[#Footnotes|Footnotes]] above). The headword appears as follows:
{{de-noun|((<m,(e)s>,<m.weak.[also in Southern Germany and Austria]>))|pagename=Hirsch}}
Another example is {{m|de|lüttje Lage||a beer with a shot of [[Korn]]}}, where ''lüttje'' (a Low German word) may or may not be inflected as a normal adjective:
<code><nowiki>{{de-ndecl|((lüttje<+> [[Lage]]<f>,lüttje [[Lage]]<f>))}}</nowiki></code>
which produces
{{de-ndecl|((lüttje<+> [[Lage]]<f>,lüttje [[Lage]]<f>))}}
The headword appears as follows:
{{de-noun|((lüttje<+> [[Lage]]<f>,lüttje [[Lage]]<f>))}}
Note that both the declension and headword use the adjectival form even though only one of the two alternants has an adjective in it. This is also an example where we purposely avoid linking one of the components, since ''lüttje'' by itself is not a German word.
<includeonly>
{{tcat}}
</includeonly>
kaqt66y5ul4u66s1f5ssdjb2whh7tdy
395143
395141
2026-05-19T14:34:17Z
咽頭べさ
33
395143
wikitext
text/x-wiki
{{documentation subpage}}
{{uses lua|Module:de-noun}}
==Introduction==
This template should be used to decline all German nouns and proper nouns, in preference to any other, older template that may still exist. (Eventually these will all be eliminated.)
===Masculine and neuter nouns===
Generally, a single argument is supplied to the template, which specifies the gender and optionally the genitive singular and/or plural. For example, for the noun {{m|de|Alarm}}, use:
{{temp|de-ndecl|m,s,e}}
which produces
{{de-ndecl|m,s,e|pagename=Alarm}}
Here, <code>m</code> specifies the gender, <code>s</code> the genitive singular ending, and <code>e</code> the plural ending. Note that the module is smart enough to add ''-n'' onto the dative plural automatically.
Sensible defaults are provided for the genitive singular and plural. In this case, the default genitive singular for most nouns, including {{m|de|Alarm}}, adds an ''-s'', and the default plural adds an ''-e'', so both the genitive singular and plural could be omitted:
{{temp|de-ndecl|m}}
which has the same result as above.
If there is more than one possibility for a given ending, separate the possibilities with a colon, as with {{m|de|Fisch||fish}}:
{{temp|de-ndecl|m,es:s,e}}
which produces
{{de-ndecl|m,es:s,e|pagename=Fisch}}
Because genitive singulars in either ''-es'' or ''-s'' are so common, a shortcut <code>(e)s</code> is provided. Along with the default plural, the above could be equivalently written:
{{temp|de-ndecl|m,(e)s}}
Other such shortcuts are <code>(s)</code> (either ''-s'' or no ending) and <code>(es)</code> (either ''-es'' or no ending).
Use <code>-</code> to specify a null ending, e.g. for {{m|de|Mädchen||girl}}, plural ''Mädchen'':
{{temp|de-ndecl|n,s,-}}
which produces
{{de-ndecl|n,s,-|pagename=Mädchen}}
===Feminine nouns===
Feminine nouns normally have a null ending in the genitive singular. As a result, the syntax for feminine nouns omits the genitive singular and specifies only the plural, as for {{m|de|Ordnung||arrangement, order, rank}}, plural ''Ordnungen'':
{{temp|de-ndecl|f,en}}
which produces
{{de-ndecl|f,en|pagename=Ordnung}}
Feminine nouns default to the plural ''-en'' (or just ''-n'' after ''e''), so this could equivalently be written:
{{temp|de-ndecl|f}}
===Umlaut===
To include umlaut, precede the ending with <code>^</code>. For example, for {{m|de|Haus||house}}, with plural ''Häuser'', use the following:
{{temp|de-ndecl|n,es,^er}}
which produces
{{de-ndecl|n,es,^er|pagename=Haus}}
The module knows the correct rules for umlaut, e.g. {{m|de|Apfel||apple}} umlauts in the plural as ''Äpfel'':
{{temp|de-ndecl|m,s,^}}
which produces
{{de-ndecl|m,s,^|pagename=Apfel}}
Since ''-s'' is the default genitive ending in most cases, this could equivalently be written:
{{temp|de-ndecl|m,,^}}
===Weak nouns===
Additional modifiers beyond the gender, genitive singular and plural are known as ''indicators''. These are specified following a period (<code>.</code>). One common indicator is <code>weak</code>, indicating a weak noun, e.g. {{m|de|Präsident||president}}:
{{temp|de-ndecl|m.weak}}
which produces
{{de-ndecl|m.weak|pagename=Präsident}}
The code will automatically add ''-n'' in place of ''-en'' if the noun ends in ''-e''. However, if the noun has ''-n'' in place of ''-en'' after a consonant, as with {{m|de|Herr}}, {{m|de|Nachbar}}, {{m|de|Satyr}} and their compounds, use <code>weak_n</code>. For example, one of the declensions of {{m|de|Nachbar}} could be indicated as follows:
{{temp|de-ndecl|m.weak_n}}
which produces
{{de-ndecl|m.weak_n|pagename=Nachbar}}
If the noun has ''-n'' in the singular but ''-en'' in the plural, as is typical for {{m|de|Herr}} and its compounds, specify the plural explicitly, as follows:
{{temp|de-ndecl|m,,en.weak_n}}
which produces
{{de-ndecl|m,,en.weak_n|pagename=Herr}}
For nouns with multiple possible declensions, e.g. weak or strong, use the [[#Alternants|alternant]] notation, as described below. For example, {{m|de|Diakon||deacon}}, which can be declined either strong or weak, might be indicated as follows:
{{temp|de-ndecl|((<m>,<m.weak>))}}
which produces
{{de-ndecl|((<m>,<m.weak>))|pagename=Diakon}}
The general syntax here is to surround each declension with angle brackets, separate them with commas and put double parens around the whole thing. More than two alternants are possible; e.g. for {{m|de|Satyr}}, which can be either strong, weak with genitive in ''-n'' or weak with genitive in ''-en'', use the following:
{{temp|de-ndecl|((<m,s,n>,<m.weak_n>,<m,,n.weak>))}}
which produces
{{de-ndecl|((<m,s,n>,<m.weak_n>,<m,,n.weak>))|pagename=Satyr}}
In this case, the plural is always in ''-n'', so the alternants that would by default produce other plurals must have the plural given explicitly.
===Additional indicators===
Besides <code>weak</code> and <code>weak_n</code>, other supported indicators are as follows:
* <code>sg</code>: singular-only
* <code>article</code>: indicate that the term is normally used with the definite article
* <code>nodatpln</code>: don't add ''-n'' onto the dative plural when it normally would be added
* <code>ss</code>: change ''-ß'' to ''-ss'' before an ending beginning with a vowel (as is common in pre-1996 spellings)
* <code>dat_with_e</code>: include dative variant in ''-e'' with footnote even if no genitive with ''-e-'' exists; see below
Examples:
1. <code>sg</code> for singular-only, e.g. {{m|de|Butter||butter}}:
{{temp|de-ndecl|f.sg}}
which produces
{{de-ndecl|f.sg|pagename=Butter}}
2. <code>article</code> for nouns normally used with the definite article, e.g. {{m|de|Sternenmeer||the starry night sky {{q|poetic}}}}:
{{temp|de-ndecl|n,s:es.sg.article}}
which produces
{{de-ndecl|n,s:es.sg.article|pagename=Sternenmeer}}
This indicator is especially useful in the headword template {{temp|de-noun}}, e.g.:
{{temp|de-noun|n,s:es.sg.article}}
which produces
{{de-noun|n,s:es.sg.article|pagename=Sternenmeer}}
3. <code>nodatpln</code> to suppress the final ''-n'' in the dative plural after a final ''-e'', ''-er'' or ''-el'', e.g. {{m|de|Retina||retina}}:
{{temp|de-ndecl|f,e.nodatpln}}
which produces
{{de-ndecl|f,e.nodatpln|pagename=Retina}}
4. <code>ss</code> to indicate that a final ''-ß'' changes to ''-ss'' before an ending beginning with a vowel (for pre-1996 spellings), e.g. {{m|de|Schluß||end, conclusion}} (superseded spelling):
{{temp|de-ndecl|m,es,^e.ss}}
which produces
{{de-ndecl|m,es,^e.ss|pagename=Schluß}}
5. <code>dat_with_e</code> to explicitly include the dative variant in ''-e'' with footnote even if no genitive with ''-e-'' exists, e.g. for {{m|de|Admiral||admiral}}:
{{temp|de-ndecl|m,,e:^e.dat_with_e}}
which produces
{{de-ndecl|m,,e:^e.dat_with_e|pagename=Admiral}}
Note that by default the variant in ''-e'' with associated footnote appears whenever a genitive form includes an ''-e-'' in it (genitive is specified as <code>es</code>, <code>(e)s</code> or <code>ses</code>). To suppress its appearance, as in recently-coined words, use the override <code>dat:-</code> (see [[#Overrides|Overrides]] below).
===Irregular genitives and plurals===
If the genitive or plural are sufficiently irregular, they cannot be specified using an ending and/or the use of <code>^</code> to indicate umlaut. In those cases, specify the full form, e.g. for {{m|de|Vakuum||vacuum}} with plural ''Vakua'' or ''Vakuen'':
{{temp|de-ndecl|n,s,Vakua:Vakuen}}
which produces
{{de-ndecl|n,s,Vakua:Vakuen|pagename=Vakuum}}
Note that the default plural of neuter nouns in ''-um'' changes the ''-um'' to ''-en'' (see below), so the following would equivalently work:
{{temp|de-ndecl|n,s,Vakua:+}}
Here, <code>+</code> explicitly requests the default.
A full form is recognized as such by beginning with a capital letter. In the rare case where a full form must be given and begins with a lowercase letter, precede the full form with <code>!</code>. For example, for {{m|de|deus ex machina}} (alternative letter-case form of {{m|de|Deus ex Machina}}), use:
{{temp|de-ndecl|m,-,!dei ex machina}}
which produces
{{de-ndecl|m,-,!dei ex machina|pagename=deus ex machina}}
===Angle-bracket notation===
An alternative notation is available, which specifies the lemma explicitly along with the declension. E.g. for {{m|de|Haus}}, the following could be used:
{{temp|de-ndecl|Haus<n,es,^er>}}
Since the lemma here is the same as the page name, it can be omitted, leaving only the angle brackets:
{{temp|de-ndecl|<n,es,^er>}}
Both notations are equivalent to the following:
{{temp|de-ndecl|n,es,^er}}
For single-word terms, angle brackets aren't usually needed, but they become necessary with [[#Multiword expressions|multiword expressions]] and [[#Alternants|alternants]]; see below.
===Default genitive and plural algorithms===
The following algorithm produces the default genitive singular:
# If the noun is feminine, use a null ending.
# Otherwise, if the noun is weak, use ''-n'' after ''-e'', ''-nen'' after consonant + ''-in'', and otherwise ''-en''.
# Otherwise, if the noun ends in ''-nis'' (neuter like {{m|de|Erlebnis}}, {{m|de|Geheimnis}}, etc. or occasional masculine like {{m|de|Firnis}}), use ''-ses''.
# Otherwise, if the noun ends in a consonant + ''-us'', use a null ending, as in {{m|de|Euphemismus}}, {{m|de|Exitus}}, {{m|de|Exodus}}, etc.
# Otherwise, if the noun ends in ''s/ß/x/z'', use ''-es''.
# Otherwise, use ''-s''.
The following algorithm produces the default plural:
# If the noun ends in ''-nis'' (neuter like {{m|de|Erlebnis}} or {{m|de|Geheimnis}}, feminine like {{m|de|Kenntnis}} or {{m|de|Wildnis}}, or occasional masculine like {{m|de|Firnis}}), use ''-se''.
# Otherwise, if the noun is feminine or weak, use ''-n'' after ''-e'', ''-nen'' after consonant + ''-in'', and otherwise ''-en''.
# Otherwise, if the noun ends in ''-e'', use ''-n''.
# Otherwise, if the noun is neuter and ends in ''-lein'', use a null ending.
# Otherwise, if the noun is neuter and ends in ''-um'' (e.g. {{m|de|Museum}} or {{m|de|Vakuum}}), replace the ''-um'' with ''-en''.
# Otherwise, if the noun ends in ''-mus'' (e.g. {{m|de|Algorithmus}} or {{m|de|Aphorismus}}), replace the ''-mus'' with ''-men''.
# Otherwise, if the noun ends in a consonant + ''-us'' (e.g. {{m|de|Abakus}}, {{m|de|Zirkus}}, {{m|de|Autobus}}), use ''-se''.
# Otherwise, if the noun ends in ''-el'', ''-em'', ''-en'' or ''-er'' (e.g. {{m|de|Adler}}, {{m|de|Meier}}, {{m|de|Riedel}}), use a null ending. (But this does not apply to nouns in a consonant + ''-eer/-ier'', ''-eel/-iel'', etc., as in {{m|de|Heer}}, {{m|de|Bier}}, {{m|de|Ziel}}, which default to ''-e'' as below.)
# Otherwise, use ''-e''.
===Nouns with multiple genders===
If a noun has more than one possible gender, separate the genders with a colon, e.g. {{m|de|Abszess||abscess}}:
{{temp|de-ndecl|m:n}}
which produces
{{de-ndecl|m:n|pagename=Abszess}}
You can also place a footnote/qualifier in brackets after a given gender. For example, {{m|de|Abszess}} is neuter mainly in Austria; to note this, use the following:
{{temp|de-ndecl|m:n[mainly in Austria]}}
Gender qualifiers do not appear in the declension table, but they do appear in the headword, which is specified like this:
{{temp|de-noun|m:n[mainly in Austria]}}
which produces
{{de-noun|m:n[mainly in Austria]|pagename=Abszess}}
===Plural-only nouns===
To specify a plural-only noun, use <code>p</code> in place of the gender, e.g. for {{m|de|Achtzigerjahre||the eighties {{q|decade}}}}:
{{temp|de-ndecl|p}}
which produces
{{de-ndecl|p|pagename=Achtzigerjahre}}
With plural-only nouns, you cannot specify a genitive or plural form, and if you do so, an error will result.
===Footnotes===
Footnotes can be indicated by placing text in brackets after a given form. For example, {{m|de|Geschlecht||gender, type}} has a modern plural ''Geschlechter'' as well as an obsolete plural ''Geschlechte''. To indicate this, use the following:
{{temp|de-ndecl|n,s:es,er:e[obsolete]}}
which produces
{{de-ndecl|n,s:es,er:e[obsolete]|pagename=Geschlecht}}
Such footnotes show up as qualifiers in the headword:
{{de-noun|n,s:es,er:e[obsolete]|pagename=Geschlecht}}
You can also footnote an individual gender, an override, or an entire declension. For example, {{m|de|Hanf}} is normally masculine but rarely neuter. Indicate as follows:
<code><nowiki>{{de-ndecl|m:n[rare],(e)s.sg}}</nowiki></code>
which produces
{{de-ndecl|m:n[rare],(e)s.sg|pagename=Hanf}}
The footnote shows up as a qualifier in the headword:
{{de-noun|m:n[rare],(e)s.sg|pagename=Hanf}}
To footnote an entire declension, place the footnote in brackets directly after a period. For examples, see [[#Alternants|Alternants]] below.
===Overrides===
You can override a particular case/number combination using the form <code>SLOT:VALUE:VALUE:...</code>, where <code>SLOT</code> specifies the particular slot to override, e.g. <code>acc</code> for accusative singular, <code>datpl</code> for dative plural. For example, for the noun {{m|de|Häusle||house {{q|diminutive}}}}, whose plural is ''Häusle'' and whose dative plural can be either ''Häuslen'' or ''Häusle'', use the following:
{{temp|de-ndecl|n,-:s,-.datpl:n:-}}
which produces
{{de-ndecl|n,-:s,-.datpl:n:-|pagename=Häusle}}
Another use of overrides is specifying a distinct genitive for feminine nouns. An example is {{m|de|Quinquagesima||[[Quinquagesima]]}}, which has an alternative Latinate genitive singular ''Quinquagesimä'', used mostly when no article is present. Specify as follows:
{{temp|de-ndecl|f.gen:-:Quinquagesimä[especially without an article].sg}}
which produces
{{de-ndecl|f.gen:-:Quinquagesimä[especially without an article].sg|pagename=Quinquagesima}}
Here, we specify a footnote in brackets, as described in the [[#Footnotes|Footnotes]] section above. For another example of using a footnote with an override, see the declension of {{m|de|Spätzle}} [[#Defective forms|below]].
Recognized slot names are as follows:
{|class="wikitable"
! case !! singular slot !! plural slot
|-
| nominative || <code>nom</code> || <code>nompl</code>
|-
| genitive || <code>gen</code> || <code>genpl</code>
|-
| accusative || <code>acc</code> || <code>accpl</code>
|-
| dative || <code>dat</code> || <code>datpl</code>
|-
| ablative || <code>abl</code> || —
|-
| vocative || <code>voc</code> || —
|}
The ablative and vocative cases are used only in certain terms with special Latinate declensions, e.g. {{m|de|Jesus Christus}}.
===Defective forms===
If a given form is missing entirely, use <code>--</code> to indicate this. An example is {{m|de|Spätzle||spätzle}}, which is missing the genitive singular. Indicate as follows:
{{temp|de-ndecl|n,--,-.datpl:-:n[uncommon]}}
which produces
{{de-ndecl|n,--,-.datpl:-:n[uncommon]|pagename=Spätzle}}
When used in the headword, it displays as follows:
{{de-noun|n,--,-.datpl:-:n[uncommon]|pagename=Spätzle}}
However, for nouns without a plural form, <code>.sg</code> should be used instead of <code>--</code>.
===Embedded commas and periods===
If you need to include a form that contains an embedded comma or period, precede the comma or period with a backslash. For example, for {{m|de|Vf.}} (abbreviation of {{m|de|Verfasser||author}}), with plural either ''Vf.'' or ''Vff.'', use the following:
{{temp|de-ndecl|m,-,-:Vff\.}}
which produces
{{de-ndecl|m,-,-:Vff\.|pagename=Vf.}}
===Multiword expressions===
To decline a multiword expression, include the text of the expression in {{para|1}} and put the spec for each word after the word, surrounded by angle brackets. For example, for {{m|de|Gesellschaft mit beschränkter Haftung||limited liability company}}:
<code><nowiki>{{de-ndecl|[[Gesellschaft]]<f> [[mit]] [[beschränkt|beschränkter]] [[Haftung]]}}</nowiki></code>
which produces
{{de-ndecl|[[Gesellschaft]]<f> [[mit]] [[beschränkt|beschränkter]] [[Haftung]]}}
Another example, for {{m|de|Jäger und Sammler||hunter-gatherer}}:
<code><nowiki>{{de-ndecl|[[Jäger]]<m> [[und]] [[Sammler]]<m>}}</nowiki></code>
which produces
{{de-ndecl|[[Jäger]]<m> [[und]] [[Sammler]]<m>}}
It is not necessary to surround each word with brackets, but when used in {{temp|de-noun}} it ensures that the words are individually linked in the headword.
Arbitrary specs can be included inside of angle brackets, e.g. for {{m|de|Mittelwort der Gegenwart||present participle}}:
<code><nowiki>{{de-ndecl|[[Mittelwort]]<n,(e)s,^er> [[der]] [[Gegenwart]]}}</nowiki></code>
which produces
{{de-ndecl|[[Mittelwort]]<n,(e)s,^er> [[der]] [[Gegenwart]]}}
For singular-only nouns, include the indicator <code>.sg</code> inside the angle brackets, as for {{m|de|Kreuz des Südens||[[Southern Cross]]}}:
<code><nowiki>{{de-ndecl|[[Kreuz]]<n.sg> [[des]] [[Süden]]s}}</nowiki></code>
which produces
{{de-ndecl|[[Kreuz]]<n.sg> [[des]] [[Süden]]s}}
Any number of declined components can be included, and will be handled correctly. An example with three is {{m|de|Heiliges Römisches Reich||[[Holy Roman Empire]]}}:
<code><nowiki>{{de-ndecl|[[heilig|Heiliges]]<+> [[römisch|Römisches]]<+> [[Reich]]<n,s:es.sg>}}</nowiki></code>
which produces
{{de-ndecl|[[heilig|Heiliges]]<+> [[römisch|Römisches]]<+> [[Reich]]<n,s:es.sg>}}
===Adjectival nouns and adjective-noun combinations===
Some nouns are declined like adjectives. An example is {{m|de|Erwachsener||adult}} (definite nominative ''der Erwachsene'', indefinite nominative ''ein Erwachsener'', definite genitive ''des Erwachsenen'', bare plural ''Erwachsene'', definite plural ''die Erwachsenen'', etc.). To indicate this, put a <code>+</code> in place of the declension:
{{temp|de-ndecl|+}}
which produces
{{de-ndecl|+|pagename=Erwachsener}}
The headword appears as follows:
{{de-noun|+|pagename=Erwachsener}}
Note how the declension table includes strong (i.e. bare), weak (i.e. definite) and mixed (i.e. indefinite) forms, and the headword includes both bare and definite versions of the nominative singular, genitive singular and nominative plural. For masculine and neuter nouns like this, the bare and definite genitive have the same ending and so the headword inflection combines the two, but for feminine nouns, they will be split. An example is {{m|de|Erwachsene||female adult}}:
{{temp|de-ndecl|+}}
which produces
{{de-ndecl|+|pagename=Erwachsene}}
The headword appears as follows:
{{de-noun|+|pagename=Erwachsene}}
You can likewise decline adjective-noun combinations using <code>+</code>, e.g. {{m|de|schwarzes Loch||black hole}}:
<code><nowiki>{{de-ndecl|[[schwarz]]es<+> [[Loch]]<n,(e)s,^er>}}</nowiki></code>
which produces
{{de-ndecl|[[schwarz]]es<+> [[Loch]]<n,(e)s,^er>}}
The headword appears as follows:
{{de-noun|[[schwarz]]es<+> [[Loch]]<n,(e)s,^er>}}
Here, the alternative notation using angle brackets must be used to indicate the declensions of the individual words. Links must be included in order for there to be links to individual words in the headword.
Note that the gender does not need to be specified in conjunction with <code>+</code>, because it can automatically be inferred from the ending. When used in conjunction with a noun, the plural status of the adjective does not need to be given either, as with {{m|de|Kanarische Inseln}}:
<code><nowiki>{{de-ndecl|[[kanarisch|Kanarische]]<+> [[Insel]]n<p>}}</nowiki></code>
which produces
{{de-ndecl|[[kanarisch|Kanarische]]<+> [[Insel]]n<p>}}
The headword appears as follows:
{{de-noun|[[kanarisch|Kanarische]]<+> [[Insel]]n<p>}}
However, this won't work for a plural-only adjectival noun, because the ''-e'' ending will be inferred as feminine singular. If for some reason you need to specify such a noun, use <code>p+</code> or <code>+p</code>, e.g. for {{m|de|Miese}}:
{{temp|de-ndecl|p+}}
which produces
{{de-ndecl|p+|pagename=Miese}}
The headword appears as follows:
{{de-noun|p+|pagename=Miese}}
===Alternants===
Sometimes a given expression has multiple possible declensions, in a way that can't easily be specified using the available possibilities for specifying multiple genders or alternative genitive or plural endings. For example, some nouns can be declined either strong or weak, and in some multiword expressions, one word may or may not be declined. An example is {{m|de|Hirsch}}, normally strong but sometimes weak (especially in Southern Germany and Austria). To indicate this, use a syntax like this: <code>((ALTERNANT1,ALTERNANT2,...))</code> where each alternant must have angle brackets in it:
{{temp|de-ndecl|((<m,(e)s>,<m.weak.[also in Southern Germany and Austria]>))}}
which produces
{{de-ndecl|((<m,(e)s>,<m.weak.[also in Southern Germany and Austria]>))|pagename=Hirsch}}
Here the first alternant is strong while the second is weak and includes an entire-declension footnote (see [[#Footnotes|Footnotes]] above). The headword appears as follows:
{{de-noun|((<m,(e)s>,<m.weak.[also in Southern Germany and Austria]>))|pagename=Hirsch}}
Another example is {{m|de|lüttje Lage||a beer with a shot of [[Korn]]}}, where ''lüttje'' (a Low German word) may or may not be inflected as a normal adjective:
<code><nowiki>{{de-ndecl|((lüttje<+> [[Lage]]<f>,lüttje [[Lage]]<f>))}}</nowiki></code>
which produces
{{de-ndecl|((lüttje<+> [[Lage]]<f>,lüttje [[Lage]]<f>))}}
The headword appears as follows:
{{de-noun|((lüttje<+> [[Lage]]<f>,lüttje [[Lage]]<f>))}}
Note that both the declension and headword use the adjectival form even though only one of the two alternants has an adjective in it. This is also an example where we purposely avoid linking one of the components, since ''lüttje'' by itself is not a German word.
<includeonly>
{{tcat|de:ndecl/table}}
</includeonly>
tb5ytg5yj8iak5911x4grenq8xjrxml
395144
395143
2026-05-19T14:37:13Z
咽頭べさ
33
395144
wikitext
text/x-wiki
{{documentation subpage}}
{{uses lua|Module:de-noun}}
==Introduction==
This template should be used to decline all German nouns and proper nouns, in preference to any other, older template that may still exist. (Eventually these will all be eliminated.)
===Masculine and neuter nouns===
Generally, a single argument is supplied to the template, which specifies the gender and optionally the genitive singular and/or plural. For example, for the noun {{m|de|Alarm}}, use:
{{temp|de-ndecl|m,s,e}}
which produces
{{de-ndecl|m,s,e|pagename=Alarm}}
Here, <code>m</code> specifies the gender, <code>s</code> the genitive singular ending, and <code>e</code> the plural ending. Note that the module is smart enough to add ''-n'' onto the dative plural automatically.
Sensible defaults are provided for the genitive singular and plural. In this case, the default genitive singular for most nouns, including {{m|de|Alarm}}, adds an ''-s'', and the default plural adds an ''-e'', so both the genitive singular and plural could be omitted:
{{temp|de-ndecl|m}}
which has the same result as above.
If there is more than one possibility for a given ending, separate the possibilities with a colon, as with {{m|de|Fisch||fish}}:
{{temp|de-ndecl|m,es:s,e}}
which produces
{{de-ndecl|m,es:s,e|pagename=Fisch}}
Because genitive singulars in either ''-es'' or ''-s'' are so common, a shortcut <code>(e)s</code> is provided. Along with the default plural, the above could be equivalently written:
{{temp|de-ndecl|m,(e)s}}
Other such shortcuts are <code>(s)</code> (either ''-s'' or no ending) and <code>(es)</code> (either ''-es'' or no ending).
Use <code>-</code> to specify a null ending, e.g. for {{m|de|Mädchen||girl}}, plural ''Mädchen'':
{{temp|de-ndecl|n,s,-}}
which produces
{{de-ndecl|n,s,-|pagename=Mädchen}}
===Feminine nouns===
Feminine nouns normally have a null ending in the genitive singular. As a result, the syntax for feminine nouns omits the genitive singular and specifies only the plural, as for {{m|de|Ordnung||arrangement, order, rank}}, plural ''Ordnungen'':
{{temp|de-ndecl|f,en}}
which produces
{{de-ndecl|f,en|pagename=Ordnung}}
Feminine nouns default to the plural ''-en'' (or just ''-n'' after ''e''), so this could equivalently be written:
{{temp|de-ndecl|f}}
===Umlaut===
To include umlaut, precede the ending with <code>^</code>. For example, for {{m|de|Haus||house}}, with plural ''Häuser'', use the following:
{{temp|de-ndecl|n,es,^er}}
which produces
{{de-ndecl|n,es,^er|pagename=Haus}}
The module knows the correct rules for umlaut, e.g. {{m|de|Apfel||apple}} umlauts in the plural as ''Äpfel'':
{{temp|de-ndecl|m,s,^}}
which produces
{{de-ndecl|m,s,^|pagename=Apfel}}
Since ''-s'' is the default genitive ending in most cases, this could equivalently be written:
{{temp|de-ndecl|m,,^}}
===Weak nouns===
Additional modifiers beyond the gender, genitive singular and plural are known as ''indicators''. These are specified following a period (<code>.</code>). One common indicator is <code>weak</code>, indicating a weak noun, e.g. {{m|de|Präsident||president}}:
{{temp|de-ndecl|m.weak}}
which produces
{{de-ndecl|m.weak|pagename=Präsident}}
The code will automatically add ''-n'' in place of ''-en'' if the noun ends in ''-e''. However, if the noun has ''-n'' in place of ''-en'' after a consonant, as with {{m|de|Herr}}, {{m|de|Nachbar}}, {{m|de|Satyr}} and their compounds, use <code>weak_n</code>. For example, one of the declensions of {{m|de|Nachbar}} could be indicated as follows:
{{temp|de-ndecl|m.weak_n}}
which produces
{{de-ndecl|m.weak_n|pagename=Nachbar}}
If the noun has ''-n'' in the singular but ''-en'' in the plural, as is typical for {{m|de|Herr}} and its compounds, specify the plural explicitly, as follows:
{{temp|de-ndecl|m,,en.weak_n}}
which produces
{{de-ndecl|m,,en.weak_n|pagename=Herr}}
For nouns with multiple possible declensions, e.g. weak or strong, use the [[#Alternants|alternant]] notation, as described below. For example, {{m|de|Diakon||deacon}}, which can be declined either strong or weak, might be indicated as follows:
{{temp|de-ndecl|((<m>,<m.weak>))}}
which produces
{{de-ndecl|((<m>,<m.weak>))|pagename=Diakon}}
The general syntax here is to surround each declension with angle brackets, separate them with commas and put double parens around the whole thing. More than two alternants are possible; e.g. for {{m|de|Satyr}}, which can be either strong, weak with genitive in ''-n'' or weak with genitive in ''-en'', use the following:
{{temp|de-ndecl|((<m,s,n>,<m.weak_n>,<m,,n.weak>))}}
which produces
{{de-ndecl|((<m,s,n>,<m.weak_n>,<m,,n.weak>))|pagename=Satyr}}
In this case, the plural is always in ''-n'', so the alternants that would by default produce other plurals must have the plural given explicitly.
===Additional indicators===
Besides <code>weak</code> and <code>weak_n</code>, other supported indicators are as follows:
* <code>sg</code>: singular-only
* <code>article</code>: indicate that the term is normally used with the definite article
* <code>nodatpln</code>: don't add ''-n'' onto the dative plural when it normally would be added
* <code>ss</code>: change ''-ß'' to ''-ss'' before an ending beginning with a vowel (as is common in pre-1996 spellings)
* <code>dat_with_e</code>: include dative variant in ''-e'' with footnote even if no genitive with ''-e-'' exists; see below
Examples:
1. <code>sg</code> for singular-only, e.g. {{m|de|Butter||butter}}:
{{temp|de-ndecl|f.sg}}
which produces
{{de-ndecl|f.sg|pagename=Butter}}
2. <code>article</code> for nouns normally used with the definite article, e.g. {{m|de|Sternenmeer||the starry night sky {{q|poetic}}}}:
{{temp|de-ndecl|n,s:es.sg.article}}
which produces
{{de-ndecl|n,s:es.sg.article|pagename=Sternenmeer}}
This indicator is especially useful in the headword template {{temp|de-noun}}, e.g.:
{{temp|de-noun|n,s:es.sg.article}}
which produces
{{de-noun|n,s:es.sg.article|pagename=Sternenmeer}}
3. <code>nodatpln</code> to suppress the final ''-n'' in the dative plural after a final ''-e'', ''-er'' or ''-el'', e.g. {{m|de|Retina||retina}}:
{{temp|de-ndecl|f,e.nodatpln}}
which produces
{{de-ndecl|f,e.nodatpln|pagename=Retina}}
4. <code>ss</code> to indicate that a final ''-ß'' changes to ''-ss'' before an ending beginning with a vowel (for pre-1996 spellings), e.g. {{m|de|Schluß||end, conclusion}} (superseded spelling):
{{temp|de-ndecl|m,es,^e.ss}}
which produces
{{de-ndecl|m,es,^e.ss|pagename=Schluß}}
5. <code>dat_with_e</code> to explicitly include the dative variant in ''-e'' with footnote even if no genitive with ''-e-'' exists, e.g. for {{m|de|Admiral||admiral}}:
{{temp|de-ndecl|m,,e:^e.dat_with_e}}
which produces
{{de-ndecl|m,,e:^e.dat_with_e|pagename=Admiral}}
Note that by default the variant in ''-e'' with associated footnote appears whenever a genitive form includes an ''-e-'' in it (genitive is specified as <code>es</code>, <code>(e)s</code> or <code>ses</code>). To suppress its appearance, as in recently-coined words, use the override <code>dat:-</code> (see [[#Overrides|Overrides]] below).
===Irregular genitives and plurals===
If the genitive or plural are sufficiently irregular, they cannot be specified using an ending and/or the use of <code>^</code> to indicate umlaut. In those cases, specify the full form, e.g. for {{m|de|Vakuum||vacuum}} with plural ''Vakua'' or ''Vakuen'':
{{temp|de-ndecl|n,s,Vakua:Vakuen}}
which produces
{{de-ndecl|n,s,Vakua:Vakuen|pagename=Vakuum}}
Note that the default plural of neuter nouns in ''-um'' changes the ''-um'' to ''-en'' (see below), so the following would equivalently work:
{{temp|de-ndecl|n,s,Vakua:+}}
Here, <code>+</code> explicitly requests the default.
A full form is recognized as such by beginning with a capital letter. In the rare case where a full form must be given and begins with a lowercase letter, precede the full form with <code>!</code>. For example, for {{m|de|deus ex machina}} (alternative letter-case form of {{m|de|Deus ex Machina}}), use:
{{temp|de-ndecl|m,-,!dei ex machina}}
which produces
{{de-ndecl|m,-,!dei ex machina|pagename=deus ex machina}}
===Angle-bracket notation===
An alternative notation is available, which specifies the lemma explicitly along with the declension. E.g. for {{m|de|Haus}}, the following could be used:
{{temp|de-ndecl|Haus<n,es,^er>}}
When the lemma attached to the angle brackets is the same as the page name, as is the case here, it can be omitted:
{{temp|de-ndecl|<n,es,^er>}}
Both notations are equivalent to the following:
{{temp|de-ndecl|n,es,^er}}
For single-word terms, angle brackets aren't usually needed, but they become necessary with [[#Multiword expressions|multiword expressions]] and [[#Alternants|alternants]]; see below.
===Default genitive and plural algorithms===
The following algorithm produces the default genitive singular:
# If the noun is feminine, use a null ending.
# Otherwise, if the noun is weak, use ''-n'' after ''-e'', ''-nen'' after consonant + ''-in'', and otherwise ''-en''.
# Otherwise, if the noun ends in ''-nis'' (neuter like {{m|de|Erlebnis}}, {{m|de|Geheimnis}}, etc. or occasional masculine like {{m|de|Firnis}}), use ''-ses''.
# Otherwise, if the noun ends in a consonant + ''-us'', use a null ending, as in {{m|de|Euphemismus}}, {{m|de|Exitus}}, {{m|de|Exodus}}, etc.
# Otherwise, if the noun ends in ''s/ß/x/z'', use ''-es''.
# Otherwise, use ''-s''.
The following algorithm produces the default plural:
# If the noun ends in ''-nis'' (neuter like {{m|de|Erlebnis}} or {{m|de|Geheimnis}}, feminine like {{m|de|Kenntnis}} or {{m|de|Wildnis}}, or occasional masculine like {{m|de|Firnis}}), use ''-se''.
# Otherwise, if the noun is feminine or weak, use ''-n'' after ''-e'', ''-nen'' after consonant + ''-in'', and otherwise ''-en''.
# Otherwise, if the noun ends in ''-e'', use ''-n''.
# Otherwise, if the noun is neuter and ends in ''-lein'', use a null ending.
# Otherwise, if the noun is neuter and ends in ''-um'' (e.g. {{m|de|Museum}} or {{m|de|Vakuum}}), replace the ''-um'' with ''-en''.
# Otherwise, if the noun ends in ''-mus'' (e.g. {{m|de|Algorithmus}} or {{m|de|Aphorismus}}), replace the ''-mus'' with ''-men''.
# Otherwise, if the noun ends in a consonant + ''-us'' (e.g. {{m|de|Abakus}}, {{m|de|Zirkus}}, {{m|de|Autobus}}), use ''-se''.
# Otherwise, if the noun ends in ''-el'', ''-em'', ''-en'' or ''-er'' (e.g. {{m|de|Adler}}, {{m|de|Meier}}, {{m|de|Riedel}}), use a null ending. (But this does not apply to nouns in a consonant + ''-eer/-ier'', ''-eel/-iel'', etc., as in {{m|de|Heer}}, {{m|de|Bier}}, {{m|de|Ziel}}, which default to ''-e'' as below.)
# Otherwise, use ''-e''.
===Nouns with multiple genders===
If a noun has more than one possible gender, separate the genders with a colon, e.g. {{m|de|Abszess||abscess}}:
{{temp|de-ndecl|m:n}}
which produces
{{de-ndecl|m:n|pagename=Abszess}}
You can also place a footnote/qualifier in brackets after a given gender. For example, {{m|de|Abszess}} is neuter mainly in Austria; to note this, use the following:
{{temp|de-ndecl|m:n[mainly in Austria]}}
Gender qualifiers do not appear in the declension table, but they do appear in the headword, which is specified like this:
{{temp|de-noun|m:n[mainly in Austria]}}
which produces
{{de-noun|m:n[mainly in Austria]|pagename=Abszess}}
===Plural-only nouns===
To specify a plural-only noun, use <code>p</code> in place of the gender, e.g. for {{m|de|Achtzigerjahre||the eighties {{q|decade}}}}:
{{temp|de-ndecl|p}}
which produces
{{de-ndecl|p|pagename=Achtzigerjahre}}
With plural-only nouns, you cannot specify a genitive or plural form, and if you do so, an error will result.
===Footnotes===
Footnotes can be indicated by placing text in brackets after a given form. For example, {{m|de|Geschlecht||gender, type}} has a modern plural ''Geschlechter'' as well as an obsolete plural ''Geschlechte''. To indicate this, use the following:
{{temp|de-ndecl|n,s:es,er:e[obsolete]}}
which produces
{{de-ndecl|n,s:es,er:e[obsolete]|pagename=Geschlecht}}
Such footnotes show up as qualifiers in the headword:
{{de-noun|n,s:es,er:e[obsolete]|pagename=Geschlecht}}
You can also footnote an individual gender, an override, or an entire declension. For example, {{m|de|Hanf}} is normally masculine but rarely neuter. Indicate as follows:
<code><nowiki>{{de-ndecl|m:n[rare],(e)s.sg}}</nowiki></code>
which produces
{{de-ndecl|m:n[rare],(e)s.sg|pagename=Hanf}}
The footnote shows up as a qualifier in the headword:
{{de-noun|m:n[rare],(e)s.sg|pagename=Hanf}}
To footnote an entire declension, place the footnote in brackets directly after a period. For examples, see [[#Alternants|Alternants]] below.
===Overrides===
You can override a particular case/number combination using the form <code>SLOT:VALUE:VALUE:...</code>, where <code>SLOT</code> specifies the particular slot to override, e.g. <code>acc</code> for accusative singular, <code>datpl</code> for dative plural. For example, for the noun {{m|de|Häusle||house {{q|diminutive}}}}, whose plural is ''Häusle'' and whose dative plural can be either ''Häuslen'' or ''Häusle'', use the following:
{{temp|de-ndecl|n,-:s,-.datpl:n:-}}
which produces
{{de-ndecl|n,-:s,-.datpl:n:-|pagename=Häusle}}
Another use of overrides is specifying a distinct genitive for feminine nouns. An example is {{m|de|Quinquagesima||[[Quinquagesima]]}}, which has an alternative Latinate genitive singular ''Quinquagesimä'', used mostly when no article is present. Specify as follows:
{{temp|de-ndecl|f.gen:-:Quinquagesimä[especially without an article].sg}}
which produces
{{de-ndecl|f.gen:-:Quinquagesimä[especially without an article].sg|pagename=Quinquagesima}}
Here, we specify a footnote in brackets, as described in the [[#Footnotes|Footnotes]] section above. For another example of using a footnote with an override, see the declension of {{m|de|Spätzle}} [[#Defective forms|below]].
Recognized slot names are as follows:
{|class="wikitable"
! case !! singular slot !! plural slot
|-
| nominative || <code>nom</code> || <code>nompl</code>
|-
| genitive || <code>gen</code> || <code>genpl</code>
|-
| accusative || <code>acc</code> || <code>accpl</code>
|-
| dative || <code>dat</code> || <code>datpl</code>
|-
| ablative || <code>abl</code> || —
|-
| vocative || <code>voc</code> || —
|}
The ablative and vocative cases are used only in certain terms with special Latinate declensions, e.g. {{m|de|Jesus Christus}}.
===Defective forms===
If a given form is missing entirely, use <code>--</code> to indicate this. An example is {{m|de|Spätzle||spätzle}}, which is missing the genitive singular. Indicate as follows:
{{temp|de-ndecl|n,--,-.datpl:-:n[uncommon]}}
which produces
{{de-ndecl|n,--,-.datpl:-:n[uncommon]|pagename=Spätzle}}
When used in the headword, it displays as follows:
{{de-noun|n,--,-.datpl:-:n[uncommon]|pagename=Spätzle}}
However, for nouns without a plural form, <code>.sg</code> should be used instead of <code>--</code>.
===Embedded commas and periods===
If you need to include a form that contains an embedded comma or period, precede the comma or period with a backslash. For example, for {{m|de|Vf.}} (abbreviation of {{m|de|Verfasser||author}}), with plural either ''Vf.'' or ''Vff.'', use the following:
{{temp|de-ndecl|m,-,-:Vff\.}}
which produces
{{de-ndecl|m,-,-:Vff\.|pagename=Vf.}}
===Multiword expressions===
To decline a multiword expression, include the text of the expression in {{para|1}} and put the spec for each word after the word, surrounded by angle brackets. For example, for {{m|de|Gesellschaft mit beschränkter Haftung||limited liability company}}:
<code><nowiki>{{de-ndecl|[[Gesellschaft]]<f> [[mit]] [[beschränkt|beschränkter]] [[Haftung]]}}</nowiki></code>
which produces
{{de-ndecl|[[Gesellschaft]]<f> [[mit]] [[beschränkt|beschränkter]] [[Haftung]]}}
Another example, for {{m|de|Jäger und Sammler||hunter-gatherer}}:
<code><nowiki>{{de-ndecl|[[Jäger]]<m> [[und]] [[Sammler]]<m>}}</nowiki></code>
which produces
{{de-ndecl|[[Jäger]]<m> [[und]] [[Sammler]]<m>}}
It is not necessary to surround each word with brackets, but when used in {{temp|de-noun}} it ensures that the words are individually linked in the headword.
Arbitrary specs can be included inside of angle brackets, e.g. for {{m|de|Mittelwort der Gegenwart||present participle}}:
<code><nowiki>{{de-ndecl|[[Mittelwort]]<n,(e)s,^er> [[der]] [[Gegenwart]]}}</nowiki></code>
which produces
{{de-ndecl|[[Mittelwort]]<n,(e)s,^er> [[der]] [[Gegenwart]]}}
For singular-only nouns, include the indicator <code>.sg</code> inside the angle brackets, as for {{m|de|Kreuz des Südens||[[Southern Cross]]}}:
<code><nowiki>{{de-ndecl|[[Kreuz]]<n.sg> [[des]] [[Süden]]s}}</nowiki></code>
which produces
{{de-ndecl|[[Kreuz]]<n.sg> [[des]] [[Süden]]s}}
Any number of declined components can be included, and will be handled correctly. An example with three is {{m|de|Heiliges Römisches Reich||[[Holy Roman Empire]]}}:
<code><nowiki>{{de-ndecl|[[heilig|Heiliges]]<+> [[römisch|Römisches]]<+> [[Reich]]<n,s:es.sg>}}</nowiki></code>
which produces
{{de-ndecl|[[heilig|Heiliges]]<+> [[römisch|Römisches]]<+> [[Reich]]<n,s:es.sg>}}
===Adjectival nouns and adjective-noun combinations===
Some nouns are declined like adjectives. An example is {{m|de|Erwachsener||adult}} (definite nominative ''der Erwachsene'', indefinite nominative ''ein Erwachsener'', definite genitive ''des Erwachsenen'', bare plural ''Erwachsene'', definite plural ''die Erwachsenen'', etc.). To indicate this, put a <code>+</code> in place of the declension:
{{temp|de-ndecl|+}}
which produces
{{de-ndecl|+|pagename=Erwachsener}}
The headword appears as follows:
{{de-noun|+|pagename=Erwachsener}}
Note how the declension table includes strong (i.e. bare), weak (i.e. definite) and mixed (i.e. indefinite) forms, and the headword includes both bare and definite versions of the nominative singular, genitive singular and nominative plural. For masculine and neuter nouns like this, the bare and definite genitive have the same ending and so the headword inflection combines the two, but for feminine nouns, they will be split. An example is {{m|de|Erwachsene||female adult}}:
{{temp|de-ndecl|+}}
which produces
{{de-ndecl|+|pagename=Erwachsene}}
The headword appears as follows:
{{de-noun|+|pagename=Erwachsene}}
You can likewise decline adjective-noun combinations using <code>+</code>, e.g. {{m|de|schwarzes Loch||black hole}}:
<code><nowiki>{{de-ndecl|[[schwarz]]es<+> [[Loch]]<n,(e)s,^er>}}</nowiki></code>
which produces
{{de-ndecl|[[schwarz]]es<+> [[Loch]]<n,(e)s,^er>}}
The headword appears as follows:
{{de-noun|[[schwarz]]es<+> [[Loch]]<n,(e)s,^er>}}
Here, the alternative notation using angle brackets must be used to indicate the declensions of the individual words. Links must be included in order for there to be links to individual words in the headword.
Note that the gender does not need to be specified in conjunction with <code>+</code>, because it can automatically be inferred from the ending. When used in conjunction with a noun, the plural status of the adjective does not need to be given either, as with {{m|de|Kanarische Inseln}}:
<code><nowiki>{{de-ndecl|[[kanarisch|Kanarische]]<+> [[Insel]]n<p>}}</nowiki></code>
which produces
{{de-ndecl|[[kanarisch|Kanarische]]<+> [[Insel]]n<p>}}
The headword appears as follows:
{{de-noun|[[kanarisch|Kanarische]]<+> [[Insel]]n<p>}}
However, this won't work for a plural-only adjectival noun, because the ''-e'' ending will be inferred as feminine singular. If for some reason you need to specify such a noun, use <code>p+</code> or <code>+p</code>, e.g. for {{m|de|Miese}}:
{{temp|de-ndecl|p+}}
which produces
{{de-ndecl|p+|pagename=Miese}}
The headword appears as follows:
{{de-noun|p+|pagename=Miese}}
===Alternants===
Sometimes a given expression has multiple possible declensions, in a way that can't easily be specified using the available possibilities for specifying multiple genders or alternative genitive or plural endings. For example, some nouns can be declined either strong or weak, and in some multiword expressions, one word may or may not be declined. An example is {{m|de|Hirsch}}, normally strong but sometimes weak (especially in Southern Germany and Austria). To indicate this, use a syntax like this: <code>((ALTERNANT1,ALTERNANT2,...))</code> where each alternant must have angle brackets in it:
{{temp|de-ndecl|((<m,(e)s>,<m.weak.[also in Southern Germany and Austria]>))}}
which produces
{{de-ndecl|((<m,(e)s>,<m.weak.[also in Southern Germany and Austria]>))|pagename=Hirsch}}
Here the first alternant is strong while the second is weak and includes an entire-declension footnote (see [[#Footnotes|Footnotes]] above). The headword appears as follows:
{{de-noun|((<m,(e)s>,<m.weak.[also in Southern Germany and Austria]>))|pagename=Hirsch}}
Another example is {{m|de|lüttje Lage||a beer with a shot of [[Korn]]}}, where ''lüttje'' (a Low German word) may or may not be inflected as a normal adjective:
<code><nowiki>{{de-ndecl|((lüttje<+> [[Lage]]<f>,lüttje [[Lage]]<f>))}}</nowiki></code>
which produces
{{de-ndecl|((lüttje<+> [[Lage]]<f>,lüttje [[Lage]]<f>))}}
The headword appears as follows:
{{de-noun|((lüttje<+> [[Lage]]<f>,lüttje [[Lage]]<f>))}}
Note that both the declension and headword use the adjectival form even though only one of the two alternants has an adjective in it. This is also an example where we purposely avoid linking one of the components, since ''lüttje'' by itself is not a German word.
<includeonly>
[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ဂျာမာန်ဂမၠိုၚ်]]
</includeonly>
bfexvrh4akqfquink575fy3bnt0uyqw
ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ဂျာမာန်ဂမၠိုၚ်
14
294755
395145
2026-05-19T14:38:41Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဂျာမာန်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဂ]]"
395145
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဂျာမာန်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဂ]]
taocg1fzi89klpahlgxb660di9vz9qm
ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဂျာမာန်ဂမၠိုၚ်
14
294756
395146
2026-05-19T14:41:12Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်ဂျာမာန်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဂ]]"
395146
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်ဂျာမာန်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဂ]]
6xyc33iqxhk54yuptdplj2w74zxjunt
ကဏ္ဍ:နာမ်ပုလ္လိၚ်ဂျာမာန် ပေန်သဲဗေနဳယျာဂမၠိုၚ်
14
294757
395147
2026-05-19T17:21:00Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » :ကဏ္ဍ:ဘာသာဂျာမာန် ပေန်သဲဗေနဳယျာ|ဂျာ..."
395147
wikitext
text/x-wiki
[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဂျာမာန် ပေန်သဲဗေနဳယျာ|ဂျာမာန် ပေန်သဲဗေနဳယျာ]] » [[:ကဏ္ဍ:ဝေါဟာအဓိကဂျာမာန် ပေန်သဲဗေနဳယျာဂမၠိုၚ်|ဝေါဟာတံသ္ဇိုၚ်]] » [[:ကဏ္ဍ:နာမ်ဂျာမာန် ပေန်သဲဗေနဳယျာဂမၠိုၚ်|နာမ်ဂမၠိုၚ်]] » [[:ကဏ္ဍ:နာမ်ဂျာမာန် ပေန်သဲဗေနဳယျာဗက်အလိုက်လိၚ်ဂမၠိုၚ်|ဗက်အလိုက်လိၚ်ဂမၠိုၚ်]] »'''ပုလ္လိၚ်ဂမၠိုၚ်'''
:နာမ်ဂျာမာန် ပေန်သဲဗေနဳယျာမဆေၚ်စပ်ကဵုလိၚ်တြုံ၊ ဥပမာ ဆေၚ်စပ်ကဵုကဏ္ဍလုပ်အဝေါၚ်လိၚ်အတေံ (အကြာတၞဟ်ခြာအရာမွဲမွဲအဂှ်) မက္တဵုဒှ်ပုလ္လိၚ်ဂမၠိုၚ်။
[[ကဏ္ဍ:နာမ်ဂျာမာန် ပေန်သဲဗေနဳယျာဗက်အလိုက်လိၚ်ဂမၠိုၚ်|ပ]][[ကဏ္ဍ:နာမ်ပုလ္လိၚ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဂ]]
szhv1ubwtfnt2nzju8ugr3gfg3nxf0w
ကဏ္ဍ:နာမ်ဂျာမာန် ပေန်သဲဗေနဳယျာဗက်အလိုက်လိၚ်ဂမၠိုၚ်
14
294758
395148
2026-05-19T17:24:24Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » :ကဏ္ဍ:ဘာသာဂျာမာန် ပေန်သဲဗေနဳယျာ|ဂျာ..."
395148
wikitext
text/x-wiki
[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဂျာမာန် ပေန်သဲဗေနဳယျာ|ဂျာမာန် ပေန်သဲဗေနဳယျာ]] » [[:ကဏ္ဍ:ဝေါဟာအဓိကဂျာမာန် ပေန်သဲဗေနဳယျာဂမၠိုၚ်|ဝေါဟာတံသ္ဇိုၚ်]] » [[:ကဏ္ဍ:နာမ်ဂျာမာန် ပေန်သဲဗေနဳယျာဂမၠိုၚ်|နာမ်ဂမၠိုၚ်]] »'''ဗက်အလိုက်လိၚ်ဂမၠိုၚ်'''
:နာမ်ဂျာမာန် ပေန်သဲဗေနဳယျာမဂကောံလဝ်နူကဵုဆေၚ်စပ်ကဵုလိၚ်ပွမတုဲဒှ်နကဵုအတေံ။
[[ကဏ္ဍ:နာမ်ဂျာမာန် ပေန်သဲဗေနဳယျာဂမၠိုၚ်]][[ကဏ္ဍ:နာမ်နူကဵုလိၚ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ဂ]]
38kgpabv2fa3c6jp55m1psoyfmz0lq6
ကဏ္ဍ:ကာရန်:ဂျာမာန်/amɐ
14
294759
395149
2026-05-19T17:27:53Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဂျာမာန်|ဂျာမာန်]] » :ကဏ္ဍ:ကာ..."
395149
wikitext
text/x-wiki
[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာဂျာမာန်|ဂျာမာန်]] » [[:ကဏ္ဍ:ကာရန်:ဂျာမာန်|ကာရန်ဂမၠိုၚ်]] » -amɐ
:စရၚ်မဆေၚ်စပ်ကဵုဝေါဟာ[[:ကဏ္ဍ:ဘာသာဂျာမာန်|ဂျာမာန်]]မနွံကာရန် [[ကာရန်:ဂျာမာန်/amɐ|-amɐ]] ဂမၠိုၚ်။
[[ကဏ္ဍ:ကာရန်:ဂျာမာန်|amɐ]]
cr2gu3r7rxmvqf40q2ikprl3b1254jn
ကာရန်:ဂျာမာန်/amɐ
106
294760
395150
2026-05-19T17:31:06Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{rhymes nav|de|a|mɐ}} ==ဗွဟ်ရမ္သာၚ်== * {{IPA|de|/-amɐ/}} ==ကာရန်ဂမၠိုၚ်== ===ဝဏ္ဏမွဲ=== {{rhyme-top}} * {{l|de|Ammer}} * {{l|de|Hammer}} * {{l|de|Jammer}} * {{l|de|Kammer}} * {{l|de|Klammer}} {{rhyme-bottom}} ===ဝဏ္ဏၜါ=== {{rhyme-top}} * {{l|de|Briefklammer}} * {{l|de|Edamer}} * {{l|de|Gejammer}} * {{l|de|Goldammer}} * {{l|de|Grauammer}} *..."
395150
wikitext
text/x-wiki
{{rhymes nav|de|a|mɐ}}
==ဗွဟ်ရမ္သာၚ်==
* {{IPA|de|/-amɐ/}}
==ကာရန်ဂမၠိုၚ်==
===ဝဏ္ဏမွဲ===
{{rhyme-top}}
* {{l|de|Ammer}}
* {{l|de|Hammer}}
* {{l|de|Jammer}}
* {{l|de|Kammer}}
* {{l|de|Klammer}}
{{rhyme-bottom}}
===ဝဏ္ဏၜါ===
{{rhyme-top}}
* {{l|de|Briefklammer}}
* {{l|de|Edamer}}
* {{l|de|Gejammer}}
* {{l|de|Goldammer}}
* {{l|de|Grauammer}}
* {{l|de|Hausammer}}
* {{l|de|Heftklammer}}
* {{l|de|Holzhammer}}
* {{l|de|Maasdamer}}
* {{l|de|Rohrammer}}
* {{l|de|Schneeammer}}
* {{l|de|Windjammer}}
{{rhyme-bottom}}
===ဝဏ္ဏပိ===
{{rhyme-top}}
* {{l|de|Abbauhammer}}
* {{l|de|Amsterdamer}}
* {{l|de|Büroklammer}}
* {{l|de|Erdenjammer}}
* {{l|de|Fichtenammer}}
* {{l|de|Haubenammer}}
* {{l|de|Herzensjammer}}
* {{l|de|Katzenjammer}}
* {{l|de|Presslufthammer}}
* {{l|de|Rotterdamer}}
* {{l|de|Volendamer}}
* {{l|de|Vorschlaghammer}}
* {{l|de|Wäscheklammer}}
{{rhyme-bottom}}
===ဝဏ္ဏပန်===
{{rhyme-top}}
* {{l|de|geschweifte Klammer}}
* {{l|de|Nominalklammer}}
{{rhyme-bottom}}
79jrtuxjclgyu77yp8xvsj9es7xhp6u
ကဏ္ဍ:ယၟုမသဂကူနကဵုဘာသာဒိန်နေတ်ဂမၠိုၚ်
14
294761
395151
2026-05-19T17:34:13Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ဘာသာဒိန်နေတ်]]"
395151
wikitext
text/x-wiki
[[ကဏ္ဍ:ဘာသာဒိန်နေတ်]]
2sn65radkeaqux1jm576cd10nd4dojz
Hemmer
0
294762
395152
2026-05-19T17:45:06Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{also|hemmer}} =={{=en=}}== ===နာမ်မကိတ်ညဳ=== {{en-proper noun|s}} # {{surname|en}} ==ချက်ခ်== ===ဗွဟ်ရမ္သာၚ်=== * {{cs-IPA}} ===နာမ်မကိတ်ညဳ=== {{cs-proper noun|m-an|f=Hemmerová}} # {{surname|cs|g=m|from=German}} ====မလဟုတ်စှ်ေ==== {{cs-ndecl|m.an.surname}} ==ဂျမာန်== ===ဗွဟ်ရမ္သာ..."
395152
wikitext
text/x-wiki
{{also|hemmer}}
=={{=en=}}==
===နာမ်မကိတ်ညဳ===
{{en-proper noun|s}}
# {{surname|en}}
==ချက်ခ်==
===ဗွဟ်ရမ္သာၚ်===
* {{cs-IPA}}
===နာမ်မကိတ်ညဳ===
{{cs-proper noun|m-an|f=Hemmerová}}
# {{surname|cs|g=m|from=German}}
====မလဟုတ်စှ်ေ====
{{cs-ndecl|m.an.surname}}
==ဂျမာန်==
===ဗွဟ်ရမ္သာၚ်===
* {{audio|de|De-Hemmer.ogg|a=<<Germany>> (<<Berlin>>)}}
===နာမ်===
{{de-noun|m}}
# {{agent noun of|de|hemmen}}
## ညးမထိၚ်ဒက်။
##: {{syn|de|Inhibitor}}
====မလဟုတ်စှ်ေ====
{{de-ndecl|m}}
==ဟာန်သဝေတ်==
===နာမ်===
{{head|hrx|noun form}}
# {{plural of|hrx|Hemm}}
==ဂျာမာန် ပေန်သဲဗေနဳယျာ==
===နာမ်===
{{head|pdc|noun form}}
# {{plural of|pdc|Hammer}}
# {{plural of|pdc|Hemm}}
i4qojif0g4blr3fq5j0cltqfexjrwc7
ထာမ်ပလိက်:cs-ndecl
10
294763
395153
2026-05-19T17:47:17Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:cs-noun|show}}<noinclude>{{tcat|ndecl:ndecl}}{{documentation}}</noinclude>"
395153
wikitext
text/x-wiki
{{#invoke:cs-noun|show}}<noinclude>{{tcat|ndecl:ndecl}}{{documentation}}</noinclude>
583p7m81kt3fyifolp6ee8c65n4qkv8
မဝ်ဂျူ:cs-noun
828
294764
395154
2026-05-19T18:03:00Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "local export = {} --[=[ Authorship: Ben Wing <benwing2> ]=] --[=[ TERMINOLOGY: -- "slot" = A particular combination of case/number. Example slot names for nouns are "gen_s" (genitive singular) and "voc_p" (vocative plural). Each slot is filled with zero or more forms. -- "form" = The declined Czech form representing the value of a given slot. -- "lemma" = The dictionary form of a given Czech term. General..."
395154
Scribunto
text/plain
local export = {}
--[=[
Authorship: Ben Wing <benwing2>
]=]
--[=[
TERMINOLOGY:
-- "slot" = A particular combination of case/number.
Example slot names for nouns are "gen_s" (genitive singular) and
"voc_p" (vocative plural). Each slot is filled with zero or more forms.
-- "form" = The declined Czech form representing the value of a given slot.
-- "lemma" = The dictionary form of a given Czech term. Generally the nominative
masculine singular, but may occasionally be another form if the nominative
masculine singular is missing.
]=]
--[=[
FIXME:
1. Finish synthesize_singular_lemma(). [DONE]
2. Implement feminines in -ea, -oa/-ua, -ia, -oe. [DONE]
3. Implement "mixed" masculine nouns in -l, -n, -t (each different, also inanimate vs. animate). [DONE]
4. Allow 'stem:' override after vowel-final words like [[centurio]]. [DONE using decllemma:]
5. Support masculine foreign nouns in -us/-os/-es. [DONE]
6. Support masculine foreign nouns in -ius/-etc. [DONE]
7. Support masculine foreign nouns in unpronounced final -e (e.g. [[software]]). [DONE]
8. Support neuter foreign nouns in -um/-on. [DONE]
9. Support neuter foreign nouns in -ium/-ion. [DONE]
10. Support paired body parts, e.g. [[ruka]], [[noha]], [[oko]], [[ucho]], [[koleno]], [[rameno]]. [WON'T DO;
JUST SEPARATE THE MEANINGS AND GIVE THEM DIFFERENT DECLENSIONS]
11. Support masculine nouns in -e/ě that are neuter in the plural. [DONE]
12. Correctly handle -e vs. -ě, e.g. soft neuters have both [[kutě]] and [[poledne]]. [DONE]
13. Always use specified lemma in nom_pl and maybe acc_pl when plurale tantum. [DONE]
14. Support feminine nouns in -ca/-ča/-ša/-ža. [DONE]
15. Support feminine nouns in -ja/-ňa. [DONE]
16. Support mixed i-stem feminine nouns. [DONE]
17. Support "c as k" feminine nouns like [[ayahuasca]].
18. Support 'declgender'. [DONE]
19. Support pronouns with clitics. [DONE]
20. Singular-only and plural-only terms should not have number in accelerator form. [DONE]
21. Support [[úterý]] (like neuters in -í). [DONE]
22. Support feminines in -i ([[máti]], [[pramáti]]). [DONE]
23. Support foreign nouns in -ie ([[zombie]], [[hippie]], [[yuppie]]). [DONE]
24. Support foreign nouns in -í ([[muftí]], [[qádí]]). [DONE]
25. Support manual declensions. [DONE]
26. Support numerals. [DONE]
27. Allow for reducible spec in pluralia tantum and dereduce accordingly; also automatically assign reducibility
if singular stem ends in -Ck or -Cc. [DONE]
28. Use `pos` value in all categories.
29. Support determiners [[kolik]], [[tolik]], [[několik]], [[mnoho]]. [DONE]
30. Support a '.velar' indicator for foreign names whose pronunciation but not spelling ends in a velar: [[Remarque]],
[[Braque]], [[Mike]], [[Drake]], [[Jake]] with vocative 'Remarquu', 'Braquu', 'Mikeu', 'Drakeu', 'Jakeu'. In
general we need more thought around such foreign names; essentially, for names in a silent e, sometimes the -e
is dropped in all oblique forms (e.g. [[Shakespeare]], [[Pierre]], [[Barrande]], [[La Fontaine]], [[Braque]],
[[Remarque]] with gen sg 'Shakespeara', 'Pierra', 'Barranda', 'La Fontaina', 'Braqua', 'Remarqua') and sometimes
it's kept in all oblique forms except those ending in an -e, where -ee is avoided (e.g. [[Pete]], [[Gable]],
[[Jake]], [[White]], [[Byrne]], [[Mike]], [[Drake]] with gen sg 'Petea', 'Gablea' etc. and voc sg 'Pete', 'Gable'
but 'Jakeu', 'Mikeu'). Sometimes there are doublets, e.g. [[Hubble]] and [[Hume]] have gen sg 'Hubbla/Hubblea'
(where the second form is used among astronomers in a technical sense and the first form may be more popular)
and 'Huma/Humea'. We already have a '.foreign' indicator that when applied to a noun ending in -e drops the -e
in oblique forms e.g. for [[software]]. We may need to combine this with an explicit indicator of hard, soft or
velar as there will be names with silent -e and preceding soft consonant e.g. [[Bruce]], [[Coleridge]]. Note
that when the -e is kept it is still dropped before front vowels, hence dat sg 'Bruci'/'Bruceovi'. Need some
investigation in IJP and cswikt. [.velar DONE]
31. Support 'declnumber'. [DONE]
32. Support foreign nouns in -ee ([[Yankee]]). [DONE]
]=]
local lang = require("Module:languages").getByCode("cs")
local m_table = require("Module:table")
local m_links = require("Module:links")
local m_string_utilities = require("Module:string utilities")
local iut = require("Module:inflection utilities")
local put = require("Module:parse utilities")
local m_para = require("Module:parameters")
local com = require("Module:cs-common")
local en_utilities_module = "Module:en-utilities"
local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local usub = mw.ustring.sub
local uupper = mw.ustring.upper
local ulower = mw.ustring.lower
local force_cat = false -- set to true to make categories appear in non-mainspace pages, for testing
-- Wrapper around rsubn() that yields only the substituted string; the substitution count that rsubn() also returns
-- is dropped by the enclosing parentheses.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end
-- Wrapper around rsubn() that returns the substituted string plus a boolean that is true when at least one
-- substitution was made.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Record a tracking hit named "cs-noun/<track_id>" through Module:debug/track. Always returns true.
local function track(track_id)
	local tracker = require("Module:debug/track")
	tracker("cs-noun/" .. track_id)
	return true
end
-- Map from each output slot name to its accelerator form spec (pipe-separated inflection tags). This table is
-- shared module-level state and must be treated as read-only.
local output_noun_slots = {
	nom_s = "nom|s",
	nom_s_linked = "nom|s",
	gen_s = "gen|s",
	gen_s_linked = "gen|s",
	clitic_gen_s = "clitic|gen|s",
	dat_s = "dat|s",
	clitic_dat_s = "clitic|dat|s",
	acc_s = "acc|s",
	clitic_acc_s = "clitic|acc|s",
	voc_s = "voc|s",
	loc_s = "loc|s",
	ins_s = "ins|s",
	nom_p = "nom|p",
	nom_p_linked = "nom|p",
	gen_p = "gen|p",
	dat_p = "dat|p",
	acc_p = "acc|p",
	voc_p = "voc|p",
	loc_p = "loc|p",
	ins_p = "ins|p",
}

-- Return the slot -> accelerator-form table appropriate for `alternant_multiword_spec`.
--
-- When `alternant_multiword_spec.actual_number` is "both", the shared `output_noun_slots` table is returned as-is.
-- Otherwise (singular-only or plural-only terms) a fresh table is returned in which the trailing "|s" / "|p" number
-- tag has been removed from every accelerator form. A copy is built rather than rewriting `output_noun_slots` in
-- place, so that several calls within one Lua invocation (e.g. a numberless term followed by a term with both
-- numbers) do not contaminate each other.
local function get_output_noun_slots(alternant_multiword_spec)
	if alternant_multiword_spec.actual_number == "both" then
		return output_noun_slots
	end
	local numberless_slots = {}
	for slot, accel_form in pairs(output_noun_slots) do
		-- Parenthesized to keep only the string result of gsub(), not the substitution count.
		numberless_slots[slot] = (accel_form:gsub("|[sp]$", ""))
	end
	return numberless_slots
end
-- Slots whose forms may coincide with the lemma; handle_derived_slots_and_overrides() computes a "<slot>_linked"
-- variant for each of these.
local potential_lemma_slots = {"nom_s", "nom_p", "gen_s"}
-- Set of case abbreviations (the part of a slot name before "_s" / "_p").
-- NOTE(review): the consumers of this set are outside the visible part of the file -- confirm usage there.
local cases = {
nom = true,
gen = true,
dat = true,
acc = true,
voc = true,
loc = true,
ins = true,
}
-- Set of cases that also have a "clitic_<case>_s" slot in `output_noun_slots`.
-- NOTE(review): the consumers of this set are outside the visible part of the file -- confirm usage there.
local clitic_cases = {
gen = true,
dat = true,
acc = true,
}
-- Return the dereduced form of `stem` as computed by com.dereduce(); raise an error naming the stem when
-- com.dereduce() yields no result.
local function dereduce(base, stem)
	return com.dereduce(base, stem) or error("Unable to dereduce stem '" .. stem .. "'")
end
--[=[
Maybe modify the stem and/or ending in certain special cases:
1. Final -e in vocative singular triggers first palatalization of the stem in some cases (e.g. hard masc).
2. Endings beginning with ě, i, í trigger second palatalization, as does -e in the loc_s.
NOTE: Correctly handling -e vs. -ě and -tdn/-ťďň alternations is tricky. We have to deal with the following:
1. Soft-stem and t-stem neuters can have either -e or -ě. With coronals we have both [[poledne]] "noon" with /n/ and
[[kutě]] "bed" with /ť/. We also have soft-stem neuter [[Labe]] with /b/ vs. t-stem neuter [[hříbě]] with /bj/.
2. Underlying palatal coronals maintain their nature before back vowels and when not followed by a vowel, e.g. [[štěně]]
"puppy" becomes 'štěňata' in the nom/acc/voc plural and [[přítelkyně]] "girlfriend" becomes 'přítelkyň' in the gen
plural, but underlying palatal labials become non-palatal, e.g. [[hříbě]] "foal" becomes 'hříbata' in the nom/acc/voc
plural.
3. There are at least four types of endings beginning with '-e':
a. "maintaining" endings, e.g. instrumental singular '-em', which do not change the nature of the consonant, e.g.
[[zákon]] "law" becomes 'zákonem' while [[vězeň]] "prisoner" becomes 'vězeněm';
b. "palatalizing" endings, e.g. locative singular '-e', which palatalizes t/d/n (and more generally applies the
Slavic second palatalization, e.g. k -> c, r -> ř), e.g. [[žena]] "woman" becomes 'ženě';
c. "depalatalizing" endings, e.g. feminine i-stem dative plural '-em', which actively depalatalize ť/ď/ň, e.g.
[[oběť]] "sacrifice, victim" becomes 'obětem';
d. vocative singular '-e' of hard-stem masculines, which applies the Slavic first palatalization in some
circumstances (e.g. k -> č, Cr -> Cř, sometimes c -> č).
The way we handle this is as follows:
1. We maintain the underlying stems always in their "pronounced" form, i.e. if the last consonant is pronounced ť/ď/ň
we maintain the stem in that form, but if pronounced t/d/n, we use those consonants. Hence neuter [[poledne]] "noon"
has stem 'poledn-' but neuter [[štěně]] "puppy" has stem 'štěň'. If the stem ends in labial + /j/, we use a special
TEMP_SOFT_LABIAL character after the labial (rather than 'j', in case of stems that actually have a written 'j' in
them such as [[banjo]]).
2. We signal types (a), (b) and (c) above using respectively 'e', 'ě' and 'E'. Type (d) uses 'e' and sets
`base.palatalize_voc`.
3. In combine_stem_ending(), we convert the stem back to the written form before adding the ending. If the ending begins
with -e, this may entail converting -e to -ě, and in all cases -E is converted to -e. "Converting to the written
form" converts ť/ď/ň to plain equivalents and deletes TEMP_SOFT_LABIAL before -e, converting -e to -ě with such
consonants. The same conversions happen before other front vowels -ě/-é/-i/-í, which don't allow ť/ď/ň to
precede, and in all cases with TEMP_SOFT_LABIAL, which is not an actual consonant.
4. If the ending is specified using -ě, this is maintained after plain coronals and labials in combine_stem_ending(),
and converted to -e in other cases.
5. Applying the first and second palatalization happens below in apply_special_cases().
]=]
-- Adjust `stem` for the given `slot` and `ending` and return `stem, ending`. `ending` is returned unchanged; the
-- returned stem is either a string or a two-element list of alternative stems. Three mutually exclusive cases are
-- handled, in this order:
-- 1. `base.c_as_k` and an ending beginning with a back vowel: return both the original stem and a variant with
--    final 'c' replaced by 'k'.
-- 2. Vocative singular in -e with `base.palatalize_voc` set (and `base["-velar"]` unset): apply the first
--    palatalization, optionally for inanimate nouns in consonant + r.
-- 3. Endings beginning with ě/i/í, or locative singular -e: apply the second palatalization, after simplifying
--    final -ck and (for `base.velar`) final -gu/-qu.
local function apply_special_cases(base, slot, stem, ending)
	if base.c_as_k and rfind(ending, "^[aouyáóúůý]") then
		local k_stem = rsub(stem, "c$", "k")
		stem = {stem, k_stem}
	elseif slot == "voc_s" and ending == "e" and base.palatalize_voc and not base["-velar"] then
		-- Don't palatalize words like [[hadíth]] with silent -h.
		local palstem = com.apply_first_palatalization(stem)
		-- According to IJP, nouns ending in -Cr palatalize in the vocative, but those in -Vr don't. In reality,
		-- though, it's more complex. It appears that animate nouns in -Cr tend to palatalize but inanimate nouns
		-- do it optionally. Specifics:
		-- -- Inanimate nouns with optional palatalization (ř listed second): [[alabastr]], [[amfiteátr]], [[barometr]],
		--    [[centilitr]], [[centimetr]], [[decilitr]], [[decimetr]], [[Dněstr]], [[filtr]], [[galvanometr]],
		--    [[hektolitr]], [[kalorimetr]], [[litr]], [[lustr]], [[manometr]], [[manšestr]], [[metr]] (NOTE: is both
		--    animate and inanimate), [[mikrometr]], [[miliampérmetr]], [[mililitr]], [[nanometr]], [[orchestr]],
		--    [[parametr]], [[piastr]], [[půllitr]], [[radiometr]], [[registr]], [[rotmistr]], [[semestr]], [[skútr]],
		--    [[spirometr]], [[svetr]], [[šutr]], [[tachometr]], [[titr]], [[vítr]] (NOTE: has í-ě alternation),
		--    [[voltmetr]]; [[bagr]], [[bunkr]], [[cedr]], [[Dněpr]], [[fofr]], [[habr]] (NOTE: ř listed first), [[hadr]]
		--    (NOTE: ř listed first), [[hamr]], [[kafr]], [[kepr]], [[kopr]], [[koriandr]], [[krekr]], [[kufr]],
		--    [[Kypr]], [[lágr]], [[lógr]], [[manévr]], [[masakr]], [[okr]], [[oleandr]], [[pulovr]], [[šlágr]],
		--    [[vichr]] (NOTE: ř listed first), [[žánr]]
		--
		-- -- Inanimate nouns that don't palatalize: [[ampérmetr]], [[anemometr]], [[sfygmomanometr]], [[sfygmometr]];
		--    [[dodekaedr]], [[Hamr]], [[ikozaedr]], [[kvádr]], [[sandr]], [[torr]]
		--
		-- -- Animate nouns that palatalize: [[arbitr]], [[bratr]], [[ekonometr]], [[foniatr]], [[fotr]], [[geometr]],
		--    [[kmotr]], [[lotr]], [[magistr]], [[metr]] (NOTE: is both animate and inanimate), [[ministr]], [[mistr]],
		--    [[pediatr]], [[Petr]], [[psychiatr]], [[purkmistr]], [[setr]], [[šamstr]]; [[bobr]], [[fajnšmekr]],
		--    [[humr]], [[hypochondr]], [[kapr]], [[lídr]], [[negr]], [[obr]], [[salamandr]], [[sólokapr]], [[švagr]],
		--    [[tygr]], [[zlobr]], [[zubr]]
		--
		-- -- Animate nouns with optional palatalization (ř listed first): [[Silvestr]]; [[Alexandr]], [[snajpr]]
		--
		-- Note the inconsistencies, e.g. [[sfygmomanometr]] and [[ampérmetr]] don't palatalize but [[manometr]] and
		-- [[miliampérmetr]] do it optionally. In reality, inanimate vocatives are extremely rare so this may not be the
		-- final word.
		if base.animacy == "inan" and rfind(stem, com.cons_c .. "r$") and not rfind(stem, "rr$") then
			-- optional r -> ř
			stem = {stem, palstem}
		else
			stem = palstem
		end
	elseif rfind(ending, "^[ěií]") or slot == "loc_s" and ending == "e" then
		if rfind(stem, "ck$") and rfind(base.lemma, "ck$") then
			-- IJP says nouns in -ck (back, comeback, crack, deadlock, hatchback, hattrick, joystick, paperback, quarterback,
			-- rock, soundtrack, track, truck) simplify the resulting -cc ending in the loc_p to -c. Similarly [[quarterback]]
			-- has nom_pl 'quarterbaci, quarterbackove'. We need to check the lemma as well because nouns in -cek don't do this.
			stem = rsub(stem, "ck$", "k")
		end
		if base.velar then
			-- [[petanque]] /petank/ -> loc pl 'petancích'.
			stem = rsub(stem, "gu$", "g")
			stem = rsub(stem, "qu$", "k")
		end
		-- loc_s of hard masculines is sometimes -e/ě; the user might indicate this as -e, which we should handle
		-- correctly
		stem = com.apply_second_palatalization(stem)
	end
	return stem, ending
end
-- Return a truthy value when `slot` must be skipped for the given `number`: plural slots (ending in "_p") when
-- `number` is "sg", singular slots (ending in "_s") when `number` is "pl". Any other `number` skips nothing.
local function skip_slot(number, slot)
	if number == "sg" then
		return (rfind(slot, "_p$")) or false
	elseif number == "pl" then
		return (rfind(slot, "_s$"))
	end
	return false
end
-- Basic function to combine stem(s) and ending(s) and insert the result into the appropriate slot. `stems` is either
-- the `stems` object passed into the declension functions (containing the various stems; see below) or a string to
-- override the stem. (NOTE: If you pass a string in as `stems`, you should pass the value of `stems.footnotes` as the
-- value of `footnotes` as it will be lost otherwise. If you need to supply your own footnote in addition, use
-- iut.combine_footnotes() to combine any user-specified footnote(s) with your footnote(s).) `endings` is either a
-- string specifying a single ending or a list of endings. If `endings` is nil, no forms are inserted. If an ending is
-- "-", the value of `stems` is ignored and the lemma is used instead as the stem; this is important in case the user
-- used `decllemma:` to specify a declension lemma different from the actual lemma, or specified '.foreign' (which has
-- a similar effect).
local function add(base, slot, stems, endings, footnotes)
	-- Nothing to insert without endings, or when the slot is excluded by the declined number. skip_slot() is keyed
	-- on the declined number; if the actual number is different, that is corrected at the end of decline_noun().
	if not endings or skip_slot(base.number, slot) then
		return
	end

	-- Merge footnotes from the base, from the stem set (when `stems` is an object) and from the caller, in that
	-- order.
	local stem_set_footnotes = nil
	if type(stems) == "table" then
		stem_set_footnotes = stems.footnotes or nil
	end
	local inherited_footnotes = iut.combine_footnotes(base.footnotes, stem_set_footnotes)
	footnotes = iut.combine_footnotes(inherited_footnotes, footnotes)

	local ending_list = type(endings) == "string" and {endings} or endings

	local function combine_stem_ending(stem, ending)
		return com.combine_stem_ending(base, slot, stem, ending)
	end

	-- Pick one of the four stems of a stem-set object (vowel-vs-non-vowel cross regular-vs-oblique), based on the
	-- slot and on whether the ending begins with a vowel.
	local function select_stem(ending)
		local vowel_initial = rfind(ending, "^" .. com.vowel_c)
		local oblique_slots = stems.oblique_slots
		local use_oblique
		if oblique_slots == "all" then
			use_oblique = true
		elseif slot == "gen_p" then
			use_oblique = oblique_slots == "gen_p" or oblique_slots == "all-oblique"
		else
			use_oblique = oblique_slots == "all-oblique" and
				(slot == "ins_s" or slot == "dat_p" or slot == "loc_p" or slot == "ins_p")
		end
		if use_oblique then
			if vowel_initial then
				return stems.oblique_vowel_stem
			end
			return stems.oblique_nonvowel_stem
		end
		if vowel_initial then
			return stems.vowel_stem
		end
		return stems.nonvowel_stem
	end

	for _, ending in ipairs(ending_list) do
		local stem
		if ending == "-" then
			-- "-" means "use the actual lemma unchanged", regardless of `stems`.
			stem, ending = base.actual_lemma, ""
		elseif type(stems) == "string" then
			-- Explicit stem override.
			stem = stems
		else
			stem = select_stem(ending)
		end
		-- Maybe apply the first or second Slavic palatalization.
		stem, ending = apply_special_cases(base, slot, stem, ending)
		ending = iut.combine_form_and_footnotes(ending, footnotes)
		iut.add_forms(base.forms, slot, stem, ending, combine_stem_ending)
	end
end
-- Apply the user-specified overrides in `base.overrides` to every slot for which `do_slot(slot)` is truthy. An
-- overridden slot is flagged in `base.slot_overridden`, its existing forms are discarded, and the override values
-- are inserted in their place. An override for a slot excluded by `base.number` raises an error, whether or not
-- `do_slot` selects that slot.
local function process_slot_overrides(base, do_slot)
	-- Insert the form(s) produced by a single override value into `slot`.
	local function apply_value(slot, override, value)
		local notes = iut.combine_footnotes(base.footnotes, value.footnotes)
		local form = value.form
		if override.full then
			-- Full override: the value is the complete form; an empty value inserts nothing.
			if form ~= "" then
				iut.insert_form(base.forms, slot, {form = form, footnotes = notes})
			end
			return
		end
		-- Convert a null ending to "-" in the acc/voc sg slots so that e.g. [[Kerberos]] declared as
		-- <m.sg.foreign.gena:u.acc-:a> works correctly and generates accusative 'Kerberos/Kerbera' not
		-- #'Kerber/Kerbera'.
		if form == "" and (slot == "acc_s" or slot == "voc_s") then
			form = "-"
		end
		-- Ending override: combine the ending with every stem set.
		for _, stems in ipairs(base.stem_sets) do
			add(base, slot, stems, form, notes)
		end
	end

	for slot, overrides in pairs(base.overrides) do
		-- Call skip_slot() based on the declined number; if the actual number is different, we correct this in
		-- decline_noun() at the end.
		if skip_slot(base.number, slot) then
			error("Override specified for invalid slot '" .. slot .. "' due to '" .. base.number .. "' number restriction")
		end
		if do_slot(slot) then
			base.slot_overridden[slot] = true
			base.forms[slot] = nil
			for _, override in ipairs(overrides) do
				for _, value in ipairs(override.values) do
					apply_value(slot, override, value)
				end
			end
		end
	end
end
-- Add a full set of singular and plural endings for one stem set. Each ending argument is a string, a list of
-- strings, or nil (in which case add() inserts nothing for that slot). The nominative singular always uses "-",
-- i.e. the lemma itself.
local function add_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
	nom_p, gen_p, dat_p, acc_p, loc_p, ins_p, footnotes
)
	local function emit(slot, endings)
		add(base, slot, stems, endings, footnotes)
	end

	emit("nom_s", "-")
	emit("gen_s", gen_s)
	emit("dat_s", dat_s)
	emit("acc_s", acc_s)
	emit("voc_s", voc_s)
	emit("loc_s", loc_s)
	emit("ins_s", ins_s)

	if base.number == "pl" then
		-- If this is a plurale tantum noun and we're processing the nominative plural, use the user-specified lemma
		-- rather than generating the plural from the synthesized singular, which may not match the specified lemma
		-- (e.g. [[tvargle]] "Olomouc cheese" using <m.pl.mixed> would try to generate 'tvargle/tvargly', and [[peníze]]
		-- "money" using <m.pl.#ě.genpl-> would try to generate 'peněze').
		-- The accusative plural follows suit only when its endings were identical to the nominative plural ones.
		local acc_matches_nom = m_table.deepEquals(nom_p, acc_p)
		nom_p = "-"
		acc_p = acc_matches_nom and "-" or acc_p
	end

	emit("nom_p", nom_p)
	emit("gen_p", gen_p)
	emit("dat_p", dat_p)
	emit("acc_p", acc_p)
	emit("loc_p", loc_p)
	emit("ins_p", ins_p)
end
-- Singular-only convenience wrapper around add_decl(): the six singular endings are passed through and every
-- plural ending is nil.
local function add_sg_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s, footnotes
)
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p -- all deliberately nil
	add_decl(base, stems,
		gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p,
		footnotes)
end
-- Plural-only convenience wrapper around add_decl(): every singular ending is nil, the nominative plural is "-"
-- (the lemma itself), and the remaining plural endings are passed through.
local function add_pl_only_decl(base, stems,
	gen_p, dat_p, acc_p, loc_p, ins_p, footnotes
)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s -- all deliberately nil
	add_decl(base, stems,
		gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		"-", gen_p, dat_p, acc_p, loc_p, ins_p,
		footnotes)
end
-- Add singular endings including the clitic genitive/dative/accusative slots. The nominative singular is set to
-- the lemma ("-") unless `no_nom_s` is truthy. Nil endings insert nothing for their slot.
local function add_sg_decl_with_clitic(base, stems,
	gen_s, clitic_gen_s, dat_s, clitic_dat_s, acc_s, clitic_acc_s, voc_s, loc_s, ins_s, footnotes, no_nom_s
)
	if not no_nom_s then
		add(base, "nom_s", stems, "-", footnotes)
	end
	-- {slot, endings} pairs, processed in order.
	local slot_endings = {
		{"gen_s", gen_s},
		{"clitic_gen_s", clitic_gen_s},
		{"dat_s", dat_s},
		{"clitic_dat_s", clitic_dat_s},
		{"acc_s", acc_s},
		{"clitic_acc_s", clitic_acc_s},
		{"voc_s", voc_s},
		{"loc_s", loc_s},
		{"ins_s", ins_s},
	}
	for _, pair in ipairs(slot_endings) do
		add(base, pair[1], stems, pair[2], footnotes)
	end
end
-- Finish the forms of `base`: apply overrides to the non-derived slots, fill in the derived slots (voc_p, acc_s,
-- clitic_acc_s) from their source slots, apply overrides to the derived slots, and finally compute the "_linked"
-- variants of the potential lemma slots.
local function handle_derived_slots_and_overrides(base)
	local derived_slots = {voc_p = true, acc_s = true, clitic_acc_s = true}
	local function is_derived_slot(slot)
		return derived_slots[slot] == true
	end
	local function is_non_derived_slot(slot)
		return not is_derived_slot(slot)
	end

	base.slot_overridden = {}

	-- Overrides of the non-derived slots go first, so that overrides of the source slots (e.g. nom_p) propagate
	-- to the slots derived from them.
	process_slot_overrides(base, is_non_derived_slot)

	-- Derive the remaining slots from other slots.
	if not (base.pron or base.det) then
		-- Pronouns don't have a vocative (singular or plural).
		iut.insert_forms(base.forms, "voc_p", base.forms.nom_p)
	end
	local inanimate = base.animacy == "inan"
	if not (base.forms.acc_s or base.slot_overridden.acc_s) then
		-- Accusative singular copies the nominative for inanimates, the genitive otherwise.
		local source_slot = inanimate and "nom_s" or "gen_s"
		iut.insert_forms(base.forms, "acc_s", base.forms[source_slot])
	end
	if not (base.forms.clitic_acc_s or base.slot_overridden.clitic_acc_s) then
		local source_slot = inanimate and "nom_s" or "clitic_gen_s"
		iut.insert_forms(base.forms, "clitic_acc_s", base.forms[source_slot])
	end

	-- Now the derived slots themselves may be overridden.
	process_slot_overrides(base, is_derived_slot)

	-- Compute linked versions of potential lemma slots, for use in {{cs-noun}}. A form identical to the lemma is
	-- replaced by the original lemma (before removing links) when that original lemma contains links.
	local function restore_links(form)
		if form == base.orig_lemma_no_links and rfind(base.orig_lemma, "%[%[") then
			return base.orig_lemma
		end
		return form
	end
	for _, slot in ipairs(potential_lemma_slots) do
		iut.insert_forms(base.forms, slot .. "_linked", iut.map_forms(base.forms[slot], restore_links))
	end
end
-- Table mapping declension types to functions to decline the noun. The function takes two arguments, `base` and
-- `stems`; the latter specifies the computed stems (vowel vs. non-vowel, singular vs. plural) and whether the noun
-- is reducible and/or has vowel alternations in the stem. Most of the specifics of determining which stem to use
-- and how to modify it for the given ending are handled in add_decl(); the declension functions just need to generate
-- the appropriate endings.
local decls = {}
-- Table specifying additional properties for declension types. Every declension type must have such a table, which
-- specifies which category or categories to add and what annotation to show in the title bar of the declension table.
--
-- * Only the `cat` property of this table is mandatory; there is also a `desc` property to specify the annotation, but
--   this can be omitted and the annotation will then be computed from the `cat` property. The `cat` property is either
--   a string, a list of strings or a function (of two arguments, `base` and `stems` as above) returning a string or
--   list of strings. The string can contain the keywords GENDER (to substitute the gender, and animacy for masculine
--   nouns) and POS (to substitute the pluralized part of speech). The keyword GENPOS is equivalent to 'GENDER POS'. If
--   no keyword is present, ' GENPOS' is added onto the end. If only GENDER is present, ' POS' is added onto the end.
--   In all cases, the language name is added onto the beginning to form the full category name.
-- * The `desc` property is of the same form as the `cat` property and specifies the annotation to display in the title
--   bar (which may have the same format as the category minus the part of speech, or may be abbreviated). The value
--   may not be a list of strings, as only one annotation is displayed. If omitted, it is derived from the category
--   spec(s) by taking the last category (if more than one is given) and removing ' POS' before keyword substitution.
local declprops = {}
-- Return the default masculine animate nominative plural ending(s) given `base` and `stems`. This is called for hard
-- and soft masculines ending in a consonant, but not for nouns ending in a vowel, which have their own defaults
-- (particularly nouns in -a, where -ista/-ita/-asta behave differently from other nouns in -a).
--
-- The return value is either a single ending (a string) or a list of endings. The rules below are tried in order and
-- the first one that matches wins; if none matches, the overall default "i" is returned.
local function default_masc_animate_nom_pl(base, stems)
	local lemma = base.lemma

	-- [monosyllabic words: Dánové, Irové, králové, mágové, Rusové, sokové, synové, špehové, zběhové, zeťové, manové, danové
	-- (but Žid → Židé, Čech → Češi).] -- There are too many exceptions to this to make a special rule. It is better to use
	-- the overall default of -i and require that cases with -ove, -ove/-i, -i/-ove, etc. use overrides.
	-- com.is_monosyllabic(base.lemma) and "ové" or

	-- Reducible terms in -Cek; order of -ové vs. -i sometimes varies:
	-- [[fracek]] (ové/i), [[klacek]] (i/ové), [[macek]] (ové/i), [[nácek]] (i/ové), [[prcek]] (ové/i), [[racek]] (ové/i);
	-- [[bazilišek]] (i/ové), [[černoušek]] (i/ové), [[drahoušek]] (ové/i), [[fanoušek]] (i/ové), [[františek]] (an/inan,
	-- ends in -i/-y but not -ové), [[koloušek]] (-i only), [[kulíšek]] (i/ové), [[oříšek]] (i/ové), [[papoušek]] (-i only),
	-- [[prášek]] (i/ové), [[šašek]] (i/ové).
	-- Make sure to check `stems` as we don't want to include non-reducible words in -Cek (but do want to include
	-- [[quarterback]], with -i/-ové).
	if rfind(stems.vowel_stem, "^" .. com.lowercase_c .. ".*" .. com.cons_c .. "k$") then
		return {"i", "ové"}
	end
	-- [[stoik]], [[neurotik]], [[logik]], [[fyzik]], etc. The pattern must allow any number of characters between the
	-- initial lowercase letter and the final -ik; a single `.` would only match four-letter lemmas.
	if rfind(lemma, "^" .. com.lowercase_c .. ".*ik$") then
		return {"i", "ové"}
	end
	-- barmani, gentlemani, jazzmani, kameramani, narkomani, ombudsmani, pivotmani, rekordmani, showmani, supermani, toxikomani
	if rfind(lemma, "^" .. com.lowercase_c .. ".*man$") then
		return "i"
	end
	-- Terms ending in -an after a palatal or a consonant that doesn't change when palatalized, i.e. labial or l (but -man
	-- forms -mani unless in a proper noun): Brňan → Brňané, křesťan → křesťané, měšťan → měšťané, Moravan → Moravané,
	-- občan → občané, ostrovan → ostrované, Pražan → Pražané, Slovan → Slované, svatebčan → svatebčané, venkovan → venkované,
	-- Australan → Australané; also s, because there are many demonyms in -san e.g. [[Andalusan]], [[Barbadosan]], [[Oděsan]],
	-- and few proper nouns in -san; similarly z because of [[Belizan]], [[Gazan]], [[Kavkazan]], etc.; also w, which isn't a
	-- normal consonant in Czech but occurs in [[Glasgowan]] and [[Zimbabwan]]; NOTE: a few misc words like [[pohan]] also
	-- work this way but need manual overrides.
	if rfind(lemma, "[" .. com.inherently_soft .. com.labial .. "wlsz]an$") then
		return {"é", "i"} -- most now can also take -i
	end
	-- Proper names: Baťové, Novákové, Petrové, Tomášové, Vláďové; exclude demonyms (but include surnames).
	if rfind(lemma, "^" .. com.uppercase_c) and (base.surname or not rfind(lemma, "[eě]c$")) then
		return "ové"
	end
	-- Demonyms: [[Albánec]], [[Gruzínec]], [[Izraelec]], [[Korejec]], [[Libyjec]], [[Litevec]], [[Němec]], [[Portugalec]]
	if rfind(lemma, "^" .. com.uppercase_c .. ".*[eě]c$") then
		return "i"
	end
	-- From here on down, we're dealing only with lowercase terms.
	-- buditelé, budovatelé, čekatelé, činitelé, hostitelé, jmenovatelé, pisatelé, ručitelé, velitelé, živitelé
	if rfind(lemma, ".*tel$") then
		return "é"
	end
	-- Nouns in -j: čaroděj → čarodějové, lokaj → lokajové, patricij → patricijové, plebej → plebejové, šohaj → šohajové,
	-- žokej → žokejové.
	-- Nouns in -l: apoštol → apoštolové, břídil → břídilové, fňukal → fňukalové, hýřil → hýřilové, kutil → kutilové,
	-- loudal → loudalové, mazal → mazalové, škrabal → škrabalové, škudlil → škudlilové, vyvrhel → vyvrhelové,
	-- žvanil → žvanilové (we excluded those in -tel above).
	if rfind(lemma, ".*[jl]$") then
		return "ové"
	end
	-- archeolog → archeologové, biolog → biologové, geolog → geologové, meteorolog → meteorologové
	if rfind(lemma, ".*log$") then
		return "ové"
	end
	-- dramaturg → dramaturgové, chirurg → chirurgové
	if rfind(lemma, ".*urg$") then
		return "ové"
	end
	-- fotograf → fotografové, geograf → geografové, lexikograf → lexikografové
	if rfind(lemma, ".*graf$") then
		return "ové"
	end
	-- bibliofil → bibliofilové, germanofil → germanofilové
	if rfind(lemma, ".*fil$") then
		return "ové"
	end
	-- rusofob → rusofobové
	if rfind(lemma, ".*fob$") then
		return "ové"
	end
	-- agronom → agronomové, ekonom → ekonomové
	if rfind(lemma, ".*nom$") then
		return "ové"
	end
	-- Overall default.
	return "i"
end
-- Hard masculine declension. Sets palatalization flags on `base`, works out the endings that depend on animacy,
-- velar stems and toponym status, and hands them to add_decl().
decls["hard-m"] = function(base, stems)
	-- Nouns ending in hard -c, e.g. [[hec]] "joke", [[kibuc]] "kibbutz", don't palatalize.
	base.palatalize_voc = not rfind(stems.vowel_stem, "c$")
	base.hard_c = true

	local lemma = base.lemma
	local inanimate = base.animacy == "inan"
	local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")

	-- See [https://prirucka.ujc.cas.cz/en/?id=360] on declension of toponyms.
	local toponym = inanimate and rfind(lemma, "^" .. com.uppercase_c)
	-- Some toponyms take -a in the genitive singular, e.g. toponyms in -ín ([[Zlín]], [[Jičín]], [[Berlín]]);
	-- -ýn ([[Hostýn]], [[Londýn]]); -ov ([[Havířov]]); and -ev ([[Bezdrev]]), as do some others, e.g. domestic
	-- [[Beroun]], [[Brandýs]], [[Náchod]], [[Tábor]] and foreign [[Betlém]] "Bethlehem", [[Egypt]],
	-- [[Jeruzalém]] "Jerusalem", [[Milán]] "Milan", [[Řím]] "Rome", [[Rýn]] "Rhine". Also some transferred from
	-- common nouns e.g. ([[Nový]]) [[Kostel]], ([[Starý]]) [[Rybník]].
	local toponym_gen_a = toponym and (rfind(lemma, "[íý]n$") or rfind(lemma, "[oe]v$"))
	-- Toponyms in -ík (Mělník, Braník, Rakovník, Lipník) seem to fluctuate between gen -a and -u. Also some in
	-- ‑štejn, ‑berg, ‑perk, ‑burk, ‑purk (Rabštejn, Heidelberg, Kašperk, Hamburk, Prešpurk) and some others:
	-- Zbiroh, Kamýk, Příbor, Zábřeh, Žebrák, Praděd.
	local toponym_gen_a_u = toponym and rfind(lemma, "ík$")
	-- Toponyms that take -a in the genitive singular tend to take -ě in the locative singular; so do those in
	-- -štejn (Rabštejn), -hrad (Petrohrad), -grad (Volgograd).
	local toponym_loc_e = toponym and (toponym_gen_a or rfind(lemma, "štejn$") or rfind(lemma, "[gh]rad$"))
	-- Toponyms in -ík seem to fluctuate between loc -ě and -u.
	local toponym_loc_e_u = toponym_gen_a_u

	-- Inanimate gen_s in -a other than toponyms in -ín/-ýn/-ev/-ov (e.g. [[zákon]] "law", [[oběd]] "lunch",
	-- [[kostel]] "church", [[dnešek]] "today", [[leden]] "January", [[trujúhelník]] "triangle") needs to be given
	-- manually, using '<gena>'.
	local gen_s
	if toponym_gen_a then
		gen_s = "a"
	elseif toponym_gen_a_u then
		gen_s = {"a", "u"}
	elseif inanimate then
		gen_s = "u"
	else
		gen_s = "a"
	end

	-- Animates with dat_s only in -u (e.g. [[člověk]] "person", [[Bůh]] "God") need to give this manually,
	-- using '<datu>'.
	local dat_s
	if inanimate then
		dat_s = "u"
	elseif base.surname then
		dat_s = "ovi"
	else
		dat_s = {"ovi", "u"}
	end

	-- Inanimates with loc_s in -e/ě other than certain toponyms (see above) need to give this manually, using
	-- <locě>, but it will trigger the second palatalization automatically.
	local loc_s
	if toponym_loc_e then
		loc_s = "ě"
	elseif toponym_loc_e_u then
		loc_s = {"ě", "u"}
	else
		loc_s = dat_s
	end

	-- Velar-stem animates with voc_s in -e (e.g. [[Bůh]] "God", voc_s 'Bože'; [[člověk]] "person", voc_s 'člověče')
	-- need to give this manually using <voce>; it will trigger the first palatalization automatically.
	local voc_s
	if velar then
		voc_s = "u"
	else
		voc_s = "e" -- 'e' will trigger first palatalization in apply_special_cases()
	end

	-- Nom_p in -i will trigger second palatalization in apply_special_cases().
	local nom_p
	if inanimate then
		nom_p = "y"
	else
		nom_p = default_masc_animate_nom_pl(base, stems)
	end

	-- Per IJP and Janda and Townsend:
	-- * loc_p in -ích is currently the default for velars but not otherwise; it will automatically trigger the second
	--   palatalization (e.g. [[práh]] "threshold", loc_p 'prazích'). Otherwise, -ích needs to be given manually using
	--   <locplích>, e.g. [[les]] "forest"; [[hotel]] "hotel"; likewise for loc_p in -ách (e.g. [[plech]]
	--   "metal plate"), using <locplách>.
	-- * Inanimate hard nouns in -c normally have -ech: [[hec]] "joke", [[tác]] "tray", [[truc]], [[kec]], [[frc]],
	--   [[flanc]], [[kibuc]] "kibbutz", [[pokec]] "chat".
	-- In the IJP tables, inanimate reducible nouns in -ček (and most in -cek, although there are many fewer; also some
	-- in -žek, but in this case it's too inconsistent to make the default) regularly have both -ích and -ách in the
	-- locative plural, while similar animate nouns only have -ích. This applies even to nouns like [[háček]] and
	-- [[koníček]] that can be either animate or inanimate. Make sure to exclude nouns in -ck such as [[comeback]] and
	-- [[joystick]], which have only -ích.
	local loc_p
	if inanimate and rfind(lemma, "[cč]ek$") and rfind(stems.vowel_stem, "[cč]k$") then
		loc_p = {"ích", "ách"}
	elseif velar then
		loc_p = "ích"
	else
		loc_p = "ech"
	end

	add_decl(base, stems, gen_s, dat_s, nil, voc_s, loc_s, "em",
		nom_p, "ů", "ům", "y", loc_p, "y")
end

-- Title-bar annotation and category for hard masculines; velar stems are labelled separately.
declprops["hard-m"] = {
	desc = function(base, stems)
		local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")
		return velar and "velar GENDER" or "hard GENDER"
	end,
	cat = function(base, stems)
		local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")
		return velar and "velar-stem" or "hard"
	end
}
-- Semisoft masculine declension. Examples:
-- * Animate in -ius: génius, nuncius, nonius (breed of horse), notárius, ordinárius, patricius, primárius,
--   pronuncius, various names
-- * Animate in -eus: farizeus, basileus, pygmeus ([[skarabeus]] inflects hard in the plural), various names
-- * Inanimate in -ius: nonius (measuring device), rádius, sestercius
-- NOTE: Inanimate nouns in -eus (nukleus, choreus) inflect hard in the plural.
decls["semisoft-m"] = function(base, stems)
	local dat_s, nom_p
	if base.animacy == "inan" then
		dat_s, nom_p = "u", "e"
	else
		nom_p = "ové"
		if base.surname then
			dat_s = "ovi"
		else
			dat_s = {"ovi", "u"}
		end
	end
	-- The locative singular is the same as the dative singular.
	add_decl(base, stems, "a", dat_s, nil, "e", dat_s, "em",
		nom_p, "ů", "ům", "e", "ích", "i")
end

declprops["semisoft-m"] = {cat = "semisoft"}
-- Soft masculine declension.
decls["soft-m"] = function(base, stems)
	base.palatalize_voc = true
	local inanimate = base.animacy == "inan"

	-- Animates with dat_s only in -i need to give this manually, using '<dati>'.
	local dat_s
	if inanimate then
		dat_s = "i"
	elseif base.surname then
		dat_s = "ovi"
	else
		dat_s = {"ovi", "i"}
	end
	local loc_s = dat_s

	-- Per IJP, the vast majority of soft masculine animates take -i in the voc_s, but those in -ec/-ěc take -e with
	-- first palatalization to -če, e.g. [[otec]] "father", [[lovec]] "hunter", [[blbec]] "fool, idiot",
	-- [[horolezec]] "mountaineer", [[znalec]] "expert", [[chlapec]] "boy", [[nadšenec]] "enthusiast", [[luněc]]
	-- (type of bird). Demonyms but not surnames ending in -ec but beginning with a capital letter take either -e or
	-- -i (only the former triggers the first palatalization). Examples: [[Portugalec]], [[Slovinec]] "Slovenian",
	-- [[Japonec]], [[Vietnamec]]. Not [[Kadlec]] (surname).
	local voc_s = "i"
	if base.animacy == "an" and rfind(base.lemma, "[eě]c$") and stems.reducible then
		if not base.surname and rfind(base.lemma, "^" .. com.uppercase_c) then
			voc_s = {"e", "i"}
		else
			voc_s = "e"
		end
	end

	local nom_p
	if inanimate then
		nom_p = "e"
	else
		nom_p = default_masc_animate_nom_pl(base, stems)
	end

	-- Nouns with loc_p in -ech (e.g. [[cíl]] "goal") need to give this manually, using <locplech>.
	add_decl(base, stems, "e", dat_s, nil, voc_s, loc_s, "em",
		nom_p, "ů", "ům", "e", "ích", "i")
end

declprops["soft-m"] = {cat = "soft"}
-- Mixed (hard/soft) masculine declension.
-- NOTE: IJP tends to list the soft endings first, but per their section on this
-- (https://prirucka.ujc.cas.cz/en/?id=220), the hard endings tend to predominate in modern use, so we list them
-- first.
decls["mixed-m"] = function(base, stems)
	local animate = base.animacy == "an"
	local lemma = base.lemma

	if animate and rfind(lemma, "l$") then
		-- [[anděl]] "angel", [[manžel]] "husband", [[strašpytel]] "coward"; 'strašpytel' has a different declension
		-- from the other two, with more soft forms. [[manžel]] has plural in -é or -ové and needs an override.
		local dat_loc_s = base.surname and "ovi" or {"ovi", "u"}
		add_decl(base, stems, "a", dat_loc_s, nil, "i", dat_loc_s, "em",
			"é", "ů", "ům", {"y", "e"}, {"ech", "ích"}, {"y", "i"})
		return
	end

	if animate then
		-- -s/-z: rorýs, platýs, pilous, markýz, všekaz, stávkokaz, penězokaz, listokaz, dřevokaz, zrnokaz, boss.
		-- Others recently moving towards this declension: primas, karas, kalous, konipas, ibis, chabrus, chuďas,
		-- kakabus, kliďas, kandrdas, morous, vágus.
		-- Some names: Alois, Mánes.
		-- Both hard and soft endings throughout. Most have -i and -ové in the nominative plural.
		local dat_loc_s = base.surname and "ovi" or {"u", "i", "ovi"}
		add_decl(base, stems, {"a", "e"}, dat_loc_s, nil, {"e", "i"}, dat_loc_s, "em",
			{"i", "ové"}, "ů", "ům", {"y", "e"}, {"ech", "ích"}, {"y", "i"})
		return
	end

	-- Inanimates. Given in IJP: burel, hnědel, chmel, krevel, kužel, námel, plevel, tmel, zádrhel, apríl, artikul,
	-- koukol, rubl, úběl, plus reducible nouns cumel, chrchel, [[kotel]] "cauldron", sopel, uhel. Also [[městys]].
	-- Many of them are listed in the IJP tables with only hard or with fewer soft forms, so need to be investigated
	-- individually.
	if rfind(lemma, "[ls]$") then
		add_decl(base, stems, {"u", "e"}, {"u", "i"}, nil, {"e", "i"}, {"u", "e", "i"}, "em",
			{"y", "e"}, "ů", "ům", {"y", "e"}, {"ech", "ích"}, {"y", "i"})
	else
		-- -n/-t; hard in the plural: hřeben, ječmen, [[kámen]] "stone", kmen, kořen, křemen, plamen,
		-- [[pramen]] "source", [[řemen]] "strap", den, týden, [[loket]] "elbow".
		-- There may be deviations (e.g. soft plural forms for [[den]]), so need to be investigated individually.
		add_decl(base, stems, {"u", "e"}, {"u", "i"}, nil, "i", {"u", "i"}, "em",
			"y", "ů", "ům", "y", "ech", "y")
	end
end

declprops["mixed-m"] = {cat = "mixed"}
-- Masculines in -a.
decls["a-m"] = function(base, stems)
	local stem = stems.vowel_stem
	-- husita → husité, izraelita → izraelité, jezuita → jezuité, kosmopolita → kosmopolité, táborita → táborité
	-- fašista → fašisté, filatelista → filatelisté, fotbalista → fotbalisté, kapitalista → kapitalisté,
	-- marxista → marxisté, šachista → šachisté, terorista → teroristé. NOTE: most of these words actually appear
	-- in the IJP tables with -é/-i, so we go accordingly.
	--
	-- gymnasta → gymnasté, fantasta → fantasté; also chiliasta, orgiasta, scholiasta, entuziasta, dynasta, ochlasta,
	-- sarkasta, vymasta; NOTE: Only 'gymnasta' actually given with just -é; 'fantasta' with -ové/-é, 'dynasta' and
	-- 'ochlasta' with just -ové, vymasta not in IJP (no plural given in SSJC), and the rest with -é/-i. So we go
	-- accordingly.
	local it_ist = rfind(stem, "is?t$") or rfind(stem, "ast$")

	-- Velar nouns (e.g. [[sluha]] "servant") have -ích in the loc_p (which triggers the second palatalization)
	-- instead of -ech. Nouns whose stem ends in a soft consonant ([[rikša]], [[paša]], [[bača]], [[mahárádža]],
	-- [[paňáca]], etc.) behave likewise.
	-- FIXME: [[pária]] "pariah", [[Maria]] etc.
	local velar = base.velar or not base["-velar"] and rfind(stem, com.velar_c .. "$")
	local loc_p
	if velar or rfind(stem, com.inherently_soft_c .. "$") then
		loc_p = "ích"
	else
		loc_p = "ech"
	end

	local nom_p
	if it_ist then
		nom_p = {"é", "i"}
	else
		nom_p = "ové"
	end

	add_decl(base, stems, "y", "ovi", "u", "o", "ovi", "ou",
		nom_p, "ů", "ům", "y", loc_p, "y")
end

declprops["a-m"] = {cat = "GENPOS in -a"}
-- Masculines in -e: [[zachránce]] "savior"; [[soudce]] "judge"; etc.
-- At least two inanimates: [[průvodce]] "guide, guidebook; computing wizard"; [[správce]] "manager (software
-- program), configuration program".
decls["e-m"] = function(base, stems)
	local dat_s, nom_p
	if base.animacy == "inan" then
		dat_s, nom_p = "i", "e"
	else
		-- Nouns with -ové as well (e.g. [[soudce]] "judge") will need to specify that manually, e.g. <nompli:ové>.
		nom_p = "i"
		if base.surname then
			dat_s = "ovi"
		else
			dat_s = {"ovi", "i"}
		end
	end
	-- The locative singular is the same as the dative singular.
	add_decl(base, stems, "e", dat_s, nil, "-", dat_s, "em",
		nom_p, "ů", "ům", "e", "ích", "i")
end

declprops["e-m"] = {cat = "GENPOS in -e"}
-- Masculines in -i/-y.
-- [[kivi]] "kiwi (bird)"; [[kuli]] "coolie"; [[lori]] "lory, lorikeet (bird)" (loc_pl 'loriech/loriích/lorich');
-- [[vini]] "parrot of the genus Vini"; [[yetti]]/[[yeti]] "yeti". Other examples: [[aguti]], [[efendi]], [[hadži]],
-- [[pekari]], [[regenschori]], [[yetti]]/[[yeti]].
--
-- [[grizzly]]/[[grizly]] "grizzly bear"; [[pony]] "pony"; [[husky]] "husky"; [[dandy]] "dandy"; [[Billy]] "billy".
--
-- NOTE: Some nouns in -y are regular soft stems, e.g. [[gay]] "gay person"; [[gray]] "gray (unit of absorbed
-- radiation)"; [[Nagy]] (surname).
--
-- NOTE: The stem ends in -i/-y.
decls["i-m"] = function(base, stems)
	local nom_p = {"ové", ""}
	local gen_p = {"ů", "ch"}
	local dat_p = {"ům", "m"}
	local acc_p = {"e", ""}
	local loc_p = {"ích", "ch"}
	-- ins_pl 'kivii/kivimi'
	local ins_p = {"i", "mi"}
	add_decl(base, stems, "ho", "mu", nil, "-", "m", "m",
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["i-m"] = {cat = "GENPOS in -i/-y"}
-- Masculines in -í/-ý.
-- [[kádí]] "qadi (Islamic judge)", [[mahdí]] "Mahdi (Islamic prophet)", [[muftí]] "mufti (Islamic scholar)",
-- [[sipáhí]] "sipahi (Algerian cavalryman in the French army)"
--
-- No obvious examples in -ý, but the support is there.
--
-- NOTE: The stem ends in -í/-ý.
decls["í-m"] = function(base, stems)
	local nom_p = {"ové", ""}
	local gen_p = {"ů", "ch"}
	local dat_p = {"ům", "m"}
	local acc_p = {"e", ""}
	add_decl(base, stems, "ho", "mu", nil, "-", "m", "m",
		nom_p, gen_p, dat_p, acc_p, "ích", "mi")
end

declprops["í-m"] = {cat = "GENPOS in -í/-ý"}
-- Masculines in -ie.
-- [[zombie]] "zombie" (also fem/neut), [[hippie]] "hippie", [[yuppie]] "yuppie", [[rowdie]] "rowdy/hooligan"
--
-- NOTE: The stem ends in -i (not -ie, because of the plural).
decls["ie-m"] = function(base, stems)
	-- Every plural slot has "es" as its second alternative.
	local nom_p = {"ové", "es"}
	local gen_p = {"ů", "es"}
	local dat_p = {"ům", "es"}
	local acc_p = {"e", "es"}
	local loc_p = {"ích", "es"}
	local ins_p = {"i", "es"}
	add_decl(base, stems, "eho", "emu", nil, "-", "em", "em",
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["ie-m"] = {cat = "GENPOS in -ie"}
-- Masculines in -ee, e.g. [[Yankee]] "Yankee".
--
-- NOTE: The stem ends in -ee.
decls["ee-m"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ho", "mu", nil, "-", "m", "m"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ové", "ů", "ům", "e", "ích", "i"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["ee-m"] = {cat = "GENPOS in -ee"}
-- Masculines in -o.
-- [[kápo]] "head, leader"; [[lamželezo]] "strongman"; [[torero]] "bullfighter"; [[žako]] "African gray parrot";
-- [[dingo]] "dingo"; [[kakapo]] "kakapo" (given in Wiktionary with dat_s/loc_s in -ovi only not -ovi/-u; probably
-- wrong but not in IJP); [[maestro]] "maestro"; [[Bruno]] "Bruno", [[Hugo]] "Hugo"; [[Ivo]] "Yves" (these names
-- are singular-only per IJP); [[Kvido]] "Guido, Guy" (per IJP has accusative in -a or -ona); [[Oto]] "Otto" (per
-- IJP also declinable like virile -a masculines; singular-only); [[Kuřátko]] (a surname; how declined?);
-- [[Picasso]] (surname; how declined?); [[Pluto]] "Pluto (God)", also "Pluto (planet)", which is inanimate;
-- [[Samo]]/[[Sámo]] "Samo (7th century Slavic ruler)" (dat_s/loc_s only in -ovi, needs override); [[Tomio]]
-- "Tomio (Japanese male given name)" (how declined?); [[nemakačenko]] "idler, loafer" (given in Wiktionary with
-- dat_s/loc_s in -ovi only, as for [[kakapo]]); [[nefachčenko]] "idler, loafer"; note also [[gadžo]] "gadjo",
-- which has a unique declension.
decls["o-m"] = function(base, stems)
	-- Velar nouns ([[žako]], [[dingo]], etc.) have -ích in the loc_p (which triggers the second palatalization)
	-- instead of -ech.
	local loc_p
	if base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$") then
		loc_p = "ích"
	else
		loc_p = "ech"
	end

	-- Inanimates e.g. [[Pluto]] (planet) have -u only, like for normal hard masculines.
	local dat_s
	if base.animacy == "inan" then
		dat_s = "u"
	elseif base.surname then
		dat_s = "ovi"
	else
		dat_s = {"ovi", "u"}
	end

	-- The locative singular is the same as the dative singular.
	add_decl(base, stems, "a", dat_s, nil, "-", dat_s, "em",
		"ové", "ů", "ům", "y", loc_p, "y")
end

declprops["o-m"] = {cat = "GENPOS in -o"}
-- Masculines in -u.
-- [[emu]] "emu", [[guru]] "guru", [[kakadu]] "cockatoo", [[marabu]] "marabou" (declined the same way)
-- [[Osamu]] "Osamu (Japanese male given name)" [how declined?]
-- [[Višnu]] "Vishnu" (declined like [[guru]] but singular-only)
-- [[budižkničemu]] "good-for-nothing, ne'er-do-well" (indeclinable in the singular, declinable as masculine hard
-- stem budižkničemové etc. in the plural, declinable as feminine hard stem budižkničemy etc. in the plural when
-- feminine).
--
-- NOTE: The stem ends in -u.
decls["u-m"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "a", "ovi", nil, "-", "ovi", "em"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ové", "ů", "ům", "y", "ech", "y"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["u-m"] = {cat = "GENPOS in -u"}
-- Masculine t-stems, e.g. [[kníže]] "prince", [[hrabě]] "earl", [[markrabě]] "margrave".
decls["tstem-m"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ete", "eti", "ete", "-", "eti", "etem"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ata", "at", "atům", "ata", "atech", "aty"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["tstem-m"] = {cat = "t-stem"}
-- Hard feminine declension.
decls["hard-f"] = function(base, stems)
	base.no_palatalize_c = true
	-- Forms like 'ayahuascy' are allowed.
	if base.c_as_k then
		base.hard_c = true
	end

	-- [[skica]] "sketch", [[gejša]] "geisha", [[rikša]] "rickshaw (vehicle)"; [[arakača]], [[čača]], [[čiča]] (drink),
	-- [[dača]] "dacha", [[gutaperča]] "guttapercha", [[viskača]]; [[babča]], [[číča]], [[káča]], [[mamča]], [[úča]].
	-- Also appears to apply to ď (e.g. [[Naďa]]) and ť, as well as certain words with stems in -ň and -j (e.g.
	-- [[doňa]], and personal names such as [[Táňa]] and [[Darja]]), which normally have a mixed declension.
	-- NOTE(review): this reads `base.vowel_stem` rather than `stems.vowel_stem`, unlike most other declension
	-- functions here — confirm this is intended.
	local dat_s = "ě"
	if rfind(base.vowel_stem, "[cčšžďťjň]$") and not base.c_as_k then
		dat_s = {"ě", "i"}
	end

	-- The locative singular is the same as the dative singular.
	add_decl(base, stems, "y", dat_s, "u", "o", dat_s, "ou",
		"y", "", "ám", "y", "ách", "ami")
end

declprops["hard-f"] = {cat = "hard"}
-- Soft feminine declension.
-- This also includes feminines in -ie, e.g. [[belarie]], [[signorie]], [[uncie]], and feminines in -oe, e.g.
-- [[kánoe]], [[aloe]] and medical terms like [[dyspnoe]], [[apnoe]], [[hemoptoe]], [[kalanchoe]].
decls["soft-f"] = function(base, stems)
	-- Nouns in -ice like [[ulice]] "street" have null genitive plural e.g. 'ulic'; nouns in -yně e.g. [[přítelkyně]]
	-- "girlfriend" have gen pl 'přítelkyň' or 'přítelkyní' with two possible endings; otherwise -í. (Alternation
	-- between -ň and -n and between -e and -ě handled automatically by combine_stem_ending().)
	local gen_p
	if rfind(base.lemma, "ice$") then
		gen_p = ""
	elseif rfind(base.lemma, "yně$") then
		gen_p = {"", "í"}
	else
		gen_p = "í"
	end

	-- Vocative really ends in -e, not just a copy of the nominative; cf. [[sinfonia]], which is soft-f except for
	-- the nominative and has -e in the vocative singular.
	add_decl(base, stems, "e", "i", "i", "e", "i", "í",
		"e", gen_p, "ím", "e", "ích", "emi")
end

declprops["soft-f"] = {cat = "soft"}
-- Mixed (hard/soft) feminine declension.
-- Lowercase nouns in -ňa (e.g. bárišňa/báryšňa, doňa, dueňa, piraňa, vikuňa) and -ja (e.g. maracuja, papája, sója).
-- Also non-personal proper nouns in -ňa (e.g. [[Keňa]] "Kenya") and -ja (e.g. [[Troja]]/[[Trója]] "Troy",
-- [[Amudarja]] "Amu Darya"). Does not appear to apply to personal proper nouns (e.g. [[Táňa]] "Tanya", [[Darja]]
-- "Daria"), which usually decline like [[gejša]], [[dača]], [[skica]].
decls["mixed-f"] = function(base, stems)
	local gen_s = {"i", "e"}
	local dat_s = {"e", "i"}
	local loc_s = {"e", "i"}
	local nom_p = {"i", "e"}
	local gen_p = {"", "í"}
	local dat_p = {"ám", "ím"}
	local acc_p = {"i", "e"}
	local loc_p = {"ách", "ích"}
	local ins_p = {"ami", "emi"}
	add_decl(base, stems, gen_s, dat_s, "u", "o", loc_s, "ou",
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["mixed-f"] = {cat = "mixed"}
-- Soft feminines with a zero ending, e.g. [[dlaň]] "palm (of the hand)".
decls["cons-f"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "e", "i", "-", "i", "i", "í"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "e", "í", "ím", "e", "ích", "emi"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["cons-f"] = {cat = "soft zero-ending"}
-- Feminine i-stems.
decls["istem-f"] = function(base, stems)
	-- See above under apply_special_cases(); -E causes depalatalization of ť/ď/ň.
	local dat_p, loc_p = "Em", "Ech"
	add_decl(base, stems, "i", "i", "-", "i", "i", "í",
		"i", "í", dat_p, "i", loc_p, "mi")
end

declprops["istem-f"] = {cat = "i-stem"}
-- Mixed feminine i-stems. The subtype is selected by `base.mixedistem`; an unrecognized value raises an error.
decls["mixed-istem-f"] = function(base, stems)
	-- Use of ě vs E below is intentional. Contrast [[oběť]] dat pl 'obětem' (depalatalizing) with [[nit]] ins pl
	-- 'nitěmi' (palatalizing). See comment above under apply_special_cases().
	local endings_by_type = {
		-- pěst, past, mast, lest [reducible; ins pl 'lstmi'], pelest, propust, plst, oběť, zeď [reducible; ins pl
		-- 'zdmi'], paměť [ins pl 'pamětmi/paměťmi']
		["pěst"] = {gen_s = "i", nom_p = "i", dat_p = {"ím", "Em"}, loc_p = {"ích", "Ech"}, ins_p = "mi"},
		-- moc, nemoc, pomoc, velmoc; NOTE: pravomoc has -i/-e alternation in gen_s, nom_p
		["moc"] = {gen_s = "i", nom_p = "i", dat_p = {"Em", "ím"}, loc_p = {"Ech", "ích"}, ins_p = "ěmi"},
		-- myš, veš [reducible, ins pl vešmi], hruď, měď, pleť, spleť, směs, smrt, step, odpověď [ins pl
		-- 'odpověďmi/odpovědmi'], šeď, závěť [ins pl 'závěťmi/závětmi'], plsť [ins pl 'plstmi']
		["myš"] = {gen_s = "i", nom_p = "i", dat_p = "ím", loc_p = "ích", ins_p = "mi"},
		-- lež [reducible], noc, mosaz, rez [reducible], ves [reducible], mysl, sůl, běl, žluť
		["noc"] = {gen_s = "i", nom_p = "i", dat_p = "ím", loc_p = "ích", ins_p = "ěmi"},
		-- žluč, moč, modř, čeleď, kapraď, záď, žerď, čtvrť/čtvrt, drť, huť, chuť, nit, pečeť, závrať, pouť, stať,
		-- ocel
		["žluč"] = {gen_s = {"i", "ě"}, nom_p = {"i", "ě"}, dat_p = "ím", loc_p = "ích", ins_p = "ěmi"},
		-- loď, suť
		["loď"] = {gen_s = {"i", "ě"}, nom_p = {"i", "ě"}, dat_p = "ím", loc_p = "ích", ins_p = {"ěmi", "mi"}},
	}

	local endings = endings_by_type[base.mixedistem]
	if not endings then
		error(("Unrecognized value '%s' for 'mixedistem', should be one of 'pěst', 'moc', 'myš', 'noc', 'žluč' or 'loď'"):
			format(base.mixedistem))
	end

	-- The nominative plural ending(s) are also passed in the following plural slot (between dat_p and loc_p).
	add_decl(base, stems, endings.gen_s, "i", "-", "i", "i", "í",
		endings.nom_p, "í", endings.dat_p, endings.nom_p, endings.loc_p, endings.ins_p)
end

declprops["mixed-istem-f"] = {
	-- Include subtype in the table description but not in the category to avoid too many categories.
	desc = function(base, stems)
		local template = "mixed i-stem [type '%s'] GENDER"
		return template:format(base.mixedistem)
	end,
	cat = function(base, stems)
		local subtype_cat = ("mixed i-stem GENPOS (type '%s')"):format(base.mixedistem)
		return {"mixed i-stem", subtype_cat}
	end,
}
-- Feminines in -i: [[máti]] "mother" (singular-only), [[pramáti]] "foremother"; very similar to the 'noc' mixed
-- i-stem type.
decls["i-f"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "i", "i", "-", "i", "i", "í"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "i", "í", "ím", "i", "ích", "ěmi"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["i-f"] = {cat = "GENPOS in -i"}
-- Feminines in -ea. The stem ends in -e. Three subtypes are distinguished: technical terms (`base.tech`), personal
-- names (`base.persname`) and everything else.
decls["ea-f"] = function(base, stems)
	if base.tech then
		-- diarea, gonorea, chorea, nauzea, paleogea, seborea, trachea
		add_decl(base, stems, "y", "i", "u", "o", "i", "ou",
			"y", "í", {"ám", "ím"}, "y", {"ách", "ích"}, "ami")
		return
	end

	if base.persname then
		-- Medea, Andrea, etc.
		local gen_s = {"y", "je", "ji"}
		local dat_s = {"e", "je", "ji"}
		local loc_s = {"e", "je", "ji"}
		add_decl(base, stems, gen_s, dat_s, "u", "o", loc_s, "ou",
			-- this is a guess, based on the same as below; plural of personal names not attested in IJP
			{"y", "je"}, "jí", {"ám", "jím"}, {"y", "je"}, {"ách", "jích"}, {"ami", "jemi"})
		return
	end

	-- idea, odysea ("wandering pilgrimage"), orchidea, palea, spirea
	-- proper names Galilea, Judea, Caesarea, Korea, Odyssea ("epic poem")
	local gen_s = {"y", "je"}
	local ins_s = {"ou", "jí"}
	add_decl(base, stems, gen_s, "ji", "u", "o", "ji", ins_s,
		{"y", "je"}, "jí", {"ám", "jím"}, {"y", "je"}, {"ách", "jích"}, {"ami", "jemi"})
end

-- Technical terms get an extra category in addition to the general one.
declprops["ea-f"] = {
	cat = function(base, stems)
		if not base.tech then
			return "GENPOS in -ea"
		end
		return {"GENPOS in -ea", "technical GENPOS in -ea"}
	end
}
-- Feminines in -oa/-ua. The stem ends in -o/-u.
-- stoa, kongrua; proper names Samoa, Managua, Nikaragua, Capua
decls["oa-f"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "y", "i", "u", "o", "i", "ou"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "y", "í", "ám", "y", "ách", "ami"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["oa-f"] = {cat = "GENPOS in -oa/-ua"}
-- Feminines in -ia. The stem ends in -i.
-- belaria, signoria, uncia; paranoia, sinfonia;
-- proper names Alexandria, Alexia, Livia, Monrovia, Olympia, Sofia
decls["ia-f"] = function(base, stems)
	-- Identical to soft declension except for nom sg, so delegate to the soft feminine declension function.
	local decline_soft_f = decls["soft-f"]
	decline_soft_f(base, stems)
end

declprops["ia-f"] = {cat = "GENPOS in -ia"}
-- Hard neuter declension. The locative singular and locative plural depend on whether the stem is velar and on the
-- shape of the lemma.
decls["hard-n"] = function(base, stems)
	local lemma = base.lemma
	local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")

	-- NOTE: Per IJP it appears the meaning of the preceding preposition makes a difference: 'o' = "about" takes
	-- '-u' or '-ě', while 'na/v' = "in, on" normally takes '-ě'.
	local loc_s
	if velar then
		-- Exceptions: [[mléko]] "milk" ('mléku' or 'mléce'), [[břicho]] "belly" ('břiše' or (less often) 'břichu'),
		-- [[roucho]] ('na rouchu' or 'v rouše'; why the difference in preposition?).
		loc_s = "u"
	elseif rfind(lemma, "dlo$") then
		-- IJP says nouns in -dlo take only -e but the declension tables show otherwise. It appears -u is possible
		-- but significantly less common.
		loc_s = {"ě", "u"}
	elseif rfind(lemma, "lo$") then
		-- Other nouns in -lo usually take just -e ([[čelo]] "forehead", [[kolo]] "wheel", [[křeslo]] "armchair",
		-- [[máslo]] "butter", [[peklo]] "hell", [[sklo]] "glass", [[světlo]] "light", [[tělo]] "body"; but
		-- [[číslo]] "number" with -e/-u; [[zlo]] "evil" and [[kouzlo]] "spell" with -u/-e).
		loc_s = "ě"
	elseif rfind(lemma, "[sc]tvo$") or rfind(lemma, "ivo$") then
		loc_s = "u"
	else
		-- Per IJP: Borrowed words and abstracts take -u (e.g. [[banjo]]/[[bendžo]]/[[benžo]] "banjo", [[depo]]
		-- "depot", [[chladno]] "cold", [[mokro]] "damp, dampness", [[právo]] "law, right", [[šeru]] "twilight?",
		-- [[temno]] "dark, darkness", [[tempo]] "rate, tempo", [[ticho]] "quiet, silence", [[vedro]] "heat") and
		-- others often take -ě/-u. Formerly we defaulted to -ě/-u but it seems better to default to just -u,
		-- similarly to hard masculines.
		-- {"ě", "u"}
		loc_s = "u"
	end

	local loc_p
	if rfind(lemma, "isko$") then
		-- Note, lemmas in -isko also have mixed-reducible as default, handled in determine_default_reducible().
		-- Note also, ending -ích triggers the second palatalization.
		loc_p = {"ích", "ách"}
	elseif velar then
		-- Diminutives in -ko, -čko, -tko; also [[lýtko]], [[děcko]], [[vrátka]], [[dvířka]], [[jho]], [[roucho]],
		-- [[tango]], [[mango]], [[sucho]], [[blaho]], [[víko]], [[echo]], [[embargo]], [[largo]], [[jericho]] (from
		-- IJP). Also foreign nouns in -kum: [[antibiotikum]], [[narkotikum]], [[afrodiziakum]], [[analgetikum]], etc.
		-- [[jablko]] "apple" has '-ách' or '-ích' and needs an override; likewise for [[vojsko]] "troop"; [[riziko]]
		-- "risk" normally has '-ích' and needs an override.
		loc_p = "ách"
	else
		loc_p = "ech"
	end

	add_decl(base, stems, "a", "u", "-", "-", loc_s, "em",
		"a", "", "ům", "a", loc_p, "y")
	-- FIXME: paired body parts e.g. [[rameno]] "shoulder" (gen_p/loc_p 'ramenou/ramen'), [[koleno]] "knee"
	-- (gen_p/loc_p 'kolenou/kolen'), [[prsa]] "chest, breasts" (plurale tantum; gen_p/loc_p 'prsou').
	-- FIXME: Nouns with both neuter and feminine forms in the plural, e.g. [[lýtko]] "calf (of the leg)",
	-- [[bedro]] "hip", [[vrátka]] "gate".
end

-- Title-bar annotation and category for hard neuters; velar stems are labelled separately.
declprops["hard-n"] = {
	desc = function(base, stems)
		local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")
		return velar and "velar GENDER" or "hard GENDER"
	end,
	cat = function(base, stems)
		local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")
		return velar and "velar-stem" or "hard"
	end
}
-- Semisoft neuter declension. Examples:
-- * In -ao: [[kakao]] "cacao", [[makao]] "Macao (gambling card game, see Wikipedia)", [[curaçao]] "curaçao (liqueur)"
--   (IJP gives gen pl 'curaç' but ASSC [https://slovnikcestiny.cz/heslo/cura%C3%A7ao/0/9967] says 'curaçí' as
--   expected), [[farao]] "faro (card game)"; also [[Makao]], [[Pathet Lao]], but these are sg-only
-- * In -eo: [[stereo]], [[rodeo]], [[video]], [[solideo]]; also [[Borneo]], [[Montevideo]], but these are sg-only
-- * In -io: [[rádio]] "radio", [[gramorádio]], [[studio]], [[scenário]], [[trio]], [[ážio]] (also spelled [[agio]]),
--   [[disážio]], [[folio]], [[vibrio]]; also [[arpeggio]], [[adagio]], [[capriccio]], [[solfeggio]] although
--   pronounced the Italian way without /i/; also [[Ohio]], [[Ontario]], [[Tokio]], but these are sg-only
-- * In -uo: only [[duo]]
-- * In -yo: only [[embryo]]
-- * In -eum: [[muzeum]], [[lyceum]], [[linoleum]], [[ileum]], etc.
-- * In -ium: [[atrium]] "atrium", most chemical elements, etc.
-- * In -uum: [[individuum]], [[kontinuum]], [[premenstruum]], [[residuum]], [[vakuum]]/[[vacuum]]
-- * In -yum: only [[baryum]] "barium" (none others in SSJC)
-- * In -ion: [[enkómion]] "encomium", [[eufonion]] (variant of [[eufonium]]), [[amnion]], [[ganglion]],
--   [[gymnasion]], [[scholion]], [[kritérion]] (rare for [[kritérium]]), [[onomatopoion]] (variant of
--   [[onomatopoie]]), [[symposion]], [[synedrion]]; also [[Byzantion]], but this is sg-only; most words in -ion are
--   masculine
-- Hard in the singular, mostly soft in the plural. Those in -eo and -uo have alternative hard endings in the
-- dat/loc/ins pl, but not those in -eum or -uum. Those in -ao have only hard endings except in the gen pl. (There
-- are apparently no neuters in -eon; those in -eon or -yon e.g. [[akordeon]], [[neon]], [[nukleon]], [[karyon]],
-- [[Lyon]] are masculine.)
decls["semisoft-n"] = function(base, stems)
	-- Default: soft endings in the dat/loc/ins plural.
	local dat_p, loc_p, ins_p = "ím", "ích", "i"
	if rfind(base.actual_lemma, "ao$") then
		-- Only hard endings.
		dat_p, loc_p, ins_p = "ům", "ech", "y"
	elseif rfind(base.actual_lemma, "[eu]o$") then
		-- Soft endings with hard alternatives.
		dat_p, loc_p, ins_p = {"ím", "ům"}, {"ích", "ech"}, {"i", "y"}
	end
	add_decl(base, stems, "a", "u", "-", "-", "u", "em",
		"a", "í", dat_p, "a", loc_p, ins_p)
end

declprops["semisoft-n"] = {cat = "semisoft"}
-- Soft neuter declension (lemmas in -e/-ě).
decls["soft-n"] = function(base, stems)
	-- Examples: [[moře]] "sea", [[slunce]] "sun", [[srdce]] "heart", [[citoslovce]] "interjection",
	-- [[dopoledne]] "late morning", [[odpoledne]] "afternoon", [[hoře]] "sorrow, grief" (archaic or literary),
	-- [[inhalace]] "inhalation", [[kafe]] "coffee", [[kanape]] "sofa", [[kutě]] "bed", [[Labe]] "Elbe (singular
	-- only)", [[líce]] "cheek", [[lože]] "bed", [[nebe]] "sky; heaven", [[ovoce]] "fruit", [[pole]] "field",
	-- [[poledne]] "noon", [[příslovce]] "adverb", [[pukrle]] "curtsey" (also t-n), [[vejce]] "egg" (NOTE: gen pl
	-- 'vajec').
	--
	-- The many nouns in -iště take a null genitive plural; everything else takes -í.
	local gen_p
	if rfind(base.vowel_stem, "išť$") then
		gen_p = ""
	else
		gen_p = "í"
	end
	add_decl(base, stems, "e", "i", "-", "-", "i", "em",
		"e", gen_p, "ím", "e", "ích", "i")
	-- NOTE: Some neuter words in -e are indeclinable, e.g. [[Belize]], [[Chile]], [[garde]] "chaperone",
	-- [[karaoke]], [[karate]], [[re]] "double raise (card games)", [[ukulele]], [[Zimbabwe]], [[zombie]] (pl.
	-- 'zombie' or 'zombies'); some are nearly indeclinable, e.g. [[finále]], [[chucpe]]; see mostly-indecl below.
end

declprops["soft-n"] = {
	cat = "soft",
}
-- Neuter declension for lemmas whose stem itself ends in -í/-ý.
-- `base` is the parsed indicator spec for the word; `stems` is the stem set being declined.
decls["í-n"] = function(base, stems)
-- [[nábřeží]] "waterfront" and a zillion others; also [[úterý]] "Tuesday".
-- NOTE: The stem ends in -í/-ý.
-- Because the long vowel is part of the stem, most endings are empty; only "m", "ch" and "mi" are appended.
-- NOTE(review): ending order is presumably gen_s, dat_s, acc_s, voc_s, loc_s, ins_s, then nom_p, gen_p, dat_p, acc_p,
-- loc_p, ins_p -- confirm against add_decl().
add_decl(base, stems, "", "", "-", "-", "", "m",
"", "", "m", "", "ch", "mi")
end
-- Category label for this declension. GENPOS is a placeholder (presumably substituted by the categorization code --
-- confirm).
declprops["í-n"] = {
cat = "GENPOS in -í/-ý"
}
-- Neuter n-stem declension (lemmas in -meno). Several singular cases offer more than one ending, given as lists.
-- `base` is the parsed indicator spec for the word; `stems` is the stem set being declined.
decls["n-n"] = function(base, stems)
-- E.g. [[břemeno]] "burden" (also [[břímě]], use 'decllemma:'); [[písmeno]] "letter"; [[plemeno]] "breed";
-- [[rameno]] "shoulder" (also [[rámě]], use 'decllemma:'); [[semeno]] "seed" (also [[sémě]], [[símě]], use
-- 'decllemma:'); [[temeno]] "crown (of the head)"; [[vemeno]] "udder"
-- NOTE(review): ending order is presumably gen_s, dat_s, acc_s, voc_s, loc_s, ins_s, then nom_p, gen_p, dat_p, acc_p,
-- loc_p, ins_p -- confirm against add_decl().
add_decl(base, stems, {"a", "e"}, {"i", "u"}, "-", "-", {"ě", "i", "u"}, "em",
"a", "", "ům", "a", "ech", "y")
end
-- Category label for this declension.
declprops["n-n"] = {
cat = "n-stem"
}
-- Neuter t-stem declension: the singular oblique endings start with -et-, the plural endings with -at-.
-- `base` is the parsed indicator spec for the word; `stems` is the stem set being declined.
decls["tstem-n"] = function(base, stems)
-- E.g. [[batole]] "toddler", [[čuně]] "pig", [[daňče]] "fallow deer fawn", [[děvče]] "girl", [[ďouče]] "girl"
-- (dialectal), [[dítě]] "child" (NOTE: feminine in the plural [[děti]], declined as a feminine i-stem), [[dvojče]]
-- "twin", [[hádě]] "young snake", [[house]] "gosling", [[hříbě]] "foal" (pl. hříbata), [[jehně]] "lamb", [[kavče]]
-- "young jackdaw; chough", [[káče]] "duckling", [[káně]] "buzzard chick" (NOTE: also feminine meaning "buzzard"),
-- [[klíště]] "tick", [[kose]] "blackbird chick" (rare), [[kuře]] "chick (young chicken)", [[kůzle]]
-- "kid (young goat)", [[lišče]] "fox cub", [[lvíče]] "lion cub", [[medvídě]] "bear cub", [[mládě]] "baby animal",
-- [[morče]] "guinea pig", [[mrně]] "toddler", [[nemluvně]] "infant", [[novorozeně]] "newborn", [[orle]] "eaglet",
-- [[osle]] "donkey foal", [[pachole]] "boy (obsolete); page, squire", [[páže]] "page, squire", [[podsvinče]]
-- "suckling pig", [[prase]] "pig", [[prtě]] "toddler", [[ptáče]] "chick (young bird)",
-- [[robě]] "baby, small child", [[saranče]] "locust" (NOTE: also feminine), [[sele]] "piglet",
-- [[slůně]] "baby elephant", [[škvrně]] "toddler", [[štěně]] "puppy", [[tele]] "calf", [[velbloudě]] "camel colt",
-- [[vlče]] "wolf cub", [[vnouče]] "grandchild", [[vyžle]] "small hunting dog; slender person",
-- [[zvíře]] "animal, beast".
--
-- Some referring to inanimates, e.g. [[doupě]] "lair" (pl. doupata), [[koště]]/[[chvoště]] "broom", [[paraple]]
-- "umbrella", [[poupě]] "bud", [[pukrle]] "curtsey" (also soft-n), [[rajče]] "tomato", [[šuple]] "drawer",
-- [[varle]] "testicle", [[vole]] "craw (of a bird); goiter".
-- NOTE(review): ending order is presumably gen_s, dat_s, acc_s, voc_s, loc_s, ins_s, then nom_p, gen_p, dat_p, acc_p,
-- loc_p, ins_p -- confirm against add_decl().
add_decl(base, stems, "ete", "eti", "-", "-", "eti", "etem",
"ata", "at", "atům", "ata", "atech", "aty")
end
-- Category label for this declension.
declprops["tstem-n"] = {
cat = "t-stem"
}
-- Neuter declension for Greek-origin lemmas in -ma; all oblique endings start with -at-.
-- `base` is the parsed indicator spec for the word; `stems` is the stem set being declined.
decls["ma-n"] = function(base, stems)
-- E.g. [[drama]] "drama", [[dogma]] "dogma", [[aneurysma]]/[[aneuryzma]] "aneurysm", [[dilema]] "dilemma",
-- [[gumma]] "gumma" (non-cancerous syphilitic growth), [[klima]] "climate", [[kóma]] "coma", [[lemma]] "lemma",
-- [[melisma]] "melisma", [[paradigma]] "paradigm", [[plasma]]/[[plazma]] "plasma [partly ionized gas]"
-- (note [[plasma]]/[[plazma]] "blood plasma" is feminine), [[revma]] "rheumatism", [[schéma]] "schema, diagram",
-- [[schisma]]/[[schizma]] "schism", [[smegma]] "smegma", [[sofisma]]/[[sofizma]] "sophism", [[sperma]] "sperm",
-- [[stigma]] "stigma", [[téma]] "theme", [[trauma]] "trauma", [[trilema]] "trilemma", [[zeugma]] "zeugma".
-- NOTE(review): ending order is presumably gen_s, dat_s, acc_s, voc_s, loc_s, ins_s, then nom_p, gen_p, dat_p, acc_p,
-- loc_p, ins_p -- confirm against add_decl().
add_decl(base, stems, "atu", "atu", "-", "-", "atu", "atem",
"ata", "at", "atům", "ata", "atech", "aty")
end
-- Category label for this declension.
declprops["ma-n"] = {
cat = "ma-stem"
}
-- Adjectival declension: generate the full adjective paradigm with Module:cs-adjective and copy the forms matching
-- the noun's gender, animacy and number into the noun's own slots.
decls["adj"] = function(base, stems)
	-- Indicators to forward to the adjective module; currently none, so the spec suffix stays empty.
	local props = {}
	local propspec = table.concat(props, ".")
	propspec = propspec ~= "" and ("<" .. propspec .. ">") or propspec
	local adj_alternant_multiword_spec = require("Module:cs-adjective").do_generate_forms({base.lemma .. propspec})

	-- Copy each {adjective slot, noun slot} pair, in list order.
	local function copy_all(slot_pairs)
		for _, slot_pair in ipairs(slot_pairs) do
			base.forms[slot_pair[2]] = adj_alternant_multiword_spec.forms[slot_pair[1]]
		end
	end

	if base.number ~= "pl" then
		local singular_by_gender = {
			-- The masculine accusative singular is deliberately not copied here.
			m = {
				{"nom_m", "nom_s"}, {"gen_mn", "gen_s"}, {"dat_mn", "dat_s"},
				{"loc_mn", "loc_s"}, {"ins_mn", "ins_s"},
			},
			f = {
				{"nom_f", "nom_s"}, {"gen_f", "gen_s"}, {"dat_f", "dat_s"},
				{"acc_f", "acc_s"}, {"loc_f", "loc_s"}, {"ins_f", "ins_s"},
			},
		}
		-- Any gender other than masculine or feminine falls back to the neuter forms.
		local neuter_singular = {
			{"nom_n", "nom_s"}, {"gen_mn", "gen_s"}, {"dat_mn", "dat_s"},
			{"acc_n", "acc_s"}, {"loc_mn", "loc_s"}, {"ins_mn", "ins_s"},
		}
		copy_all(singular_by_gender[base.gender] or neuter_singular)
		if not base.forms.voc_s then
			iut.insert_forms(base.forms, "voc_s", base.forms.nom_s)
		end
	end

	if base.number ~= "sg" then
		local nom_source, acc_source
		if base.gender == "m" then
			-- Inanimate masculines share the feminine nominative plural.
			nom_source = base.animacy == "an" and "nom_mp_an" or "nom_fp"
			acc_source = "acc_mfp"
		elseif base.gender == "f" then
			nom_source, acc_source = "nom_fp", "acc_mfp"
		else
			nom_source, acc_source = "nom_np", "acc_np"
		end
		copy_all({
			{nom_source, "nom_p"}, {acc_source, "acc_p"},
			{"gen_p", "gen_p"}, {"dat_p", "dat_p"}, {"ins_p", "ins_p"}, {"loc_p", "loc_p"},
		})
	end
end
-- Classify an adjectival lemma by its final vowel: -ý is "hard", -í is "soft", anything else is "possessive".
local function get_stemtype(base)
	local lemma = base.lemma
	if rfind(lemma, "ý$") then
		return "hard"
	end
	if rfind(lemma, "í$") then
		return "soft"
	end
	return "possessive"
end
-- Categories for adjectival nouns: a generic one plus one qualified by the stem type from get_stemtype().
declprops["adj"] = {
	cat = function(base, stems)
		local stemtype = get_stemtype(base)
		return {"adjectival POS", stemtype .. " GENDER adjectival POS"}
	end,
}
-- Mostly indeclinable nouns: every form equals the lemma except that the instrumental singular may add -m.
decls["mostly-indecl"] = function(base, stems)
	-- Several neuters: E.g. [[finále]] "final (sports)", [[čtvrtfinále]] "quarterfinal", [[chucpe]] "chutzpah",
	-- [[penále]] "fine, penalty", [[promile]] "per mille" (NOTE: loc pl also promilech), [[rande]] "rendezvous",
	-- [[semifinále]] "semifinal", [[skóre]] "score".
	-- At least one masculine animate: [[kamikaze]]/[[kamikadze]], where IJP says only -m in the ins sg.
	local ins_s
	if base.gender == "m" then
		ins_s = "m"
	else
		ins_s = {"-", "m"}
	end
	add_decl(base, stems, "-", "-", "-", "-", "-", ins_s,
		"-", "-", "-", "-", "-", "-")
end

declprops["mostly-indecl"] = {
	cat = "mostly indeclinable",
}
-- Indeclinable nouns: every ending is "-".
decls["indecl"] = function(base, stems)
	-- Note that fully indeclinable nouns should not have a table at all rather than one all of whose forms are the
	-- same; but having an indeclinable declension is useful for nouns that may or may not be indeclinable, e.g.
	-- [[desatero]] "group of ten" or the plural of [[peso]], which may be indeclinable 'pesos'.
	add_decl(base, stems, "-", "-", "-", "-", "-", "-",
		"-", "-", "-", "-", "-", "-")
end

-- Indeclinable adjectival nouns get adjective-specific categories; all others get the plain indeclinable ones.
declprops["indecl"] = {
	cat = function(base, stems)
		if not base.adj then
			return {"indeclinable POS", "indeclinable GENPOS"}
		end
		return {"adjectival POS", "indeclinable adjectival POS", "indeclinable GENDER adjectival POS"}
	end,
}
-- Manual declension: everything is supplied through overrides. Only the lemma slot is filled in here -- the nom_p for
-- a plurale tantum, the nom_s otherwise.
decls["manual"] = function(base, stems)
	local lemma_slot = "nom_s"
	if base.number == "pl" then
		lemma_slot = "nom_p"
	end
	add(base, lemma_slot, stems, "-")
end

declprops["manual"] = {
	desc = "GENDER",
	cat = {},
}
-- Fill in gender, number, animacy and clitic availability for a pronoun, based purely on its lemma.
-- The user may not specify gender, number or animacy, with one exception: the lemma 'ona' is ambiguous between
-- feminine singular and neuter plural, so it must be accompanied by '.sg' or '.pl'.
-- Raises an error for disallowed indicators, for 'ona' without a number, and for unrecognized pronouns.
local function set_pron_defaults(base)
	if base.gender or (base.lemma ~= "ona" and base.number) or base.animacy then
		error("Can't specify gender, number or animacy for pronouns")
	end

	-- Each entry is {GENDER, NUMBER, ANIMACY, HAS_CLITIC}.
	local props_by_lemma = {
		["kdo"] = {"none", "sg", "an", false},
		["co"] = {"none", "sg", "inan", false},
		["já"] = {"none", "sg", "an", true},
		["ty"] = {"none", "sg", "an", true},
		["my"] = {"none", "pl", "an", false},
		["vy"] = {"none", "pl", "an", false},
		["on"] = {"m", "sg", "none", true},
		["ono"] = {"n", "sg", "inan", true},
		["oni"] = {"m", "pl", "an", false},
		["ony"] = {"none", "pl", "none", false},
		["sebe"] = {"none", "none", "none", true},
	}

	local props
	if base.lemma == "ona" then
		if base.number == "sg" then
			props = {"f", "sg", "none", false}
		elseif base.number == "pl" then
			props = {"n", "pl", "inan", false}
		else
			error("Must specify '.sg' or '.pl' with lemma 'ona'")
		end
	else
		props = props_by_lemma[base.lemma]
		if not props then
			error(("Unrecognized pronoun '%s'"):format(base.lemma))
		end
	end

	-- The "actual" fields mirror the declension fields for pronouns.
	base.gender, base.actual_gender = props[1], props[1]
	base.number, base.actual_number = props[2], props[2]
	base.animacy, base.actual_animacy = props[3], props[3]
	base.has_clitic = props[4]
end
-- Pronouns are declined from full forms, so they get a single stem set with empty stems and the "pron" declension.
-- Raises an error if the user supplied a reducible or vowel-alternation spec.
local function determine_pronoun_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with pronouns")
	end
	local empty_stem_set = {reducible = false, vowel_stem = "", nonvowel_stem = ""}
	base.decl = "pron"
	base.stem_sets = {empty_stem_set}
end
-- Declension of the personal, interrogative and reflexive pronouns. Each lemma is handled individually by calling one
-- of the add_*decl helpers with full forms (the stem set for pronouns has empty stems). Several lemmas call the helper
-- more than once, with a footnote on the later calls marking forms restricted to a particular context.
-- NOTE(review): the argument order of add_decl / add_sg_decl / add_sg_decl_with_clitic / add_pl_only_decl is defined
-- outside this view -- confirm it before changing any of the calls below.
decls["pron"] = function(base, stems)
local after_prep_footnote = "[after a preposition]"
local dual_footnote = "[when referring to dual nouns, e.g. [[oči]], [[ruce]]]"
local animate_footnote = "[animate]"
if base.lemma == "kdo" then
add_decl(base, stems, "koho", "komu", nil, nil, "kom", "kým")
elseif base.lemma == "co" then
add_decl(base, stems, "čeho", "čemu", nil, nil, "čem", "čím")
elseif base.lemma == "já" then
add_sg_decl_with_clitic(base, stems, "mne", "mě", "mně", "mi", nil, nil, nil, "mně", "mnou")
elseif base.lemma == "ty" then
add_sg_decl_with_clitic(base, stems, "tebe", "tě", "tobě", "ti", nil, nil, nil, "tobě", "tebou")
elseif base.lemma == "my" then
add_pl_only_decl(base, stems, "nás", "nám", "nás", "nás", "námi")
-- Second call adds only the form 'náma', tagged with the dual footnote.
add_pl_only_decl(base, stems, nil, nil, nil, nil, "náma", dual_footnote)
elseif base.lemma == "vy" then
add_pl_only_decl(base, stems, "vás", "vám", "vás", "vás", "vámi")
-- Second call adds only the form 'váma', tagged with the dual footnote.
add_pl_only_decl(base, stems, nil, nil, nil, nil, "váma", dual_footnote)
elseif base.lemma == "on" or base.lemma == "ono" then
-- Masculine 'on' and neuter 'ono' share most forms; the neuter additionally allows 'je'/'ně', and only the
-- masculine has the post-prepositional clitic '-ň'.
local acc_s = base.lemma == "on" and "jej" or {"jej", "je"}
local clitic_acc_s = base.lemma == "on" and {"jej", "ho"} or {"jej", "ho", "je"}
local prep_acc_s = base.lemma == "on" and "něj" or {"něj", "ně"}
local prep_clitic_acc_s = base.lemma == "on" and "-ň" or nil
add_sg_decl_with_clitic(base, stems, {"jeho", "jej"}, {"ho", "jej"}, "jemu", "mu", acc_s, clitic_acc_s, nil, nil, "jím")
-- n-initial forms, tagged with the after-preposition footnote.
add_sg_decl_with_clitic(base, stems, {"něho", "něj"}, nil, "němu", nil, prep_acc_s, prep_clitic_acc_s, nil, "něm", "ním",
after_prep_footnote)
if base.lemma == "on" then
add_sg_decl_with_clitic(base, stems, nil, nil, nil, nil, "jeho", nil, nil, nil, nil,
animate_footnote)
-- NOTE(review): `after_prep_footnote and animate_footnote` evaluates to `animate_footnote` alone, because
-- `after_prep_footnote` is a non-empty (truthy) string. If 'něho' was meant to carry BOTH footnotes, this
-- expression does not do that -- confirm the intent and the footnote format the helper accepts.
add_sg_decl_with_clitic(base, stems, nil, nil, nil, nil, "něho", nil, nil, nil, nil,
after_prep_footnote and animate_footnote)
end
elseif base.lemma == "ona" and base.number == "sg" then
add_sg_decl(base, stems, "jí", "jí", "ji", nil, nil, "jí")
add_sg_decl(base, stems, "ní", "ní", "ni", nil, "ní", "ní", after_prep_footnote)
elseif base.lemma == "oni" or base.lemma == "ony" or base.lemma == "ona" then
-- 'ona' reaches this branch only when it is not singular, because the singular case is caught above.
add_pl_only_decl(base, stems, "jich", "jim", "je", nil, "jimi")
add_pl_only_decl(base, stems, "nich", "nim", "ně", "nich", "nimi", after_prep_footnote)
add_pl_only_decl(base, stems, nil, nil, nil, nil, "jima", dual_footnote)
add_pl_only_decl(base, stems, nil, nil, nil, nil, "nima", dual_footnote)
elseif base.lemma == "sebe" then
-- Underlyingly we handle [[sebe]]'s slots as singular.
add_sg_decl_with_clitic(base, stems, "sebe", "sebe", "sobě", "si", "sebe", "se", nil, "sobě", "sebou",
nil, "no nom_s")
else
error(("Internal error: Unrecognized pronoun lemma '%s'"):format(base.lemma))
end
end
-- Pronouns get a description but are not placed in any declension category.
declprops["pron"] = {
desc = "GENDER pronoun",
cat = {},
}
-- Fill in the fixed properties of a numeral: no gender, plural-only, no animacy, no clitic forms.
-- Raises an error if the user tried to specify gender, number or animacy.
local function set_num_defaults(base)
	if base.gender or base.number or base.animacy then
		error("Can't specify gender, number or animacy for numeral")
	end
	-- The "actual" fields mirror the declension fields for numerals.
	base.gender, base.actual_gender = "none", "none"
	base.number, base.actual_number = "pl", "pl"
	base.animacy, base.actual_animacy = "none", "none"
	base.has_clitic = false
end
-- Set up the single stem set for a numeral: the lemma minus a final vowel if it has one, else the whole lemma. The
-- same stem serves as both vowel and non-vowel stem. Raises an error if a reducible/vowel-alternation spec was given.
local function determine_numeral_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with numerals")
	end
	local lemma = base.lemma
	local stem = rmatch(lemma, "^(.*)" .. com.vowel_c .. "$")
	if not stem then
		stem = lemma
	end
	base.decl = "num"
	base.stem_sets = {{reducible = false, vowel_stem = stem, nonvowel_stem = stem}}
end
-- Declension of numerals. [[devět]] and the forms of "hundred" are given as full forms; all other numerals take the
-- regular endings, and those in -set/-cet additionally get the lengthened -íti variants.
decls["num"] = function(base, stems)
	local lemma = base.lemma
	if lemma == "devět" then
		add_pl_only_decl(base, "", "devíti", "devíti", "-", "devíti", "devíti", stems.footnotes)
		return
	end
	if lemma == "sta" or lemma == "stě" or lemma == "set" then
		add_pl_only_decl(base, "", "set", "stům", "-", "stech", "sty", stems.footnotes)
		return
	end
	-- Regular endings, shared by all remaining numerals.
	add_pl_only_decl(base, stems, "i", "i", "-", "i", "i")
	if rfind(lemma, "[cs]et$") then
		-- [[deset]] and all numbers ending in -cet ([[dvacet]], [[třicet]], [[čtyřicet]] and inverted compound
		-- numerals such as [[pětadvacet]] "25" and [[dvaatřicet]] "32") also have forms in -íti built on the
		-- lemma minus its final -et.
		local begin = rmatch(lemma, "^(.*)et$")
		add_pl_only_decl(base, begin, "íti", "íti", "-", "íti", "íti", stems.footnotes)
	end
end

declprops["num"] = {
	desc = "GENDER numeral",
	cat = {},
}
-- Fill in the fixed properties of a determiner: no gender, no number, no animacy, no clitic forms.
-- Raises an error if the user tried to specify gender, number or animacy.
local function set_det_defaults(base)
	if base.gender or base.number or base.animacy then
		error("Can't specify gender, number or animacy for determiner")
	end
	-- The "actual" fields mirror the declension fields for determiners.
	base.gender, base.actual_gender = "none", "none"
	base.number, base.actual_number = "none", "none"
	base.animacy, base.actual_animacy = "none", "none"
	base.has_clitic = false
end
-- Set up the single stem set for a determiner: the lemma minus a final vowel if it has one, else the whole lemma. The
-- same stem serves as both vowel and non-vowel stem. Raises an error if a reducible/vowel-alternation spec was given.
local function determine_determiner_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with determiners")
	end
	local lemma = base.lemma
	local stem = rmatch(lemma, "^(.*)" .. com.vowel_c .. "$")
	if not stem then
		stem = lemma
	end
	base.decl = "det"
	base.stem_sets = {{reducible = false, vowel_stem = stem, nonvowel_stem = stem}}
end
-- Declension of determiners: the ending "a" in every slot that is given, with "-" and nil in the remaining two.
-- NOTE(review): the meaning of each positional argument (and of "-" versus nil) is defined by add_sg_decl(), which is
-- outside this view -- confirm before editing.
decls["det"] = function(base, stems)
add_sg_decl(base, stems, "a", "a", "-", nil, "a", "a")
end
-- Determiners get a description but are not placed in any declension category.
declprops["det"] = {
desc = "GENDER determiner",
cat = {},
}
-- Collect the footnotes out of a group of alternating text/footnote runs. Element 1 is the main text; after that the
-- even positions hold bracketed footnotes and the odd positions hold the text between them, which must be empty.
-- Returns the list of footnotes, or nil if there are none. Raises an error if any text follows a footnote.
local function fetch_footnotes(separated_group)
	local collected
	local last_footnote_index = #separated_group - 1
	local index = 2
	while index <= last_footnote_index do
		if separated_group[index + 1] ~= "" then
			error("Extraneous text after bracketed footnotes: '" .. table.concat(separated_group) .. "'")
		end
		-- Create the list lazily so that "no footnotes" is reported as nil rather than as an empty list.
		collected = collected or {}
		table.insert(collected, separated_group[index])
		index = index + 2
	end
	return collected
end
--[=[
Parse a single override spec (e.g. 'nomplé:ové' or 'ins:autodráhou:autodrahou[rare]') and return
two values: the slot(s) the override applies to, and an object describing the override spec.
The input is actually a list where the footnotes have been separated out; for example,
given the spec 'inspl:čobotami:čobotámi[rare]:čobitmi[archaic]', the input will be a list
{"inspl:čobotami:čobotámi", "[rare]", ":čobitmi", "[archaic]", ""}. The object returned
for 'ins:autodráhou:autodrahou[rare]' looks like this:
{
full = true,
values = {
{
form = "autodráhou"
},
{
form = "autodrahou",
footnotes = {"[rare]"}
}
}
}
The object returned for 'nomplé:ové' looks like this:
{
values = {
{
form = "é",
},
{
form = "ové",
}
}
}
]=]
-- Parse one override spec whose footnotes have already been separated out into `segments`.
--
-- The first segment starts with one or more slot designators joined by '+'. Each designator is a three-character case
-- name, optionally followed by 'pl' (plural slot) or 'cl' (singular clitic slot, only for cases in `clitic_cases`). A
-- ':' directly after the designators marks the values as full forms rather than endings. The values themselves are
-- separated by ':' and each may carry footnotes; '-' stands for an empty ending.
--
-- Returns two values: the list of slot names, and an object {full = true or nil, values = {{form = ..., footnotes =
-- ...}, ...}}.
--
-- Raises an error for an unrecognized case or clitic case, and for an empty value.
--
-- NOTE: `segments[1]` is overwritten with the text remaining after the slot designators.
local function parse_override(segments)
	-- Remember the spec as the user wrote it so that every error message can quote it in full, even after
	-- `segments[1]` has been rewritten below.
	local orig_spec = table.concat(segments)
	local retval = {values = {}}
	local part = segments[1]
	local slots = {}
	while true do
		local case = usub(part, 1, 3)
		if not cases[case] then
			error(("Unrecognized case '%s' in override: '%s'"):format(case, orig_spec))
		end
		part = usub(part, 4)
		local slot
		if rfind(part, "^pl") then
			part = usub(part, 3)
			slot = case .. "_p"
		elseif rfind(part, "^cl") then
			-- No plural clitic cases at this point.
			part = usub(part, 3)
			if not clitic_cases[case] then
				error(("Unrecognized clitic case '%s' in override: '%s'"):format(case, orig_spec))
			end
			slot = "clitic_" .. case .. "_s"
		else
			slot = case .. "_s"
		end
		table.insert(slots, slot)
		-- A '+' introduces another slot designator sharing the same values.
		if not rfind(part, "^%+") then
			break
		end
		part = usub(part, 2)
	end
	if rfind(part, "^:") then
		retval.full = true
		part = usub(part, 2)
	end
	segments[1] = part
	local colon_separated_groups = put.split_alternating_runs_and_strip_spaces(segments, ":")
	for _, colon_separated_group in ipairs(colon_separated_groups) do
		local value = {}
		local form = colon_separated_group[1]
		if form == "" then
			-- Separate the slot names so that multi-slot overrides stay readable in the message.
			error(("Use - to indicate an empty ending for slot%s '%s': '%s'"):format(
				#slots > 1 and "s" or "", table.concat(slots, ", "), orig_spec))
		elseif form == "-" then
			value.form = ""
		else
			value.form = form
		end
		value.footnotes = fetch_footnotes(colon_separated_group)
		table.insert(retval.values, value)
	end
	return slots, retval
end
--[=[
Parse an indicator spec (text consisting of angle brackets and zero or more
dot-separated indicators within them). Return value is an object of the form
{
overrides = {
SLOT = {OVERRIDE, OVERRIDE, ...}, -- as returned by parse_override()
...
},
forms = {}, -- forms for a single spec alternant; see `forms` below
footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
stem_sets = { -- may be missing
{
reducible = TRUE_OR_FALSE,
footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
-- The following fields are filled in by determine_stems()
vowel_stem = "STEM",
nonvowel_stem = "STEM",
oblique_slots = one of {nil, "gen_p", "all", "all-oblique"},
oblique_vowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
oblique_nonvowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
},
...
},
gender = "GENDER", -- "m", "f", "n"
number = "NUMBER", -- "sg", "pl"; may be missing
animacy = "ANIMACY", -- "inan", "an"; may be missing
hard = true, -- may be missing
soft = true, -- may be missing
mixed = true, -- may be missing
surname = true, -- may be missing
istem = true, -- may be missing
["-istem"] = true, -- may be missing
tstem = true, -- may be missing
nstem = true, -- may be missing
tech = true, -- may be missing
foreign = true, -- may be missing
mostlyindecl = true, -- may be missing
indecl = true, -- may be missing
manual = true, -- may be missing
adj = true, -- may be missing
decllemma = "DECLENSION-LEMMA", -- may be missing
declgender = "DECLENSION-GENDER", -- may be missing
declnumber = "DECLENSION-NUMBER", -- may be missing
-- The following additional fields are added by other functions:
orig_lemma = "ORIGINAL-LEMMA", -- as given by the user
orig_lemma_no_links = "ORIGINAL-LEMMA-NO-LINKS", -- links removed
lemma = "LEMMA", -- `orig_lemma_no_links`, converted to singular form if plural and lowercase if all-uppercase
forms = {
SLOT = {
{
form = "FORM",
footnotes = {"FOOTNOTE", "FOOTNOTE", ...} -- may be missing
},
...
},
...
},
decl = "DECL", -- declension, e.g. "hard-m"
vowel_stem = "VOWEL-STEM", -- derived from vowel-ending lemmas
nonvowel_stem = "NONVOWEL-STEM", -- derived from non-vowel-ending lemmas
}
]=]
-- Parse the text of an indicator spec, including its surrounding angle brackets, into a `base` object.
-- The text inside the brackets is split on '.' (footnotes in square brackets are kept intact), and each part is
-- recognized, in this order, as: a slot override, a standalone footnote, a reducible/vowel-alternation spec, a gender,
-- a number, an animacy, a boolean flag, '+' (adjectival), '!' (manual), or one of the 'NAME:VALUE' indicators.
-- Raises an error for an unrecognized or duplicated indicator, or for footnotes attached where they are not allowed.
-- Returns the `base` object; it always contains `overrides` and `forms`, plus whatever the indicators set.
local function parse_indicator_spec(angle_bracket_spec)
local inside = rmatch(angle_bracket_spec, "^<(.*)>$")
assert(inside)
local base = {overrides = {}, forms = {}}
if inside ~= "" then
local segments = put.parse_balanced_segment_run(inside, "[", "]")
local dot_separated_groups = put.split_alternating_runs_and_strip_spaces(segments, "%.")
for i, dot_separated_group in ipairs(dot_separated_groups) do
local part = dot_separated_group[1]
-- A part whose first three characters name a case is a slot override.
local case_prefix = usub(part, 1, 3)
if cases[case_prefix] then
local slots, override = parse_override(dot_separated_group)
for _, slot in ipairs(slots) do
if base.overrides[slot] then
error(("Two overrides specified for slot '%s'"):format(slot))
else
base.overrides[slot] = {override}
end
end
elseif part == "" then
-- Empty text: either a truly blank indicator (an error) or footnotes standing by themselves.
if #dot_separated_group == 1 then
error("Blank indicator: '" .. inside .. "'")
end
base.footnotes = fetch_footnotes(dot_separated_group)
elseif rfind(part, "^[-*#ě]*$") or rfind(part, "^[-*#ě]*,") then
-- Reducible / vowel-alternation spec; several comma-separated alternatives produce several stem sets.
if base.stem_sets then
error("Can't specify reducible/vowel-alternant indicator twice: '" .. inside .. "'")
end
local comma_separated_groups = put.split_alternating_runs_and_strip_spaces(dot_separated_group, ",")
local stem_sets = {}
for i, comma_separated_group in ipairs(comma_separated_groups) do
local pattern = comma_separated_group[1]
local orig_pattern = pattern
local reducible, vowelalt, oblique_slots
if pattern == "-" then
-- default reducible, no vowel alt
else
local before, after
-- Pull out '*' (reducible) or '-*' (explicitly not reducible); the rest of the pattern, if any, is the
-- vowel-alternation part. Without a match, `reducible` stays nil.
before, reducible, after = rmatch(pattern, "^(.-)(%-?%*)(.-)$")
if before then
pattern = before .. after
reducible = reducible == "*"
end
if pattern ~= "" then
if not rfind(pattern, "^##?ě?$") then
error("Unrecognized vowel-alternation pattern '" .. pattern .. "', should be one of #, ##, #ě or ##ě: '" .. inside .. "'")
end
if pattern == "#ě" or pattern == "##ě" then
vowelalt = "quant-ě"
else
vowelalt = "quant"
end
-- `oblique_slots` will be later changed to "all" if the lemma ends in a consonant.
if pattern == "##" or pattern == "##ě" then
oblique_slots = "all-oblique"
else
oblique_slots = "gen_p"
end
end
end
table.insert(stem_sets, {
reducible = reducible,
vowelalt = vowelalt,
oblique_slots = oblique_slots,
footnotes = fetch_footnotes(comma_separated_group)
})
end
base.stem_sets = stem_sets
elseif #dot_separated_group > 1 then
-- Every indicator handled below is a bare word, so attached footnotes are rejected here.
error("Footnotes only allowed with slot overrides, reducible or vowel alternation specs or by themselves: '" .. table.concat(dot_separated_group) .. "'")
elseif part == "m" or part == "f" or part == "n" then
if base.gender then
error("Can't specify gender twice: '" .. inside .. "'")
end
base.gender = part
elseif part == "sg" or part == "pl" then
if base.number then
error("Can't specify number twice: '" .. inside .. "'")
end
base.number = part
elseif part == "an" or part == "inan" then
if base.animacy then
error("Can't specify animacy twice: '" .. inside .. "'")
end
base.animacy = part
elseif part == "hard" or part == "soft" or part == "mixed" or part == "surname" or part == "istem" or
part == "-istem" or part == "tstem" or part == "nstem" or part == "tech" or part == "foreign" or
part == "mostlyindecl" or part == "indecl" or part == "pron" or part == "det" or part == "num" or
-- Use 'velar' with words like [[petanque]] and [[Braque]] that end with a pronounced velar (and hence are declined
-- like velars) but not with a spelled velar; use '-velar' with words like [[hadíth]] that end with a spelled but
-- silent velar.
part == "collapse_ee" or part == "persname" or part == "c_as_k" or part == "velar" or part == "-velar" then
-- Boolean flags are stored on `base` under the indicator's own name.
if base[part] then
error("Can't specify '" .. part .. "' twice: '" .. inside .. "'")
end
base[part] = true
-- Allow 'hard' to signal that -y is allowed after -c, as in hard masculine nouns such as [[hec]]
-- "joke", and also feminines in -ca where the c is pronounced as /k/, e.g. [[ayahuasca]], [[pororoca]],
-- [[Petrarca]], [[Mallorca]], [[Casablanca]]. (Contrast [[mangalica]], [[Kusturica]], [[Bjelica]],
-- where the c is pronounced as /ts/ and -y is disallowed.)
if part == "hard" then
base.hard_c = true
end
elseif part == "+" then
if base.adj then
error("Can't specify '+' twice: '" .. inside .. "'")
end
base.adj = true
elseif part == "!" then
if base.manual then
error("Can't specify '!' twice: '" .. inside .. "'")
end
base.manual = true
-- The remaining indicators have the form 'NAME:VALUE'; the value is stored with the prefix removed.
elseif rfind(part, "^mixedistem:") then
if base.mixedistem then
error("Can't specify 'mixedistem:' twice: '" .. inside .. "'")
end
base.mixedistem = rsub(part, "^mixedistem:", "")
elseif rfind(part, "^decllemma:") then
if base.decllemma then
error("Can't specify 'decllemma:' twice: '" .. inside .. "'")
end
base.decllemma = rsub(part, "^decllemma:", "")
elseif rfind(part, "^declgender:") then
if base.declgender then
error("Can't specify 'declgender:' twice: '" .. inside .. "'")
end
base.declgender = rsub(part, "^declgender:", "")
elseif rfind(part, "^declnumber:") then
if base.declnumber then
error("Can't specify 'declnumber:' twice: '" .. inside .. "'")
end
base.declnumber = rsub(part, "^declnumber:", "")
else
error("Unrecognized indicator '" .. part .. "': '" .. inside .. "'")
end
end
end
return base
end
-- True for an ordinary noun, i.e. one that is not adjectival, a pronoun, a determiner or a numeral.
local function is_regular_noun(base)
	return not (base.adj or base.pron or base.det or base.num)
end
-- Record the user-specified number in `actual_number`, then let a 'declnumber:' indicator replace the number used for
-- declension. Raises an error if 'declnumber' is anything other than 'sg' or 'pl'.
local function process_declnumber(base)
	local declnumber = base.declnumber
	base.actual_number = base.number
	if not declnumber then
		return
	end
	if declnumber ~= "sg" and declnumber ~= "pl" then
		error(("Unrecognized value '%s' for 'declnumber', should be 'sg' or 'pl'"):format(declnumber))
	end
	base.number = declnumber
end
-- Fill in default properties for one word spec and reject contradictory indicator combinations.
-- Pronouns, determiners and numerals delegate to their own default-setting functions. Non-adjectival nouns need a
-- gender (unless declined manually), default to number "both" and animacy "inan", and may have the gender/animacy
-- used for declension overridden by 'declgender:'. Adjectival nouns get no defaults here.
-- Raises an error on any invalid or conflicting indicator.
local function set_defaults_and_check_bad_indicators(base)
	-- Determined up front, before any defaults are filled in.
	local regular_noun = is_regular_noun(base)

	-- Set default values.
	if base.pron then
		set_pron_defaults(base)
	elseif base.det then
		set_det_defaults(base)
	elseif base.num then
		set_num_defaults(base)
	elseif not base.adj then
		if not base.gender then
			if not base.manual then
				error("For nouns, gender must be specified")
			end
			base.gender = "none"
		end
		if not base.number then
			base.number = "both"
		end
		process_declnumber(base)
		if not base.animacy then
			base.animacy = "inan"
		end
		-- Keep the real gender/animacy before 'declgender:' possibly replaces the ones used for declension.
		base.actual_gender = base.gender
		base.actual_animacy = base.animacy
		local declgender = base.declgender
		if declgender then
			if declgender == "f" or declgender == "n" then
				base.gender = declgender
			elseif declgender == "m-an" or declgender == "m-in" then
				base.gender = "m"
				base.animacy = declgender == "m-an" and "an" or "inan"
			else
				error(("Unrecognized value '%s' for 'declgender', should be 'm-an', 'm-in', 'f' or 'n'"):format(declgender))
			end
		end
	end

	-- Check for bad indicator combinations.
	local stem_class_count = 0
	for _, indicator in ipairs({"hard", "soft", "mixed"}) do
		if base[indicator] then
			stem_class_count = stem_class_count + 1
		end
	end
	if stem_class_count > 1 then
		error("At most one of 'hard', 'soft' and 'mixed' can be specified")
	end

	local istem, non_istem = base.istem, base["-istem"]
	if istem and non_istem then
		error("'istem' and '-istem' cannot be specified together")
	end
	if istem or non_istem then
		if base.gender ~= "f" then
			error("'istem' and '-istem' can only be specified with the feminine gender")
		end
		if not regular_noun then
			error("'istem' and '-istem' can only be specified with regular nouns")
		end
	end
	if base.declgender and not regular_noun then
		error("'declgender' can only be specified with regular nouns")
	end
end
-- Apply set_defaults_and_check_bad_indicators() to every word spec, and record on the overall spec whether any word
-- has clitic forms and which kinds of word (pronoun, determiner, numeral, or not) were seen.
local function set_all_defaults_and_check_bad_indicators(alternant_multiword_spec)
	local word_count = #alternant_multiword_spec.alternant_or_word_specs

	local function visit(base)
		set_defaults_and_check_bad_indicators(base)
		base.multiword = word_count > 1 -- FIXME: not currently used; consider deleting
		if not alternant_multiword_spec.has_clitic then
			alternant_multiword_spec.has_clitic = base.has_clitic
		end
		-- Sets `saw_pron`/`saw_non_pron`, `saw_det`/`saw_non_det` and `saw_num`/`saw_non_num`.
		for _, kind in ipairs({"pron", "det", "num"}) do
			local flag = base[kind] and ("saw_" .. kind) or ("saw_non_" .. kind)
			alternant_multiword_spec[flag] = true
		end
	end

	iut.map_word_specs(alternant_multiword_spec, visit)
end
-- Reverse the second palatalization at the end of `word`: replace a palatalized final consonant (cluster) with its
-- plain counterpart. The first applicable rule wins; if none applies, `word` is returned unchanged. The cluster rules
-- (št -> sk, čt -> ck) are tried only when `is_adjective` is truthy. `base` is accepted but not used.
local function undo_second_palatalization(base, word, is_adjective)
	-- Each rule is {palatalized ending, plain ending, adjective-only flag}, in priority order.
	local rules = {
		{"št", "sk", true},
		{"čt", "ck", true},
		{"c", "k"}, -- FIXME, this could be wrong and c correct
		{"ř", "r"},
		{"z", "h"}, -- FIXME, this could be wrong and z or g correct
		{"š", "ch"},
	}
	for _, rule in ipairs(rules) do
		local from, to, adjective_only = rule[1], rule[2], rule[3]
		if is_adjective or not adjective_only then
			local stem = rmatch(word, "^(.*)" .. from .. "$")
			if stem then
				return stem .. to
			end
		end
	end
	return word
end
-- For a plural-only lemma, synthesize a likely singular lemma. It doesn't have to be theoretically correct as long as
-- it generates all the correct plural forms.
--
-- The reconstruction depends on `base.gender`, `base.animacy` and the indicators `indecl`, `soft` and `mixed`. It is
-- repeated for every stem set, and all stem sets must agree on the resulting lemma. Side effects on `base`: `lemma` is
-- replaced by the synthesized singular; `stem_sets` is created if missing; `soft` may be set (inanimate masculines in
-- -e/-ě); `istem` may be set (feminines in -i). For masculines, a stem set's `reducible` may be set to true.
--
-- Raises an error if the lemma does not have one of the endings expected for its gender/animacy, if the gender is
-- unrecognized, or if different stem sets lead to different singular lemmas.
local function synthesize_singular_lemma(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end
	local lemma_determined
	-- Loop over all stem sets in case the user specified multiple ones (e.g. '*,-*'). If we try to reconstruct
	-- different lemmas for different stem sets, we'll throw an error below.
	for _, stems in ipairs(base.stem_sets) do
		local stem, lemma
		-- Single-pass loop, used only so that `break` can leave the chain early.
		while true do
			if base.indecl then
				-- If specified as indeclinable, leave it alone; e.g. 'pesos' indeclinable plural of [[peso]].
				lemma = base.lemma
				break
			elseif base.gender == "m" then
				if base.animacy == "an" then
					stem = rmatch(base.lemma, "^(.*)i$")
					if stem then
						if base.soft then
							-- [[Blíženci]] "Gemini"
							-- The nominative singular has no ending, so no ending is passed. (The previous code
							-- passed `ending`, which is not declared in this scope and therefore was an accidental
							-- read of an undefined global, i.e. always nil.)
							lemma = com.convert_paired_plain_to_palatal(stem)
						else
							lemma = undo_second_palatalization(base, stem)
						end
					else
						stem = rmatch(base.lemma, "^(.*)ové$") or rmatch(base.lemma, "^(.*)é$")
						if stem then
							-- [[manželé]] "married couple", [[Velšané]] "Welsh people"
							lemma = stem
						else
							error(("Animate masculine plural-only lemma '%s' should end in -i, -ové or -é"):format(base.lemma))
						end
					end
				else
					stem = rmatch(base.lemma, "^(.*)y$")
					if stem then
						-- [[droby]] "giblets"; [[tvarůžky]] "Olomouc cheese"; [[alimenty]] "alimony"; etc.
						lemma = stem
					else
						local ending
						stem, ending = rmatch(base.lemma, "^(.*)([eě])$")
						if stem then
							-- [[peníze]] "money", [[tvargle]] "Olomouc cheese" (mixed declension), [[údaje]] "data",
							-- [[Lazce]] (a village), [[lováče]] "money", [[Krkonoše]] "Giant Mountains", [[kříže]]
							-- "clubs"
							lemma = com.convert_paired_plain_to_palatal(stem, ending)
							if not base.mixed then
								base.soft = true
							end
						else
							error(("Inanimate masculine plural-only lemma '%s' should end in -y, -e or -ě"):format(base.lemma))
						end
					end
				end
				-- If reducibility was not specified, assume it for polysyllabic lemmas whose synthesized singular
				-- ends in consonant + c/k, and restore the dropped vowel.
				if stems.reducible == nil then
					if rfind(lemma, com.cons_c .. "[ck]$") and not com.is_monosyllabic(base.lemma) then
						stems.reducible = true
					end
					if stems.reducible then
						lemma = dereduce(base, lemma)
					end
				end
				break
			elseif base.gender == "f" then
				stem = rmatch(base.lemma, "^(.*)y$")
				if stem then
					lemma = stem .. "a"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eě]$")
				if stem then
					-- Singular like the plural. Cons-stem feminines like [[dlaň]] "palm (of the hand)" have identical
					-- plurals to soft-stem feminines like [[růže]] (modulo e/ě differences), so we don't need to
					-- reconstruct the former type.
					lemma = base.lemma
					break
				end
				stem = rmatch(base.lemma, "^(.*)i$")
				if stem then
					-- i-stems.
					lemma = stem
					base.istem = true
					break
				end
				error(("Feminine plural-only lemma '%s' should end in -y, -ě, -e or -i"):format(base.lemma))
			elseif base.gender == "n" then
				-- -ata nouns like [[slůně]] "baby elephant" nom pl 'slůňata' are declined in the plural same as if
				-- the singular were 'slůňato' so we don't have to worry about them.
				stem = rmatch(base.lemma, "^(.*)a$")
				if stem then
					lemma = stem .. "o"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eěí]$")
				if stem then
					-- singular lemma also in -e, -ě or -í; e.g. [[věčná loviště]] "[[happy hunting ground]]"
					lemma = base.lemma
					break
				end
				error(("Neuter plural-only lemma '%s' should end in -a, -í, -ě or -e"):format(base.lemma))
			else
				error(("Internal error: Unrecognized gender '%s'"):format(base.gender))
			end
		end
		if lemma_determined and lemma_determined ~= lemma then
			error(("Attempt to set two different singular lemmas '%s' and '%s'"):format(lemma_determined, lemma))
		end
		lemma_determined = lemma
	end
	base.lemma = lemma_determined
end
-- For an adjectival lemma, synthesize the masc singular form.
-- The lemma in `base.lemma` may be given in the feminine, neuter and/or plural form; this function rewrites
-- `base.lemma` where needed, sets `base.decl` ("adj", or "indecl" when `base.indecl` is set) and stores the
-- extracted stem as both `vowel_stem` and `nonvowel_stem` of every stem set in `base.stem_sets`.
-- Throws an error if the lemma ending is not one of those recognized for the given gender/number/animacy.
local function synthesize_adj_lemma(base)
local stem
if base.indecl then
base.decl = "indecl"
stem = base.lemma
else
-- NOTE(review): `gender` and `number` are declared but never used in this function.
local gender, number
-- Replace a stem-final "ov" with "ův" (possessive adjectives). The single assignment discards the
-- substitution count that gsub() returns as its second value.
local function sub_ov(stem)
stem = stem:gsub("ov$", "ův")
return stem
end
-- This loop never iterates twice: every path through its body ends in either `break` or error(). It is
-- used only so that `break` can skip the remaining pattern checks once an ending has matched.
while true do
if base.number == "pl" then
-- Plural-only lemma: recognize the nominative plural ending for the given gender.
if base.gender == "m" then
stem = rmatch(base.lemma, "^(.*)í$")
if stem then
if base.soft then
-- nothing to do
else
if base.animacy ~= "an" then
error(("Masculine plural-only adjectival lemma '%s' ending in -í can only be animate unless '.soft' is specified"):
format(base.lemma))
end
base.lemma = undo_second_palatalization(base, stem, "is adjective") .. "ý"
end
break
end
stem = rmatch(base.lemma, "^(.*)é$")
if stem then
if base.animacy == "an" then
error(("Masculine plural-only adjectival lemma '%s' ending in -é must be inanimate"):
format(base.lemma))
end
base.lemma = stem .. "ý"
break
end
-- Possessive adjectives in -ovi/-ini (animate) and -ovy/-iny (inanimate); note that the captured
-- stem keeps the "ov"/"in" suffix here.
stem = rmatch(base.lemma, "^(.*ov)i$") or rmatch(base.lemma, "^(.*in)i$")
if stem then
if base.animacy ~= "an" then
error(("Masculine plural-only possessive adjectival lemma '%s' ending in -i must be animate"):
format(base.lemma))
end
base.lemma = sub_ov(stem)
break
end
stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$")
if stem then
if base.animacy == "an" then
error(("Masculine plural-only possessive adjectival lemma '%s' ending in -y must be inanimate"):
format(base.lemma))
end
base.lemma = sub_ov(stem)
break
end
-- No ending matched; choose the error message according to animacy and softness.
if base.animacy == "an" then
error(("Animate masculine plural-only adjectival lemma '%s' should end in -í, -ovi or -ini"):
format(base.lemma))
elseif base.soft then
error(("Soft masculine plural-only adjectival lemma '%s' should end in -í"):format(base.lemma))
else
error(("Inanimate masculine plural-only adjectival lemma '%s' should end in -é, -ovy or -iny"):
format(base.lemma))
end
elseif base.gender == "f" then
stem = rmatch(base.lemma, "^(.*)é$") -- hard adjective
if stem then
base.lemma = stem .. "ý"
break
end
stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective
if stem then
break
end
stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$") -- possessive adjective
if stem then
base.lemma = sub_ov(stem)
break
end
error(("Feminine plural-only adjectival lemma '%s' should end in -é, -í, -ovy or -iny"):format(base.lemma))
else
-- Any gender other than "m" or "f" is handled as neuter here.
stem = rmatch(base.lemma, "^(.*)á$") -- hard adjective
if stem then
base.lemma = stem .. "ý"
break
end
stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective
if stem then
break
end
stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$") -- possessive adjective
if stem then
base.lemma = sub_ov(stem)
break
end
error(("Neuter plural-only adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
end
else
-- Lemma is not plural-only: recognize the nominative singular ending for the given gender.
if base.gender == "m" then
-- Already in the masculine singular; only the stem needs extracting.
stem = rmatch(base.lemma, "^(.*)[ýí]$") or rmatch(base.lemma, "^(.*)ův$") or rmatch(base.lemma, "^(.*)in$")
if stem then
break
end
error(("Masculine adjectival lemma '%s' should end in -ý, -í, -ův or -in"):format(base.lemma))
elseif base.gender == "f" then
stem = rmatch(base.lemma, "^(.*)á$")
if stem then
base.lemma = stem .. "ý"
break
end
stem = rmatch(base.lemma, "^(.*)í$")
if stem then
break
end
stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$")
if stem then
base.lemma = sub_ov(stem)
break
end
error(("Feminine adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
else
-- Any gender other than "m" or "f" is handled as neuter here.
stem = rmatch(base.lemma, "^(.*)é$")
if stem then
base.lemma = stem .. "ý"
break
end
stem = rmatch(base.lemma, "^(.*)í$")
if stem then
break
end
stem = rmatch(base.lemma, "^(.*ov)o$") or rmatch(base.lemma, "^(.*in)o$")
if stem then
base.lemma = sub_ov(stem)
break
end
error(("Neuter adjectival lemma '%s' should end in -é, -í, -ovo or -ino"):format(base.lemma))
end
end
end
base.decl = "adj"
end
-- Now set the stem sets if not given.
if not base.stem_sets then
base.stem_sets = {{reducible = false}}
end
for _, stems in ipairs(base.stem_sets) do
-- Set the stems. Both stems are the same value: the stem captured above (or the whole lemma if indeclinable).
stems.vowel_stem = stem
stems.nonvowel_stem = stem
end
end
-- Determine the declension based on the lemma, gender and number. The declension is set in base.decl. In the process,
-- we set either base.vowel_stem (if the lemma ends in a vowel) or base.nonvowel_stem (if the lemma does not end in a
-- vowel), which is used by determine_stems(). In some cases (specifically with certain foreign nouns), we set
-- base.lemma to a new value; this is as if the user specified 'decllemma:'.
--
-- The lemma endings are tried in a fixed order (-a, -e/-ě, -o, -i/-y, -u, -í/-ý, consonant); the first one that
-- matches decides the declension and the function returns. An error is thrown for gender/ending combinations that
-- are not supported, and for lemmas whose ending is not recognized at all.
local function determine_declension(base)
	if base.mostlyindecl then
		base.decl = "mostly-indecl"
		base.nonvowel_stem = base.lemma
		return
	end
	if base.indecl then
		base.decl = "indecl"
		base.nonvowel_stem = base.lemma
		return
	end
	-- Working stem; must be local so that it neither leaks into nor clobbers the global environment.
	local stem
	-- Determine declension
	stem = rmatch(base.lemma, "^(.*)a$")
	if stem then
		if base.gender == "m" then
			if base.animacy ~= "an" then
				error("Masculine lemma in -a must be animate")
			end
			base.decl = "a-m"
		elseif base.gender == "f" then
			if base.hard then
				-- e.g. [[doňa]], which seems not to have soft alternates as [[piraňa]] does (despite IJP; but see the note at the
				-- bottom)
				base.decl = "hard-f"
			elseif rfind(stem, "e$") then
				-- [[idea]], [[diarea]] (subtype '.tech'), [[Korea]], etc.
				base.decl = "ea-f"
			elseif rfind(stem, "i$") then
				-- [[signoria]], [[sinfonia]], [[paranoia]], etc.
				base.decl = "ia-f"
			elseif rfind(stem, "[ou]$") then
				-- [[stoa]], [[kongrua]], [[Samoa]], [[Nikaragua]], etc.
				base.decl = "oa-f"
			elseif not base.persname and rfind(stem, "^.*[ňj]$") or base.mixed then
				-- [[maracuja]], [[papája]], [[sója]]; [[piraňa]] etc. Also [[Keňa]], [[Troja]]/[[Trója]], [[Amudarja]].
				-- Not [[Táňa]], [[Darja]], which decline like [[gejša]], [[skica]], etc. (subtype of hard feminines).
				base.decl = "mixed-f"
			else
				base.decl = "hard-f"
			end
		elseif base.gender == "n" then
			if rfind(stem, "m$") then
				base.decl = "ma-n"
			else
				error("Lemma ending in -a and neuter must end in -ma")
			end
		end
		base.vowel_stem = stem
		return
	end
	local ending
	stem, ending = rmatch(base.lemma, "^(.*)([eě])$")
	if stem then
		if ending == "ě" then
			stem = com.convert_paired_plain_to_palatal(stem)
		end
		if base.gender == "m" then
			if base.foreign then
				-- [[software]] and similar English-derived nouns with silent -e; set the lemma here as if decllemma: were given
				base.lemma = stem
				base.nonvowel_stem = stem
				base.decl = "hard-m"
				return
			end
			if base.hard then
				-- -e be damned; e.g. [[Sofokles]] with hard stem 'Sofokle-' (genitive 'Sofoklea', dative 'Sofokleovi', etc.)
				base.nonvowel_stem = base.lemma
				base.decl = "hard-m"
				return
			end
			if base.tstem then
				if base.animacy ~= "an" then
					error("T-stem masculine lemma in -e must be animate")
				end
				base.decl = "tstem-m"
			elseif rfind(stem, "i$") then
				-- [[zombie]], [[hippie]], [[yuppie]], [[rowdie]]
				base.decl = "ie-m"
			elseif rfind(stem, "e$") then
				-- [[Yankee]]
				base.nonvowel_stem = base.lemma
				base.decl = "ee-m"
				return
			else
				base.decl = "e-m"
			end
		elseif base.gender == "f" then
			base.decl = "soft-f"
		else
			if base.tstem then
				base.decl = "tstem-n"
			else
				base.decl = "soft-n"
			end
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*)o$")
	if stem then
		if base.gender == "m" then
			-- Cf. [[maestro]] m.
			base.decl = "o-m"
		elseif base.gender == "f" then
			-- [[zoo]]; [[Žemaitsko]]?
			error("Feminine nouns in -o are indeclinable; use '.indecl' if needed")
		elseif base.nstem then
			base.decl = "n-n"
		elseif base.hard then
			base.decl = "hard-n"
		elseif rfind(stem, "[aeiuy]$") then
			-- These have gen pl in -í and often other soft plural endings.
			base.decl = "semisoft-n"
		else
			base.decl = "hard-n"
		end
		base.vowel_stem = stem
		return
	end
	-- Note that for -i/-y (and -u, -í/-ý below) the captured stem includes the final vowel.
	stem = rmatch(base.lemma, "^(.*[iy])$")
	if stem then
		if base.gender == "m" then
			if base.soft then
				-- [[gay]] "gay man", [[gray]] "gray (scientific unit)", [[Nagy]] (surname)
				base.decl = "soft-m"
			else
				-- Cf. [[kivi]] "kiwi (bird)", [[husky]] "husky", etc.
				base.decl = "i-m"
			end
		elseif base.gender == "f" then
			if base.soft then
				-- [[Uruguay]], [[Paraguay]]
				base.decl = "soft-f"
			else
				-- [[máti]], [[pramáti]]; note also indeclinable [[tsunami]]/[[cunami]], [[okapi]]
				base.decl = "i-f"
				if stem:find("i$") then
					-- Drop the final -i; the single assignment discards gsub()'s substitution count.
					stem = stem:gsub("i$", "")
				else
					error("Feminine nouns in -y are either soft or indeclinable; use '.soft' or '.indecl' as needed")
				end
			end
		else
			error("Neuter nouns in -i are indeclinable; use '.indecl' if needed")
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*u)$")
	if stem then
		if base.gender == "m" then
			-- Cf. [[emu]], [[guru]], etc.
			base.decl = "u-m"
		elseif base.gender == "f" then
			-- Only one I know is [[budižkničemu]], which is indeclinable in the singular and declines in the plural as
			-- if written 'budižkničema'.
			error("Feminine nouns in -u are indeclinable; use '.indecl' if needed")
		else
			error("Neuter nouns in -u are indeclinable; use '.indecl' if needed")
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*[íý])$")
	if stem then
		if base.gender == "m" then
			base.decl = "í-m"
		elseif base.gender == "f" then
			-- FIXME: Do any exist? If not, update this message.
			error("Support for non-adjectival non-indeclinable feminine nouns in -í/-ý not yet implemented")
		else
			base.decl = "í-n"
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$")
	if stem then
		if base.gender == "m" then
			if base.foreign then
				-- [[komunismus]] "communism", [[kosmos]] "cosmos", [[hádes]] "Hades"
				stem = rmatch(base.lemma, "^(.*)[ueoaéá]s$")
				if not stem then
					error("Unrecognized masculine foreign ending, should be -us, -es, -os, -as, -és or -ás")
				end
				if not base.hard and (rfind(stem, "[ei]$") and base.animacy == "an" or
					rfind(stem, "i$") and base.animacy == "inan") then
					-- [[genius]], [[basileus]], [[rádius]]; not [[nukleus]], [[choreus]] (inanimate); not
					-- [[skarabeus]] (animate), which should specify 'hard'
					base.decl = "semisoft-m"
				else
					base.decl = "hard-m"
				end
				-- set the lemma here as if decllemma: were given
				base.lemma = stem
			elseif base.hard then
				base.decl = "hard-m"
			elseif base.soft then
				base.decl = "soft-m"
			elseif base.mixed then
				base.decl = "mixed-m"
			elseif rfind(base.lemma, com.inherently_soft_c .. "$") or rfind(base.lemma, "tel$") then
				base.decl = "soft-m"
			else
				base.decl = "hard-m"
			end
		elseif base.gender == "f" then
			if base.mixedistem then
				base.decl = "mixed-istem-f"
			elseif base.istem then
				base.decl = "istem-f"
			elseif base["-istem"] then
				base.decl = "cons-f"
			elseif rfind(base.lemma, "st$") then
				-- Numerous abstracts in -ost; also [[kost]], [[část]], [[srst]], [[bolest]]
				base.decl = "istem-f"
			else
				base.decl = "cons-f"
			end
		elseif base.gender == "n" then
			if base.foreign then
				stem = rmatch(base.lemma, "^(.*)um$") or rmatch(base.lemma, "^(.*)on$")
				if not stem then
					error("Unrecognized neuter foreign ending, should be -um or -on")
				end
				if base.hard then
					base.decl = "hard-n"
				elseif rfind(stem, "[eiuy]$") then
					base.decl = "semisoft-n"
				else
					base.decl = "hard-n"
				end
				-- set the lemma here as if decllemma: were given
				base.lemma = stem .. "o"
				base.vowel_stem = stem
				return
			else
				error("Neuter nouns ending in a consonant should use '.foreign' or '.decllemma:...'")
			end
		end
		base.nonvowel_stem = stem
		return
	end
	error("Unrecognized ending for lemma: '" .. base.lemma .. "'")
end
-- Work out the default value of the 'reducible' flag and store it in `base.default_reducible` (true, false or
-- "mixed"). Note, we are never called on adjectival nouns.
local function determine_default_reducible(base)
	local lemma = base.lemma
	local is_masc = base.gender == "m"
	-- Nouns in vowels other than -a/-o, masculine nouns in any vowel, and t-stems have no null endings, so they
	-- are not reducible.
	if rfind(lemma, "[iyuíeě]$") or is_masc and rfind(lemma, "[ao]$") or base.tstem then
		base.default_reducible = false
		return
	end

	local cons_stem = rmatch(lemma, "^(.*" .. com.cons_c .. ")$")
	if cons_stem then
		-- Consonant-final lemma. Per an analysis of existing manual declensions in -ec and -ek, 290 were reducible
		-- vs. 23 non-reducible; 15 of the latter were monosyllabic (none of the reducible ones were), and most of
		-- the rest were either irregularly reducible ([[švec]], [[žnec]], [[stařec]], [[tkadlec]]) or compounds of
		-- monosyllabic words ([[dotek]], [[oblek]], [[kramflek]], [[pucflek]], [[pokec]]), plus [[česnek]], which
		-- should be reducible but would lead to an impossible consonant cluster.
		local reducible = false
		if is_masc then
			if rfind(cons_stem, "[eě][ck]$") and not com.is_monosyllabic(cons_stem) then
				reducible = true
			end
		elseif base.gender == "f" then
			-- [[pochodeň]] "torch", [[píseň]] "song", [[žeň]] "harvest"; not [[reveň]] "rhubarb" or
			-- [[dřeň]] "pulp", which need an override.
			if rfind(cons_stem, "[eě]ň$") then
				reducible = true
			end
		end
		base.default_reducible = reducible
		return
	end

	if base.number == "sg" then
		base.default_reducible = false
		return
	end
	if rfind(lemma, "isko$") then
		-- e.g. [[středisko]]
		base.default_reducible = "mixed"
		return
	end

	local vowel_stem = rmatch(lemma, "^(.*)" .. com.vowel_c .. "$")
	if not vowel_stem then
		error(("Internal error: Something wrong, lemma '%s' doesn't end in consonant or vowel"):format(lemma))
	end
	-- Treat 'ch' as a single character so the cluster checks below stay simple. The extra parentheses keep only
	-- the first value returned by gsub().
	local simplified = (vowel_stem:gsub("ch", com.TEMP_CH))
	local cons = com.cons_c
	local is_foreign = base.foreign
	local reducible
	if rfind(simplified, cons .. "[lr]" .. cons .. "$") then
		-- [[vrba]], [[vlha]]; not reducible. (But note [[jablko]], reducible; needs override.)
		reducible = false
	elseif not is_foreign and (rfind(simplified, cons .. "[bkhlrmnv]$") or base.c_as_k and rfind(simplified, cons .. "c$")) then
		-- [[ayahuasca]] has gen pl 'ayahuasek'
		reducible = true
	elseif is_foreign and rfind(simplified, cons .. "r$") then
		-- Foreign nouns in -CCum seem generally non-reducible in the gen pl except for those in -Crum like
		-- [[centrum]]. Examples of non-reducible ones: [[album]], [[verbum]], [[signum]], [[interregnum]],
		-- [[sternum]]. [[infernum]] has gen pl 'infern/inferen'.
		reducible = true
	else
		reducible = false
	end
	base.default_reducible = reducible
end
-- Compute, for every stem set in `base.stem_sets`, the vowel and nonvowel stems together with their oblique
-- (vowel-alternated) variants. One of base.vowel_stem or base.nonvowel_stem is expected to have been set by
-- determine_declension(), depending on whether the lemma ends in a vowel; everything else is derived from it using
-- the reducibility, the vowel-alternation spec and any explicit stems. Results are stored on the stem-set objects
-- themselves, because different reducibility/alternation patterns given by the user can yield different stems.
local function determine_stems(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end

	-- Fill in the default reducibility and notice whether any stem set ended up "mixed".
	local saw_mixed = false
	for _, stem_set in ipairs(base.stem_sets) do
		if stem_set.reducible == nil then
			stem_set.reducible = base.default_reducible
		end
		saw_mixed = saw_mixed or stem_set.reducible == "mixed"
	end

	-- A "mixed" stem set stands for two: a reducible one followed by a non-reducible copy.
	if saw_mixed then
		local expanded = {}
		for _, stem_set in ipairs(base.stem_sets) do
			table.insert(expanded, stem_set)
			if stem_set.reducible == "mixed" then
				local unreduced = m_table.shallowCopy(stem_set)
				unreduced.reducible = false
				stem_set.reducible = true
				table.insert(expanded, unreduced)
			end
		end
		base.stem_sets = expanded
	end

	-- Now derive all the stems of each stem set.
	local alternate = com.apply_vowel_alternation
	for _, stem_set in ipairs(base.stem_sets) do
		if base.vowel_stem then
			local from_vowel = base.vowel_stem
			stem_set.vowel_stem = from_vowel
			stem_set.nonvowel_stem = from_vowel
			-- Vowel alternation goes first in cases like jádro -> jader; apply_vowel_alternation() will throw an
			-- error if the vowel being modified isn't the last vowel in the stem.
			local oblique = alternate(stem_set.vowelalt, from_vowel)
			stem_set.oblique_nonvowel_stem = oblique
			if stem_set.reducible then
				stem_set.nonvowel_stem = dereduce(base, from_vowel)
				stem_set.oblique_nonvowel_stem = dereduce(base, oblique)
			end
		else
			local from_cons = base.nonvowel_stem
			stem_set.nonvowel_stem = from_cons
			-- The user specified #, #ě, ## or ##ě and the term ends in a consonant, like masculine [[bůh]] or
			-- feminine [[sůl]]. Then all slots except the nom_s and maybe acc_s have vowel alternation.
			if stem_set.oblique_slots then
				stem_set.oblique_slots = "all"
			end
			stem_set.oblique_nonvowel_stem = alternate(stem_set.vowelalt, from_cons)
			if stem_set.reducible then
				local reduced = com.reduce(from_cons)
				stem_set.vowel_stem = reduced
				if not reduced then
					error("Unable to reduce stem '" .. from_cons .. "'")
				end
			else
				stem_set.vowel_stem = from_cons
			end
		end
		stem_set.oblique_vowel_stem = alternate(stem_set.vowelalt, stem_set.vowel_stem)
	end
end
-- Dispatch on the kind of word described by `base` (pronoun, determiner, numeral, adjective, manual declension or
-- regular noun) and run the matching stem/declension detection on it.
local function detect_indicator_spec(base)
	if base.pron then
		determine_pronoun_stems(base)
		return
	end
	if base.det then
		determine_determiner_stems(base)
		return
	end
	if base.num then
		determine_numeral_stems(base)
		return
	end
	if base.adj then
		process_declnumber(base)
		synthesize_adj_lemma(base)
		return
	end
	if base.manual then
		if base.stem_sets then
			-- FIXME, maybe this should be allowed?
			error("Reducible and vowel alternation specs cannot be given with manual declensions")
		end
		base.stem_sets = {{reducible = false, vowel_stem = "", nonvowel_stem = ""}}
		base.decl = "manual"
		return
	end

	-- Regular noun. A plural-only lemma is first converted to its singular equivalent.
	if base.number == "pl" then
		synthesize_singular_lemma(base)
	end
	determine_declension(base)
	determine_default_reducible(base)
	determine_stems(base)
end
-- Run detect_indicator_spec() on every word spec, recording which genders occur in the singular and in the plural
-- (stored in `sg_genders` / `pl_genders` on the top-level spec), then reject specs that combine more than one of
-- pronoun, determiner and numeral.
local function detect_all_indicator_specs(alternant_multiword_spec)
	-- Keep track of all genders seen in the singular and plural so we can determine whether to add the term to
	-- [[:Category:Czech nouns that change gender in the plural]].
	local sg_genders, pl_genders = {}, {}
	alternant_multiword_spec.sg_genders = sg_genders
	alternant_multiword_spec.pl_genders = pl_genders

	local function detect_and_record(base)
		detect_indicator_spec(base)
		if base.number ~= "pl" then
			sg_genders[base.actual_gender] = true
		end
		if base.number ~= "sg" then
			-- All t-stem masculines are neuter in the plural.
			local plgender = base.decl == "tstem-m" and "n" or base.actual_gender
			pl_genders[plgender] = true
		end
	end
	iut.map_word_specs(alternant_multiword_spec, detect_and_record)

	local kinds_seen = 0
	for _, flag in ipairs({"saw_pron", "saw_det", "saw_num"}) do
		if alternant_multiword_spec[flag] then
			kinds_seen = kinds_seen + 1
		end
	end
	if kinds_seen > 1 then
		error("Can't combine pronouns, determiners and/or numerals")
	end
end
-- Forward declaration: the two propagation functions call each other.
local propagate_multiword_properties

-- Propagate `property` within each alternant of `alternant_spec`, then set the property on the alternant spec itself:
-- the common value if all alternants that have one agree, otherwise `mixed_value`.
local function propagate_alternant_properties(alternant_spec, property, mixed_value, nouns_only)
	local combined
	for _, multiword_spec in ipairs(alternant_spec.alternants) do
		propagate_multiword_properties(multiword_spec, property, mixed_value, nouns_only)
		local value = multiword_spec[property]
		if combined == nil then
			combined = value
		elseif value and combined ~= value then
			combined = mixed_value
		end
	end
	alternant_spec[property] = combined
end
-- Walk the words of `multiword_spec` left to right. Each "nounal" word passes its `property` to the preceding words
-- that lack one (back to the previous nounal word); the last nounal word also passes it to the words after it.
-- The multiword spec itself gets the common value, or `mixed_value` if nounal words disagree.
-- With `nouns_only` set, plain words count as nounal only if is_regular_noun() says so; otherwise any word (and
-- always any alternant) that has the property set counts.
propagate_multiword_properties = function(multiword_spec, property, mixed_value, nouns_only)
	local word_specs = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs

	-- Give `value` to every word in positions first..last that has no value of its own.
	local function fill_missing(first, last, value)
		for j = first, last do
			local target = word_specs[j]
			target[property] = target[property] or value
		end
	end

	local combined = nil
	local last_nounal = 0
	for i = 1, #word_specs do
		local spec = word_specs[i]
		local is_nounal
		if spec.alternants then
			propagate_alternant_properties(spec, property, mixed_value)
			is_nounal = not not spec[property]
		elseif nouns_only then
			is_nounal = is_regular_noun(spec)
		else
			is_nounal = not not spec[property]
		end
		if is_nounal then
			local value = spec[property]
			if not value then
				error("Internal error: noun-type word spec without " .. property .. " set")
			end
			fill_missing(last_nounal + 1, i - 1, value)
			last_nounal = i
			if combined == nil then
				combined = value
			elseif combined ~= value then
				combined = mixed_value
			end
		end
	end
	if last_nounal > 0 then
		fill_missing(last_nounal + 1, #word_specs, word_specs[last_nounal][property])
	end
	multiword_spec[property] = combined
end
-- Push `property` from the top-level spec down to every word: any object lacking a value inherits the value of its
-- parent (the top level inherits `default_propval`). Each object also gets "actual_<property>" set to its resolved
-- value unless that field is already set. Throws an error if an individual word would end up with the value "mixed".
local function propagate_properties_downward(alternant_multiword_spec, property, default_propval)
	local actual_key = "actual_" .. property

	-- Return obj's own value of the property, first setting it to `inherited` if it has none.
	local function resolve(obj, inherited)
		local value = obj[property]
		if not value then
			obj[property] = inherited
			value = inherited
		end
		if not obj[actual_key] then
			obj[actual_key] = value
		end
		return value
	end

	-- Final step for an individual word whose resolved value is `value`.
	local function finish_word(word, value)
		if value == "mixed" then
			-- FIXME, use clearer error message.
			error("Attempt to assign mixed " .. property .. " to word")
		end
		resolve(word, value)
	end

	local top_value = resolve(alternant_multiword_spec, default_propval)
	for _, alternant_or_word_spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		local spec_value = resolve(alternant_or_word_spec, top_value)
		if alternant_or_word_spec.alternants then
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				local multiword_value = resolve(multiword_spec, spec_value)
				for _, word_spec in ipairs(multiword_spec.word_specs) do
					finish_word(word_spec, resolve(word_spec, multiword_value))
				end
			end
		else
			finish_word(alternant_or_word_spec, spec_value)
		end
	end
end
--[=[
Propagate `property` (one of "animacy", "gender" or "number") from nouns to adjacent
adjectives. We proceed as follows:
1. We assume the properties in question are already set on all nouns. This should happen in
set_defaults_and_check_bad_indicators().
2. We first propagate properties upwards and sideways. We recurse downwards from the top. When we encounter a multiword
spec, we proceed left to right looking for a noun. When we find a noun, we fetch its property (recursing if the noun
is an alternant), and propagate it to any adjectives to its left, up to the next noun to the left. When we have
processed the last noun, we also propagate its property value to any adjectives to the right (to handle e.g.
[[anděl strážný]] "guardian angel", where the adjective [[strážný]] should inherit the 'masculine' and 'animate'
properties of [[anděl]]). Finally, we set the property value for the multiword spec itself by combining all the
non-nil properties of the individual elements. If all non-nil properties have the same value, the result is that
value, otherwise it is `mixed_value` (which is "mixed" for animacy and gender, but "both" for number).
3. When we encounter an alternant spec in this process, we recursively process each alternant (which is a multiword
spec) using the previous step, and combine any non-nil properties we encounter the same way as for multiword specs.
4. The effect of steps 2 and 3 is to set the property of each alternant and multiword spec based on its children or its
neighbors.
]=]
-- Propagate `property` through the whole spec: two upward/sideways passes (first treating only regular nouns as
-- sources, then any word that has the property), followed by one downward pass that fills in what is still missing
-- using `default_propval`.
local function propagate_properties(alternant_multiword_spec, property, default_propval, mixed_value)
	local spec = alternant_multiword_spec
	local nouns_only_pass = "nouns only"
	propagate_multiword_properties(spec, property, mixed_value, nouns_only_pass)
	propagate_multiword_properties(spec, property, mixed_value, false)
	propagate_properties_downward(spec, property, default_propval)
end
-- Record the position of the first regular noun:
--   * on each alternant's multiword spec, `first_noun` is the index of its first regular-noun word;
--   * on the top-level spec, `first_noun` is the index of the first element that is a regular noun or an alternant
--     containing one.
-- Scanning stops at the first plain (non-alternant) regular noun. Alternants before that point all get their own
-- `first_noun` annotated, but only the earliest noun-bearing element sets the top-level value, so a later noun
-- can no longer overwrite an earlier alternant.
local function determine_noun_status(alternant_multiword_spec)
	local found_first = false
	for i, alternant_or_word_spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if alternant_or_word_spec.alternants then
			local is_noun = false
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				for j, word_spec in ipairs(multiword_spec.word_specs) do
					if is_regular_noun(word_spec) then
						multiword_spec.first_noun = j
						is_noun = true
						break
					end
				end
			end
			if is_noun and not found_first then
				alternant_multiword_spec.first_noun = i
				found_first = true
			end
		elseif is_regular_noun(alternant_or_word_spec) then
			if not found_first then
				alternant_multiword_spec.first_noun = i
			end
			return
		end
	end
end
-- Choose the part of speech (`pos`) and its plural (`plpos`) for the whole spec. An explicit `args.pos` wins;
-- otherwise the spec is a pronoun, determiner or numeral if only words of that kind were seen, and a noun in all
-- other cases.
local function set_pos(alternant_multiword_spec)
	local spec = alternant_multiword_spec
	local pos = spec.args.pos
	if not pos then
		if spec.saw_pron and not spec.saw_non_pron then
			pos = "သဗ္ဗနာမ်"
		elseif spec.saw_det and not spec.saw_non_det then
			pos = "ဖျေံလဝ်သန္နိဋ္ဌာန်"
		elseif spec.saw_num and not spec.saw_non_num then
			pos = "ဂၞန်သၚ်္ချာ"
		else
			pos = "နာမ်"
		end
	end
	spec.pos = pos
	spec.plpos = require(en_utilities_module).pluralize(pos)
end
-- Normalize the lemma of every word spec. An empty lemma defaults to `pagename`. The original lemma is kept in
-- `orig_lemma`, its link-free form in `orig_lemma_no_links`, the (possibly lowercased) link-free form in
-- `actual_lemma`, and `lemma` becomes `decllemma` if given, else the same value as `actual_lemma`.
local function normalize_all_lemmas(alternant_multiword_spec, pagename)
	local function normalize(base)
		local original = base.lemma
		if original == "" then
			original = pagename
		end
		local unlinked = m_links.remove_links(original)
		base.orig_lemma = original
		base.orig_lemma_no_links = unlinked

		-- An all-uppercase lemma is lowercased here and flagged, so that combine_stem_ending() can convert the
		-- forms back to uppercase later. This lets all-uppercase acronyms go through the normal machinery.
		-- FIXME: This may not make sense at all.
		local working = unlinked
		if uupper(working) == working then
			base.all_uppercase = true
			working = ulower(working)
		end
		base.actual_lemma = working
		base.lemma = base.decllemma or working
	end
	iut.map_word_specs(alternant_multiword_spec, normalize)
end
-- Generate all forms for `base`: run the declension function for `base.decl` once per stem set, apply derived
-- slots and overrides, and then, if the number the word is declined as (`base.number`) differs from its actual
-- number (`base.actual_number`), copy the generated forms to the other number and erase the number that does not
-- actually exist.
-- Throws an internal error if no declension function is registered for `base.decl`.
local function decline_noun(base)
	for _, stems in ipairs(base.stem_sets) do
		local decline = decls[base.decl]
		if not decline then
			-- tostring() so that a missing (nil) declension produces this message instead of failing on the
			-- string concatenation itself.
			error("Internal error: Unrecognized declension type '" .. tostring(base.decl) .. "'")
		end
		decline(base, stems)
	end
	handle_derived_slots_and_overrides(base)
	local function copy(from_slot, to_slot)
		base.forms[to_slot] = base.forms[from_slot]
	end
	if base.actual_number ~= base.number then
		local source_num = base.number == "sg" and "_s" or "_p"
		local dest_num = base.number == "sg" and "_p" or "_s"
		for case, _ in pairs(cases) do
			copy(case .. source_num, case .. dest_num)
		end
		-- The linked nominative does not depend on the case being iterated, so copy it once.
		copy("nom" .. source_num .. "_linked", "nom" .. dest_num .. "_linked")
		if base.actual_number ~= "both" then
			local erase_num = base.actual_number == "sg" and "_p" or "_s"
			for case, _ in pairs(cases) do
				base.forms[case .. erase_num] = nil
			end
			base.forms["nom" .. erase_num .. "_linked"] = nil
		end
	end
end
-- Return the variant code of `form`. Currently a stub that always returns nil; the intended implementation
-- (looking for the markers com.VAR1, com.VAR2 and com.VAR3 in the form) is commented out below pending a FIXME.
local function get_variants(form)
return nil
--[=[
FIXME
return
form:find(com.VAR1) and "var1" or
form:find(com.VAR2) and "var2" or
form:find(com.VAR3) and "var3" or
nil
]=]
end
-- Compute the categories to add the noun to, as well as the annotation to display in the
-- declension title bar. We combine the code to do these functions as both categories and
-- title bar contain similar information.
-- The results are stored in `alternant_multiword_spec.categories` (a list of category names, each prefixed with
-- "Czech ") and `alternant_multiword_spec.annotation` (a space-separated string). Only the "key" word spec(s) are
-- examined: the entry at `first_noun` (or the first entry if that is unset), and for an alternant, the
-- corresponding entry of each of its alternants.
local function compute_categories_and_annotation(alternant_multiword_spec)
local all_cats = {}
-- Add the category "Czech <cattype>" unless it is already in the list.
local function insert(cattype)
m_table.insertIfNot(all_cats, "Czech " .. cattype)
end
-- NOTE(review): both branches below are currently empty because the category insertions are commented out.
if alternant_multiword_spec.pos == "နာမ်" then
if alternant_multiword_spec.actual_number == "sg" then
-- insert("uncountable nouns")
elseif alternant_multiword_spec.actual_number == "pl" then
-- insert("pluralia tantum")
end
end
-- NOTE(review): the local `annotation` is never assigned or read; the result is written directly to
-- `alternant_multiword_spec.annotation` further down.
local annotation
-- Pieces of the annotation, collected by do_word_spec() and assembled at the end.
local annparts = {}
local decldescs = {}
local vowelalts = {}
local foreign = {}
local irregs = {}
local stemspecs = {}
-- nil until the first stem set is seen; then that stem set's `reducible` value, or "mixed" once stem sets disagree.
local reducible = nil
-- Build a description such as "masculine animate" or "feminine" from gender and animacy codes. The animacy is
-- only included when the gender is neither "f" nor "n". Unknown codes contribute nothing, so the result may be
-- an empty string.
local function get_genanim(gender, animacy)
local gender_code_to_desc = {
m = "masculine",
f = "feminine",
n = "neuter",
none = nil,
}
local animacy_code_to_desc = {
an = "animate",
inan = "inanimate",
none = nil,
}
local descs = {}
table.insert(descs, gender_code_to_desc[gender])
if gender ~= "f" and gender ~= "n" then
-- masculine or "none" (e.g. certain pronouns and numerals)
table.insert(descs, animacy_code_to_desc[animacy])
end
return table.concat(descs, " ")
end
-- Collapse runs of spaces into one space and strip leading/trailing whitespace.
local function trim(text)
text = text:gsub(" +", " ")
return mw.text.trim(text)
end
-- Collect categories and annotation pieces for a single word spec.
local function do_word_spec(base)
local actual_genanim = get_genanim(base.actual_gender, base.actual_animacy)
local declined_genanim = get_genanim(base.gender, base.animacy)
local genanim
if actual_genanim ~= declined_genanim then
genanim = ("%s (declined as %s)"):format(actual_genanim, declined_genanim)
insert("nouns with actual gender different from declined gender")
else
genanim = actual_genanim
end
if base.actual_gender == "m" then
-- Insert a category for 'Czech masculine animate nouns' or 'Czech masculine inanimate nouns'; the base categories
-- [[:Category:Czech masculine nouns]], [[:Czech animate nouns]] are auto-inserted.
insert(actual_genanim .. " " .. alternant_multiword_spec.plpos)
end
for _, stems in ipairs(base.stem_sets) do
-- `props.cat` may be a string, a list of strings, or a function of (base, stems) returning either.
local props = declprops[base.decl]
local cats = props.cat
if type(cats) == "function" then
cats = cats(base, stems)
end
if type(cats) == "string" then
cats = {cats}
end
local default_desc
for i, cat in ipairs(cats) do
-- Category specs are templates using the placeholders GENDER, POS and GENPOS (= "GENDER POS"). A spec with
-- no placeholder at all gets " GENPOS" appended; one without POS gets " POS" appended.
if not cat:find("GENDER") and not cat:find("GENPOS") and not cat:find("POS") then
cat = cat .. " GENPOS"
end
cat = cat:gsub("GENPOS", "GENDER POS")
if not cat:find("POS") then
cat = cat .. " POS"
end
-- The last category spec, with " POS" removed but GENDER not yet substituted, serves as the default
-- description.
if i == #cats then
default_desc = cat:gsub(" POS", "")
end
cat = cat:gsub("GENDER", actual_genanim)
cat = cat:gsub("POS", alternant_multiword_spec.plpos)
-- Need to trim `cat` because actual_genanim may be an empty string.
insert(trim(cat))
end
-- `props.desc` may be a string or a function of (base, stems); fall back to the default computed above.
local desc = props.desc
if type(desc) == "function" then
desc = desc(base, stems)
end
desc = desc or default_desc
desc = desc:gsub("GENDER", genanim)
-- Need to trim `desc` because genanim may be an empty string.
m_table.insertIfNot(decldescs, trim(desc))
local vowelalt
if stems.vowelalt == "quant" then
vowelalt = "quant-alt"
-- insert("nouns with quantitative vowel alternation")
elseif stems.vowelalt == "quant-ě" then
vowelalt = "í-ě-alt"
-- insert("nouns with í-ě alternation")
end
if vowelalt then
m_table.insertIfNot(vowelalts, vowelalt)
end
if reducible == nil then
reducible = stems.reducible
elseif reducible ~= stems.reducible then
reducible = "mixed"
end
-- NOTE(review): empty branch; the category insertion is commented out.
if stems.reducible then
-- insert("nouns with reducible stem")
end
if base.foreign then
m_table.insertIfNot(foreign, "foreign")
if not base.decllemma then
-- NOTE: there are nouns that use both 'foreign' and 'decllemma', e.g. [[Zeus]].
insert("nouns with regular foreign declension")
end
end
-- User-specified 'decllemma:' indicates irregular stem. Don't consider foreign nouns in -us/-os/-es, -um/-on or
-- silent -e (e.g. [[software]]) where this ending is simply dropped in oblique and plural forms as irregular;
-- there are too many of these and they are already categorized above as 'nouns with regular foreign declension'.
if base.decllemma then
m_table.insertIfNot(irregs, "irreg-stem")
insert("nouns with irregular stem")
end
m_table.insertIfNot(stemspecs, stems.vowel_stem)
end
end
-- Select the key entry: the first noun if one was recorded, else the first entry. For an alternant, process the
-- key word of each of its alternants.
local key_entry = alternant_multiword_spec.first_noun or 1
if #alternant_multiword_spec.alternant_or_word_specs >= key_entry then
local alternant_or_word_spec = alternant_multiword_spec.alternant_or_word_specs[key_entry]
if alternant_or_word_spec.alternants then
for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
key_entry = multiword_spec.first_noun or 1
if #multiword_spec.word_specs >= key_entry then
do_word_spec(multiword_spec.word_specs[key_entry])
end
end
else
do_word_spec(alternant_or_word_spec)
end
end
-- Assemble the annotation from the collected pieces, in a fixed order.
if alternant_multiword_spec.actual_number == "sg" or alternant_multiword_spec.actual_number == "pl" then
-- not "both" or "none" (for [[sebe]])
table.insert(annparts, alternant_multiword_spec.actual_number == "sg" and "sg-only" or "pl-only")
end
if #decldescs == 0 then
table.insert(annparts, "indecl")
else
table.insert(annparts, table.concat(decldescs, " // "))
end
if #vowelalts > 0 then
table.insert(annparts, table.concat(vowelalts, "/"))
end
if reducible == "mixed" then
table.insert(annparts, "mixed-reducible")
elseif reducible then
table.insert(annparts, "reducible")
end
if #foreign > 0 then
table.insert(annparts, table.concat(foreign, " // "))
end
if #irregs > 0 then
table.insert(annparts, table.concat(irregs, " // "))
end
alternant_multiword_spec.annotation = table.concat(annparts, " ")
-- NOTE(review): the two checks below have no effect at present; their category insertions are commented out.
if #stemspecs > 1 then
-- insert("nouns with multiple stems")
end
if alternant_multiword_spec.actual_number == "both" and not m_table.deepEquals(alternant_multiword_spec.sg_genders, alternant_multiword_spec.pl_genders) then
-- insert("nouns that change gender in the plural")
end
alternant_multiword_spec.categories = all_cats
end
-- Prepare the declined forms of `alternant_multiword_spec` for display by handing them to
-- iut.show_forms(). The lemma list passed along is taken from the first slot in
-- `potential_lemma_slots` that actually has forms.
local function show_forms(alternant_multiword_spec)
	local all_forms = alternant_multiword_spec.forms

	-- Collect the lemma strings from the first populated potential lemma slot only.
	local lemma_list = {}
	for _, candidate_slot in ipairs(potential_lemma_slots) do
		local slot_forms = all_forms[candidate_slot]
		if slot_forms then
			for _, form_object in ipairs(slot_forms) do
				-- FIXME, now can support footnotes as qualifiers in headwords?
				lemma_list[#lemma_list + 1] = form_object.form
			end
			break
		end
	end

	-- Forms are currently passed through unchanged; the variant-code removal
	-- (com.remove_variant_codes) is intentionally disabled.
	local function keep_form(form)
		return form
	end

	iut.show_forms(all_forms, {
		lemmas = lemma_list,
		slot_table = alternant_multiword_spec.output_noun_slots,
		lang = lang,
		canonicalize = keep_form,
	})
end
-- Build the wikitext of the declension table for `alternant_multiword_spec`.
--
-- Picks one of three table layouts (both numbers, one number, or one number with clitic column), fills in
-- `forms.title`, `forms.annotation` and `forms.notes_clause` on the forms table, and returns the result of
-- m_string_utilities.format() applied to the chosen layout and `forms`. Note that this mutates
-- `alternant_multiword_spec.forms`.
local function make_table(alternant_multiword_spec)
local forms = alternant_multiword_spec.forms
-- Opening markup shared by every layout. The literal text MINWIDTH is replaced with `min_width`, which is
-- used as an `em` measurement for both the frame's max-width and the table's min-width.
local function template_prelude(min_width)
return rsub([=[
<div>
<div class="NavFrame" style="max-width:MINWIDTHem">
<div class="NavHead" style="background:var(--wikt-palette-lighterblue, #ebf4ff);">{title}{annotation}</div>
<div class="NavContent" style="overflow:auto">
{\op}| style="min-width:MINWIDTHem" class="inflection-table inflection"
|- class="rowgroup"
]=], "MINWIDTH", min_width)
end
-- Closing markup shared by every layout; `{notes_clause}` is set on `forms` further below.
local function template_postlude()
return [=[
|{\cl}{notes_clause}</div></div></div>]=]
end
-- Layout with a singular and a plural column, one row per case. Used when actual_number is "both".
local table_spec_both = template_prelude("45") .. [=[
! style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);" |
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | ကိုန်ဨကဝုစ်
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | ကိုန်ဗဟုဝစ်
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|မဒုၚ်ယၟု
| {nom_s}
| {nom_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ဗဳဇဂကူ
| {gen_s}
| {gen_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပြကမ္မကာရက
| {dat_s}
| {dat_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ကမ္မကာရက
| {acc_s}
| {acc_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပရေၚ်ဂယိုၚ်လမျီု
| {voc_s}
| {voc_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ခၞံဗဒှ်ဌာန်မတန်တဴ
| {loc_s}
| {loc_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်
| {ins_s}
| {ins_p}
]=] .. template_postlude()
-- Layout with a single number column. The literal NUMBER is replaced with the column heading `number` and
-- the literal CODE with `numcode` ("s" or "p"), so that e.g. {nom_CODE} becomes {nom_s}.
local function get_table_spec_one_number(number, numcode)
local table_spec_one_number = [=[
! style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);" |
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | NUMBER
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|မဒုၚ်ယၟု
| {nom_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ဗဳဇဂကူ
| {gen_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပြကမ္မကာရက
| {dat_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ကမ္မကာရက
| {acc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပရေၚ်ဂယိုၚ်လမျီု
| {voc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ခၞံဗဒှ်ဌာန်မတန်တဴ
| {loc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်
| {ins_CODE}
]=]
-- The chained gsub() calls each return (string, count); as an operand of `..` only the string is used.
return template_prelude("30") .. table_spec_one_number:gsub("NUMBER", number):gsub("CODE", numcode) ..
template_postlude()
end
-- Like get_table_spec_one_number(), but with separate "stressed" and "clitic" columns for the genitive,
-- dative and accusative rows; the remaining rows span both columns.
local function get_table_spec_one_number_clitic(number, numcode)
local table_spec_one_number_clitic = [=[
! rowspan=2 style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);"|
! colspan=2 style="background:var(--wikt-palette-lightblue, #d9ebff);" | NUMBER
|-
! style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);" | stressed
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | clitic
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|မဒုၚ်ယၟု
| colspan=2 | {nom_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ဗဳဇဂကူ
| {gen_CODE}
| {clitic_gen_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပြကမ္မကာရက
| {dat_CODE}
| {clitic_dat_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ကမ္မကာရက
| {acc_CODE}
| {clitic_acc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပရေၚ်ဂယိုၚ်လမျီု
| colspan=2 | {voc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ခၞံဗဒှ်ဌာန်မတန်တဴ
| colspan=2 | {loc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်
| colspan=2 | {ins_CODE}
]=]
return template_prelude("40") .. table_spec_one_number_clitic:gsub("NUMBER", number):gsub("CODE", numcode) ..
template_postlude()
end
-- Markup wrapped around the footnote text; only used when `forms.footnote` is not the empty string.
local notes_template = [=[
<div style="width:100%;text-align:left;background:var(--wikt-palette-lightblue, #d9ebff);">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]=]
-- Title: the user-supplied one if given, otherwise a default built from `forms.lemma`.
if alternant_multiword_spec.title then
forms.title = alternant_multiword_spec.title
else
forms.title = 'မလဟုတ်စှ်ေဆေၚ်စပ်ကဵု <i lang="cs">' .. forms.lemma .. '</i>'
end
-- Annotation: shown in smaller type in parentheses after the title, or omitted entirely when empty.
local annotation = alternant_multiword_spec.annotation
if annotation == "" then
forms.annotation = ""
else
forms.annotation = " (<span style=\"font-size: smaller;\">" .. annotation .. "</span>)"
end
-- Column heading and slot suffix for the single-number layouts. Both stay nil when actual_number is
-- "both"; that is safe because the and/or chain below picks table_spec_both before either is used.
-- NOTE(review): any actual_number other than "sg"/"pl"/"none"/"both" would pass nil to gsub() — confirm
-- no other value can occur.
local number, numcode
if alternant_multiword_spec.actual_number == "sg" then
number, numcode = "singular", "s"
elseif alternant_multiword_spec.actual_number == "pl" then
number, numcode = "plural", "p"
elseif alternant_multiword_spec.actual_number == "none" then -- used for [[sebe]]
number, numcode = "", "s"
end
local table_spec =
alternant_multiword_spec.actual_number == "both" and table_spec_both or
alternant_multiword_spec.has_clitic and get_table_spec_one_number_clitic(number, numcode) or
get_table_spec_one_number(number, numcode)
-- notes_clause must be set on `forms` before the final format() call, since the postlude refers to it.
forms.notes_clause = forms.footnote ~= "" and
m_string_utilities.format(notes_template, forms) or ""
return m_string_utilities.format(table_spec, forms)
end
-- Compute the list of gender codes for the headword. Each code has the shape
-- GENDER-ANIMACY, with "-p" appended when the term is plural-only; the animacy value
-- "inan" is abbreviated to "in". Duplicate codes are not added twice.
local function compute_headword_genders(alternant_multiword_spec)
	local plural_suffix = alternant_multiword_spec.actual_number == "pl" and "-p" or ""
	local gender_codes = {}

	local function record_gender(base)
		local animacy_code = base.animacy
		if animacy_code == "inan" then
			animacy_code = "in"
		end
		local code = base.gender .. "-" .. animacy_code .. plural_suffix
		m_table.insertIfNot(gender_codes, code)
	end

	iut.map_word_specs(alternant_multiword_spec, record_gender)
	return gender_codes
end
-- Externally callable function to parse and decline a noun given user-specified arguments.
--
-- `parent_args` is the raw argument table; `from_headword`, when truthy, additionally accepts the
-- headword-only parameters (head, lemma, g, f, m, adj, dim, id).
--
-- Return value is ALTERNANT_MULTIWORD_SPEC, an object where the declined forms are in
-- `ALTERNANT_MULTIWORD_SPEC.forms` for each slot. If there are no values for a slot, the
-- slot key will be missing. The value for a given slot is a list of objects
-- {form=FORM, footnotes=FOOTNOTES}. If the `json` argument is set, the return value is instead the
-- JSON serialization (a string) of that object with its `args` field removed.
function export.do_generate_forms(parent_args, from_headword)
-- Parameters accepted in every context.
local params = {
[1] = {required = true, template_default = "bůh<m.an.#.voce>"},
title = true,
pagename = true,
json = {type = "boolean"},
pos = true,
}
-- Extra parameters that are only accepted when called on behalf of a headword template.
if from_headword then
params["head"] = {list = true}
params["lemma"] = {list = true}
params["g"] = {list = true}
params["f"] = {list = true}
params["m"] = {list = true}
params["adj"] = {list = true}
params["dim"] = {list = true}
params["id"] = {}
end
local args = m_para.process(parent_args, params)
local parse_props = {
parse_indicator_spec = parse_indicator_spec,
angle_brackets_omittable = true,
allow_blank_lemma = true,
}
local alternant_multiword_spec = iut.parse_inflected_text(args[1], parse_props)
alternant_multiword_spec.title = args.title
alternant_multiword_spec.args = args
-- Page name precedence: explicit pagename=, then the first head= (headword calls only), then the
-- page name recorded in Module:headword/data.
local pagename = args.pagename or from_headword and args.head[1] or mw.loadData("Module:headword/data").pagename
normalize_all_lemmas(alternant_multiword_spec, pagename)
set_all_defaults_and_check_bad_indicators(alternant_multiword_spec)
-- These need to happen before detect_all_indicator_specs() so that adjectives get their genders and numbers set
-- appropriately, which are needed to correctly synthesize the adjective lemma.
propagate_properties(alternant_multiword_spec, "animacy", "inan", "mixed")
propagate_properties(alternant_multiword_spec, "number", "both", "both")
-- FIXME, the default value (third param) used to be 'm' with a comment indicating that this applied only to
-- plural adjectives, where it didn't matter; but in Czech, plural adjectives are distinguished for gender and
-- animacy. Make sure 'mixed' works.
propagate_properties(alternant_multiword_spec, "gender", "mixed", "mixed")
detect_all_indicator_specs(alternant_multiword_spec)
-- Propagate 'actual_number' after calling detect_all_indicator_specs(), which sets 'actual_number' for adjectives.
propagate_properties(alternant_multiword_spec, "actual_number", "both", "both")
determine_noun_status(alternant_multiword_spec)
set_pos(alternant_multiword_spec)
-- The slot table depends on actual_number, so it can only be computed once that has been propagated.
alternant_multiword_spec.output_noun_slots = get_output_noun_slots(alternant_multiword_spec)
local inflect_props = {
skip_slot = function(slot)
return skip_slot(alternant_multiword_spec.actual_number, slot)
end,
slot_table = alternant_multiword_spec.output_noun_slots,
get_variants = get_variants,
inflect_word_spec = decline_noun,
}
iut.inflect_multiword_or_alternant_multiword_spec(alternant_multiword_spec, inflect_props)
compute_categories_and_annotation(alternant_multiword_spec)
alternant_multiword_spec.genders = compute_headword_genders(alternant_multiword_spec)
if args.json then
-- Drop the processed-argument table before serializing; callers must check for a string result.
alternant_multiword_spec.args = nil
return require("Module:JSON").toJSON(alternant_multiword_spec)
end
return alternant_multiword_spec
end
-- Entry point for {{cs-ndecl}}. Template-callable function that parses and declines a noun from
-- the arguments of the invoking template and returns the displayable declension table followed by
-- the formatted categories. When JSON output was requested, the JSON string is returned as-is.
function export.show(frame)
	local result = export.do_generate_forms(frame:getParent().args)
	if type(result) == "string" then
		-- do_generate_forms() returns a string only for the JSON case.
		return result
	end

	show_forms(result)
	local table_text = make_table(result)
	local category_text = require("Module:utilities").format_categories(
		result.categories, lang, nil, nil, force_cat
	)
	return table_text .. category_text
end
return export
emm73z04m4bznmmfgcztx4ulzlx47gk
395158
395154
2026-05-19T18:12:32Z
咽頭べさ
33
395158
Scribunto
text/plain
local export = {}
--[=[
Authorship: Ben Wing <benwing2>
]=]
--[=[
TERMINOLOGY:
-- "slot" = A particular combination of case/number.
Example slot names for nouns are "gen_s" (genitive singular) and
"voc_p" (vocative plural). Each slot is filled with zero or more forms.
-- "form" = The declined Czech form representing the value of a given slot.
-- "lemma" = The dictionary form of a given Czech term. Generally the nominative
masculine singular, but may occasionally be another form if the nominative
masculine singular is missing.
]=]
--[=[
FIXME:
1. Finish synthesize_singular_lemma(). [DONE]
2. Implement feminines in -ea, -oa/-ua, -ia, -oe. [DONE]
3. Implement "mixed" masculine nouns in -l, -n, -t (each different, also inanimate vs. animate). [DONE]
4. Allow 'stem:' override after vowel-final words like [[centurio]]. [DONE using decllemma:]
5. Support masculine foreign nouns in -us/-os/-es. [DONE]
6. Support masculine foreign nouns in -ius/-etc. [DONE]
7. Support masculine foreign nouns in unpronounced final -e (e.g. [[software]]). [DONE]
8. Support neuter foreign nouns in -um/-on. [DONE]
9. Support neuter foreign nouns in -ium/-ion. [DONE]
10. Support paired body parts, e.g. [[ruka]], [[noha]], [[oko]], [[ucho]], [[koleno]], [[rameno]]. [WON'T DO;
JUST SEPARATE THE MEANINGS AND GIVE THEM DIFFERENT DECLENSIONS]
11. Support masculine nouns in -e/ě that are neuter in the plural. [DONE]
12. Correctly handle -e vs. -ě, e.g. soft neuters have both [[kutě]] and [[poledne]]. [DONE]
13. Always use specified lemma in nom_pl and maybe acc_pl when plurale tantum. [DONE]
14. Support feminine nouns in -ca/-ča/-ša/-ža. [DONE]
15. Support feminine nouns in -ja/-ňa. [DONE]
16. Support mixed i-stem feminine nouns. [DONE]
17. Support "c as k" feminine nouns like [[ayahuasca]].
18. Support 'declgender'. [DONE]
19. Support pronouns with clitics. [DONE]
20. Singular-only and plural-only terms should not have number in accelerator form. [DONE]
21. Support [[úterý]] (like neuters in -í). [DONE]
22. Support feminines in -i ([[máti]], [[pramáti]]). [DONE]
23. Support foreign nouns in -ie ([[zombie]], [[hippie]], [[yuppie]]). [DONE]
24. Support foreign nouns in -í ([[muftí]], [[qádí]]). [DONE]
25. Support manual declensions. [DONE]
26. Support numerals. [DONE]
27. Allow for reducible spec in pluralia tantum and dereduce accordingly; also automatically assign reducibility
if singular stem ends in -Ck or -Cc. [DONE]
28. Use `pos` value in all categories.
29. Support determiners [[kolik]], [[tolik]], [[několik]], [[mnoho]]. [DONE]
30. Support a '.velar' indicator for foreign names whose pronunciation but not spelling ends in a velar: [[Remarque]],
[[Braque]], [[Mike]], [[Drake]], [[Jake]] with vocative 'Remarquu', 'Braquu', 'Mikeu', 'Drakeu', 'Jakeu'. In
general we need more thought around such foreign names; essentially, for names in a silent e, sometimes the -e
is dropped in all oblique forms (e.g. [[Shakespeare]], [[Pierre]], [[Barrande]], [[La Fontaine]], [[Braque]],
[[Remarque]] with gen sg 'Shakespeara', 'Pierra', 'Barranda', 'La Fontaina', 'Braqua', 'Remarqua') and sometimes
it's kept in all oblique forms except those ending in an -e, where -ee is avoided (e.g. [[Pete]], [[Gable]],
[[Jake]], [[White]], [[Byrne]], [[Mike]], [[Drake]] with gen sg 'Petea', 'Gablea' etc. and voc sg 'Pete', 'Gable'
but 'Jakeu', 'Mikeu'). Sometimes there are doublets, e.g. [[Hubble]] and [[Hume]] have gen sg 'Hubbla/Hubblea'
(where the second form is used among astronomers in a technical sense and the first form may be more popular)
and 'Huma/Humea'. We already have a '.foreign' indicator that when applied to a noun ending in -e drops the -e
in oblique forms e.g. for [[software]]. We may need to combine this with an explicit indicator of hard, soft or
velar as there will be names with silent -e and preceding soft consonant e.g. [[Bruce]], [[Coleridge]]. Note
that when the -e is kept it is still dropped before front vowels, hence dat sg 'Bruci'/'Bruceovi'. Need some
investigation in IJP and cswikt. [.velar DONE]
31. Support 'declnumber'. [DONE]
32. Support foreign nouns in -ee ([[Yankee]]). [DONE]
]=]
local lang = require("Module:languages").getByCode("cs")
local m_table = require("Module:table")
local m_links = require("Module:links")
local m_string_utilities = require("Module:string utilities")
local iut = require("Module:inflection utilities")
local put = require("Module:parse utilities")
local m_para = require("Module:parameters")
local com = require("Module:cs-common")
local en_utilities_module = "Module:en-utilities"
local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local usub = mw.ustring.sub
local uupper = mw.ustring.upper
local ulower = mw.ustring.lower
local force_cat = false -- set to true to make categories appear in non-mainspace pages, for testing
-- Wrapper around rsubn() that returns only the substituted string and drops the
-- substitution count.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call's results to the first value.
	return (rsubn(term, foo, bar))
end
-- Wrapper around rsubn() whose second return value is a boolean telling whether at least one
-- substitution was made, instead of the raw substitution count.
local function rsubb(term, foo, bar)
	local substituted, count = rsubn(term, foo, bar)
	local changed = count > 0
	return substituted, changed
end
-- Record a tracking entry under the "cs-noun/" prefix via Module:debug/track. Always returns
-- true so that the call can be used inside boolean expressions.
local function track(track_id)
	local record = require("Module:debug/track")
	record("cs-noun/" .. track_id)
	return true
end
-- Mapping from slot name to the accelerator tag set for that slot. This table is shared by all
-- calls into the module and must be treated as read-only.
local output_noun_slots = {
	nom_s = "nom|s",
	nom_s_linked = "nom|s",
	gen_s = "gen|s",
	gen_s_linked = "gen|s",
	clitic_gen_s = "clitic|gen|s",
	dat_s = "dat|s",
	clitic_dat_s = "clitic|dat|s",
	acc_s = "acc|s",
	clitic_acc_s = "clitic|acc|s",
	voc_s = "voc|s",
	loc_s = "loc|s",
	ins_s = "ins|s",
	nom_p = "nom|p",
	nom_p_linked = "nom|p",
	gen_p = "gen|p",
	dat_p = "dat|p",
	acc_p = "acc|p",
	voc_p = "voc|p",
	loc_p = "loc|p",
	ins_p = "ins|p",
}

-- Return the slot table to use for `alternant_multiword_spec`.
--
-- When the term has both numbers, the shared table is returned unchanged. Otherwise (singular-only,
-- plural-only, etc.) a new table is returned in which the trailing "|s" / "|p" number tag has been
-- removed from every accelerator form.
--
-- The stripped variant is built as a fresh table rather than by editing `output_noun_slots` in place.
-- In-place editing permanently altered the shared table, so a later call in the same Lua invocation
-- (e.g. a second noun on the same page) for a term with both numbers would wrongly receive accelerator
-- forms without number tags.
local function get_output_noun_slots(alternant_multiword_spec)
	if alternant_multiword_spec.actual_number == "both" then
		return output_noun_slots
	end
	local numberless_slots = {}
	for slot, accel_form in pairs(output_noun_slots) do
		-- "|" is not magic in Lua patterns; the parentheses discard gsub's substitution count.
		numberless_slots[slot] = (accel_form:gsub("|[sp]$", ""))
	end
	return numberless_slots
end
-- Slots that may hold the lemma, in order of preference. show_forms() takes the lemmas from the first of
-- these that has forms, and handle_derived_slots_and_overrides() generates a "_linked" variant for each.
local potential_lemma_slots = {"nom_s", "nom_p", "gen_s"}
-- Set of recognized case codes (keys are the case prefixes used in slot names such as "gen_s").
-- NOTE(review): the consumers of this table are outside the visible part of the file — presumably it is
-- used to validate user-specified case names; confirm against the callers.
local cases = {
nom = true,
gen = true,
dat = true,
acc = true,
voc = true,
loc = true,
ins = true,
}
-- Subset of cases for which a separate clitic slot exists (cf. clitic_gen_s, clitic_dat_s, clitic_acc_s in
-- output_noun_slots).
local clitic_cases = {
gen = true,
dat = true,
acc = true,
}
-- Dereduce `stem` using com.dereduce(), raising an error if that function yields no result.
local function dereduce(base, stem)
	local result = com.dereduce(base, stem)
	if result then
		return result
	end
	error("Unable to dereduce stem '" .. stem .. "'")
end
--[=[
Maybe modify the stem and/or ending in certain special cases:
1. Final -e in vocative singular triggers first palatalization of the stem in some cases (e.g. hard masc).
2. Endings beginning with ě, i, í trigger second palatalization, as does -e in the loc_s.
NOTE: Correctly handling -e vs. -ě and -tdn/-ťďň alternations is tricky. We have to deal with the following:
1. Soft-stem and t-stem neuters can have either -e or -ě. With coronals we have both [[poledne]] "noon" with /n/ and
[[kutě]] "bed" with /ť/. We also have soft-stem neuter [[Labe]] with /b/ vs. t-stem neuter [[hříbě]] with /bj/.
2. Underlying palatal coronals maintain their nature before back vowels and when not followed by a vowel, e.g. [[štěně]]
"puppy" becomes 'štěňata' in the nom/acc/voc plural and [[přítelkyně]] "girlfriend" becomes 'přítelkyň' in the gen
plural, but underlying palatal labials become non-palatal, e.g. [[hříbě]] "foal" becomes 'hříbata' in the nom/acc/voc
plural.
3. There are at least four types of endings beginning with '-e':
a. "maintaining" endings, e.g. instrumental singular '-em', which do not change the nature of the consonant, e.g.
[[zákon]] "law" becomes 'zákonem' while [[vězeň]] "prisoner" becomes 'vězeněm';
b. "palatalizing" endings, e.g. locative singular '-e', which palatalizes t/d/n (and more generally applies the
Slavic second palatalization, e.g. k -> c, r -> ř), e.g. [[žena]] "woman" becomes 'ženě';
c. "depalatalizing" endings, e.g. feminine i-stem dative plural '-em', which actively depalatalize ť/ď/ň, e.g.
[[oběť]] "sacrifice, victim" becomes 'obětem';
d. vocative singular '-e' of hard-stem masculines, which applies the Slavic first palatalization in some
circumstances (e.g. k -> č, Cr -> Cř, sometimes c -> č).
The way we handle this is as follows:
1. We maintain the underlying stems always in their "pronounced" form, i.e. if the last consonant is pronounced ť/ď/ň
we maintain the stem in that form, but if pronounced t/d/n, we use those consonants. Hence neuter [[poledne]] "noon"
has stem 'poledn-' but neuter [[štěně]] "puppy" has stem 'štěň'. If the stem ends in labial + /j/, we use a special
TEMP_SOFT_LABIAL character after the labial (rather than 'j', in case of stems that actually have a written 'j' in
them such as [[banjo]]).
2. We signal types (a), (b) and (c) above using respectively 'e', 'ě' and 'E'. Type (d) uses 'e' and sets
`base.palatalize_voc`.
3. In combine_stem_ending(), we convert the stem back to the written form before adding the ending. If the ending begins
with -e, this may entail converting -e to -ě, and in all cases -E is converted to -e. "Converting to the written
form" converts ť/ď/ň to plain equivalents and deletes TEMP_SOFT_LABIAL before -e, converting -e to -ě with such
consonants. The same conversions happen before other front vowels -ě/-é/-i/-í, which don't allow ť/ď/ň to
precede, and in all cases with TEMP_SOFT_LABIAL, which is not an actual consonant.
4. If the ending is specified using -ě, this is maintained after plain coronals and labials in combine_stem_ending(),
and converted to -e in other cases.
5. Applying the first and second palatalization happens below in apply_special_cases().
]=]
-- Adjust `stem` for the given `slot` and `ending` in the special cases listed below, and return the
-- (possibly modified) stem together with the unchanged ending.
--
-- The returned stem is normally a string, but it is a two-element list {plain, alternative} when
-- both variants are valid (the "c as k" case and the optional r -> ř vocative of inanimates).
--
-- Cases, checked in this order (only the first matching one applies):
-- 1. `base.c_as_k` with an ending starting in a back vowel: offer both the stem as given and the stem
--    with final -c replaced by -k.
-- 2. Vocative singular in -e with `base.palatalize_voc` set (and no '-velar' indicator): apply the first
--    palatalization.
-- 3. Ending starting in ě/i/í, or locative singular in -e: apply the second palatalization.
local function apply_special_cases(base, slot, stem, ending)
	if base.c_as_k and rfind(ending, "^[aouyáóúůý]") then
		local k_stem = rsub(stem, "c$", "k")
		stem = {stem, k_stem}
	elseif slot == "voc_s" and ending == "e" and base.palatalize_voc and not base["-velar"] then
		-- Don't palatalize words like [[hadíth]] with silent -h.
		local palstem = com.apply_first_palatalization(stem)
		-- According to IJP, nouns ending in -Cr palatalize in the vocative, but those in -Vr don't. In reality,
		-- though, it's more complex. It appears that animate nouns in -Cr tend to palatalize but inanimate nouns
		-- do it optionally. Specifics:
		-- -- Inanimate nouns with optional palatalization (ř listed second): [[alabastr]], [[amfiteátr]], [[barometr]],
		--    [[centilitr]], [[centimetr]], [[decilitr]], [[decimetr]], [[Dněstr]], [[filtr]], [[galvanometr]],
		--    [[hektolitr]], [[kalorimetr]], [[litr]], [[lustr]], [[manometr]], [[manšestr]], [[metr]] (NOTE: is both
		--    animate and inanimate), [[mikrometr]], [[miliampérmetr]], [[mililitr]], [[nanometr]], [[orchestr]],
		--    [[parametr]], [[piastr]], [[půllitr]], [[radiometr]], [[registr]], [[rotmistr]], [[semestr]], [[skútr]],
		--    [[spirometr]], [[svetr]], [[šutr]], [[tachometr]], [[titr]], [[vítr]] (NOTE: has í-ě alternation),
		--    [[voltmetr]]; [[bagr]], [[bunkr]], [[cedr]], [[Dněpr]], [[fofr]], [[habr]] (NOTE: ř listed first), [[hadr]]
		--    (NOTE: ř listed first), [[hamr]], [[kafr]], [[kepr]], [[kopr]], [[koriandr]], [[krekr]], [[kufr]],
		--    [[Kypr]], [[lágr]], [[lógr]], [[manévr]], [[masakr]], [[okr]], [[oleandr]], [[pulovr]], [[šlágr]],
		--    [[vichr]] (NOTE: ř listed first), [[žánr]]
		--
		-- -- Inanimate nouns that don't palatalize: [[ampérmetr]], [[anemometr]], [[sfygmomanometr]], [[sfygmometr]];
		--    [[dodekaedr]], [[Hamr]], [[ikozaedr]], [[kvádr]], [[sandr]], [[torr]]
		--
		-- -- Animate nouns that palatalize: [[arbitr]], [[bratr]], [[ekonometr]], [[foniatr]], [[fotr]], [[geometr]],
		--    [[kmotr]], [[lotr]], [[magistr]], [[metr]] (NOTE: is both animate and inanimate), [[ministr]], [[mistr]],
		--    [[pediatr]], [[Petr]], [[psychiatr]], [[purkmistr]], [[setr]], [[šamstr]]; [[bobr]], [[fajnšmekr]],
		--    [[humr]], [[hypochondr]], [[kapr]], [[lídr]], [[negr]], [[obr]], [[salamandr]], [[sólokapr]], [[švagr]],
		--    [[tygr]], [[zlobr]], [[zubr]]
		--
		-- -- Animate nouns with optional palatalization (ř listed first): [[Silvestr]]; [[Alexandr]], [[snajpr]]
		--
		-- Note the inconsistencies, e.g. [[sfygmomanometr]] and [[ampérmetr]] don't palatalize but [[manometr]] and
		-- [[miliampérmetr]] do it optionally. In reality, inanimate vocatives are extremely rare so this may not be the
		-- final word.
		if base.animacy == "inan" and rfind(stem, com.cons_c .. "r$") and not rfind(stem, "rr$") then
			-- optional r -> ř
			stem = {stem, palstem}
		else
			stem = palstem
		end
	elseif rfind(ending, "^[ěií]") or slot == "loc_s" and ending == "e" then
		if rfind(stem, "ck$") and rfind(base.lemma, "ck$") then
			-- IJP says nouns in -ck (back, comeback, crack, deadlock, hatchback, hattrick, joystick, paperback, quarterback,
			-- rock, soundtrack, track, truck) simplify the resulting -cc ending in the loc_p to -c. Similarly [[quarterback]]
			-- has nom_pl 'quarterbaci, quarterbackove'. We need to check the lemma as well because nouns in -cek don't do this.
			stem = rsub(stem, "ck$", "k")
		end
		if base.velar then
			-- [[petanque]] /petank/ -> loc pl 'petancích'.
			stem = rsub(stem, "gu$", "g")
			stem = rsub(stem, "qu$", "k")
		end
		-- loc_s of hard masculines is sometimes -e/ě; the user might indicate this as -e, which we should handle
		-- correctly
		stem = com.apply_second_palatalization(stem)
	end
	return stem, ending
end
-- Tell whether `slot` must be skipped for a term restricted to `number`: plural slots (ending in
-- "_p") are skipped for "sg" and singular slots (ending in "_s") for "pl". The result is truthy when
-- the slot is to be skipped and false/nil otherwise.
local function skip_slot(number, slot)
	if number == "sg" then
		local match_start = rfind(slot, "_p$")
		if match_start then
			return match_start
		end
		return false
	end
	if number == "pl" then
		-- Parentheses keep just the first result of rfind().
		return (rfind(slot, "_s$"))
	end
	return false
end
-- Core routine that joins stem(s) with ending(s) and stores the outcome in `slot`.
--
-- * `stems` is either the `stems` object handed to the declension functions (holding the various stems) or
--   a plain string that overrides the stem. (NOTE: When passing a string, also pass the value of
--   `stems.footnotes` as `footnotes`, since it would otherwise be lost. To supply an additional footnote of
--   your own, merge it with any user-specified footnote(s) using iut.combine_footnotes().)
-- * `endings` is a single ending (string) or a list of endings. Nothing is inserted when it is nil.
-- * An ending of "-" means "use the lemma": `stems` is ignored and the actual lemma is used with an empty
--   ending. This matters when the user gave a declension lemma different from the actual lemma via
--   `decllemma:`, or specified '.foreign' (which has a similar effect).
local function add(base, slot, stems, endings, footnotes)
	-- Nothing to add without endings. The slot check uses the declined number; if the actual number is
	-- different, this is corrected at the end of decline_noun().
	if not endings or skip_slot(base.number, slot) then
		return
	end

	local stem_footnotes = nil
	if type(stems) == "table" and stems.footnotes then
		stem_footnotes = stems.footnotes
	end
	local all_footnotes = iut.combine_footnotes(
		iut.combine_footnotes(base.footnotes, stem_footnotes), footnotes
	)

	local ending_list = type(endings) == "string" and {endings} or endings

	-- Whether this slot takes the oblique stems of the `stems` object.
	local function uses_oblique_stem()
		local oblique_slots = stems.oblique_slots
		if oblique_slots == "all" then
			return true
		end
		if slot == "gen_p" then
			return oblique_slots == "gen_p" or oblique_slots == "all-oblique"
		end
		if oblique_slots == "all-oblique" then
			return slot == "ins_s" or slot == "dat_p" or slot == "loc_p" or slot == "ins_p"
		end
		return false
	end

	local function join(stem, ending)
		return com.combine_stem_ending(base, slot, stem, ending)
	end

	for _, raw_ending in ipairs(ending_list) do
		-- Work out the stem. "-" always selects the lemma; a string `stems` is used directly; otherwise
		-- `stems` holds four stems (vowel-vs-non-vowel cross regular-vs-oblique) and the right one is
		-- picked from the slot and from whether the ending starts with a vowel.
		local stem, ending
		if raw_ending == "-" then
			stem, ending = base.actual_lemma, ""
		else
			ending = raw_ending
			if type(stems) == "string" then
				stem = stems
			else
				local vowel_initial = rfind(ending, "^" .. com.vowel_c)
				if uses_oblique_stem() then
					if vowel_initial then
						stem = stems.oblique_vowel_stem
					else
						stem = stems.oblique_nonvowel_stem
					end
				elseif vowel_initial then
					stem = stems.vowel_stem
				else
					stem = stems.nonvowel_stem
				end
			end
		end

		-- Maybe apply the first or second Slavic palatalization.
		stem, ending = apply_special_cases(base, slot, stem, ending)
		ending = iut.combine_form_and_footnotes(ending, all_footnotes)
		iut.add_forms(base.forms, slot, stem, ending, join)
	end
end
-- Apply the user-specified overrides in `base.overrides` to every slot for which `do_slot(slot)` is
-- truthy. An overridden slot is flagged in `base.slot_overridden`, its existing forms are discarded,
-- and the override values are inserted instead. An override for a slot that is excluded by the
-- declined number is an error, regardless of `do_slot`.
local function process_slot_overrides(base, do_slot)
	-- Insert a single override value into `slot`.
	local function apply_value(slot, override, value)
		local form = value.form
		local notes = iut.combine_footnotes(base.footnotes, value.footnotes)
		if override.full then
			-- Full overrides give the complete form; an empty one inserts nothing.
			if form ~= "" then
				iut.insert_form(base.forms, slot, {form = form, footnotes = notes})
			end
			return
		end
		-- Convert a null ending to "-" in the acc/voc sg slots so that e.g. [[Kerberos]] declared as
		-- <m.sg.foreign.gena:u.acc-:a> works correctly and generates accusative 'Kerberos/Kerbera' not
		-- #'Kerber/Kerbera'.
		if form == "" and (slot == "acc_s" or slot == "voc_s") then
			form = "-"
		end
		for _, stem_set in ipairs(base.stem_sets) do
			add(base, slot, stem_set, form, notes)
		end
	end

	for slot, slot_overrides in pairs(base.overrides) do
		-- Call skip_slot() based on the declined number; if the actual number is different, we correct this in
		-- decline_noun() at the end.
		if skip_slot(base.number, slot) then
			error("Override specified for invalid slot '" .. slot .. "' due to '" .. base.number .. "' number restriction")
		end
		if do_slot(slot) then
			base.slot_overridden[slot] = true
			base.forms[slot] = nil
			for _, override in ipairs(slot_overrides) do
				for _, value in ipairs(override.values) do
					apply_value(slot, override, value)
				end
			end
		end
	end
end
-- Add a full set of singular and plural endings for one stem set. Each ending argument may be a
-- string, a list of strings, or nil (in which case add() inserts nothing for that slot). The
-- nominative singular always uses the lemma itself ("-").
local function add_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
	nom_p, gen_p, dat_p, acc_p, loc_p, ins_p, footnotes
)
	local function put(slot, endings)
		add(base, slot, stems, endings, footnotes)
	end

	put("nom_s", "-")
	put("gen_s", gen_s)
	put("dat_s", dat_s)
	put("acc_s", acc_s)
	put("voc_s", voc_s)
	put("loc_s", loc_s)
	put("ins_s", ins_s)

	if base.number == "pl" then
		-- If this is a plurale tantum noun and we're processing the nominative plural, use the user-specified lemma
		-- rather than generating the plural from the synthesized singular, which may not match the specified lemma
		-- (e.g. [[tvargle]] "Olomouc cheese" using <m.pl.mixed> would try to generate 'tvargle/tvargly', and [[peníze]]
		-- "money" using <m.pl.#ě.genpl-> would try to generate 'peněze').
		-- The accusative follows suit when its endings were identical to the nominative's.
		if m_table.deepEquals(nom_p, acc_p) then
			acc_p = "-"
		end
		nom_p = "-"
	end

	put("nom_p", nom_p)
	put("gen_p", gen_p)
	put("dat_p", dat_p)
	put("acc_p", acc_p)
	put("loc_p", loc_p)
	put("ins_p", ins_p)
end
-- Add singular endings only: forwards to add_decl() with nil for all six plural endings, so that no
-- plural forms are inserted by this call.
local function add_sg_decl(base, stems,
gen_s, dat_s, acc_s, voc_s, loc_s, ins_s, footnotes
)
add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
nil, nil, nil, nil, nil, nil, footnotes)
end
-- Add plural endings only: forwards to add_decl() with nil for all six singular endings and "-" (use the
-- lemma) for the nominative plural.
local function add_pl_only_decl(base, stems,
gen_p, dat_p, acc_p, loc_p, ins_p, footnotes
)
add_decl(base, stems, nil, nil, nil, nil, nil, nil,
"-", gen_p, dat_p, acc_p, loc_p, ins_p, footnotes)
end
-- Add singular endings including the separate clitic forms of the genitive, dative and accusative.
-- The nominative singular is taken from the lemma ("-") unless `no_nom_s` is truthy, in which case
-- no nominative is added here.
local function add_sg_decl_with_clitic(base, stems,
	gen_s, clitic_gen_s, dat_s, clitic_dat_s, acc_s, clitic_acc_s, voc_s, loc_s, ins_s, footnotes, no_nom_s
)
	if not no_nom_s then
		add(base, "nom_s", stems, "-", footnotes)
	end

	-- Slot/ending pairs in insertion order. Each pair is its own table so that nil endings do not
	-- create holes in the outer list.
	local slot_endings = {
		{"gen_s", gen_s},
		{"clitic_gen_s", clitic_gen_s},
		{"dat_s", dat_s},
		{"clitic_dat_s", clitic_dat_s},
		{"acc_s", acc_s},
		{"clitic_acc_s", clitic_acc_s},
		{"voc_s", voc_s},
		{"loc_s", loc_s},
		{"ins_s", ins_s},
	}
	for _, pair in ipairs(slot_endings) do
		add(base, pair[1], stems, pair[2], footnotes)
	end
end
-- Finish the forms of `base`: apply overrides, fill in the slots that are derived from other slots
-- (voc_p, acc_s, clitic_acc_s), and generate the "_linked" variants of the potential lemma slots.
local function handle_derived_slots_and_overrides(base)
	local derived_slots = {voc_p = true, acc_s = true, clitic_acc_s = true}
	local function is_derived_slot(slot)
		return derived_slots[slot] == true
	end
	local function is_non_derived_slot(slot)
		return not derived_slots[slot]
	end

	base.slot_overridden = {}

	-- Overrides of non-derived slots come first, so that an override of a source slot (e.g. nom_p)
	-- carries over to the slots derived from it.
	process_slot_overrides(base, is_non_derived_slot)

	-- Now fill in the derived slots. Pronouns (and determiners) don't have a vocative
	-- (singular or plural), so voc_p is only copied from nom_p for other words.
	if not (base.pron or base.det) then
		iut.insert_forms(base.forms, "voc_p", base.forms.nom_p)
	end
	-- The accusative singular copies the nominative for inanimates and the genitive otherwise,
	-- unless it already has forms or was explicitly overridden.
	if not (base.forms.acc_s or base.slot_overridden.acc_s) then
		local source_slot = base.animacy == "inan" and "nom_s" or "gen_s"
		iut.insert_forms(base.forms, "acc_s", base.forms[source_slot])
	end
	if not (base.forms.clitic_acc_s or base.slot_overridden.clitic_acc_s) then
		local source_slot = base.animacy == "inan" and "nom_s" or "clitic_gen_s"
		iut.insert_forms(base.forms, "clitic_acc_s", base.forms[source_slot])
	end

	-- Overrides of the derived slots themselves are applied last so they win over the derivation.
	process_slot_overrides(base, is_derived_slot)

	-- Compute linked versions of potential lemma slots, for use in {{cs-noun}}. A form that equals
	-- the lemma is replaced by the original lemma (before removing links) if that lemma has links.
	local function restore_links(form)
		if form == base.orig_lemma_no_links and rfind(base.orig_lemma, "%[%[") then
			return base.orig_lemma
		end
		return form
	end
	for _, lemma_slot in ipairs(potential_lemma_slots) do
		iut.insert_forms(base.forms, lemma_slot .. "_linked", iut.map_forms(base.forms[lemma_slot], restore_links))
	end
end
-- Table mapping declension types (keys such as "hard-m" or "soft-f") to functions that decline the noun. Each
-- function takes two arguments, `base` and `stems`; the latter specifies the computed stems (vowel vs. non-vowel,
-- singular vs. plural) and whether the noun is reducible and/or has vowel alternations in the stem. Most of the
-- specifics of determining which stem to use and how to modify it for the given ending are handled in add_decl();
-- the declension functions just need to generate the appropriate endings.
local decls = {}
-- Table specifying additional properties for declension types, keyed by the same names as `decls`. Every
-- declension type must have such a table, which specifies which category or categories to add and what annotation
-- to show in the title bar of the declension table.
--
-- * Only the `cat` property of this table is mandatory; there is also a `desc` property to specify the annotation, but
-- this can be omitted and the annotation will then be computed from the `cat` property. The `cat` property is either
-- a string, a list of strings or a function (of two arguments, `base` and `stems` as above) returning a string or
-- list of strings. The string can contain the keywords GENDER to substitute the gender (and animacy for masculine
-- nouns) and POS (to substitute the pluralized part of speech). The keyword GENPOS is equivalent to 'GENDER POS'. If
-- no keyword is present, ' GENPOS' is added onto the end. If only GENDER is present, ' POS' is added onto the end.
-- In all cases, the language name is added onto the beginning to form the full category name.
-- * The `desc` property is of the same form as the `cat` property and specifies the annotation to display in the title
-- bar (which may have the same format as the category minus the part of speech, or may be abbreviated). The value
-- may not be a list of strings, as only one annotation is displayed. If omitted, it is derived from the category
-- spec(s) by taking the last category (if more than one is given) and removing ' POS' before keyword substitution.
local declprops = {}
-- Return the default masculine animate nominative plural ending(s) given `base` and `stems`. This is called for hard
-- and soft masculines ending in a consonant, but not for nouns ending in a vowel, which have their own defaults
-- (particularly nouns in -a, where -ista/-ita/-asta behave differently from other nouns in -a).
--
-- Reads `base.lemma`, `base.surname` and `stems.vowel_stem`. Returns either a single ending string or a list of
-- ending strings. The checks below are tried from top to bottom and the first one that matches wins; "i" is the
-- overall fallback.
local function default_masc_animate_nom_pl(base, stems)
	return
		-- [monosyllabic words: Dánové, Irové, králové, mágové, Rusové, sokové, synové, špehové, zběhové, zeťové,
		-- manové, danové (but Žid → Židé, Čech → Češi).] -- There are too many exceptions to this to make a special
		-- rule. It is better to use the overall default of -i and require that cases with -ove, -ove/-i, -i/-ove,
		-- etc. use overrides.
		-- com.is_monosyllabic(base.lemma) and "ové" or
		-- reducible terms in -Cek; order of -ové vs. -i sometimes varies:
		-- [[fracek]] (ové/i), [[klacek]] (i/ové), [[macek]] (ové/i), [[nácek]] (i/ové), [[prcek]] (ové/i),
		-- [[racek]] (ové/i); [[bazilišek]] (i/ové), [[černoušek]] (i/ové), [[drahoušek]] (ové/i), [[fanoušek]]
		-- (i/ové), [[františek]] (an/inan, ends in -i/-y but not -ové), [[koloušek]] (-i only), [[kulíšek]] (i/ové),
		-- [[oříšek]] (i/ové), [[papoušek]] (-i only), [[prášek]] (i/ové), [[šašek]] (i/ové).
		-- Make sure to check `stems` as we don't want to include non-reducible words in -Cek (but do want to include
		-- [[quarterback]], with -i/-ové).
		rfind(stems.vowel_stem, "^" .. com.lowercase_c .. ".*" .. com.cons_c .. "k$") and {"i", "ové"} or
		-- [[stoik]], [[neurotik]], [[logik]], [[fyzik]], etc. The `.*` lets the -ik suffix follow a stem of any
		-- length; a bare `.` would restrict the match to four-character lemmas and miss all of these examples.
		rfind(base.lemma, "^" .. com.lowercase_c .. ".*ik$") and {"i", "ové"} or
		-- barmani, gentlemani, jazzmani, kameramani, narkomani, ombudsmani, pivotmani, rekordmani, showmani,
		-- supermani, toxikomani
		rfind(base.lemma, "^" .. com.lowercase_c .. ".*man$") and "i" or
		-- terms ending in -an after a palatal or a consonant that doesn't change when palatalized, i.e. labial or l
		-- (but -man forms -mani unless in a proper noun): Brňan → Brňané, křesťan → křesťané, měšťan → měšťané,
		-- Moravan → Moravané, občan → občané, ostrovan → ostrované, Pražan → Pražané, Slovan → Slované,
		-- svatebčan → svatebčané, venkovan → venkované, Australan → Australané; also s, because there are many
		-- demonyms in -san e.g. [[Andalusan]], [[Barbadosan]], [[Oděsan]], and few proper nouns in -san; similarly z
		-- because of [[Belizan]], [[Gazan]], [[Kavkazan]], etc.; also w, which isn't a normal consonant in Czech but
		-- occurs in [[Glasgowan]] and [[Zimbabwan]]; NOTE: a few misc words like [[pohan]] also work this way but
		-- need manual overrides
		rfind(base.lemma, "[" .. com.inherently_soft .. com.labial .. "wlsz]an$") and {"é", "i"} or -- most now can also take -i
		-- proper names: Baťové, Novákové, Petrové, Tomášové, Vláďové; exclude demonyms (but include surnames)
		rfind(base.lemma, "^" .. com.uppercase_c) and (base.surname or not rfind(base.lemma, "[eě]c$")) and "ové" or
		-- demonyms: [[Albánec]], [[Gruzínec]], [[Izraelec]], [[Korejec]], [[Libyjec]], [[Litevec]], [[Němec]],
		-- [[Portugalec]]
		rfind(base.lemma, "^" .. com.uppercase_c .. ".*[eě]c$") and "i" or
		-- From here on down, we're dealing only with lowercase terms.
		-- buditelé, budovatelé, čekatelé, činitelé, hostitelé, jmenovatelé, pisatelé, ručitelé, velitelé, živitelé
		rfind(base.lemma, ".*tel$") and "é" or
		-- nouns in -j: čaroděj → čarodějové, lokaj → lokajové, patricij → patricijové, plebej → plebejové,
		-- šohaj → šohajové, žokej → žokejové
		-- nouns in -l: apoštol → apoštolové, břídil → břídilové, fňukal → fňukalové, hýřil → hýřilové,
		-- kutil → kutilové, loudal → loudalové, mazal → mazalové, škrabal → škrabalové, škudlil → škudlilové,
		-- vyvrhel → vyvrhelové, žvanil → žvanilové (we excluded those in -tel above)
		rfind(base.lemma, ".*[jl]$") and "ové" or
		-- archeolog → archeologové, biolog → biologové, geolog → geologové, meteorolog → meteorologové
		rfind(base.lemma, ".*log$") and "ové" or
		-- dramaturg → dramaturgové, chirurg → chirurgové
		rfind(base.lemma, ".*urg$") and "ové" or
		-- fotograf → fotografové, geograf → geografové, lexikograf → lexikografové
		rfind(base.lemma, ".*graf$") and "ové" or
		-- bibliofil → bibliofilové, germanofil → germanofilové
		rfind(base.lemma, ".*fil$") and "ové" or
		-- rusofob → rusofobové
		rfind(base.lemma, ".*fob$") and "ové" or
		-- agronom → agronomové, ekonom → ekonomové
		rfind(base.lemma, ".*nom$") and "ové" or
		"i"
end
-- Hard masculine declension. Sets `base.palatalize_voc` and `base.hard_c`, works out the endings that depend on
-- animacy, velar stems and toponym status, and hands everything to add_decl().
decls["hard-m"] = function(base, stems)
	local lemma = base.lemma
	local inanimate = base.animacy == "inan"

	-- Nouns ending in hard -c, e.g. [[hec]] "joke", [[kibuc]] "kibbutz", don't palatalize.
	base.palatalize_voc = not rfind(stems.vowel_stem, "c$")
	base.hard_c = true

	-- An explicit `velar` flag wins; otherwise, unless `-velar` is set, look at the end of the vowel stem.
	local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")

	-- See [https://prirucka.ujc.cas.cz/en/?id=360] on declension of toponyms.
	local is_toponym = inanimate and rfind(lemma, "^" .. com.uppercase_c)
	-- Some toponyms take -a in the genitive singular, e.g. toponyms in -ín ([[Zlín]], [[Jičín]], [[Berlín]]);
	-- -ýn ([[Hostýn]], [[Londýn]]); -ov ([[Havířov]]); and -ev ([[Bezdrev]]), as do some others, e.g. domestic
	-- [[Beroun]], [[Brandýs]], [[Náchod]], [[Tábor]] and foreign [[Betlém]] "Bethlehem", [[Egypt]],
	-- [[Jeruzalém]] "Jerusalem", [[Milán]] "Milan", [[Řím]] "Rome", [[Rýn]] "Rhine". Also some transferred from
	-- common nouns e.g. ([[Nový]]) [[Kostel]], ([[Starý]]) [[Rybník]]. Only the -ín/-ýn/-ov/-ev endings are
	-- detected here.
	local gen_in_a = is_toponym and (rfind(lemma, "[íý]n$") or rfind(lemma, "[oe]v$"))
	-- Toponyms in -ík (Mělník, Braník, Rakovník, Lipník) seem to fluctuate between gen -a and -u. Also some in
	-- ‑štejn, ‑berg, ‑perk, ‑burk, ‑purk (Rabštejn, Heidelberg, Kašperk, Hamburk, Prešpurk) and some others:
	-- Zbiroh, Kamýk, Příbor, Zábřeh, Žebrák, Praděd. Only -ík is detected here.
	local gen_in_a_or_u = is_toponym and rfind(lemma, "ík$")
	-- Toponyms that take -a in the genitive singular tend to take -ě in the locative singular; so do those in
	-- -štejn (Rabštejn), -hrad (Petrohrad), -grad (Volgograd).
	local loc_in_e = is_toponym and (gen_in_a or rfind(lemma, "štejn$") or rfind(lemma, "[gh]rad$"))
	-- Toponyms in -ík seem to fluctuate between loc -ě and -u.
	local loc_in_e_or_u = gen_in_a_or_u

	-- Inanimate gen_s in -a other than toponyms in -ín/-ýn/-ev/-ov (e.g. [[zákon]] "law", [[oběd]] "lunch",
	-- [[kostel]] "church", [[dnešek]] "today", [[leden]] "January", [[trojúhelník]] "triangle") needs to be given
	-- manually, using '<gena>'.
	local gen_s
	if gen_in_a then
		gen_s = "a"
	elseif gen_in_a_or_u then
		gen_s = {"a", "u"}
	elseif inanimate then
		gen_s = "u"
	else
		gen_s = "a"
	end

	-- Animates with dat_s only in -u (e.g. [[člověk]] "person", [[Bůh]] "God") need to give this manually,
	-- using '<datu>'.
	local dat_s
	if inanimate then
		dat_s = "u"
	elseif base.surname then
		dat_s = "ovi"
	else
		dat_s = {"ovi", "u"}
	end

	-- Inanimates with loc_s in -e/ě other than certain toponyms (see above) need to give this manually, using
	-- <locě>, but it will trigger the second palatalization automatically.
	local loc_s
	if loc_in_e then
		loc_s = "ě"
	elseif loc_in_e_or_u then
		loc_s = {"ě", "u"}
	else
		loc_s = dat_s
	end

	-- Velar-stem animates with voc_s in -e (e.g. [[Bůh]] "God", voc_s 'Bože'; [[člověk]] "person", voc_s
	-- 'člověče') need to give this manually using <voce>; it will trigger the first palatalization automatically.
	-- 'e' will trigger first palatalization in apply_special_cases().
	local voc_s
	if velar then
		voc_s = "u"
	else
		voc_s = "e"
	end

	-- Nom_p in -i will trigger second palatalization in apply_special_cases().
	local nom_p
	if inanimate then
		nom_p = "y"
	else
		nom_p = default_masc_animate_nom_pl(base, stems)
	end

	-- Per IJP and Janda and Townsend:
	-- * loc_p in -ích is currently the default for velars but not otherwise; it will automatically trigger the
	--   second palatalization (e.g. [[práh]] "threshold", loc_p 'prazích'). Otherwise, -ích needs to be given
	--   manually using <locplích>, e.g. [[les]] "forest"; [[hotel]] "hotel"; likewise for loc_p in -ách (e.g.
	--   [[plech]] "metal plate"), using <locplách>.
	-- * Inanimate hard nouns in -c normally have -ech: [[hec]] "joke", [[tác]] "tray", [[truc]], [[kec]],
	--   [[frc]], [[flanc]], [[kibuc]] "kibbutz", [[pokec]] "chat".
	-- In the IJP tables, inanimate reducible nouns in -ček (and most in -cek, although there are many fewer; also
	-- some in -žek, but in this case it's too inconsistent to make the default) regularly have both -ích and -ách
	-- in the locative plural, while similar animate nouns only have -ích. This applies even to nouns like
	-- [[háček]] and [[koníček]] that can be either animate or inanimate. Make sure to exclude nouns in -ck such as
	-- [[comeback]] and [[joystick]], which have only -ích.
	local loc_p
	if inanimate and rfind(lemma, "[cč]ek$") and rfind(stems.vowel_stem, "[cč]k$") then
		loc_p = {"ích", "ách"}
	elseif velar then
		loc_p = "ích"
	else
		loc_p = "ech"
	end

	-- acc_s is passed as nil.
	add_decl(base, stems, gen_s, dat_s, nil, voc_s, loc_s, "em",
		nom_p, "ů", "ům", "y", loc_p, "y")
end
-- Properties for the hard masculine declension. Both the title-bar annotation and the category distinguish
-- velar stems from other hard stems, using the same velar test as decls["hard-m"].
declprops["hard-m"] = {
	desc = function(base, stems)
		local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")
		return velar and "velar GENDER" or "hard GENDER"
	end,
	cat = function(base, stems)
		local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")
		return velar and "velar-stem" or "hard"
	end,
}
-- Semisoft masculine declension. Examples:
-- * Animate in -ius: génius, nuncius, nonius (breed of horse), notárius, ordinárius, patricius, primárius,
--   pronuncius, various names
-- * Animate in -eus: farizeus, basileus, pygmeus ([[skarabeus]] inflects hard in the plural), various names
-- * Inanimate in -ius: nonius (measuring device), rádius, sestercius
-- NOTE: Inanimate nouns in -eus (nukleus, choreus) inflect hard in the plural
decls["semisoft-m"] = function(base, stems)
	local inanimate = base.animacy == "inan"

	-- Dative and locative singular share the same ending(s).
	local dat_loc_s
	if inanimate then
		dat_loc_s = "u"
	elseif base.surname then
		dat_loc_s = "ovi"
	else
		dat_loc_s = {"ovi", "u"}
	end

	local nom_p
	if inanimate then
		nom_p = "e"
	else
		nom_p = "ové"
	end

	-- acc_s is passed as nil.
	add_decl(base, stems, "a", dat_loc_s, nil, "e", dat_loc_s, "em",
		nom_p, "ů", "ům", "e", "ích", "i")
end

declprops["semisoft-m"] = {cat = "semisoft"}
-- Soft masculine declension. Sets `base.palatalize_voc` and passes the endings to add_decl().
decls["soft-m"] = function(base, stems)
	local inanimate = base.animacy == "inan"
	base.palatalize_voc = true

	-- Animates with dat_s only in -i need to give this manually, using '<dati>'. Dative and locative singular
	-- share the same ending(s).
	local dat_loc_s
	if inanimate then
		dat_loc_s = "i"
	elseif base.surname then
		dat_loc_s = "ovi"
	else
		dat_loc_s = {"ovi", "i"}
	end

	-- Per IJP, the vast majority of soft masculine animates take -i in the voc_s, but those in -ec/-ěc take -e
	-- with first palatalization to -če, e.g. [[otec]] "father", [[lovec]] "hunter", [[blbec]] "fool, idiot",
	-- [[horolezec]] "mountaineer", [[znalec]] "expert", [[chlapec]] "boy", [[nadšenec]] "enthusiast", [[luněc]]
	-- (type of bird). Demonyms but not surnames ending in -ec but beginning with a capital letter take either -e
	-- or -i (only the former triggers the first palatalization). Examples: [[Portugalec]], [[Slovinec]]
	-- "Slovenian", [[Japonec]], [[Vietnamec]]. Not [[Kadlec]] (surname).
	local voc_s = "i"
	if base.animacy == "an" and rfind(base.lemma, "[eě]c$") and stems.reducible then
		if not base.surname and rfind(base.lemma, "^" .. com.uppercase_c) then
			voc_s = {"e", "i"}
		else
			voc_s = "e"
		end
	end

	local nom_p
	if inanimate then
		nom_p = "e"
	else
		nom_p = default_masc_animate_nom_pl(base, stems)
	end

	-- Nouns with loc_p in -ech (e.g. [[cíl]] "goal") need to give this manually, using <locplech>.
	-- acc_s is passed as nil.
	add_decl(base, stems, "e", dat_loc_s, nil, voc_s, dat_loc_s, "em",
		nom_p, "ů", "ům", "e", "ích", "i")
end

declprops["soft-m"] = {cat = "soft"}
-- Mixed masculine declension: picks one of four ending sets depending on animacy and the final consonant of the
-- lemma, then makes a single add_decl() call.
--
-- NOTE: IJP tends to list the soft endings first, but per their section on this
-- (https://prirucka.ujc.cas.cz/en/?id=220), the hard endings tend to predominate in modern use, so we list them
-- first.
decls["mixed-m"] = function(base, stems)
	local animate = base.animacy == "an"
	local gen_s, dat_s, voc_s, loc_s
	local nom_p, acc_p, loc_p, ins_p

	if animate and rfind(base.lemma, "l$") then
		-- [[anděl]] "angel", [[manžel]] "husband", [[strašpytel]] "coward"; 'strašpytel' has a different
		-- declension from the other two, with more soft forms. [[manžel]] has plural in -é or -ové and needs an
		-- override.
		dat_s = base.surname and "ovi" or {"ovi", "u"}
		loc_s = dat_s
		gen_s, voc_s = "a", "i"
		nom_p, acc_p, loc_p, ins_p = "é", {"y", "e"}, {"ech", "ích"}, {"y", "i"}
	elseif animate then
		-- -s/-z: rorýs, platýs, pilous, markýz, všekaz, stávkokaz, penězokaz, listokaz, dřevokaz, zrnokaz, boss.
		-- Others recently moving towards this declension: primas, karas, kalous, konipas, ibis, chabrus, chuďas,
		-- kakabus, kliďas, kandrdas, morous, vágus.
		-- Some names: Alois, Mánes.
		-- Both hard and soft endings throughout. Most have -i and -ové in the nominative plural.
		dat_s = base.surname and "ovi" or {"u", "i", "ovi"}
		loc_s = dat_s
		gen_s, voc_s = {"a", "e"}, {"e", "i"}
		nom_p, acc_p, loc_p, ins_p = {"i", "ové"}, {"y", "e"}, {"ech", "ích"}, {"y", "i"}
	elseif rfind(base.lemma, "[ls]$") then
		-- Given in IJP: burel, hnědel, chmel, krevel, kužel, námel, plevel, tmel, zádrhel, apríl, artikul,
		-- koukol, rubl, úběl, plus reducible nouns cumel, chrchel, [[kotel]] "cauldron", sopel, uhel. Also
		-- [[městys]]. Many of them are listed in the IJP tables with only hard or with fewer soft forms, so need
		-- to be investigated individually.
		gen_s, dat_s, voc_s, loc_s = {"u", "e"}, {"u", "i"}, {"e", "i"}, {"u", "e", "i"}
		nom_p, acc_p, loc_p, ins_p = {"y", "e"}, {"y", "e"}, {"ech", "ích"}, {"y", "i"}
	else
		-- -n/-t; hard in the plural: hřeben, ječmen, [[kámen]] "stone", kmen, kořen, křemen, plamen,
		-- [[pramen]] "source", [[řemen]] "strap", den, týden, [[loket]] "elbow".
		-- There may be deviations (e.g. soft plural forms for [[den]]), so need to be investigated individually.
		gen_s, dat_s, voc_s, loc_s = {"u", "e"}, {"u", "i"}, "i", {"u", "i"}
		nom_p, acc_p, loc_p, ins_p = "y", "y", "ech", "y"
	end

	-- acc_s is passed as nil; ins_s, gen_p and dat_p are the same in all four sets.
	add_decl(base, stems, gen_s, dat_s, nil, voc_s, loc_s, "em",
		nom_p, "ů", "ům", acc_p, loc_p, ins_p)
end

declprops["mixed-m"] = {cat = "mixed"}
-- Masculine declension for nouns in -a.
decls["a-m"] = function(base, stems)
	local vowel_stem = stems.vowel_stem

	-- husita → husité, izraelita → izraelité, jezuita → jezuité, kosmopolita → kosmopolité, táborita → táborité
	-- fašista → fašisté, filatelista → filatelisté, fotbalista → fotbalisté, kapitalista → kapitalisté,
	-- marxista → marxisté, šachista → šachisté, terorista → teroristé. NOTE: most of these words actually appear
	-- in the IJP tables with -é/-i, so we go accordingly.
	--
	-- gymnasta → gymnasté, fantasta → fantasté; also chiliasta, orgiasta, scholiasta, entuziasta, dynasta,
	-- ochlasta, sarkasta, vymasta; NOTE: Only 'gymnasta' actually given with just -é; 'fantasta' with -ové/-é,
	-- 'dynasta' and 'ochlasta' with just -ové, vymasta not in IJP (no plural given in SSJC), and the rest with
	-- -é/-i. So we go accordingly.
	local nom_p = "ové"
	if rfind(vowel_stem, "is?t$") or rfind(vowel_stem, "ast$") then
		nom_p = {"é", "i"}
	end

	-- Velar nouns (e.g. [[sluha]] "servant") have -ích in the loc_p (which triggers the second palatalization)
	-- instead of -ech. Nouns whose stem ends in a soft consonant ([[rikša]], [[paša]], [[bača]], [[mahárádža]],
	-- [[paňáca]], etc.) behave likewise.
	-- FIXME: [[pária]] "pariah", [[Maria]] etc.
	local velar = base.velar or not base["-velar"] and rfind(vowel_stem, com.velar_c .. "$")
	local loc_p
	if velar or rfind(vowel_stem, com.inherently_soft_c .. "$") then
		loc_p = "ích"
	else
		loc_p = "ech"
	end

	add_decl(base, stems, "y", "ovi", "u", "o", "ovi", "ou",
		nom_p, "ů", "ům", "y", loc_p, "y")
end

declprops["a-m"] = {cat = "GENPOS in -a"}
-- Masculine declension for nouns in -e: [[zachránce]] "savior"; [[soudce]] "judge"; etc.
-- At least two inanimates: [[průvodce]] "guide, guidebook; computing wizard"; [[správce]] "manager (software
-- program), configuration program".
decls["e-m"] = function(base, stems)
	local inanimate = base.animacy == "inan"

	-- Dative and locative singular share the same ending(s).
	local dat_loc_s
	if inanimate then
		dat_loc_s = "i"
	elseif base.surname then
		dat_loc_s = "ovi"
	else
		dat_loc_s = {"ovi", "i"}
	end

	-- Nouns with -ové as well (e.g. [[soudce]] "judge") will need to specify that manually, e.g. <nompli:ové>.
	local nom_p
	if inanimate then
		nom_p = "e"
	else
		nom_p = "i"
	end

	-- acc_s is passed as nil.
	add_decl(base, stems, "e", dat_loc_s, nil, "-", dat_loc_s, "em",
		nom_p, "ů", "ům", "e", "ích", "i")
end

declprops["e-m"] = {cat = "GENPOS in -e"}
-- Masculine declension for nouns in -i/-y.
--
-- [[kivi]] "kiwi (bird)"; [[kuli]] "coolie"; [[lori]] "lory, lorikeet (bird)" (loc_pl 'loriech/loriích/lorich');
-- [[vini]] "parrot of the genus Vini"; [[yetti]]/[[yeti]] "yeti". Other examples: [[aguti]], [[efendi]],
-- [[hadži]], [[pekari]], [[regenschori]], [[yetti]]/[[yeti]].
--
-- [[grizzly]]/[[grizly]] "grizzly bear"; [[pony]] "pony"; [[husky]] "husky"; [[dandy]] "dandy"; [[Billy]] "billy".
--
-- NOTE: Some nouns in -y are regular soft stems, e.g. [[gay]] "gay person"; [[gray]] "gray (unit of absorbed
-- radiation)"; [[Nagy]] (surname).
--
-- NOTE: The stem ends in -i/-y.
decls["i-m"] = function(base, stems)
	-- acc_s is left as nil.
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ho", "mu", nil, "-", "m", "m"
	local nom_p, gen_p, dat_p, acc_p, loc_p = {"ové", ""}, {"ů", "ch"}, {"ům", "m"}, {"e", ""}, {"ích", "ch"}
	-- ins_pl 'kivii/kivimi'
	local ins_p = {"i", "mi"}
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["i-m"] = {cat = "GENPOS in -i/-y"}
-- Masculine declension for nouns in -í/-ý.
--
-- [[kádí]] "qadi (Islamic judge)", [[mahdí]] "Mahdi (Islamic prophet)", [[muftí]] "mufti (Islamic scholar)",
-- [[sipáhí]] "sipahi (Algerian cavalryman in the French army)"
--
-- No obvious examples in -ý, but the support is there.
--
-- NOTE: The stem ends in -í/-ý.
decls["í-m"] = function(base, stems)
	-- acc_s is left as nil.
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ho", "mu", nil, "-", "m", "m"
	local nom_p, gen_p, dat_p, acc_p = {"ové", ""}, {"ů", "ch"}, {"ům", "m"}, {"e", ""}
	local loc_p, ins_p = "ích", "mi"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["í-m"] = {cat = "GENPOS in -í/-ý"}
-- Masculine declension for nouns in -ie.
--
-- [[zombie]] "zombie" (also fem/neut), [[hippie]] "hippie", [[yuppie]] "yuppie", [[rowdie]] "rowdy/hooligan"
--
-- NOTE: The stem ends in -i (not -ie, because of the plural).
decls["ie-m"] = function(base, stems)
	-- acc_s is left as nil.
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "eho", "emu", nil, "-", "em", "em"
	-- Every plural slot has "es" as its second ending.
	local nom_p, gen_p, dat_p = {"ové", "es"}, {"ů", "es"}, {"ům", "es"}
	local acc_p, loc_p, ins_p = {"e", "es"}, {"ích", "es"}, {"i", "es"}
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["ie-m"] = {cat = "GENPOS in -ie"}
-- Masculine declension for nouns in -ee.
--
-- [[Yankee]] "Yankee"
--
-- NOTE: The stem ends in -ee.
decls["ee-m"] = function(base, stems)
	-- acc_s is left as nil.
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ho", "mu", nil, "-", "m", "m"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ové", "ů", "ům", "e", "ích", "i"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["ee-m"] = {cat = "GENPOS in -ee"}
-- Masculine declension for nouns in -o.
--
-- [[kápo]] "head, leader"; [[lamželezo]] "strongman"; [[torero]] "bullfighter"; [[žako]] "African gray parrot";
-- [[dingo]] "dingo"; [[kakapo]] "kakapo" (given in Wiktionary with dat_s/loc_s in -ovi only not -ovi/-u; probably
-- wrong but not in IJP); [[maestro]] "maestro"; [[Bruno]] "Bruno", [[Hugo]] "Hugo"; [[Ivo]] "Yves" (these names
-- are singular-only per IJP); [[Kvido]] "Guido, Guy" (per IJP has accusative in -a or -ona); [[Oto]] "Otto" (per
-- IJP also declinable like virile -a masculines; singular-only); [[Kuřátko]] (a surname; how declined?);
-- [[Picasso]] (surname; how declined?); [[Pluto]] "Pluto (God)", also "Pluto (planet)", which is inanimate;
-- [[Samo]]/[[Sámo]] "Samo (7th century Slavic ruler)" (dat_s/loc_s only in -ovi, needs override); [[Tomio]]
-- "Tomio (Japanese male given name)" (how declined?); [[nemakačenko]] "idler, loafer" (given in Wiktionary with
-- dat_s/loc_s in -ovi only, as for [[kakapo]]); [[nefachčenko]] "idler, loafer"; note also [[gadžo]] "gadjo",
-- which has a unique declension.
decls["o-m"] = function(base, stems)
	-- Velar nouns ([[žako]], [[dingo]], etc.) have -ích in the loc_p (which triggers the second palatalization)
	-- instead of -ech.
	local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")
	local loc_p
	if velar then
		loc_p = "ích"
	else
		loc_p = "ech"
	end

	-- Inanimates e.g. [[Pluto]] (planet) have -u only, like for normal hard masculines. Dative and locative
	-- singular share the same ending(s).
	local dat_loc_s
	if base.animacy == "inan" then
		dat_loc_s = "u"
	elseif base.surname then
		dat_loc_s = "ovi"
	else
		dat_loc_s = {"ovi", "u"}
	end

	-- acc_s is passed as nil.
	add_decl(base, stems, "a", dat_loc_s, nil, "-", dat_loc_s, "em",
		"ové", "ů", "ům", "y", loc_p, "y")
end

declprops["o-m"] = {cat = "GENPOS in -o"}
-- Masculine declension for nouns in -u.
--
-- [[emu]] "emu", [[guru]] "guru", [[kakadu]] "cockatoo", [[marabu]] "marabou" (declined the same way)
-- [[Osamu]] "Osamu (Japanese male given name)" [how declined?]
-- [[Višnu]] "Vishnu" (declined like [[guru]] but singular-only)
-- [[budižkničemu]] "good-for-nothing, ne'er-do-well" (indeclinable in the singular, declinable as masculine hard
-- stem budižkničemové etc. in the plural, declinable as feminine hard stem budižkničemy etc. in the plural when
-- feminine).
--
-- NOTE: The stem ends in -u.
decls["u-m"] = function(base, stems)
	-- acc_s is left as nil.
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "a", "ovi", nil, "-", "ovi", "em"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ové", "ů", "ům", "y", "ech", "y"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["u-m"] = {cat = "GENPOS in -u"}
-- Masculine t-stem declension, e.g. [[kníže]] "prince", [[hrabě]] "earl", [[markrabě]] "margrave".
decls["tstem-m"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ete", "eti", "ete", "-", "eti", "etem"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ata", "at", "atům", "ata", "atech", "aty"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["tstem-m"] = {cat = "t-stem"}
-- Hard feminine declension. Sets `base.no_palatalize_c` (and `base.hard_c` when `base.c_as_k` is set) before
-- passing the endings to add_decl().
decls["hard-f"] = function(base, stems)
	base.no_palatalize_c = true
	local c_as_k = base.c_as_k
	if c_as_k then
		-- forms like 'ayahuascy' are allowed.
		base.hard_c = true
	end

	-- [[skica]] "sketch", [[gejša]] "geisha", [[rikša]] "rickshaw (vehicle)"; [[arakača]], [[čača]], [[čiča]]
	-- (drink), [[dača]] "dacha", [[gutaperča]] "guttapercha", [[viskača]]; [[babča]], [[číča]], [[káča]],
	-- [[mamča]], [[úča]]. Also appears to apply to ď (e.g. [[Naďa]]) and ť, as well as certain words with stems in
	-- -ň and -j (e.g. [[doňa]], and personal names such as [[Táňa]] and [[Darja]]), which normally have a mixed
	-- declension.
	-- NOTE(review): this reads `base.vowel_stem`, whereas the other declension functions nearby read
	-- `stems.vowel_stem` — confirm that this is intended.
	-- Dative and locative singular share the same ending(s).
	local dat_loc_s = "ě"
	if rfind(base.vowel_stem, "[cčšžďťjň]$") and not c_as_k then
		dat_loc_s = {"ě", "i"}
	end

	add_decl(base, stems, "y", dat_loc_s, "u", "o", dat_loc_s, "ou",
		"y", "", "ám", "y", "ách", "ami")
end

declprops["hard-f"] = {cat = "hard"}
-- Soft feminine declension. This also includes feminines in -ie, e.g. [[belarie]], [[signorie]], [[uncie]], and
-- feminines in -oe, e.g. [[kánoe]], [[aloe]] and medical terms like [[dyspnoe]], [[apnoe]], [[hemoptoe]],
-- [[kalanchoe]].
decls["soft-f"] = function(base, stems)
	-- Nouns in -ice like [[ulice]] "street" have null genitive plural e.g. 'ulic'; nouns in -yně e.g.
	-- [[přítelkyně]] "girlfriend" have gen pl 'přítelkyň' or 'přítelkyní' with two possible endings; otherwise
	-- -í. (Alternation between -ň and -n and between -e and -ě handled automatically by combine_stem_ending().)
	local gen_p
	if rfind(base.lemma, "ice$") then
		gen_p = ""
	elseif rfind(base.lemma, "yně$") then
		gen_p = {"", "í"}
	else
		gen_p = "í"
	end

	-- Vocative really ends in -e, not just a copy of the nominative; cf. [[sinfonia]], which is soft-f except
	-- for the nominative and has -e in the vocative singular.
	local voc_s = "e"

	add_decl(base, stems, "e", "i", "i", voc_s, "i", "í",
		"e", gen_p, "ím", "e", "ích", "emi")
end

declprops["soft-f"] = {cat = "soft"}
-- Mixed feminine declension.
--
-- Lowercase nouns in -ňa (e.g. bárišňa/báryšňa, doňa, dueňa, piraňa, vikuňa) and -ja (e.g. maracuja, papája,
-- sója). Also non-personal proper nouns in -ňa (e.g. [[Keňa]] "Kenya") and -ja (e.g. [[Troja]]/[[Trója]] "Troy",
-- [[Amudarja]] "Amu Darya"). Does not appear to apply to personal proper nouns (e.g. [[Táňa]] "Tanya", [[Darja]]
-- "Daria"), which usually decline like [[gejša]], [[dača]], [[skica]].
decls["mixed-f"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = {"i", "e"}, {"e", "i"}, "u", "o", {"e", "i"}, "ou"
	local nom_p, gen_p, dat_p = {"i", "e"}, {"", "í"}, {"ám", "ím"}
	local acc_p, loc_p, ins_p = {"i", "e"}, {"ách", "ích"}, {"ami", "emi"}
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["mixed-f"] = {cat = "mixed"}
-- Feminine declension for nouns ending in a consonant, e.g. [[dlaň]] "palm (of the hand)".
decls["cons-f"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "e", "i", "-", "i", "i", "í"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "e", "í", "ím", "e", "ích", "emi"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["cons-f"] = {cat = "soft zero-ending"}
-- Feminine i-stem declension.
decls["istem-f"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "i", "i", "-", "i", "i", "í"
	-- See above under apply_special_cases(); -E causes depalatalization of ť/ď/ň.
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "i", "í", "Em", "i", "Ech", "mi"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["istem-f"] = {cat = "i-stem"}
-- Feminine mixed i-stem declension. The subtype in `base.mixedistem` selects the endings that vary (gen_s, nom_p,
-- dat_p, loc_p, ins_p); an unrecognized subtype raises an error. The accusative plural reuses the nominative
-- plural ending(s).
decls["mixed-istem-f"] = function(base, stems)
	-- Use of ě vs E below is intentional. Contrast [[oběť]] dat pl 'obětem' (depalatalizing) with [[nit]] ins pl
	-- 'nitěmi' (palatalizing). See comment above under apply_special_cases().
	local endings_by_subtype = {
		-- pěst, past, mast, lest [reducible; ins pl 'lstmi'], pelest, propust, plst, oběť, zeď [reducible; ins pl
		-- 'zdmi'], paměť [ins pl 'pamětmi/paměťmi']
		["pěst"] = {gen_s = "i", nom_p = "i", dat_p = {"ím", "Em"}, loc_p = {"ích", "Ech"}, ins_p = "mi"},
		-- moc, nemoc, pomoc, velmoc; NOTE: pravomoc has -i/-e alternation in gen_s, nom_p
		["moc"] = {gen_s = "i", nom_p = "i", dat_p = {"Em", "ím"}, loc_p = {"Ech", "ích"}, ins_p = "ěmi"},
		-- myš, veš [reducible, ins pl vešmi], hruď, měď, pleť, spleť, směs, smrt, step, odpověď [ins pl
		-- 'odpověďmi/odpovědmi'], šeď, závěť [ins pl 'závěťmi/závětmi'], plsť [ins pl 'plstmi']
		["myš"] = {gen_s = "i", nom_p = "i", dat_p = "ím", loc_p = "ích", ins_p = "mi"},
		-- lež [reducible], noc, mosaz, rez [reducible], ves [reducible], mysl, sůl, běl, žluť
		["noc"] = {gen_s = "i", nom_p = "i", dat_p = "ím", loc_p = "ích", ins_p = "ěmi"},
		-- žluč, moč, modř, čeleď, kapraď, záď, žerď, čtvrť/čtvrt, drť, huť, chuť, nit, pečeť, závrať, pouť, stať,
		-- ocel
		["žluč"] = {gen_s = {"i", "ě"}, nom_p = {"i", "ě"}, dat_p = "ím", loc_p = "ích", ins_p = "ěmi"},
		-- loď, suť
		["loď"] = {gen_s = {"i", "ě"}, nom_p = {"i", "ě"}, dat_p = "ím", loc_p = "ích", ins_p = {"ěmi", "mi"}},
	}

	local endings = endings_by_subtype[base.mixedistem]
	if not endings then
		error(("Unrecognized value '%s' for 'mixedistem', should be one of 'pěst', 'moc', 'myš', 'noc', 'žluč' or 'loď'"):
			format(base.mixedistem))
	end

	add_decl(base, stems, endings.gen_s, "i", "-", "i", "i", "í",
		endings.nom_p, "í", endings.dat_p, endings.nom_p, endings.loc_p, endings.ins_p)
end
-- Properties for the feminine mixed i-stem declension. The subtype from `base.mixedistem` appears in the table
-- description; the categories are the generic "mixed i-stem" plus one that names the subtype.
declprops["mixed-istem-f"] = {
	desc = function(base, stems)
		local subtype = base.mixedistem
		return string.format("mixed i-stem [type '%s'] GENDER", subtype)
	end,
	cat = function(base, stems)
		local subtype_cat = string.format("mixed i-stem GENPOS (type '%s')", base.mixedistem)
		return {"mixed i-stem", subtype_cat}
	end,
}
-- Feminine declension for nouns in -i: [[máti]] "mother" (singular-only), [[pramáti]] "foremother"; very similar
-- to the 'noc' mixed i-stem type.
decls["i-f"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "i", "i", "-", "i", "i", "í"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "i", "í", "ím", "i", "ích", "ěmi"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["i-f"] = {cat = "GENPOS in -i"}
-- Feminine declension for nouns in -ea. The stem ends in -e. Three ending sets are distinguished: technical terms
-- (`base.tech`), personal names (`base.persname`) and everything else. The accusative and vocative singular are
-- the same in all three.
decls["ea-f"] = function(base, stems)
	local gen_s, dat_s, loc_s, ins_s
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p

	if base.tech then
		-- diarea, gonorea, chorea, nauzea, paleogea, seborea, trachea
		gen_s, dat_s, loc_s, ins_s = "y", "i", "i", "ou"
		nom_p, gen_p, dat_p = "y", "í", {"ám", "ím"}
		acc_p, loc_p, ins_p = "y", {"ách", "ích"}, "ami"
	elseif base.persname then
		-- Medea, Andrea, etc.
		gen_s, dat_s, loc_s, ins_s = {"y", "je", "ji"}, {"e", "je", "ji"}, {"e", "je", "ji"}, "ou"
		-- The plural is a guess, based on the same as the general case below; plural of personal names not
		-- attested in IJP.
		nom_p, gen_p, dat_p = {"y", "je"}, "jí", {"ám", "jím"}
		acc_p, loc_p, ins_p = {"y", "je"}, {"ách", "jích"}, {"ami", "jemi"}
	else
		-- idea, odysea ("wandering pilgrimage"), orchidea, palea, spirea
		-- proper names Galilea, Judea, Caesarea, Korea, Odyssea ("epic poem")
		gen_s, dat_s, loc_s, ins_s = {"y", "je"}, "ji", "ji", {"ou", "jí"}
		nom_p, gen_p, dat_p = {"y", "je"}, "jí", {"ám", "jím"}
		acc_p, loc_p, ins_p = {"y", "je"}, {"ách", "jích"}, {"ami", "jemi"}
	end

	add_decl(base, stems, gen_s, dat_s, "u", "o", loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Properties for feminines in -ea. Technical terms (`base.tech`) get an additional, more specific category.
declprops["ea-f"] = {
	cat = function(base, stems)
		if not base.tech then
			return "GENPOS in -ea"
		end
		return {"GENPOS in -ea", "technical GENPOS in -ea"}
	end,
}
-- Feminine declension for nouns in -oa/-ua. The stem ends in -o/-u.
-- stoa, kongrua; proper names Samoa, Managua, Nikaragua, Capua
decls["oa-f"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "y", "i", "u", "o", "i", "ou"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "y", "í", "ám", "y", "ách", "ami"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["oa-f"] = {cat = "GENPOS in -oa/-ua"}
-- Feminine declension for nouns in -ia. The stem ends in -i.
-- belaria, signoria, uncia; paranoia, sinfonia;
-- proper names Alexandria, Alexia, Livia, Monrovia, Olympia, Sofia
decls["ia-f"] = function(base, stems)
	-- Identical to soft declension except for nom sg, so delegate to the soft feminine handler.
	local decline_soft_f = decls["soft-f"]
	decline_soft_f(base, stems)
end

declprops["ia-f"] = {cat = "GENPOS in -ia"}
-- Hard neuter declension. The locative singular and locative plural depend on the lemma ending and on whether the
-- stem is velar; all other endings are fixed.
decls["hard-n"] = function(base, stems)
	local lemma = base.lemma
	local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")

	-- NOTE: Per IJP it appears the meaning of the preceding preposition makes a difference: 'o' = "about" takes
	-- '-u' or '-ě', while 'na/v' = "in, on" normally takes '-ě'.
	local loc_s
	if velar then
		-- Exceptions: [[mléko]] "milk" ('mléku' or 'mléce'), [[břicho]] "belly" ('břiše' or (less often)
		-- 'břichu'), [[roucho]] ('na rouchu' or 'v rouše'; why the difference in preposition?).
		loc_s = "u"
	elseif rfind(lemma, "dlo$") then
		-- IJP says nouns in -dlo take only -e but the declension tables show otherwise. It appears -u is possible
		-- but significantly less common.
		loc_s = {"ě", "u"}
	elseif rfind(lemma, "lo$") then
		-- Other nouns in -lo usually take just -e ([[čelo]] "forehead", [[kolo]] "wheel", [[křeslo]] "armchair",
		-- [[máslo]] "butter", [[peklo]] "hell", [[sklo]] "glass", [[světlo]] "light", [[tělo]] "body"; but
		-- [[číslo]] "number" with -e/-u; [[zlo]] "evil" and [[kouzlo]] "spell" with -u/-e).
		loc_s = "ě"
	elseif rfind(lemma, "[sc]tvo$") or rfind(lemma, "ivo$") then
		loc_s = "u"
	else
		-- Per IJP: Borrowed words and abstracts take -u (e.g. [[banjo]]/[[bendžo]]/[[benžo]] "banjo", [[depo]]
		-- "depot", [[chladno]] "cold", [[mokro]] "damp, dampness", [[právo]] "law, right", [[šeru]] "twilight?",
		-- [[temno]] "dark, darkness", [[tempo]] "rate, tempo", [[ticho]] "quiet, silence", [[vedro]] "heat") and
		-- others often take -ě/-u. Formerly we defaulted to -ě/-u but it seems better to default to just -u,
		-- similarly to hard masculines.
		loc_s = "u"
	end

	local loc_p
	if rfind(lemma, "isko$") then
		-- Note, lemmas in -isko also have mixed-reducible as default, handled in determine_default_reducible().
		-- Note also, ending -ích triggers the second palatalization.
		loc_p = {"ích", "ách"}
	elseif velar then
		-- Diminutives in -ko, -čko, -tko; also [[lýtko]], [[děcko]], [[vrátka]], [[dvířka]], [[jho]], [[roucho]],
		-- [[tango]], [[mango]], [[sucho]], [[blaho]], [[víko]], [[echo]], [[embargo]], [[largo]], [[jericho]]
		-- (from IJP). Also foreign nouns in -kum: [[antibiotikum]], [[narkotikum]], [[afrodiziakum]],
		-- [[analgetikum]], etc. [[jablko]] "apple" has '-ách' or '-ích' and needs an override; likewise for
		-- [[vojsko]] "troop"; [[riziko]] "risk" normally has '-ích' and needs an override.
		loc_p = "ách"
	else
		loc_p = "ech"
	end

	add_decl(base, stems, "a", "u", "-", "-", loc_s, "em",
		"a", "", "ům", "a", loc_p, "y")
	-- FIXME: paired body parts e.g. [[rameno]] "shoulder" (gen_p/loc_p 'ramenou/ramen'), [[koleno]] "knee"
	-- (gen_p/loc_p 'kolenou/kolen'), [[prsa]] "chest, breasts" (plurale tantum; gen_p/loc_p 'prsou').
	-- FIXME: Nouns with both neuter and feminine forms in the plural, e.g. [[lýtko]] "calf (of the leg)",
	-- [[bedro]] "hip", [[vrátka]] "gate".
end
declprops["hard-n"] = {
	-- Description template; uses the same velar test as decls["hard-n"].
	desc = function(base, stems)
		local is_velar = base.velar or (not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$"))
		return is_velar and "velar GENDER" or "hard GENDER"
	end,
	-- Category name fragment; uses the same velar test as decls["hard-n"].
	cat = function(base, stems)
		local is_velar = base.velar or (not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$"))
		return is_velar and "velar-stem" or "hard"
	end
}
decls["semisoft-n"] = function(base, stems)
	-- Neuters that are hard in the singular and mostly soft in the plural. Examples:
	-- * In -ao: [[kakao]] "cacao", [[makao]] "Macao (gambling card game, see Wikipedia)", [[curaçao]] "curaçao (liqueur)"
	--   (IJP gives gen pl 'curaç' but ASSC [https://slovnikcestiny.cz/heslo/cura%C3%A7ao/0/9967] says 'curaçí' as expected),
	--   [[farao]] "faro (card game)"; also [[Makao]], [[Pathet Lao]], but these are sg-only
	-- * In -eo: [[stereo]], [[rodeo]], [[video]], [[solideo]]; also [[Borneo]], [[Montevideo]], but these are sg-only
	-- * In -io: [[rádio]] "radio", [[gramorádio]], [[studio]], [[scenário]], [[trio]], [[ážio]] (also spelled [[agio]]),
	--   [[disážio]], [[folio]], [[vibrio]]; also [[arpeggio]], [[adagio]], [[capriccio]], [[solfeggio]] although
	--   pronounced the Italian way without /i/; also [[Ohio]], [[Ontario]], [[Tokio]], but these are sg-only
	-- * In -uo: only [[duo]]
	-- * In -yo: only [[embryo]]
	-- * In -eum: [[muzeum]], [[lyceum]], [[linoleum]], [[ileum]], etc.
	-- * In -ium: [[atrium]] "atrium", most chemical elements, etc.
	-- * In -uum: [[individuum]], [[kontinuum]], [[premenstruum]], [[residuum]], [[vakuum]]/[[vacuum]]
	-- * In -yum: only [[baryum]] "barium" (none others in SSJC)
	-- * In -ion: [[enkómion]] "encomium", [[eufonion]] (variant of [[eufonium]]), [[amnion]], [[ganglion]], [[gymnasion]],
	--   [[scholion]], [[kritérion]] (rare for [[kritérium]]), [[onomatopoion]] (variant of [[onomatopoie]]),
	--   [[symposion]], [[synedrion]]; also [[Byzantion]], but this is sg-only; most words in -ion are masculine
	-- Those in -eo and -uo have alternative hard endings in the dat/loc/ins pl, but not those in -eum or -uum.
	-- Those in -ao have only hard endings except in the gen pl. (There are apparently no neuters in -eon; those in
	-- -eon or -yon e.g. [[akordeon]], [[neon]], [[nukleon]], [[karyon]], [[Lyon]] are masculine.)
	local lemma = base.actual_lemma
	-- Start from the soft plural endings and override for the -ao and -eo/-uo groups.
	local dat_p, loc_p, ins_p = "ím", "ích", "i"
	if rfind(lemma, "ao$") then
		dat_p, loc_p, ins_p = "ům", "ech", "y"
	elseif rfind(lemma, "[eu]o$") then
		dat_p, loc_p, ins_p = {"ím", "ům"}, {"ích", "ech"}, {"i", "y"}
	end
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "a", "u", "-", "-", "u", "em"
	local nom_p, gen_p, acc_p = "a", "í", "a"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Category name fragment for semisoft neuters.
declprops["semisoft-n"] = {cat = "semisoft"}
decls["soft-n"] = function(base, stems)
	-- Soft neuters. Examples: [[moře]] "sea", [[slunce]] "sun", [[srdce]] "heart", [[citoslovce]] "interjection",
	-- [[dopoledne]] "late morning", [[odpoledne]] "afternoon", [[hoře]] "sorrow, grief" (archaic or literary),
	-- [[inhalace]] "inhalation", [[kafe]] "coffee", [[kanape]] "sofa", [[kutě]] "bed", [[Labe]] "Elbe (singular only)",
	-- [[líce]] "cheek", [[lože]] "bed", [[nebe]] "sky; heaven", [[ovoce]] "fruit", [[pole]] "field", [[poledne]]
	-- "noon", [[příslovce]] "adverb", [[pukrle]] "curtsey" (also t-n), [[vejce]] "egg" (NOTE: gen pl 'vajec').
	--
	-- The many nouns in -iště have a null genitive plural; everything else takes -í.
	local gen_p
	if rfind(base.vowel_stem, "išť$") then
		gen_p = ""
	else
		gen_p = "í"
	end
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "e", "i", "-", "-", "i", "em"
	local nom_p, dat_p, acc_p, loc_p, ins_p = "e", "ím", "e", "ích", "i"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
	-- NOTE: Some neuter words in -e are indeclinable, e.g. [[Belize]], [[Chile]], [[garde]] "chaperone",
	-- [[karaoke]], [[karate]], [[re]] "double raise (card games)", [[ukulele]], [[Zimbabwe]], [[zombie]]
	-- (pl. 'zombie' or 'zombies'); some are nearly indeclinable, e.g. [[finále]], [[chucpe]]; see mostly-indecl
	-- below.
end
-- Category name fragment for soft neuters.
declprops["soft-n"] = {cat = "soft"}
decls["í-n"] = function(base, stems)
	-- Neuters in -í: [[nábřeží]] "waterfront" and a zillion others; also [[úterý]] "Tuesday".
	-- NOTE: The stem itself ends in -í/-ý, which is why most endings are empty.
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "", "", "-", "-", "", "m"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "", "", "m", "", "ch", "mi"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Category template for neuters in -í/-ý.
declprops["í-n"] = {cat = "GENPOS in -í/-ý"}
decls["n-n"] = function(base, stems)
	-- Neuter n-stems, e.g. [[břemeno]] "burden" (also [[břímě]], use 'decllemma:'); [[písmeno]] "letter";
	-- [[plemeno]] "breed"; [[rameno]] "shoulder" (also [[rámě]], use 'decllemma:'); [[semeno]] "seed" (also
	-- [[sémě]], [[símě]], use 'decllemma:'); [[temeno]] "crown (of the head)"; [[vemeno]] "udder".
	-- The singular oblique cases have several alternative endings.
	local gen_s = {"a", "e"}
	local dat_s = {"i", "u"}
	local loc_s = {"ě", "i", "u"}
	local acc_s, voc_s, ins_s = "-", "-", "em"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "a", "", "ům", "a", "ech", "y"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Category name fragment for neuter n-stems.
declprops["n-n"] = {cat = "n-stem"}
decls["tstem-n"] = function(base, stems)
	-- Neuter t-stems (singular in -et-, plural in -at-).
	-- E.g. [[batole]] "toddler", [[čuně]] "pig", [[daňče]] "fallow deer fawn", [[děvče]] "girl", [[ďouče]] "girl"
	-- (dialectal), [[dítě]] "child" (NOTE: feminine in the plural [[děti]], declined as a feminine i-stem), [[dvojče]]
	-- "twin", [[hádě]] "young snake", [[house]] "gosling", [[hříbě]] "foal" (pl. hříbata), [[jehně]] "lamb", [[kavče]]
	-- "young jackdaw; chough", [[káče]] "duckling", [[káně]] "buzzard chick" (NOTE: also feminine meaning "buzzard"),
	-- [[klíště]] "tick", [[kose]] "blackbird chick" (rare), [[kuře]] "chick (young chicken)", [[kůzle]]
	-- "kid (young goat)", [[lišče]] "fox cub", [[lvíče]] "lion cub", [[medvídě]] "bear cub", [[mládě]] "baby animal",
	-- [[morče]] "guinea pig", [[mrně]] "toddler", [[nemluvně]] "infant", [[novorozeně]] "newborn", [[orle]] "eaglet",
	-- [[osle]] "donkey foal", [[pachole]] "boy (obsolete); page, squire", [[páže]] "page, squire", [[podsvinče]]
	-- "suckling pig", [[prase]] "pig", [[prtě]] "toddler", [[ptáče]] "chick (young bird)",
	-- [[robě]] "baby, small child", [[saranče]] "locust" (NOTE: also feminine), [[sele]] "piglet",
	-- [[slůně]] "baby elephant", [[škvrně]] "toddler", [[štěně]] "puppy", [[tele]] "calf", [[velbloudě]] "camel colt",
	-- [[vlče]] "wolf cub", [[vnouče]] "grandchild", [[vyžle]] "small hunting dog; slender person",
	-- [[zvíře]] "animal, beast".
	--
	-- Some refer to inanimates, e.g. [[doupě]] "lair" (pl. doupata), [[koště]]/[[chvoště]] "broom", [[paraple]]
	-- "umbrella", [[poupě]] "bud", [[pukrle]] "curtsey" (also soft-n), [[rajče]] "tomato", [[šuple]] "drawer",
	-- [[varle]] "testicle", [[vole]] "craw (of a bird); goiter".
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ete", "eti", "-", "-", "eti", "etem"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ata", "at", "atům", "ata", "atech", "aty"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Category name fragment for neuter t-stems.
declprops["tstem-n"] = {cat = "t-stem"}
decls["ma-n"] = function(base, stems)
	-- Neuters in -ma, with -at- throughout the oblique cases.
	-- E.g. [[drama]] "drama", [[dogma]] "dogma", [[aneurysma]]/[[aneuryzma]] "aneurysm", [[dilema]] "dilemma",
	-- [[gumma]] "gumma" (non-cancerous syphilitic growth), [[klima]] "climate", [[kóma]] "coma", [[lemma]] "lemma",
	-- [[melisma]] "melisma", [[paradigma]] "paradigm", [[plasma]]/[[plazma]] "plasma [partly ionized gas]"
	-- (note [[plasma]]/[[plazma]] "blood plasma" is feminine), [[revma]] "rheumatism", [[schéma]] "schema, diagram",
	-- [[schisma]]/[[schizma]] "schism", [[smegma]] "smegma", [[sofisma]]/[[sofizma]] "sophism", [[sperma]] "sperm",
	-- [[stigma]] "stigma", [[téma]] "theme", [[trauma]] "trauma", [[trilema]] "trilemma", [[zeugma]] "zeugma".
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "atu", "atu", "-", "-", "atu", "atem"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ata", "at", "atům", "ata", "atech", "aty"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Category name fragment for neuters in -ma.
declprops["ma-n"] = {cat = "ma-stem"}
decls["adj"] = function(base, stems)
	-- Adjectival nouns: generate the full adjective paradigm with Module:cs-adjective and copy the forms that match
	-- this noun's gender, animacy and number into the noun slots.
	-- No adjective properties are currently collected, so `propspec` is always empty; the scaffolding is kept so
	-- that properties can be forwarded later.
	local props = {}
	local propspec = table.concat(props, ".")
	if propspec ~= "" then
		propspec = "<" .. propspec .. ">"
	end
	local adj_alternant_multiword_spec = require("Module:cs-adjective").do_generate_forms({base.lemma .. propspec})

	-- Copy one adjective slot into one noun slot.
	local function copy(from_slot, to_slot)
		base.forms[to_slot] = adj_alternant_multiword_spec.forms[from_slot]
	end
	-- Copy a list of {ADJECTIVE_SLOT, NOUN_SLOT} pairs.
	local function copy_pairs(slot_pairs)
		for _, slot_pair in ipairs(slot_pairs) do
			copy(slot_pair[1], slot_pair[2])
		end
	end

	if base.number ~= "pl" then
		-- Note that the masculine singular does not copy an accusative here.
		local singular_by_gender = {
			m = {
				{"nom_m", "nom_s"}, {"gen_mn", "gen_s"}, {"dat_mn", "dat_s"},
				{"loc_mn", "loc_s"}, {"ins_mn", "ins_s"},
			},
			f = {
				{"nom_f", "nom_s"}, {"gen_f", "gen_s"}, {"dat_f", "dat_s"},
				{"acc_f", "acc_s"}, {"loc_f", "loc_s"}, {"ins_f", "ins_s"},
			},
		}
		-- Any gender other than "m"/"f" is treated as neuter.
		local neuter_singular = {
			{"nom_n", "nom_s"}, {"gen_mn", "gen_s"}, {"dat_mn", "dat_s"},
			{"acc_n", "acc_s"}, {"loc_mn", "loc_s"}, {"ins_mn", "ins_s"},
		}
		copy_pairs(singular_by_gender[base.gender] or neuter_singular)
		-- The vocative singular defaults to the nominative singular unless already present.
		if not base.forms.voc_s then
			iut.insert_forms(base.forms, "voc_s", base.forms.nom_s)
		end
	end

	if base.number ~= "sg" then
		local nom_p_source, acc_p_source
		if base.gender == "m" or base.gender == "f" then
			-- Only animate masculines have their own nominative plural; inanimate masculines share the feminine
			-- one.
			if base.gender == "m" and base.animacy == "an" then
				nom_p_source = "nom_mp_an"
			else
				nom_p_source = "nom_fp"
			end
			acc_p_source = "acc_mfp"
		else
			nom_p_source = "nom_np"
			acc_p_source = "acc_np"
		end
		copy(nom_p_source, "nom_p")
		copy(acc_p_source, "acc_p")
		-- The remaining plural cases are gender-independent and share their slot names with the adjective.
		for _, slot in ipairs({"gen_p", "dat_p", "ins_p", "loc_p"}) do
			copy(slot, slot)
		end
	end
end
-- Classify an adjectival lemma by its ending: -ý is "hard", -í is "soft", anything else is "possessive".
local function get_stemtype(base)
	local lemma = base.lemma
	if rfind(lemma, "ý$") then
		return "hard"
	end
	if rfind(lemma, "í$") then
		return "soft"
	end
	return "possessive"
end
declprops["adj"] = {
	-- Two category templates: a generic one and one qualified by the stem type from get_stemtype().
	cat = function(base, stems)
		local stemtype = get_stemtype(base)
		return {"adjectival POS", stemtype .. " GENDER adjectival POS"}
	end,
}
decls["mostly-indecl"] = function(base, stems)
	-- Nouns that are indeclinable except for an instrumental singular in -m.
	-- Several neuters: E.g. [[finále]] "final (sports)", [[čtvrtfinále]] "quarterfinal", [[chucpe]] "chutzpah",
	-- [[penále]] "fine, penalty", [[promile]] "per mille" (NOTE: loc pl also promilech), [[rande]] "rendezvous",
	-- [[semifinále]] "semifinal", [[skóre]] "score".
	-- At least one masculine animate: [[kamikaze]]/[[kamikadze]], where IJP says only -m in the ins sg.
	local ins_s
	if base.gender == "m" then
		ins_s = "m"
	else
		ins_s = {"-", "m"}
	end
	local unchanged = "-"
	add_decl(base, stems, unchanged, unchanged, unchanged, unchanged, unchanged, ins_s,
		unchanged, unchanged, unchanged, unchanged, unchanged, unchanged)
end
-- Category name fragment for mostly indeclinable nouns.
declprops["mostly-indecl"] = {cat = "mostly indeclinable"}
decls["indecl"] = function(base, stems)
	-- Indeclinable: every slot gets the "-" ending. Fully indeclinable nouns should not have a table at all rather
	-- than one all of whose forms are the same; but an indeclinable declension is useful for nouns that may or may
	-- not be indeclinable, e.g. [[desatero]] "group of ten" or the plural of [[peso]], which may be indeclinable
	-- 'pesos'.
	local unchanged = "-"
	add_decl(base, stems, unchanged, unchanged, unchanged, unchanged, unchanged, unchanged,
		unchanged, unchanged, unchanged, unchanged, unchanged, unchanged)
end
declprops["indecl"] = {
	-- Indeclinable adjectival nouns get the adjectival category templates; all others get the plain ones.
	cat = function(base, stems)
		if not base.adj then
			return {"indeclinable POS", "indeclinable GENPOS"}
		end
		return {"adjectival POS", "indeclinable adjectival POS", "indeclinable GENDER adjectival POS"}
	end
}
decls["manual"] = function(base, stems)
	-- Manual declension: everything comes from overrides. Only the lemma slot is filled in here, which is nom_p
	-- for a plurale tantum and nom_s otherwise.
	local lemma_slot
	if base.number == "pl" then
		lemma_slot = "nom_p"
	else
		lemma_slot = "nom_s"
	end
	add(base, lemma_slot, stems, "-")
end
-- Manually declined nouns: description is just the gender template, and no categories are listed.
declprops["manual"] = {desc = "GENDER", cat = {}}
-- Fill in gender, number, animacy and clitic availability for a personal/interrogative/reflexive pronoun, based
-- purely on its lemma. Throws an error if the user supplied gender or animacy (or number, except with the ambiguous
-- lemma 'ona'), or if the lemma is not a recognized pronoun.
local function set_pron_defaults(base)
	if base.gender or (base.lemma ~= "ona" and base.number) or base.animacy then
		error("Can't specify gender, number or animacy for pronouns")
	end

	-- Each entry is {GENDER, NUMBER, ANIMACY, HAS_CLITIC}.
	local props_by_lemma = {
		["kdo"] = {"none", "sg", "an", false},
		["co"] = {"none", "sg", "inan", false},
		["já"] = {"none", "sg", "an", true},
		["ty"] = {"none", "sg", "an", true},
		["my"] = {"none", "pl", "an", false},
		["vy"] = {"none", "pl", "an", false},
		["on"] = {"m", "sg", "none", true},
		["ono"] = {"n", "sg", "inan", true},
		["oni"] = {"m", "pl", "an", false},
		["ony"] = {"none", "pl", "none", false},
		["sebe"] = {"none", "none", "none", true},
	}

	local props
	if base.lemma == "ona" then
		-- 'ona' is either feminine singular or neuter plural, so the user has to disambiguate.
		if base.number ~= "sg" and base.number ~= "pl" then
			error("Must specify '.sg' or '.pl' with lemma 'ona'")
		end
		if base.number == "sg" then
			props = {"f", "sg", "none", false}
		else
			props = {"n", "pl", "inan", false}
		end
	else
		props = props_by_lemma[base.lemma]
		if not props then
			error(("Unrecognized pronoun '%s'"):format(base.lemma))
		end
	end

	local gender, number, animacy, has_clitic = props[1], props[2], props[3], props[4]
	base.gender = gender
	base.actual_gender = gender
	base.number = number
	base.actual_number = number
	base.animacy = animacy
	base.actual_animacy = animacy
	base.has_clitic = has_clitic
end
-- Give a pronoun its single, empty stem set and select the "pron" declension. User-specified stem sets
-- (reducible / vowel-alternation indicators) are rejected.
local function determine_pronoun_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with pronouns")
	end
	local empty_stem_set = {reducible = false, vowel_stem = "", nonvowel_stem = ""}
	base.stem_sets = {empty_stem_set}
	base.decl = "pron"
end
decls["pron"] = function(base, stems)
	-- Declension of the pronouns recognized by set_pron_defaults(). All forms are given in full rather than as
	-- endings. Footnoted variants (post-prepositional n-forms, dual forms, animate-only forms) are added through
	-- extra calls to the same helper with a footnote.
	local after_prep_footnote = "[after a preposition]"
	local dual_footnote = "[when referring to dual nouns, e.g. [[oči]], [[ruce]]]"
	local animate_footnote = "[animate]"
	if base.lemma == "kdo" then
		add_decl(base, stems, "koho", "komu", nil, nil, "kom", "kým")
	elseif base.lemma == "co" then
		add_decl(base, stems, "čeho", "čemu", nil, nil, "čem", "čím")
	elseif base.lemma == "já" then
		add_sg_decl_with_clitic(base, stems, "mne", "mě", "mně", "mi", nil, nil, nil, "mně", "mnou")
	elseif base.lemma == "ty" then
		add_sg_decl_with_clitic(base, stems, "tebe", "tě", "tobě", "ti", nil, nil, nil, "tobě", "tebou")
	elseif base.lemma == "my" then
		add_pl_only_decl(base, stems, "nás", "nám", "nás", "nás", "námi")
		add_pl_only_decl(base, stems, nil, nil, nil, nil, "náma", dual_footnote)
	elseif base.lemma == "vy" then
		add_pl_only_decl(base, stems, "vás", "vám", "vás", "vás", "vámi")
		add_pl_only_decl(base, stems, nil, nil, nil, nil, "váma", dual_footnote)
	elseif base.lemma == "on" or base.lemma == "ono" then
		-- Masculine 'on' and neuter 'ono' share most forms and differ only in the accusative.
		local acc_s = base.lemma == "on" and "jej" or {"jej", "je"}
		local clitic_acc_s = base.lemma == "on" and {"jej", "ho"} or {"jej", "ho", "je"}
		local prep_acc_s = base.lemma == "on" and "něj" or {"něj", "ně"}
		local prep_clitic_acc_s = base.lemma == "on" and "-ň" or nil
		add_sg_decl_with_clitic(base, stems, {"jeho", "jej"}, {"ho", "jej"}, "jemu", "mu", acc_s, clitic_acc_s, nil, nil, "jím")
		add_sg_decl_with_clitic(base, stems, {"něho", "něj"}, nil, "němu", nil, prep_acc_s, prep_clitic_acc_s, nil, "něm", "ním",
			after_prep_footnote)
		if base.lemma == "on" then
			add_sg_decl_with_clitic(base, stems, nil, nil, nil, nil, "jeho", nil, nil, nil, nil,
				animate_footnote)
			-- 'něho' is both post-prepositional and animate-only, so it needs BOTH footnotes. The previous
			-- expression `after_prep_footnote and animate_footnote` evaluated to just the animate footnote because
			-- both operands are non-empty strings. NOTE(review): this assumes the add_* helpers accept a list of
			-- footnotes as well as a single string (decls["num"] passes the list `stems.footnotes`) -- confirm.
			add_sg_decl_with_clitic(base, stems, nil, nil, nil, nil, "něho", nil, nil, nil, nil,
				{after_prep_footnote, animate_footnote})
		end
	elseif base.lemma == "ona" and base.number == "sg" then
		add_sg_decl(base, stems, "jí", "jí", "ji", nil, nil, "jí")
		add_sg_decl(base, stems, "ní", "ní", "ni", nil, "ní", "ní", after_prep_footnote)
	elseif base.lemma == "oni" or base.lemma == "ony" or base.lemma == "ona" then
		-- All third-person plurals decline alike; 'ona' only reaches this branch when its number is not "sg".
		add_pl_only_decl(base, stems, "jich", "jim", "je", nil, "jimi")
		add_pl_only_decl(base, stems, "nich", "nim", "ně", "nich", "nimi", after_prep_footnote)
		add_pl_only_decl(base, stems, nil, nil, nil, nil, "jima", dual_footnote)
		add_pl_only_decl(base, stems, nil, nil, nil, nil, "nima", dual_footnote)
	elseif base.lemma == "sebe" then
		-- Underlyingly we handle [[sebe]]'s slots as singular.
		add_sg_decl_with_clitic(base, stems, "sebe", "sebe", "sobě", "si", "sebe", "se", nil, "sobě", "sebou",
			nil, "no nom_s")
	else
		error(("Internal error: Unrecognized pronoun lemma '%s'"):format(base.lemma))
	end
end
-- Pronouns: description template only; no categories are listed.
declprops["pron"] = {desc = "GENDER pronoun", cat = {}}
-- Fill in the fixed properties of a numeral: no gender, plural number, no animacy, no clitic forms. Throws an
-- error if the user tried to specify gender, number or animacy.
local function set_num_defaults(base)
	if base.gender or base.number or base.animacy then
		error("Can't specify gender, number or animacy for numeral")
	end
	local gender, number, animacy, has_clitic = "none", "pl", "none", false
	base.gender = gender
	base.actual_gender = gender
	base.number = number
	base.actual_number = number
	base.animacy = animacy
	base.actual_animacy = animacy
	base.has_clitic = has_clitic
end
-- Compute the single stem set for a numeral and select the "num" declension. The stem is the lemma minus a final
-- vowel, or the whole lemma if it doesn't end in a vowel. User-specified stem sets are rejected.
local function determine_numeral_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with numerals")
	end
	local stem = rmatch(base.lemma, "^(.*)" .. com.vowel_c .. "$")
	if not stem then
		stem = base.lemma
	end
	local stem_set = {reducible = false, vowel_stem = stem, nonvowel_stem = stem}
	base.stem_sets = {stem_set}
	base.decl = "num"
end
decls["num"] = function(base, stems)
	-- Numerals are plural-only. Irregular lemmas are listed in full against an empty stem; everything else takes
	-- -i in all oblique cases.
	local lemma = base.lemma
	if lemma == "devět" then
		add_pl_only_decl(base, "", "devíti", "devíti", "-", "devíti", "devíti", stems.footnotes)
		return
	end
	if lemma == "sta" or lemma == "stě" or lemma == "set" then
		add_pl_only_decl(base, "", "set", "stům", "-", "stech", "sty", stems.footnotes)
		return
	end
	-- [[deset]] and all numbers ending in -cet ([[dvacet]], [[třicet]], [[čtyřicet]] and inverted compound
	-- numerals such as [[pětadvacet]] "25" and [[dvaatřicet]] "32") additionally have forms in -íti built on the
	-- lemma minus -et.
	local has_iti_variant = rfind(lemma, "[cs]et$")
	if has_iti_variant then
		local begin = rmatch(lemma, "^(.*)et$")
		add_pl_only_decl(base, stems, "i", "i", "-", "i", "i")
		add_pl_only_decl(base, begin, "íti", "íti", "-", "íti", "íti", stems.footnotes)
		return
	end
	add_pl_only_decl(base, stems, "i", "i", "-", "i", "i")
end
-- Numerals: description template only; no categories are listed.
declprops["num"] = {desc = "GENDER numeral", cat = {}}
-- Fill in the fixed properties of a determiner: no gender, no number, no animacy, no clitic forms. Throws an
-- error if the user tried to specify gender, number or animacy.
local function set_det_defaults(base)
	if base.gender or base.number or base.animacy then
		error("Can't specify gender, number or animacy for determiner")
	end
	local gender, number, animacy, has_clitic = "none", "none", "none", false
	base.gender = gender
	base.actual_gender = gender
	base.number = number
	base.actual_number = number
	base.animacy = animacy
	base.actual_animacy = animacy
	base.has_clitic = has_clitic
end
-- Compute the single stem set for a determiner and select the "det" declension. The stem is the lemma minus a
-- final vowel, or the whole lemma if it doesn't end in a vowel. User-specified stem sets are rejected.
local function determine_determiner_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with determiners")
	end
	local stem = rmatch(base.lemma, "^(.*)" .. com.vowel_c .. "$")
	if not stem then
		stem = base.lemma
	end
	local stem_set = {reducible = false, vowel_stem = stem, nonvowel_stem = stem}
	base.stem_sets = {stem_set}
	base.decl = "det"
end
decls["det"] = function(base, stems)
	-- Determiners: -a in the genitive, dative, locative and instrumental; "-" in the accusative; no vocative.
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "a", "a", "-", nil, "a", "a"
	add_sg_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s)
end
-- Determiners: description template only; no categories are listed.
declprops["det"] = {desc = "GENDER determiner", cat = {}}
-- Collect the bracketed footnotes from a run of alternating text/footnote segments. Even positions hold the
-- footnotes; every odd position after the first must be the empty string, otherwise stray text followed a
-- footnote and an error is thrown. Returns a list of footnotes, or nil if there are none.
local function fetch_footnotes(separated_group)
	local collected
	local idx = 2
	local last_footnote_idx = #separated_group - 1
	while idx <= last_footnote_idx do
		if separated_group[idx + 1] ~= "" then
			error("Extraneous text after bracketed footnotes: '" .. table.concat(separated_group) .. "'")
		end
		collected = collected or {}
		collected[#collected + 1] = separated_group[idx]
		idx = idx + 2
	end
	return collected
end
--[=[
Parse a single override spec (e.g. 'nomplé:ové' or 'ins:autodráhou:autodrahou[rare]') and return
two values: a list of the slot(s) the override applies to, and an object describing the override spec.
Several slots can be given at once by joining them with '+' (e.g. 'nom+acc...').
The input is actually a list where the footnotes have been separated out; for example,
given the spec 'inspl:čobotami:čobotámi[rare]:čobitmi[archaic]', the input will be a list
{"inspl:čobotami:čobotámi", "[rare]", ":čobitmi", "[archaic]", ""}. The object returned
for 'ins:autodráhou:autodrahou[rare]' looks like this:
{
	full = true,
	values = {
		{
			form = "autodráhou"
		},
		{
			form = "autodrahou",
			footnotes = {"[rare]"}
		}
	}
}
The object returned for 'nomplé:ové' looks like this:
{
	values = {
		{
			form = "é",
		},
		{
			form = "ové",
		}
	}
}
`full` is set when the slot prefix is directly followed by a colon, as in 'ins:...'. A value of '-' stands for
an empty form; a literally empty value is an error.

NOTE: `segments[1]` is modified in place (the slot prefix is stripped from it).
]=]
local function parse_override(segments)
	-- Snapshot the user's text up front: segments[1] is rewritten below, and error messages should quote the
	-- spec as written rather than the remainder after the slot prefix has been removed.
	local orig_spec = table.concat(segments)
	local retval = {values = {}}
	local part = segments[1]
	local slots = {}
	-- Consume one or more '+'-separated slot prefixes: a three-character case, optionally followed by 'pl'
	-- (plural) or 'cl' (singular clitic).
	while true do
		local case = usub(part, 1, 3)
		if not cases[case] then
			error(("Unrecognized case '%s' in override: '%s'"):format(case, orig_spec))
		end
		part = usub(part, 4)
		local slot
		if rfind(part, "^pl") then
			part = usub(part, 3)
			slot = case .. "_p"
		elseif rfind(part, "^cl") then
			-- No plural clitic cases at this point.
			part = usub(part, 3)
			if clitic_cases[case] then
				slot = "clitic_" .. case .. "_s"
			else
				error(("Unrecognized clitic case '%s' in override: '%s'"):format(case, orig_spec))
			end
		else
			slot = case .. "_s"
		end
		table.insert(slots, slot)
		if rfind(part, "^%+") then
			part = usub(part, 2)
		else
			break
		end
	end
	-- A colon right after the slot prefix marks the values as full forms.
	if rfind(part, "^:") then
		retval.full = true
		part = usub(part, 2)
	end
	segments[1] = part
	local colon_separated_groups = put.split_alternating_runs_and_strip_spaces(segments, ":")
	for _, colon_separated_group in ipairs(colon_separated_groups) do
		local value = {}
		local form = colon_separated_group[1]
		if form == "" then
			-- Join the slot names with a separator so that several slots stay readable in the message.
			error(("Use - to indicate an empty ending for slot%s '%s': '%s'"):format(
				#slots > 1 and "s" or "", table.concat(slots, ", "), orig_spec))
		elseif form == "-" then
			value.form = ""
		else
			value.form = form
		end
		value.footnotes = fetch_footnotes(colon_separated_group)
		table.insert(retval.values, value)
	end
	return slots, retval
end
--[=[
Parse an indicator spec (text consisting of angle brackets and zero or more
dot-separated indicators within them). Return value is an object of the form
{
overrides = {
SLOT = {OVERRIDE, OVERRIDE, ...}, -- as returned by parse_override()
...
},
forms = {}, -- forms for a single spec alternant; see `forms` below
footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
stem_sets = { -- may be missing
{
reducible = TRUE_OR_FALSE, -- nil if no reducible indicator ('*' or '-*') was given
vowelalt = "quant" or "quant-ě", -- nil if no vowel-alternation indicator ('#', '##', '#ě', '##ě') was given
footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
-- The following fields are filled in by determine_stems()
vowel_stem = "STEM",
nonvowel_stem = "STEM",
oblique_slots = one of {nil, "gen_p", "all", "all-oblique"},
oblique_vowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
oblique_nonvowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
},
...
},
gender = "GENDER", -- "m", "f", "n"
number = "NUMBER", -- "sg", "pl"; may be missing
animacy = "ANIMACY", -- "inan", "an"; may be missing
hard = true, -- may be missing
hard_c = true, -- may be missing; set together with `hard`
soft = true, -- may be missing
mixed = true, -- may be missing
surname = true, -- may be missing
istem = true, -- may be missing
["-istem"] = true, -- may be missing
tstem = true, -- may be missing
nstem = true, -- may be missing
tech = true, -- may be missing
foreign = true, -- may be missing
mostlyindecl = true, -- may be missing
indecl = true, -- may be missing
pron = true, -- may be missing
det = true, -- may be missing
num = true, -- may be missing
collapse_ee = true, -- may be missing
persname = true, -- may be missing
c_as_k = true, -- may be missing
velar = true, -- may be missing
["-velar"] = true, -- may be missing
manual = true, -- may be missing; set by the '!' indicator
adj = true, -- may be missing; set by the '+' indicator
mixedistem = "VALUE", -- may be missing; text following 'mixedistem:'
decllemma = "DECLENSION-LEMMA", -- may be missing
declgender = "DECLENSION-GENDER", -- may be missing
declnumber = "DECLENSION-NUMBER", -- may be missing
-- The following additional fields are added by other functions:
orig_lemma = "ORIGINAL-LEMMA", -- as given by the user
orig_lemma_no_links = "ORIGINAL-LEMMA-NO-LINKS", -- links removed
lemma = "LEMMA", -- `orig_lemma_no_links`, converted to singular form if plural and lowercase if all-uppercase
forms = {
SLOT = {
{
form = "FORM",
footnotes = {"FOOTNOTE", "FOOTNOTE", ...} -- may be missing
},
...
},
...
},
decl = "DECL", -- declension, e.g. "hard-m"
vowel_stem = "VOWEL-STEM", -- derived from vowel-ending lemmas
nonvowel_stem = "NONVOWEL-STEM", -- derived from non-vowel-ending lemmas
}

Throws an error on an unrecognized indicator, on an indicator given twice, and on footnotes attached to an
indicator that doesn't allow them.
]=]
local function parse_indicator_spec(angle_bracket_spec)
-- The caller must pass the spec including its surrounding angle brackets.
local inside = rmatch(angle_bracket_spec, "^<(.*)>$")
assert(inside)
local base = {overrides = {}, forms = {}}
if inside ~= "" then
-- Separate out the bracketed footnotes first, then split the remaining text on dots.
local segments = put.parse_balanced_segment_run(inside, "[", "]")
local dot_separated_groups = put.split_alternating_runs_and_strip_spaces(segments, "%.")
for i, dot_separated_group in ipairs(dot_separated_groups) do
local part = dot_separated_group[1]
-- An indicator whose first three characters name a case is a slot override.
local case_prefix = usub(part, 1, 3)
if cases[case_prefix] then
local slots, override = parse_override(dot_separated_group)
for _, slot in ipairs(slots) do
if base.overrides[slot] then
error(("Two overrides specified for slot '%s'"):format(slot))
else
base.overrides[slot] = {override}
end
end
elseif part == "" then
-- No text, only footnotes: these apply to the spec as a whole.
if #dot_separated_group == 1 then
error("Blank indicator: '" .. inside .. "'")
end
base.footnotes = fetch_footnotes(dot_separated_group)
elseif rfind(part, "^[-*#ě]*$") or rfind(part, "^[-*#ě]*,") then
-- Reducible / vowel-alternation indicator; may contain several comma-separated alternatives, each of
-- which becomes one stem set.
if base.stem_sets then
error("Can't specify reducible/vowel-alternant indicator twice: '" .. inside .. "'")
end
local comma_separated_groups = put.split_alternating_runs_and_strip_spaces(dot_separated_group, ",")
local stem_sets = {}
for i, comma_separated_group in ipairs(comma_separated_groups) do
local pattern = comma_separated_group[1]
local orig_pattern = pattern
local reducible, vowelalt, oblique_slots
if pattern == "-" then
-- default reducible, no vowel alt
else
-- Pull out '*' (reducible) or '-*' (explicitly not reducible); whatever is left over must be a
-- vowel-alternation pattern. If there is no '*', `reducible` stays nil.
local before, after
before, reducible, after = rmatch(pattern, "^(.-)(%-?%*)(.-)$")
if before then
pattern = before .. after
reducible = reducible == "*"
end
if pattern ~= "" then
if not rfind(pattern, "^##?ě?$") then
error("Unrecognized vowel-alternation pattern '" .. pattern .. "', should be one of #, ##, #ě or ##ě: '" .. inside .. "'")
end
if pattern == "#ě" or pattern == "##ě" then
vowelalt = "quant-ě"
else
vowelalt = "quant"
end
-- `oblique_slots` will be later changed to "all" if the lemma ends in a consonant.
if pattern == "##" or pattern == "##ě" then
oblique_slots = "all-oblique"
else
oblique_slots = "gen_p"
end
end
end
table.insert(stem_sets, {
reducible = reducible,
vowelalt = vowelalt,
oblique_slots = oblique_slots,
footnotes = fetch_footnotes(comma_separated_group)
})
end
base.stem_sets = stem_sets
elseif #dot_separated_group > 1 then
-- Every indicator type handled below is a bare keyword, so footnotes are not allowed on it.
error("Footnotes only allowed with slot overrides, reducible or vowel alternation specs or by themselves: '" .. table.concat(dot_separated_group) .. "'")
elseif part == "m" or part == "f" or part == "n" then
if base.gender then
error("Can't specify gender twice: '" .. inside .. "'")
end
base.gender = part
elseif part == "sg" or part == "pl" then
if base.number then
error("Can't specify number twice: '" .. inside .. "'")
end
base.number = part
elseif part == "an" or part == "inan" then
if base.animacy then
error("Can't specify animacy twice: '" .. inside .. "'")
end
base.animacy = part
elseif part == "hard" or part == "soft" or part == "mixed" or part == "surname" or part == "istem" or
part == "-istem" or part == "tstem" or part == "nstem" or part == "tech" or part == "foreign" or
part == "mostlyindecl" or part == "indecl" or part == "pron" or part == "det" or part == "num" or
-- Use 'velar' with words like [[petanque]] and [[Braque]] that end with a pronounced velar (and hence are declined
-- like velars) but not with a spelled velar; use '-velar' with words like [[hadíth]] that end with a spelled but
-- silent velar.
part == "collapse_ee" or part == "persname" or part == "c_as_k" or part == "velar" or part == "-velar" then
-- Simple boolean flags are stored on `base` under the indicator's own name.
if base[part] then
error("Can't specify '" .. part .. "' twice: '" .. inside .. "'")
end
base[part] = true
-- Allow 'hard' to signal that -y is allowed after -c, as in hard masculine nouns such as [[hec]]
-- "joke", and also feminines in -ca where the c is pronounced as /k/, e.g. [[ayahuasca]], [[pororoca]],
-- [[Petrarca]], [[Mallorca]], [[Casablanca]]. (Contrast [[mangalica]], [[Kusturica]], [[Bjelica]],
-- where the c is pronounced as /ts/ and -y is disallowed.)
if part == "hard" then
base.hard_c = true
end
elseif part == "+" then
-- '+' marks an adjectival noun.
if base.adj then
error("Can't specify '+' twice: '" .. inside .. "'")
end
base.adj = true
elseif part == "!" then
-- '!' marks a manually declined noun.
if base.manual then
error("Can't specify '!' twice: '" .. inside .. "'")
end
base.manual = true
elseif rfind(part, "^mixedistem:") then
-- The 'KEY:VALUE' indicators below store the text after the colon without validating it here.
if base.mixedistem then
error("Can't specify 'mixedistem:' twice: '" .. inside .. "'")
end
base.mixedistem = rsub(part, "^mixedistem:", "")
elseif rfind(part, "^decllemma:") then
if base.decllemma then
error("Can't specify 'decllemma:' twice: '" .. inside .. "'")
end
base.decllemma = rsub(part, "^decllemma:", "")
elseif rfind(part, "^declgender:") then
if base.declgender then
error("Can't specify 'declgender:' twice: '" .. inside .. "'")
end
base.declgender = rsub(part, "^declgender:", "")
elseif rfind(part, "^declnumber:") then
if base.declnumber then
error("Can't specify 'declnumber:' twice: '" .. inside .. "'")
end
base.declnumber = rsub(part, "^declnumber:", "")
else
error("Unrecognized indicator '" .. part .. "': '" .. inside .. "'")
end
end
end
return base
end
-- True if `base` is an ordinary noun, i.e. none of the adjectival, pronoun, determiner or numeral flags is set.
local function is_regular_noun(base)
	local is_special = base.adj or base.pron or base.det or base.num
	return not is_special
end
-- Remember the user-visible number in `actual_number`, then let a 'declnumber:' indicator (if any) replace
-- `number`. Throws an error if 'declnumber' is anything other than 'sg' or 'pl'.
local function process_declnumber(base)
	base.actual_number = base.number
	local declnumber = base.declnumber
	if not declnumber then
		return
	end
	if declnumber ~= "sg" and declnumber ~= "pl" then
		error(("Unrecognized value '%s' for 'declnumber', should be 'sg' or 'pl'"):format(declnumber))
	end
	base.number = declnumber
end
-- Fill in default gender/number/animacy for one parsed spec and reject contradictory indicator combinations.
-- Pronouns, determiners and numerals get their fixed defaults from the dedicated helpers; regular nouns need an
-- explicit gender (unless manual) and default to both numbers and inanimate. Adjectival nouns get no defaults here.
local function set_defaults_and_check_bad_indicators(base)
	-- Computed before any defaults are applied.
	local regular_noun = is_regular_noun(base)

	-- Set default values.
	if base.pron then
		set_pron_defaults(base)
	elseif base.det then
		set_det_defaults(base)
	elseif base.num then
		set_num_defaults(base)
	elseif not base.adj then
		if not base.gender then
			if not base.manual then
				error("For nouns, gender must be specified")
			end
			base.gender = "none"
		end
		base.number = base.number or "both"
		process_declnumber(base)
		base.animacy = base.animacy or "inan"
		-- The `actual_*` fields keep the values as specified; 'declgender:' below may replace the ones used for
		-- declension.
		base.actual_gender = base.gender
		base.actual_animacy = base.animacy
		local declgender = base.declgender
		if declgender then
			if declgender == "m-an" or declgender == "m-in" then
				base.gender = "m"
				base.animacy = declgender == "m-an" and "an" or "inan"
			elseif declgender == "f" or declgender == "n" then
				base.gender = declgender
			else
				error(("Unrecognized value '%s' for 'declgender', should be 'm-an', 'm-in', 'f' or 'n'"):format(declgender))
			end
		end
	end

	-- Check for bad indicator combinations.
	local stem_class_count = 0
	for _, indicator in ipairs({"hard", "soft", "mixed"}) do
		if base[indicator] then
			stem_class_count = stem_class_count + 1
		end
	end
	if stem_class_count > 1 then
		error("At most one of 'hard', 'soft' and 'mixed' can be specified")
	end

	local istem, not_istem = base.istem, base["-istem"]
	if istem and not_istem then
		error("'istem' and '-istem' cannot be specified together")
	end
	if istem or not_istem then
		if base.gender ~= "f" then
			error("'istem' and '-istem' can only be specified with the feminine gender")
		end
		if not regular_noun then
			error("'istem' and '-istem' can only be specified with regular nouns")
		end
	end

	if base.declgender and not regular_noun then
		error("'declgender' can only be specified with regular nouns")
	end
end
-- Apply set_defaults_and_check_bad_indicators() to every word spec, and record on the overall spec whether any
-- word has clitic forms and which kinds of word (pronoun / determiner / numeral, and their complements) were seen.
local function set_all_defaults_and_check_bad_indicators(alternant_multiword_spec)
	local spec = alternant_multiword_spec
	local is_multiword = #spec.alternant_or_word_specs > 1
	local tracked_kinds = {"pron", "det", "num"}

	local function process_word_spec(base)
		set_defaults_and_check_bad_indicators(base)
		base.multiword = is_multiword -- FIXME: not currently used; consider deleting
		spec.has_clitic = spec.has_clitic or base.has_clitic
		-- Sets saw_pron/saw_non_pron, saw_det/saw_non_det and saw_num/saw_non_num.
		for _, kind in ipairs(tracked_kinds) do
			if base[kind] then
				spec["saw_" .. kind] = true
			else
				spec["saw_non_" .. kind] = true
			end
		end
	end

	iut.map_word_specs(spec, process_word_spec)
end
-- Reverse the second palatalization on the end of `word`, returning the first rewrite that applies or `word`
-- unchanged if none does. The two-character adjective endings are tried only when `is_adjective` is set, and
-- before the single-character ones. `base` is currently unused.
local function undo_second_palatalization(base, word, is_adjective)
	local rewrites = {}
	if is_adjective then
		rewrites[#rewrites + 1] = {"št", "sk"}
		rewrites[#rewrites + 1] = {"čt", "ck"}
	end
	rewrites[#rewrites + 1] = {"c", "k"} -- FIXME, this could be wrong and c correct
	rewrites[#rewrites + 1] = {"ř", "r"}
	rewrites[#rewrites + 1] = {"z", "h"} -- FIXME, this could be wrong and z or g correct
	rewrites[#rewrites + 1] = {"š", "ch"}

	for _, rewrite in ipairs(rewrites) do
		local palatalized, plain = rewrite[1], rewrite[2]
		local stem = rmatch(word, "^(.*)" .. palatalized .. "$")
		if stem then
			return stem .. plain
		end
	end
	return word
end
-- For a plural-only lemma, synthesize a likely singular lemma. It doesn't have to be
-- theoretically correct as long as it generates all the correct plural forms.
--
-- Reads base.lemma, base.gender, base.animacy, base.indecl, base.soft and base.mixed. On return, base.lemma holds
-- the synthesized singular lemma. Side effects: creates base.stem_sets if missing; may set base.soft (inanimate
-- masculines in -e/-ě that are not mixed), base.istem (feminines in -i) and stems.reducible (masculines whose
-- synthesized lemma ends in consonant + c/k). Throws an error if the plural ending is not one expected for the
-- gender, or if different stem sets would produce different singular lemmas.
local function synthesize_singular_lemma(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end

	local lemma_determined
	-- Loop over all stem sets in case the user specified multiple ones (e.g. '*,-*'). If we try to reconstruct
	-- different lemmas for different stem sets, we'll throw an error below.
	for _, stems in ipairs(base.stem_sets) do
		local stem, lemma
		-- One-shot loop: every path either breaks out with `lemma` set or throws an error.
		while true do
			if base.indecl then
				-- If specified as indeclinable, leave it alone; e.g. 'pesos' indeclinable plural of [[peso]].
				lemma = base.lemma
				break
			elseif base.gender == "m" then
				if base.animacy == "an" then
					stem = rmatch(base.lemma, "^(.*)i$")
					if stem then
						if base.soft then
							-- [[Blíženci]] "Gemini"
							-- Since the nominative singular has no ending.
							-- No ending is passed here. The previous code passed an identifier `ending` that is not
							-- declared in this scope (i.e. an undeclared global, nil unless strict globals are on,
							-- in which case the read throws); the one-argument call matches the other
							-- one-argument use of this helper in determine_declension().
							lemma = com.convert_paired_plain_to_palatal(stem)
						else
							lemma = undo_second_palatalization(base, stem)
						end
					else
						stem = rmatch(base.lemma, "^(.*)ové$") or rmatch(base.lemma, "^(.*)é$")
						if stem then
							-- [[manželé]] "married couple", [[Velšané]] "Welsh people"
							lemma = stem
						else
							error(("Animate masculine plural-only lemma '%s' should end in -i, -ové or -é"):format(base.lemma))
						end
					end
				else
					stem = rmatch(base.lemma, "^(.*)y$")
					if stem then
						-- [[droby]] "giblets"; [[tvarůžky]] "Olomouc cheese"; [[alimenty]] "alimony"; etc.
						lemma = stem
					else
						local ending
						stem, ending = rmatch(base.lemma, "^(.*)([eě])$")
						if stem then
							-- [[peníze]] "money", [[tvargle]] "Olomouc cheese" (mixed declension), [[údaje]] "data",
							-- [[Lazce]] (a village), [[lováče]] "money", [[Krkonoše]] "Giant Mountains", [[kříže]] "clubs"
							lemma = com.convert_paired_plain_to_palatal(stem, ending)
							if not base.mixed then
								base.soft = true
							end
						else
							error(("Inanimate masculine plural-only lemma '%s' should end in -y, -e or -ě"):format(base.lemma))
						end
					end
				end
				-- Only guess reducibility when the user gave no explicit reducible spec for this stem set.
				if stems.reducible == nil then
					if rfind(lemma, com.cons_c .. "[ck]$") and not com.is_monosyllabic(base.lemma) then
						stems.reducible = true
					end
					if stems.reducible then
						lemma = dereduce(base, lemma)
					end
				end
				break
			elseif base.gender == "f" then
				stem = rmatch(base.lemma, "^(.*)y$")
				if stem then
					lemma = stem .. "a"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eě]$")
				if stem then
					-- Singular like the plural. Cons-stem feminines like [[dlaň]] "palm (of the hand)" have identical
					-- plurals to soft-stem feminines like [[růže]] (modulo e/ě differences), so we don't need to
					-- reconstruct the former type.
					lemma = base.lemma
					break
				end
				stem = rmatch(base.lemma, "^(.*)i$")
				if stem then
					-- i-stems.
					lemma = stem
					base.istem = true
					break
				end
				error(("Feminine plural-only lemma '%s' should end in -y, -ě, -e or -i"):format(base.lemma))
			elseif base.gender == "n" then
				-- -ata nouns like [[slůně]] "baby elephant" nom pl 'slůňata' are declined in the plural same as if
				-- the singular were 'slůňato' so we don't have to worry about them.
				stem = rmatch(base.lemma, "^(.*)a$")
				if stem then
					lemma = stem .. "o"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eěí]$")
				if stem then
					-- singular lemma also in -e, -ě or -í; e.g. [[věčná loviště]] "[[happy hunting ground]]"
					lemma = base.lemma
					break
				end
				error(("Neuter plural-only lemma '%s' should end in -a, -í, -ě or -e"):format(base.lemma))
			else
				error(("Internal error: Unrecognized gender '%s'"):format(base.gender))
			end
		end
		if lemma_determined and lemma_determined ~= lemma then
			error(("Attempt to set two different singular lemmas '%s' and '%s'"):format(lemma_determined, lemma))
		end
		lemma_determined = lemma
	end
	base.lemma = lemma_determined
end
-- For an adjectival lemma, synthesize the masc singular form.
--
-- Sets base.decl to "indecl" (if base.indecl) or "adj", rewrites base.lemma to the masculine singular where the
-- given lemma is in another gender/number, and stores the ending-less stem as both `vowel_stem` and
-- `nonvowel_stem` of every stem set (creating a single non-reducible stem set if none was given). Throws an error
-- if the lemma ending does not fit the gender/number/animacy.
-- NOTE(review): in several branches the stored stem is derived from the lemma as given (e.g. still palatalized, or
-- still ending in -ov) while base.lemma is normalized; presumably the adjectival declension works from base.lemma
-- -- confirm before relying on the stored stems.
local function synthesize_adj_lemma(base)
	local stem
	if base.indecl then
		base.decl = "indecl"
		stem = base.lemma
	else
		-- Convert a possessive stem in -ov to the masculine singular lemma in -ův (stems in -in are unchanged).
		local function sub_ov(possessive_stem)
			-- Assign to a single variable to drop gsub's second return value (the replacement count).
			local converted = possessive_stem:gsub("ov$", "ův")
			return converted
		end
		-- One-shot loop: every path either breaks out with `stem` set or throws an error.
		while true do
			if base.number == "pl" then
				if base.gender == "m" then
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						if base.soft then
							-- nothing to do
						else
							if base.animacy ~= "an" then
								error(("Masculine plural-only adjectival lemma '%s' ending in -í can only be animate unless '.soft' is specified"):format(base.lemma))
							end
							base.lemma = undo_second_palatalization(base, stem, "is adjective") .. "ý"
						end
						break
					end
					stem = rmatch(base.lemma, "^(.*)é$")
					if stem then
						if base.animacy == "an" then
							error(("Masculine plural-only adjectival lemma '%s' ending in -é must be inanimate"):format(base.lemma))
						end
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)i$") or rmatch(base.lemma, "^(.*in)i$")
					if stem then
						if base.animacy ~= "an" then
							error(("Masculine plural-only possessive adjectival lemma '%s' ending in -i must be animate"):format(base.lemma))
						end
						base.lemma = sub_ov(stem)
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$")
					if stem then
						if base.animacy == "an" then
							error(("Masculine plural-only possessive adjectival lemma '%s' ending in -y must be inanimate"):format(base.lemma))
						end
						base.lemma = sub_ov(stem)
						break
					end
					if base.animacy == "an" then
						error(("Animate masculine plural-only adjectival lemma '%s' should end in -í, -ovi or -ini"):format(base.lemma))
					elseif base.soft then
						error(("Soft masculine plural-only adjectival lemma '%s' should end in -í"):format(base.lemma))
					else
						error(("Inanimate masculine plural-only adjectival lemma '%s' should end in -é, -ovy or -iny"):format(base.lemma))
					end
				elseif base.gender == "f" then
					stem = rmatch(base.lemma, "^(.*)é$") -- hard adjective
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$") -- possessive adjective
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Feminine plural-only adjectival lemma '%s' should end in -é, -í, -ovy or -iny"):format(base.lemma))
				else
					stem = rmatch(base.lemma, "^(.*)á$") -- hard adjective
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$") -- possessive adjective
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Neuter plural-only adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
				end
			else
				if base.gender == "m" then
					stem = rmatch(base.lemma, "^(.*)[ýí]$") or rmatch(base.lemma, "^(.*)ův$") or rmatch(base.lemma, "^(.*)in$")
					if stem then
						break
					end
					error(("Masculine adjectival lemma '%s' should end in -ý, -í, -ův or -in"):format(base.lemma))
				elseif base.gender == "f" then
					stem = rmatch(base.lemma, "^(.*)á$")
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$")
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Feminine adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
				else
					stem = rmatch(base.lemma, "^(.*)é$")
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)o$") or rmatch(base.lemma, "^(.*in)o$")
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Neuter adjectival lemma '%s' should end in -é, -í, -ovo or -ino"):format(base.lemma))
				end
			end
		end
		base.decl = "adj"
	end

	-- Now set the stem sets if not given.
	if not base.stem_sets then
		base.stem_sets = {{reducible = false}}
	end
	for _, stems in ipairs(base.stem_sets) do
		-- Set the stems.
		stems.vowel_stem = stem
		stems.nonvowel_stem = stem
	end
end
-- Determine the declension based on the lemma, gender and number. The declension is set in base.decl. In the process,
-- we set either base.vowel_stem (if the lemma ends in a vowel) or base.nonvowel_stem (if the lemma does not end in a
-- vowel), which is used by determine_stems(). In some cases (specifically with certain foreign nouns), we set
-- base.lemma to a new value; this is as if the user specified 'decllemma:'.
--
-- The lemma endings are tried in this order: -a, -e/-ě, -o, -i/-y, -u, -í/-ý, consonant. Throws an error for
-- gender/ending combinations that are not supported, or if the lemma ends in none of the above.
local function determine_declension(base)
	if base.mostlyindecl then
		base.decl = "mostly-indecl"
		base.nonvowel_stem = base.lemma
		return
	end
	if base.indecl then
		base.decl = "indecl"
		base.nonvowel_stem = base.lemma
		return
	end

	-- Determine declension
	-- `stem` must be declared local here; otherwise every assignment below writes to a global variable.
	local stem
	stem = rmatch(base.lemma, "^(.*)a$")
	if stem then
		if base.gender == "m" then
			if base.animacy ~= "an" then
				error("Masculine lemma in -a must be animate")
			end
			base.decl = "a-m"
		elseif base.gender == "f" then
			if base.hard then
				-- e.g. [[doňa]], which seems not to have soft alternates as [[piraňa]] does (despite IJP; but see the note at the
				-- bottom)
				base.decl = "hard-f"
			elseif rfind(stem, "e$") then
				-- [[idea]], [[diarea]] (subtype '.tech'), [[Korea]], etc.
				base.decl = "ea-f"
			elseif rfind(stem, "i$") then
				-- [[signoria]], [[sinfonia]], [[paranoia]], etc.
				base.decl = "ia-f"
			elseif rfind(stem, "[ou]$") then
				-- [[stoa]], [[kongrua]], [[Samoa]], [[Nikaragua]], etc.
				base.decl = "oa-f"
			elseif not base.persname and rfind(stem, "^.*[ňj]$") or base.mixed then
				-- [[maracuja]], [[papája]], [[sója]]; [[piraňa]] etc. Also [[Keňa]], [[Troja]]/[[Trója]], [[Amudarja]].
				-- Not [[Táňa]], [[Darja]], which decline like [[gejša]], [[skica]], etc. (subtype of hard feminines).
				base.decl = "mixed-f"
			else
				base.decl = "hard-f"
			end
		elseif base.gender == "n" then
			if rfind(stem, "m$") then
				base.decl = "ma-n"
			else
				error("Lemma ending in -a and neuter must end in -ma")
			end
		end
		base.vowel_stem = stem
		return
	end

	local ending
	stem, ending = rmatch(base.lemma, "^(.*)([eě])$")
	if stem then
		if ending == "ě" then
			stem = com.convert_paired_plain_to_palatal(stem)
		end
		if base.gender == "m" then
			if base.foreign then
				-- [[software]] and similar English-derived nouns with silent -e; set the lemma here as if decllemma: were given
				base.lemma = stem
				base.nonvowel_stem = stem
				base.decl = "hard-m"
				return
			end
			if base.hard then
				-- -e be damned; e.g. [[Sofokles]] with hard stem 'Sofokle-' (genitive 'Sofoklea', dative 'Sofokleovi', etc.)
				base.nonvowel_stem = base.lemma
				base.decl = "hard-m"
				return
			end
			if base.tstem then
				if base.animacy ~= "an" then
					error("T-stem masculine lemma in -e must be animate")
				end
				base.decl = "tstem-m"
			elseif rfind(stem, "i$") then
				-- [[zombie]], [[hippie]], [[yuppie]], [[rowdie]]
				base.decl = "ie-m"
			elseif rfind(stem, "e$") then
				-- [[Yankee]]
				base.nonvowel_stem = base.lemma
				base.decl = "ee-m"
				return
			else
				base.decl = "e-m"
			end
		elseif base.gender == "f" then
			base.decl = "soft-f"
		else
			if base.tstem then
				base.decl = "tstem-n"
			else
				base.decl = "soft-n"
			end
		end
		base.vowel_stem = stem
		return
	end

	stem = rmatch(base.lemma, "^(.*)o$")
	if stem then
		if base.gender == "m" then
			-- Cf. [[maestro]] m.
			base.decl = "o-m"
		elseif base.gender == "f" then
			-- [[zoo]]; [[Žemaitsko]]?
			error("Feminine nouns in -o are indeclinable; use '.indecl' if needed")
		elseif base.nstem then
			base.decl = "n-n"
		elseif base.hard then
			base.decl = "hard-n"
		elseif rfind(stem, "[aeiuy]$") then
			-- These have gen pl in -í and often other soft plural endings.
			base.decl = "semisoft-n"
		else
			base.decl = "hard-n"
		end
		base.vowel_stem = stem
		return
	end

	-- Note that for -i/-y (and -u, -í/-ý below) the captured stem includes the final vowel.
	stem = rmatch(base.lemma, "^(.*[iy])$")
	if stem then
		if base.gender == "m" then
			if base.soft then
				-- [[gay]] "gay man", [[gray]] "gray (scientific unit)", [[Nagy]] (surname)
				base.decl = "soft-m"
			else
				-- Cf. [[kivi]] "kiwi (bird)", [[husky]] "husky", etc.
				base.decl = "i-m"
			end
		elseif base.gender == "f" then
			if base.soft then
				-- [[Uruguay]], [[Paraguay]]
				base.decl = "soft-f"
			else
				-- [[máti]], [[pramáti]]; note also indeclinable [[tsunami]]/[[cunami]], [[okapi]]
				base.decl = "i-f"
				if stem:find("i$") then
					-- Single-variable assignment drops gsub's second return value (the replacement count).
					stem = stem:gsub("i$", "")
				else
					error("Feminine nouns in -y are either soft or indeclinable; use '.soft' or '.indecl' as needed")
				end
			end
		else
			error("Neuter nouns in -i are indeclinable; use '.indecl' if needed")
		end
		base.vowel_stem = stem
		return
	end

	stem = rmatch(base.lemma, "^(.*u)$")
	if stem then
		if base.gender == "m" then
			-- Cf. [[emu]], [[guru]], etc.
			base.decl = "u-m"
		elseif base.gender == "f" then
			-- Only one I know is [[budižkničemu]], which is indeclinable in the singular and declines in the plural as
			-- if written 'budižkničema'.
			error("Feminine nouns in -u are indeclinable; use '.indecl' if needed")
		else
			error("Neuter nouns in -u are indeclinable; use '.indecl' if needed")
		end
		base.vowel_stem = stem
		return
	end

	stem = rmatch(base.lemma, "^(.*[íý])$")
	if stem then
		if base.gender == "m" then
			base.decl = "í-m"
		elseif base.gender == "f" then
			-- FIXME: Do any exist? If not, update this message.
			error("Support for non-adjectival non-indeclinable feminine nouns in -í/-ý not yet implemented")
		else
			base.decl = "í-n"
		end
		base.vowel_stem = stem
		return
	end

	stem = rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$")
	if stem then
		if base.gender == "m" then
			if base.foreign then
				-- [[komunismus]] "communism", [[kosmos]] "cosmos", [[hádes]] "Hades"
				stem = rmatch(base.lemma, "^(.*)[ueoaéá]s$")
				if not stem then
					error("Unrecognized masculine foreign ending, should be -us, -es, -os, -as, -és or -ás")
				end
				if not base.hard and (rfind(stem, "[ei]$") and base.animacy == "an" or
					rfind(stem, "i$") and base.animacy == "inan") then
					-- [[genius]], [[basileus]], [[rádius]]; not [[nukleus]], [[choreus]] (inanimate); not
					-- [[skarabeus]] (animate), which should specify 'hard'
					base.decl = "semisoft-m"
				else
					base.decl = "hard-m"
				end
				-- set the lemma here as if decllemma: were given
				base.lemma = stem
			elseif base.hard then
				base.decl = "hard-m"
			elseif base.soft then
				base.decl = "soft-m"
			elseif base.mixed then
				base.decl = "mixed-m"
			elseif rfind(base.lemma, com.inherently_soft_c .. "$") or rfind(base.lemma, "tel$") then
				base.decl = "soft-m"
			else
				base.decl = "hard-m"
			end
		elseif base.gender == "f" then
			if base.mixedistem then
				base.decl = "mixed-istem-f"
			elseif base.istem then
				base.decl = "istem-f"
			elseif base["-istem"] then
				base.decl = "cons-f"
			elseif rfind(base.lemma, "st$") then
				-- Numerous abstracts in -ost; also [[kost]], [[část]], [[srst]], [[bolest]]
				base.decl = "istem-f"
			else
				base.decl = "cons-f"
			end
		elseif base.gender == "n" then
			if base.foreign then
				stem = rmatch(base.lemma, "^(.*)um$") or rmatch(base.lemma, "^(.*)on$")
				if not stem then
					error("Unrecognized neuter foreign ending, should be -um or -on")
				end
				if base.hard then
					base.decl = "hard-n"
				elseif rfind(stem, "[eiuy]$") then
					base.decl = "semisoft-n"
				else
					base.decl = "hard-n"
				end
				-- set the lemma here as if decllemma: were given
				base.lemma = stem .. "o"
				base.vowel_stem = stem
				return
			else
				error("Neuter nouns ending in a consonant should use '.foreign' or '.decllemma:...'")
			end
		end
		base.nonvowel_stem = stem
		return
	end

	error("Unrecognized ending for lemma: '" .. base.lemma .. "'")
end
-- Compute the default for the 'reducible' flag and store it in base.default_reducible. The value is true, false,
-- or "mixed" (for lemmas in -isko). Throws an internal error if the lemma ends in neither a consonant nor a vowel.
local function determine_default_reducible(base)
	local lemma = base.lemma
	local gender = base.gender

	-- Nouns in vowels other than -a/o as well as masculine nouns ending in all vowels don't have null endings so
	-- not reducible. Note, we are never called on adjectival nouns.
	if rfind(lemma, "[iyuíeě]$") or gender == "m" and rfind(lemma, "[ao]$") or base.tstem then
		base.default_reducible = false
		return
	end

	local cons_stem = rmatch(lemma, "^(.*" .. com.cons_c .. ")$")
	if cons_stem then
		-- When analyzing existing manual declensions in -ec and -ek, 290 were reducible vs. 23 non-reducible. Of
		-- these 23, 15 were monosyllabic (and none of the 290 reducible nouns were monosyllabic) -- and two of these
		-- were actually reducible but irregularly: [[švec]] "shoemaker" (gen sg 'ševce') and [[žnec]] "reaper
		-- (person)" (gen sg. 'žence'). Of the remaining 8 multisyllabic non-reducible words, two were actually
		-- reducible but irregularly: [[stařec]] "old man" (gen sg 'starce') and [[tkadlec]] "weaver" (gen sg
		-- 'tkalce'). The remaining six consisted of 5 compounds of monosyllabic words: [[dotek]], [[oblek]],
		-- [[kramflek]], [[pucflek]], [[pokec]], plus [[česnek]], which should be reducible but would lead to an
		-- impossible consonant cluster.
		local masc_ec_ek = gender == "m" and rfind(cons_stem, "[eě][ck]$") and not com.is_monosyllabic(cons_stem)
		-- Feminines in -eň/-ěň: [[pochodeň]] "torch", [[píseň]] "song", [[žeň]] "harvest"; not [[reveň]] "rhubarb"
		-- or [[dřeň]] "pulp", which need an override.
		if masc_ec_ek or gender == "f" and rfind(cons_stem, "[eě]ň$") then
			base.default_reducible = true
		else
			base.default_reducible = false
		end
		return
	end

	if base.number == "sg" then
		base.default_reducible = false
		return
	end

	if rfind(lemma, "isko$") then
		-- e.g. [[středisko]]
		base.default_reducible = "mixed"
		return
	end

	local vowel_stem = rmatch(lemma, "^(.*)" .. com.vowel_c .. "$")
	if not vowel_stem then
		error(("Internal error: Something wrong, lemma '%s' doesn't end in consonant or vowel"):format(base.lemma))
	end
	-- Substitute 'ch' with a single character to make the following code simpler. (Assigning to a single variable
	-- drops gsub's second return value.)
	local simplified = vowel_stem:gsub("ch", com.TEMP_CH)
	local cons = com.cons_c

	local reducible
	if rfind(simplified, cons .. "[lr]" .. cons .. "$") then
		-- [[vrba]], [[vlha]]; not reducible. (But note [[jablko]], reducible; needs override.)
		reducible = false
	elseif base.foreign then
		-- Foreign nouns in -CCum seem generally non-reducible in the gen pl except for those in -Crum like
		-- [[centrum]]. Examples: [[album]], [[verbum]], [[signum]], [[interregnum]], [[sternum]]. [[infernum]] has
		-- gen pl 'infern/inferen'.
		reducible = rfind(simplified, cons .. "r$") and true or false
	else
		-- [[ayahuasca]] has gen pl 'ayahuasek'
		reducible = (rfind(simplified, cons .. "[bkhlrmnv]$") or
			base.c_as_k and rfind(simplified, cons .. "c$")) and true or false
	end
	base.default_reducible = reducible
end
-- Determine the stems to use for each stem set: vowel and nonvowel stems, for singular
-- and plural. We assume that one of base.vowel_stem or base.nonvowel_stem has been
-- set in determine_declension(), depending on whether the lemma ends in
-- a vowel. We construct all the rest given the reducibility, vowel alternation spec and
-- any explicit stems given. We store the determined stems inside of the stem-set objects
-- in `base.stem_sets`, meaning that if the user gave multiple reducible or vowel-alternation
-- patterns, we will compute multiple sets of stems. The reason is that the stems may vary
-- depending on the reducibility and vowel alternation.
--
-- Fields written on each stem set: `reducible` (if unset), `vowel_stem`, `nonvowel_stem`,
-- `oblique_vowel_stem`, `oblique_nonvowel_stem`, and possibly `oblique_slots`. Throws an error if a
-- reducible consonant-final stem cannot be reduced.
local function determine_stems(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end

	-- Set default reducible and check for default mixed reducible, which needs to be expanded into two entries.
	local default_mixed_reducible = false
	for _, stems in ipairs(base.stem_sets) do
		if stems.reducible == nil then
			stems.reducible = base.default_reducible
		end
		if stems.reducible == "mixed" then
			default_mixed_reducible = true
		end
	end
	if default_mixed_reducible then
		local new_stem_sets = {}
		for _, stems in ipairs(base.stem_sets) do
			if stems.reducible == "mixed" then
				-- The original object becomes the reducible variant; a shallow copy placed right after it is the
				-- non-reducible variant.
				local non_reducible_copy = m_table.shallowCopy(stems)
				non_reducible_copy.reducible = false
				stems.reducible = true
				table.insert(new_stem_sets, stems)
				table.insert(new_stem_sets, non_reducible_copy)
			else
				table.insert(new_stem_sets, stems)
			end
		end
		base.stem_sets = new_stem_sets
	end

	-- Now determine all the stems for each stem set.
	for _, stems in ipairs(base.stem_sets) do
		if base.vowel_stem then
			stems.vowel_stem = base.vowel_stem
			stems.nonvowel_stem = stems.vowel_stem
			-- Apply vowel alternation first in cases like jádro -> jader; apply_vowel_alternation() will throw an error
			-- if the vowel being modified isn't the last vowel in the stem.
			stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.nonvowel_stem)
			if stems.reducible then
				stems.nonvowel_stem = dereduce(base, stems.nonvowel_stem)
				stems.oblique_nonvowel_stem = dereduce(base, stems.oblique_nonvowel_stem)
			end
		else
			stems.nonvowel_stem = base.nonvowel_stem
			-- The user specified #, #ě, ## or ##ě and we're dealing with a term like masculine [[bůh]] or feminine
			-- [[sůl]] that ends in a consonant. In this case, all slots except the nom_s and maybe acc_s have vowel
			-- alternation.
			if stems.oblique_slots then
				stems.oblique_slots = "all"
			end
			stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.nonvowel_stem)
			if stems.reducible then
				stems.vowel_stem = com.reduce(base.nonvowel_stem)
				if not stems.vowel_stem then
					error("Unable to reduce stem '" .. base.nonvowel_stem .. "'")
				end
			else
				stems.vowel_stem = base.nonvowel_stem
			end
		end
		stems.oblique_vowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.vowel_stem)
	end
end
-- Work out the declension and stems for a single word spec, dispatching on its word class. Pronouns, determiners,
-- numerals and adjectives each have their own handler; manual declensions get a fixed empty-stem stem set (and may
-- not be combined with reducible/vowel-alternation specs); everything else is treated as a regular noun.
local function detect_indicator_spec(base)
	if base.pron then
		determine_pronoun_stems(base)
		return
	end
	if base.det then
		determine_determiner_stems(base)
		return
	end
	if base.num then
		determine_numeral_stems(base)
		return
	end
	if base.adj then
		process_declnumber(base)
		synthesize_adj_lemma(base)
		return
	end
	if base.manual then
		if base.stem_sets then
			-- FIXME, maybe this should be allowed?
			error("Reducible and vowel alternation specs cannot be given with manual declensions")
		end
		base.stem_sets = {{reducible = false, vowel_stem = "", nonvowel_stem = ""}}
		base.decl = "manual"
		return
	end

	-- Regular noun: plural-only lemmas are first converted to a singular lemma.
	if base.number == "pl" then
		synthesize_singular_lemma(base)
	end
	determine_declension(base)
	determine_default_reducible(base)
	determine_stems(base)
end
-- Run detect_indicator_spec() on every word spec, collecting the set of genders seen in the singular and in the
-- plural, then reject specs that combine more than one of pronoun/determiner/numeral.
local function detect_all_indicator_specs(alternant_multiword_spec)
	local spec = alternant_multiword_spec
	-- Keep track of all genders seen in the singular and plural so we can determine whether to add the term to
	-- [[:Category:Czech nouns that change gender in the plural]].
	local sg_genders, pl_genders = {}, {}
	spec.sg_genders = sg_genders
	spec.pl_genders = pl_genders

	iut.map_word_specs(spec, function(base)
		detect_indicator_spec(base)
		if base.number ~= "pl" then
			sg_genders[base.actual_gender] = true
		end
		if base.number ~= "sg" then
			-- All t-stem masculines are neuter in the plural.
			local plgender = base.decl == "tstem-m" and "n" or base.actual_gender
			pl_genders[plgender] = true
		end
	end)

	-- Count how many of the special word classes were seen anywhere in the spec.
	local special_classes_seen = 0
	for _, flag in ipairs({"saw_pron", "saw_det", "saw_num"}) do
		if spec[flag] then
			special_classes_seen = special_classes_seen + 1
		end
	end
	if special_classes_seen > 1 then
		error("Can't combine pronouns, determiners and/or numerals")
	end
end
-- Forward declaration; propagate_alternant_properties() and propagate_multiword_properties() call each other.
local propagate_multiword_properties

-- Propagate `property` within each alternant of `alternant_spec` (each alternant is a multiword spec), then set
-- `alternant_spec[property]` to the combination of the alternants' values: the first value seen, or `mixed_value`
-- if a later alternant has a truthy value that differs from the combined value so far.
local function propagate_alternant_properties(alternant_spec, property, mixed_value, nouns_only)
	local combined
	for _, alternative in ipairs(alternant_spec.alternants) do
		propagate_multiword_properties(alternative, property, mixed_value, nouns_only)
		local value = alternative[property]
		if combined == nil then
			combined = value
		elseif value and value ~= combined then
			combined = mixed_value
		end
	end
	alternant_spec[property] = combined
end
-- Propagate `property` sideways within a multiword spec. Walking left to right, each "source" word (see below)
-- passes its value to the preceding words back to the previous source, filling only words that don't already have
-- a value; the last source also fills the words to its right. `multiword_spec[property]` is set to the sources'
-- common value, `mixed_value` if they disagree, or nil if there are no sources.
--
-- A word is a source if: it is an alternant and has the property after recursive propagation; or `nouns_only` is
-- truthy and is_regular_noun() accepts it; or `nouns_only` is falsy and it already has the property.
propagate_multiword_properties = function(multiword_spec, property, mixed_value, nouns_only)
	local combined = nil
	local last_source_pos = 0
	local words = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs
	for i = 1, #words do
		local word = words[i]
		local is_source
		if word.alternants then
			-- NOTE(review): `nouns_only` is not forwarded here, so alternants are always processed as if it were
			-- unset -- confirm whether that is intentional.
			propagate_alternant_properties(word, property, mixed_value)
			is_source = word[property] and true or false
		elseif nouns_only then
			is_source = is_regular_noun(word)
		else
			is_source = word[property] and true or false
		end
		if is_source then
			local value = word[property]
			if not value then
				error("Internal error: noun-type word spec without " .. property .. " set")
			end
			-- Fill the gap between the previous source and this one.
			for j = last_source_pos + 1, i - 1 do
				words[j][property] = words[j][property] or value
			end
			last_source_pos = i
			if combined == nil then
				combined = value
			elseif combined ~= value then
				combined = mixed_value
			end
		end
	end
	if last_source_pos > 0 then
		-- Words after the last source inherit from it.
		local trailing_value = words[last_source_pos][property]
		for i = last_source_pos + 1, #words do
			words[i][property] = words[i][property] or trailing_value
		end
	end
	multiword_spec[property] = combined
end
-- Push `property` from the top-level spec down to every word. At each level an object keeps its own truthy value
-- and otherwise inherits its parent's value (the top level inherits `default_propval`). Each object also gets
-- `actual_<property>` set to its resulting value unless that field is already truthy. Throws an error if a word
-- would end up with the value "mixed".
local function propagate_properties_downward(alternant_multiword_spec, property, default_propval)
	local actual_key = "actual_" .. property

	-- Resolve the value of `obj`, storing the inherited value if it has none of its own; returns the result.
	local function resolve(obj, inherited)
		local value = obj[property]
		if not value then
			value = inherited
			obj[property] = inherited
		end
		if not obj[actual_key] then
			obj[actual_key] = value
		end
		return value
	end

	-- Resolve a leaf word and reject a "mixed" result.
	local function resolve_word(word_spec, inherited)
		if resolve(word_spec, inherited) == "mixed" then
			-- FIXME, use clearer error message.
			error("Attempt to assign mixed " .. property .. " to word")
		end
	end

	local top_value = resolve(alternant_multiword_spec, default_propval)
	for _, item in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if item.alternants then
			local item_value = resolve(item, top_value)
			for _, alternative in ipairs(item.alternants) do
				local alternative_value = resolve(alternative, item_value)
				for _, word_spec in ipairs(alternative.word_specs) do
					resolve_word(word_spec, alternative_value)
				end
			end
		else
			resolve_word(item, top_value)
		end
	end
end
-- Propagate `property` (one of "animacy", "gender" or "number") from nouns to adjacent adjectives.
--
-- Preconditions: the property is assumed to be already set on all nouns; this should happen in
-- set_defaults_and_check_bad_indicators().
--
-- The work is done in three passes:
-- 1. Upwards and sideways, taking only regular nouns as sources. Within a multiword spec we go left to right; each
--    noun's value is handed to the words on its left, back to the previous noun, and the last noun's value is also
--    handed to the words on its right (to handle e.g. [[anděl strážný]] "guardian angel", where the adjective
--    [[strážný]] should inherit the 'masculine' and 'animate' properties of [[anděl]]). The multiword spec itself
--    gets the common value of its sources, or `mixed_value` if they differ (`mixed_value` is "mixed" for animacy
--    and gender, but "both" for number). An alternant spec is handled by processing each of its alternants (which
--    are multiword specs) the same way and combining their non-nil values likewise.
-- 2. The same again, but with any word that now has the property acting as a source.
-- 3. Downwards: whatever is still unset inherits from its parent, starting from `default_propval` at the top.
--
-- The effect of the first two passes is to set the property of each alternant and multiword spec based on its
-- children or its neighbors.
local function propagate_properties(alternant_multiword_spec, property, default_propval, mixed_value)
	local spec = alternant_multiword_spec
	-- The fourth argument is only tested for truthiness; the string documents the intent at the call site.
	propagate_multiword_properties(spec, property, mixed_value, "nouns only")
	propagate_multiword_properties(spec, property, mixed_value, false)
	propagate_properties_downward(spec, property, default_propval)
end
-- Record where the nouns are. For each alternant inside an alternant spec, `first_noun` is set to the index of its
-- first regular noun (if any). On the top-level spec, `first_noun` is set to the index of a top-level element that
-- is, or contains, a regular noun.
-- NOTE(review): scanning stops at the first plain-word noun, but not at an alternant containing a noun, so a later
-- element can overwrite the top-level `first_noun` set by an earlier alternant -- confirm this is intended.
local function determine_noun_status(alternant_multiword_spec)
	local top = alternant_multiword_spec
	for idx, item in ipairs(top.alternant_or_word_specs) do
		if not item.alternants then
			if is_regular_noun(item) then
				top.first_noun = idx
				return
			end
		else
			local contains_noun = false
			for _, alternative in ipairs(item.alternants) do
				for pos, word_spec in ipairs(alternative.word_specs) do
					if is_regular_noun(word_spec) then
						alternative.first_noun = pos
						contains_noun = true
						break
					end
				end
			end
			if contains_noun then
				top.first_noun = idx
			end
		end
	end
end
-- Set the part of speech (`pos`) and its plural (`plpos`) on the spec. An explicit `args.pos` wins; otherwise the
-- part of speech is chosen from the word classes seen: a class is used only if every word belongs to it (pronoun,
-- then determiner, then numeral are checked in that order), with a final fallback value.
-- NOTE(review): the part-of-speech names are localized strings, presumably the site-language terms for pronoun,
-- determiner, numeral and noun -- confirm.
local function set_pos(alternant_multiword_spec)
	local spec = alternant_multiword_spec

	-- True if some word had the `kind` flag and no word lacked it.
	local function consists_only_of(kind)
		return spec["saw_" .. kind] and not spec["saw_non_" .. kind]
	end

	local pos = spec.args.pos
	if not pos then
		if consists_only_of("pron") then
			pos = "သဗ္ဗနာမ်"
		elseif consists_only_of("det") then
			pos = "ဖျေံလဝ်သန္နိဋ္ဌာန်"
		elseif consists_only_of("num") then
			pos = "ဂၞန်သၚ်္ချာ"
		else
			pos = "နာမ်"
		end
	end
	spec.pos = pos
	spec.plpos = require(en_utilities_module).pluralize(pos)
end
-- Normalize the lemma of every word spec. An empty lemma defaults to `pagename`. Fields set on each word spec:
-- `orig_lemma` (as given), `orig_lemma_no_links` (links removed), `all_uppercase` (only when the link-free lemma
-- equals its own uppercasing), `actual_lemma` (link-free, lowercased if all-uppercase) and `lemma` (the
-- user-specified `decllemma` if present, else `actual_lemma`).
local function normalize_all_lemmas(alternant_multiword_spec, pagename)
	local function normalize(base)
		local given = base.lemma
		if given == "" then
			given = pagename
		end
		base.orig_lemma = given

		local unlinked = m_links.remove_links(given)
		base.orig_lemma_no_links = unlinked

		-- If the lemma is all-uppercase, lowercase it but note this, so that later in combine_stem_ending() we
		-- convert it back to uppercase. This allows us to handle all-uppercase acronyms without a lot of extra
		-- complexity.
		-- FIXME: This may not make sense at all.
		local working = unlinked
		if uupper(working) == working then
			base.all_uppercase = true
			working = ulower(working)
		end
		base.actual_lemma = working
		base.lemma = base.decllemma or working
	end

	iut.map_word_specs(alternant_multiword_spec, normalize)
end
-- Decline a single word spec: run the declension function for `base.decl` once per stem set, apply derived slots
-- and overrides, and then reconcile the number used for declining (`base.number`) with the number to be shown
-- (`base.actual_number`). If the two differ, the forms of the declined number are copied to the other number; if
-- in addition `actual_number` is not "both", the forms of the number that should not be shown are erased.
-- Throws an internal error if `base.decl` is not a known declension type.
local function decline_noun(base)
	for _, stems in ipairs(base.stem_sets) do
		if not decls[base.decl] then
			-- tostring() keeps the message meaningful even if base.decl is nil.
			error("Internal error: Unrecognized declension type '" .. tostring(base.decl) .. "'")
		end
		decls[base.decl](base, stems)
	end
	handle_derived_slots_and_overrides(base)

	local function copy(from_slot, to_slot)
		base.forms[to_slot] = base.forms[from_slot]
	end

	if base.actual_number ~= base.number then
		local source_num = base.number == "sg" and "_s" or "_p"
		local dest_num = base.number == "sg" and "_p" or "_s"
		for case, _ in pairs(cases) do
			copy(case .. source_num, case .. dest_num)
		end
		-- The linked nominative does not depend on the case being iterated, so copy it once rather than on every
		-- loop iteration.
		copy("nom" .. source_num .. "_linked", "nom" .. dest_num .. "_linked")
		if base.actual_number ~= "both" then
			local erase_num = base.actual_number == "sg" and "_p" or "_s"
			for case, _ in pairs(cases) do
				base.forms[case .. erase_num] = nil
			end
			base.forms["nom" .. erase_num .. "_linked"] = nil
		end
	end
end
-- Return the variant code of `form`. Variant detection is currently disabled, so this always returns nil.
local function get_variants(form)
	-- FIXME: the disabled implementation was
	--   return
	--     form:find(com.VAR1) and "var1" or
	--     form:find(com.VAR2) and "var2" or
	--     form:find(com.VAR3) and "var3" or
	--     nil
	return nil
end
-- Compute the categories to add the noun to, as well as the annotation to display in the
-- declension title bar. We combine the code to do these functions as both categories and
-- title bar contain similar information.
--
-- Nothing is returned; the results are stored on the spec:
--   * `alternant_multiword_spec.annotation`: space-joined annotation parts (possibly containing " // " or "/"
--     separated alternatives).
--   * `alternant_multiword_spec.categories`: list of category names, each prefixed with "Czech ".
local function compute_categories_and_annotation(alternant_multiword_spec)
local all_cats = {}
-- Add the category "Czech " .. cattype to `all_cats` unless it is already there.
local function insert(cattype)
m_table.insertIfNot(all_cats, "Czech " .. cattype)
end
-- Both branches below are currently empty: the category insertions are commented out.
if alternant_multiword_spec.pos == "နာမ်" then
if alternant_multiword_spec.actual_number == "sg" then
-- insert("uncountable nouns")
elseif alternant_multiword_spec.actual_number == "pl" then
-- insert("pluralia tantum")
end
end
-- NOTE(review): `annotation` is declared but never assigned or read in this function.
local annotation
-- Accumulators filled in by do_word_spec() and assembled into the annotation at the end.
local annparts = {}
local decldescs = {}
local vowelalts = {}
local foreign = {}
local irregs = {}
local stemspecs = {}
-- nil until the first stem set is seen; afterwards the common `stems.reducible` value, or "mixed" if stem sets
-- disagree.
local reducible = nil
-- Convert gender and animacy codes to an English description such as "masculine animate". Animacy is only
-- included when the gender is neither "f" nor "n". Unrecognized codes contribute nothing, so the result may be
-- an empty string.
local function get_genanim(gender, animacy)
local gender_code_to_desc = {
m = "masculine",
f = "feminine",
n = "neuter",
none = nil,
}
local animacy_code_to_desc = {
an = "animate",
inan = "inanimate",
none = nil,
}
local descs = {}
table.insert(descs, gender_code_to_desc[gender])
if gender ~= "f" and gender ~= "n" then
-- masculine or "none" (e.g. certain pronouns and numerals)
table.insert(descs, animacy_code_to_desc[animacy])
end
return table.concat(descs, " ")
end
-- Collapse runs of spaces into a single space and strip surrounding whitespace.
local function trim(text)
text = text:gsub(" +", " ")
return mw.text.trim(text)
end
-- Process a single word spec (`base`): insert its categories and record its contribution to the annotation
-- accumulators above.
local function do_word_spec(base)
local actual_genanim = get_genanim(base.actual_gender, base.actual_animacy)
local declined_genanim = get_genanim(base.gender, base.animacy)
local genanim
if actual_genanim ~= declined_genanim then
genanim = ("%s (declined as %s)"):format(actual_genanim, declined_genanim)
-- insert("nouns with actual gender different from declined gender")
else
genanim = actual_genanim
end
if base.actual_gender == "m" then
-- Insert a category for 'Czech masculine animate nouns' or 'Czech masculine inanimate nouns'; the base categories
-- [[:Category:Czech masculine nouns]], [[:Czech animate nouns]] are auto-inserted.
insert(actual_genanim .. " " .. alternant_multiword_spec.plpos)
end
for _, stems in ipairs(base.stem_sets) do
local props = declprops[base.decl]
-- `props.cat` may be a function (called with base and stems), a single string, or a list of strings; normalize
-- to a list.
local cats = props.cat
if type(cats) == "function" then
cats = cats(base, stems)
end
if type(cats) == "string" then
cats = {cats}
end
local default_desc
for i, cat in ipairs(cats) do
-- Expand the placeholders GENPOS, GENDER and POS. A category with none of them gets " GENPOS" appended, and
-- one still lacking POS after GENPOS expansion gets " POS" appended.
if not cat:find("GENDER") and not cat:find("GENPOS") and not cat:find("POS") then
cat = cat .. " GENPOS"
end
cat = cat:gsub("GENPOS", "GENDER POS")
if not cat:find("POS") then
cat = cat .. " POS"
end
if i == #cats then
-- The last category, minus " POS" but with GENDER still unexpanded, is the fallback description.
default_desc = cat:gsub(" POS", "")
end
cat = cat:gsub("GENDER", actual_genanim)
cat = cat:gsub("POS", alternant_multiword_spec.plpos)
-- Need to trim `cat` because actual_genanim may be an empty string.
insert(trim(cat))
end
-- `props.desc` may be a function (called with base and stems) or a value; fall back to `default_desc`.
local desc = props.desc
if type(desc) == "function" then
desc = desc(base, stems)
end
desc = desc or default_desc
desc = desc:gsub("GENDER", genanim)
-- Need to trim `desc` because genanim may be an empty string.
m_table.insertIfNot(decldescs, trim(desc))
local vowelalt
if stems.vowelalt == "quant" then
vowelalt = "quant-alt"
-- insert("nouns with quantitative vowel alternation")
elseif stems.vowelalt == "quant-ě" then
vowelalt = "í-ě-alt"
-- insert("nouns with í-ě alternation")
end
if vowelalt then
m_table.insertIfNot(vowelalts, vowelalt)
end
if reducible == nil then
reducible = stems.reducible
elseif reducible ~= stems.reducible then
reducible = "mixed"
end
if stems.reducible then
-- insert("nouns with reducible stem")
end
if base.foreign then
m_table.insertIfNot(foreign, "foreign")
if not base.decllemma then
-- NOTE: there are nouns that use both 'foreign' and 'decllemma', e.g. [[Zeus]].
-- insert("nouns with regular foreign declension")
end
end
-- User-specified 'decllemma:' indicates irregular stem. Don't consider foreign nouns in -us/-os/-es, -um/-on or
-- silent -e (e.g. [[software]]) where this ending is simply dropped in oblique and plural forms as irregular;
-- there are too many of these and they are already categorized above as 'nouns with regular foreign declension'.
if base.decllemma then
m_table.insertIfNot(irregs, "irreg-stem")
-- insert("nouns with irregular stem")
end
m_table.insertIfNot(stemspecs, stems.vowel_stem)
end
end
-- Only the "key" word of the spec (index `first_noun`, defaulting to 1) is processed. If that entry is a set of
-- alternants, the key word of each alternant is processed instead.
local key_entry = alternant_multiword_spec.first_noun or 1
if #alternant_multiword_spec.alternant_or_word_specs >= key_entry then
local alternant_or_word_spec = alternant_multiword_spec.alternant_or_word_specs[key_entry]
if alternant_or_word_spec.alternants then
for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
key_entry = multiword_spec.first_noun or 1
if #multiword_spec.word_specs >= key_entry then
do_word_spec(multiword_spec.word_specs[key_entry])
end
end
else
do_word_spec(alternant_or_word_spec)
end
end
-- Assemble the annotation parts in display order.
if alternant_multiword_spec.actual_number == "sg" or alternant_multiword_spec.actual_number == "pl" then
-- not "both" or "none" (for [[sebe]])
table.insert(annparts, alternant_multiword_spec.actual_number == "sg" and "sg-only" or "pl-only")
end
if #decldescs == 0 then
table.insert(annparts, "indecl")
else
table.insert(annparts, table.concat(decldescs, " // "))
end
if #vowelalts > 0 then
table.insert(annparts, table.concat(vowelalts, "/"))
end
if reducible == "mixed" then
table.insert(annparts, "mixed-reducible")
elseif reducible then
table.insert(annparts, "reducible")
end
if #foreign > 0 then
table.insert(annparts, table.concat(foreign, " // "))
end
if #irregs > 0 then
table.insert(annparts, table.concat(irregs, " // "))
end
alternant_multiword_spec.annotation = table.concat(annparts, " ")
-- The two conditionals below are currently empty: their category insertions are commented out.
if #stemspecs > 1 then
-- insert("nouns with multiple stems")
end
if alternant_multiword_spec.actual_number == "both" and not m_table.deepEquals(alternant_multiword_spec.sg_genders, alternant_multiword_spec.pl_genders) then
-- insert("nouns that change gender in the plural")
end
alternant_multiword_spec.categories = all_cats
end
-- Hand the declined forms to iut.show_forms() for display formatting. The lemma list passed along
-- consists of the forms of the first slot in `potential_lemma_slots` that is populated.
local function show_forms(alternant_multiword_spec)
	local all_forms = alternant_multiword_spec.forms

	-- Collect the lemma forms from the first populated candidate slot only.
	local lemmas = {}
	for _, slot in ipairs(potential_lemma_slots) do
		local slot_forms = all_forms[slot]
		if slot_forms then
			for _, formobj in ipairs(slot_forms) do
				-- FIXME, now can support footnotes as qualifiers in headwords?
				table.insert(lemmas, formobj.form)
			end
			break
		end
	end

	-- Identity canonicalization; variant-code removal (com.remove_variant_codes) is disabled.
	local function canonicalize(form)
		return form
	end

	iut.show_forms(all_forms, {
		lemmas = lemmas,
		slot_table = alternant_multiword_spec.output_noun_slots,
		lang = lang,
		canonicalize = canonicalize,
	})
end
-- Build the wikitext for the declension table of `alternant_multiword_spec`.
--
-- Reads `forms`, `title`, `annotation`, `actual_number` and `has_clitic` from the spec, stores `title`,
-- `annotation` and `notes_clause` into `alternant_multiword_spec.forms`, and returns the result of
-- m_string_utilities.format() applied to the chosen table template with `forms` as the substitution table.
local function make_table(alternant_multiword_spec)
local forms = alternant_multiword_spec.forms
-- Opening part shared by all table layouts; every occurrence of MINWIDTH is replaced by `min_width` (in em).
-- NOTE(review): `{\op}` / `{\cl}` are presumably escapes for literal braces understood by
-- m_string_utilities.format() -- confirm.
local function template_prelude(min_width)
return rsub([=[
<div>
<div class="NavFrame" style="max-width:MINWIDTHem">
<div class="NavHead" style="background:var(--wikt-palette-lighterblue, #ebf4ff);">{title}{annotation}</div>
<div class="NavContent" style="overflow:auto">
{\op}| style="min-width:MINWIDTHem" class="inflection-table inflection"
|- class="rowgroup"
]=], "MINWIDTH", min_width)
end
-- Closing part shared by all table layouts, including the `{notes_clause}` placeholder.
local function template_postlude()
return [=[
|{\cl}{notes_clause}</div></div></div>]=]
end
-- Layout with both singular and plural columns.
local table_spec_both = template_prelude("45") .. [=[
! style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);" |
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | ကိုန်ဨကဝုစ်
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | ကိုန်ဗဟုဝစ်
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|မဒုၚ်ယၟု
| {nom_s}
| {nom_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ဗဳဇဂကူ
| {gen_s}
| {gen_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပြကမ္မကာရက
| {dat_s}
| {dat_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ကမ္မကာရက
| {acc_s}
| {acc_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပရေၚ်ဂယိုၚ်လမျီု
| {voc_s}
| {voc_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ခၞံဗဒှ်ဌာန်မတန်တဴ
| {loc_s}
| {loc_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်
| {ins_s}
| {ins_p}
]=] .. template_postlude()
-- Layout with a single number column. `number` replaces NUMBER (the column heading) and `numcode` replaces
-- CODE (the slot-name suffix, e.g. "s" or "p").
local function get_table_spec_one_number(number, numcode)
local table_spec_one_number = [=[
! style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);" |
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | NUMBER
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|မဒုၚ်ယၟု
| {nom_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ဗဳဇဂကူ
| {gen_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပြကမ္မကာရက
| {dat_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ကမ္မကာရက
| {acc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပရေၚ်ဂယိုၚ်လမျီု
| {voc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ခၞံဗဒှ်ဌာန်မတန်တဴ
| {loc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်
| {ins_CODE}
]=]
return template_prelude("30") .. table_spec_one_number:gsub("NUMBER", number):gsub("CODE", numcode) ..
template_postlude()
end
-- Single-number layout with separate "stressed" and "clitic" columns for the genitive, dative and accusative;
-- the remaining cases span both columns.
local function get_table_spec_one_number_clitic(number, numcode)
local table_spec_one_number_clitic = [=[
! rowspan=2 style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);"|
! colspan=2 style="background:var(--wikt-palette-lightblue, #d9ebff);" | NUMBER
|-
! style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);" | stressed
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | clitic
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|မဒုၚ်ယၟု
| colspan=2 | {nom_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ဗဳဇဂကူ
| {gen_CODE}
| {clitic_gen_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပြကမ္မကာရက
| {dat_CODE}
| {clitic_dat_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ကမ္မကာရက
| {acc_CODE}
| {clitic_acc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပရေၚ်ဂယိုၚ်လမျီု
| colspan=2 | {voc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ခၞံဗဒှ်ဌာန်မတန်တဴ
| colspan=2 | {loc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်
| colspan=2 | {ins_CODE}
]=]
return template_prelude("40") .. table_spec_one_number_clitic:gsub("NUMBER", number):gsub("CODE", numcode) ..
template_postlude()
end
-- Wrapper used for the footnote section; only emitted when `forms.footnote` is non-empty (see below).
local notes_template = [=[
<div style="width:100%;text-align:left;background:var(--wikt-palette-lightblue, #d9ebff);">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]=]
-- Title: the user-specified one if given, otherwise a default built from the lemma.
if alternant_multiword_spec.title then
forms.title = alternant_multiword_spec.title
else
forms.title = 'မလဟုတ်စှ်ေဆေၚ်စပ်ကဵု <i lang="cs">' .. forms.lemma .. '</i>'
end
-- Annotation: shown in smaller type in parentheses after the title, or omitted when empty.
local annotation = alternant_multiword_spec.annotation
if annotation == "" then
forms.annotation = ""
else
forms.annotation = " (<span style=\"font-size: smaller;\">" .. annotation .. "</span>)"
end
-- Column heading and slot suffix for the single-number layouts. Both stay nil when actual_number is "both",
-- in which case they are not used because `table_spec_both` is selected below.
local number, numcode
if alternant_multiword_spec.actual_number == "sg" then
number, numcode = "singular", "s"
elseif alternant_multiword_spec.actual_number == "pl" then
number, numcode = "plural", "p"
elseif alternant_multiword_spec.actual_number == "none" then -- used for [[sebe]]
number, numcode = "", "s"
end
-- Choose the layout: two-number table, clitic table, or plain single-number table.
local table_spec =
alternant_multiword_spec.actual_number == "both" and table_spec_both or
alternant_multiword_spec.has_clitic and get_table_spec_one_number_clitic(number, numcode) or
get_table_spec_one_number(number, numcode)
forms.notes_clause = forms.footnote ~= "" and
m_string_utilities.format(notes_template, forms) or ""
return m_string_utilities.format(table_spec, forms)
end
-- Build the deduplicated list of gender specs for the headword. Each word spec contributes
-- GENDER-ANIMACY, with animacy "inan" abbreviated to "in" and "-p" appended when the noun's
-- actual number is "pl".
local function compute_headword_genders(alternant_multiword_spec)
	local number_suffix = alternant_multiword_spec.actual_number == "pl" and "-p" or ""
	local genders = {}

	local function record_gender(base)
		local animacy = base.animacy == "inan" and "in" or base.animacy
		m_table.insertIfNot(genders, base.gender .. "-" .. animacy .. number_suffix)
	end

	iut.map_word_specs(alternant_multiword_spec, record_gender)
	return genders
end
-- Externally callable entry point: parse the user-specified arguments in `parent_args` and decline the noun.
-- `from_headword` enables the additional headword-only parameters (head, lemma, g, f, m, adj, dim, id).
--
-- Returns the alternant multiword spec object, whose `forms` field maps each slot to a list of objects
-- {form=FORM, footnotes=FOOTNOTES}; slots with no values are absent. If the `json` argument is set, the spec
-- (minus its `args` field) is returned as a JSON string instead.
function export.do_generate_forms(parent_args, from_headword)
	local params = {
		[1] = {required = true, template_default = "bůh<m.an.#.voce>"},
		title = true,
		pagename = true,
		json = {type = "boolean"},
		pos = true,
	}
	if from_headword then
		for _, list_param in ipairs({"head", "lemma", "g", "f", "m", "adj", "dim"}) do
			params[list_param] = {list = true}
		end
		params.id = {}
	end
	local args = m_para.process(parent_args, params)

	local spec = iut.parse_inflected_text(args[1], {
		parse_indicator_spec = parse_indicator_spec,
		angle_brackets_omittable = true,
		allow_blank_lemma = true,
	})
	spec.title = args.title
	spec.args = args

	-- Page name precedence: explicit pagename=, then (headword only) the first head=, then the current page.
	local pagename = args.pagename
	if not pagename then
		pagename = from_headword and args.head[1] or mw.loadData("Module:headword/data").pagename
	end
	normalize_all_lemmas(spec, pagename)
	set_all_defaults_and_check_bad_indicators(spec)

	-- These need to happen before detect_all_indicator_specs() so that adjectives get their genders and numbers set
	-- appropriately, which are needed to correctly synthesize the adjective lemma.
	propagate_properties(spec, "animacy", "inan", "mixed")
	propagate_properties(spec, "number", "both", "both")
	-- FIXME, the default value (third param) used to be 'm' with a comment indicating that this applied only to
	-- plural adjectives, where it didn't matter; but in Czech, plural adjectives are distinguished for gender and
	-- animacy. Make sure 'mixed' works.
	propagate_properties(spec, "gender", "mixed", "mixed")
	detect_all_indicator_specs(spec)
	-- Propagate 'actual_number' after calling detect_all_indicator_specs(), which sets 'actual_number' for adjectives.
	propagate_properties(spec, "actual_number", "both", "both")
	determine_noun_status(spec)
	set_pos(spec)

	spec.output_noun_slots = get_output_noun_slots(spec)
	iut.inflect_multiword_or_alternant_multiword_spec(spec, {
		skip_slot = function(slot)
			return skip_slot(spec.actual_number, slot)
		end,
		slot_table = spec.output_noun_slots,
		get_variants = get_variants,
		inflect_word_spec = decline_noun,
	})

	compute_categories_and_annotation(spec)
	spec.genders = compute_headword_genders(spec)

	if not args.json then
		return spec
	end
	-- JSON output: drop the processed arguments before serializing.
	spec.args = nil
	return require("Module:JSON").toJSON(spec)
end
-- Entry point for {{cs-ndecl}}. Template-callable: reads the arguments of the invoking template (the parent
-- frame), declines the noun, and returns the wikitext of the declension table followed by the formatted
-- categories. When do_generate_forms() yields a string (the JSON case), that string is returned unchanged.
function export.show(frame)
	local spec = export.do_generate_forms(frame:getParent().args)
	if type(spec) == "string" then
		-- JSON return value
		return spec
	end
	show_forms(spec)
	local table_text = make_table(spec)
	local category_text =
		require("Module:utilities").format_categories(spec.categories, lang, nil, nil, force_cat)
	return table_text .. category_text
end
-- Module result: the table holding the exported functions (do_generate_forms, show).
return export
glgzx1kvlpm46kr2h45q4zd3ysg5gc3
395161
395158
2026-05-19T18:25:18Z
咽頭べさ
33
395161
Scribunto
text/plain
-- Module table. NOTE(review): presumably populated with the public functions and returned at the end of the
-- module; that part is outside the portion reviewed here.
local export = {}
--[=[
Authorship: Ben Wing <benwing2>
]=]
--[=[
TERMINOLOGY:
-- "slot" = A particular combination of case/number.
Example slot names for nouns are "gen_s" (genitive singular) and
"voc_p" (vocative plural). Each slot is filled with zero or more forms.
-- "form" = The declined Czech form representing the value of a given slot.
-- "lemma" = The dictionary form of a given Czech term. Generally the nominative
masculine singular, but may occasionally be another form if the nominative
masculine singular is missing.
]=]
--[=[
FIXME:
1. Finish synthesize_singular_lemma(). [DONE]
2. Implement feminines in -ea, -oa/-ua, -ia, -oe. [DONE]
3. Implement "mixed" masculine nouns in -l, -n, -t (each different, also inanimate vs. animate). [DONE]
4. Allow 'stem:' override after vowel-final words like [[centurio]]. [DONE using decllemma:]
5. Support masculine foreign nouns in -us/-os/-es. [DONE]
6. Support masculine foreign nouns in -ius/-etc. [DONE]
7. Support masculine foreign nouns in unpronounced final -e (e.g. [[software]]). [DONE]
8. Support neuter foreign nouns in -um/-on. [DONE]
9. Support neuter foreign nouns in -ium/-ion. [DONE]
10. Support paired body parts, e.g. [[ruka]], [[noha]], [[oko]], [[ucho]], [[koleno]], [[rameno]]. [WON'T DO;
JUST SEPARATE THE MEANINGS AND GIVE THEM DIFFERENT DECLENSIONS]
11. Support masculine nouns in -e/ě that are neuter in the plural. [DONE]
12. Correctly handle -e vs. -ě, e.g. soft neuters have both [[kutě]] and [[poledne]]. [DONE]
13. Always use specified lemma in nom_pl and maybe acc_pl when plurale tantum. [DONE]
14. Support feminine nouns in -ca/-ča/-ša/-ža. [DONE]
15. Support feminine nouns in -ja/-ňa. [DONE]
16. Support mixed i-stem feminine nouns. [DONE]
17. Support "c as k" feminine nouns like [[ayahuasca]].
18. Support 'declgender'. [DONE]
19. Support pronouns with clitics. [DONE]
20. Singular-only and plural-only terms should not have number in accelerator form. [DONE]
21. Support [[úterý]] (like neuters in -í). [DONE]
22. Support feminines in -i ([[máti]], [[pramáti]]). [DONE]
23. Support foreign nouns in -ie ([[zombie]], [[hippie]], [[yuppie]]). [DONE]
24. Support foreign nouns in -í ([[muftí]], [[qádí]]). [DONE]
25. Support manual declensions. [DONE]
26. Support numerals. [DONE]
27. Allow for reducible spec in pluralia tantum and dereduce accordingly; also automatically assign reducibility
if singular stem ends in -Ck or -Cc. [DONE]
28. Use `pos` value in all categories.
29. Support determiners [[kolik]], [[tolik]], [[několik]], [[mnoho]]. [DONE]
30. Support a '.velar' indicator for foreign names whose pronunciation but not spelling ends in a velar: [[Remarque]],
[[Braque]], [[Mike]], [[Drake]], [[Jake]] with vocative 'Remarquu', 'Braquu', 'Mikeu', 'Drakeu', 'Jakeu'. In
general we need more thought around such foreign names; essentially, for names in a silent e, sometimes the -e
is dropped in all oblique forms (e.g. [[Shakespeare]], [[Pierre]], [[Barrande]], [[La Fontaine]], [[Braque]],
[[Remarque]] with gen sg 'Shakespeara', 'Pierra', Barranda', 'La Fontaina', 'Braqua', 'Remarqua') and sometimes
it's kept in all oblique forms except those ending in an -e, where -ee is avoided (e.g. [[Pete]], [[Gable]],
[[Jake]], [[White]], [[Byrne]], [[Mike]], [[Drake]] with gen sg 'Petea', 'Gablea' etc. and voc sg 'Pete', 'Gable'
but 'Jakeu', 'Mikeu'). Sometimes there are doublets, e.g. [[Hubble]] and [[Hume]] have gen sg 'Hubbla/Hubblea'
(where the second form is used among astronomers in a technical sense and the first form may be more popular)
and 'Huma/Humea'. We already have a '.foreign' indicator that when applied to a noun ending in -e drops the -e
in oblique forms e.g. for [[software]]. We may need to combine this with an explicit indicator of hard, soft or
velar as there will be names with silent -e and preceding soft consonant e.g. [[Bruce]], [[Coleridge]]. Note
that when the -e is kept it is still dropped before front vowels, hence dat sg 'Bruci'/'Bruceovi'. Need some
investigation in IJP and cswikt. [.velar DONE]
31. Support 'declnumber'. [DONE]
32. Support foreign nouns in -ee ([[Yankee]]). [DONE]
]=]
-- Language object looked up by the code "cs".
local lang = require("Module:languages").getByCode("cs")
-- Helper modules used throughout this module.
local m_table = require("Module:table")
local m_links = require("Module:links")
local m_string_utilities = require("Module:string utilities")
local iut = require("Module:inflection utilities")
local put = require("Module:parse utilities")
local m_para = require("Module:parameters")
local com = require("Module:cs-common")
-- Only the module name is stored here; the module itself is not loaded at this point.
local en_utilities_module = "Module:en-utilities"
-- Short aliases for mw.ustring / mw.text library functions.
local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local usub = mw.ustring.sub
local uupper = mw.ustring.upper
local ulower = mw.ustring.lower
local force_cat = false -- set to true to make categories appear in non-mainspace pages, for testing
-- Like rsubn(), but yields only the substituted string; the substitution count is dropped.
local function rsub(term, foo, bar)
	-- The parentheses truncate rsubn()'s results to the first value.
	return (rsubn(term, foo, bar))
end
-- Like rsubn(), but the second return value is a boolean telling whether at least one
-- substitution was made, instead of the substitution count.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Record a tracking hit under "cs-noun/" .. track_id via Module:debug/track. Always returns true.
local function track(track_id)
	local tracker = require("Module:debug/track")
	tracker("cs-noun/" .. track_id)
	return true
end
-- Map from slot name to the accelerator form tag string for that slot ("|"-separated tags). This table is
-- shared module-level state and must be treated as read-only.
local output_noun_slots = {
	nom_s = "nom|s",
	nom_s_linked = "nom|s",
	gen_s = "gen|s",
	gen_s_linked = "gen|s",
	clitic_gen_s = "clitic|gen|s",
	dat_s = "dat|s",
	clitic_dat_s = "clitic|dat|s",
	acc_s = "acc|s",
	clitic_acc_s = "clitic|acc|s",
	voc_s = "voc|s",
	loc_s = "loc|s",
	ins_s = "ins|s",
	nom_p = "nom|p",
	nom_p_linked = "nom|p",
	gen_p = "gen|p",
	dat_p = "dat|p",
	acc_p = "acc|p",
	voc_p = "voc|p",
	loc_p = "loc|p",
	ins_p = "ins|p",
}

-- Return the slot table to use for `alternant_multiword_spec`.
--
-- When the spec's `actual_number` is "both", the shared table is returned as-is. Otherwise a fresh table with
-- the same keys is returned in which the trailing "|s" / "|p" number tag has been removed from every
-- accelerator form.
--
-- The shared table is deliberately NOT modified in place: doing so would strip the number tags for every later
-- call made in the same Lua invocation, including calls for nouns that have both numbers.
local function get_output_noun_slots(alternant_multiword_spec)
	if alternant_multiword_spec.actual_number == "both" then
		return output_noun_slots
	end
	local numberless_slots = {}
	for slot, accel_form in pairs(output_noun_slots) do
		-- Parenthesized to keep only the substituted string, not gsub's match count.
		numberless_slots[slot] = (accel_form:gsub("|[sp]$", ""))
	end
	return numberless_slots
end
-- Candidate slots for the lemma, in order of preference.
local potential_lemma_slots = {"nom_s", "nom_p", "gen_s"}
-- Set of recognized case codes (the prefixes of the slot names).
local cases = {
nom = true,
gen = true,
dat = true,
acc = true,
voc = true,
loc = true,
ins = true,
}
-- Set of case codes that have "clitic_"-prefixed slots.
local clitic_cases = {
gen = true,
dat = true,
acc = true,
}
-- Dereduce `stem` using com.dereduce(). Unlike the underlying function, this raises an error
-- instead of returning a false value when dereduction fails.
local function dereduce(base, stem)
	local result = com.dereduce(base, stem)
	if result then
		return result
	end
	error("Unable to dereduce stem '" .. stem .. "'")
end
--[=[
Maybe modify the stem and/or ending in certain special cases:
1. Final -e in vocative singular triggers first palatalization of the stem in some cases (e.g. hard masc).
2. Endings beginning with ě, i, í trigger second palatalization, as does -e in the loc_s.
NOTE: Correctly handling -e vs. -ě and -tdn/-ťďň alternations is tricky. We have to deal with the following:
1. Soft-stem and t-stem neuters can have either -e or -ě. With coronals we have both [[poledne]] "noon" with /n/ and
[[kutě]] "bed" with /ť/. We also have soft-stem neuter [[Labe]] with /b/ vs. t-stem neuter [[hříbě]] with /bj/.
2. Underlying palatal coronals maintain their nature before back vowels and when not followed by a vowel, e.g. [[štěně]]
"puppy" becomes 'štěňata' in the nom/acc/voc plural and [[přítelkyně]] "girlfriend" becomes 'přítelkyň' in the gen
plural, but underlying palatal labials become non-palatal, e.g. [[hříbě]] "foal" becomes 'hříbata' in the nom/acc/voc
plural.
3. There are at least four types of endings beginning with '-e':
a. "maintaining" endings, e.g. instrumental singular '-em', which do not change the nature of the consonant, e.g.
[[zákon]] "law" becomes 'zákonem' while [[vězeň]] "prisoner" becomes 'vězeněm';
b. "palatalizing" endings, e.g. locative singular '-e', which palatalizes t/d/n (and more generally applies the
Slavic second palatalization, e.g. k -> c, r -> ř), e.g. [[žena]] "woman" becomes 'ženě';
c. "depalatalizing" endings, e.g. feminine i-stem dative plural '-em', which actively depalatalize ť/ď/ň, e.g.
[[oběť]] "sacrifice, victim" becomes 'obětem';
d. vocative singular '-e' of hard-stem masculines, which applies the Slavic first palatalization in some
circumstances (e.g. k -> č, Cr -> Cř, sometimes c -> č).
The way we handle this is as follows:
1. We maintain the underlying stems always in their "pronounced" form, i.e. if the last consonant is pronounced ť/ď/ň
we maintain the stem in that form, but if pronounced t/d/n, we use those consonants. Hence neuter [[poledne]] "noon"
has stem 'poledn-' but neuter [[štěně]] "puppy" has stem 'štěň'. If the stem ends in labial + /j/, we use a special
TEMP_SOFT_LABIAL character after the labial (rather than 'j', in case of stems that actually have a written 'j' in
them such as [[banjo]]).
2. We signal types (a), (b) and (c) above using respectively 'e', 'ě' and 'E'. Type (d) uses 'e' and sets
`base.palatalize_voc`.
3. In combine_stem_ending(), we convert the stem back to the written form before adding the ending. If the ending begins
with -e, this may entail converting -e to -ě, and in all cases -E is converted to -e. "Converting to the written
form" converts ť/ď/ň to plain equivalents and deletes TEMP_SOFT_LABIAL before -e, converting -e to -ě with such
consonants. The same conversions happen before other front vowels -ě/-é/-i/-í, which don't allow ť/ď/ň to
precede, and in all cases with TEMP_SOFT_LABIAL, which is not an actual consonant.
4. If the ending is specified using -ě, this is maintained after plain coronals and labials in combine_stem_ending(),
and converted to -e in other cases.
5. Applying the first and second palatalization happens below in apply_special_cases().
]=]
-- Adjust the stem for the special cases handled here, based on the slot and the ending. Returns `stem, ending`.
-- `ending` is returned unchanged; the returned stem is either a single string or a two-element list of
-- alternative stems (when both an unmodified and a modified stem are possible).
local function apply_special_cases(base, slot, stem, ending)
-- NOTE(review): `palatalize_voc` is declared but never used in this function.
local palatalize_voc
if base.c_as_k and rfind(ending, "^[aouyáóúůý]") then
-- Before these vowels, offer both the stem as-is and a variant with final 'c' replaced by 'k'.
local k_stem = rsub(stem, "c$", "k")
stem = {stem, k_stem}
-- NOTE(review): this branch tests base["-velar"], whereas the branch further below tests base.velar;
-- presumably these are two distinct indicators -- confirm.
elseif slot == "voc_s" and ending == "e" and base.palatalize_voc and not base["-velar"] then
-- Don't palatalize words like [[hadíth]] with silent -h.
local palstem = com.apply_first_palatalization(stem)
-- According to IJP, nouns ending in -Cr palatalize in the vocative, but those in -Vr don't. In reality,
-- though, it's more complex. It appears that animate nouns in -Cr tend to palatalize but inanimate nouns
-- do it optionally. Specifics:
-- -- Inanimate nouns with optional palatalization (ř listed second): [[alabastr]], [[amfiteátr]], [[barometr]],
-- [[centilitr]], [[centrimetr]], [[decilitr]], [[decimetr]], [[Dněstr]], [[filtr]], [[galvanometr]],
-- [[hektolitr]], [[kalorimetr]], [[litr]], [[lustr]], [[manometr]], [[manšestr]], [[metr]] (NOTE: is both
-- animate and inanimate), [[mikrometr]], [[miliampérmetr]], [[mililitr]], [[nanometr]], [[orchestr]],
-- [[parametr]], [[piastr]], [[půllitr]], [[radiometr]], [[registr]], [[rotmistr]], [[semestr]], [[skútr]],
-- [[spirometr]], [[svetr]], [[šutr]], [[tachometr]], [[titr]], [[vítr]] (NOTE: has í-ě alternation),
-- [[voltmetr]]; [[bagr]], [[bunkr]], [[cedr]], [[Dněpr]], [[fofr]], [[habr]] (NOTE: ř listed first), [[hadr]]
-- (NOTE: ř listed first), [[hamr]], [[kafr]], [[kepr]], [[kopr]], [[koriandr]], [[krekr]], [[kufr]],
-- [[Kypr]], [[lágr]], [[lógr]], [[manévr]], [[masakr]], [[okr]], [[oleandr]], [[pulovr]], [[šlágr]],
-- [[vichr]] (NOTE: ř listed first), [[žánr]]
--
-- -- Inanimate nouns that don't palatalize: [[ampérmetr]], [[anemometr]], [[sfygmomanometr]], [[sfygmometr]];
-- [[dodekaedr]], [[Hamr]], [[ikozaedr]], [[kvádr]], [[sandr]], [[torr]]
--
-- -- Animate nouns that palatalize: [[arbitr]], [[bratr]], [[ekonometr]], [[foniatr]], [[fotr]], [[geometr]],
-- [[kmotr]], [[lotr]], [[magistr]], [[metr]] (NOTE: is both animate and inanimate), [[ministr]], [[mistr]],
-- [[pediatr]], [[Petr]], [[psychiatr]], [[purkmistr]], [[setr]], [[šamstr]]; [[bobr]], [[fajnšmekr]],
-- [[humr]], [[hypochondr]], [[kapr]], [[lídr]], [[negr]], [[obr]], [[salamandr]], [[sólokapr]], [[švagr]],
-- [[tygr]], [[zlobr]], [[zubr]]
--
-- -- Animate nouns with optional palatalization (ř listed first): [[Silvestr]]; [[Alexandr]], [[snajpr]]
--
-- Note the inconsistencies, e.g. [[sfygmomanometr]] and [[ampérmetr]] don't palatalize but [[manometr]] and
-- [[miliampérmetr]] do it optionally. In reality, inanimate vocatives are extremely rare so this may not be the
-- final word.
if base.animacy == "inan" and rfind(stem, com.cons_c .. "r$") and not rfind(stem, "rr$") then
-- optional r -> ř
stem = {stem, palstem}
else
stem = palstem
end
elseif rfind(ending, "^[ěií]") or slot == "loc_s" and ending == "e" then
if rfind(stem, "ck$") and rfind(base.lemma, "ck$") then
-- IJP says nouns in -ck (back, comeback, crack, deadlock, hatchback, hattrick, joystick, paperback, quarterback,
-- rock, soundtrack, track, truck) simplify the resulting -cc ending in the loc_p to -c. Similarly [[quarterback]]
-- has nom_pl 'quarterbaci, quarterbackove'. We need to check the lemma as well because nouns in -cek don't do this.
stem = rsub(stem, "ck$", "k")
end
if base.velar then
-- [[petanque]] /petank/ -> loc pl 'petancích'.
stem = rsub(stem, "gu$", "g")
stem = rsub(stem, "qu$", "k")
end
-- loc_s of hard masculines is sometimes -e/ě; the user might indicate this as -e, which we should handle
-- correctly
stem = com.apply_second_palatalization(stem)
end
return stem, ending
end
-- Tell whether `slot` should be skipped for a noun restricted to `number`: plural slots (ending in "_p") for
-- "sg" and singular slots (ending in "_s") for "pl". The result is truthy (the match position) when the slot
-- is to be skipped and false or nil otherwise.
local function skip_slot(number, slot)
	if number == "sg" then
		return rfind(slot, "_p$") or false
	elseif number == "pl" then
		-- Parenthesized to return only the match position.
		return (rfind(slot, "_s$"))
	end
	return false
end
-- Core routine: combine stem(s) with ending(s) and insert the resulting form(s) into `slot` of `base.forms`.
--
-- `stems` is either the stems object handed to the declension functions or a plain string that overrides the
-- stem. (NOTE: when passing a string, pass `stems.footnotes` yourself as `footnotes`, since it is otherwise
-- lost; use iut.combine_footnotes() to merge it with any footnote(s) of your own.)
-- `endings` is a single ending string or a list of endings; when it is nil nothing is inserted. The special
-- ending "-" ignores `stems` and uses the actual lemma as the stem, which matters when the user gave a
-- `decllemma:` different from the actual lemma or specified '.foreign' (which has a similar effect).
-- `footnotes` is combined with the footnotes of `base` and of the stems object.
local function add(base, slot, stems, endings, footnotes)
	-- Skipping is decided by the declined number; if the actual number is different, decline_noun() corrects
	-- this at the end.
	if not endings or skip_slot(base.number, slot) then
		return
	end

	local stem_notes = nil
	if type(stems) == "table" then
		stem_notes = stems.footnotes or nil
	end
	footnotes = iut.combine_footnotes(iut.combine_footnotes(base.footnotes, stem_notes), footnotes)

	if type(endings) == "string" then
		endings = {endings}
	end

	-- Slots that take the oblique stem when `oblique_slots` is "all-oblique".
	local all_oblique_slots = {gen_p = true, ins_s = true, dat_p = true, loc_p = true, ins_p = true}

	-- Pick the stem from the stems object: oblique vs. regular according to `oblique_slots` and the slot,
	-- crossed with whether the ending begins with a vowel.
	local function pick_stem(ending)
		local vowel_initial = rfind(ending, "^" .. com.vowel_c)
		local mode = stems.oblique_slots
		local use_oblique = mode == "all" or
			mode == "gen_p" and slot == "gen_p" or
			mode == "all-oblique" and all_oblique_slots[slot]
		if use_oblique then
			return vowel_initial and stems.oblique_vowel_stem or stems.oblique_nonvowel_stem
		end
		return vowel_initial and stems.vowel_stem or stems.nonvowel_stem
	end

	local function combine_stem_ending(stem, ending)
		return com.combine_stem_ending(base, slot, stem, ending)
	end

	for _, ending in ipairs(endings) do
		local stem
		if ending == "-" then
			-- Use the lemma regardless of `stems`.
			stem, ending = base.actual_lemma, ""
		elseif type(stems) == "string" then
			stem = stems
		else
			stem = pick_stem(ending)
		end
		-- Maybe apply the first or second Slavic palatalization.
		stem, ending = apply_special_cases(base, slot, stem, ending)
		ending = iut.combine_form_and_footnotes(ending, footnotes)
		iut.add_forms(base.forms, slot, stem, ending, combine_stem_ending)
	end
end
-- Apply the slot overrides in `base.overrides` for every slot accepted by the predicate `do_slot`. An
-- overridden slot is flagged in `base.slot_overridden`, its existing forms are discarded, and the override
-- values are inserted instead: verbatim for "full" overrides, otherwise as endings added to each stem set.
-- Raises an error if an override names a slot that the declined number excludes.
local function process_slot_overrides(base, do_slot)
	-- Insert the form(s) produced by one override value into `slot`.
	local function apply_value(slot, is_full, value)
		local form = value.form
		local notes = iut.combine_footnotes(base.footnotes, value.footnotes)
		if is_full then
			if form ~= "" then
				iut.insert_form(base.forms, slot, {form = form, footnotes = notes})
			end
			return
		end
		-- Convert a null ending to "-" in the acc/voc sg slots so that e.g. [[Kerberos]] declared as
		-- <m.sg.foreign.gena:u.acc-:a> works correctly and generates accusative 'Kerberos/Kerbera' not
		-- #'Kerber/Kerbera'.
		if form == "" and (slot == "acc_s" or slot == "voc_s") then
			form = "-"
		end
		for _, stems in ipairs(base.stem_sets) do
			add(base, slot, stems, form, notes)
		end
	end

	for slot, overrides in pairs(base.overrides) do
		-- The check uses the declined number; if the actual number is different, decline_noun() corrects
		-- this at the end.
		if skip_slot(base.number, slot) then
			error("Override specified for invalid slot '" .. slot .. "' due to '" .. base.number .. "' number restriction")
		end
		if do_slot(slot) then
			base.slot_overridden[slot] = true
			base.forms[slot] = nil
			for _, override in ipairs(overrides) do
				for _, value in ipairs(override.values) do
					apply_value(slot, override.full, value)
				end
			end
		end
	end
end
-- Add a whole declension: one ending (or list of endings, or nil for none) per slot, all combined with `stems`
-- through add(). The nominative singular always uses the lemma ("-").
local function add_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
	nom_p, gen_p, dat_p, acc_p, loc_p, ins_p, footnotes
)
	-- Add each {slot, ending} pair in order.
	local function add_all(slot_endings)
		for _, pair in ipairs(slot_endings) do
			add(base, pair[1], stems, pair[2], footnotes)
		end
	end

	add_all {
		{"nom_s", "-"},
		{"gen_s", gen_s},
		{"dat_s", dat_s},
		{"acc_s", acc_s},
		{"voc_s", voc_s},
		{"loc_s", loc_s},
		{"ins_s", ins_s},
	}

	if base.number == "pl" then
		-- For a plurale tantum noun, take the nominative plural (and the accusative plural, when its endings
		-- equal the nominative's) from the user-specified lemma instead of generating it from the synthesized
		-- singular, which may not match the specified lemma (e.g. [[tvargle]] "Olomouc cheese" using
		-- <m.pl.mixed> would try to generate 'tvargle/tvargly', and [[peníze]] "money" using <m.pl.#ě.genpl->
		-- would try to generate 'peněze').
		if m_table.deepEquals(nom_p, acc_p) then
			acc_p = "-"
		end
		nom_p = "-"
	end

	add_all {
		{"nom_p", nom_p},
		{"gen_p", gen_p},
		{"dat_p", dat_p},
		{"acc_p", acc_p},
		{"loc_p", loc_p},
		{"ins_p", ins_p},
	}
end
-- Singular-only wrapper around add_decl(): the six singular endings are forwarded and every plural ending is
-- passed as nil.
local function add_sg_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s, footnotes
)
	local none = nil
	add_decl(base, stems,
		gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		none, none, none, none, none, none,
		footnotes)
end
-- Plural-only wrapper around add_decl(): every singular ending is passed as nil, the nominative plural ending is
-- "-", and the remaining plural endings are forwarded.
local function add_pl_only_decl(base, stems,
	gen_p, dat_p, acc_p, loc_p, ins_p, footnotes
)
	local none = nil
	add_decl(base, stems,
		none, none, none, none, none, none,
		"-", gen_p, dat_p, acc_p, loc_p, ins_p,
		footnotes)
end
-- Add singular endings including the clitic genitive/dative/accusative slots. The nominative singular ("-") is
-- added unless `no_nom_s` is truthy. No plural slots are touched.
local function add_sg_decl_with_clitic(base, stems,
	gen_s, clitic_gen_s, dat_s, clitic_dat_s, acc_s, clitic_acc_s, voc_s, loc_s, ins_s, footnotes, no_nom_s
)
	local function put(slot, ending)
		add(base, slot, stems, ending, footnotes)
	end

	if not no_nom_s then
		put("nom_s", "-")
	end
	put("gen_s", gen_s)
	put("clitic_gen_s", clitic_gen_s)
	put("dat_s", dat_s)
	put("clitic_dat_s", clitic_dat_s)
	put("acc_s", acc_s)
	put("clitic_acc_s", clitic_acc_s)
	put("voc_s", voc_s)
	put("loc_s", loc_s)
	put("ins_s", ins_s)
end
-- Apply overrides and fill in the slots that are copied from other slots (voc_p, acc_s, clitic_acc_s), then build
-- the "<slot>_linked" variants of the potential lemma slots. Resets `base.slot_overridden`.
local function handle_derived_slots_and_overrides(base)
	-- Slots whose default value is copied from another slot.
	local derived_slots = {voc_p = true, acc_s = true, clitic_acc_s = true}
	local function slot_is_derived(slot)
		return derived_slots[slot] == true
	end
	local function slot_is_primary(slot)
		return not slot_is_derived(slot)
	end

	base.slot_overridden = {}

	-- Overrides of primary slots go first, so that an override of a source slot (e.g. nom_p) propagates to the
	-- slot derived from it.
	process_slot_overrides(base, slot_is_primary)

	-- Now generate the derived slots.
	if not (base.pron or base.det) then
		-- Pronouns don't have a vocative (singular or plural).
		iut.insert_forms(base.forms, "voc_p", base.forms.nom_p)
	end
	local inanimate = base.animacy == "inan"
	if not (base.forms.acc_s or base.slot_overridden.acc_s) then
		-- Accusative singular copies the nominative for inanimates, otherwise the genitive.
		local source_slot = inanimate and "nom_s" or "gen_s"
		iut.insert_forms(base.forms, "acc_s", base.forms[source_slot])
	end
	if not (base.forms.clitic_acc_s or base.slot_overridden.clitic_acc_s) then
		local source_slot = inanimate and "nom_s" or "clitic_gen_s"
		iut.insert_forms(base.forms, "clitic_acc_s", base.forms[source_slot])
	end

	-- Finally the overrides of the derived slots themselves, so that these can be overridden too.
	process_slot_overrides(base, slot_is_derived)

	-- Compute linked versions of potential lemma slots, for use in {{cs-noun}}. A form identical to the lemma is
	-- replaced by the original lemma (before removing links) when that original lemma contains links.
	local function relink(form)
		if form ~= base.orig_lemma_no_links then
			return form
		end
		if rfind(base.orig_lemma, "%[%[") then
			return base.orig_lemma
		end
		return form
	end
	for _, slot in ipairs(potential_lemma_slots) do
		iut.insert_forms(base.forms, slot .. "_linked", iut.map_forms(base.forms[slot], relink))
	end
end
-- Table mapping declension types (e.g. "hard-m", "soft-f") to the functions that decline a noun of that type. Each
-- function takes two arguments, `base` and `stems`; the latter specifies the computed stems (vowel vs. non-vowel,
-- singular vs. plural) and whether the noun is reducible and/or has vowel alternations in the stem. Most of the
-- specifics of determining which stem to use and how to modify it for the given ending are handled in add_decl();
-- the declension functions just need to generate the appropriate endings and pass them to add_decl().
local decls = {}
-- Table specifying additional properties for declension types, keyed by the same type names as `decls`. Every
-- declension type must have such a table, which specifies which category or categories to add and what annotation to
-- show in the title bar of the declension table.
--
-- * Only the `cat` property of this table is mandatory; there is also a `desc` property to specify the annotation, but
--   this can be omitted and the annotation will then be computed from the `cat` property. The `cat` property is either
--   a string, a list of strings or a function (of two arguments, `base` and `stems` as above) returning a string or
--   list of strings. The string can contain the keywords GENDER to substitute the gender (and animacy for masculine
--   nouns) and POS (to substitute the pluralized part of speech). The keyword GENPOS is equivalent to 'GENDER POS'. If
--   no keyword is present, ' GENPOS' is added onto the end. If only GENDER is present, ' POS' is added onto the end.
--   In all cases, the language name is added onto the beginning to form the full category name.
-- * The `desc` property is of the same form as the `cat` property and specifies the annotation to display in the title
--   bar (which may have the same format as the category minus the part of speech, or may be abbreviated). The value
--   may not be a list of strings, as only one annotation is displayed. If omitted, it is derived from the category
--   spec(s) by taking the last category (if more than one is given) and removing ' POS' before keyword substitution.
local declprops = {}
-- Return the default masculine animate nominative plural ending(s) given `base` and `stems`. This is called for hard
-- and soft masculines ending in a consonant, but not for nouns ending in a vowel, which have their own defaults
-- (particularly nouns in -a, where -ista/-ita/-asta behave differently from other nouns in -a).
--
-- The return value is either a single ending (a string) or a list of endings in order of preference. The rules are
-- tried in order and the first one that matches wins; the overall default is "i".
local function default_masc_animate_nom_pl(base, stems)
	local lemma = base.lemma
	local starts_lower = "^" .. com.lowercase_c
	local starts_upper = "^" .. com.uppercase_c

	-- [monosyllabic words: Dánové, Irové, králové, mágové, Rusové, sokové, synové, špehové, zběhové, zeťové, manové,
	-- danové (but Žid → Židé, Čech → Češi).] -- There are too many exceptions to this to make a special rule. It is
	-- better to use the overall default of -i and require that cases with -ove, -ove/-i, -i/-ove, etc. use overrides.
	-- Disabled rule: com.is_monosyllabic(base.lemma) and "ové"

	-- reducible terms in -Cek; order of -ové vs. -i sometimes varies:
	-- [[fracek]] (ové/i), [[klacek]] (i/ové), [[macek]] (ové/i), [[nácek]] (i/ové), [[prcek]] (ové/i), [[racek]] (ové/i);
	-- [[bazilišek]] (i/ové), [[černoušek]] (i/ové), [[drahoušek]] (ové/i), [[fanoušek]] (i/ové), [[františek]] (an/inan,
	-- ends in -i/-y but not -ové), [[koloušek]] (-i only), [[kulíšek]] (i/ové), [[oříšek]] (i/ové), [[papoušek]] (-i only),
	-- [[prášek]] (i/ové), [[šašek]] (i/ové).
	-- make sure to check `stems` as we don't want to include non-reducible words in -Cek (but do want to include
	-- [[quarterback]], with -i/-ové)
	if rfind(stems.vowel_stem, starts_lower .. ".*" .. com.cons_c .. "k$") then
		return {"i", "ové"}
	end
	-- [[stoik]], [[neurotik]], [[logik]], [[fyzik]], etc. The pattern needs `.*` (any number of characters) between
	-- the initial lowercase letter and -ik; a single `.` would only match four-character lemmas and miss all of
	-- these examples.
	if rfind(lemma, starts_lower .. ".*ik$") then
		return {"i", "ové"}
	end
	-- barmani, gentlemani, jazzmani, kameramani, narkomani, ombudsmani, pivotmani, rekordmani, showmani, supermani,
	-- toxikomani
	if rfind(lemma, starts_lower .. ".*man$") then
		return "i"
	end
	-- terms ending in -an after a palatal or a consonant that doesn't change when palatalized, i.e. labial or l (but
	-- -man forms -mani unless in a proper noun): Brňan → Brňané, křesťan → křesťané, měšťan → měšťané,
	-- Moravan → Moravané, občan → občané, ostrovan → ostrované, Pražan → Pražané, Slovan → Slované,
	-- svatebčan → svatebčané, venkovan → venkované, Australan → Australané; also s, because there are many demonyms
	-- in -san e.g. [[Andalusan]], [[Barbadosan]], [[Oděsan]], and few proper nouns in -san; similarly z because of
	-- [[Belizan]], [[Gazan]], [[Kavkazan]], etc.; also w, which isn't a normal consonant in Czech but occurs in
	-- [[Glasgowan]] and [[Zimbabwan]]; NOTE: a few misc words like [[pohan]] also work this way but need manual
	-- overrides
	if rfind(lemma, "[" .. com.inherently_soft .. com.labial .. "wlsz]an$") then
		return {"é", "i"} -- most now can also take -i
	end
	-- proper names: Baťové, Novákové, Petrové, Tomášové, Vláďové; exclude demonyms (but include surnames)
	if rfind(lemma, starts_upper) and (base.surname or not rfind(lemma, "[eě]c$")) then
		return "ové"
	end
	-- demonyms: [[Albánec]], [[Gruzínec]], [[Izraelec]], [[Korejec]], [[Libyjec]], [[Litevec]], [[Němec]],
	-- [[Portugalec]]
	if rfind(lemma, starts_upper .. ".*[eě]c$") then
		return "i"
	end

	-- From here on down, we're dealing only with lowercase terms.

	-- buditelé, budovatelé, čekatelé, činitelé, hostitelé, jmenovatelé, pisatelé, ručitelé, velitelé, živitelé
	if rfind(lemma, ".*tel$") then
		return "é"
	end
	-- nouns in -j: čaroděj → čarodějové, lokaj → lokajové, patricij → patricijové, plebej → plebejové,
	-- šohaj → šohajové, žokej → žokejové
	-- nouns in -l: apoštol → apoštolové, břídil → břídilové, fňukal → fňukalové, hýřil → hýřilové, kutil → kutilové,
	-- loudal → loudalové, mazal → mazalové, škrabal → škrabalové, škudlil → škudlilové, vyvrhel → vyvrhelové,
	-- žvanil → žvanilové (we excluded those in -tel above)
	if rfind(lemma, ".*[jl]$") then
		return "ové"
	end
	-- archeolog → archeologové, biolog → biologové, geolog → geologové, meteorolog → meteorologové
	if rfind(lemma, ".*log$") then
		return "ové"
	end
	-- dramaturg → dramaturgové, chirurg → chirurgové
	if rfind(lemma, ".*urg$") then
		return "ové"
	end
	-- fotograf → fotografové, geograf → geografové, lexikograf → lexikografové
	if rfind(lemma, ".*graf$") then
		return "ové"
	end
	-- bibliofil → bibliofilové, germanofil → germanofilové
	if rfind(lemma, ".*fil$") then
		return "ové"
	end
	-- rusofob → rusofobové
	if rfind(lemma, ".*fob$") then
		return "ové"
	end
	-- agronom → agronomové, ekonom → ekonomové
	if rfind(lemma, ".*nom$") then
		return "ové"
	end
	return "i"
end
-- Hard masculine declension. Sets `base.palatalize_voc` and `base.hard_c` as a side effect, then hands the computed
-- endings to add_decl().
decls["hard-m"] = function(base, stems)
	local lemma = base.lemma
	local inanimate = base.animacy == "inan"

	-- Nouns ending in hard -c, e.g. [[hec]] "joke", [[kibuc]] "kibbutz", don't palatalize.
	base.palatalize_voc = not rfind(stems.vowel_stem, "c$")
	base.hard_c = true

	-- Velar stem: forced by `base.velar`, suppressed by `base["-velar"]`, otherwise detected from the stem.
	local velar = base.velar or (not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$"))

	-- See [https://prirucka.ujc.cas.cz/en/?id=360] on declension of toponyms.
	local toponym = inanimate and rfind(lemma, "^" .. com.uppercase_c)
	local toponym_gen_a, toponym_gen_a_u, toponym_loc_e = false, false, false
	if toponym then
		-- Some toponyms take -a in the genitive singular, e.g. toponyms in -ín ([[Zlín]], [[Jičín]], [[Berlín]]);
		-- -ýn ([[Hostýn]], [[Londýn]]); -ov ([[Havířov]]); and -ev ([[Bezdrev]]), as do some others, e.g. domestic
		-- [[Beroun]], [[Brandýs]], [[Náchod]], [[Tábor]] and foreign [[Betlém]] "Bethlehem", [[Egypt]],
		-- [[Jeruzalém]] "Jerusalem", [[Milán]] "Milan", [[Řím]] "Rome", [[Rýn]] "Rhine". Also some transferred from
		-- common nouns e.g. ([[Nový]]) [[Kostel]], ([[Starý]]) [[Rybník]]. Only the -ín/-ýn/-ov/-ev endings are
		-- detected automatically here.
		toponym_gen_a = rfind(lemma, "[íý]n$") or rfind(lemma, "[oe]v$")
		-- Toponyms in -ík (Mělník, Braník, Rakovník, Lipník) seem to fluctuate between gen -a and -u. Also some in
		-- ‑štejn, ‑berg, ‑perk, ‑burk, ‑purk (Rabštejn, Heidelberg, Kašperk, Hamburk, Prešpurk) and some others:
		-- Zbiroh, Kamýk, Příbor, Zábřeh, Žebrák, Praděd. Only -ík is detected automatically here.
		toponym_gen_a_u = rfind(lemma, "ík$")
		-- Toponyms that take -a in the genitive singular tend to take -ě in the locative singular; so do those in
		-- -štejn (Rabštejn), -hrad (Petrohrad), -grad (Volgograd).
		toponym_loc_e = toponym_gen_a or rfind(lemma, "štejn$") or rfind(lemma, "[gh]rad$")
	end
	-- Toponyms in -ík seem to fluctuate between loc -ě and -u.
	local toponym_loc_e_u = toponym_gen_a_u

	-- Inanimate gen_s in -a other than toponyms in -ín/-ýn/-ev/-ov (e.g. [[zákon]] "law", [[oběd]] "lunch",
	-- [[kostel]] "church", [[dnešek]] "today", [[leden]] "January", [[trojúhelník]] "triangle") needs to be given
	-- manually, using '<gena>'.
	local gen_s
	if toponym_gen_a then
		gen_s = "a"
	elseif toponym_gen_a_u then
		gen_s = {"a", "u"}
	elseif inanimate then
		gen_s = "u"
	else
		gen_s = "a"
	end

	-- Animates with dat_s only in -u (e.g. [[člověk]] "person", [[Bůh]] "God") need to give this manually,
	-- using '<datu>'.
	local dat_s
	if inanimate then
		dat_s = "u"
	elseif base.surname then
		dat_s = "ovi"
	else
		dat_s = {"ovi", "u"}
	end

	-- Inanimates with loc_s in -e/ě other than certain toponyms (see above) need to give this manually, using
	-- <locě>, but it will trigger the second palatalization automatically.
	local loc_s
	if toponym_loc_e then
		loc_s = "ě"
	elseif toponym_loc_e_u then
		loc_s = {"ě", "u"}
	else
		loc_s = dat_s
	end

	-- Velar-stem animates with voc_s in -e (e.g. [[Bůh]] "God", voc_s 'Bože'; [[člověk]] "person", voc_s 'člověče')
	-- need to give this manually using <voce>; it will trigger the first palatalization automatically.
	local voc_s
	if velar then
		voc_s = "u"
	else
		voc_s = "e" -- 'e' will trigger first palatalization in apply_special_cases()
	end

	-- Nom_p in -i will trigger second palatalization in apply_special_cases().
	local nom_p
	if inanimate then
		nom_p = "y"
	else
		nom_p = default_masc_animate_nom_pl(base, stems)
	end

	-- Per IJP and Janda and Townsend:
	-- * loc_p in -ích is currently the default for velars but not otherwise; it will automatically trigger the second
	--   palatalization (e.g. [[práh]] "threshold", loc_p 'prazích'). Otherwise, -ích needs to be given manually using
	--   <locplích>, e.g. [[les]] "forest"; [[hotel]] "hotel"; likewise for loc_p in -ách (e.g. [[plech]]
	--   "metal plate"), using <locplách>.
	-- * Inanimate hard nouns in -c normally have -ech: [[hec]] "joke", [[tác]] "tray", [[truc]], [[kec]], [[frc]],
	--   [[flanc]], [[kibuc]] "kibbutz", [[pokec]] "chat".
	-- In the IJP tables, inanimate reducible nouns in -ček (and most in -cek, although there are many fewer; also some
	-- in -žek, but in this case it's too inconsistent to make the default) regularly have both -ích and -ách in the
	-- locative plural, while similar animate nouns only have -ích. This applies even to nouns like [[háček]] and
	-- [[koníček]] that can be either animate or inanimate. Make sure to exclude nouns in -ck such as [[comeback]] and
	-- [[joystick]], which have only -ích.
	local loc_p
	if inanimate and rfind(lemma, "[cč]ek$") and rfind(stems.vowel_stem, "[cč]k$") then
		loc_p = {"ích", "ách"}
	elseif velar then
		loc_p = "ích"
	else
		loc_p = "ech"
	end

	-- acc_s is nil (no ending supplied here).
	add_decl(base, stems,
		gen_s, dat_s, nil, voc_s, loc_s, "em",
		nom_p, "ů", "ům", "y", loc_p, "y")
end
do
	-- Truthy if the noun counts as velar-stem: forced by `base.velar`, suppressed by `base["-velar"]`, otherwise
	-- detected from the end of the vowel stem.
	local function is_velar_stem(base, stems)
		return base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")
	end

	-- Hard masculines: velar stems get their own annotation and category.
	declprops["hard-m"] = {
		desc = function(base, stems)
			return is_velar_stem(base, stems) and "velar GENDER" or "hard GENDER"
		end,
		cat = function(base, stems)
			return is_velar_stem(base, stems) and "velar-stem" or "hard"
		end,
	}
end
-- Semisoft masculine declension. Examples:
-- * Animate in -ius: génius, nuncius, nonius (breed of horse), notárius, ordinárius, patricius, primárius,
--   pronuncius, various names
-- * Animate in -eus: farizeus, basileus, pygmeus ([[skarabeus]] inflects hard in the plural), various names
-- * Inanimate in -ius: nonius (measuring device), rádius, sestercius
-- NOTE: Inanimate nouns in -eus (nukleus, choreus) inflect hard in the plural
decls["semisoft-m"] = function(base, stems)
	local inanimate = base.animacy == "inan"
	local dat_s, nom_p
	if inanimate then
		dat_s, nom_p = "u", "e"
	else
		nom_p = "ové"
		if base.surname then
			dat_s = "ovi"
		else
			dat_s = {"ovi", "u"}
		end
	end
	-- Locative singular is the same as the dative singular; acc_s is nil (no ending supplied here).
	local loc_s = dat_s
	add_decl(base, stems,
		"a", dat_s, nil, "e", loc_s, "em",
		nom_p, "ů", "ům", "e", "ích", "i")
end
-- Semisoft masculines: category spec only.
declprops["semisoft-m"] = {cat = "semisoft"}
-- Soft masculine declension. Sets `base.palatalize_voc` as a side effect.
decls["soft-m"] = function(base, stems)
	local lemma = base.lemma
	local inanimate = base.animacy == "inan"
	base.palatalize_voc = true

	-- animates with dat_s only in -i need to give this manually, using '<dati>'
	local dat_s
	if inanimate then
		dat_s = "i"
	elseif base.surname then
		dat_s = "ovi"
	else
		dat_s = {"ovi", "i"}
	end
	local loc_s = dat_s

	-- Per IJP, the vast majority of soft masculine animates take -i in the voc_s, but those in -ec/-ěc take -e with
	-- first palatalization to -če, e.g. [[otec]] "father", [[lovec]] "hunter", [[blbec]] "fool, idiot", [[horolezec]]
	-- "mountaineer", [[znalec]] "expert", [[chlapec]] "boy", [[nadšenec]] "enthusiast", [[luněc]] (type of bird).
	-- Demonyms but not surnames ending in -ec but beginning with a capital letter take either -e or -i (only the
	-- former triggers the first palatalization). Examples: [[Portugalec]], [[Slovinec]] "Slovenian", [[Japonec]],
	-- [[Vietnamec]]. Not [[Kadlec]] (surname). The -e ending is only used for reducible animate nouns.
	local voc_s = "i"
	if base.animacy == "an" and rfind(lemma, "[eě]c$") and stems.reducible then
		if not base.surname and rfind(lemma, "^" .. com.uppercase_c) then
			voc_s = {"e", "i"}
		else
			voc_s = "e"
		end
	end

	local nom_p
	if inanimate then
		nom_p = "e"
	else
		nom_p = default_masc_animate_nom_pl(base, stems)
	end

	-- nouns with loc_p in -ech (e.g. [[cíl]] "goal") need to give this manually, using <locplech>
	add_decl(base, stems,
		"e", dat_s, nil, voc_s, loc_s, "em",
		nom_p, "ů", "ům", "e", "ích", "i")
end
-- Soft masculines: category spec only.
declprops["soft-m"] = {cat = "soft"}
-- Mixed (hard/soft) masculine declension. Four sub-patterns are distinguished by animacy and by the final consonant
-- of the lemma.
--
-- NOTE: IJP tends to list the soft endings first, but per their section on this
-- (https://prirucka.ujc.cas.cz/en/?id=220), the hard endings tend to predominate in modern use, so we list them
-- first.
decls["mixed-m"] = function(base, stems)
	local lemma = base.lemma

	if base.animacy ~= "an" then
		-- Given in IJP: burel, hnědel, chmel, krevel, kužel, námel, plevel, tmel, zádrhel, apríl, artikul, koukol,
		-- rubl, úběl, plus reducible nouns cumel, chrchel, [[kotel]] "cauldron", sopel, uhel. Also [[městys]]. Many
		-- of them are listed in the IJP tables with only hard or with fewer soft forms, so need to be investigated
		-- individually.
		if rfind(lemma, "[ls]$") then
			add_decl(base, stems,
				{"u", "e"}, {"u", "i"}, nil, {"e", "i"}, {"u", "e", "i"}, "em",
				{"y", "e"}, "ů", "ům", {"y", "e"}, {"ech", "ích"}, {"y", "i"})
			return
		end
		-- -n/-t; hard in the plural: hřeben, ječmen, [[kámen]] "stone", kmen, kořen, křemen, plamen,
		-- [[pramen]] "source", [[řemen]] "strap", den, týden, [[loket]] "elbow".
		-- There may be deviations (e.g. soft plural forms for [[den]]), so need to be investigated individually.
		add_decl(base, stems,
			{"u", "e"}, {"u", "i"}, nil, "i", {"u", "i"}, "em",
			"y", "ů", "ům", "y", "ech", "y")
		return
	end

	-- Animate nouns from here on.
	if rfind(lemma, "l$") then
		-- [[anděl]] "angel", [[manžel]] "husband", [[strašpytel]] "coward"; 'strašpytel' has a different declension
		-- from the other two, with more soft forms. [[manžel]] has plural in -é or -ové and needs an override.
		local dat_loc_s = base.surname and "ovi" or {"ovi", "u"}
		add_decl(base, stems,
			"a", dat_loc_s, nil, "i", dat_loc_s, "em",
			"é", "ů", "ům", {"y", "e"}, {"ech", "ích"}, {"y", "i"})
		return
	end

	-- -s/-z: rorýs, platýs, pilous, markýz, všekaz, stávkokaz, penězokaz, listokaz, dřevokaz, zrnokaz, boss.
	-- Others recently moving towards this declension: primas, karas, kalous, konipas, ibis, chabrus, chuďas,
	-- kakabus, kliďas, kandrdas, morous, vágus.
	-- Some names: Alois, Mánes.
	-- Both hard and soft endings throughout. Most have -i and -ové in the nominative plural.
	local dat_loc_s = base.surname and "ovi" or {"u", "i", "ovi"}
	add_decl(base, stems,
		{"a", "e"}, dat_loc_s, nil, {"e", "i"}, dat_loc_s, "em",
		{"i", "ové"}, "ů", "ům", {"y", "e"}, {"ech", "ích"}, {"y", "i"})
end
-- Mixed masculines: category spec only.
declprops["mixed-m"] = {cat = "mixed"}
-- Masculines in -a.
decls["a-m"] = function(base, stems)
	local stem = stems.vowel_stem

	-- husita → husité, izraelita → izraelité, jezuita → jezuité, kosmopolita → kosmopolité, táborita → táborité
	-- fašista → fašisté, filatelista → filatelisté, fotbalista → fotbalisté, kapitalista → kapitalisté,
	-- marxista → marxisté, šachista → šachisté, terorista → teroristé. NOTE: most of these words actually appear in
	-- the IJP tables with -é/-i, so we go accordingly.
	--
	-- gymnasta → gymnasté, fantasta → fantasté; also chiliasta, orgiasta, scholiasta, entuziasta, dynasta, ochlasta,
	-- sarkasta, vymasta; NOTE: Only 'gymnasta' actually given with just -é; 'fantasta' with -ové/-é, 'dynasta' and
	-- 'ochlasta' with just -ové, vymasta not in IJP (no plural given in SSJC), and the rest with -é/-i. So we go
	-- accordingly.
	local nom_p
	if rfind(stem, "is?t$") or rfind(stem, "ast$") then
		nom_p = {"é", "i"}
	else
		nom_p = "ové"
	end

	-- Velar nouns (e.g. [[sluha]] "servant") have -ích in the loc_p (which triggers the second palatalization)
	-- instead of -ech. Nouns whose stem ends in a soft consonant ([[rikša]], [[paša]], [[bača]], [[mahárádža]],
	-- [[paňáca]], etc.) behave likewise.
	-- FIXME: [[pária]] "pariah", [[Maria]] etc.
	local loc_p = "ech"
	if base.velar
		or (not base["-velar"] and rfind(stem, com.velar_c .. "$"))
		or rfind(stem, com.inherently_soft_c .. "$") then
		loc_p = "ích"
	end

	add_decl(base, stems,
		"y", "ovi", "u", "o", "ovi", "ou",
		nom_p, "ů", "ům", "y", loc_p, "y")
end
-- Masculines in -a: category spec only.
declprops["a-m"] = {cat = "GENPOS in -a"}
-- Masculines in -e: [[zachránce]] "savior"; [[soudce]] "judge"; etc.
-- At least two inanimates: [[průvodce]] "guide, guidebook; computing wizard"; [[správce]] "manager (software
-- program), configuration program"
decls["e-m"] = function(base, stems)
	local dat_s, nom_p
	if base.animacy == "inan" then
		dat_s, nom_p = "i", "e"
	else
		-- nouns with -ové as well (e.g. [[soudce]] "judge") will need to specify that manually, e.g. <nompli:ové>
		nom_p = "i"
		if base.surname then
			dat_s = "ovi"
		else
			dat_s = {"ovi", "i"}
		end
	end
	local loc_s = dat_s
	add_decl(base, stems,
		"e", dat_s, nil, "-", loc_s, "em",
		nom_p, "ů", "ům", "e", "ích", "i")
end
-- Masculines in -e: category spec only.
declprops["e-m"] = {cat = "GENPOS in -e"}
-- Masculines in -i/-y.
--
-- [[kivi]] "kiwi (bird)"; [[kuli]] "coolie"; [[lori]] "lory, lorikeet (bird)" (loc_pl 'loriech/loriích/lorich');
-- [[vini]] "parrot of the genus Vini"; [[yetti]]/[[yeti]] "yeti". Other examples: [[aguti]], [[efendi]], [[hadži]],
-- [[pekari]], [[regenschori]].
--
-- [[grizzly]]/[[grizly]] "grizzly bear"; [[pony]] "pony"; [[husky]] "husky"; [[dandy]] "dandy"; [[Billy]] "billy".
--
-- NOTE: Some nouns in -y are regular soft stems, e.g. [[gay]] "gay person"; [[gray]] "gray (unit of absorbed
-- radiation)"; [[Nagy]] (surname).
--
-- NOTE: The stem ends in -i/-y.
decls["i-m"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ho", "mu", nil, "-", "m", "m"
	local nom_p = {"ové", ""}
	local gen_p = {"ů", "ch"}
	local dat_p = {"ům", "m"}
	local acc_p = {"e", ""}
	local loc_p = {"ích", "ch"}
	-- ins_pl 'kivii/kivimi'
	local ins_p = {"i", "mi"}
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Masculines in -i/-y: category spec only.
declprops["i-m"] = {cat = "GENPOS in -i/-y"}
-- Masculines in -í/-ý.
--
-- [[kádí]] "qadi (Islamic judge)", [[mahdí]] "Mahdi (Islamic prophet)", [[muftí]] "mufti (Islamic scholar)",
-- [[sipáhí]] "sipahi (Algerian cavalryman in the French army)"
--
-- No obvious examples in -ý, but the support is there.
--
-- NOTE: The stem ends in -í/-ý.
decls["í-m"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ho", "mu", nil, "-", "m", "m"
	local nom_p = {"ové", ""}
	local gen_p = {"ů", "ch"}
	local dat_p = {"ům", "m"}
	local acc_p = {"e", ""}
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, "ích", "mi")
end
-- Masculines in -í/-ý: category spec only.
declprops["í-m"] = {cat = "GENPOS in -í/-ý"}
-- Masculines in -ie: [[zombie]] "zombie" (also fem/neut), [[hippie]] "hippie", [[yuppie]] "yuppie", [[rowdie]]
-- "rowdy/hooligan"
--
-- NOTE: The stem ends in -i (not -ie, because of the plural).
decls["ie-m"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "eho", "emu", nil, "-", "em", "em"
	-- Every plural slot offers -es as the second alternative.
	local nom_p = {"ové", "es"}
	local gen_p = {"ů", "es"}
	local dat_p = {"ům", "es"}
	local acc_p = {"e", "es"}
	local loc_p = {"ích", "es"}
	local ins_p = {"i", "es"}
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Masculines in -ie: category spec only.
declprops["ie-m"] = {cat = "GENPOS in -ie"}
-- Masculines in -ee: [[Yankee]] "Yankee"
--
-- NOTE: The stem ends in -ee.
decls["ee-m"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ho", "mu", nil, "-", "m", "m"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ové", "ů", "ům", "e", "ích", "i"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Masculines in -ee: category spec only.
declprops["ee-m"] = {cat = "GENPOS in -ee"}
-- Masculines in -o.
--
-- [[kápo]] "head, leader"; [[lamželezo]] "strongman"; [[torero]] "bullfighter"; [[žako]] "African gray parrot";
-- [[dingo]] "dingo"; [[kakapo]] "kakapo" (given in Wiktionary with dat_s/loc_s in -ovi only not -ovi/-u; probably
-- wrong but not in IJP); [[maestro]] "maestro"; [[Bruno]] "Bruno", [[Hugo]] "Hugo"; [[Ivo]] "Yves" (these names
-- are singular-only per IJP); [[Kvido]] "Guido, Guy" (per IJP has accusative in -a or -ona); [[Oto]] "Otto" (per
-- IJP also declinable like virile -a masculines; singular-only); [[Kuřátko]] (a surname; how declined?);
-- [[Picasso]] (surname; how declined?); [[Pluto]] "Pluto (God)", also "Pluto (planet)", which is inanimate;
-- [[Samo]]/[[Sámo]] "Samo (7th century Slavic ruler)" (dat_s/loc_s only in -ovi, needs override); [[Tomio]]
-- "Tomio (Japanese male given name)" (how declined?); [[nemakačenko]] "idler, loafer" (given in Wiktionary with
-- dat_s/loc_s in -ovi only, as for [[kakapo]]); [[nefachčenko]] "idler, loafer"; note also [[gadžo]] "gadjo",
-- which has a unique declension.
decls["o-m"] = function(base, stems)
	-- Velar nouns ([[žako]], [[dingo]], etc.) have -ích in the loc_p (which triggers the second palatalization)
	-- instead of -ech.
	local loc_p
	if base.velar or (not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")) then
		loc_p = "ích"
	else
		loc_p = "ech"
	end

	-- inanimates e.g. [[Pluto]] (planet) have -u only, like for normal hard masculines.
	local dat_s
	if base.animacy == "inan" then
		dat_s = "u"
	elseif base.surname then
		dat_s = "ovi"
	else
		dat_s = {"ovi", "u"}
	end
	local loc_s = dat_s

	add_decl(base, stems,
		"a", dat_s, nil, "-", loc_s, "em",
		"ové", "ů", "ům", "y", loc_p, "y")
end
-- Masculines in -o: category spec only.
declprops["o-m"] = {cat = "GENPOS in -o"}
-- Masculines in -u.
--
-- [[emu]] "emu", [[guru]] "guru", [[kakadu]] "cockatoo", [[marabu]] "marabou" (declined the same way)
-- [[Osamu]] "Osamu (Japanese male given name)" [how declined?]
-- [[Višnu]] "Vishnu" (declined like [[guru]] but singular-only)
-- [[budižkničemu]] "good-for-nothing, ne'er-do-well" (indeclinable in the singular, declinable as masculine hard
-- stem budižkničemové etc. in the plural, declinable as feminine hard stem budižkničemy etc. in the plural when
-- feminine).
--
-- NOTE: The stem ends in -u.
decls["u-m"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "a", "ovi", nil, "-", "ovi", "em"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ové", "ů", "ům", "y", "ech", "y"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Masculines in -u: category spec only.
declprops["u-m"] = {cat = "GENPOS in -u"}
-- Masculine t-stems, e.g. [[kníže]] "prince", [[hrabě]] "earl", [[markrabě]] "margrave".
decls["tstem-m"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ete", "eti", "ete", "-", "eti", "etem"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ata", "at", "atům", "ata", "atech", "aty"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Masculine t-stems: category spec only.
declprops["tstem-m"] = {cat = "t-stem"}
-- Hard feminine declension. Sets `base.no_palatalize_c` (and `base.hard_c` when `base.c_as_k` is set) as a side
-- effect.
decls["hard-f"] = function(base, stems)
	base.no_palatalize_c = true
	if base.c_as_k then
		-- forms like 'ayahuascy' are allowed.
		base.hard_c = true
	end

	-- [[skica]] "sketch", [[gejša]] "geisha", [[rikša]] "rickshaw (vehicle)"; [[arakača]], [[čača]], [[čiča]]
	-- (drink), [[dača]] "dacha", [[gutaperča]] "guttapercha", [[viskača]]; [[babča]], [[číča]], [[káča]], [[mamča]],
	-- [[úča]]. Also appears to apply to ď (e.g. [[Naďa]]) and ť, as well as certain words with stems in -ň and -j
	-- (e.g. [[doňa]], and personal names such as [[Táňa]] and [[Darja]]), which normally have a mixed declension.
	-- Such soft-consonant stems get -i as a second dative/locative singular ending, unless `c_as_k` is set.
	-- NOTE(review): this reads `base.vowel_stem`, whereas the other declension functions read `stems.vowel_stem`;
	-- confirm that this is intended.
	local dat_loc_s = "ě"
	if rfind(base.vowel_stem, "[cčšžďťjň]$") and not base.c_as_k then
		dat_loc_s = {"ě", "i"}
	end

	add_decl(base, stems,
		"y", dat_loc_s, "u", "o", dat_loc_s, "ou",
		"y", "", "ám", "y", "ách", "ami")
end
-- Hard feminines: category spec only.
declprops["hard-f"] = {cat = "hard"}
-- Soft feminine declension. This also includes feminines in -ie, e.g. [[belarie]], [[signorie]], [[uncie]], and
-- feminines in -oe, e.g. [[kánoe]], [[aloe]] and medical terms like [[dyspnoe]], [[apnoe]], [[hemoptoe]],
-- [[kalanchoe]].
decls["soft-f"] = function(base, stems)
	-- Nouns in -ice like [[ulice]] "street" have null genitive plural e.g. 'ulic'; nouns in -yně e.g.
	-- [[přítelkyně]] "girlfriend" have gen pl 'přítelkyň' or 'přítelkyní' with two possible endings; otherwise -í.
	-- (Alternation between -ň and -n and between -e and -ě handled automatically by combine_stem_ending().)
	local gen_p
	if rfind(base.lemma, "ice$") then
		gen_p = ""
	elseif rfind(base.lemma, "yně$") then
		gen_p = {"", "í"}
	else
		gen_p = "í"
	end

	-- Vocative really ends in -e, not just a copy of the nominative; cf. [[sinfonia]], which is soft-f except for
	-- the nominative and has -e in the vocative singular.
	local voc_s = "e"
	add_decl(base, stems,
		"e", "i", "i", voc_s, "i", "í",
		"e", gen_p, "ím", "e", "ích", "emi")
end
-- Soft feminines: category spec only.
declprops["soft-f"] = {cat = "soft"}
-- Mixed (hard/soft) feminine declension.
--
-- Lowercase nouns in -ňa (e.g. bárišňa/báryšňa, doňa, dueňa, piraňa, vikuňa) and -ja (e.g. maracuja, papája, sója).
-- Also non-personal proper nouns in -ňa (e.g. [[Keňa]] "Kenya") and -ja (e.g. [[Troja]]/[[Trója]] "Troy",
-- [[Amudarja]] "Amu Darya"). Does not appear to apply to personal proper nouns (e.g. [[Táňa]] "Tanya", [[Darja]]
-- "Daria"), which usually decline like [[gejša]], [[dača]], [[skica]].
decls["mixed-f"] = function(base, stems)
	local gen_s = {"i", "e"}
	local dat_s = {"e", "i"}
	local loc_s = {"e", "i"}
	local nom_p = {"i", "e"}
	local gen_p = {"", "í"}
	local dat_p = {"ám", "ím"}
	local acc_p = {"i", "e"}
	local loc_p = {"ách", "ích"}
	local ins_p = {"ami", "emi"}
	add_decl(base, stems, gen_s, dat_s, "u", "o", loc_s, "ou",
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Mixed feminines: category spec only.
declprops["mixed-f"] = {cat = "mixed"}
-- Soft feminines with a zero ending in the nominative singular, e.g. [[dlaň]] "palm (of the hand)".
decls["cons-f"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "e", "i", "-", "i", "i", "í"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "e", "í", "ím", "e", "ích", "emi"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Consonant-final soft feminines: category spec only.
declprops["cons-f"] = {cat = "soft zero-ending"}
-- Feminine i-stems.
decls["istem-f"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "i", "i", "-", "i", "i", "í"
	-- See above under apply_special_cases(); -E causes depalatalization of ť/ď/ň.
	local dat_p, loc_p = "Em", "Ech"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		"i", "í", dat_p, "i", loc_p, "mi")
end
-- Feminine i-stems: category spec only.
declprops["istem-f"] = {cat = "i-stem"}
-- Mixed feminine i-stems. The subtype is selected by `base.mixedistem`; an unrecognized value raises an error.
decls["mixed-istem-f"] = function(base, stems)
	-- Endings that differ between the subtypes. Use of ě vs E below is intentional. Contrast [[oběť]] dat pl
	-- 'obětem' (depalatalizing) with [[nit]] ins pl 'nitěmi' (palatalizing). See comment above under
	-- apply_special_cases(). `gen_nom` is used for the genitive singular and for the nominative and accusative
	-- plural.
	local subtypes = {
		-- pěst, past, mast, lest [reducible; ins pl 'lstmi'], pelest, propust, plst, oběť, zeď [reducible; ins pl
		-- 'zdmi'], paměť [ins pl 'pamětmi/paměťmi']
		["pěst"] = {gen_nom = "i", dat_p = {"ím", "Em"}, loc_p = {"ích", "Ech"}, ins_p = "mi"},
		-- moc, nemoc, pomoc, velmoc; NOTE: pravomoc has -i/-e alternation in gen_s, nom_p
		["moc"] = {gen_nom = "i", dat_p = {"Em", "ím"}, loc_p = {"Ech", "ích"}, ins_p = "ěmi"},
		-- myš, veš [reducible, ins pl vešmi], hruď, měď, pleť, spleť, směs, smrt, step, odpověď [ins pl
		-- 'odpověďmi/odpovědmi'], šeď, závěť [ins pl 'závěťmi/závětmi'], plsť [ins pl 'plstmi']
		["myš"] = {gen_nom = "i", dat_p = "ím", loc_p = "ích", ins_p = "mi"},
		-- lež [reducible], noc, mosaz, rez [reducible], ves [reducible], mysl, sůl, běl, žluť
		["noc"] = {gen_nom = "i", dat_p = "ím", loc_p = "ích", ins_p = "ěmi"},
		-- žluč, moč, modř, čeleď, kapraď, záď, žerď, čtvrť/čtvrt, drť, huť, chuť, nit, pečeť, závrať, pouť, stať,
		-- ocel
		["žluč"] = {gen_nom = {"i", "ě"}, dat_p = "ím", loc_p = "ích", ins_p = "ěmi"},
		-- loď, suť
		["loď"] = {gen_nom = {"i", "ě"}, dat_p = "ím", loc_p = "ích", ins_p = {"ěmi", "mi"}},
	}

	local spec = subtypes[base.mixedistem]
	if not spec then
		error(("Unrecognized value '%s' for 'mixedistem', should be one of 'pěst', 'moc', 'myš', 'noc', 'žluč' or 'loď'"):
			format(base.mixedistem))
	end

	local gen_s = spec.gen_nom
	local nom_p = spec.gen_nom
	-- The accusative plural uses the same ending spec as the nominative plural.
	add_decl(base, stems, gen_s, "i", "-", "i", "i", "í",
		nom_p, "í", spec.dat_p, nom_p, spec.loc_p, spec.ins_p)
end
-- Mixed feminine i-stems: the annotation names the subtype (`base.mixedistem`); two categories are returned, the
-- general one and a per-subtype one.
declprops["mixed-istem-f"] = {
	desc = function(base, stems)
		local template = "mixed i-stem [type '%s'] GENDER"
		return template:format(base.mixedistem)
	end,
	cat = function(base, stems)
		local subtype_template = "mixed i-stem GENPOS (type '%s')"
		local subtype_cat = subtype_template:format(base.mixedistem)
		return {"mixed i-stem", subtype_cat}
	end,
}
-- Feminines in -i: [[máti]] "mother" (singular-only), [[pramáti]] "foremother"; very similar to the 'noc' mixed
-- i-stem type.
decls["i-f"] = function(base, stems)
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "i", "i", "-", "i", "i", "í"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "i", "í", "ím", "i", "ích", "ěmi"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
-- Feminines in -i: category spec only.
declprops["i-f"] = {cat = "GENPOS in -i"}
-- Feminines in -ea. The stem ends in -e. Three sub-patterns: technical terms (`base.tech`), personal names
-- (`base.persname`) and everything else; `base.tech` takes precedence.
decls["ea-f"] = function(base, stems)
	if base.tech then
		-- diarea, gonorea, chorea, nauzea, paleogea, seborea, trachea
		local dat_p, loc_p = {"ám", "ím"}, {"ách", "ích"}
		add_decl(base, stems,
			"y", "i", "u", "o", "i", "ou",
			"y", "í", dat_p, "y", loc_p, "ami")
		return
	end

	-- The plural is shared by the two remaining sub-patterns. For personal names this is a guess, based on the
	-- plural of the common nouns; plural of personal names not attested in IJP.
	local nom_p = {"y", "je"}
	local dat_p = {"ám", "jím"}
	local acc_p = {"y", "je"}
	local loc_p = {"ách", "jích"}
	local ins_p = {"ami", "jemi"}

	if base.persname then
		-- Medea, Andrea, etc.
		local gen_s = {"y", "je", "ji"}
		local dat_s = {"e", "je", "ji"}
		local loc_s = {"e", "je", "ji"}
		add_decl(base, stems,
			gen_s, dat_s, "u", "o", loc_s, "ou",
			nom_p, "jí", dat_p, acc_p, loc_p, ins_p)
	else
		-- idea, odysea ("wandering pilgrimage"), orchidea, palea, spirea
		-- proper names Galilea, Judea, Caesarea, Korea, Odyssea ("epic poem")
		local gen_s = {"y", "je"}
		local ins_s = {"ou", "jí"}
		add_decl(base, stems,
			gen_s, "ji", "u", "o", "ji", ins_s,
			nom_p, "jí", dat_p, acc_p, loc_p, ins_p)
	end
end
declprops["ea-f"] = {
cat = function(base, stems)
if base.tech then
return {"GENPOS in -ea", "technical GENPOS in -ea"}
else
return "GENPOS in -ea"
end
end
}
-- Feminines in -oa/-ua; the stem ends in -o/-u.
-- stoa, kongrua; proper names Samoa, Managua, Nikaragua, Capua
decls["oa-f"] = function(base, stems)
	-- Endings are named after the slot each position of add_decl() appears to fill (inferred from how the other
	-- declensions in this module call it).
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "y", "i", "u", "o", "i", "ou"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "y", "í", "ám", "y", "ách", "ami"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
declprops["oa-f"] = {cat = "GENPOS in -oa/-ua"}
-- Feminines in -ia; the stem ends in -i.
-- belaria, signoria, uncia; paranoia, sinfonia;
-- proper names Alexandria, Alexia, Livia, Monrovia, Olympia, Sofia
decls["ia-f"] = function(base, stems)
	-- Identical to the soft declension except for the nom sg, so simply delegate to it.
	local soft_feminine = decls["soft-f"]
	soft_feminine(base, stems)
end
declprops["ia-f"] = {cat = "GENPOS in -ia"}
-- Hard (and velar-stem) neuters. The locative singular and locative plural endings depend on the shape of the lemma
-- and on whether the stem is velar.
decls["hard-n"] = function(base, stems)
	-- The stem counts as velar when 'velar' was given or, unless '-velar' was given, when the vowel stem ends in a
	-- velar consonant.
	local is_velar = base.velar
	if not is_velar and not base["-velar"] then
		is_velar = rfind(stems.vowel_stem, com.velar_c .. "$")
	end

	-- NOTE: Per IJP it appears the meaning of the preceding preposition makes a difference: 'o' = "about" takes
	-- '-u' or '-ě', while 'na/v' = "in, on" normally takes '-ě'.
	local loc_s
	if is_velar then
		-- Exceptions: [[mléko]] "milk" ('mléku' or 'mléce'), [[břicho]] "belly" ('břiše' or (less often) 'břichu'),
		-- [[roucho]] ('na rouchu' or 'v rouše'; why the difference in preposition?).
		loc_s = "u"
	elseif rfind(base.lemma, "dlo$") then
		-- IJP says nouns in -dlo take only -e but the declension tables show otherwise. It appears -u is possible
		-- but significantly less common.
		loc_s = {"ě", "u"}
	elseif rfind(base.lemma, "lo$") then
		-- Other nouns in -lo usually take just -e ([[čelo]] "forehead", [[kolo]] "wheel", [[křeslo]] "armchair",
		-- [[máslo]] "butter", [[peklo]] "hell", [[sklo]] "glass", [[světlo]] "light", [[tělo]] "body"; but [[číslo]]
		-- "number" with -e/-u; [[zlo]] "evil" and [[kouzlo]] "spell" with -u/-e).
		loc_s = "ě"
	elseif rfind(base.lemma, "[sc]tvo$") or rfind(base.lemma, "ivo$") then
		loc_s = "u"
	else
		-- Per IJP: Borrowed words and abstracts take -u (e.g. [[banjo]]/[[bendžo]]/[[benžo]] "banjo", [[depo]]
		-- "depot", [[chladno]] "cold", [[mokro]] "damp, dampness", [[právo]] "law, right", [[šeru]] "twilight?",
		-- [[temno]] "dark, darkness", [[tempo]] "rate, tempo", [[ticho]] "quiet, silence", [[vedro]] "heat") and
		-- others often take -ě/-u. Formerly we defaulted to -ě/-u but it seems better to default to just -u,
		-- similarly to hard masculines.
		-- {"ě", "u"}
		loc_s = "u"
	end

	local loc_p
	if rfind(base.lemma, "isko$") then
		-- Note, lemmas in -isko also have mixed-reducible as default, handled in determine_default_reducible().
		-- Note also, ending -ích triggers the second palatalization.
		loc_p = {"ích", "ách"}
	elseif is_velar then
		-- Diminutives in -ko, -čko, -tko; also [[lýtko]], [[děcko]], [[vrátka]], [[dvířka]], [[jho]], [[roucho]],
		-- [[tango]], [[mango]], [[sucho]], [[blaho]], [[víko]], [[echo]], [[embargo]], [[largo]], [[jericho]] (from
		-- IJP). Also foreign nouns in -kum: [[antibiotikum]], [[narkotikum]], [[afrodiziakum]], [[analgetikum]],
		-- etc. [[jablko]] "apple" has '-ách' or '-ích' and needs an override; likewise for [[vojsko]] "troop";
		-- [[riziko]] "risk" normally has '-ích' and needs an override.
		loc_p = "ách"
	else
		loc_p = "ech"
	end

	add_decl(base, stems, "a", "u", "-", "-", loc_s, "em",
		"a", "", "ům", "a", loc_p, "y")
	-- FIXME: paired body parts e.g. [[rameno]] "shoulder" (gen_p/loc_p 'ramenou/ramen'), [[koleno]] "knee"
	-- (gen_p/loc_p 'kolenou/kolen'), [[prsa]] "chest, breasts" (plurale tantum; gen_p/loc_p 'prsou').
	-- FIXME: Nouns with both neuter and feminine forms in the plural, e.g. [[lýtko]] "calf (of the leg)",
	-- [[bedro]] "hip", [[vrátka]] "gate".
end
-- Description and category distinguish velar stems from other hard stems, using the same velar test as the
-- declension function.
declprops["hard-n"] = {
	desc = function(base, stems)
		local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")
		return velar and "velar GENDER" or "hard GENDER"
	end,
	cat = function(base, stems)
		local velar = base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")
		return velar and "velar-stem" or "hard"
	end
}
-- Semisoft neuters: hard in the singular, mostly soft in the plural.
-- Examples:
-- * In -ao: [[kakao]] "cacao", [[makao]] "Macao (gambling card game, see Wikipedia)", [[curaçao]] "curaçao (liqueur)"
--   (IJP gives gen pl 'curaç' but ASSC [https://slovnikcestiny.cz/heslo/cura%C3%A7ao/0/9967] says 'curaçí' as expected),
--   [[farao]] "faro (card game)"; also [[Makao]], [[Pathet Lao]], but these are sg-only
-- * In -eo: [[stereo]], [[rodeo]], [[video]], [[solideo]]; also [[Borneo]], [[Montevideo]], but these are sg-only
-- * In -io: [[rádio]] "radio", [[gramorádio]], [[studio]], [[scenário]], [[trio]], [[ážio]] (also spelled [[agio]]),
--   [[disážio]], [[folio]], [[vibrio]]; also [[arpeggio]], [[adagio]], [[capriccio]], [[solfeggio]] although
--   pronounced the Italian way without /i/; also [[Ohio]], [[Ontario]], [[Tokio]], but these are sg-only
-- * In -uo: only [[duo]]
-- * In -yo: only [[embryo]]
-- * In -eum: [[muzeum]], [[lyceum]], [[linoleum]], [[ileum]], etc.
-- * In -ium: [[atrium]] "atrium", most chemical elements, etc.
-- * In -uum: [[individuum]], [[kontinuum]], [[premenstruum]], [[residuum]], [[vakuum]]/[[vacuum]]
-- * In -yum: only [[baryum]] "barium" (none others in SSJC)
-- * In -ion: [[enkómion]] "encomium", [[eufonion]] (variant of [[eufonium]]), [[amnion]], [[ganglion]], [[gymnasion]],
--   [[scholion]], [[kritérion]] (rare for [[kritérium]]), [[onomatopoion]] (variant of [[onomatopoie]]),
--   [[symposion]], [[synedrion]]; also [[Byzantion]], but this is sg-only; most words in -ion are masculine
-- Those in -eo and -uo have alternative hard endings in the dat/loc/ins pl, but not those in -eum or -uum. Those in
-- -ao have only hard endings except in the gen pl. (There are apparently no neuters in -eon; those in -eon or -yon
-- e.g. [[akordeon]], [[neon]], [[nukleon]], [[karyon]], [[Lyon]] are masculine.)
decls["semisoft-n"] = function(base, stems)
	-- Start from the soft plural endings and replace them for lemmas in -ao (hard only) or -eo/-uo (soft or hard).
	local dat_p, loc_p, ins_p = "ím", "ích", "i"
	if rfind(base.actual_lemma, "ao$") then
		dat_p, loc_p, ins_p = "ům", "ech", "y"
	elseif rfind(base.actual_lemma, "[eu]o$") then
		dat_p, loc_p, ins_p = {"ím", "ům"}, {"ích", "ech"}, {"i", "y"}
	end
	add_decl(base, stems, "a", "u", "-", "-", "u", "em",
		"a", "í", dat_p, "a", loc_p, ins_p)
end
declprops["semisoft-n"] = {cat = "semisoft"}
-- Soft neuters.
-- Examples: [[moře]] "sea", [[slunce]] "sun", [[srdce]] "heart", [[citoslovce]] "interjection",
-- [[dopoledne]] "late morning", [[odpoledne]] "afternoon", [[hoře]] "sorrow, grief" (archaic or literary),
-- [[inhalace]] "inhalation", [[kafe]] "coffee", [[kanape]] "sofa", [[kutě]] "bed", [[Labe]] "Elbe (singular only)",
-- [[líce]] "cheek", [[lože]] "bed", [[nebe]] "sky; heaven", [[ovoce]] "fruit", [[pole]] "field", [[poledne]]
-- "noon", [[příslovce]] "adverb", [[pukrle]] "curtsey" (also t-n), [[vejce]] "egg" (NOTE: gen pl 'vajec').
--
-- NOTE: Some neuter words in -e are indeclinable, e.g. [[Belize]], [[Chile]], [[garde]] "chaperone", [[karaoke]],
-- [[karate]], [[re]] "double raise (card games)", [[ukulele]], [[Zimbabwe]], [[zombie]] (pl. 'zombie' or
-- 'zombies'); some are nearly indeclinable, e.g. [[finále]], [[chucpe]]; see mostly-indecl below.
decls["soft-n"] = function(base, stems)
	-- Many nouns in -iště, with null genitive plural.
	local gen_p
	if rfind(base.vowel_stem, "išť$") then
		gen_p = ""
	else
		gen_p = "í"
	end
	add_decl(base, stems, "e", "i", "-", "-", "i", "em",
		"e", gen_p, "ím", "e", "ích", "i")
end
declprops["soft-n"] = {cat = "soft"}
-- Neuters in -í/-ý: [[nábřeží]] "waterfront" and a zillion others; also [[úterý]] "Tuesday".
-- NOTE: The stem ends in -í/-ý, so most endings are empty.
decls["í-n"] = function(base, stems)
	-- Endings are named after the slot each position of add_decl() appears to fill (inferred from how the other
	-- declensions in this module call it).
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "", "", "-", "-", "", "m"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "", "", "m", "", "ch", "mi"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
declprops["í-n"] = {cat = "GENPOS in -í/-ý"}
-- Neuter n-stems.
-- E.g. [[břemeno]] "burden" (also [[břímě]], use 'decllemma:'); [[písmeno]] "letter"; [[plemeno]] "breed";
-- [[rameno]] "shoulder" (also [[rámě]], use 'decllemma:'); [[semeno]] "seed" (also [[sémě]], [[símě]], use
-- 'decllemma:'); [[temeno]] "crown (of the head)"; [[vemeno]] "udder"
decls["n-n"] = function(base, stems)
	-- Endings are named after the slot each position of add_decl() appears to fill (inferred from how the other
	-- declensions in this module call it).
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = {"a", "e"}, {"i", "u"}, "-", "-", {"ě", "i", "u"}, "em"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "a", "", "ům", "a", "ech", "y"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
declprops["n-n"] = {cat = "n-stem"}
-- Neuter t-stems.
-- E.g. [[batole]] "toddler", [[čuně]] "pig", [[daňče]] "fallow deer fawn", [[děvče]] "girl", [[ďouče]] "girl"
-- (dialectal), [[dítě]] "child" (NOTE: feminine in the plural [[děti]], declined as a feminine i-stem), [[dvojče]]
-- "twin", [[hádě]] "young snake", [[house]] "gosling", [[hříbě]] "foal" (pl. hříbata), [[jehně]] "lamb", [[kavče]]
-- "young jackdaw; chough", [[káče]] "duckling", [[káně]] "buzzard chick" (NOTE: also feminine meaning "buzzard"),
-- [[klíště]] "tick", [[kose]] "blackbird chick" (rare), [[kuře]] "chick (young chicken)", [[kůzle]]
-- "kid (young goat)", [[lišče]] "fox cub", [[lvíče]] "lion cub", [[medvídě]] "bear cub", [[mládě]] "baby animal",
-- [[morče]] "guinea pig", [[mrně]] "toddler", [[nemluvně]] "infant", [[novorozeně]] "newborn", [[orle]] "eaglet",
-- [[osle]] "donkey foal", [[pachole]] "boy (obsolete); page, squire", [[páže]] "page, squire", [[podsvinče]]
-- "suckling pig", [[prase]] "pig", [[prtě]] "toddler", [[ptáče]] "chick (young bird)",
-- [[robě]] "baby, small child", [[saranče]] "locust" (NOTE: also feminine), [[sele]] "piglet",
-- [[slůně]] "baby elephant", [[škvrně]] "toddler", [[štěně]] "puppy", [[tele]] "calf", [[velbloudě]] "camel colt",
-- [[vlče]] "wolf cub", [[vnouče]] "grandchild", [[vyžle]] "small hunting dog; slender person",
-- [[zvíře]] "animal, beast".
--
-- Some referring to inanimates, e.g. [[doupě]] "lair" (pl. doupata), [[koště]]/[[chvoště]] "broom", [[paraple]]
-- "umbrella", [[poupě]] "bud", [[pukrle]] "curtsey" (also soft-n), [[rajče]] "tomato", [[šuple]] "drawer",
-- [[varle]] "testicle", [[vole]] "craw (of a bird); goiter".
decls["tstem-n"] = function(base, stems)
	-- Endings are named after the slot each position of add_decl() appears to fill (inferred from how the other
	-- declensions in this module call it).
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ete", "eti", "-", "-", "eti", "etem"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ata", "at", "atům", "ata", "atech", "aty"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
declprops["tstem-n"] = {cat = "t-stem"}
-- Neuters in -ma.
-- E.g. [[drama]] "drama", [[dogma]] "dogma", [[aneurysma]]/[[aneuryzma]] "aneurysm", [[dilema]] "dilemma",
-- [[gumma]] "gumma" (non-cancerous syphilitic growth), [[klima]] "climate", [[kóma]] "coma", [[lemma]] "lemma",
-- [[melisma]] "melisma", [[paradigma]] "paradigm", [[plasma]]/[[plazma]] "plasma [partly ionized gas]"
-- (note [[plasma]]/[[plazma]] "blood plasma" is feminine), [[revma]] "rheumatism", [[schéma]] "schema, diagram",
-- [[schisma]]/[[schizma]] "schism", [[smegma]] "smegma", [[sofisma]]/[[sofizma]] "sophism", [[sperma]] "sperm",
-- [[stigma]] "stigma", [[téma]] "theme", [[trauma]] "trauma", [[trilema]] "trilemma", [[zeugma]] "zeugma".
decls["ma-n"] = function(base, stems)
	-- Endings are named after the slot each position of add_decl() appears to fill (inferred from how the other
	-- declensions in this module call it).
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "atu", "atu", "-", "-", "atu", "atem"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ata", "at", "atům", "ata", "atech", "aty"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
declprops["ma-n"] = {cat = "ma-stem"}
-- Adjectival nouns: generate the adjective's forms via Module:cs-adjective and copy the ones matching the noun's
-- gender, animacy and number into `base.forms`.
decls["adj"] = function(base, stems)
	-- No adjective properties are collected here, so the joined spec is always empty and the lemma is passed through
	-- unchanged; the angle-bracket wrapping only applies if `props` ever gains entries.
	local props = {}
	local propspec = table.concat(props, ".")
	if propspec ~= "" then
		propspec = "<" .. propspec .. ">"
	end
	local adj_forms = require("Module:cs-adjective").do_generate_forms({base.lemma .. propspec}).forms

	-- Copy each {ADJECTIVE_SLOT, NOUN_SLOT} pair from the adjective forms into the noun forms.
	local function copy_slots(pairs_to_copy)
		for _, pair in ipairs(pairs_to_copy) do
			base.forms[pair[2]] = adj_forms[pair[1]]
		end
	end

	if base.number ~= "pl" then
		local sg_slots_by_gender = {
			-- No accusative is copied for masculines here.
			m = {
				{"nom_m", "nom_s"}, {"gen_mn", "gen_s"}, {"dat_mn", "dat_s"},
				{"loc_mn", "loc_s"}, {"ins_mn", "ins_s"},
			},
			f = {
				{"nom_f", "nom_s"}, {"gen_f", "gen_s"}, {"dat_f", "dat_s"},
				{"acc_f", "acc_s"}, {"loc_f", "loc_s"}, {"ins_f", "ins_s"},
			},
		}
		-- Anything that is neither "m" nor "f" uses the neuter slots.
		local neuter_sg_slots = {
			{"nom_n", "nom_s"}, {"gen_mn", "gen_s"}, {"dat_mn", "dat_s"},
			{"acc_n", "acc_s"}, {"loc_mn", "loc_s"}, {"ins_mn", "ins_s"},
		}
		copy_slots(sg_slots_by_gender[base.gender] or neuter_sg_slots)
		-- Default the vocative to the nominative when nothing has set it.
		if not base.forms.voc_s then
			iut.insert_forms(base.forms, "voc_s", base.forms.nom_s)
		end
	end

	if base.number ~= "sg" then
		local nom_source, acc_source
		if base.gender == "m" then
			-- Only animate masculines have a dedicated nominative plural; the rest share the feminine one.
			nom_source = base.animacy == "an" and "nom_mp_an" or "nom_fp"
			acc_source = "acc_mfp"
		elseif base.gender == "f" then
			nom_source, acc_source = "nom_fp", "acc_mfp"
		else
			nom_source, acc_source = "nom_np", "acc_np"
		end
		copy_slots({
			{nom_source, "nom_p"}, {acc_source, "acc_p"},
			{"gen_p", "gen_p"}, {"dat_p", "dat_p"}, {"ins_p", "ins_p"}, {"loc_p", "loc_p"},
		})
	end
end
-- Classify an adjectival lemma by its final letter: "hard" for -ý, "soft" for -í, otherwise "possessive".
local function get_stemtype(base)
	local lemma = base.lemma
	if rfind(lemma, "ý$") then
		return "hard"
	end
	if rfind(lemma, "í$") then
		return "soft"
	end
	return "possessive"
end
-- Categories for adjectival nouns: a general one plus one qualified by the stem type from get_stemtype().
declprops["adj"] = {
	cat = function(base, stems)
		local stemtype = get_stemtype(base)
		return {"adjectival POS", stemtype .. " GENDER adjectival POS"}
	end,
}
-- Mostly indeclinable nouns: every slot is "-" except the instrumental singular.
-- Several neuters: E.g. [[finále]] "final (sports)", [[čtvrtfinále]] "quarterfinal", [[chucpe]] "chutzpah",
-- [[penále]] "fine, penalty", [[promile]] "" (NOTE: loc pl also promilech), [[rande]] "rendezvous", [[semifinále]]
-- "semifinal", [[skóre]] "score".
decls["mostly-indecl"] = function(base, stems)
	local same = "-"
	-- At least one masculine animate: [[kamikaze]]/[[kamikadze]], where IJP says only -m in the ins sg.
	local ins_s
	if base.gender == "m" then
		ins_s = "m"
	else
		ins_s = {"-", "m"}
	end
	add_decl(base, stems, same, same, same, same, same, ins_s,
		same, same, same, same, same, same)
end
declprops["mostly-indecl"] = {cat = "mostly indeclinable"}
-- Indeclinable. Note that fully indeclinable nouns should not have a table at all rather than one all of whose forms
-- are the same; but having an indeclinable declension is useful for nouns that may or may not be indeclinable, e.g.
-- [[desatero]] "group of ten" or the plural of [[peso]], which may be indeclinable 'pesos'.
decls["indecl"] = function(base, stems)
	local same = "-"
	add_decl(base, stems, same, same, same, same, same, same,
		same, same, same, same, same, same)
end
declprops["indecl"] = {
	-- Adjectival indeclinables (`base.adj`) get the adjectival category set; everything else gets the plain one.
	cat = function(base, stems)
		if not base.adj then
			return {"indeclinable POS", "indeclinable GENPOS"}
		end
		return {"adjectival POS", "indeclinable adjectival POS", "indeclinable GENDER adjectival POS"}
	end
}
-- Anything declined manually using overrides. We don't set any declensions except the nom_s (or nom_p if plurale
-- tantum).
decls["manual"] = function(base, stems)
	local lemma_slot = "nom_s"
	if base.number == "pl" then
		lemma_slot = "nom_p"
	end
	add(base, lemma_slot, stems, "-")
end
declprops["manual"] = {desc = "GENDER", cat = {}}
-- Fill in gender, number, animacy (plus their `actual_*` copies) and `has_clitic` on `base` for a pronoun, based
-- solely on its lemma. Throws an error if the user supplied gender or animacy, or supplied number for any lemma
-- other than 'ona' (which requires it to choose between the feminine singular and the neuter plural), or if the
-- lemma is not a recognized pronoun.
local function set_pron_defaults(base)
	if base.gender or base.lemma ~= "ona" and base.number or base.animacy then
		error("Can't specify gender, number or animacy for pronouns")
	end

	-- LEMMA -> {GENDER, NUMBER, ANIMACY, HAS_CLITIC} for all lemmas whose properties don't depend on anything else.
	local fixed_props = {
		["kdo"] = {"none", "sg", "an", false},
		["co"] = {"none", "sg", "inan", false},
		["já"] = {"none", "sg", "an", true},
		["ty"] = {"none", "sg", "an", true},
		["my"] = {"none", "pl", "an", false},
		["vy"] = {"none", "pl", "an", false},
		["on"] = {"m", "sg", "none", true},
		["ono"] = {"n", "sg", "inan", true},
		["oni"] = {"m", "pl", "an", false},
		["ony"] = {"none", "pl", "none", false},
		["sebe"] = {"none", "none", "none", true},
	}

	local lemma = base.lemma
	local props
	if lemma == "ona" then
		if base.number == "sg" then
			props = {"f", "sg", "none", false}
		elseif base.number == "pl" then
			props = {"n", "pl", "inan", false}
		else
			error("Must specify '.sg' or '.pl' with lemma 'ona'")
		end
	else
		props = fixed_props[lemma]
		if not props then
			error(("Unrecognized pronoun '%s'"):format(lemma))
		end
	end

	base.gender, base.actual_gender = props[1], props[1]
	base.number, base.actual_number = props[2], props[2]
	base.animacy, base.actual_animacy = props[3], props[3]
	base.has_clitic = props[4]
end
-- Give a pronoun a single non-reducible stem set with empty stems and select the "pron" declension. Throws an error
-- if stem sets (reducible/vowel-alternation specs) were already given.
local function determine_pronoun_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with pronouns")
	end
	local empty_stem_set = {reducible = false, vowel_stem = "", nonvowel_stem = ""}
	base.decl = "pron"
	base.stem_sets = {empty_stem_set}
end
-- Declension of personal, interrogative and reflexive pronouns. The forms are written out in full per lemma (the
-- pronoun stem sets are empty strings). Several lemmas make more than one add_*() call so that plain forms,
-- after-preposition forms and dual forms are all added, the latter two tagged with a footnote. Throws an internal
-- error for an unrecognized lemma.
-- The argument order of add_sg_decl_with_clitic() appears to be gen, clitic gen, dat, clitic dat, acc, clitic acc,
-- voc, loc, ins, footnotes, "no nom_s" flag -- inferred from the calls below; confirm against its definition.
decls["pron"] = function(base, stems)
local after_prep_footnote = "[after a preposition]"
local dual_footnote = "[when referring to dual nouns, e.g. [[oči]], [[ruce]]]"
local animate_footnote = "[animate]"
if base.lemma == "kdo" then
add_decl(base, stems, "koho", "komu", nil, nil, "kom", "kým")
elseif base.lemma == "co" then
add_decl(base, stems, "čeho", "čemu", nil, nil, "čem", "čím")
elseif base.lemma == "já" then
add_sg_decl_with_clitic(base, stems, "mne", "mě", "mně", "mi", nil, nil, nil, "mně", "mnou")
elseif base.lemma == "ty" then
add_sg_decl_with_clitic(base, stems, "tebe", "tě", "tobě", "ti", nil, nil, nil, "tobě", "tebou")
elseif base.lemma == "my" then
add_pl_only_decl(base, stems, "nás", "nám", "nás", "nás", "námi")
-- Second call adds only the dual instrumental variant.
add_pl_only_decl(base, stems, nil, nil, nil, nil, "náma", dual_footnote)
elseif base.lemma == "vy" then
add_pl_only_decl(base, stems, "vás", "vám", "vás", "vás", "vámi")
-- Second call adds only the dual instrumental variant.
add_pl_only_decl(base, stems, nil, nil, nil, nil, "váma", dual_footnote)
elseif base.lemma == "on" or base.lemma == "ono" then
-- 'on' and 'ono' share most forms; only the accusative variants differ.
local acc_s = base.lemma == "on" and "jej" or {"jej", "je"}
local clitic_acc_s = base.lemma == "on" and {"jej", "ho"} or {"jej", "ho", "je"}
local prep_acc_s = base.lemma == "on" and "něj" or {"něj", "ně"}
local prep_clitic_acc_s = base.lemma == "on" and "-ň" or nil
add_sg_decl_with_clitic(base, stems, {"jeho", "jej"}, {"ho", "jej"}, "jemu", "mu", acc_s, clitic_acc_s, nil, nil, "jím")
add_sg_decl_with_clitic(base, stems, {"něho", "něj"}, nil, "němu", nil, prep_acc_s, prep_clitic_acc_s, nil, "něm", "ním",
after_prep_footnote)
if base.lemma == "on" then
-- Extra animate-only accusative forms for masculine 'on'.
add_sg_decl_with_clitic(base, stems, nil, nil, nil, nil, "jeho", nil, nil, nil, nil,
animate_footnote)
-- NOTE(review): `after_prep_footnote and animate_footnote` evaluates to just `animate_footnote` (a non-empty
-- string is truthy), so 'něho' is tagged "[animate]" only and not "[after a preposition]". If both footnotes
-- were intended, this needs a different construction; confirm what add_sg_decl_with_clitic() accepts here.
add_sg_decl_with_clitic(base, stems, nil, nil, nil, nil, "něho", nil, nil, nil, nil,
after_prep_footnote and animate_footnote)
end
elseif base.lemma == "ona" and base.number == "sg" then
add_sg_decl(base, stems, "jí", "jí", "ji", nil, nil, "jí")
add_sg_decl(base, stems, "ní", "ní", "ni", nil, "ní", "ní", after_prep_footnote)
-- Any 'ona' reaching this branch is not singular (the singular case is handled just above).
elseif base.lemma == "oni" or base.lemma == "ony" or base.lemma == "ona" then
add_pl_only_decl(base, stems, "jich", "jim", "je", nil, "jimi")
add_pl_only_decl(base, stems, "nich", "nim", "ně", "nich", "nimi", after_prep_footnote)
add_pl_only_decl(base, stems, nil, nil, nil, nil, "jima", dual_footnote)
add_pl_only_decl(base, stems, nil, nil, nil, nil, "nima", dual_footnote)
elseif base.lemma == "sebe" then
-- Underlyingly we handle [[sebe]]'s slots as singular.
add_sg_decl_with_clitic(base, stems, "sebe", "sebe", "sobě", "si", "sebe", "se", nil, "sobě", "sebou",
nil, "no nom_s")
else
error(("Internal error: Unrecognized pronoun lemma '%s'"):format(base.lemma))
end
end
-- Pronouns get a gender-based description and no categories.
declprops["pron"] = {
desc = "GENDER pronoun",
cat = {},
}
-- Fill in the fixed properties of a numeral on `base`: gender "none", number "pl", animacy "none" (each also copied
-- to its `actual_*` field) and no clitic forms. Throws an error if the user supplied gender, number or animacy.
local function set_num_defaults(base)
	if base.gender or base.number or base.animacy then
		error("Can't specify gender, number or animacy for numeral")
	end
	base.gender, base.actual_gender = "none", "none"
	base.number, base.actual_number = "pl", "pl"
	base.animacy, base.actual_animacy = "none", "none"
	base.has_clitic = false
end
-- Give a numeral a single non-reducible stem set and select the "num" declension. The stem is the lemma minus one
-- final character matching `com.vowel_c`, or the whole lemma when there is no such match. Throws an error if stem
-- sets (reducible/vowel-alternation specs) were already given.
local function determine_numeral_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with numerals")
	end
	local lemma = base.lemma
	local stem = rmatch(lemma, "^(.*)" .. com.vowel_c .. "$")
	if not stem then
		stem = lemma
	end
	base.stem_sets = {{reducible = false, vowel_stem = stem, nonvowel_stem = stem}}
	base.decl = "num"
end
-- Declension of numerals. [[devět]] and the hundreds forms are written out in full; everything else takes -i in
-- every oblique slot, with an additional -íti variant for [[deset]] and the numerals in -cet.
decls["num"] = function(base, stems)
	local lemma = base.lemma
	if lemma == "devět" then
		add_pl_only_decl(base, "", "devíti", "devíti", "-", "devíti", "devíti", stems.footnotes)
		return
	end
	if lemma == "sta" or lemma == "stě" or lemma == "set" then
		add_pl_only_decl(base, "", "set", "stům", "-", "stech", "sty", stems.footnotes)
		return
	end
	-- [[deset]] and all numbers ending in -cet ([[dvacet]], [[třicet]], [[čtyřicet]] and inverted compound
	-- numerals such as [[pětadvacet]] "25" and [[dvaatřicet]] "32") additionally get forms built on the part of the
	-- lemma before -et.
	local begin = rfind(lemma, "[cs]et$") and rmatch(lemma, "^(.*)et$")
	add_pl_only_decl(base, stems, "i", "i", "-", "i", "i")
	if begin then
		add_pl_only_decl(base, begin, "íti", "íti", "-", "íti", "íti", stems.footnotes)
	end
end
declprops["num"] = {desc = "GENDER numeral", cat = {}}
-- Fill in the fixed properties of a determiner on `base`: gender, number and animacy all "none" (each also copied to
-- its `actual_*` field) and no clitic forms. Throws an error if the user supplied gender, number or animacy.
local function set_det_defaults(base)
	if base.gender or base.number or base.animacy then
		error("Can't specify gender, number or animacy for determiner")
	end
	base.gender, base.actual_gender = "none", "none"
	base.number, base.actual_number = "none", "none"
	base.animacy, base.actual_animacy = "none", "none"
	base.has_clitic = false
end
-- Give a determiner a single non-reducible stem set and select the "det" declension. The stem is the lemma minus one
-- final character matching `com.vowel_c`, or the whole lemma when there is no such match. Throws an error if stem
-- sets (reducible/vowel-alternation specs) were already given.
local function determine_determiner_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with determiners")
	end
	local lemma = base.lemma
	local stem = rmatch(lemma, "^(.*)" .. com.vowel_c .. "$")
	if not stem then
		stem = lemma
	end
	base.stem_sets = {{reducible = false, vowel_stem = stem, nonvowel_stem = stem}}
	base.decl = "det"
end
-- Declension of determiners: -a in every oblique slot that is set, "-" in the accusative, nothing for the vocative.
decls["det"] = function(base, stems)
	-- Endings are named after the slot each position of add_sg_decl() appears to fill (inferred from the pronoun
	-- calls elsewhere in this module).
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "a", "a", "-", nil, "a", "a"
	add_sg_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s)
end
declprops["det"] = {desc = "GENDER determiner", cat = {}}
-- Collect the footnotes from a group in which text and bracketed footnotes alternate: the even positions hold the
-- footnotes and every odd position after the first must be the empty string. Returns the list of footnotes, or nil
-- if there are none. Throws an error if non-empty text follows a footnote.
local function fetch_footnotes(separated_group)
	local collected
	local last_footnote_index = #separated_group - 1
	local idx = 2
	while idx <= last_footnote_index do
		if separated_group[idx + 1] ~= "" then
			error("Extraneous text after bracketed footnotes: '" .. table.concat(separated_group) .. "'")
		end
		collected = collected or {}
		collected[#collected + 1] = separated_group[idx]
		idx = idx + 2
	end
	return collected
end
--[=[
Parse a single override spec (e.g. 'nomplé:ové' or 'ins:autodráhou:autodrahou[rare]') and return
two values: the slot(s) the override applies to, and an object describing the override spec.
The input is actually a list where the footnotes have been separated out; for example,
given the spec 'inspl:čobotami:čobotámi[rare]:čobitmi[archaic]', the input will be a list
{"inspl:čobotami:čobotámi", "[rare]", ":čobitmi", "[archaic]", ""}. The object returned
for 'ins:autodráhou:autodrahou[rare]' looks like this:
{
full = true,
values = {
{
form = "autodráhou"
},
{
form = "autodrahou",
footnotes = {"[rare]"}
}
}
}
The object returned for 'nomplé:ové' looks like this:
{
values = {
{
form = "é",
},
{
form = "ové",
}
}
}
]=]
-- Parse one override spec whose footnotes have already been separated out into `segments`. Returns two values: the
-- list of slots the override applies to (several may be joined with '+') and the override object, which has a
-- `values` list of {form = ..., footnotes = ...} entries and `full = true` when the slot prefix is followed by a
-- colon. NOTE: `segments[1]` is overwritten with the text that remains after the slot prefix.
-- Throws an error on an unrecognized case, an unrecognized clitic case, or an empty form (use '-' for an empty
-- ending).
local function parse_override(segments)
	-- Remember the spec as written. `segments[1]` is rewritten below, so building the message from `segments` later
	-- on would quote the override with its slot prefix missing.
	local orig_spec = table.concat(segments)
	local retval = {values = {}}
	local part = segments[1]
	local slots = {}
	-- Consume one or more '+'-joined slot prefixes: a three-character case, optionally followed by 'pl' (plural)
	-- or 'cl' (singular clitic).
	while true do
		local case = usub(part, 1, 3)
		if not cases[case] then
			error(("Unrecognized case '%s' in override: '%s'"):format(case, orig_spec))
		end
		part = usub(part, 4)
		local slot
		if rfind(part, "^pl") then
			part = usub(part, 3)
			slot = case .. "_p"
		elseif rfind(part, "^cl") then
			-- No plural clitic cases at this point.
			part = usub(part, 3)
			if clitic_cases[case] then
				slot = "clitic_" .. case .. "_s"
			else
				error(("Unrecognized clitic case '%s' in override: '%s'"):format(case, orig_spec))
			end
		else
			slot = case .. "_s"
		end
		table.insert(slots, slot)
		if rfind(part, "^%+") then
			part = usub(part, 2)
		else
			break
		end
	end
	-- A colon directly after the slot prefix marks the override values as full forms.
	if rfind(part, "^:") then
		retval.full = true
		part = usub(part, 2)
	end
	segments[1] = part
	local colon_separated_groups = put.split_alternating_runs_and_strip_spaces(segments, ":")
	for _, colon_separated_group in ipairs(colon_separated_groups) do
		local value = {}
		local form = colon_separated_group[1]
		if form == "" then
			-- Separate multiple slot names so they don't run together in the message.
			error(("Use - to indicate an empty ending for slot%s '%s': '%s'"):format(
				#slots > 1 and "s" or "", table.concat(slots, ", "), orig_spec))
		elseif form == "-" then
			value.form = ""
		else
			value.form = form
		end
		value.footnotes = fetch_footnotes(colon_separated_group)
		table.insert(retval.values, value)
	end
	return slots, retval
end
--[=[
Parse an indicator spec (text consisting of angle brackets and zero or more
dot-separated indicators within them). Return value is an object of the form
{
overrides = {
SLOT = {OVERRIDE, OVERRIDE, ...}, -- as returned by parse_override()
...
},
forms = {}, -- forms for a single spec alternant; see `forms` below
footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
stem_sets = { -- may be missing
{
reducible = TRUE_OR_FALSE,
footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
-- The following fields are filled in by determine_stems()
vowel_stem = "STEM",
nonvowel_stem = "STEM",
oblique_slots = one of {nil, "gen_p", "all", "all-oblique"},
oblique_vowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
oblique_nonvowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
},
...
},
gender = "GENDER", -- "m", "f", "n"
number = "NUMBER", -- "sg", "pl"; may be missing
animacy = "ANIMACY", -- "inan", "an"; may be missing
hard = true, -- may be missing
soft = true, -- may be missing
mixed = true, -- may be missing
surname = true, -- may be missing
istem = true, -- may be missing
["-istem"] = true, -- may be missing
tstem = true, -- may be missing
nstem = true, -- may be missing
tech = true, -- may be missing
foreign = true, -- may be missing
mostlyindecl = true, -- may be missing
indecl = true, -- may be missing
manual = true, -- may be missing
adj = true, -- may be missing
decllemma = "DECLENSION-LEMMA", -- may be missing
declgender = "DECLENSION-GENDER", -- may be missing
declnumber = "DECLENSION-NUMBER", -- may be missing
-- The following additional fields are added by other functions:
orig_lemma = "ORIGINAL-LEMMA", -- as given by the user
orig_lemma_no_links = "ORIGINAL-LEMMA-NO-LINKS", -- links removed
lemma = "LEMMA", -- `orig_lemma_no_links`, converted to singular form if plural and lowercase if all-uppercase
forms = {
SLOT = {
{
form = "FORM",
footnotes = {"FOOTNOTE", "FOOTNOTE", ...} -- may be missing
},
...
},
...
},
decl = "DECL", -- declension, e.g. "hard-m"
vowel_stem = "VOWEL-STEM", -- derived from vowel-ending lemmas
nonvowel_stem = "NONVOWEL-STEM", -- derived from non-vowel-ending lemmas
}
]=]
-- Parse the indicator spec `angle_bracket_spec` (text wrapped in angle brackets) into a new `base` object and return
-- it. The text inside the brackets is split into dot-separated indicators, and each indicator is dispatched on the
-- shape of its text: slot override, bare footnotes, reducible/vowel-alternation pattern, gender, number, animacy,
-- boolean flag, '+' (adjectival), '!' (manual), or one of the 'mixedistem:', 'decllemma:', 'declgender:' and
-- 'declnumber:' prefixed values. Throws an error for an unrecognized or repeated indicator, or for footnotes
-- attached to an indicator that does not accept them.
local function parse_indicator_spec(angle_bracket_spec)
local inside = rmatch(angle_bracket_spec, "^<(.*)>$")
assert(inside)
local base = {overrides = {}, forms = {}}
if inside ~= "" then
-- Bracketed footnotes are separated out first, so that the split on dots below is done on the footnote-separated
-- segment list rather than on the raw string.
local segments = put.parse_balanced_segment_run(inside, "[", "]")
local dot_separated_groups = put.split_alternating_runs_and_strip_spaces(segments, "%.")
for i, dot_separated_group in ipairs(dot_separated_groups) do
-- The first element is the indicator text; any further elements belong to footnotes.
local part = dot_separated_group[1]
local case_prefix = usub(part, 1, 3)
if cases[case_prefix] then
-- Slot override; one override may name several slots, and each slot may be overridden only once.
local slots, override = parse_override(dot_separated_group)
for _, slot in ipairs(slots) do
if base.overrides[slot] then
error(("Two overrides specified for slot '%s'"):format(slot))
else
base.overrides[slot] = {override}
end
end
elseif part == "" then
-- No indicator text: only valid when footnotes are present, which then apply to `base` as a whole.
if #dot_separated_group == 1 then
error("Blank indicator: '" .. inside .. "'")
end
base.footnotes = fetch_footnotes(dot_separated_group)
elseif rfind(part, "^[-*#ě]*$") or rfind(part, "^[-*#ě]*,") then
-- Reducible/vowel-alternation spec: one or more comma-separated patterns, each producing one stem set.
if base.stem_sets then
error("Can't specify reducible/vowel-alternant indicator twice: '" .. inside .. "'")
end
local comma_separated_groups = put.split_alternating_runs_and_strip_spaces(dot_separated_group, ",")
local stem_sets = {}
for i, comma_separated_group in ipairs(comma_separated_groups) do
local pattern = comma_separated_group[1]
local orig_pattern = pattern
local reducible, vowelalt, oblique_slots
if pattern == "-" then
-- default reducible, no vowel alt
else
-- Pull out a '*' (reducible) or '-*' (explicitly not reducible) marker; what remains of the pattern,
-- if anything, is the vowel-alternation part. Without a marker, `reducible` stays nil.
local before, after
before, reducible, after = rmatch(pattern, "^(.-)(%-?%*)(.-)$")
if before then
pattern = before .. after
reducible = reducible == "*"
end
if pattern ~= "" then
if not rfind(pattern, "^##?ě?$") then
error("Unrecognized vowel-alternation pattern '" .. pattern .. "', should be one of #, ##, #ě or ##ě: '" .. inside .. "'")
end
if pattern == "#ě" or pattern == "##ě" then
vowelalt = "quant-ě"
else
vowelalt = "quant"
end
-- `oblique_slots` will be later changed to "all" if the lemma ends in a consonant.
if pattern == "##" or pattern == "##ě" then
oblique_slots = "all-oblique"
else
oblique_slots = "gen_p"
end
end
end
table.insert(stem_sets, {
reducible = reducible,
vowelalt = vowelalt,
oblique_slots = oblique_slots,
footnotes = fetch_footnotes(comma_separated_group)
})
end
base.stem_sets = stem_sets
elseif #dot_separated_group > 1 then
-- Every indicator kind handled below this point rejects footnotes.
error("Footnotes only allowed with slot overrides, reducible or vowel alternation specs or by themselves: '" .. table.concat(dot_separated_group) .. "'")
elseif part == "m" or part == "f" or part == "n" then
if base.gender then
error("Can't specify gender twice: '" .. inside .. "'")
end
base.gender = part
elseif part == "sg" or part == "pl" then
if base.number then
error("Can't specify number twice: '" .. inside .. "'")
end
base.number = part
elseif part == "an" or part == "inan" then
if base.animacy then
error("Can't specify animacy twice: '" .. inside .. "'")
end
base.animacy = part
-- Boolean flags: each is stored on `base` under its own name.
elseif part == "hard" or part == "soft" or part == "mixed" or part == "surname" or part == "istem" or
part == "-istem" or part == "tstem" or part == "nstem" or part == "tech" or part == "foreign" or
part == "mostlyindecl" or part == "indecl" or part == "pron" or part == "det" or part == "num" or
-- Use 'velar' with words like [[petanque]] and [[Braque]] that end with a pronounced velar (and hence are declined
-- like velars) but not with a spelled velar; use '-velar' with words like [[hadíth]] that end with a spelled but
-- silent velar.
part == "collapse_ee" or part == "persname" or part == "c_as_k" or part == "velar" or part == "-velar" then
if base[part] then
error("Can't specify '" .. part .. "' twice: '" .. inside .. "'")
end
base[part] = true
-- Allow 'hard' to signal that -y is allowed after -c, as in hard masculine nouns such as [[hec]]
-- "joke", and also feminines in -ca where the c is pronounced as /k/, e.g. [[ayahuasca]], [[pororoca]],
-- [[Petrarca]], [[Mallorca]], [[Casablanca]]. (Contrast [[mangalica]], [[Kusturica]], [[Bjelica]],
-- where the c is pronounced as /ts/ and -y is disallowed.)
if part == "hard" then
base.hard_c = true
end
elseif part == "+" then
-- '+' marks an adjectival noun.
if base.adj then
error("Can't specify '+' twice: '" .. inside .. "'")
end
base.adj = true
elseif part == "!" then
-- '!' marks a manually declined noun.
if base.manual then
error("Can't specify '!' twice: '" .. inside .. "'")
end
base.manual = true
-- The remaining indicators are 'NAME:VALUE' pairs; the value is stored as given and not validated here.
elseif rfind(part, "^mixedistem:") then
if base.mixedistem then
error("Can't specify 'mixedistem:' twice: '" .. inside .. "'")
end
base.mixedistem = rsub(part, "^mixedistem:", "")
elseif rfind(part, "^decllemma:") then
if base.decllemma then
error("Can't specify 'decllemma:' twice: '" .. inside .. "'")
end
base.decllemma = rsub(part, "^decllemma:", "")
elseif rfind(part, "^declgender:") then
if base.declgender then
error("Can't specify 'declgender:' twice: '" .. inside .. "'")
end
base.declgender = rsub(part, "^declgender:", "")
elseif rfind(part, "^declnumber:") then
if base.declnumber then
error("Can't specify 'declnumber:' twice: '" .. inside .. "'")
end
base.declnumber = rsub(part, "^declnumber:", "")
else
error("Unrecognized indicator '" .. part .. "': '" .. inside .. "'")
end
end
end
return base
end
-- Return true if `base` is an ordinary noun, i.e. none of the adjectival, pronoun, determiner or numeral flags is
-- set; false otherwise.
local function is_regular_noun(base)
	return not (base.adj or base.pron or base.det or base.num)
end
-- Record the user-visible number in `base.actual_number`, then, if a 'declnumber' was given, make it the number used
-- for declension. Throws an error if 'declnumber' is anything other than 'sg' or 'pl'.
local function process_declnumber(base)
	base.actual_number = base.number
	local declnumber = base.declnumber
	if not declnumber then
		return
	end
	if declnumber ~= "sg" and declnumber ~= "pl" then
		error(("Unrecognized value '%s' for 'declnumber', should be 'sg' or 'pl'"):format(declnumber))
	end
	base.number = declnumber
end
-- Fill in default properties on `base` and reject incompatible indicator combinations. Pronouns, determiners and
-- numerals delegate to their own default-setting functions; adjectival nouns get no defaults here; regular nouns
-- get number "both" and animacy "inan" unless given, must have a gender (unless manual, where it becomes "none"),
-- and have any 'declnumber'/'declgender' applied after the user-visible values are saved in the `actual_*` fields.
local function set_defaults_and_check_bad_indicators(base)
	-- Set default values.
	local regular_noun = is_regular_noun(base)
	if base.pron then
		set_pron_defaults(base)
	elseif base.det then
		set_det_defaults(base)
	elseif base.num then
		set_num_defaults(base)
	elseif not base.adj then
		if not base.gender then
			if not base.manual then
				error("For nouns, gender must be specified")
			end
			base.gender = "none"
		end
		if not base.number then
			base.number = "both"
		end
		process_declnumber(base)
		if not base.animacy then
			base.animacy = "inan"
		end
		base.actual_gender = base.gender
		base.actual_animacy = base.animacy
		local declgender = base.declgender
		if declgender then
			if declgender == "m-an" or declgender == "m-in" then
				base.gender = "m"
				base.animacy = declgender == "m-an" and "an" or "inan"
			elseif declgender == "f" or declgender == "n" then
				-- Feminine and neuter leave the animacy untouched.
				base.gender = declgender
			else
				error(("Unrecognized value '%s' for 'declgender', should be 'm-an', 'm-in', 'f' or 'n'"):format(declgender))
			end
		end
	end

	-- Check for bad indicator combinations.
	local stem_class_count = 0
	for _, flag in ipairs({"hard", "soft", "mixed"}) do
		if base[flag] then
			stem_class_count = stem_class_count + 1
		end
	end
	if stem_class_count > 1 then
		error("At most one of 'hard', 'soft' and 'mixed' can be specified")
	end
	local wants_istem, rejects_istem = base.istem, base["-istem"]
	if wants_istem and rejects_istem then
		error("'istem' and '-istem' cannot be specified together")
	end
	if wants_istem or rejects_istem then
		if base.gender ~= "f" then
			error("'istem' and '-istem' can only be specified with the feminine gender")
		end
		if not regular_noun then
			error("'istem' and '-istem' can only be specified with regular nouns")
		end
	end
	if base.declgender and not regular_noun then
		error("'declgender' can only be specified with regular nouns")
	end
end
-- Run set_defaults_and_check_bad_indicators() on every word spec and record on `alternant_multiword_spec` whether
-- any clitic was seen, and which of pronoun/determiner/numeral words (and non-pronoun/non-determiner/non-numeral
-- words) were encountered.
local function set_all_defaults_and_check_bad_indicators(alternant_multiword_spec)
	local is_multiword = #alternant_multiword_spec.alternant_or_word_specs > 1

	-- Set either `saw_<kind>` or `saw_non_<kind>` depending on whether `base.<kind>` is set.
	local function note_kind(base, kind)
		if base[kind] then
			alternant_multiword_spec["saw_" .. kind] = true
		else
			alternant_multiword_spec["saw_non_" .. kind] = true
		end
	end

	iut.map_word_specs(alternant_multiword_spec, function(base)
		set_defaults_and_check_bad_indicators(base)
		base.multiword = is_multiword -- FIXME: not currently used; consider deleting
		alternant_multiword_spec.has_clitic = alternant_multiword_spec.has_clitic or base.has_clitic
		note_kind(base, "pron")
		note_kind(base, "det")
		note_kind(base, "num")
	end)
end
-- Reverse the second palatalization at the end of `word`: the first matching ending in the rule list below is
-- replaced by its unpalatalized counterpart. The first two rules only apply when `is_adjective` is truthy. If no
-- rule matches, `word` is returned unchanged. `base` is accepted but not consulted.
local function undo_second_palatalization(base, word, is_adjective)
	local rules = {
		{from = "št", to = "sk", adj_only = true},
		{from = "čt", to = "ck", adj_only = true},
		{from = "c", to = "k"}, -- FIXME, this could be wrong and c correct
		{from = "ř", to = "r"},
		{from = "z", to = "h"}, -- FIXME, this could be wrong and z or g correct
		{from = "š", to = "ch"},
	}
	for _, rule in ipairs(rules) do
		if is_adjective or not rule.adj_only then
			local prefix = rmatch(word, "^(.*)" .. rule.from .. "$")
			if prefix then
				return prefix .. rule.to
			end
		end
	end
	return word
end
-- For a plural-only lemma, synthesize a likely singular lemma. It doesn't have to be
-- theoretically correct as long as it generates all the correct plural forms.
--
-- Reads `base.lemma`, `base.gender`, `base.animacy`, `base.indecl`, `base.soft` and `base.mixed`; creates
-- `base.stem_sets` if missing; may set `base.soft`, `base.istem` and `stems.reducible`; finally overwrites
-- `base.lemma` with the synthesized singular. Throws an error if the plural lemma has an ending that is not expected
-- for its gender, or if different stem sets would lead to different singular lemmas.
local function synthesize_singular_lemma(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end
	local lemma_determined
	-- Loop over all stem sets in case the user specified multiple ones (e.g. '*,-*'). If we try to reconstruct
	-- different lemmas for different stem sets, we'll throw an error below.
	for _, stems in ipairs(base.stem_sets) do
		local stem, lemma
		-- This loop runs at most once; it only exists so that the branches can exit early with `break`.
		while true do
			if base.indecl then
				-- If specified as indeclinable, leave it alone; e.g. 'pesos' indeclinable plural of [[peso]].
				lemma = base.lemma
				break
			elseif base.gender == "m" then
				if base.animacy == "an" then
					stem = rmatch(base.lemma, "^(.*)i$")
					if stem then
						if base.soft then
							-- [[Blíženci]] "Gemini"
							-- Since the nominative singular has no ending, no ending is passed. (This call formerly
							-- passed an undeclared variable `ending`, i.e. an accidental read of a global.)
							lemma = com.convert_paired_plain_to_palatal(stem)
						else
							lemma = undo_second_palatalization(base, stem)
						end
					else
						stem = rmatch(base.lemma, "^(.*)ové$") or rmatch(base.lemma, "^(.*)é$")
						if stem then
							-- [[manželé]] "married couple", [[Velšané]] "Welsh people"
							lemma = stem
						else
							error(("Animate masculine plural-only lemma '%s' should end in -i, -ové or -é"):format(base.lemma))
						end
					end
				else
					stem = rmatch(base.lemma, "^(.*)y$")
					if stem then
						-- [[droby]] "giblets"; [[tvarůžky]] "Olomouc cheese"; [[alimenty]] "alimony"; etc.
						lemma = stem
					else
						local ending
						stem, ending = rmatch(base.lemma, "^(.*)([eě])$")
						if stem then
							-- [[peníze]] "money", [[tvargle]] "Olomouc cheese" (mixed declension), [[údaje]] "data",
							-- [[Lazce]] (a village), [[lováče]] "money", [[Krkonoše]] "Giant Mountains", [[kříže]] "clubs"
							lemma = com.convert_paired_plain_to_palatal(stem, ending)
							if not base.mixed then
								base.soft = true
							end
						else
							error(("Inanimate masculine plural-only lemma '%s' should end in -y, -e or -ě"):format(base.lemma))
						end
					end
				end
				-- Only when the user gave no explicit reducibility do we guess it (and dereduce accordingly).
				if stems.reducible == nil then
					if rfind(lemma, com.cons_c .. "[ck]$") and not com.is_monosyllabic(base.lemma) then
						stems.reducible = true
					end
					if stems.reducible then
						lemma = dereduce(base, lemma)
					end
				end
				break
			elseif base.gender == "f" then
				stem = rmatch(base.lemma, "^(.*)y$")
				if stem then
					lemma = stem .. "a"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eě]$")
				if stem then
					-- Singular like the plural. Cons-stem feminines like [[dlaň]] "palm (of the hand)" have identical
					-- plurals to soft-stem feminines like [[růže]] (modulo e/ě differences), so we don't need to
					-- reconstruct the former type.
					lemma = base.lemma
					break
				end
				stem = rmatch(base.lemma, "^(.*)i$")
				if stem then
					-- i-stems.
					lemma = stem
					base.istem = true
					break
				end
				error(("Feminine plural-only lemma '%s' should end in -y, -ě, -e or -i"):format(base.lemma))
			elseif base.gender == "n" then
				-- -ata nouns like [[slůně]] "baby elephant" nom pl 'slůňata' are declined in the plural same as if
				-- the singular were 'slůňato' so we don't have to worry about them.
				stem = rmatch(base.lemma, "^(.*)a$")
				if stem then
					lemma = stem .. "o"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eěí]$")
				if stem then
					-- singular lemma also in -e, -ě or -í; e.g. [[věčná loviště]] "[[happy hunting ground]]"
					lemma = base.lemma
					break
				end
				error(("Neuter plural-only lemma '%s' should end in -a, -í, -ě or -e"):format(base.lemma))
			else
				error(("Internal error: Unrecognized gender '%s'"):format(base.gender))
			end
		end
		if lemma_determined and lemma_determined ~= lemma then
			error(("Attempt to set two different singular lemmas '%s' and '%s'"):format(lemma_determined, lemma))
		end
		lemma_determined = lemma
	end
	base.lemma = lemma_determined
end
-- For an adjectival lemma, synthesize the masc singular form.
--
-- Depending on `base.number` and `base.gender`, the ending of `base.lemma` is inspected and, where needed,
-- `base.lemma` is rewritten to the masculine singular form. `base.decl` is set to "indecl" for indeclinable terms
-- and to "adj" otherwise. Afterwards `base.stem_sets` is created if missing, and the stem that was extracted is
-- stored as both `vowel_stem` and `nonvowel_stem` of every stem set. Throws an error if the lemma has an ending that
-- is not expected for its gender/number/animacy.
local function synthesize_adj_lemma(base)
	local stem
	if base.indecl then
		base.decl = "indecl"
		stem = base.lemma
	else
		-- Convert a possessive stem in -ov to -ův (the masculine singular form); other stems are unchanged.
		local function sub_ov(stem)
			-- Assign to a variable first so that only the first return value of gsub() is returned.
			stem = stem:gsub("ov$", "ův")
			return stem
		end
		-- This loop runs at most once; it only exists so that the branches can exit early with `break`.
		while true do
			if base.number == "pl" then
				if base.gender == "m" then
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						if base.soft then
							-- nothing to do
						else
							if base.animacy ~= "an" then
								error(("Masculine plural-only adjectival lemma '%s' ending in -í can only be animate unless '.soft' is specified"):format(base.lemma))
							end
							base.lemma = undo_second_palatalization(base, stem, "is adjective") .. "ý"
						end
						break
					end
					stem = rmatch(base.lemma, "^(.*)é$")
					if stem then
						if base.animacy == "an" then
							error(("Masculine plural-only adjectival lemma '%s' ending in -é must be inanimate"):format(base.lemma))
						end
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)i$") or rmatch(base.lemma, "^(.*in)i$")
					if stem then
						if base.animacy ~= "an" then
							error(("Masculine plural-only possessive adjectival lemma '%s' ending in -i must be animate"):format(base.lemma))
						end
						base.lemma = sub_ov(stem)
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$")
					if stem then
						if base.animacy == "an" then
							error(("Masculine plural-only possessive adjectival lemma '%s' ending in -y must be inanimate"):format(base.lemma))
						end
						base.lemma = sub_ov(stem)
						break
					end
					if base.animacy == "an" then
						error(("Animate masculine plural-only adjectival lemma '%s' should end in -í, -ovi or -ini"):format(base.lemma))
					elseif base.soft then
						error(("Soft masculine plural-only adjectival lemma '%s' should end in -í"):format(base.lemma))
					else
						error(("Inanimate masculine plural-only adjectival lemma '%s' should end in -é, -ovy or -iny"):format(base.lemma))
					end
				elseif base.gender == "f" then
					stem = rmatch(base.lemma, "^(.*)é$") -- hard adjective
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)y$") or rmatch(base.lemma, "^(.*in)y$") -- possessive adjective
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Feminine plural-only adjectival lemma '%s' should end in -é, -í, -ovy or -iny"):format(base.lemma))
				else
					stem = rmatch(base.lemma, "^(.*)á$") -- hard adjective
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$") -- soft adjective
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$") -- possessive adjective
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Neuter plural-only adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
				end
			else
				if base.gender == "m" then
					stem = rmatch(base.lemma, "^(.*)[ýí]$") or rmatch(base.lemma, "^(.*)ův$") or rmatch(base.lemma, "^(.*)in$")
					if stem then
						break
					end
					error(("Masculine adjectival lemma '%s' should end in -ý, -í, -ův or -in"):format(base.lemma))
				elseif base.gender == "f" then
					stem = rmatch(base.lemma, "^(.*)á$")
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)a$") or rmatch(base.lemma, "^(.*in)a$")
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Feminine adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
				else
					stem = rmatch(base.lemma, "^(.*)é$")
					if stem then
						base.lemma = stem .. "ý"
						break
					end
					stem = rmatch(base.lemma, "^(.*)í$")
					if stem then
						break
					end
					stem = rmatch(base.lemma, "^(.*ov)o$") or rmatch(base.lemma, "^(.*in)o$")
					if stem then
						base.lemma = sub_ov(stem)
						break
					end
					error(("Neuter adjectival lemma '%s' should end in -é, -í, -ovo or -ino"):format(base.lemma))
				end
			end
		end
		base.decl = "adj"
	end
	-- Now set the stem sets if not given.
	if not base.stem_sets then
		base.stem_sets = {{reducible = false}}
	end
	for _, stems in ipairs(base.stem_sets) do
		-- Set the stems.
		stems.vowel_stem = stem
		stems.nonvowel_stem = stem
	end
end
-- Determine the declension based on the lemma, gender and number. The declension is set in base.decl. In the process,
-- we set either base.vowel_stem (if the lemma ends in a vowel) or base.nonvowel_stem (if the lemma does not end in a
-- vowel), which is used by determine_stems(). In some cases (specifically with certain foreign nouns), we set
-- base.lemma to a new value; this is as if the user specified 'decllemma:'.
--
-- Throws an error for gender/ending combinations that are indeclinable or unsupported, and for lemmas whose ending
-- is not recognized at all.
local function determine_declension(base)
	if base.mostlyindecl then
		base.decl = "mostly-indecl"
		base.nonvowel_stem = base.lemma
		return
	end
	if base.indecl then
		base.decl = "indecl"
		base.nonvowel_stem = base.lemma
		return
	end
	-- Determine declension
	-- `stem` must be declared local here; it was formerly assigned without a declaration and so leaked into the
	-- global environment.
	local stem
	stem = rmatch(base.lemma, "^(.*)a$")
	if stem then
		if base.gender == "m" then
			if base.animacy ~= "an" then
				error("Masculine lemma in -a must be animate")
			end
			base.decl = "a-m"
		elseif base.gender == "f" then
			if base.hard then
				-- e.g. [[doňa]], which seems not to have soft alternates as [[piraňa]] does (despite IJP; but see the note at the
				-- bottom)
				base.decl = "hard-f"
			elseif rfind(stem, "e$") then
				-- [[idea]], [[diarea]] (subtype '.tech'), [[Korea]], etc.
				base.decl = "ea-f"
			elseif rfind(stem, "i$") then
				-- [[signoria]], [[sinfonia]], [[paranoia]], etc.
				base.decl = "ia-f"
			elseif rfind(stem, "[ou]$") then
				-- [[stoa]], [[kongrua]], [[Samoa]], [[Nikaragua]], etc.
				base.decl = "oa-f"
			elseif not base.persname and rfind(stem, "^.*[ňj]$") or base.mixed then
				-- [[maracuja]], [[papája]], [[sója]]; [[piraňa]] etc. Also [[Keňa]], [[Troja]]/[[Trója]], [[Amudarja]].
				-- Not [[Táňa]], [[Darja]], which decline like [[gejša]], [[skica]], etc. (subtype of hard feminines).
				base.decl = "mixed-f"
			else
				base.decl = "hard-f"
			end
		elseif base.gender == "n" then
			if rfind(stem, "m$") then
				base.decl = "ma-n"
			else
				error("Lemma ending in -a and neuter must end in -ma")
			end
		end
		base.vowel_stem = stem
		return
	end
	local ending
	stem, ending = rmatch(base.lemma, "^(.*)([eě])$")
	if stem then
		if ending == "ě" then
			stem = com.convert_paired_plain_to_palatal(stem)
		end
		if base.gender == "m" then
			if base.foreign then
				-- [[software]] and similar English-derived nouns with silent -e; set the lemma here as if decllemma: were given
				base.lemma = stem
				base.nonvowel_stem = stem
				base.decl = "hard-m"
				return
			end
			if base.hard then
				-- -e be damned; e.g. [[Sofokles]] with hard stem 'Sofokle-' (genitive 'Sofoklea', dative 'Sofokleovi', etc.)
				base.nonvowel_stem = base.lemma
				base.decl = "hard-m"
				return
			end
			if base.tstem then
				if base.animacy ~= "an" then
					error("T-stem masculine lemma in -e must be animate")
				end
				base.decl = "tstem-m"
			elseif rfind(stem, "i$") then
				-- [[zombie]], [[hippie]], [[yuppie]], [[rowdie]]
				base.decl = "ie-m"
			elseif rfind(stem, "e$") then
				-- [[Yankee]]
				base.nonvowel_stem = base.lemma
				base.decl = "ee-m"
				return
			else
				base.decl = "e-m"
			end
		elseif base.gender == "f" then
			base.decl = "soft-f"
		else
			if base.tstem then
				base.decl = "tstem-n"
			else
				base.decl = "soft-n"
			end
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*)o$")
	if stem then
		if base.gender == "m" then
			-- Cf. [[maestro]] m.
			base.decl = "o-m"
		elseif base.gender == "f" then
			-- [[zoo]]; [[Žemaitsko]]?
			error("Feminine nouns in -o are indeclinable; use '.indecl' if needed")
		elseif base.nstem then
			base.decl = "n-n"
		elseif base.hard then
			base.decl = "hard-n"
		elseif rfind(stem, "[aeiuy]$") then
			-- These have gen pl in -í and often other soft plural endings.
			base.decl = "semisoft-n"
		else
			base.decl = "hard-n"
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*[iy])$")
	if stem then
		if base.gender == "m" then
			if base.soft then
				-- [[gay]] "gay man", [[gray]] "gray (scientific unit)", [[Nagy]] (surname)
				base.decl = "soft-m"
			else
				-- Cf. [[kivi]] "kiwi (bird)", [[husky]] "husky", etc.
				base.decl = "i-m"
			end
		elseif base.gender == "f" then
			if base.soft then
				-- [[Uruguay]], [[Paraguay]]
				base.decl = "soft-f"
			else
				-- [[máti]], [[pramáti]]; note also indeclinable [[tsunami]]/[[cunami]], [[okapi]]
				base.decl = "i-f"
				if stem:find("i$") then
					-- Single assignment target, so only the first return value of gsub() is kept.
					stem = stem:gsub("i$", "")
				else
					error("Feminine nouns in -y are either soft or indeclinable; use '.soft' or '.indecl' as needed")
				end
			end
		else
			error("Neuter nouns in -i are indeclinable; use '.indecl' if needed")
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*u)$")
	if stem then
		if base.gender == "m" then
			-- Cf. [[emu]], [[guru]], etc.
			base.decl = "u-m"
		elseif base.gender == "f" then
			-- Only one I know is [[budižkničemu]], which is indeclinable in the singular and declines in the plural as
			-- if written 'budižkničema'.
			error("Feminine nouns in -u are indeclinable; use '.indecl' if needed")
		else
			error("Neuter nouns in -u are indeclinable; use '.indecl' if needed")
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*[íý])$")
	if stem then
		if base.gender == "m" then
			base.decl = "í-m"
		elseif base.gender == "f" then
			-- FIXME: Do any exist? If not, update this message.
			error("Support for non-adjectival non-indeclinable feminine nouns in -í/-ý not yet implemented")
		else
			base.decl = "í-n"
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$")
	if stem then
		if base.gender == "m" then
			if base.foreign then
				-- [[komunismus]] "communism", [[kosmos]] "cosmos", [[hádes]] "Hades"
				stem = rmatch(base.lemma, "^(.*)[ueoaéá]s$")
				if not stem then
					error("Unrecognized masculine foreign ending, should be -us, -es, -os, -as, -és or -ás")
				end
				if not base.hard and (rfind(stem, "[ei]$") and base.animacy == "an" or
					rfind(stem, "i$") and base.animacy == "inan") then
					-- [[genius]], [[basileus]], [[rádius]]; not [[nukleus]], [[choreus]] (inanimate); not
					-- [[skarabeus]] (animate), which should specify 'hard'
					base.decl = "semisoft-m"
				else
					base.decl = "hard-m"
				end
				-- set the lemma here as if decllemma: were given
				base.lemma = stem
			elseif base.hard then
				base.decl = "hard-m"
			elseif base.soft then
				base.decl = "soft-m"
			elseif base.mixed then
				base.decl = "mixed-m"
			elseif rfind(base.lemma, com.inherently_soft_c .. "$") or rfind(base.lemma, "tel$") then
				base.decl = "soft-m"
			else
				base.decl = "hard-m"
			end
		elseif base.gender == "f" then
			if base.mixedistem then
				base.decl = "mixed-istem-f"
			elseif base.istem then
				base.decl = "istem-f"
			elseif base["-istem"] then
				base.decl = "cons-f"
			elseif rfind(base.lemma, "st$") then
				-- Numerous abstracts in -ost; also [[kost]], [[část]], [[srst]], [[bolest]]
				base.decl = "istem-f"
			else
				base.decl = "cons-f"
			end
		elseif base.gender == "n" then
			if base.foreign then
				stem = rmatch(base.lemma, "^(.*)um$") or rmatch(base.lemma, "^(.*)on$")
				if not stem then
					error("Unrecognized neuter foreign ending, should be -um or -on")
				end
				if base.hard then
					base.decl = "hard-n"
				elseif rfind(stem, "[eiuy]$") then
					base.decl = "semisoft-n"
				else
					base.decl = "hard-n"
				end
				-- set the lemma here as if decllemma: were given
				base.lemma = stem .. "o"
				base.vowel_stem = stem
				return
			else
				error("Neuter nouns ending in a consonant should use '.foreign' or '.decllemma:...'")
			end
		end
		base.nonvowel_stem = stem
		return
	end
	error("Unrecognized ending for lemma: '" .. base.lemma .. "'")
end
-- Determine the default value for the 'reducible' flag and store it in `base.default_reducible`. The value is
-- `true`, `false` or the string "mixed".
local function determine_default_reducible(base)
	local lemma, gender = base.lemma, base.gender

	-- Work out the default; the single assignment to `base.default_reducible` happens at the bottom.
	local function compute_default()
		-- Nouns in vowels other than -a/o as well as masculine nouns ending in all vowels don't have null endings so
		-- not reducible. Note, we are never called on adjectival nouns.
		if rfind(lemma, "[iyuíeě]$") or gender == "m" and rfind(lemma, "[ao]$") or base.tstem then
			return false
		end

		local cons_final = rmatch(lemma, "^(.*" .. com.cons_c .. ")$")
		if cons_final then
			-- When analyzing existing manual declensions in -ec and -ek, 290 were reducible vs. 23 non-reducible. Of
			-- these 23, 15 were monosyllabic (and none of the 290 reducible nouns were monosyllabic) -- and two of
			-- these were actually reducible but irregularly: [[švec]] "shoemaker" (gen sg 'ševce') and [[žnec]]
			-- "reaper (person)" (gen sg. 'žence'). Of the remaining 8 multisyllabic non-reducible words, two were
			-- actually reducible but irregularly: [[stařec]] "old man" (gen sg 'starce') and [[tkadlec]] "weaver"
			-- (gen sg 'tkalce'). The remaining six consisted of 5 compounds of monosyllabic words: [[dotek]],
			-- [[oblek]], [[kramflek]], [[pucflek]], [[pokec]], plus [[česnek]], which should be reducible but would
			-- lead to an impossible consonant cluster.
			if gender == "m" and rfind(cons_final, "[eě][ck]$") and not com.is_monosyllabic(cons_final) then
				return true
			end
			-- [[pochodeň]] "torch", [[píseň]] "song", [[žeň]] "harvest"; not [[reveň]] "rhubarb" or [[dřeň]] "pulp",
			-- which need an override.
			if gender == "f" and rfind(cons_final, "[eě]ň$") then
				return true
			end
			return false
		end

		-- From here on the lemma ends in a vowel, so only the plural can have a null ending.
		if base.number == "sg" then
			return false
		end
		if rfind(lemma, "isko$") then
			-- e.g. [[středisko]]
			return "mixed"
		end

		local stem = rmatch(lemma, "^(.*)" .. com.vowel_c .. "$")
		if not stem then
			error(("Internal error: Something wrong, lemma '%s' doesn't end in consonant or vowel"):format(base.lemma))
		end
		-- Substitute 'ch' with a single character to make the following code simpler.
		stem = stem:gsub("ch", com.TEMP_CH)

		if rfind(stem, com.cons_c .. "[lr]" .. com.cons_c .. "$") then
			-- [[vrba]], [[vlha]]; not reducible. (But note [[jablko]], reducible; needs override.)
			return false
		end
		-- NOTE(review): `base.c_as_k` is read here, but the indicator parser visible in this file sets
		-- `base.hard_c` for 'hard'; confirm that `c_as_k` is actually set somewhere.
		if not base.foreign and (rfind(stem, com.cons_c .. "[bkhlrmnv]$") or base.c_as_k and rfind(stem, com.cons_c .. "c$")) then
			-- [[ayahuasca]] has gen pl 'ayahuasek'
			return true
		end
		if base.foreign and rfind(stem, com.cons_c .. "r$") then
			-- Foreign nouns in -CCum seem generally non-reducible in the gen pl except for those in -Crum like
			-- [[centrum]]. Examples: [[album]], [[verbum]], [[signum]], [[interregnum]], [[sternum]]. [[infernum]]
			-- has gen pl 'infern/inferen'.
			return true
		end
		return false
	end

	base.default_reducible = compute_default()
end
-- Determine the stems to use for each stem set: vowel and nonvowel stems, for singular
-- and plural. We assume that one of base.vowel_stem or base.nonvowel_stem has been
-- set in determine_declension(), depending on whether the lemma ends in
-- a vowel. We construct all the rest given the reducibility, vowel alternation spec and
-- any explicit stems given. We store the determined stems inside of the stem-set objects
-- in `base.stem_sets`, meaning that if the user gave multiple reducible or vowel-alternation
-- patterns, we will compute multiple sets of stems. The reason is that the stems may vary
-- depending on the reducibility and vowel alternation.
--
-- Each stem set ends up with `vowel_stem`, `nonvowel_stem`, `oblique_vowel_stem` and `oblique_nonvowel_stem`.
-- Throws an error if a consonant-final stem is marked reducible but cannot be reduced.
local function determine_stems(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end
	-- Set default reducible and check for default mixed reducible, which needs to be expanded into two entries.
	local default_mixed_reducible = false
	for _, stems in ipairs(base.stem_sets) do
		if stems.reducible == nil then
			stems.reducible = base.default_reducible
		end
		if stems.reducible == "mixed" then
			default_mixed_reducible = true
		end
	end
	if default_mixed_reducible then
		local new_stem_sets = {}
		for _, stems in ipairs(base.stem_sets) do
			if stems.reducible == "mixed" then
				-- Replace a "mixed" stem set with a reducible one followed by a non-reducible copy.
				local non_reducible_copy = m_table.shallowCopy(stems)
				non_reducible_copy.reducible = false
				stems.reducible = true
				table.insert(new_stem_sets, stems)
				table.insert(new_stem_sets, non_reducible_copy)
			else
				table.insert(new_stem_sets, stems)
			end
		end
		base.stem_sets = new_stem_sets
	end
	-- Now determine all the stems for each stem set.
	for _, stems in ipairs(base.stem_sets) do
		if base.vowel_stem then
			stems.vowel_stem = base.vowel_stem
			stems.nonvowel_stem = stems.vowel_stem
			-- Apply vowel alternation first in cases like jádro -> jader; apply_vowel_alternation() will throw an error
			-- if the vowel being modified isn't the last vowel in the stem.
			stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.nonvowel_stem)
			if stems.reducible then
				stems.nonvowel_stem = dereduce(base, stems.nonvowel_stem)
				stems.oblique_nonvowel_stem = dereduce(base, stems.oblique_nonvowel_stem)
			end
		else
			stems.nonvowel_stem = base.nonvowel_stem
			-- The user specified #, #ě, ## or ##ě and we're dealing with a term like masculine [[bůh]] or feminine
			-- [[sůl]] that ends in a consonant. In this case, all slots except the nom_s and maybe acc_s have vowel
			-- alternation.
			if stems.oblique_slots then
				stems.oblique_slots = "all"
			end
			stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.nonvowel_stem)
			if stems.reducible then
				stems.vowel_stem = com.reduce(base.nonvowel_stem)
				if not stems.vowel_stem then
					error("Unable to reduce stem '" .. base.nonvowel_stem .. "'")
				end
			else
				stems.vowel_stem = base.nonvowel_stem
			end
		end
		stems.oblique_vowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.vowel_stem)
	end
end
-- Work out the stems and declension for a single word spec `base`, dispatching on the kind of word: pronoun,
-- determiner, numeral, adjective, manually-declined noun, or regular noun (checked in that order).
local function detect_indicator_spec(base)
	if base.pron then
		determine_pronoun_stems(base)
		return
	end
	if base.det then
		determine_determiner_stems(base)
		return
	end
	if base.num then
		determine_numeral_stems(base)
		return
	end
	if base.adj then
		process_declnumber(base)
		synthesize_adj_lemma(base)
		return
	end
	if base.manual then
		if base.stem_sets then
			-- FIXME, maybe this should be allowed?
			error("Reducible and vowel alternation specs cannot be given with manual declensions")
		end
		base.stem_sets = {{reducible = false, vowel_stem = "", nonvowel_stem = ""}}
		base.decl = "manual"
		return
	end

	-- Regular noun. Plural-only lemmas are first converted to a plausible singular.
	if base.number == "pl" then
		synthesize_singular_lemma(base)
	end
	determine_declension(base)
	determine_default_reducible(base)
	determine_stems(base)
end
-- Run detect_indicator_spec() on every word spec, collecting the genders seen in the singular and in the plural,
-- and throw an error if more than one of pronoun/determiner/numeral was seen.
local function detect_all_indicator_specs(alternant_multiword_spec)
	-- Keep track of all genders seen in the singular and plural so we can determine whether to add the term to
	-- [[:Category:Czech nouns that change gender in the plural]].
	alternant_multiword_spec.sg_genders = {}
	alternant_multiword_spec.pl_genders = {}

	iut.map_word_specs(alternant_multiword_spec, function(base)
		detect_indicator_spec(base)
		if base.number ~= "pl" then
			alternant_multiword_spec.sg_genders[base.actual_gender] = true
		end
		if base.number ~= "sg" then
			-- All t-stem masculines are neuter in the plural.
			local plgender = base.decl == "tstem-m" and "n" or base.actual_gender
			alternant_multiword_spec.pl_genders[plgender] = true
		end
	end)

	local special_kinds_seen = 0
	for _, flag in ipairs({"saw_pron", "saw_det", "saw_num"}) do
		if alternant_multiword_spec[flag] then
			special_kinds_seen = special_kinds_seen + 1
		end
	end
	if special_kinds_seen > 1 then
		error("Can't combine pronouns, determiners and/or numerals")
	end
end
-- Forward declaration; the function is assigned further down because it and propagate_alternant_properties() call
-- each other.
local propagate_multiword_properties

-- Propagate `property` within each alternant of `alternant_spec` (each alternant is a multiword spec), then combine
-- the alternants' values into `alternant_spec[property]`: the common value if all non-nil values agree, otherwise
-- `mixed_value`.
local function propagate_alternant_properties(alternant_spec, property, mixed_value, nouns_only)
	local combined
	for _, multiword_spec in ipairs(alternant_spec.alternants) do
		propagate_multiword_properties(multiword_spec, property, mixed_value, nouns_only)
		local value = multiword_spec[property]
		if combined == nil then
			combined = value
		elseif value and combined ~= value then
			combined = mixed_value
		end
	end
	alternant_spec[property] = combined
end
-- Propagate `property` sideways within a multiword spec. Walking left to right, each "nounal" word passes its
-- property value to the preceding words that lack one (back to the previous nounal word); the last nounal word also
-- passes its value to any following words that lack one. A word counts as nounal if it is a regular noun (when
-- `nouns_only` is truthy) or if it already has the property set (otherwise, and always for alternant specs, which
-- are first processed recursively). Finally `multiword_spec[property]` is set to the common value of all nounal
-- words, or to `mixed_value` if they disagree.
propagate_multiword_properties = function(multiword_spec, property, mixed_value, nouns_only)
	local combined = nil
	local last_nounal_index = 0
	local specs = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs
	for index = 1, #specs do
		local spec = specs[index]
		local is_nounal
		if spec.alternants then
			propagate_alternant_properties(spec, property, mixed_value)
			is_nounal = not not spec[property]
		elseif nouns_only then
			is_nounal = is_regular_noun(spec)
		else
			is_nounal = not not spec[property]
		end
		if is_nounal then
			local value = spec[property]
			if not value then
				error("Internal error: noun-type word spec without " .. property .. " set")
			end
			-- Fill in the words between the previous nounal word and this one.
			for earlier = last_nounal_index + 1, index - 1 do
				specs[earlier][property] = specs[earlier][property] or value
			end
			last_nounal_index = index
			if combined == nil then
				combined = value
			elseif combined ~= value then
				combined = mixed_value
			end
		end
	end
	if last_nounal_index > 0 then
		-- Fill in the words following the last nounal word.
		local value = specs[last_nounal_index][property]
		for later = last_nounal_index + 1, #specs do
			specs[later][property] = specs[later][property] or value
		end
	end
	multiword_spec[property] = combined
end
-- Propagate `property` from the top-level spec down to every word spec: any object lacking the property inherits
-- its parent's value (`default_propval` at the top). Each object's "actual_<property>" field is also filled in if
-- not already set. Throws an error if a word would end up with the value "mixed".
local function propagate_properties_downward(alternant_multiword_spec, property, default_propval)
	-- Return obj[property], first setting it to `default` if it is unset; also initialize the "actual_" field.
	local function set_and_fetch(obj, default)
		local value = obj[property]
		if not value then
			obj[property] = default
			value = default
		end
		local actual_key = "actual_" .. property
		if not obj[actual_key] then
			obj[actual_key] = value
		end
		return value
	end

	local top_value = set_and_fetch(alternant_multiword_spec, default_propval)
	for _, alternant_or_word_spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		local spec_value = set_and_fetch(alternant_or_word_spec, top_value)
		if not alternant_or_word_spec.alternants then
			if spec_value == "mixed" then
				-- FIXME, use clearer error message.
				error("Attempt to assign mixed " .. property .. " to word")
			end
			set_and_fetch(alternant_or_word_spec, spec_value)
		else
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				local multiword_value = set_and_fetch(multiword_spec, spec_value)
				for _, word_spec in ipairs(multiword_spec.word_specs) do
					local word_value = set_and_fetch(word_spec, multiword_value)
					if word_value == "mixed" then
						-- FIXME, use clearer error message.
						error("Attempt to assign mixed " .. property .. " to word")
					end
					set_and_fetch(word_spec, word_value)
				end
			end
		end
	end
end
--[=[
Propagate `property` (one of "animacy", "gender" or "number") from nouns to adjacent adjectives.

How it works:
1. The property is assumed to be set already on every noun; this should have happened in
   set_defaults_and_check_bad_indicators().
2. Properties are first propagated upwards and sideways, recursing downwards from the top. Within a multiword spec
   we scan left to right for nouns. On finding a noun we fetch its property (recursing if the noun is an alternant)
   and hand it to any adjectives on its left, back as far as the previous noun. After the last noun has been
   processed, its property value is also handed to any adjectives on its right (to handle e.g. [[anděl strážný]]
   "guardian angel", where the adjective [[strážný]] should inherit the 'masculine' and 'animate' properties of
   [[anděl]]). Finally, the multiword spec itself gets a property value that combines all the non-nil properties of
   its elements: if they all agree, that value; otherwise `mixed_value` (which is "mixed" for animacy and gender,
   but "both" for number).
3. On encountering an alternant spec during this process, each alternant (which is a multiword spec) is processed
   recursively as in the previous step, and the non-nil properties found are combined in the same way as for
   multiword specs.
4. The net effect of steps 2 and 3 is that every alternant and multiword spec has its property set based on its
   children or its neighbors.

The upward/sideways pass is run twice, first with the nouns-only flag set and then without it; after that the
values are pushed downward, with `default_propval` as the top-level default.
]=]
local function propagate_properties(alternant_multiword_spec, property, default_propval, mixed_value)
	-- ipairs() only stops at nil, so the `false` entry is visited as well.
	for _, nouns_only in ipairs({"nouns only", false}) do
		propagate_multiword_properties(alternant_multiword_spec, property, mixed_value, nouns_only)
	end
	propagate_properties_downward(alternant_multiword_spec, property, default_propval)
end
-- Record the position of the first regular noun. For each alternant, `first_noun` is set on the alternant's
-- multiword spec to the index of its first regular noun (if any). At the top level, `first_noun` is set to the index
-- of an element that is, or contains, a regular noun; the scan stops at the first plain (non-alternant) regular
-- noun.
local function determine_noun_status(alternant_multiword_spec)
	-- Return the index of the first regular noun in `word_specs`, or nil if there is none.
	local function first_regular_noun_index(word_specs)
		for position, word_spec in ipairs(word_specs) do
			if is_regular_noun(word_spec) then
				return position
			end
		end
		return nil
	end

	for index, spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if spec.alternants then
			local contains_noun = false
			for _, multiword_spec in ipairs(spec.alternants) do
				local position = first_regular_noun_index(multiword_spec.word_specs)
				if position then
					multiword_spec.first_noun = position
					contains_noun = true
				end
			end
			if contains_noun then
				alternant_multiword_spec.first_noun = index
			end
		elseif is_regular_noun(spec) then
			alternant_multiword_spec.first_noun = index
			return
		end
	end
end
-- Set the part of speech (`pos`) and its plural (`plpos`) on the spec. An explicit `args.pos` wins; otherwise the
-- part of speech is derived from the kinds of words seen: pronoun, determiner or numeral if only words of that kind
-- occurred, else noun.
local function set_pos(alternant_multiword_spec)
	local spec = alternant_multiword_spec
	local pos = spec.args.pos
	if not pos then
		if spec.saw_pron and not spec.saw_non_pron then
			pos = "သဗ္ဗနာမ်"
		elseif spec.saw_det and not spec.saw_non_det then
			pos = "ဖျေံလဝ်သန္နိဋ္ဌာန်"
		elseif spec.saw_num and not spec.saw_non_num then
			pos = "ဂၞန်သၚ်္ချာ"
		else
			pos = "နာမ်"
		end
	end
	spec.pos = pos
	spec.plpos = require(en_utilities_module).pluralize(spec.pos)
end
-- Normalize the lemma of every word spec: an empty lemma defaults to `pagename`; the original lemma is saved with
-- and without links; and `base.lemma` is set to the 'decllemma:' override if given, else to the link-free lemma
-- (lowercased if it was all-uppercase).
local function normalize_all_lemmas(alternant_multiword_spec, pagename)
	local function normalize(base)
		if base.lemma == "" then
			base.lemma = pagename
		end
		local original = base.lemma
		base.orig_lemma = original
		local lemma = m_links.remove_links(original)
		base.orig_lemma_no_links = lemma
		-- If the lemma is all-uppercase, lowercase it but note this, so that later in combine_stem_ending() we
		-- convert it back to uppercase. This allows us to handle all-uppercase acronyms without a lot of extra
		-- complexity.
		-- FIXME: This may not make sense at all.
		if uupper(lemma) == lemma then
			base.all_uppercase = true
			lemma = ulower(lemma)
		end
		base.actual_lemma = lemma
		base.lemma = base.decllemma or lemma
	end

	iut.map_word_specs(alternant_multiword_spec, normalize)
end
-- Decline a single word spec: run the declension function for `base.decl` once per stem set, apply derived slots
-- and overrides, and then, if the number used for declension differs from the term's actual number (i.e.
-- 'declnumber:' was given), copy the declined forms into the slots of the other number and erase the slots of the
-- number that the term doesn't actually have.
-- Throws an internal error if `base.decl` is not a known declension type.
local function decline_noun(base)
	for _, stems in ipairs(base.stem_sets) do
		if not decls[base.decl] then
			-- tostring() so that a missing `base.decl` still produces this message rather than a concatenation error.
			error("Internal error: Unrecognized declension type '" .. tostring(base.decl) .. "'")
		end
		decls[base.decl](base, stems)
	end
	handle_derived_slots_and_overrides(base)
	local function copy(from_slot, to_slot)
		base.forms[to_slot] = base.forms[from_slot]
	end
	if base.actual_number ~= base.number then
		local source_num = base.number == "sg" and "_s" or "_p"
		local dest_num = base.number == "sg" and "_p" or "_s"
		for case, _ in pairs(cases) do
			copy(case .. source_num, case .. dest_num)
		end
		-- The linked nominative doesn't depend on the case being iterated over, so copy it once, outside the loop.
		copy("nom" .. source_num .. "_linked", "nom" .. dest_num .. "_linked")
		if base.actual_number ~= "both" then
			local erase_num = base.actual_number == "sg" and "_p" or "_s"
			for case, _ in pairs(cases) do
				base.forms[case .. erase_num] = nil
			end
			base.forms["nom" .. erase_num .. "_linked"] = nil
		end
	end
end
-- Variant-code lookup for a form; passed as `get_variants` in the inflection properties built in
-- export.do_generate_forms(). Currently a stub that always returns nil; the intended implementation (checking the
-- form for com.VAR1/VAR2/VAR3) is kept below, disabled, inside the block comment.
local function get_variants(form)
return nil
--[=[
FIXME
return
form:find(com.VAR1) and "var1" or
form:find(com.VAR2) and "var2" or
form:find(com.VAR3) and "var3" or
nil
]=]
end
-- Compute the categories to add the noun to, as well as the annotation to display in the
-- declension title bar. We combine the code to do these functions as both categories and
-- title bar contain similar information.
--
-- Nothing is returned; the results are stored on `alternant_multiword_spec`:
--   `.categories` receives the list of category names collected in `all_cats`;
--   `.annotation` receives the annotation parts joined with single spaces.
-- Many category insertions are currently commented out, leaving several branches empty.
local function compute_categories_and_annotation(alternant_multiword_spec)
local all_cats = {}
-- Add the category "Czech <cattype>" to `all_cats` via m_table.insertIfNot (presumably skipping duplicates).
local function insert(cattype)
m_table.insertIfNot(all_cats, "Czech " .. cattype)
end
if alternant_multiword_spec.pos == "နာမ်" then
if alternant_multiword_spec.actual_number == "sg" then
-- insert("uncountable nouns")
elseif alternant_multiword_spec.actual_number == "pl" then
-- insert("pluralia tantum")
end
end
-- NOTE(review): `annotation` is declared but never used below; the result is written directly to
-- `alternant_multiword_spec.annotation`.
local annotation
-- Accumulators filled by do_word_spec() and assembled into the annotation further down.
local annparts = {}
local decldescs = {}
local vowelalts = {}
local foreign = {}
local irregs = {}
local stemspecs = {}
-- nil until the first stem set is seen; then that stem set's `reducible` value, or "mixed" once stem sets disagree.
local reducible = nil
-- Build a space-separated English description from a gender code and an animacy code. The animacy word is only
-- included when the gender is neither "f" nor "n". Unknown or "none" codes contribute nothing, so the result
-- may be an empty string.
local function get_genanim(gender, animacy)
local gender_code_to_desc = {
m = "masculine",
f = "feminine",
n = "neuter",
none = nil,
}
local animacy_code_to_desc = {
an = "animate",
inan = "inanimate",
none = nil,
}
local descs = {}
table.insert(descs, gender_code_to_desc[gender])
if gender ~= "f" and gender ~= "n" then
-- masculine or "none" (e.g. certain pronouns and numerals)
table.insert(descs, animacy_code_to_desc[animacy])
end
return table.concat(descs, " ")
end
-- Collapse each run of spaces to a single space, then strip surrounding whitespace with mw.text.trim.
local function trim(text)
text = text:gsub(" +", " ")
return mw.text.trim(text)
end
-- Collect categories and annotation pieces for one word spec (`base`), looping over all of its stem sets.
local function do_word_spec(base)
local actual_genanim = get_genanim(base.actual_gender, base.actual_animacy)
local declined_genanim = get_genanim(base.gender, base.animacy)
local genanim
if actual_genanim ~= declined_genanim then
genanim = ("%s (declined as %s)"):format(actual_genanim, declined_genanim)
-- insert("nouns with actual gender different from declined gender")
else
genanim = actual_genanim
end
if base.actual_gender == "m" then
-- Insert a category for 'Czech masculine animate nouns' or 'Czech masculine inanimate nouns'; the base categories
-- [[:Category:Czech masculine nouns]], [[:Czech animate nouns]] are auto-inserted.
-- insert(actual_genanim .. " " .. alternant_multiword_spec.plpos)
end
for _, stems in ipairs(base.stem_sets) do
local props = declprops[base.decl]
-- `props.cat` may be a function (called with base and stems), a single string, or a list of strings.
local cats = props.cat
if type(cats) == "function" then
cats = cats(base, stems)
end
if type(cats) == "string" then
cats = {cats}
end
local default_desc
for i, cat in ipairs(cats) do
-- Expand the placeholders: a spec with no placeholder at all gets " GENPOS" appended; GENPOS stands for
-- "GENDER POS"; POS is appended if still missing. GENDER becomes the actual gender/animacy description and
-- POS the pluralized part of speech.
if not cat:find("GENDER") and not cat:find("GENPOS") and not cat:find("POS") then
cat = cat .. " GENPOS"
end
cat = cat:gsub("GENPOS", "GENDER POS")
if not cat:find("POS") then
cat = cat .. " POS"
end
if i == #cats then
-- The last category, with " POS" removed but GENDER still unexpanded, is the fallback description.
default_desc = cat:gsub(" POS", "")
end
cat = cat:gsub("GENDER", actual_genanim)
cat = cat:gsub("POS", alternant_multiword_spec.plpos)
-- Need to trim `cat` because actual_genanim may be an empty string.
insert(trim(cat))
end
-- `props.desc` may be a function (called with base and stems) or a value used directly; when it yields
-- nothing, the fallback derived from the last category is used.
local desc = props.desc
if type(desc) == "function" then
desc = desc(base, stems)
end
desc = desc or default_desc
desc = desc:gsub("GENDER", genanim)
-- Need to trim `desc` because genanim may be an empty string.
m_table.insertIfNot(decldescs, trim(desc))
local vowelalt
if stems.vowelalt == "quant" then
vowelalt = "quant-alt"
-- insert("nouns with quantitative vowel alternation")
elseif stems.vowelalt == "quant-ě" then
vowelalt = "í-ě-alt"
-- insert("nouns with í-ě alternation")
end
if vowelalt then
m_table.insertIfNot(vowelalts, vowelalt)
end
if reducible == nil then
reducible = stems.reducible
elseif reducible ~= stems.reducible then
reducible = "mixed"
end
if stems.reducible then
-- insert("nouns with reducible stem")
end
if base.foreign then
m_table.insertIfNot(foreign, "foreign")
if not base.decllemma then
-- NOTE: there are nouns that use both 'foreign' and 'decllemma', e.g. [[Zeus]].
-- insert("nouns with regular foreign declension")
end
end
-- User-specified 'decllemma:' indicates irregular stem. Don't consider foreign nouns in -us/-os/-es, -um/-on or
-- silent -e (e.g. [[software]]) where this ending is simply dropped in oblique and plural forms as irregular;
-- there are too many of these and they are already categorized above as 'nouns with regular foreign declension'.
if base.decllemma then
m_table.insertIfNot(irregs, "irreg-stem")
-- insert("nouns with irregular stem")
end
m_table.insertIfNot(stemspecs, stems.vowel_stem)
end
end
-- Only one "key" word is described: the one at index `first_noun` (default 1). If that entry is a set of
-- alternants, the key word of each alternant (its own `first_noun`, default 1) is described instead.
local key_entry = alternant_multiword_spec.first_noun or 1
if #alternant_multiword_spec.alternant_or_word_specs >= key_entry then
local alternant_or_word_spec = alternant_multiword_spec.alternant_or_word_specs[key_entry]
if alternant_or_word_spec.alternants then
for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
key_entry = multiword_spec.first_noun or 1
if #multiword_spec.word_specs >= key_entry then
do_word_spec(multiword_spec.word_specs[key_entry])
end
end
else
do_word_spec(alternant_or_word_spec)
end
end
-- Assemble the annotation in a fixed order: number restriction, declension descriptions, vowel alternations,
-- reducibility, foreign marker, irregular-stem marker.
if alternant_multiword_spec.actual_number == "sg" or alternant_multiword_spec.actual_number == "pl" then
-- not "both" or "none" (for [[sebe]])
table.insert(annparts, alternant_multiword_spec.actual_number == "sg" and "sg-only" or "pl-only")
end
if #decldescs == 0 then
table.insert(annparts, "indecl")
else
table.insert(annparts, table.concat(decldescs, " // "))
end
if #vowelalts > 0 then
table.insert(annparts, table.concat(vowelalts, "/"))
end
if reducible == "mixed" then
table.insert(annparts, "mixed-reducible")
elseif reducible then
table.insert(annparts, "reducible")
end
if #foreign > 0 then
table.insert(annparts, table.concat(foreign, " // "))
end
if #irregs > 0 then
table.insert(annparts, table.concat(irregs, " // "))
end
alternant_multiword_spec.annotation = table.concat(annparts, " ")
if #stemspecs > 1 then
-- insert("nouns with multiple stems")
end
if alternant_multiword_spec.actual_number == "both" and not m_table.deepEquals(alternant_multiword_spec.sg_genders, alternant_multiword_spec.pl_genders) then
-- insert("nouns that change gender in the plural")
end
alternant_multiword_spec.categories = all_cats
end
-- Convert the raw declined forms in `alternant_multiword_spec.forms` into displayable form by handing them to
-- iut.show_forms(). The lemma list passed along is taken from the first slot in `potential_lemma_slots` that has
-- any forms at all.
local function show_forms(alternant_multiword_spec)
	local all_forms = alternant_multiword_spec.forms

	local lemmas = {}
	for _, slot in ipairs(potential_lemma_slots) do
		local formobjs = all_forms[slot]
		if formobjs then
			for _, formobj in ipairs(formobjs) do
				-- FIXME, now can support footnotes as qualifiers in headwords?
				lemmas[#lemmas + 1] = formobj.form
			end
			break
		end
	end

	-- Identity for now; removal of variant codes (com.remove_variant_codes) is disabled.
	local function canonicalize(form)
		return form
	end

	iut.show_forms(all_forms, {
		lemmas = lemmas,
		slot_table = alternant_multiword_spec.output_noun_slots,
		lang = lang,
		canonicalize = canonicalize,
	})
end
-- Build and return the wikitext of the declension table for `alternant_multiword_spec`.
-- One of three layouts is chosen: a two-column (singular/plural) table when `actual_number` is "both", a
-- single-number table with stressed/clitic columns when `has_clitic` is set, or a plain single-number table.
-- The chosen template is filled in by m_string_utilities.format() from `alternant_multiword_spec.forms`, to which
-- this function adds the keys `title`, `annotation` and `notes_clause`.
local function make_table(alternant_multiword_spec)
local forms = alternant_multiword_spec.forms
-- Opening wikitext shared by all layouts; every "MINWIDTH" in it is replaced by `min_width` (a width in em).
-- NOTE(review): {\op} and {\cl} are presumably the formatter's escapes for literal braces -- confirm against
-- Module:string utilities.
local function template_prelude(min_width)
return rsub([=[
<div>
<div class="NavFrame" style="max-width:MINWIDTHem">
<div class="NavHead" style="background:var(--wikt-palette-lighterblue, #ebf4ff);">{title}{annotation}</div>
<div class="NavContent" style="overflow:auto">
{\op}| style="min-width:MINWIDTHem" class="inflection-table inflection"
|- class="rowgroup"
]=], "MINWIDTH", min_width)
end
-- Closing wikitext shared by all layouts, including the placeholder for the footnote clause.
local function template_postlude()
return [=[
|{\cl}{notes_clause}</div></div></div>]=]
end
-- Layout with both a singular and a plural column.
local table_spec_both = template_prelude("45") .. [=[
! style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);" |
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | ကိုန်ဨကဝုစ်
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | ကိုန်ဗဟုဝစ်
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|မဒုၚ်ယၟု
| {nom_s}
| {nom_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ဗဳဇဂကူ
| {gen_s}
| {gen_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပြကမ္မကာရက
| {dat_s}
| {dat_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ကမ္မကာရက
| {acc_s}
| {acc_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပရေၚ်ဂယိုၚ်လမျီု
| {voc_s}
| {voc_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ခၞံဗဒှ်ဌာန်မတန်တဴ
| {loc_s}
| {loc_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်
| {ins_s}
| {ins_p}
]=] .. template_postlude()
-- Layout with a single number column. "NUMBER" is replaced by the column heading `number` and "CODE" by the
-- slot suffix `numcode` ("s" or "p"), so that e.g. {nom_CODE} becomes {nom_s}.
local function get_table_spec_one_number(number, numcode)
local table_spec_one_number = [=[
! style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);" |
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | NUMBER
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|မဒုၚ်ယၟု
| {nom_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ဗဳဇဂကူ
| {gen_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပြကမ္မကာရက
| {dat_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ကမ္မကာရက
| {acc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပရေၚ်ဂယိုၚ်လမျီု
| {voc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ခၞံဗဒှ်ဌာန်မတန်တဴ
| {loc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်
| {ins_CODE}
]=]
return template_prelude("30") .. table_spec_one_number:gsub("NUMBER", number):gsub("CODE", numcode) ..
template_postlude()
end
-- Single-number layout with separate "stressed" and "clitic" columns for the genitive, dative and accusative;
-- the other cases span both columns. "NUMBER" and "CODE" are substituted as in get_table_spec_one_number().
local function get_table_spec_one_number_clitic(number, numcode)
local table_spec_one_number_clitic = [=[
! rowspan=2 style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);"|
! colspan=2 style="background:var(--wikt-palette-lightblue, #d9ebff);" | NUMBER
|-
! style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);" | stressed
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | clitic
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|မဒုၚ်ယၟု
| colspan=2 | {nom_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ဗဳဇဂကူ
| {gen_CODE}
| {clitic_gen_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပြကမ္မကာရက
| {dat_CODE}
| {clitic_dat_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ကမ္မကာရက
| {acc_CODE}
| {clitic_acc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပရေၚ်ဂယိုၚ်လမျီု
| colspan=2 | {voc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ခၞံဗဒှ်ဌာန်မတန်တဴ
| colspan=2 | {loc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်
| colspan=2 | {ins_CODE}
]=]
return template_prelude("40") .. table_spec_one_number_clitic:gsub("NUMBER", number):gsub("CODE", numcode) ..
template_postlude()
end
-- Wrapper placed around the footnote text; only used when `forms.footnote` is not the empty string.
local notes_template = [=[
<div style="width:100%;text-align:left;background:var(--wikt-palette-lightblue, #d9ebff);">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]=]
-- Title: the user-supplied one if present, otherwise a default built from the lemma.
if alternant_multiword_spec.title then
forms.title = alternant_multiword_spec.title
else
forms.title = 'မလဟုတ်စှ်ေဆေၚ်စပ်ကဵု <i lang="cs">' .. forms.lemma .. '</i>'
end
-- Annotation: shown in smaller type inside parentheses, or omitted entirely when empty.
local annotation = alternant_multiword_spec.annotation
if annotation == "" then
forms.annotation = ""
else
forms.annotation = " (<span style=\"font-size: smaller;\">" .. annotation .. "</span>)"
end
-- Column heading and slot suffix for the single-number layouts. Both stay nil when `actual_number` is "both",
-- in which case the single-number builders below are never called.
local number, numcode
if alternant_multiword_spec.actual_number == "sg" then
number, numcode = "singular", "s"
elseif alternant_multiword_spec.actual_number == "pl" then
number, numcode = "plural", "p"
elseif alternant_multiword_spec.actual_number == "none" then -- used for [[sebe]]
number, numcode = "", "s"
end
local table_spec =
alternant_multiword_spec.actual_number == "both" and table_spec_both or
alternant_multiword_spec.has_clitic and get_table_spec_one_number_clitic(number, numcode) or
get_table_spec_one_number(number, numcode)
forms.notes_clause = forms.footnote ~= "" and
m_string_utilities.format(notes_template, forms) or ""
return m_string_utilities.format(table_spec, forms)
end
-- Return the list of distinct headword gender codes for the term. Each word spec contributes
-- "<gender>-<animacy>", where the animacy code "inan" is shortened to "in"; "-p" is appended when the term's
-- actual number is plural-only.
local function compute_headword_genders(alternant_multiword_spec)
	local number_suffix = alternant_multiword_spec.actual_number == "pl" and "-p" or ""
	local genders = {}

	local function collect(base)
		local animacy_code = base.animacy
		if animacy_code == "inan" then
			animacy_code = "in"
		end
		local code = base.gender .. "-" .. animacy_code .. number_suffix
		m_table.insertIfNot(genders, code)
	end

	iut.map_word_specs(alternant_multiword_spec, collect)
	return genders
end
-- Externally callable entry point: parse the user-specified arguments and decline the noun.
-- Normally returns ALTERNANT_MULTIWORD_SPEC, an object whose declined forms live in
-- `ALTERNANT_MULTIWORD_SPEC.forms`, keyed by slot. A slot with no values has no key; otherwise the value is a list
-- of objects {form=FORM, footnotes=FOOTNOTES}. When the `json` argument is set, the object (minus its `args`) is
-- returned as a JSON string instead.
-- `from_headword` enables the additional parameters accepted when called from headword code.
function export.do_generate_forms(parent_args, from_headword)
	local params = {
		[1] = {required = true, template_default = "bůh<m.an.#.voce>"},
		title = true,
		pagename = true,
		json = {type = "boolean"},
		pos = true,
	}
	if from_headword then
		for _, list_param in ipairs({"head", "lemma", "g", "f", "m", "adj", "dim"}) do
			params[list_param] = {list = true}
		end
		params.id = {}
	end
	local args = m_para.process(parent_args, params)

	local spec = iut.parse_inflected_text(args[1], {
		parse_indicator_spec = parse_indicator_spec,
		angle_brackets_omittable = true,
		allow_blank_lemma = true,
	})
	spec.title = args.title
	spec.args = args

	-- Page name: explicit argument, else the first headword (headword calls only), else the current page.
	local pagename = args.pagename
	if not pagename then
		pagename = from_headword and args.head[1]
		if not pagename then
			pagename = mw.loadData("Module:headword/data").pagename
		end
	end

	normalize_all_lemmas(spec, pagename)
	set_all_defaults_and_check_bad_indicators(spec)
	-- Animacy, number and gender must be propagated before detect_all_indicator_specs(): adjectives need their
	-- gender and number in place for the adjective lemma to be synthesized correctly.
	propagate_properties(spec, "animacy", "inan", "mixed")
	propagate_properties(spec, "number", "both", "both")
	-- FIXME, the default value (third param) used to be 'm' with a comment indicating that this applied only to
	-- plural adjectives, where it didn't matter; but in Czech, plural adjectives are distinguished for gender and
	-- animacy. Make sure 'mixed' works.
	propagate_properties(spec, "gender", "mixed", "mixed")
	detect_all_indicator_specs(spec)
	-- 'actual_number' can only be propagated now, because detect_all_indicator_specs() is what sets it for
	-- adjectives.
	propagate_properties(spec, "actual_number", "both", "both")
	determine_noun_status(spec)
	set_pos(spec)
	spec.output_noun_slots = get_output_noun_slots(spec)

	iut.inflect_multiword_or_alternant_multiword_spec(spec, {
		skip_slot = function(slot)
			return skip_slot(spec.actual_number, slot)
		end,
		slot_table = spec.output_noun_slots,
		get_variants = get_variants,
		inflect_word_spec = decline_noun,
	})
	compute_categories_and_annotation(spec)
	spec.genders = compute_headword_genders(spec)

	if not args.json then
		return spec
	end
	-- JSON output: drop the processed arguments before serializing.
	spec.args = nil
	return require("Module:JSON").toJSON(spec)
end
-- Entry point for {{cs-ndecl}}. Template-callable: reads the arguments of the calling template, declines the noun
-- and returns the wikitext of the declension table followed by the formatted categories. If JSON output was
-- requested, the JSON string produced by do_generate_forms() is returned as is.
function export.show(frame)
	local result = export.do_generate_forms(frame:getParent().args)
	if type(result) == "string" then
		-- JSON return value
		return result
	end
	show_forms(result)
	local table_text = make_table(result)
	local category_text = require("Module:utilities").format_categories(result.categories, lang, nil, nil, force_cat)
	return table_text .. category_text
end
return export
4g5qws8eucokyfi4obp9yr8zg4s7c84
395163
395161
2026-05-19T18:29:29Z
咽頭べさ
33
395163
Scribunto
text/plain
local export = {}
--[=[
Authorship: Ben Wing <benwing2>
]=]
--[=[
TERMINOLOGY:
-- "slot" = A particular combination of case/number.
Example slot names for nouns are "gen_s" (genitive singular) and
"voc_p" (vocative plural). Each slot is filled with zero or more forms.
-- "form" = The declined Czech form representing the value of a given slot.
-- "lemma" = The dictionary form of a given Czech term. Generally the nominative
masculine singular, but may occasionally be another form if the nominative
masculine singular is missing.
]=]
--[=[
FIXME:
1. Finish synthesize_singular_lemma(). [DONE]
2. Implement feminines in -ea, -oa/-ua, -ia, -oe. [DONE]
3. Implement "mixed" masculine nouns in -l, -n, -t (each different, also inanimate vs. animate). [DONE]
4. Allow 'stem:' override after vowel-final words like [[centurio]]. [DONE using decllemma:]
5. Support masculine foreign nouns in -us/-os/-es. [DONE]
6. Support masculine foreign nouns in -ius/-etc. [DONE]
7. Support masculine foreign nouns in unpronounced final -e (e.g. [[software]]). [DONE]
8. Support neuter foreign nouns in -um/-on. [DONE]
9. Support neuter foreign nouns in -ium/-ion. [DONE]
10. Support paired body parts, e.g. [[ruka]], [[noha]], [[oko]], [[ucho]], [[koleno]], [[rameno]]. [WON'T DO;
JUST SEPARATE THE MEANINGS AND GIVE THEM DIFFERENT DECLENSIONS]
11. Support masculine nouns in -e/ě that are neuter in the plural. [DONE]
12. Correctly handle -e vs. -ě, e.g. soft neuters have both [[kutě]] and [[poledne]]. [DONE]
13. Always use specified lemma in nom_pl and maybe acc_pl when plurale tantum. [DONE]
14. Support feminine nouns in -ca/-ča/-ša/-ža. [DONE]
15. Support feminine nouns in -ja/-ňa. [DONE]
16. Support mixed i-stem feminine nouns. [DONE]
17. Support "c as k" feminine nouns like [[ayahuasca]].
18. Support 'declgender'. [DONE]
19. Support pronouns with clitics. [DONE]
20. Singular-only and plural-only terms should not have number in accelerator form. [DONE]
21. Support [[úterý]] (like neuters in -í). [DONE]
22. Support feminines in -i ([[máti]], [[pramáti]]). [DONE]
23. Support foreign nouns in -ie ([[zombie]], [[hippie]], [[yuppie]]). [DONE]
24. Support foreign nouns in -í ([[muftí]], [[qádí]]). [DONE]
25. Support manual declensions. [DONE]
26. Support numerals. [DONE]
27. Allow for reducible spec in pluralia tantum and dereduce accordingly; also automatically assign reducibility
if singular stem ends in -Ck or -Cc. [DONE]
28. Use `pos` value in all categories.
29. Support determiners [[kolik]], [[tolik]], [[několik]], [[mnoho]]. [DONE]
30. Support a '.velar' indicator for foreign names whose pronunciation but not spelling ends in a velar: [[Remarque]],
[[Braque]], [[Mike]], [[Drake]], [[Jake]] with vocative 'Remarquu', 'Braquu', 'Mikeu', 'Drakeu', 'Jakeu'. In
general we need more thought around such foreign names; essentially, for names in a silent e, sometimes the -e
is dropped in all oblique forms (e.g. [[Shakespeare]], [[Pierre]], [[Barrande]], [[La Fontaine]], [[Braque]],
[[Remarque]] with gen sg 'Shakespeara', 'Pierra', 'Barranda', 'La Fontaina', 'Braqua', 'Remarqua') and sometimes
it's kept in all oblique forms except those ending in an -e, where -ee is avoided (e.g. [[Pete]], [[Gable]],
[[Jake]], [[White]], [[Byrne]], [[Mike]], [[Drake]] with gen sg 'Petea', 'Gablea' etc. and voc sg 'Pete', 'Gable'
but 'Jakeu', 'Mikeu'). Sometimes there are doublets, e.g. [[Hubble]] and [[Hume]] have gen sg 'Hubbla/Hubblea'
(where the second form is used among astronomers in a technical sense and the first form may be more popular)
and 'Huma/Humea'. We already have a '.foreign' indicator that when applied to a noun ending in -e drops the -e
in oblique forms e.g. for [[software]]. We may need to combine this with an explicit indicator of hard, soft or
velar as there will be names with silent -e and preceding soft consonant e.g. [[Bruce]], [[Coleridge]]. Note
that when the -e is kept it is still dropped before front vowels, hence dat sg 'Bruci'/'Bruceovi'. Need some
investigation in IJP and cswikt. [.velar DONE]
31. Support 'declnumber'. [DONE]
32. Support foreign nouns in -ee ([[Yankee]]). [DONE]
]=]
local lang = require("Module:languages").getByCode("cs")
local m_table = require("Module:table")
local m_links = require("Module:links")
local m_string_utilities = require("Module:string utilities")
local iut = require("Module:inflection utilities")
local put = require("Module:parse utilities")
local m_para = require("Module:parameters")
local com = require("Module:cs-common")
local en_utilities_module = "Module:en-utilities"
local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rgmatch = mw.ustring.gmatch
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local usub = mw.ustring.sub
local uupper = mw.ustring.upper
local ulower = mw.ustring.lower
local force_cat = false -- set to true to make categories appear in non-mainspace pages, for testing
-- Like rsubn(), but returns only the resulting string (the substitution count is dropped).
local function rsub(term, foo, bar)
	-- The parentheses truncate the call to its first return value.
	return (rsubn(term, foo, bar))
end
-- Like rsubn(), but the second return value is a boolean telling whether at least one
-- substitution was made, instead of the substitution count.
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Record the tracking key "cs-noun/<track_id>" through Module:debug/track. Always returns true.
local function track(track_id)
	local tracker = require("Module:debug/track")
	tracker("cs-noun/" .. track_id)
	return true
end
-- Map from slot name to the accelerator tag set for that slot. Tags are "|"-separated and end in the number
-- ("s" or "p"). This table is the shared master copy and must never be modified.
local output_noun_slots = {
	nom_s = "nom|s",
	nom_s_linked = "nom|s",
	gen_s = "gen|s",
	gen_s_linked = "gen|s",
	clitic_gen_s = "clitic|gen|s",
	dat_s = "dat|s",
	clitic_dat_s = "clitic|dat|s",
	acc_s = "acc|s",
	clitic_acc_s = "clitic|acc|s",
	voc_s = "voc|s",
	loc_s = "loc|s",
	ins_s = "ins|s",
	nom_p = "nom|p",
	nom_p_linked = "nom|p",
	gen_p = "gen|p",
	dat_p = "dat|p",
	acc_p = "acc|p",
	voc_p = "voc|p",
	loc_p = "loc|p",
	ins_p = "ins|p",
}

-- Return the slot table to use for `alternant_multiword_spec`.
-- A term with both numbers gets the master table unchanged. A term with only one number (or none) gets a fresh
-- copy whose accelerator tags have the trailing "|s" / "|p" removed.
--
-- The master table used to be stripped in place, which permanently corrupted it: once any single-number term had
-- been processed, every later two-number term in the same Lua invocation lost its number tags too. Building a
-- copy (about twenty short strings) avoids that.
local function get_output_noun_slots(alternant_multiword_spec)
	if alternant_multiword_spec.actual_number == "both" then
		return output_noun_slots
	end
	local stripped = {}
	for slot, accel_form in pairs(output_noun_slots) do
		-- Parentheses drop gsub's second return value (the substitution count).
		stripped[slot] = (accel_form:gsub("|[sp]$", ""))
	end
	return stripped
end
-- Slot names listed in priority order (nominative singular, nominative plural, genitive singular).
-- NOTE(review): presumably the slots consulted, in this order, when looking for the form(s) to use as the
-- lemma -- confirm against the code that reads this list.
local potential_lemma_slots = {"nom_s", "nom_p", "gen_s"}
-- Set of case codes. Slot names are built from a case code plus "_s" or "_p" (compare the keys of
-- `output_noun_slots`).
local cases = {
nom = true,
gen = true,
dat = true,
acc = true,
voc = true,
loc = true,
ins = true,
}
-- Set of case codes that have "clitic_" slots in `output_noun_slots` (clitic_gen_s, clitic_dat_s, clitic_acc_s).
local clitic_cases = {
gen = true,
dat = true,
acc = true,
}
-- Wrapper around com.dereduce() that raises an error naming the stem when no dereduced stem is returned,
-- instead of passing the failure on to the caller.
local function dereduce(base, stem)
	local result = com.dereduce(base, stem)
	if result then
		return result
	end
	error("Unable to dereduce stem '" .. stem .. "'")
end
--[=[
Maybe modify the stem and/or ending in certain special cases:
1. Final -e in vocative singular triggers first palatalization of the stem in some cases (e.g. hard masc).
2. Endings beginning with ě, i, í trigger second palatalization, as does -e in the loc_s.
NOTE: Correctly handling -e vs. -ě and -tdn/-ťďň alternations is tricky. We have to deal with the following:
1. Soft-stem and t-stem neuters can have either -e or -ě. With coronals we have both [[poledne]] "noon" with /n/ and
[[kutě]] "bed" with /ť/. We also have soft-stem neuter [[Labe]] with /b/ vs. t-stem neuter [[hříbě]] with /bj/.
2. Underlying palatal coronals maintain their nature before back vowels and when not followed by a vowel, e.g. [[štěně]]
"puppy" becomes 'štěňata' in the nom/acc/voc plural and [[přítelkyně]] "girlfriend" becomes 'přítelkyň' in the gen
plural, but underlying palatal labials become non-palatal, e.g. [[hříbě]] "foal" becomes 'hříbata' in the nom/acc/voc
plural.
3. There are at least four types of endings beginning with '-e':
a. "maintaining" endings, e.g. instrumental singular '-em', which do not change the nature of the consonant, e.g.
[[zákon]] "law" becomes 'zákonem' while [[vězeň]] "prisoner" becomes 'vězeněm';
b. "palatalizing" endings, e.g. locative singular '-e', which palatalizes t/d/n (and more generally applies the
Slavic second palatalization, e.g. k -> c, r -> ř), e.g. [[žena]] "woman" becomes 'ženě';
c. "depalatalizing" endings, e.g. feminine i-stem dative plural '-em', which actively depalatalize ť/ď/ň, e.g.
[[oběť]] "sacrifice, victim" becomes 'obětem';
d. vocative singular '-e' of hard-stem masculines, which applies the Slavic first palatalization in some
circumstances (e.g. k -> č, Cr -> Cř, sometimes c -> č).
The way we handle this is as follows:
1. We maintain the underlying stems always in their "pronounced" form, i.e. if the last consonant is pronounced ť/ď/ň
we maintain the stem in that form, but if pronounced t/d/n, we use those consonants. Hence neuter [[poledne]] "noon"
has stem 'poledn-' but neuter [[štěně]] "puppy" has stem 'štěň'. If the stem ends in labial + /j/, we use a special
TEMP_SOFT_LABIAL character after the labial (rather than 'j', in case of stems that actually have a written 'j' in
them such as [[banjo]]).
2. We signal types (a), (b) and (c) above using respectively 'e', 'ě' and 'E'. Type (d) uses 'e' and sets
`base.palatalize_voc`.
3. In combine_stem_ending(), we convert the stem back to the written form before adding the ending. If the ending begins
with -e, this may entail converting -e to -ě, and in all cases -E is converted to -e. "Converting to the written
form" converts ť/ď/ň to plain equivalents and deletes TEMP_SOFT_LABIAL before -e, converting -e to -ě with such
consonants. The same conversions happen before other front vowels -ě/-é/-i/-í, which don't allow ť/ď/ň to
precede, and in all cases with TEMP_SOFT_LABIAL, which is not an actual consonant.
4. If the ending is specified using -ě, this is maintained after plain coronals and labials in combine_stem_ending(),
and converted to -e in other cases.
5. Applying the first and second palatalization happens below in apply_special_cases().
]=]
-- Adjust the stem for the special cases that depend on the slot and on how the ending begins.
-- Returns `stem, ending`. The returned stem is either a string or a two-element list of alternative stems (when
-- both variants are acceptable); `ending` is currently returned unchanged.
-- Branches, tried in this order (only the first matching one applies):
--   1. `base.c_as_k` and an ending starting with a back vowel: offer both the stem as is and the stem with final
--      "c" replaced by "k".
--   2. Vocative singular in "-e" with `base.palatalize_voc` set (and `base["-velar"]` unset): apply the first
--      palatalization; it is optional for inanimate stems ending in consonant + "r" (other than "rr").
--   3. Ending starting with ě/i/í, or locative singular in "-e": apply the second palatalization, after
--      simplifying "-ck" and, for `base.velar`, "-gu"/"-qu".
local function apply_special_cases(base, slot, stem, ending)
	if base.c_as_k and rfind(ending, "^[aouyáóúůý]") then
		local k_stem = rsub(stem, "c$", "k")
		stem = {stem, k_stem}
	elseif slot == "voc_s" and ending == "e" and base.palatalize_voc and not base["-velar"] then
		-- Don't palatalize words like [[hadíth]] with silent -h.
		local palstem = com.apply_first_palatalization(stem)
		-- According to IJP, nouns ending in -Cr palatalize in the vocative, but those in -Vr don't. In reality,
		-- though, it's more complex. It appears that animate nouns in -Cr tend to palatalize but inanimate nouns
		-- do it optionally. Specifics:
		-- -- Inanimate nouns with optional palatalization (ř listed second): [[alabastr]], [[amfiteátr]], [[barometr]],
		--    [[centilitr]], [[centrimetr]], [[decilitr]], [[decimetr]], [[Dněstr]], [[filtr]], [[galvanometr]],
		--    [[hektolitr]], [[kalorimetr]], [[litr]], [[lustr]], [[manometr]], [[manšestr]], [[metr]] (NOTE: is both
		--    animate and inanimate), [[mikrometr]], [[miliampérmetr]], [[mililitr]], [[nanometr]], [[orchestr]],
		--    [[parametr]], [[piastr]], [[půllitr]], [[radiometr]], [[registr]], [[rotmistr]], [[semestr]], [[skútr]],
		--    [[spirometr]], [[svetr]], [[šutr]], [[tachometr]], [[titr]], [[vítr]] (NOTE: has í-ě alternation),
		--    [[voltmetr]]; [[bagr]], [[bunkr]], [[cedr]], [[Dněpr]], [[fofr]], [[habr]] (NOTE: ř listed first), [[hadr]]
		--    (NOTE: ř listed first), [[hamr]], [[kafr]], [[kepr]], [[kopr]], [[koriandr]], [[krekr]], [[kufr]],
		--    [[Kypr]], [[lágr]], [[lógr]], [[manévr]], [[masakr]], [[okr]], [[oleandr]], [[pulovr]], [[šlágr]],
		--    [[vichr]] (NOTE: ř listed first), [[žánr]]
		--
		-- -- Inanimate nouns that don't palatalize: [[ampérmetr]], [[anemometr]], [[sfygmomanometr]], [[sfygmometr]];
		--    [[dodekaedr]], [[Hamr]], [[ikozaedr]], [[kvádr]], [[sandr]], [[torr]]
		--
		-- -- Animate nouns that palatalize: [[arbitr]], [[bratr]], [[ekonometr]], [[foniatr]], [[fotr]], [[geometr]],
		--    [[kmotr]], [[lotr]], [[magistr]], [[metr]] (NOTE: is both animate and inanimate), [[ministr]], [[mistr]],
		--    [[pediatr]], [[Petr]], [[psychiatr]], [[purkmistr]], [[setr]], [[šamstr]]; [[bobr]], [[fajnšmekr]],
		--    [[humr]], [[hypochondr]], [[kapr]], [[lídr]], [[negr]], [[obr]], [[salamandr]], [[sólokapr]], [[švagr]],
		--    [[tygr]], [[zlobr]], [[zubr]]
		--
		-- -- Animate nouns with optional palatalization (ř listed first): [[Silvestr]]; [[Alexandr]], [[snajpr]]
		--
		-- Note the inconsistencies, e.g. [[sfygmomanometr]] and [[ampérmetr]] don't palatalize but [[manometr]] and
		-- [[miliampérmetr]] do it optionally. In reality, inanimate vocatives are extremely rare so this may not be the
		-- final word.
		if base.animacy == "inan" and rfind(stem, com.cons_c .. "r$") and not rfind(stem, "rr$") then
			-- optional r -> ř
			stem = {stem, palstem}
		else
			stem = palstem
		end
	elseif rfind(ending, "^[ěií]") or slot == "loc_s" and ending == "e" then
		if rfind(stem, "ck$") and rfind(base.lemma, "ck$") then
			-- IJP says nouns in -ck (back, comeback, crack, deadlock, hatchback, hattrick, joystick, paperback,
			-- quarterback, rock, soundtrack, track, truck) simplify the resulting -cc ending in the loc_p to -c.
			-- Similarly [[quarterback]] has nom_pl 'quarterbaci, quarterbackove'. We need to check the lemma as well
			-- because nouns in -cek don't do this.
			stem = rsub(stem, "ck$", "k")
		end
		if base.velar then
			-- [[petanque]] /petank/ -> loc pl 'petancích'.
			stem = rsub(stem, "gu$", "g")
			stem = rsub(stem, "qu$", "k")
		end
		-- loc_s of hard masculines is sometimes -e/ě; the user might indicate this as -e, which we should handle
		-- correctly
		stem = com.apply_second_palatalization(stem)
	end
	return stem, ending
end
-- Tell whether `slot` should be skipped for a term restricted to `number`: plural slots ("..._p") are skipped for
-- "sg" terms and singular slots ("..._s") for "pl" terms. The result is truthy (the match position) when the slot
-- is to be skipped and falsy (false or nil) otherwise.
local function skip_slot(number, slot)
	if number == "sg" then
		local pos = rfind(slot, "_p$")
		return pos or false
	elseif number == "pl" then
		local pos = rfind(slot, "_s$")
		return pos
	end
	return false
end
-- Core helper: combine stem(s) with ending(s) and store the resulting form(s) in `slot` of `base.forms`.
--
-- `stems` is either the `stems` object handed to the declension functions or a plain string overriding the stem.
-- (NOTE: when passing a string, also pass the value of `stems.footnotes` as `footnotes`, or it is lost. To supply
-- a footnote of your own on top of that, merge them with iut.combine_footnotes().)
-- `endings` is a single ending string or a list of endings; if nil, nothing is inserted. The special ending "-"
-- means "use the actual lemma as is": `stems` is ignored for it, which matters when the user gave a
-- `decllemma:` different from the real lemma or specified '.foreign' (which has a similar effect).
local function add(base, slot, stems, endings, footnotes)
	-- skip_slot() is checked against the *declined* number; if the actual number differs, decline_noun() fixes
	-- things up at the end.
	if not endings or skip_slot(base.number, slot) then
		return
	end

	local stems_footnotes = nil
	if type(stems) == "table" then
		stems_footnotes = stems.footnotes or nil
	end
	local base_and_stem_footnotes = iut.combine_footnotes(base.footnotes, stems_footnotes)
	footnotes = iut.combine_footnotes(base_and_stem_footnotes, footnotes)

	if type(endings) == "string" then
		endings = {endings}
	end

	local function combine_stem_ending(stem, ending)
		return com.combine_stem_ending(base, slot, stem, ending)
	end

	for _, ending in ipairs(endings) do
		-- Choose the stem: the lemma for "-", the override string if one was given, otherwise one of the four
		-- stems of the `stems` object (vowel vs. non-vowel ending, crossed with regular vs. oblique).
		local stem
		if ending == "-" then
			stem, ending = base.actual_lemma, ""
		elseif type(stems) == "string" then
			stem = stems
		else
			local vowel_initial = rfind(ending, "^" .. com.vowel_c)
			local oblique_slots = stems.oblique_slots
			local use_oblique = oblique_slots == "all"
				or (oblique_slots == "gen_p" or oblique_slots == "all-oblique") and slot == "gen_p"
				or oblique_slots == "all-oblique"
					and (slot == "ins_s" or slot == "dat_p" or slot == "loc_p" or slot == "ins_p")
			local stem_key = (use_oblique and "oblique_" or "") .. (vowel_initial and "vowel_stem" or "nonvowel_stem")
			stem = stems[stem_key]
		end

		-- Possibly apply the first or second Slavic palatalization.
		stem, ending = apply_special_cases(base, slot, stem, ending)
		ending = iut.combine_form_and_footnotes(ending, footnotes)
		iut.add_forms(base.forms, slot, stem, ending, combine_stem_ending)
	end
end
-- Apply the user-specified slot overrides in `base.overrides` to every slot for which `do_slot(slot)` is truthy.
-- An overridden slot is recorded in `base.slot_overridden` and its existing forms are discarded before the override
-- values are inserted. Throws an error if an override targets a slot that is excluded by `base.number`.
local function process_slot_overrides(base, do_slot)
	-- Insert the form(s) for a single override value. `is_full` says whether the value is a complete form (inserted
	-- as-is, unless empty) or an ending to be attached to each stem set through add().
	local function apply_value(slot, is_full, value)
		local form = value.form
		local combined_notes = iut.combine_footnotes(base.footnotes, value.footnotes)
		if is_full then
			if form ~= "" then
				iut.insert_form(base.forms, slot, {form = form, footnotes = combined_notes})
			end
			return
		end
		-- Convert a null ending to "-" in the acc/voc sg slots so that e.g. [[Kerberos]] declared as
		-- <m.sg.foreign.gena:u.acc-:a> works correctly and generates accusative 'Kerberos/Kerbera' not
		-- #'Kerber/Kerbera'.
		if form == "" and (slot == "acc_s" or slot == "voc_s") then
			form = "-"
		end
		for _, stems in ipairs(base.stem_sets) do
			add(base, slot, stems, form, combined_notes)
		end
	end

	for slot, overrides in pairs(base.overrides) do
		-- skip_slot() is called with the declined number; if the actual number is different, decline_noun() corrects
		-- this at the end.
		if skip_slot(base.number, slot) then
			error("Override specified for invalid slot '" .. slot .. "' due to '" .. base.number .. "' number restriction")
		end
		if do_slot(slot) then
			base.slot_overridden[slot] = true
			base.forms[slot] = nil
			for _, override in ipairs(overrides) do
				for _, value in ipairs(override.values) do
					apply_value(slot, override.full, value)
				end
			end
		end
	end
end
-- Add a full set of singular and plural forms. Each ending argument is a string, a list of strings, or nil (in which
-- case add() inserts nothing for that slot). The nominative singular always uses the lemma (ending "-").
local function add_decl(base, stems,
	gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
	nom_p, gen_p, dat_p, acc_p, loc_p, ins_p, footnotes
)
	-- Run add() over an ordered list of {slot, ending} pairs.
	local function add_slots(slot_endings)
		for _, pair in ipairs(slot_endings) do
			add(base, pair[1], stems, pair[2], footnotes)
		end
	end

	add_slots {
		{"nom_s", "-"},
		{"gen_s", gen_s},
		{"dat_s", dat_s},
		{"acc_s", acc_s},
		{"voc_s", voc_s},
		{"loc_s", loc_s},
		{"ins_s", ins_s},
	}

	if base.number == "pl" then
		-- If this is a plurale tantum noun and we're processing the nominative plural, use the user-specified lemma
		-- rather than generating the plural from the synthesized singular, which may not match the specified lemma
		-- (e.g. [[tvargle]] "Olomouc cheese" using <m.pl.mixed> would try to generate 'tvargle/tvargly', and
		-- [[peníze]] "money" using <m.pl.#ě.genpl-> would try to generate 'peněze'). An accusative plural whose
		-- endings equal those of the nominative plural gets the same treatment.
		if m_table.deepEquals(nom_p, acc_p) then
			acc_p = "-"
		end
		nom_p = "-"
	end

	add_slots {
		{"nom_p", nom_p},
		{"gen_p", gen_p},
		{"dat_p", dat_p},
		{"acc_p", acc_p},
		{"loc_p", loc_p},
		{"ins_p", ins_p},
	}
end
-- Add singular forms only: delegates to add_decl() with every plural ending left as nil, so that no plural forms
-- are inserted.
local function add_sg_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s, footnotes)
	local no_ending = nil
	add_decl(base, stems,
		gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		no_ending, no_ending, no_ending, no_ending, no_ending, no_ending,
		footnotes)
end
-- Add plural forms only: delegates to add_decl() with every singular ending left as nil and with the nominative
-- plural taken from the lemma (ending "-"). Note that add_decl() itself still adds the "nom_s" slot from the lemma;
-- add() drops it when `base.number` is "pl".
local function add_pl_only_decl(base, stems, gen_p, dat_p, acc_p, loc_p, ins_p, footnotes)
	local no_ending = nil
	add_decl(base, stems,
		no_ending, no_ending, no_ending, no_ending, no_ending, no_ending,
		"-", gen_p, dat_p, acc_p, loc_p, ins_p,
		footnotes)
end
-- Add singular forms including the clitic genitive, dative and accusative slots. The nominative singular is taken
-- from the lemma unless `no_nom_s` is truthy, in which case the "nom_s" slot is left untouched.
local function add_sg_decl_with_clitic(base, stems,
	gen_s, clitic_gen_s, dat_s, clitic_dat_s, acc_s, clitic_acc_s, voc_s, loc_s, ins_s, footnotes, no_nom_s
)
	if not no_nom_s then
		add(base, "nom_s", stems, "-", footnotes)
	end
	-- Ordered {slot, ending} pairs; a nil ending makes add() a no-op for that slot.
	local slot_endings = {
		{"gen_s", gen_s},
		{"clitic_gen_s", clitic_gen_s},
		{"dat_s", dat_s},
		{"clitic_dat_s", clitic_dat_s},
		{"acc_s", acc_s},
		{"clitic_acc_s", clitic_acc_s},
		{"voc_s", voc_s},
		{"loc_s", loc_s},
		{"ins_s", ins_s},
	}
	for _, pair in ipairs(slot_endings) do
		add(base, pair[1], stems, pair[2], footnotes)
	end
end
-- Apply slot overrides and fill in the slots whose default value is copied from another slot (voc_p, acc_s,
-- clitic_acc_s), then compute the "_linked" variants of the potential lemma slots.
local function handle_derived_slots_and_overrides(base)
	-- Slots whose default is derived from another slot rather than generated directly by a declension function.
	local derived_slots = {voc_p = true, acc_s = true, clitic_acc_s = true}
	local function is_derived_slot(slot)
		return derived_slots[slot] == true
	end
	local function is_non_derived_slot(slot)
		return not is_derived_slot(slot)
	end

	base.slot_overridden = {}

	-- Overrides of non-derived slots come first, so that an override of a source slot (e.g. nom_p) propagates to
	-- the slots derived from it.
	process_slot_overrides(base, is_non_derived_slot)

	-- Vocative plural copies the nominative plural. Pronouns don't have a vocative (singular or plural); the same
	-- exclusion is applied when `base.det` is set.
	if not (base.pron or base.det) then
		iut.insert_forms(base.forms, "voc_p", base.forms.nom_p)
	end

	-- Accusative singular (plain and clitic), when not already present and not overridden, copies the nominative
	-- singular for inanimates and otherwise the corresponding genitive singular.
	local inanimate = base.animacy == "inan"
	local accusative_sources = {
		{"acc_s", "gen_s"},
		{"clitic_acc_s", "clitic_gen_s"},
	}
	for _, pair in ipairs(accusative_sources) do
		local target, animate_source = pair[1], pair[2]
		if not base.forms[target] and not base.slot_overridden[target] then
			iut.insert_forms(base.forms, target, base.forms[inanimate and "nom_s" or animate_source])
		end
	end

	-- Now the overrides of the derived slots themselves, so that they too can be overridden.
	process_slot_overrides(base, is_derived_slot)

	-- Compute linked versions of potential lemma slots, for use in {{cs-noun}}. A form identical to the lemma is
	-- replaced with the original lemma (before links were removed) if the original lemma has links.
	local function relink(form)
		if form == base.orig_lemma_no_links and rfind(base.orig_lemma, "%[%[") then
			return base.orig_lemma
		end
		return form
	end
	for _, slot in ipairs(potential_lemma_slots) do
		iut.insert_forms(base.forms, slot .. "_linked", iut.map_forms(base.forms[slot], relink))
	end
end
-- Table mapping declension types to functions to decline the noun. The function takes two arguments, `base` and
-- `stems`; the latter specifies the computed stems (vowel vs. non-vowel, regular vs. oblique) and whether the noun
-- is reducible and/or has vowel alternations in the stem. Most of the specifics of determining which stem to use
-- and how to modify it for the given ending are handled in add_decl() (by way of add()); the declension functions
-- just need to generate the appropriate endings.
local decls = {}
-- Table specifying additional properties for declension types. Every declension type must have such a table, which
-- specifies which category or categories to add and what annotation to show in the title bar of the declension table.
--
-- * Only the `cat` property of this table is mandatory; there is also a `desc` property to specify the annotation, but
-- this can be omitted and the annotation will then be computed from the `cat` property. The `cat` property is either
-- a string, a list of strings or a function (of two arguments, `base` and `stems` as above) returning a string or
-- list of strings. The string can contain the keywords GENDER to substitute the gender (and animacy for masculine
-- nouns) and POS (to substitute the pluralized part of speech). The keyword GENPOS is equivalent to 'GENDER POS'. If
-- no keyword is present, ' GENPOS' is added onto the end. If only GENDER is present, ' POS' is added onto the end.
-- In all cases, the language name is added onto the beginning to form the full category name.
-- * The `desc` property is of the same form as the `cat` property and specifies the annotation to display in the title
-- bar (which may have the same format as the category minus the part of speech, or may be abbreviated). The value
-- may not be a list of strings, as only one annotation is displayed. If omitted, it is derived from the category
-- spec(s) by taking the last category (if more than one is given) and removing ' POS' before keyword substitution.
local declprops = {}
-- Return the default masculine animate nominative plural ending(s) given `base` and `stems`. This is called for hard
-- and soft masculines ending in a consonant, but not for nouns ending in a vowel, which have their own defaults
-- (particularly nouns in -a, where -ista/-ita/-asta behave differently from other nouns in -a).
--
-- The result is either a single ending (string) or a list of endings. The rules below are tried in order and the
-- first one that matches wins; the overall default is -i.
local function default_masc_animate_nom_pl(base, stems)
	local lemma = base.lemma

	-- [monosyllabic words: Dánové, Irové, králové, mágové, Rusové, sokové, synové, špehové, zběhové, zeťové, manové,
	-- danové (but Žid → Židé, Čech → Češi).] -- There are too many exceptions to this to make a special rule. It is
	-- better to use the overall default of -i and require that cases with -ove, -ove/-i, -i/-ove, etc. use overrides.
	-- Disabled rule: com.is_monosyllabic(base.lemma) and "ové"

	-- Reducible terms in -Cek; order of -ové vs. -i sometimes varies:
	-- [[fracek]] (ové/i), [[klacek]] (i/ové), [[macek]] (ové/i), [[nácek]] (i/ové), [[prcek]] (ové/i), [[racek]]
	-- (ové/i); [[bazilišek]] (i/ové), [[černoušek]] (i/ové), [[drahoušek]] (ové/i), [[fanoušek]] (i/ové),
	-- [[františek]] (an/inan, ends in -i/-y but not -ové), [[koloušek]] (-i only), [[kulíšek]] (i/ové), [[oříšek]]
	-- (i/ové), [[papoušek]] (-i only), [[prášek]] (i/ové), [[šašek]] (i/ové).
	-- Make sure to check `stems` as we don't want to include non-reducible words in -Cek (but do want to include
	-- [[quarterback]], with -i/-ové).
	if rfind(stems.vowel_stem, "^" .. com.lowercase_c .. ".*" .. com.cons_c .. "k$") then
		return {"i", "ové"}
	end

	-- [[stoik]], [[neurotik]], [[logik]], [[fyzik]], etc. The pattern needs '.*' (any number of characters) between
	-- the initial lowercase letter and the final -ik; a bare '.' would only ever match four-character lemmas and
	-- hence none of the examples just listed.
	if rfind(lemma, "^" .. com.lowercase_c .. ".*ik$") then
		return {"i", "ové"}
	end

	-- barmani, gentlemani, jazzmani, kameramani, narkomani, ombudsmani, pivotmani, rekordmani, showmani, supermani,
	-- toxikomani
	if rfind(lemma, "^" .. com.lowercase_c .. ".*man$") then
		return "i"
	end

	-- Terms ending in -an after a palatal or a consonant that doesn't change when palatalized, i.e. labial or l (but
	-- -man forms -mani unless in a proper noun): Brňan → Brňané, křesťan → křesťané, měšťan → měšťané, Moravan →
	-- Moravané, občan → občané, ostrovan → ostrované, Pražan → Pražané, Slovan → Slované, svatebčan → svatebčané,
	-- venkovan → venkované, Australan → Australané; also s, because there are many demonyms in -san e.g.
	-- [[Andalusan]], [[Barbadosan]], [[Oděsan]], and few proper nouns in -san; similarly z because of [[Belizan]],
	-- [[Gazan]], [[Kavkazan]], etc.; also w, which isn't a normal consonant in Czech but occurs in [[Glasgowan]] and
	-- [[Zimbabwan]]. NOTE: a few misc words like [[pohan]] also work this way but need manual overrides.
	if rfind(lemma, "[" .. com.inherently_soft .. com.labial .. "wlsz]an$") then
		return {"é", "i"} -- most now can also take -i
	end

	-- Proper names: Baťové, Novákové, Petrové, Tomášové, Vláďové; exclude demonyms (but include surnames).
	if rfind(lemma, "^" .. com.uppercase_c) and (base.surname or not rfind(lemma, "[eě]c$")) then
		return "ové"
	end

	-- Demonyms: [[Albánec]], [[Gruzínec]], [[Izraelec]], [[Korejec]], [[Libyjec]], [[Litevec]], [[Němec]],
	-- [[Portugalec]]
	if rfind(lemma, "^" .. com.uppercase_c .. ".*[eě]c$") then
		return "i"
	end

	-- From here on down, we're dealing only with lowercase terms.

	-- buditelé, budovatelé, čekatelé, činitelé, hostitelé, jmenovatelé, pisatelé, ručitelé, velitelé, živitelé
	if rfind(lemma, ".*tel$") then
		return "é"
	end

	-- Nouns in -j: čaroděj → čarodějové, lokaj → lokajové, patricij → patricijové, plebej → plebejové, šohaj →
	-- šohajové, žokej → žokejové.
	-- Nouns in -l: apoštol → apoštolové, břídil → břídilové, fňukal → fňukalové, hýřil → hýřilové, kutil →
	-- kutilové, loudal → loudalové, mazal → mazalové, škrabal → škrabalové, škudlil → škudlilové, vyvrhel →
	-- vyvrhelové, žvanil → žvanilové (we excluded those in -tel above).
	if rfind(lemma, ".*[jl]$") then
		return "ové"
	end

	-- archeolog → archeologové, biolog → biologové, geolog → geologové, meteorolog → meteorologové
	if rfind(lemma, ".*log$") then
		return "ové"
	end

	-- dramaturg → dramaturgové, chirurg → chirurgové
	if rfind(lemma, ".*urg$") then
		return "ové"
	end

	-- fotograf → fotografové, geograf → geografové, lexikograf → lexikografové
	if rfind(lemma, ".*graf$") then
		return "ové"
	end

	-- bibliofil → bibliofilové, germanofil → germanofilové
	if rfind(lemma, ".*fil$") then
		return "ové"
	end

	-- rusofob → rusofobové
	if rfind(lemma, ".*fob$") then
		return "ové"
	end

	-- agronom → agronomové, ekonom → ekonomové
	if rfind(lemma, ".*nom$") then
		return "ové"
	end

	return "i"
end
do
	-- Whether the noun counts as velar-stem: either forced with `base.velar`, or (unless suppressed with
	-- `base["-velar"]`) detected from a vowel stem ending in a velar consonant. Shared by the declension function and
	-- by the description/category functions so that all three agree.
	local function is_velar_stem(base, stems)
		return base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$")
	end

	decls["hard-m"] = function(base, stems)
		-- Nouns ending in hard -c, e.g. [[hec]] "joke", [[kibuc]] "kibbutz", don't palatalize.
		base.palatalize_voc = not rfind(stems.vowel_stem, "c$")
		base.hard_c = true
		local velar = is_velar_stem(base, stems)
		local lemma = base.lemma
		local inanimate = base.animacy == "inan"

		-- See [https://prirucka.ujc.cas.cz/en/?id=360] on declension of toponyms.
		local toponym = inanimate and rfind(lemma, "^" .. com.uppercase_c)
		-- Some toponyms take -a in the genitive singular, e.g. toponyms in -ín ([[Zlín]], [[Jičín]], [[Berlín]]);
		-- -ýn ([[Hostýn]], [[Londýn]]); -ov ([[Havířov]]); and -ev ([[Bezdrev]]), as do some others, e.g. domestic
		-- [[Beroun]], [[Brandýs]], [[Náchod]], [[Tábor]] and foreign [[Betlém]] "Bethlehem", [[Egypt]],
		-- [[Jeruzalém]] "Jerusalem", [[Milán]] "Milan", [[Řím]] "Rome", [[Rýn]] "Rhine". Also some transferred from
		-- common nouns e.g. ([[Nový]]) [[Kostel]], ([[Starý]]) [[Rybník]]. Only the -ín/-ýn/-ov/-ev endings are
		-- detected automatically here.
		local toponym_gen_a = toponym and (rfind(lemma, "[íý]n$") or rfind(lemma, "[oe]v$"))
		-- Toponyms in -ík (Mělník, Braník, Rakovník, Lipník) seem to fluctuate between gen -a and -u. Also some in
		-- ‑štejn, ‑berg, ‑perk, ‑burk, ‑purk (Rabštejn, Heidelberg, Kašperk, Hamburk, Prešpurk) and some others:
		-- Zbiroh, Kamýk, Příbor, Zábřeh, Žebrák, Praděd. Only -ík is detected automatically here.
		local toponym_gen_a_u = toponym and rfind(lemma, "ík$")
		-- Toponyms that take -a in the genitive singular tend to take -ě in the locative singular; so do those in
		-- -štejn (Rabštejn), -hrad (Petrohrad), -grad (Volgograd).
		local toponym_loc_e = toponym and (toponym_gen_a or rfind(lemma, "štejn$") or rfind(lemma, "[gh]rad$"))
		-- Toponyms in -ík seem to fluctuate between loc -ě and -u.
		local toponym_loc_e_u = toponym_gen_a_u

		-- Inanimate gen_s in -a other than toponyms in -ín/-ýn/-ev/-ov (e.g. [[zákon]] "law", [[oběd]] "lunch",
		-- [[kostel]] "church", [[dnešek]] "today", [[leden]] "January", [[trujúhelník]] "triangle") needs to be given
		-- manually, using '<gena>'.
		local gen_s
		if toponym_gen_a then
			gen_s = "a"
		elseif toponym_gen_a_u then
			gen_s = {"a", "u"}
		elseif inanimate then
			gen_s = "u"
		else
			gen_s = "a"
		end

		-- Animates with dat_s only in -u (e.g. [[člověk]] "person", [[Bůh]] "God") need to give this manually,
		-- using '<datu>'.
		local dat_s
		if inanimate then
			dat_s = "u"
		elseif base.surname then
			dat_s = "ovi"
		else
			dat_s = {"ovi", "u"}
		end

		-- Inanimates with loc_s in -e/ě other than certain toponyms (see above) need to give this manually, using
		-- <locě>, but it will trigger the second palatalization automatically.
		local loc_s
		if toponym_loc_e then
			loc_s = "ě"
		elseif toponym_loc_e_u then
			loc_s = {"ě", "u"}
		else
			loc_s = dat_s
		end

		-- Velar-stem animates with voc_s in -e (e.g. [[Bůh]] "God", voc_s 'Bože'; [[člověk]] "person", voc_s
		-- 'člověče') need to give this manually using <voce>; it will trigger the first palatalization
		-- automatically. The default 'e' for non-velars triggers first palatalization in apply_special_cases().
		local voc_s = velar and "u" or "e"

		-- Nom_p in -i will trigger second palatalization in apply_special_cases().
		local nom_p
		if inanimate then
			nom_p = "y"
		else
			nom_p = default_masc_animate_nom_pl(base, stems)
		end

		-- Per IJP and Janda and Townsend:
		-- * loc_p in -ích is currently the default for velars but not otherwise; it will automatically trigger the
		--   second palatalization (e.g. [[práh]] "threshold", loc_p 'prazích'). Otherwise, -ích needs to be given
		--   manually using <locplích>, e.g. [[les]] "forest"; [[hotel]] "hotel"; likewise for loc_p in -ách (e.g.
		--   [[plech]] "metal plate"), using <locplách>.
		-- * Inanimate hard nouns in -c normally have -ech: [[hec]] "joke", [[tác]] "tray", [[truc]], [[kec]],
		--   [[frc]], [[flanc]], [[kibuc]] "kibbutz", [[pokec]] "chat".
		-- In the IJP tables, inanimate reducible nouns in -ček (and most in -cek, although there are many fewer; also
		-- some in -žek, but in this case it's too inconsistent to make the default) regularly have both -ích and
		-- -ách in the locative plural, while similar animate nouns only have -ích. This applies even to nouns like
		-- [[háček]] and [[koníček]] that can be either animate or inanimate. Make sure to exclude nouns in -ck such
		-- as [[comeback]] and [[joystick]], which have only -ích.
		local loc_p
		if inanimate and rfind(lemma, "[cč]ek$") and rfind(stems.vowel_stem, "[cč]k$") then
			loc_p = {"ích", "ách"}
		elseif velar then
			loc_p = "ích"
		else
			loc_p = "ech"
		end

		add_decl(base, stems,
			-- gen_s, dat_s, acc_s, voc_s, loc_s, ins_s
			gen_s, dat_s, nil, voc_s, loc_s, "em",
			-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
			nom_p, "ů", "ům", "y", loc_p, "y")
	end

	declprops["hard-m"] = {
		-- Title-bar annotation: "velar" vs. "hard", followed by the gender.
		desc = function(base, stems)
			return is_velar_stem(base, stems) and "velar GENDER" or "hard GENDER"
		end,
		-- Category: velar-stem nouns are categorized separately from other hard nouns.
		cat = function(base, stems)
			return is_velar_stem(base, stems) and "velar-stem" or "hard"
		end
	}
end
decls["semisoft-m"] = function(base, stems)
	-- Examples:
	-- * Animate in -ius: génius, nuncius, nonius (breed of horse), notárius, ordinárius, patricius, primárius,
	--   pronuncius, various names
	-- * Animate in -eus: farizeus, basileus, pygmeus ([[skarabeus]] inflects hard in the plural), various names
	-- * Inanimate in -ius: nonius (measuring device), rádius, sestercius
	-- NOTE: Inanimate nouns in -eus (nukleus, choreus) inflect hard in the plural
	local inanimate = base.animacy == "inan"
	-- Dative singular: -u for inanimates, -ovi only for surnames, otherwise -ovi/-u.
	local dat_s
	if inanimate then
		dat_s = "u"
	elseif base.surname then
		dat_s = "ovi"
	else
		dat_s = {"ovi", "u"}
	end
	-- Locative singular is the same as the dative singular.
	local loc_s = dat_s
	local nom_p = inanimate and "e" or "ové"
	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s, loc_s, ins_s
		"a", dat_s, nil, "e", loc_s, "em",
		-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
		nom_p, "ů", "ům", "e", "ích", "i")
end
declprops["semisoft-m"] = {cat = "semisoft"}
decls["soft-m"] = function(base, stems)
	base.palatalize_voc = true
	local lemma = base.lemma
	local inanimate = base.animacy == "inan"

	-- Animates with dat_s only in -i need to give this manually, using '<dati>'.
	local dat_s
	if inanimate then
		dat_s = "i"
	elseif base.surname then
		dat_s = "ovi"
	else
		dat_s = {"ovi", "i"}
	end
	-- Locative singular is the same as the dative singular.
	local loc_s = dat_s

	-- Per IJP, the vast majority of soft masculine animates take -i in the voc_s, but those in -ec/-ěc take -e with
	-- first palatalization to -če, e.g. [[otec]] "father", [[lovec]] "hunter", [[blbec]] "fool, idiot",
	-- [[horolezec]] "mountaineer", [[znalec]] "expert", [[chlapec]] "boy", [[nadšenec]] "enthusiast", [[luněc]]
	-- (type of bird). Demonyms but not surnames ending in -ec but beginning with a capital letter take either -e or
	-- -i (only the former triggers the first palatalization). Examples: [[Portugalec]], [[Slovinec]] "Slovenian",
	-- [[Japonec]], [[Vietnamec]]. Not [[Kadlec]] (surname). The -e variant is only used for reducible stems.
	local voc_s
	if base.animacy == "an" and rfind(lemma, "[eě]c$") and stems.reducible then
		if not base.surname and rfind(lemma, "^" .. com.uppercase_c) then
			voc_s = {"e", "i"}
		else
			voc_s = "e"
		end
	else
		voc_s = "i"
	end

	local nom_p
	if inanimate then
		nom_p = "e"
	else
		nom_p = default_masc_animate_nom_pl(base, stems)
	end

	-- Nouns with loc_p in -ech (e.g. [[cíl]] "goal") need to give this manually, using <locplech>.
	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s, loc_s, ins_s
		"e", dat_s, nil, voc_s, loc_s, "em",
		-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
		nom_p, "ů", "ům", "e", "ích", "i")
end
declprops["soft-m"] = {cat = "soft"}
decls["mixed-m"] = function(base, stems)
	-- NOTE: IJP tends to list the soft endings first, but per their section on this
	-- (https://prirucka.ujc.cas.cz/en/?id=220), the hard endings tend to predominate in modern use, so we list them
	-- first.
	local lemma = base.lemma

	if base.animacy == "an" and rfind(lemma, "l$") then
		-- Animates in -l: [[anděl]] "angel", [[manžel]] "husband", [[strašpytel]] "coward"; 'strašpytel' has a
		-- different declension from the other two, with more soft forms. [[manžel]] has plural in -é or -ové and
		-- needs an override.
		local dat_s = base.surname and "ovi" or {"ovi", "u"}
		local loc_s = dat_s
		add_decl(base, stems,
			"a", dat_s, nil, "i", loc_s, "em",
			"é", "ů", "ům", {"y", "e"}, {"ech", "ích"}, {"y", "i"})
		return
	end

	if base.animacy == "an" then
		-- Other animates.
		-- -s/-z: rorýs, platýs, pilous, markýz, všekaz, stávkokaz, penězokaz, listokaz, dřevokaz, zrnokaz, boss.
		-- Others recently moving towards this declension: primas, karas, kalous, konipas, ibis, chabrus, chuďas,
		-- kakabus, kliďas, kandrdas, morous, vágus.
		-- Some names: Alois, Mánes.
		-- Both hard and soft endings throughout. Most have -i and -ové in the nominative plural.
		local dat_s = base.surname and "ovi" or {"u", "i", "ovi"}
		local loc_s = dat_s
		add_decl(base, stems,
			{"a", "e"}, dat_s, nil, {"e", "i"}, loc_s, "em",
			{"i", "ové"}, "ů", "ům", {"y", "e"}, {"ech", "ích"}, {"y", "i"})
		return
	end

	if rfind(lemma, "[ls]$") then
		-- Non-animates in -l/-s.
		-- Given in IJP: burel, hnědel, chmel, krevel, kužel, námel, plevel, tmel, zádrhel, apríl, artikul, koukol,
		-- rubl, úběl, plus reducible nouns cumel, chrchel, [[kotel]] "cauldron", sopel, uhel. Also [[městys]]. Many
		-- of them are listed in the IJP tables with only hard or with fewer soft forms, so need to be investigated
		-- individually.
		add_decl(base, stems,
			{"u", "e"}, {"u", "i"}, nil, {"e", "i"}, {"u", "e", "i"}, "em",
			{"y", "e"}, "ů", "ům", {"y", "e"}, {"ech", "ích"}, {"y", "i"})
		return
	end

	-- Remaining non-animates.
	-- -n/-t; hard in the plural: hřeben, ječmen, [[kámen]] "stone", kmen, kořen, křemen, plamen, [[pramen]]
	-- "source", [[řemen]] "strap", den, týden, [[loket]] "elbow".
	-- There may be deviations (e.g. soft plural forms for [[den]]), so need to be investigated individually.
	add_decl(base, stems,
		{"u", "e"}, {"u", "i"}, nil, "i", {"u", "i"}, "em",
		"y", "ů", "ům", "y", "ech", "y")
end
declprops["mixed-m"] = {cat = "mixed"}
decls["a-m"] = function(base, stems)
	local vowel_stem = stems.vowel_stem

	-- Nominative plural:
	-- husita → husité, izraelita → izraelité, jezuita → jezuité, kosmopolita → kosmopolité, táborita → táborité
	-- fašista → fašisté, filatelista → filatelisté, fotbalista → fotbalisté, kapitalista → kapitalisté,
	-- marxista → marxisté, šachista → šachisté, terorista → teroristé. NOTE: most these words actually appear in
	-- the IJP tables with -é/-i, so we go accordingly.
	--
	-- gymnasta → gymnasté, fantasta → fantasté; also chiliasta, orgiasta, scholiasta, entuziasta, dynasta, ochlasta,
	-- sarkasta, vymasta; NOTE: Only 'gymnasta' actually given with just -é; 'fantasta' with -ové/-é, 'dynasta' and
	-- 'ochlasta' with just -ové, vymasta not in IJP (no plural given in SSJC), and the rest with -é/-i. So we go
	-- accordingly.
	local nom_p
	if rfind(vowel_stem, "is?t$") or rfind(vowel_stem, "ast$") then
		nom_p = {"é", "i"}
	else
		nom_p = "ové"
	end

	-- Locative plural: velar nouns (e.g. [[sluha]] "servant") have -ích (which triggers the second palatalization)
	-- instead of -ech. Nouns whose stem ends in a soft consonant ([[rikša]], [[paša]], [[bača]], [[mahárádža]],
	-- [[paňáca]], etc.) behave likewise.
	-- FIXME: [[pária]] "pariah", [[Maria]] etc.
	local velar = base.velar or not base["-velar"] and rfind(vowel_stem, com.velar_c .. "$")
	local loc_p
	if velar or rfind(vowel_stem, com.inherently_soft_c .. "$") then
		loc_p = "ích"
	else
		loc_p = "ech"
	end

	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s, loc_s, ins_s
		"y", "ovi", "u", "o", "ovi", "ou",
		-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
		nom_p, "ů", "ům", "y", loc_p, "y")
end
declprops["a-m"] = {cat = "GENPOS in -a"}
decls["e-m"] = function(base, stems)
	-- [[zachránce]] "savior"; [[soudce]] "judge"; etc.
	-- At least two inanimates: [[průvodce]] "guide, guidebook; computing wizard"; [[správce]] "manager (software
	-- program), configuration program"
	local inanimate = base.animacy == "inan"
	-- Dative singular: -i for inanimates, -ovi only for surnames, otherwise -ovi/-i.
	local dat_s
	if inanimate then
		dat_s = "i"
	elseif base.surname then
		dat_s = "ovi"
	else
		dat_s = {"ovi", "i"}
	end
	-- Locative singular is the same as the dative singular.
	local loc_s = dat_s
	-- Nouns with -ové as well (e.g. [[soudce]] "judge") will need to specify that manually, e.g. <nompli:ové>.
	local nom_p = inanimate and "e" or "i"
	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s (= lemma), loc_s, ins_s
		"e", dat_s, nil, "-", loc_s, "em",
		-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
		nom_p, "ů", "ům", "e", "ích", "i")
end
declprops["e-m"] = {cat = "GENPOS in -e"}
decls["i-m"] = function(base, stems)
	-- [[kivi]] "kiwi (bird)"; [[kuli]] "coolie"; [[lori]] "lory, lorikeet (bird)" (loc_pl 'loriech/loriích/lorich');
	-- [[vini]] "parrot of the genus Vini"; [[yetti]]/[[yeti]] "yeti". Other examples: [[aguti]], [[efendi]],
	-- [[hadži]], [[pekari]], [[regenschori]].
	--
	-- [[grizzly]]/[[grizly]] "grizzly bear"; [[pony]] "pony"; [[husky]] "husky"; [[dandy]] "dandy"; [[Billy]] "billy".
	--
	-- NOTE: Some nouns in -y are regular soft stems, e.g. [[gay]] "gay person"; [[gray]] "gray (unit of absorbed
	-- radiation)"; [[Nagy]] (surname).
	--
	-- NOTE: The stem ends in -i/-y.
	--
	-- Every plural slot has two variants, one with a full ending and one with a short (or null) ending.
	local nom_p = {"ové", ""}
	local gen_p = {"ů", "ch"}
	local dat_p = {"ům", "m"}
	local acc_p = {"e", ""}
	local loc_p = {"ích", "ch"}
	-- ins_pl 'kivii/kivimi'
	local ins_p = {"i", "mi"}
	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s (= lemma), loc_s, ins_s
		"ho", "mu", nil, "-", "m", "m",
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
declprops["i-m"] = {cat = "GENPOS in -i/-y"}
decls["í-m"] = function(base, stems)
	-- [[kádí]] "qadi (Islamic judge)", [[mahdí]] "Mahdi (Islamic prophet)", [[muftí]] "mufti (Islamic scholar)",
	-- [[sipáhí]] "sipahi (Algerian cavalryman in the French army)"
	--
	-- No obvious examples in -ý, but the support is there.
	--
	-- NOTE: The stem ends in -í/-ý.
	--
	-- The first four plural slots have a full and a short (or null) variant; loc_p and ins_p have one form each.
	local nom_p = {"ové", ""}
	local gen_p = {"ů", "ch"}
	local dat_p = {"ům", "m"}
	local acc_p = {"e", ""}
	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s (= lemma), loc_s, ins_s
		"ho", "mu", nil, "-", "m", "m",
		nom_p, gen_p, dat_p, acc_p, "ích", "mi")
end
declprops["í-m"] = {cat = "GENPOS in -í/-ý"}
decls["ie-m"] = function(base, stems)
	-- [[zombie]] "zombie" (also fem/neut), [[hippie]] "hippie", [[yuppie]] "yuppie", [[rowdie]] "rowdy/hooligan"
	--
	-- NOTE: The stem ends in -i (not -ie, because of the plural).
	--
	-- Every plural slot pairs its own ending with the alternative ending -es.
	local function or_es(ending)
		return {ending, "es"}
	end
	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s (= lemma), loc_s, ins_s
		"eho", "emu", nil, "-", "em", "em",
		-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
		or_es("ové"), or_es("ů"), or_es("ům"), or_es("e"), or_es("ích"), or_es("i"))
end
declprops["ie-m"] = {cat = "GENPOS in -ie"}
decls["ee-m"] = function(base, stems)
	-- [[Yankee]] "Yankee"
	--
	-- NOTE: The stem ends in -ee.
	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s (= lemma), loc_s, ins_s
		"ho", "mu", nil, "-", "m", "m",
		-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
		"ové", "ů", "ům", "e", "ích", "i")
end
declprops["ee-m"] = {cat = "GENPOS in -ee"}
decls["o-m"] = function(base, stems)
	-- [[kápo]] "head, leader"; [[lamželezo]] "strongman"; [[torero]] "bullfighter"; [[žako]] "African gray parrot";
	-- [[dingo]] "dingo"; [[kakapo]] "kakapo" (given in Wiktionary with dat_s/loc_s in -ovi only not -ovi/-u; probably
	-- wrong but not in IJP); [[maestro]] "maestro"; [[Bruno]] "Bruno", [[Hugo]] "Hugo"; [[Ivo]] "Yves" (these names
	-- are singular-only per IJP); [[Kvido]] "Guido, Guy" (per IJP has accusative in -a or -ona); [[Oto]] "Otto" (per
	-- IJP also declinable like virile -a masculines; singular-only); [[Kuřátko]] (a surname; how declined?);
	-- [[Picasso]] (surname; how declined?); [[Pluto]] "Pluto (God)", also "Pluto (planet)", which is inanimate;
	-- [[Samo]]/[[Sámo]] "Samo (7th century Slavic ruler)" (dat_s/loc_s only in -ovi, needs override); [[Tomio]]
	-- "Tomio (Japanese male given name)" (how declined?); [[nemakačenko]] "idler, loafer" (given in Wiktionary with
	-- dat_s/loc_s in -ovi only, as for [[kakapo]]); [[nefachčenko]] "idler, loafer"; note also [[gadžo]] "gadjo",
	-- which has a unique declension.

	-- Inanimates e.g. [[Pluto]] (planet) have -u only, like for normal hard masculines; surnames have -ovi only.
	local dat_s
	if base.animacy == "inan" then
		dat_s = "u"
	elseif base.surname then
		dat_s = "ovi"
	else
		dat_s = {"ovi", "u"}
	end
	-- Locative singular is the same as the dative singular.
	local loc_s = dat_s

	-- Velar nouns ([[žako]], [[dingo]], etc.) have -ích in the loc_p (which triggers the second palatalization)
	-- instead of -ech.
	local loc_p
	if base.velar or not base["-velar"] and rfind(stems.vowel_stem, com.velar_c .. "$") then
		loc_p = "ích"
	else
		loc_p = "ech"
	end

	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s (= lemma), loc_s, ins_s
		"a", dat_s, nil, "-", loc_s, "em",
		-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
		"ové", "ů", "ům", "y", loc_p, "y")
end
declprops["o-m"] = {cat = "GENPOS in -o"}
decls["u-m"] = function(base, stems)
	-- [[emu]] "emu", [[guru]] "guru", [[kakadu]] "cockatoo", [[marabu]] "marabou" (declined the same way)
	-- [[Osamu]] "Osamu (Japanese male given name)" [how declined?]
	-- [[Višnu]] "Vishnu" (declined like [[guru]] but singular-only)
	-- [[budižkničemu]] "good-for-nothing, ne'er-do-well" (indeclinable in the singular, declinable as masculine hard
	-- stem budižkničemové etc. in the plural, declinable as feminine hard stem budižkničemy etc. in the plural when
	-- feminine).
	--
	-- NOTE: The stem ends in -u.
	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s (= lemma), loc_s, ins_s
		"a", "ovi", nil, "-", "ovi", "em",
		-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
		"ové", "ů", "ům", "y", "ech", "y")
end
declprops["u-m"] = {cat = "GENPOS in -u"}
decls["tstem-m"] = function(base, stems)
	-- E.g. [[kníže]] "prince", [[hrabě]] "earl", [[markrabě]] "margrave".
	-- Singular endings are built on -et-, plural endings on -at-.
	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s (= lemma), loc_s, ins_s
		"ete", "eti", "ete", "-", "eti", "etem",
		-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
		"ata", "at", "atům", "ata", "atech", "aty")
end
declprops["tstem-m"] = {cat = "t-stem"}
decls["hard-f"] = function(base, stems)
	base.no_palatalize_c = true
	if base.c_as_k then
		-- forms like 'ayahuascy' are allowed.
		base.hard_c = true
	end

	-- Stems ending in a soft consonant take both -ě and -i in the dative/locative singular:
	-- [[skica]] "sketch", [[gejša]] "geisha", [[rikša]] "rickshaw (vehicle)"; [[arakača]], [[čača]], [[čiča]]
	-- (drink), [[dača]] "dacha", [[gutaperča]] "guttapercha", [[viskača]]; [[babča]], [[číča]], [[káča]], [[mamča]],
	-- [[úča]]. Also appears to apply to ď (e.g. [[Naďa]]) and ť, as well as certain words with stems in -ň and -j
	-- (e.g. [[doňa]], and personal names such as [[Táňa]] and [[Darja]]), which normally have a mixed declension.
	-- Not applied when `base.c_as_k` is set.
	-- NOTE(review): this reads `base.vowel_stem` whereas the other declension functions read `stems.vowel_stem`;
	-- confirm that this is intended.
	local dat_s
	if rfind(base.vowel_stem, "[cčšžďťjň]$") and not base.c_as_k then
		dat_s = {"ě", "i"}
	else
		dat_s = "ě"
	end
	-- Locative singular is the same as the dative singular.
	local loc_s = dat_s

	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s, loc_s, ins_s
		"y", dat_s, "u", "o", loc_s, "ou",
		-- nom_p, gen_p (null ending), dat_p, acc_p, loc_p, ins_p
		"y", "", "ám", "y", "ách", "ami")
end
declprops["hard-f"] = {cat = "hard"}
decls["soft-f"] = function(base, stems)
	-- This also includes feminines in -ie, e.g. [[belarie]], [[signorie]], [[uncie]], and feminines in -oe, e.g.
	-- [[kánoe]], [[aloe]] and medical terms like [[dyspnoe]], [[apnoe]], [[hemoptoe]], [[kalanchoe]].
	local lemma = base.lemma

	-- Genitive plural: nouns in -ice like [[ulice]] "street" have a null ending, e.g. 'ulic'; nouns in -yně e.g.
	-- [[přítelkyně]] "girlfriend" have gen pl 'přítelkyň' or 'přítelkyní' with two possible endings; otherwise -í.
	-- (Alternation between -ň and -n and between -e and -ě handled automatically by combine_stem_ending().)
	local gen_p
	if rfind(lemma, "ice$") then
		gen_p = ""
	elseif rfind(lemma, "yně$") then
		gen_p = {"", "í"}
	else
		gen_p = "í"
	end

	-- Vocative really ends in -e, not just a copy of the nominative; cf. [[sinfonia]], which is soft-f except for
	-- the nominative and has -e in the vocative singular.
	add_decl(base, stems,
		-- gen_s, dat_s, acc_s, voc_s, loc_s, ins_s
		"e", "i", "i", "e", "i", "í",
		-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
		"e", gen_p, "ím", "e", "ích", "emi")
end
declprops["soft-f"] = {cat = "soft"}
decls["mixed-f"] = function(base, stems)
	-- Lowercase nouns in -ňa (e.g. bárišňa/báryšňa, doňa, dueňa, piraňa, vikuňa) and -ja (e.g. maracuja, papája,
	-- sója). Also non-personal proper nouns in -ňa (e.g. [[Keňa]] "Kenya") and -ja (e.g. [[Troja]]/[[Trója]] "Troy",
	-- [[Amudarja]] "Amu Darya"). Does not appear to apply to personal proper nouns (e.g. [[Táňa]] "Tanya", [[Darja]]
	-- "Daria"), which usually decline like [[gejša]], [[dača]], [[skica]].
	--
	-- Most slots have two variants; acc_s, voc_s and ins_s have a single ending.
	local gen_s, dat_s, loc_s = {"i", "e"}, {"e", "i"}, {"e", "i"}
	local nom_p, gen_p, dat_p = {"i", "e"}, {"", "í"}, {"ám", "ím"}
	local acc_p, loc_p, ins_p = {"i", "e"}, {"ách", "ích"}, {"ami", "emi"}
	add_decl(base, stems,
		gen_s, dat_s, "u", "o", loc_s, "ou",
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end
declprops["mixed-f"] = {cat = "mixed"}
decls["cons-f"] = function(base, stems)
	-- e.g. [[dlaň]] "palm (of the hand)"
	add_decl(base, stems,
		-- gen_s, dat_s, acc_s (= lemma), voc_s, loc_s, ins_s
		"e", "i", "-", "i", "i", "í",
		-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
		"e", "í", "ím", "e", "ích", "emi")
end
declprops["cons-f"] = {cat = "soft zero-ending"}
decls["istem-f"] = function(base, stems)
	-- The capital E in the dat_p and loc_p endings is deliberate: see above under apply_special_cases(); -E causes
	-- depalatalization of ť/ď/ň.
	local dat_p, loc_p = "Em", "Ech"
	add_decl(base, stems,
		-- gen_s, dat_s, acc_s (= lemma), voc_s, loc_s, ins_s
		"i", "i", "-", "i", "i", "í",
		-- nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
		"i", "í", dat_p, "i", loc_p, "mi")
end
declprops["istem-f"] = {cat = "i-stem"}
decls["mixed-istem-f"] = function(base, stems)
	-- Feminines that mix i-stem and soft endings. The subtype in `base.mixedistem` selects which slots vary.
	--
	-- Use of ě vs E below is intentional. Contrast [[oběť]] dat pl 'obětem' (depalatalizing) with [[nit]] ins pl
	-- 'nitěmi' (palatalizing). See comment above under apply_special_cases().
	local endings_by_subtype = {
		-- pěst, past, mast, lest [reducible; ins pl 'lstmi'], pelest, propust, plst, oběť, zeď [reducible; ins pl
		-- 'zdmi'], paměť [ins pl 'pamětmi/paměťmi']
		["pěst"] = {gen_s = "i", nom_p = "i", dat_p = {"ím", "Em"}, loc_p = {"ích", "Ech"}, ins_p = "mi"},
		-- moc, nemoc, pomoc, velmoc; NOTE: pravomoc has -i/-e alternation in gen_s, nom_p
		["moc"] = {gen_s = "i", nom_p = "i", dat_p = {"Em", "ím"}, loc_p = {"Ech", "ích"}, ins_p = "ěmi"},
		-- myš, veš [reducible, ins pl vešmi], hruď, měď, pleť, spleť, směs, smrt, step, odpověď [ins pl
		-- 'odpověďmi/odpovědmi'], šeď, závěť [ins pl 'závěťmi/závětmi'], plsť [ins pl 'plstmi']
		["myš"] = {gen_s = "i", nom_p = "i", dat_p = "ím", loc_p = "ích", ins_p = "mi"},
		-- lež [reducible], noc, mosaz, rez [reducible], ves [reducible], mysl, sůl, běl, žluť
		["noc"] = {gen_s = "i", nom_p = "i", dat_p = "ím", loc_p = "ích", ins_p = "ěmi"},
		-- žluč, moč, modř, čeleď, kapraď, záď, žerď, čtvrť/čtvrt, drť, huť, chuť, nit, pečeť, závrať, pouť, stať,
		-- ocel
		["žluč"] = {gen_s = {"i", "ě"}, nom_p = {"i", "ě"}, dat_p = "ím", loc_p = "ích", ins_p = "ěmi"},
		-- loď, suť
		["loď"] = {gen_s = {"i", "ě"}, nom_p = {"i", "ě"}, dat_p = "ím", loc_p = "ích", ins_p = {"ěmi", "mi"}},
	}
	local chosen = endings_by_subtype[base.mixedistem]
	if not chosen then
		error(("Unrecognized value '%s' for 'mixedistem', should be one of 'pěst', 'moc', 'myš', 'noc', 'žluč' or 'loď'"):
			format(base.mixedistem))
	end
	-- The accusative plural always equals the nominative plural, so `chosen.nom_p` is passed for both.
	add_decl(base, stems, chosen.gen_s, "i", "-", "i", "i", "í",
		chosen.nom_p, "í", chosen.dat_p, chosen.nom_p, chosen.loc_p, chosen.ins_p)
end

declprops["mixed-istem-f"] = {
	-- The subtype appears in the table description but only in a secondary category name, to avoid too many
	-- categories.
	desc = function(base, stems)
		local subtype = base.mixedistem
		return ("mixed i-stem [type '%s'] GENDER"):format(subtype)
	end,
	cat = function(base, stems)
		local subtype_cat = ("mixed i-stem GENPOS (type '%s')"):format(base.mixedistem)
		return {"mixed i-stem", subtype_cat}
	end,
}
decls["i-f"] = function(base, stems)
	-- Feminines in -i: [[máti]] "mother" (singular-only), [[pramáti]] "foremother". The endings are very similar
	-- to the 'noc' mixed i-stem type.
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "i", "i", "-", "i", "i", "í"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "i", "í", "ím", "i", "ích", "ěmi"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

-- Category/description properties for feminines in -i.
declprops["i-f"] = {
	cat = "GENPOS in -i",
}
decls["ea-f"] = function(base, stems)
	-- Feminines in -ea; the stem ends in -e. Three ending sets are distinguished by `base.tech` and
	-- `base.persname`; accusative and vocative singular are the same in all of them.
	local acc_s, voc_s = "u", "o"
	local gen_s, dat_s, loc_s, ins_s
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p
	if base.tech then
		-- diarea, gonorea, chorea, nauzea, paleogea, seborea, trachea
		gen_s, dat_s, loc_s, ins_s = "y", "i", "i", "ou"
		nom_p, gen_p, dat_p = "y", "í", {"ám", "ím"}
		acc_p, loc_p, ins_p = "y", {"ách", "ích"}, "ami"
	elseif base.persname then
		-- Medea, Andrea, etc.
		gen_s, dat_s, loc_s, ins_s = {"y", "je", "ji"}, {"e", "je", "ji"}, {"e", "je", "ji"}, "ou"
		-- The plural is a guess, modelled on the default case below; plurals of personal names are not attested
		-- in IJP.
		nom_p, gen_p, dat_p = {"y", "je"}, "jí", {"ám", "jím"}
		acc_p, loc_p, ins_p = {"y", "je"}, {"ách", "jích"}, {"ami", "jemi"}
	else
		-- idea, odysea ("wandering pilgrimage"), orchidea, palea, spirea
		-- proper names Galilea, Judea, Caesarea, Korea, Odyssea ("epic poem")
		gen_s, dat_s, loc_s, ins_s = {"y", "je"}, "ji", "ji", {"ou", "jí"}
		nom_p, gen_p, dat_p = {"y", "je"}, "jí", {"ám", "jím"}
		acc_p, loc_p, ins_p = {"y", "je"}, {"ách", "jích"}, {"ami", "jemi"}
	end
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

declprops["ea-f"] = {
	-- Technical terms get an extra, more specific category.
	cat = function(base, stems)
		if not base.tech then
			return "GENPOS in -ea"
		end
		return {"GENPOS in -ea", "technical GENPOS in -ea"}
	end,
}
decls["oa-f"] = function(base, stems)
	-- Feminines in -oa/-ua; the stem ends in -o/-u.
	-- Examples: stoa, kongrua; proper names Samoa, Managua, Nikaragua, Capua.
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "y", "i", "u", "o", "i", "ou"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "y", "í", "ám", "y", "ách", "ami"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

-- Category/description properties for feminines in -oa/-ua.
declprops["oa-f"] = {
	cat = "GENPOS in -oa/-ua",
}
decls["ia-f"] = function(base, stems)
	-- Feminines in -ia; the stem ends in -i.
	-- Examples: belaria, signoria, uncia; paranoia, sinfonia;
	-- proper names Alexandria, Alexia, Livia, Monrovia, Olympia, Sofia.
	-- Apart from the nominative singular the endings are identical to the soft declension, so simply delegate.
	local soft_feminine = decls["soft-f"]
	soft_feminine(base, stems)
end

-- Category/description properties for feminines in -ia.
declprops["ia-f"] = {
	cat = "GENPOS in -ia",
}
decls["hard-n"] = function(base, stems)
	-- Hard neuters. An explicit 'velar' indicator forces velar treatment and '-velar' suppresses it; otherwise
	-- the stem is inspected for a final velar consonant.
	local velar = base.velar
	if not velar and not base["-velar"] then
		velar = rfind(stems.vowel_stem, com.velar_c .. "$")
	end

	-- Locative singular.
	-- NOTE: Per IJP it appears the meaning of the preceding preposition makes a difference: 'o' = "about" takes
	-- '-u' or '-ě', while 'na/v' = "in, on" normally takes '-ě'.
	local loc_s
	if velar then
		-- Exceptions: [[mléko]] "milk" ('mléku' or 'mléce'), [[břicho]] "belly" ('břiše' or (less often)
		-- 'břichu'), [[roucho]] ('na rouchu' or 'v rouše'; why the difference in preposition?).
		loc_s = "u"
	elseif rfind(base.lemma, "dlo$") then
		-- IJP says nouns in -dlo take only -e but the declension tables show otherwise. It appears -u is possible
		-- but significantly less common.
		loc_s = {"ě", "u"}
	elseif rfind(base.lemma, "lo$") then
		-- Other nouns in -lo usually take just -e ([[čelo]] "forehead", [[kolo]] "wheel", [[křeslo]] "armchair",
		-- [[máslo]] "butter", [[peklo]] "hell", [[sklo]] "glass", [[světlo]] "light", [[tělo]] "body"; but
		-- [[číslo]] "number" with -e/-u; [[zlo]] "evil" and [[kouzlo]] "spell" with -u/-e).
		loc_s = "ě"
	elseif rfind(base.lemma, "[sc]tvo$") or rfind(base.lemma, "ivo$") then
		loc_s = "u"
	else
		-- Per IJP: Borrowed words and abstracts take -u (e.g. [[banjo]]/[[bendžo]]/[[benžo]] "banjo", [[depo]]
		-- "depot", [[chladno]] "cold", [[mokro]] "damp, dampness", [[právo]] "law, right", [[šeru]] "twilight?",
		-- [[temno]] "dark, darkness", [[tempo]] "rate, tempo", [[ticho]] "quiet, silence", [[vedro]] "heat") and
		-- others often take -ě/-u. Formerly we defaulted to -ě/-u (i.e. {"ě", "u"}) but it seems better to
		-- default to just -u, similarly to hard masculines.
		loc_s = "u"
	end

	-- Locative plural.
	local loc_p
	if rfind(base.lemma, "isko$") then
		-- Note, lemmas in -isko also have mixed-reducible as default, handled in determine_default_reducible().
		-- Note also, ending -ích triggers the second palatalization.
		loc_p = {"ích", "ách"}
	elseif velar then
		-- Diminutives in -ko, -čko, -tko; also [[lýtko]], [[děcko]], [[vrátka]], [[dvířka]], [[jho]], [[roucho]],
		-- [[tango]], [[mango]], [[sucho]], [[blaho]], [[víko]], [[echo]], [[embargo]], [[largo]], [[jericho]]
		-- (from IJP). Also foreign nouns in -kum: [[antibiotikum]], [[narkotikum]], [[afrodiziakum]],
		-- [[analgetikum]], etc. [[jablko]] "apple" has '-ách' or '-ích' and needs an override; likewise for
		-- [[vojsko]] "troop"; [[riziko]] "risk" normally has '-ích' and needs an override.
		loc_p = "ách"
	else
		loc_p = "ech"
	end

	add_decl(base, stems, "a", "u", "-", "-", loc_s, "em",
		"a", "", "ům", "a", loc_p, "y")
	-- FIXME: paired body parts e.g. [[rameno]] "shoulder" (gen_p/loc_p 'ramenou/ramen'), [[koleno]] "knee"
	-- (gen_p/loc_p 'kolenou/kolen'), [[prsa]] "chest, breasts" (plurale tantum; gen_p/loc_p 'prsou').
	-- FIXME: Nouns with both neuter and feminine forms in the plural, e.g. [[lýtko]] "calf (of the leg)",
	-- [[bedro]] "hip", [[vrátka]] "gate".
end

declprops["hard-n"] = {
	-- Both properties apply the same velar test as the declension function above.
	desc = function(base, stems)
		local velar = base.velar
		if not velar and not base["-velar"] then
			velar = rfind(stems.vowel_stem, com.velar_c .. "$")
		end
		if not velar then
			return "hard GENDER"
		end
		return "velar GENDER"
	end,
	cat = function(base, stems)
		local velar = base.velar
		if not velar and not base["-velar"] then
			velar = rfind(stems.vowel_stem, com.velar_c .. "$")
		end
		if not velar then
			return "hard"
		end
		return "velar-stem"
	end,
}
decls["semisoft-n"] = function(base, stems)
	-- Semisoft neuters: hard in the singular, mostly soft in the plural.
	--
	-- Examples:
	-- * In -ao: [[kakao]] "cacao", [[makao]] "Macao (gambling card game, see Wikipedia)", [[curaçao]] "curaçao
	--   (liqueur)" (IJP gives gen pl 'curaç' but ASSC [https://slovnikcestiny.cz/heslo/cura%C3%A7ao/0/9967] says
	--   'curaçí' as expected), [[farao]] "faro (card game)"; also [[Makao]], [[Pathet Lao]], but these are sg-only
	-- * In -eo: [[stereo]], [[rodeo]], [[video]], [[solideo]]; also [[Borneo]], [[Montevideo]], but these are
	--   sg-only
	-- * In -io: [[rádio]] "radio", [[gramorádio]], [[studio]], [[scenário]], [[trio]], [[ážio]] (also spelled
	--   [[agio]]), [[disážio]], [[folio]], [[vibrio]]; also [[arpeggio]], [[adagio]], [[capriccio]],
	--   [[solfeggio]] although pronounced the Italian way without /i/; also [[Ohio]], [[Ontario]], [[Tokio]], but
	--   these are sg-only
	-- * In -uo: only [[duo]]
	-- * In -yo: only [[embryo]]
	-- * In -eum: [[muzeum]], [[lyceum]], [[linoleum]], [[ileum]], etc.
	-- * In -ium: [[atrium]] "atrium", most chemical elements, etc.
	-- * In -uum: [[individuum]], [[kontinuum]], [[premenstruum]], [[residuum]], [[vakuum]]/[[vacuum]]
	-- * In -yum: only [[baryum]] "barium" (none others in SSJC)
	-- * In -ion: [[enkómion]] "encomium", [[eufonion]] (variant of [[eufonium]]), [[amnion]], [[ganglion]],
	--   [[gymnasion]], [[scholion]], [[kritérion]] (rare for [[kritérium]]), [[onomatopoion]] (variant of
	--   [[onomatopoie]]), [[symposion]], [[synedrion]]; also [[Byzantion]], but this is sg-only; most words in
	--   -ion are masculine
	--
	-- Those in -eo and -uo have alternative hard endings in the dat/loc/ins pl, but not those in -eum or -uum.
	-- Those in -ao have only hard endings except in the gen pl. (There are apparently no neuters in -eon; those in
	-- -eon or -yon e.g. [[akordeon]], [[neon]], [[nukleon]], [[karyon]], [[Lyon]] are masculine.)
	local lemma = base.actual_lemma
	local plural
	if rfind(lemma, "ao$") then
		plural = {dat = "ům", loc = "ech", ins = "y"}
	elseif rfind(lemma, "[eu]o$") then
		plural = {dat = {"ím", "ům"}, loc = {"ích", "ech"}, ins = {"i", "y"}}
	else
		plural = {dat = "ím", loc = "ích", ins = "i"}
	end
	add_decl(base, stems, "a", "u", "-", "-", "u", "em",
		"a", "í", plural.dat, "a", plural.loc, plural.ins)
end

-- Category/description properties for the semisoft neuter declension.
declprops["semisoft-n"] = {
	cat = "semisoft",
}
decls["soft-n"] = function(base, stems)
	-- Soft neuters. Examples: [[moře]] "sea", [[slunce]] "sun", [[srdce]] "heart", [[citoslovce]] "interjection",
	-- [[dopoledne]] "late morning", [[odpoledne]] "afternoon", [[hoře]] "sorrow, grief" (archaic or literary),
	-- [[inhalace]] "inhalation", [[kafe]] "coffee", [[kanape]] "sofa", [[kutě]] "bed", [[Labe]] "Elbe (singular
	-- only)", [[líce]] "cheek", [[lože]] "bed", [[nebe]] "sky; heaven", [[ovoce]] "fruit", [[pole]] "field",
	-- [[poledne]] "noon", [[příslovce]] "adverb", [[pukrle]] "curtsey" (also t-n), [[vejce]] "egg" (NOTE: gen pl
	-- 'vajec').
	--
	-- The many nouns in -iště have a null genitive plural; all others take -í.
	local gen_p
	if rfind(base.vowel_stem, "išť$") then
		gen_p = ""
	else
		gen_p = "í"
	end
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "e", "i", "-", "-", "i", "em"
	local nom_p, dat_p, acc_p, loc_p, ins_p = "e", "ím", "e", "ích", "i"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
	-- NOTE: Some neuter words in -e are indeclinable, e.g. [[Belize]], [[Chile]], [[garde]] "chaperone",
	-- [[karaoke]], [[karate]], [[re]] "double raise (card games)", [[ukulele]], [[Zimbabwe]], [[zombie]] (pl.
	-- 'zombie' or 'zombies'); some are nearly indeclinable, e.g. [[finále]], [[chucpe]]; see mostly-indecl below.
end

-- Category/description properties for the soft neuter declension.
declprops["soft-n"] = {
	cat = "soft",
}
decls["í-n"] = function(base, stems)
	-- Neuters in -í such as [[nábřeží]] "waterfront" and a zillion others; also [[úterý]] "Tuesday".
	-- NOTE: The stem itself ends in -í/-ý, hence most endings are empty.
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "", "", "-", "-", "", "m"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "", "", "m", "", "ch", "mi"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

-- Category/description properties for neuters in -í/-ý.
declprops["í-n"] = {
	cat = "GENPOS in -í/-ý",
}
decls["n-n"] = function(base, stems)
	-- Neuter n-stems, e.g. [[břemeno]] "burden" (also [[břímě]], use 'decllemma:'); [[písmeno]] "letter";
	-- [[plemeno]] "breed"; [[rameno]] "shoulder" (also [[rámě]], use 'decllemma:'); [[semeno]] "seed" (also
	-- [[sémě]], [[símě]], use 'decllemma:'); [[temeno]] "crown (of the head)"; [[vemeno]] "udder".
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s =
		{"a", "e"}, {"i", "u"}, "-", "-", {"ě", "i", "u"}, "em"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "a", "", "ům", "a", "ech", "y"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

-- Category/description properties for neuter n-stems.
declprops["n-n"] = {
	cat = "n-stem",
}
decls["tstem-n"] = function(base, stems)
	-- Neuter t-stems (singular in -et-, plural in -at-).
	--
	-- Mostly young animals and people, e.g. [[batole]] "toddler", [[čuně]] "pig", [[daňče]] "fallow deer fawn",
	-- [[děvče]] "girl", [[ďouče]] "girl" (dialectal), [[dítě]] "child" (NOTE: feminine in the plural [[děti]],
	-- declined as a feminine i-stem), [[dvojče]] "twin", [[hádě]] "young snake", [[house]] "gosling", [[hříbě]]
	-- "foal" (pl. hříbata), [[jehně]] "lamb", [[kavče]] "young jackdaw; chough", [[káče]] "duckling", [[káně]]
	-- "buzzard chick" (NOTE: also feminine meaning "buzzard"), [[klíště]] "tick", [[kose]] "blackbird chick"
	-- (rare), [[kuře]] "chick (young chicken)", [[kůzle]] "kid (young goat)", [[lišče]] "fox cub", [[lvíče]]
	-- "lion cub", [[medvídě]] "bear cub", [[mládě]] "baby animal", [[morče]] "guinea pig", [[mrně]] "toddler",
	-- [[nemluvně]] "infant", [[novorozeně]] "newborn", [[orle]] "eaglet", [[osle]] "donkey foal", [[pachole]]
	-- "boy (obsolete); page, squire", [[páže]] "page, squire", [[podsvinče]] "suckling pig", [[prase]] "pig",
	-- [[prtě]] "toddler", [[ptáče]] "chick (young bird)", [[robě]] "baby, small child", [[saranče]] "locust"
	-- (NOTE: also feminine), [[sele]] "piglet", [[slůně]] "baby elephant", [[škvrně]] "toddler", [[štěně]]
	-- "puppy", [[tele]] "calf", [[velbloudě]] "camel colt", [[vlče]] "wolf cub", [[vnouče]] "grandchild",
	-- [[vyžle]] "small hunting dog; slender person", [[zvíře]] "animal, beast".
	--
	-- Some refer to inanimates, e.g. [[doupě]] "lair" (pl. doupata), [[koště]]/[[chvoště]] "broom", [[paraple]]
	-- "umbrella", [[poupě]] "bud", [[pukrle]] "curtsey" (also soft-n), [[rajče]] "tomato", [[šuple]] "drawer",
	-- [[varle]] "testicle", [[vole]] "craw (of a bird); goiter".
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "ete", "eti", "-", "-", "eti", "etem"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ata", "at", "atům", "ata", "atech", "aty"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

-- Category/description properties for neuter t-stems.
declprops["tstem-n"] = {
	cat = "t-stem",
}
decls["ma-n"] = function(base, stems)
	-- Neuters in -ma with an -at- extension in the oblique cases, e.g. [[drama]] "drama", [[dogma]] "dogma",
	-- [[aneurysma]]/[[aneuryzma]] "aneurysm", [[dilema]] "dilemma", [[gumma]] "gumma" (non-cancerous syphilitic
	-- growth), [[klima]] "climate", [[kóma]] "coma", [[lemma]] "lemma", [[melisma]] "melisma", [[paradigma]]
	-- "paradigm", [[plasma]]/[[plazma]] "plasma [partly ionized gas]" (note [[plasma]]/[[plazma]] "blood plasma"
	-- is feminine), [[revma]] "rheumatism", [[schéma]] "schema, diagram", [[schisma]]/[[schizma]] "schism",
	-- [[smegma]] "smegma", [[sofisma]]/[[sofizma]] "sophism", [[sperma]] "sperm", [[stigma]] "stigma", [[téma]]
	-- "theme", [[trauma]] "trauma", [[trilema]] "trilemma", [[zeugma]] "zeugma".
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "atu", "atu", "-", "-", "atu", "atem"
	local nom_p, gen_p, dat_p, acc_p, loc_p, ins_p = "ata", "at", "atům", "ata", "atech", "aty"
	add_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s,
		nom_p, gen_p, dat_p, acc_p, loc_p, ins_p)
end

-- Category/description properties for neuters in -ma.
declprops["ma-n"] = {
	cat = "ma-stem",
}
decls["adj"] = function(base, stems)
	-- Adjectivally-declined nouns: generate the full adjective paradigm with Module:cs-adjective and copy the
	-- slots matching this noun's gender, animacy and number into `base.forms`.

	-- No adjective properties are currently forwarded, so the angle-bracket spec always ends up empty; the
	-- scaffolding is kept so that properties can be added to `props` later.
	local props = {}
	local propspec = table.concat(props, ".")
	if propspec ~= "" then
		propspec = "<" .. propspec .. ">"
	end
	local adj_spec = require("Module:cs-adjective").do_generate_forms({base.lemma .. propspec})

	-- Copy each {adjective slot, noun slot} pair in order.
	local function transfer(slot_pairs)
		for _, pair in ipairs(slot_pairs) do
			base.forms[pair[2]] = adj_spec.forms[pair[1]]
		end
	end

	if base.number ~= "pl" then
		local singular
		if base.gender == "m" then
			-- No accusative here: it is not copied for masculines.
			singular = {
				{"nom_m", "nom_s"}, {"gen_mn", "gen_s"}, {"dat_mn", "dat_s"},
				{"loc_mn", "loc_s"}, {"ins_mn", "ins_s"},
			}
		elseif base.gender == "f" then
			singular = {
				{"nom_f", "nom_s"}, {"gen_f", "gen_s"}, {"dat_f", "dat_s"},
				{"acc_f", "acc_s"}, {"loc_f", "loc_s"}, {"ins_f", "ins_s"},
			}
		else
			singular = {
				{"nom_n", "nom_s"}, {"gen_mn", "gen_s"}, {"dat_mn", "dat_s"},
				{"acc_n", "acc_s"}, {"loc_mn", "loc_s"}, {"ins_mn", "ins_s"},
			}
		end
		transfer(singular)
		-- Default the vocative singular to the nominative singular unless something already set it.
		if not base.forms.voc_s then
			iut.insert_forms(base.forms, "voc_s", base.forms.nom_s)
		end
	end

	if base.number ~= "sg" then
		local nom_source, acc_source
		if base.gender == "m" or base.gender == "f" then
			-- Only animate masculines have their own nominative plural; inanimate masculines share the feminine
			-- one.
			if base.gender == "m" and base.animacy == "an" then
				nom_source = "nom_mp_an"
			else
				nom_source = "nom_fp"
			end
			acc_source = "acc_mfp"
		else
			nom_source, acc_source = "nom_np", "acc_np"
		end
		transfer {
			{nom_source, "nom_p"}, {acc_source, "acc_p"},
			{"gen_p", "gen_p"}, {"dat_p", "dat_p"}, {"ins_p", "ins_p"}, {"loc_p", "loc_p"},
		}
	end
end
-- Classify an adjectival lemma by its final letter: -ý is "hard", -í is "soft", anything else is "possessive".
local function get_stemtype(base)
	local lemma = base.lemma
	if rfind(lemma, "ý$") then
		return "hard"
	end
	if rfind(lemma, "í$") then
		return "soft"
	end
	return "possessive"
end
declprops["adj"] = {
	-- Two categories: the generic adjectival one and one qualified by stem type (hard/soft/possessive).
	cat = function(base, stems)
		local stemtype = get_stemtype(base)
		return {"adjectival POS", stemtype .. " GENDER adjectival POS"}
	end,
}
decls["mostly-indecl"] = function(base, stems)
	-- Nouns that are indeclinable except for an instrumental singular in -m.
	-- Several neuters: E.g. [[finále]] "final (sports)", [[čtvrtfinále]] "quarterfinal", [[chucpe]] "chutzpah",
	-- [[penále]] "fine, penalty", [[promile]] "" (NOTE: loc pl also promilech), [[rande]] "rendezvous",
	-- [[semifinále]] "semifinal", [[skóre]] "score".
	-- At least one masculine animate: [[kamikaze]]/[[kamikadze]], where IJP says only -m in the ins sg.
	local ins_s
	if base.gender == "m" then
		ins_s = "m"
	else
		ins_s = {"-", "m"}
	end
	local unchanged = "-"
	add_decl(base, stems, unchanged, unchanged, unchanged, unchanged, unchanged, ins_s,
		unchanged, unchanged, unchanged, unchanged, unchanged, unchanged)
end

-- Category/description properties for mostly-indeclinable nouns.
declprops["mostly-indecl"] = {
	cat = "mostly indeclinable",
}
decls["indecl"] = function(base, stems)
	-- Indeclinable: every slot gets the "-" ending. Note that fully indeclinable nouns should not have a table at
	-- all rather than one all of whose forms are the same; but having an indeclinable declension is useful for
	-- nouns that may or may not be indeclinable, e.g. [[desatero]] "group of ten" or the plural of [[peso]], which
	-- may be indeclinable 'pesos'.
	local unchanged = "-"
	add_decl(base, stems, unchanged, unchanged, unchanged, unchanged, unchanged, unchanged,
		unchanged, unchanged, unchanged, unchanged, unchanged, unchanged)
end

declprops["indecl"] = {
	-- Indeclinable adjectival nouns are categorized separately from ordinary indeclinable nouns.
	cat = function(base, stems)
		if not base.adj then
			return {"indeclinable POS", "indeclinable GENPOS"}
		end
		return {"adjectival POS", "indeclinable adjectival POS", "indeclinable GENDER adjectival POS"}
	end,
}
decls["manual"] = function(base, stems)
	-- Anything declined manually using overrides. No declension is set apart from the lemma slot: nom_p for
	-- pluralia tantum, nom_s otherwise.
	local lemma_slot
	if base.number == "pl" then
		lemma_slot = "nom_p"
	else
		lemma_slot = "nom_s"
	end
	add(base, lemma_slot, stems, "-")
end

-- Manual declensions contribute no categories; the description is just the gender.
declprops["manual"] = {
	desc = "GENDER",
	cat = {},
}
-- Fill in gender, number, animacy and clitic availability for a pronoun, based purely on its lemma.
-- The user may not specify any of these, except that [[ona]] requires an explicit '.sg' or '.pl' because it is
-- both the feminine singular and the neuter plural pronoun. Throws an error on user-specified properties, on
-- [[ona]] without a number, and on unrecognized lemmas; `base` is left untouched in those cases.
local function set_pron_defaults(base)
	local number_given = base.lemma ~= "ona" and base.number
	if base.gender or number_given or base.animacy then
		error("Can't specify gender, number or animacy for pronouns")
	end

	-- Properties of every pronoun whose lemma alone determines them.
	local fixed_props = {
		["kdo"] = {gender = "none", number = "sg", animacy = "an", has_clitic = false},
		["co"] = {gender = "none", number = "sg", animacy = "inan", has_clitic = false},
		["já"] = {gender = "none", number = "sg", animacy = "an", has_clitic = true},
		["ty"] = {gender = "none", number = "sg", animacy = "an", has_clitic = true},
		["my"] = {gender = "none", number = "pl", animacy = "an", has_clitic = false},
		["vy"] = {gender = "none", number = "pl", animacy = "an", has_clitic = false},
		["on"] = {gender = "m", number = "sg", animacy = "none", has_clitic = true},
		["ono"] = {gender = "n", number = "sg", animacy = "inan", has_clitic = true},
		["oni"] = {gender = "m", number = "pl", animacy = "an", has_clitic = false},
		["ony"] = {gender = "none", number = "pl", animacy = "none", has_clitic = false},
		["sebe"] = {gender = "none", number = "none", animacy = "none", has_clitic = true},
	}

	local props = fixed_props[base.lemma]
	if not props then
		if base.lemma ~= "ona" then
			error(("Unrecognized pronoun '%s'"):format(base.lemma))
		end
		if base.number == "sg" then
			props = {gender = "f", number = "sg", animacy = "none", has_clitic = false}
		elseif base.number == "pl" then
			props = {gender = "n", number = "pl", animacy = "inan", has_clitic = false}
		else
			error("Must specify '.sg' or '.pl' with lemma 'ona'")
		end
	end

	base.gender, base.actual_gender = props.gender, props.gender
	base.number, base.actual_number = props.number, props.number
	base.animacy, base.actual_animacy = props.animacy, props.animacy
	base.has_clitic = props.has_clitic
end
-- Install the single, empty stem set used by pronouns and select the "pron" declension. Pronouns accept no
-- reducible or vowel-alternation indicators, so any user-supplied stem set is an error.
local function determine_pronoun_stems(base)
	local user_stem_sets = base.stem_sets
	if user_stem_sets then
		error("Reducible and vowel alternation specs cannot be given with pronouns")
	end
	local empty_stem_set = {reducible = false, vowel_stem = "", nonvowel_stem = ""}
	base.decl = "pron"
	base.stem_sets = {empty_stem_set}
end
-- Declension of the pronouns, dispatched on `base.lemma`. Every branch passes complete word forms rather than
-- endings; determine_pronoun_stems() sets both stems to the empty string.
-- NOTE(review): the add_*decl* helpers are defined outside this view. From the calls below their ending
-- arguments presumably follow the order gen, [clitic gen,] dat, [clitic dat,] acc, [clitic acc,] voc, loc, ins,
-- followed by optional footnotes -- confirm against the helper definitions.
decls["pron"] = function(base, stems)
local after_prep_footnote = "[after a preposition]"
local dual_footnote = "[when referring to dual nouns, e.g. [[oči]], [[ruce]]]"
local animate_footnote = "[animate]"
if base.lemma == "kdo" then
add_decl(base, stems, "koho", "komu", nil, nil, "kom", "kým")
elseif base.lemma == "co" then
add_decl(base, stems, "čeho", "čemu", nil, nil, "čem", "čím")
elseif base.lemma == "já" then
add_sg_decl_with_clitic(base, stems, "mne", "mě", "mně", "mi", nil, nil, nil, "mně", "mnou")
elseif base.lemma == "ty" then
add_sg_decl_with_clitic(base, stems, "tebe", "tě", "tobě", "ti", nil, nil, nil, "tobě", "tebou")
elseif base.lemma == "my" then
add_pl_only_decl(base, stems, "nás", "nám", "nás", "nás", "námi")
-- Second call adds an extra instrumental form, tagged with the dual footnote.
add_pl_only_decl(base, stems, nil, nil, nil, nil, "náma", dual_footnote)
elseif base.lemma == "vy" then
add_pl_only_decl(base, stems, "vás", "vám", "vás", "vás", "vámi")
add_pl_only_decl(base, stems, nil, nil, nil, nil, "váma", dual_footnote)
elseif base.lemma == "on" or base.lemma == "ono" then
-- [[on]] and [[ono]] share most forms; only the accusative variants differ.
local acc_s = base.lemma == "on" and "jej" or {"jej", "je"}
local clitic_acc_s = base.lemma == "on" and {"jej", "ho"} or {"jej", "ho", "je"}
local prep_acc_s = base.lemma == "on" and "něj" or {"něj", "ně"}
local prep_clitic_acc_s = base.lemma == "on" and "-ň" or nil
add_sg_decl_with_clitic(base, stems, {"jeho", "jej"}, {"ho", "jej"}, "jemu", "mu", acc_s, clitic_acc_s, nil, nil, "jím")
-- The n-initial forms are added separately with the "[after a preposition]" footnote.
add_sg_decl_with_clitic(base, stems, {"něho", "něj"}, nil, "němu", nil, prep_acc_s, prep_clitic_acc_s, nil, "něm", "ním",
after_prep_footnote)
if base.lemma == "on" then
add_sg_decl_with_clitic(base, stems, nil, nil, nil, nil, "jeho", nil, nil, nil, nil,
animate_footnote)
-- NOTE(review): `after_prep_footnote and animate_footnote` evaluates to just `animate_footnote` (both are
-- non-empty strings), so 'něho' gets only the "[animate]" footnote and not "[after a preposition]". If both
-- footnotes are intended, they need to be passed in whatever multi-footnote form the helper accepts -- confirm.
add_sg_decl_with_clitic(base, stems, nil, nil, nil, nil, "něho", nil, nil, nil, nil,
after_prep_footnote and animate_footnote)
end
elseif base.lemma == "ona" and base.number == "sg" then
add_sg_decl(base, stems, "jí", "jí", "ji", nil, nil, "jí")
add_sg_decl(base, stems, "ní", "ní", "ni", nil, "ní", "ní", after_prep_footnote)
-- [[ona]] reaching this branch is the plural one; the singular was handled just above.
elseif base.lemma == "oni" or base.lemma == "ony" or base.lemma == "ona" then
add_pl_only_decl(base, stems, "jich", "jim", "je", nil, "jimi")
add_pl_only_decl(base, stems, "nich", "nim", "ně", "nich", "nimi", after_prep_footnote)
add_pl_only_decl(base, stems, nil, nil, nil, nil, "jima", dual_footnote)
add_pl_only_decl(base, stems, nil, nil, nil, nil, "nima", dual_footnote)
elseif base.lemma == "sebe" then
-- Underlyingly we handle [[sebe]]'s slots as singular.
-- NOTE(review): the final "no nom_s" argument presumably tells the helper not to generate a nominative
-- singular -- confirm against add_sg_decl_with_clitic().
add_sg_decl_with_clitic(base, stems, "sebe", "sebe", "sobě", "si", "sebe", "se", nil, "sobě", "sebou",
nil, "no nom_s")
else
error(("Internal error: Unrecognized pronoun lemma '%s'"):format(base.lemma))
end
end
-- Pronouns contribute no categories; the table description is the gender followed by "pronoun".
declprops["pron"] = {
desc = "GENDER pronoun",
cat = {},
}
-- Fill in the fixed properties of numerals: no gender, plural number, no animacy, no clitic forms.
-- Throws an error if the user specified gender, number or animacy; `base` is left untouched in that case.
local function set_num_defaults(base)
	local user_specified = base.gender or base.number or base.animacy
	if user_specified then
		error("Can't specify gender, number or animacy for numeral")
	end
	local unspecified = "none"
	base.gender, base.actual_gender = unspecified, unspecified
	base.number, base.actual_number = "pl", "pl"
	base.animacy, base.actual_animacy = unspecified, unspecified
	base.has_clitic = false
end
-- Install the single stem set used by numerals and select the "num" declension. The stem is the lemma minus a
-- final vowel (as matched by com.vowel_c), or the whole lemma if it does not end in one. Numerals accept no
-- reducible or vowel-alternation indicators.
local function determine_numeral_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with numerals")
	end
	local final_vowel_pattern = "^(.*)" .. com.vowel_c .. "$"
	local stem = rmatch(base.lemma, final_vowel_pattern)
	if not stem then
		stem = base.lemma
	end
	base.stem_sets = {
		{reducible = false, vowel_stem = stem, nonvowel_stem = stem},
	}
	base.decl = "num"
end
-- Declension of numerals (plural-only). `stems` is the stem set built by determine_numeral_stems(); for the
-- irregular numerals an explicit string is passed in its place and the stem set's footnotes are forwarded by
-- hand.
decls["num"] = function(base, stems)
	if base.lemma == "devět" then
		-- Irregular: full forms are supplied, with an empty stem.
		add_pl_only_decl(base, "", "devíti", "devíti", "-", "devíti", "devíti", stems.footnotes)
	elseif base.lemma == "sta" or base.lemma == "stě" or base.lemma == "set" then
		add_pl_only_decl(base, "", "set", "stům", "-", "stech", "sty", stems.footnotes)
	elseif rfind(base.lemma, "[cs]et$") then
		-- [[deset]] and all numbers ending in -cet ([[dvacet]], [[třicet]], [[čtyřicet]] and inverted compound
		-- numerals such as [[pětadvacet]] "25" and [[dvaatřicet]] "32"). These have both the regular -i forms and
		-- variants in -íti built on the lemma minus its final -et.
		local begin = rmatch(base.lemma, "^(.*)et$")
		add_pl_only_decl(base, stems, "i", "i", "-", "i", "i")
		add_pl_only_decl(base, begin, "íti", "íti", "-", "íti", "íti", stems.footnotes)
	else
		add_pl_only_decl(base, stems, "i", "i", "-", "i", "i")
	end
end

-- Numerals contribute no categories; the table description is the gender followed by "numeral".
declprops["num"] = {
	desc = "GENDER numeral",
	cat = {},
}
-- Fill in the fixed properties of determiners: no gender, no number, no animacy, no clitic forms.
-- Throws an error if the user specified gender, number or animacy; `base` is left untouched in that case.
local function set_det_defaults(base)
	local user_specified = base.gender or base.number or base.animacy
	if user_specified then
		error("Can't specify gender, number or animacy for determiner")
	end
	local unspecified = "none"
	base.gender, base.actual_gender = unspecified, unspecified
	base.number, base.actual_number = unspecified, unspecified
	base.animacy, base.actual_animacy = unspecified, unspecified
	base.has_clitic = false
end
-- Install the single stem set used by determiners and select the "det" declension. The stem is the lemma minus
-- a final vowel (as matched by com.vowel_c), or the whole lemma if it does not end in one. Determiners accept no
-- reducible or vowel-alternation indicators.
local function determine_determiner_stems(base)
	if base.stem_sets then
		error("Reducible and vowel alternation specs cannot be given with determiners")
	end
	local final_vowel_pattern = "^(.*)" .. com.vowel_c .. "$"
	local stem = rmatch(base.lemma, final_vowel_pattern)
	if not stem then
		stem = base.lemma
	end
	base.stem_sets = {
		{reducible = false, vowel_stem = stem, nonvowel_stem = stem},
	}
	base.decl = "det"
end
decls["det"] = function(base, stems)
	-- Determiners take -a in every oblique case except the accusative, which keeps the lemma form; no vocative is
	-- supplied.
	local gen_s, dat_s, acc_s, voc_s, loc_s, ins_s = "a", "a", "-", nil, "a", "a"
	add_sg_decl(base, stems, gen_s, dat_s, acc_s, voc_s, loc_s, ins_s)
end

-- Determiners contribute no categories; the table description is the gender followed by "determiner".
declprops["det"] = {
	desc = "GENDER determiner",
	cat = {},
}
-- Collect the bracketed footnotes from a group in which plain text and footnotes alternate, e.g.
-- {"čobotámi", "[rare]", ""}: element 1 is the text, the even elements are footnotes, and each odd element after
-- a footnote must be empty. Returns a list of the footnotes, or nil if there are none. Throws an error if
-- non-empty text follows a footnote.
local function fetch_footnotes(separated_group)
	local collected
	local last_footnote_index = #separated_group - 1
	local index = 2
	while index <= last_footnote_index do
		local trailing_text = separated_group[index + 1]
		if trailing_text ~= "" then
			error("Extraneous text after bracketed footnotes: '" .. table.concat(separated_group) .. "'")
		end
		collected = collected or {}
		table.insert(collected, separated_group[index])
		index = index + 2
	end
	return collected
end
--[=[
Parse a single override spec (e.g. 'nomplé:ové' or 'ins:autodráhou:autodrahou[rare]') and return
two values: the slot(s) the override applies to, and an object describing the override spec.
The input is actually a list where the footnotes have been separated out; for example,
given the spec 'inspl:čobotami:čobotámi[rare]:čobitmi[archaic]', the input will be a list
{"inspl:čobotami:čobotámi", "[rare]", ":čobitmi", "[archaic]", ""}. The object returned
for 'ins:autodráhou:autodrahou[rare]' looks like this:
{
full = true,
values = {
{
form = "autodráhou"
},
{
form = "autodrahou",
footnotes = {"[rare]"}
}
}
}
The object returned for 'nomplé:ové' looks like this:
{
values = {
{
form = "é",
},
{
form = "ové",
}
}
}
]=]
-- Parse one override spec whose footnotes have already been split out into `segments` (text and bracketed
-- footnotes alternating). Returns the list of slots the override applies to, followed by the override object
-- ({full = true or nil, values = {{form = ..., footnotes = ...}, ...}}).
--
-- The spec starts with one or more slot prefixes joined by '+'. Each prefix is a three-letter case, optionally
-- followed by 'pl' (plural slot) or 'cl' (singular clitic slot, only for cases in `clitic_cases`). A ':' directly
-- after the slot prefixes marks the values as full forms rather than endings. The values themselves are
-- colon-separated; '-' stands for an empty ending. Throws an error on an unrecognized case, an unsupported
-- clitic case, or an empty value.
local function parse_override(segments)
	-- Snapshot the spec as the user wrote it: segments[1] is overwritten below, and error messages should still
	-- show the original text including the slot prefix.
	local orig_spec = table.concat(segments)
	local retval = {values = {}}
	local part = segments[1]
	local slots = {}
	while true do
		local case = usub(part, 1, 3)
		if not cases[case] then
			error(("Unrecognized case '%s' in override: '%s'"):format(case, orig_spec))
		end
		part = usub(part, 4)
		local slot
		if rfind(part, "^pl") then
			part = usub(part, 3)
			slot = case .. "_p"
		elseif rfind(part, "^cl") then
			-- No plural clitic cases at this point.
			part = usub(part, 3)
			if clitic_cases[case] then
				slot = "clitic_" .. case .. "_s"
			else
				error(("Unrecognized clitic case '%s' in override: '%s'"):format(case, orig_spec))
			end
		else
			slot = case .. "_s"
		end
		table.insert(slots, slot)
		-- A '+' introduces another slot prefix sharing the same values.
		if rfind(part, "^%+") then
			part = usub(part, 2)
		else
			break
		end
	end
	if rfind(part, "^:") then
		retval.full = true
		part = usub(part, 2)
	end
	-- Strip the slot prefix so that only the values remain to be split on ':'.
	segments[1] = part
	local colon_separated_groups = put.split_alternating_runs_and_strip_spaces(segments, ":")
	for _, colon_separated_group in ipairs(colon_separated_groups) do
		local value = {}
		local form = colon_separated_group[1]
		if form == "" then
			-- Join the slot names with a separator so that several slots stay readable in the message.
			error(("Use - to indicate an empty ending for slot%s '%s': '%s'"):format(
				#slots > 1 and "s" or "", table.concat(slots, "', '"), orig_spec))
		elseif form == "-" then
			value.form = ""
		else
			value.form = form
		end
		value.footnotes = fetch_footnotes(colon_separated_group)
		table.insert(retval.values, value)
	end
	return slots, retval
end
--[=[
Parse an indicator spec (text consisting of angle brackets and zero or more
dot-separated indicators within them). Return value is an object of the form
{
overrides = {
SLOT = {OVERRIDE, OVERRIDE, ...}, -- as returned by parse_override()
...
},
forms = {}, -- forms for a single spec alternant; see `forms` below
footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
stem_sets = { -- may be missing
{
reducible = TRUE_OR_FALSE,
footnotes = {"FOOTNOTE", "FOOTNOTE", ...}, -- may be missing
-- The following fields are filled in by determine_stems()
vowel_stem = "STEM",
nonvowel_stem = "STEM",
oblique_slots = one of {nil, "gen_p", "all", "all-oblique"},
oblique_vowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
oblique_nonvowel_stem = "STEM" or nil (only needs to be set if oblique_slots is non-nil),
},
...
},
gender = "GENDER", -- "m", "f", "n"
number = "NUMBER", -- "sg", "pl"; may be missing
animacy = "ANIMACY", -- "inan", "an"; may be missing
hard = true, -- may be missing
soft = true, -- may be missing
mixed = true, -- may be missing
surname = true, -- may be missing
istem = true, -- may be missing
["-istem"] = true, -- may be missing
tstem = true, -- may be missing
nstem = true, -- may be missing
tech = true, -- may be missing
foreign = true, -- may be missing
mostlyindecl = true, -- may be missing
indecl = true, -- may be missing
manual = true, -- may be missing
adj = true, -- may be missing
decllemma = "DECLENSION-LEMMA", -- may be missing
declgender = "DECLENSION-GENDER", -- may be missing
declnumber = "DECLENSION-NUMBER", -- may be missing
-- The following additional fields are added by other functions:
orig_lemma = "ORIGINAL-LEMMA", -- as given by the user
orig_lemma_no_links = "ORIGINAL-LEMMA-NO-LINKS", -- links removed
lemma = "LEMMA", -- `orig_lemma_no_links`, converted to singular form if plural and lowercase if all-uppercase
forms = {
SLOT = {
{
form = "FORM",
footnotes = {"FOOTNOTE", "FOOTNOTE", ...} -- may be missing
},
...
},
...
},
decl = "DECL", -- declension, e.g. "hard-m"
vowel_stem = "VOWEL-STEM", -- derived from vowel-ending lemmas
nonvowel_stem = "NONVOWEL-STEM", -- derived from non-vowel-ending lemmas
}
]=]
local function parse_indicator_spec(angle_bracket_spec)
local inside = rmatch(angle_bracket_spec, "^<(.*)>$")
assert(inside)
local base = {overrides = {}, forms = {}}
if inside ~= "" then
local segments = put.parse_balanced_segment_run(inside, "[", "]")
local dot_separated_groups = put.split_alternating_runs_and_strip_spaces(segments, "%.")
for i, dot_separated_group in ipairs(dot_separated_groups) do
local part = dot_separated_group[1]
local case_prefix = usub(part, 1, 3)
if cases[case_prefix] then
local slots, override = parse_override(dot_separated_group)
for _, slot in ipairs(slots) do
if base.overrides[slot] then
error(("Two overrides specified for slot '%s'"):format(slot))
else
base.overrides[slot] = {override}
end
end
elseif part == "" then
if #dot_separated_group == 1 then
error("Blank indicator: '" .. inside .. "'")
end
base.footnotes = fetch_footnotes(dot_separated_group)
elseif rfind(part, "^[-*#ě]*$") or rfind(part, "^[-*#ě]*,") then
if base.stem_sets then
error("Can't specify reducible/vowel-alternant indicator twice: '" .. inside .. "'")
end
local comma_separated_groups = put.split_alternating_runs_and_strip_spaces(dot_separated_group, ",")
local stem_sets = {}
for i, comma_separated_group in ipairs(comma_separated_groups) do
local pattern = comma_separated_group[1]
local orig_pattern = pattern
local reducible, vowelalt, oblique_slots
if pattern == "-" then
-- default reducible, no vowel alt
else
local before, after
before, reducible, after = rmatch(pattern, "^(.-)(%-?%*)(.-)$")
if before then
pattern = before .. after
reducible = reducible == "*"
end
if pattern ~= "" then
if not rfind(pattern, "^##?ě?$") then
error("Unrecognized vowel-alternation pattern '" .. pattern .. "', should be one of #, ##, #ě or ##ě: '" .. inside .. "'")
end
if pattern == "#ě" or pattern == "##ě" then
vowelalt = "quant-ě"
else
vowelalt = "quant"
end
-- `oblique_slots` will be later changed to "all" if the lemma ends in a consonant.
if pattern == "##" or pattern == "##ě" then
oblique_slots = "all-oblique"
else
oblique_slots = "gen_p"
end
end
end
table.insert(stem_sets, {
reducible = reducible,
vowelalt = vowelalt,
oblique_slots = oblique_slots,
footnotes = fetch_footnotes(comma_separated_group)
})
end
base.stem_sets = stem_sets
elseif #dot_separated_group > 1 then
error("Footnotes only allowed with slot overrides, reducible or vowel alternation specs or by themselves: '" .. table.concat(dot_separated_group) .. "'")
elseif part == "m" or part == "f" or part == "n" then
if base.gender then
error("Can't specify gender twice: '" .. inside .. "'")
end
base.gender = part
elseif part == "sg" or part == "pl" then
if base.number then
error("Can't specify number twice: '" .. inside .. "'")
end
base.number = part
elseif part == "an" or part == "inan" then
if base.animacy then
error("Can't specify animacy twice: '" .. inside .. "'")
end
base.animacy = part
elseif part == "hard" or part == "soft" or part == "mixed" or part == "surname" or part == "istem" or
part == "-istem" or part == "tstem" or part == "nstem" or part == "tech" or part == "foreign" or
part == "mostlyindecl" or part == "indecl" or part == "pron" or part == "det" or part == "num" or
-- Use 'velar' with words like [[petanque]] and [[Braque]] that end with a pronounced velar (and hence are declined
-- like velars) but not with a spelled velar; use '-velar' with words like [[hadíth]] that end with a spelled but
-- silent velar.
part == "collapse_ee" or part == "persname" or part == "c_as_k" or part == "velar" or part == "-velar" then
if base[part] then
error("Can't specify '" .. part .. "' twice: '" .. inside .. "'")
end
base[part] = true
-- Allow 'hard' to signal that -y is allowed after -c, as in hard masculine nouns such as [[hec]]
-- "joke", and also feminines in -ca where the c is pronounced as /k/, e.g. [[ayahuasca]], [[pororoca]],
-- [[Petrarca]], [[Mallorca]], [[Casablanca]]. (Contrast [[mangalica]], [[Kusturica]], [[Bjelica]],
-- where the c is pronounced as /ts/ and -y is disallowed.)
if part == "hard" then
base.hard_c = true
end
elseif part == "+" then
if base.adj then
error("Can't specify '+' twice: '" .. inside .. "'")
end
base.adj = true
elseif part == "!" then
if base.manual then
error("Can't specify '!' twice: '" .. inside .. "'")
end
base.manual = true
elseif rfind(part, "^mixedistem:") then
if base.mixedistem then
error("Can't specify 'mixedistem:' twice: '" .. inside .. "'")
end
base.mixedistem = rsub(part, "^mixedistem:", "")
elseif rfind(part, "^decllemma:") then
if base.decllemma then
error("Can't specify 'decllemma:' twice: '" .. inside .. "'")
end
base.decllemma = rsub(part, "^decllemma:", "")
elseif rfind(part, "^declgender:") then
if base.declgender then
error("Can't specify 'declgender:' twice: '" .. inside .. "'")
end
base.declgender = rsub(part, "^declgender:", "")
elseif rfind(part, "^declnumber:") then
if base.declnumber then
error("Can't specify 'declnumber:' twice: '" .. inside .. "'")
end
base.declnumber = rsub(part, "^declnumber:", "")
else
error("Unrecognized indicator '" .. part .. "': '" .. inside .. "'")
end
end
end
return base
end
-- Return true if `base` describes an ordinary noun, i.e. none of the adjectival ('+'), pronoun, determiner or numeral
-- indicators is set on it.
local function is_regular_noun(base)
	return not (base.adj or base.pron or base.det or base.num)
end
-- Save the user-visible number in `base.actual_number`, then, if a 'declnumber:' override was given, replace
-- `base.number` with it. Throws an error if the override is anything other than "sg" or "pl".
local function process_declnumber(base)
	base.actual_number = base.number
	local declnumber = base.declnumber
	if not declnumber then
		return
	end
	if declnumber ~= "sg" and declnumber ~= "pl" then
		error(("Unrecognized value '%s' for 'declnumber', should be 'sg' or 'pl'"):format(declnumber))
	end
	base.number = declnumber
end
-- Fill in default values on a single word spec `base` and reject contradictory indicator combinations.
--
-- Pronouns, determiners and numerals delegate to their own default-setting functions. Adjectival specs get no
-- defaults here. Regular nouns get defaults for gender (required unless the declension is manual), number and
-- animacy; the user-visible values are saved in `actual_number`/`actual_gender`/`actual_animacy` before any
-- 'declnumber:'/'declgender:' override replaces the values used for declension.
local function set_defaults_and_check_bad_indicators(base)
	local regular_noun = is_regular_noun(base)

	-- Effect of each recognized 'declgender:' value; 'f' and 'n' leave the animacy untouched.
	local declgender_overrides = {
		["m-an"] = {gender = "m", animacy = "an"},
		["m-in"] = {gender = "m", animacy = "inan"},
		f = {gender = "f"},
		n = {gender = "n"},
	}

	-- Set default values.
	if base.pron then
		set_pron_defaults(base)
	elseif base.det then
		set_det_defaults(base)
	elseif base.num then
		set_num_defaults(base)
	elseif regular_noun then
		if not base.gender then
			if not base.manual then
				error("For nouns, gender must be specified")
			end
			base.gender = "none"
		end
		base.number = base.number or "both"
		process_declnumber(base)
		base.animacy = base.animacy or "inan"
		base.actual_gender = base.gender
		base.actual_animacy = base.animacy
		local declgender = base.declgender
		if declgender then
			local override = declgender_overrides[declgender]
			if not override then
				error(("Unrecognized value '%s' for 'declgender', should be 'm-an', 'm-in', 'f' or 'n'"):format(declgender))
			end
			base.gender = override.gender
			if override.animacy then
				base.animacy = override.animacy
			end
		end
	end

	-- Check for bad indicator combinations.
	local hardness_count = 0
	for _, indicator in ipairs({"hard", "soft", "mixed"}) do
		if base[indicator] then
			hardness_count = hardness_count + 1
		end
	end
	if hardness_count > 1 then
		error("At most one of 'hard', 'soft' and 'mixed' can be specified")
	end

	local istem, not_istem = base.istem, base["-istem"]
	if istem and not_istem then
		error("'istem' and '-istem' cannot be specified together")
	end
	if istem or not_istem then
		if base.gender ~= "f" then
			error("'istem' and '-istem' can only be specified with the feminine gender")
		end
		if not regular_noun then
			error("'istem' and '-istem' can only be specified with regular nouns")
		end
	end
	if base.declgender and not regular_noun then
		error("'declgender' can only be specified with regular nouns")
	end
end
-- Run set_defaults_and_check_bad_indicators() on every word spec and record on the top-level spec which kinds of
-- words were seen: for each of "pron", "det" and "num", either `saw_<kind>` or `saw_non_<kind>` is set per word.
-- `has_clitic` is set on the top-level spec if any word spec has it.
local function set_all_defaults_and_check_bad_indicators(alternant_multiword_spec)
	local top_spec = alternant_multiword_spec
	local is_multiword = #top_spec.alternant_or_word_specs > 1

	-- Flag on the top-level spec whether `base` is or is not of the given kind.
	local function record_kind(base, kind)
		local flag = base[kind] and "saw_" .. kind or "saw_non_" .. kind
		top_spec[flag] = true
	end

	iut.map_word_specs(top_spec, function(base)
		set_defaults_and_check_bad_indicators(base)
		base.multiword = is_multiword -- FIXME: not currently used; consider deleting
		top_spec.has_clitic = top_spec.has_clitic or base.has_clitic
		record_kind(base, "pron")
		record_kind(base, "det")
		record_kind(base, "num")
	end)
end
-- Reverse the Czech second palatalization at the end of `word`, returning the unpalatalized form. The first
-- matching ending in the list below is replaced; if none matches, `word` is returned unchanged. The cluster
-- replacements (št -> sk, čt -> ck) are only tried when `is_adjective` is truthy. `base` is currently unused.
local function undo_second_palatalization(base, word, is_adjective)
	local replacements = {
		{"št", "sk", adjective_only = true},
		{"čt", "ck", adjective_only = true},
		{"c", "k"}, -- FIXME, this could be wrong and c correct
		{"ř", "r"},
		{"z", "h"}, -- FIXME, this could be wrong and z or g correct
		{"š", "ch"},
	}
	for _, replacement in ipairs(replacements) do
		if is_adjective or not replacement.adjective_only then
			local stem = rmatch(word, "^(.*)" .. replacement[1] .. "$")
			if stem then
				return stem .. replacement[2]
			end
		end
	end
	return word
end
-- For a plural-only lemma, synthesize a likely singular lemma. It doesn't have to be
-- theoretically correct as long as it generates all the correct plural forms.
--
-- On entry `base.lemma` holds the plural lemma; on exit it holds the synthesized singular. `base.stem_sets` is
-- created if missing. As side effects, `base.soft` may be set (inanimate masculines in -e/-ě unless 'mixed'),
-- `base.istem` may be set (feminines in -i), and `stems.reducible` may be defaulted to true for masculines.
-- Throws an error if the lemma doesn't have an ending expected for its gender/animacy, or if different stem sets
-- would require different singular lemmas.
local function synthesize_singular_lemma(base)
	if not base.stem_sets then
		base.stem_sets = {{}}
	end

	local lemma_determined
	-- Loop over all stem sets in case the user specified multiple ones (e.g. '*,-*'). If we try to reconstruct
	-- different lemmas for different stem sets, we'll throw an error below.
	for _, stems in ipairs(base.stem_sets) do
		local stem, lemma
		-- This loop runs at most once; `break` is used to jump out as soon as the lemma is determined.
		while true do
			if base.indecl then
				-- If specified as indeclinable, leave it alone; e.g. 'pesos' indeclinable plural of [[peso]].
				lemma = base.lemma
				break
			elseif base.gender == "m" then
				if base.animacy == "an" then
					stem = rmatch(base.lemma, "^(.*)i$")
					if stem then
						if base.soft then
							-- [[Blíženci]] "Gemini"
							-- The nominative singular has no ending, so no ending is passed here.
							lemma = com.convert_paired_plain_to_palatal(stem)
						else
							lemma = undo_second_palatalization(base, stem)
						end
					else
						stem = rmatch(base.lemma, "^(.*)ové$") or rmatch(base.lemma, "^(.*)é$")
						if stem then
							-- [[manželé]] "married couple", [[Velšané]] "Welsh people"
							lemma = stem
						else
							error(("Animate masculine plural-only lemma '%s' should end in -i, -ové or -é"):format(base.lemma))
						end
					end
				else
					stem = rmatch(base.lemma, "^(.*)y$")
					if stem then
						-- [[droby]] "giblets"; [[tvarůžky]] "Olomouc cheese"; [[alimenty]] "alimony"; etc.
						lemma = stem
					else
						local ending
						stem, ending = rmatch(base.lemma, "^(.*)([eě])$")
						if stem then
							-- [[peníze]] "money", [[tvargle]] "Olomouc cheese" (mixed declension), [[údaje]] "data",
							-- [[Lazce]] (a village), [[lováče]] "money", [[Krkonoše]] "Giant Mountains", [[kříže]] "clubs"
							lemma = com.convert_paired_plain_to_palatal(stem, ending)
							if not base.mixed then
								base.soft = true
							end
						else
							error(("Inanimate masculine plural-only lemma '%s' should end in -y, -e or -ě"):format(base.lemma))
						end
					end
				end
				-- Default the reducibility only when the user didn't specify it.
				if stems.reducible == nil then
					if rfind(lemma, com.cons_c .. "[ck]$") and not com.is_monosyllabic(base.lemma) then
						stems.reducible = true
					end
				end
				-- Dereduce whenever the stem set is reducible, whether by default or by an explicit '*'; otherwise
				-- an explicitly reducible stem set would be left with a reduced stem as its singular lemma.
				if stems.reducible then
					lemma = dereduce(base, lemma)
				end
				break
			elseif base.gender == "f" then
				stem = rmatch(base.lemma, "^(.*)y$")
				if stem then
					lemma = stem .. "a"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eě]$")
				if stem then
					-- Singular like the plural. Cons-stem feminines like [[dlaň]] "palm (of the hand)" have identical
					-- plurals to soft-stem feminines like [[růže]] (modulo e/ě differences), so we don't need to
					-- reconstruct the former type.
					lemma = base.lemma
					break
				end
				stem = rmatch(base.lemma, "^(.*)i$")
				if stem then
					-- i-stems.
					lemma = stem
					base.istem = true
					break
				end
				error(("Feminine plural-only lemma '%s' should end in -y, -ě, -e or -i"):format(base.lemma))
			elseif base.gender == "n" then
				-- -ata nouns like [[slůně]] "baby elephant" nom pl 'slůňata' are declined in the plural same as if
				-- the singular were 'slůňato' so we don't have to worry about them.
				stem = rmatch(base.lemma, "^(.*)a$")
				if stem then
					lemma = stem .. "o"
					break
				end
				stem = rmatch(base.lemma, "^(.*)[eěí]$")
				if stem then
					-- singular lemma also in -e, -ě or -í; e.g. [[věčná loviště]] "[[happy hunting ground]]"
					lemma = base.lemma
					break
				end
				error(("Neuter plural-only lemma '%s' should end in -a, -í, -ě or -e"):format(base.lemma))
			else
				error(("Internal error: Unrecognized gender '%s'"):format(base.gender))
			end
		end
		if lemma_determined and lemma_determined ~= lemma then
			error(("Attempt to set two different singular lemmas '%s' and '%s'"):format(lemma_determined, lemma))
		end
		lemma_determined = lemma
	end
	base.lemma = lemma_determined
end
-- For an adjectival lemma, synthesize the masc singular form.
--
-- `base.lemma` is rewritten to the masculine singular where it differs (hard adjectives get -ý, possessives get
-- -ův/-in), `base.decl` is set to "adj" (or "indecl" for indeclinables), and the stem captured from the original
-- lemma is stored as both `vowel_stem` and `nonvowel_stem` of every stem set. Throws an error if the lemma's
-- ending doesn't fit its gender, number and animacy.
local function synthesize_adj_lemma(base)
	-- Possessive stems in -ov have a masculine singular in -ův.
	local function sub_ov(possessive_stem)
		return (possessive_stem:gsub("ov$", "ův"))
	end

	-- Match `base.lemma` against each pattern in turn and return the capture of the first one that matches,
	-- or nil if none does.
	local function match_first(...)
		for _, pattern in ipairs({...}) do
			local captured = rmatch(base.lemma, pattern)
			if captured then
				return captured
			end
		end
		return nil
	end

	-- Work out the stem of a declinable adjectival lemma, fixing up `base.lemma` along the way. Every path
	-- either returns the stem or throws an error.
	local function find_stem()
		local plural = base.number == "pl"
		local gender = base.gender
		local stem
		if plural and gender == "m" then
			stem = match_first("^(.*)í$")
			if stem then
				-- With 'soft' there is nothing to do; the lemma is already the masculine singular.
				if not base.soft then
					if base.animacy ~= "an" then
						error(("Masculine plural-only adjectival lemma '%s' ending in -í can only be animate unless '.soft' is specified"):
							format(base.lemma))
					end
					base.lemma = undo_second_palatalization(base, stem, "is adjective") .. "ý"
				end
				return stem
			end
			stem = match_first("^(.*)é$")
			if stem then
				if base.animacy == "an" then
					error(("Masculine plural-only adjectival lemma '%s' ending in -é must be inanimate"):
						format(base.lemma))
				end
				base.lemma = stem .. "ý"
				return stem
			end
			stem = match_first("^(.*ov)i$", "^(.*in)i$")
			if stem then
				if base.animacy ~= "an" then
					error(("Masculine plural-only possessive adjectival lemma '%s' ending in -i must be animate"):
						format(base.lemma))
				end
				base.lemma = sub_ov(stem)
				return stem
			end
			stem = match_first("^(.*ov)y$", "^(.*in)y$")
			if stem then
				if base.animacy == "an" then
					error(("Masculine plural-only possessive adjectival lemma '%s' ending in -y must be inanimate"):
						format(base.lemma))
				end
				base.lemma = sub_ov(stem)
				return stem
			end
			if base.animacy == "an" then
				error(("Animate masculine plural-only adjectival lemma '%s' should end in -í, -ovi or -ini"):
					format(base.lemma))
			elseif base.soft then
				error(("Soft masculine plural-only adjectival lemma '%s' should end in -í"):format(base.lemma))
			else
				error(("Inanimate masculine plural-only adjectival lemma '%s' should end in -é, -ovy or -iny"):
					format(base.lemma))
			end
		elseif plural and gender == "f" then
			stem = match_first("^(.*)é$") -- hard adjective
			if stem then
				base.lemma = stem .. "ý"
				return stem
			end
			stem = match_first("^(.*)í$") -- soft adjective
			if stem then
				return stem
			end
			stem = match_first("^(.*ov)y$", "^(.*in)y$") -- possessive adjective
			if stem then
				base.lemma = sub_ov(stem)
				return stem
			end
			error(("Feminine plural-only adjectival lemma '%s' should end in -é, -í, -ovy or -iny"):format(base.lemma))
		elseif plural then
			stem = match_first("^(.*)á$") -- hard adjective
			if stem then
				base.lemma = stem .. "ý"
				return stem
			end
			stem = match_first("^(.*)í$") -- soft adjective
			if stem then
				return stem
			end
			stem = match_first("^(.*ov)a$", "^(.*in)a$") -- possessive adjective
			if stem then
				base.lemma = sub_ov(stem)
				return stem
			end
			error(("Neuter plural-only adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
		elseif gender == "m" then
			stem = match_first("^(.*)[ýí]$", "^(.*)ův$", "^(.*)in$")
			if stem then
				return stem
			end
			error(("Masculine adjectival lemma '%s' should end in -ý, -í, -ův or -in"):format(base.lemma))
		elseif gender == "f" then
			stem = match_first("^(.*)á$")
			if stem then
				base.lemma = stem .. "ý"
				return stem
			end
			stem = match_first("^(.*)í$")
			if stem then
				return stem
			end
			stem = match_first("^(.*ov)a$", "^(.*in)a$")
			if stem then
				base.lemma = sub_ov(stem)
				return stem
			end
			error(("Feminine adjectival lemma '%s' should end in -á, -í, -ova or -ina"):format(base.lemma))
		else
			stem = match_first("^(.*)é$")
			if stem then
				base.lemma = stem .. "ý"
				return stem
			end
			stem = match_first("^(.*)í$")
			if stem then
				return stem
			end
			stem = match_first("^(.*ov)o$", "^(.*in)o$")
			if stem then
				base.lemma = sub_ov(stem)
				return stem
			end
			error(("Neuter adjectival lemma '%s' should end in -é, -í, -ovo or -ino"):format(base.lemma))
		end
	end

	local stem
	if base.indecl then
		base.decl = "indecl"
		stem = base.lemma
	else
		stem = find_stem()
		base.decl = "adj"
	end

	-- Now set the stem sets if not given.
	if not base.stem_sets then
		base.stem_sets = {{reducible = false}}
	end
	for _, stems in ipairs(base.stem_sets) do
		-- Set the stems.
		stems.vowel_stem = stem
		stems.nonvowel_stem = stem
	end
end
-- Determine the declension based on the lemma, gender and number. The declension is set in base.decl. In the process,
-- we set either base.vowel_stem (if the lemma ends in a vowel) or base.nonvowel_stem (if the lemma does not end in a
-- vowel), which is used by determine_stems(). In some cases (specifically with certain foreign nouns), we set
-- base.lemma to a new value; this is as if the user specified 'decllemma:'.
--
-- The lemma endings are tried in this order: -a, -e/-ě, -o, -i/-y, -u, -í/-ý, consonant. Throws an error for
-- gender/ending combinations that are unsupported or that must be marked indeclinable, and for unrecognized endings.
local function determine_declension(base)
	if base.mostlyindecl then
		base.decl = "mostly-indecl"
		base.nonvowel_stem = base.lemma
		return
	end
	if base.indecl then
		base.decl = "indecl"
		base.nonvowel_stem = base.lemma
		return
	end

	-- Must be declared local: a bare assignment would create (and overwrite) a global variable `stem`.
	local stem
	-- Determine declension
	stem = rmatch(base.lemma, "^(.*)a$")
	if stem then
		if base.gender == "m" then
			if base.animacy ~= "an" then
				error("Masculine lemma in -a must be animate")
			end
			base.decl = "a-m"
		elseif base.gender == "f" then
			if base.hard then
				-- e.g. [[doňa]], which seems not to have soft alternates as [[piraňa]] does (despite IJP; but see the note at the
				-- bottom)
				base.decl = "hard-f"
			elseif rfind(stem, "e$") then
				-- [[idea]], [[diarea]] (subtype '.tech'), [[Korea]], etc.
				base.decl = "ea-f"
			elseif rfind(stem, "i$") then
				-- [[signoria]], [[sinfonia]], [[paranoia]], etc.
				base.decl = "ia-f"
			elseif rfind(stem, "[ou]$") then
				-- [[stoa]], [[kongrua]], [[Samoa]], [[Nikaragua]], etc.
				base.decl = "oa-f"
			elseif not base.persname and rfind(stem, "^.*[ňj]$") or base.mixed then
				-- [[maracuja]], [[papája]], [[sója]]; [[piraňa]] etc. Also [[Keňa]], [[Troja]]/[[Trója]], [[Amudarja]].
				-- Not [[Táňa]], [[Darja]], which decline like [[gejša]], [[skica]], etc. (subtype of hard feminines).
				base.decl = "mixed-f"
			else
				base.decl = "hard-f"
			end
		elseif base.gender == "n" then
			if rfind(stem, "m$") then
				base.decl = "ma-n"
			else
				error("Lemma ending in -a and neuter must end in -ma")
			end
		end
		base.vowel_stem = stem
		return
	end
	local ending
	stem, ending = rmatch(base.lemma, "^(.*)([eě])$")
	if stem then
		if ending == "ě" then
			stem = com.convert_paired_plain_to_palatal(stem)
		end
		if base.gender == "m" then
			if base.foreign then
				-- [[software]] and similar English-derived nouns with silent -e; set the lemma here as if decllemma: were given
				base.lemma = stem
				base.nonvowel_stem = stem
				base.decl = "hard-m"
				return
			end
			if base.hard then
				-- -e be damned; e.g. [[Sofokles]] with hard stem 'Sofokle-' (genitive 'Sofoklea', dative 'Sofokleovi', etc.)
				base.nonvowel_stem = base.lemma
				base.decl = "hard-m"
				return
			end
			if base.tstem then
				if base.animacy ~= "an" then
					error("T-stem masculine lemma in -e must be animate")
				end
				base.decl = "tstem-m"
			elseif rfind(stem, "i$") then
				-- [[zombie]], [[hippie]], [[yuppie]], [[rowdie]]
				base.decl = "ie-m"
			elseif rfind(stem, "e$") then
				-- [[Yankee]]
				base.nonvowel_stem = base.lemma
				base.decl = "ee-m"
				return
			else
				base.decl = "e-m"
			end
		elseif base.gender == "f" then
			base.decl = "soft-f"
		else
			if base.tstem then
				base.decl = "tstem-n"
			else
				base.decl = "soft-n"
			end
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*)o$")
	if stem then
		if base.gender == "m" then
			-- Cf. [[maestro]] m.
			base.decl = "o-m"
		elseif base.gender == "f" then
			-- [[zoo]]; [[Žemaitsko]]?
			error("Feminine nouns in -o are indeclinable; use '.indecl' if needed")
		elseif base.nstem then
			base.decl = "n-n"
		elseif base.hard then
			base.decl = "hard-n"
		elseif rfind(stem, "[aeiuy]$") then
			-- These have gen pl in -í and often other soft plural endings.
			base.decl = "semisoft-n"
		else
			base.decl = "hard-n"
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*[iy])$")
	if stem then
		if base.gender == "m" then
			if base.soft then
				-- [[gay]] "gay man", [[gray]] "gray (scientific unit)", [[Nagy]] (surname)
				base.decl = "soft-m"
			else
				-- Cf. [[kivi]] "kiwi (bird)", [[husky]] "husky", etc.
				base.decl = "i-m"
			end
		elseif base.gender == "f" then
			if base.soft then
				-- [[Uruguay]], [[Paraguay]]
				base.decl = "soft-f"
			else
				-- [[máti]], [[pramáti]]; note also indeclinable [[tsunami]]/[[cunami]], [[okapi]]
				base.decl = "i-f"
				if stem:find("i$") then
					-- Only the first return value of gsub() (the new string) is kept.
					stem = stem:gsub("i$", "")
				else
					error("Feminine nouns in -y are either soft or indeclinable; use '.soft' or '.indecl' as needed")
				end
			end
		else
			error("Neuter nouns in -i are indeclinable; use '.indecl' if needed")
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*u)$")
	if stem then
		if base.gender == "m" then
			-- Cf. [[emu]], [[guru]], etc.
			base.decl = "u-m"
		elseif base.gender == "f" then
			-- Only one I know is [[budižkničemu]], which is indeclinable in the singular and declines in the plural as
			-- if written 'budižkničema'.
			error("Feminine nouns in -u are indeclinable; use '.indecl' if needed")
		else
			error("Neuter nouns in -u are indeclinable; use '.indecl' if needed")
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*[íý])$")
	if stem then
		if base.gender == "m" then
			base.decl = "í-m"
		elseif base.gender == "f" then
			-- FIXME: Do any exist? If not, update this message.
			error("Support for non-adjectival non-indeclinable feminine nouns in -í/-ý not yet implemented")
		else
			base.decl = "í-n"
		end
		base.vowel_stem = stem
		return
	end
	stem = rmatch(base.lemma, "^(.*" .. com.cons_c .. ")$")
	if stem then
		if base.gender == "m" then
			if base.foreign then
				-- [[komunismus]] "communism", [[kosmos]] "cosmos", [[hádes]] "Hades"
				stem = rmatch(base.lemma, "^(.*)[ueoaéá]s$")
				if not stem then
					error("Unrecognized masculine foreign ending, should be -us, -es, -os, -as, -és or -ás")
				end
				if not base.hard and (rfind(stem, "[ei]$") and base.animacy == "an" or
					rfind(stem, "i$") and base.animacy == "inan") then
					-- [[genius]], [[basileus]], [[rádius]]; not [[nukleus]], [[choreus]] (inanimate); not
					-- [[skarabeus]] (animate), which should specify 'hard'
					base.decl = "semisoft-m"
				else
					base.decl = "hard-m"
				end
				-- set the lemma here as if decllemma: were given
				base.lemma = stem
			elseif base.hard then
				base.decl = "hard-m"
			elseif base.soft then
				base.decl = "soft-m"
			elseif base.mixed then
				base.decl = "mixed-m"
			elseif rfind(base.lemma, com.inherently_soft_c .. "$") or rfind(base.lemma, "tel$") then
				base.decl = "soft-m"
			else
				base.decl = "hard-m"
			end
		elseif base.gender == "f" then
			if base.mixedistem then
				base.decl = "mixed-istem-f"
			elseif base.istem then
				base.decl = "istem-f"
			elseif base["-istem"] then
				base.decl = "cons-f"
			elseif rfind(base.lemma, "st$") then
				-- Numerous abstracts in -ost; also [[kost]], [[část]], [[srst]], [[bolest]]
				base.decl = "istem-f"
			else
				base.decl = "cons-f"
			end
		elseif base.gender == "n" then
			if base.foreign then
				stem = rmatch(base.lemma, "^(.*)um$") or rmatch(base.lemma, "^(.*)on$")
				if not stem then
					error("Unrecognized neuter foreign ending, should be -um or -on")
				end
				if base.hard then
					base.decl = "hard-n"
				elseif rfind(stem, "[eiuy]$") then
					base.decl = "semisoft-n"
				else
					base.decl = "hard-n"
				end
				-- set the lemma here as if decllemma: were given
				base.lemma = stem .. "o"
				base.vowel_stem = stem
				return
			else
				error("Neuter nouns ending in a consonant should use '.foreign' or '.decllemma:...'")
			end
		end
		base.nonvowel_stem = stem
		return
	end
	error("Unrecognized ending for lemma: '" .. base.lemma .. "'")
end
-- Determine the default value for the 'reducible' flag. The result is stored in `base.default_reducible` and is one
-- of true, false or "mixed" (both reducible and non-reducible variants).
local function determine_default_reducible(base)
	local lemma = base.lemma

	-- Nouns in vowels other than -a/o as well as masculine nouns ending in all vowels don't have null endings so not
	-- reducible. Note, we are never called on adjectival nouns.
	if rfind(lemma, "[iyuíeě]$") or base.gender == "m" and rfind(lemma, "[ao]$") or base.tstem then
		base.default_reducible = false
		return
	end

	local cons_final_stem = rmatch(lemma, "^(.*" .. com.cons_c .. ")$")
	if cons_final_stem then
		-- When analyzing existing manual declensions in -ec and -ek, 290 were reducible vs. 23 non-reducible. Of these
		-- 23, 15 were monosyllabic (and none of the 290 reducible nouns were monosyllabic) -- and two of these were
		-- actually reducible but irregularly: [[švec]] "shoemaker" (gen sg 'ševce') and [[žnec]] "reaper (person)"
		-- (gen sg. 'žence'). Of the remaining 8 multisyllabic non-reducible words, two were actually reducible but
		-- irregularly: [[stařec]] "old man" (gen sg 'starce') and [[tkadlec]] "weaver" (gen sg 'tkalce'). The remaining
		-- six consisted of 5 compounds of monosyllabic words: [[dotek]], [[oblek]], [[kramflek]], [[pucflek]],
		-- [[pokec]], plus [[česnek]], which should be reducible but would lead to an impossible consonant cluster.
		--
		-- Feminines in -eň/-ěň: [[pochodeň]] "torch", [[píseň]] "song", [[žeň]] "harvest"; not [[reveň]] "rhubarb" or
		-- [[dřeň]] "pulp", which need an override.
		local reducible = false
		if base.gender == "m" and rfind(cons_final_stem, "[eě][ck]$") and not com.is_monosyllabic(cons_final_stem) or
			base.gender == "f" and rfind(cons_final_stem, "[eě]ň$") then
			reducible = true
		end
		base.default_reducible = reducible
		return
	end

	-- From here on the lemma ends in -a or -o, where only the genitive plural can have a null ending.
	if base.number == "sg" then
		base.default_reducible = false
		return
	end
	if rfind(lemma, "isko$") then
		-- e.g. [[středisko]]
		base.default_reducible = "mixed"
		return
	end

	local vowel_final_stem = rmatch(lemma, "^(.*)" .. com.vowel_c .. "$")
	if not vowel_final_stem then
		error(("Internal error: Something wrong, lemma '%s' doesn't end in consonant or vowel"):format(lemma))
	end
	-- Substitute 'ch' with a single character to make the following code simpler.
	vowel_final_stem = (vowel_final_stem:gsub("ch", com.TEMP_CH))

	local reducible
	if rfind(vowel_final_stem, com.cons_c .. "[lr]" .. com.cons_c .. "$") then
		-- [[vrba]], [[vlha]]; not reducible. (But note [[jablko]], reducible; needs override.)
		reducible = false
	elseif base.foreign then
		-- Foreign nouns in -CCum seem generally non-reducible in the gen pl, except for those in -Crum like
		-- [[centrum]]. Non-reducible examples: [[album]], [[verbum]], [[signum]], [[interregnum]], [[sternum]].
		-- [[infernum]] has gen pl 'infern/inferen'.
		reducible = not not rfind(vowel_final_stem, com.cons_c .. "r$")
	else
		-- With 'c_as_k', a final -c also counts: [[ayahuasca]] has gen pl 'ayahuasek'.
		reducible = not not (rfind(vowel_final_stem, com.cons_c .. "[bkhlrmnv]$") or
			base.c_as_k and rfind(vowel_final_stem, com.cons_c .. "c$"))
	end
	base.default_reducible = reducible
end
-- Determine the stems to use for each stem set: vowel and nonvowel stems, for singular
-- and plural. We assume that one of base.vowel_stem or base.nonvowel_stem has been
-- set in determine_declension(), depending on whether the lemma ends in
-- a vowel. We construct all the rest given the reducibility, vowel alternation spec and
-- any explicit stems given. We store the determined stems inside of the stem-set objects
-- in `base.stem_sets`, meaning that if the user gave multiple reducible or vowel-alternation
-- patterns, we will compute multiple sets of stems. The reason is that the stems may vary
-- depending on the reducibility and vowel alternation.
local function determine_stems(base)
	-- Fill in the stems of one stem set when the lemma ends in a vowel.
	local function fill_from_vowel_lemma(stems)
		local vowel_stem = base.vowel_stem
		stems.vowel_stem = vowel_stem
		stems.nonvowel_stem = vowel_stem
		-- Apply vowel alternation first in cases like jádro -> jader; apply_vowel_alternation() will throw an error
		-- if the vowel being modified isn't the last vowel in the stem.
		stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, vowel_stem)
		if stems.reducible then
			stems.nonvowel_stem = dereduce(base, vowel_stem)
			stems.oblique_nonvowel_stem = dereduce(base, stems.oblique_nonvowel_stem)
		end
	end

	-- Fill in the stems of one stem set when the lemma ends in a consonant.
	local function fill_from_consonant_lemma(stems)
		local nonvowel_stem = base.nonvowel_stem
		stems.nonvowel_stem = nonvowel_stem
		-- The user specified #, #ě, ## or ##ě and we're dealing with a term like masculine [[bůh]] or feminine
		-- [[sůl]] that ends in a consonant. In this case, all slots except the nom_s and maybe acc_s have vowel
		-- alternation.
		if stems.oblique_slots then
			stems.oblique_slots = "all"
		end
		stems.oblique_nonvowel_stem = com.apply_vowel_alternation(stems.vowelalt, nonvowel_stem)
		if not stems.reducible then
			stems.vowel_stem = nonvowel_stem
			return
		end
		stems.vowel_stem = com.reduce(nonvowel_stem)
		if not stems.vowel_stem then
			error("Unable to reduce stem '" .. nonvowel_stem .. "'")
		end
	end

	if not base.stem_sets then
		base.stem_sets = {{}}
	end

	-- Set default reducible and check for default mixed reducible, which needs to be expanded into two entries.
	local saw_mixed_reducible = false
	for _, stems in ipairs(base.stem_sets) do
		if stems.reducible == nil then
			stems.reducible = base.default_reducible
		end
		if stems.reducible == "mixed" then
			saw_mixed_reducible = true
		end
	end
	if saw_mixed_reducible then
		local expanded_stem_sets = {}
		for _, stems in ipairs(base.stem_sets) do
			-- A "mixed" stem set becomes a reducible one immediately followed by a non-reducible copy.
			local non_reducible_copy
			if stems.reducible == "mixed" then
				non_reducible_copy = m_table.shallowCopy(stems)
				non_reducible_copy.reducible = false
				stems.reducible = true
			end
			expanded_stem_sets[#expanded_stem_sets + 1] = stems
			if non_reducible_copy then
				expanded_stem_sets[#expanded_stem_sets + 1] = non_reducible_copy
			end
		end
		base.stem_sets = expanded_stem_sets
	end

	-- Now determine all the stems for each stem set.
	for _, stems in ipairs(base.stem_sets) do
		if base.vowel_stem then
			fill_from_vowel_lemma(stems)
		else
			fill_from_consonant_lemma(stems)
		end
		stems.oblique_vowel_stem = com.apply_vowel_alternation(stems.vowelalt, stems.vowel_stem)
	end
end
-- Work out the stems and declension of a single word spec `base`, dispatching on its type: pronoun, determiner,
-- numeral, adjectival, manual, or (the default) regular noun.
local function detect_indicator_spec(base)
	if base.pron then
		determine_pronoun_stems(base)
		return
	end
	if base.det then
		determine_determiner_stems(base)
		return
	end
	if base.num then
		determine_numeral_stems(base)
		return
	end
	if base.adj then
		process_declnumber(base)
		synthesize_adj_lemma(base)
		return
	end
	if base.manual then
		if base.stem_sets then
			-- FIXME, maybe this should be allowed?
			error("Reducible and vowel alternation specs cannot be given with manual declensions")
		end
		base.stem_sets = {{reducible = false, vowel_stem = "", nonvowel_stem = ""}}
		base.decl = "manual"
		return
	end

	-- Regular noun. Plural-only lemmas are first converted to a plausible singular.
	if base.number == "pl" then
		synthesize_singular_lemma(base)
	end
	determine_declension(base)
	determine_default_reducible(base)
	determine_stems(base)
end
-- Run detect_indicator_spec() on every word spec, collect the genders seen in the singular and in the plural, and
-- reject specs that combine more than one of pronouns, determiners and numerals.
local function detect_all_indicator_specs(alternant_multiword_spec)
	-- Keep track of all genders seen in the singular and plural so we can determine whether to add the term to
	-- [[:Category:Czech nouns that change gender in the plural]].
	local sg_genders, pl_genders = {}, {}
	alternant_multiword_spec.sg_genders = sg_genders
	alternant_multiword_spec.pl_genders = pl_genders

	iut.map_word_specs(alternant_multiword_spec, function(base)
		detect_indicator_spec(base)
		if base.number ~= "pl" then
			sg_genders[base.actual_gender] = true
		end
		if base.number ~= "sg" then
			-- All t-stem masculines are neuter in the plural.
			local plgender = base.decl == "tstem-m" and "n" or base.actual_gender
			pl_genders[plgender] = true
		end
	end)

	local special_kinds_seen = 0
	for _, flag in ipairs({"saw_pron", "saw_det", "saw_num"}) do
		if alternant_multiword_spec[flag] then
			special_kinds_seen = special_kinds_seen + 1
		end
	end
	if special_kinds_seen > 1 then
		error("Can't combine pronouns, determiners and/or numerals")
	end
end
-- Forward declaration; propagate_alternant_properties() and propagate_multiword_properties() call each other.
local propagate_multiword_properties

-- Propagate `property` within each alternant of `alternant_spec`, then set the property on the alternant spec
-- itself by combining the alternants' values: the first alternant's value is the starting point, and any later
-- alternant with a different (set) value turns the result into `mixed_value`.
local function propagate_alternant_properties(alternant_spec, property, mixed_value, nouns_only)
	local combined
	for _, alternant in ipairs(alternant_spec.alternants) do
		propagate_multiword_properties(alternant, property, mixed_value, nouns_only)
		local value = alternant[property]
		if combined == nil then
			combined = value
		elseif value and value ~= combined then
			combined = mixed_value
		end
	end
	alternant_spec[property] = combined
end
-- Propagate `property` sideways among the words of `multiword_spec` and then upward onto the spec itself.
--
-- Words are scanned left to right. A "nounal" word is an alternant whose property ends up set, or a plain word
-- that is a regular noun (when `nouns_only` is truthy) or that already has the property set (otherwise). Each
-- nounal word's value is copied onto the preceding words that lack one, back to the previous nounal word; the last
-- nounal word's value is also copied onto any following words that lack one. The spec's own property becomes the
-- common value of all nounal words, or `mixed_value` if they differ.
propagate_multiword_properties = function(multiword_spec, property, mixed_value, nouns_only)
	local word_specs = multiword_spec.alternant_or_word_specs or multiword_spec.word_specs
	local combined
	local last_nounal_pos = 0

	-- Give `value` to every word in positions first..last that doesn't yet have the property.
	local function fill_unset(first, last, value)
		for pos = first, last do
			local spec = word_specs[pos]
			spec[property] = spec[property] or value
		end
	end

	for i = 1, #word_specs do
		local spec = word_specs[i]
		local is_nounal
		if spec.alternants then
			-- NOTE(review): `nouns_only` is not forwarded into alternants here; confirm whether that is intended.
			propagate_alternant_properties(spec, property, mixed_value)
			is_nounal = not not spec[property]
		elseif nouns_only then
			is_nounal = is_regular_noun(spec)
		else
			is_nounal = not not spec[property]
		end
		if is_nounal then
			local value = spec[property]
			if not value then
				error("Internal error: noun-type word spec without " .. property .. " set")
			end
			fill_unset(last_nounal_pos + 1, i - 1, value)
			last_nounal_pos = i
			if combined == nil then
				combined = value
			elseif combined ~= value then
				combined = mixed_value
			end
		end
	end
	if last_nounal_pos > 0 then
		fill_unset(last_nounal_pos + 1, #word_specs, word_specs[last_nounal_pos][property])
	end
	multiword_spec[property] = combined
end
-- Push `property` from the top-level spec down to every alternant, multiword spec and word that lacks it, starting
-- from `default_propval` if the top-level spec has no value. Each object also gets "actual_<property>" set to its
-- resulting value unless that field is already set. Throws an error if a word would end up with the value "mixed".
local function propagate_properties_downward(alternant_multiword_spec, property, default_propval)
	local actual_key = "actual_" .. property

	-- Return the object's own value of the property, first setting it to `inherited` if it has none; also
	-- initialize the object's "actual_" field from the result if unset.
	local function inherit(obj, inherited)
		local value = obj[property]
		if not value then
			obj[property] = inherited
			value = inherited
		end
		if not obj[actual_key] then
			obj[actual_key] = value
		end
		return value
	end

	-- Same as inherit(), but for an individual word, which cannot have a mixed value.
	local function inherit_into_word(word_spec, inherited)
		local value = inherit(word_spec, inherited)
		if value == "mixed" then
			-- FIXME, use clearer error message.
			error("Attempt to assign mixed " .. property .. " to word")
		end
		inherit(word_spec, value)
	end

	local top_value = inherit(alternant_multiword_spec, default_propval)
	for _, child in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if child.alternants then
			local child_value = inherit(child, top_value)
			for _, multiword_spec in ipairs(child.alternants) do
				local alternant_value = inherit(multiword_spec, child_value)
				for _, word_spec in ipairs(multiword_spec.word_specs) do
					inherit_into_word(word_spec, alternant_value)
				end
			end
		else
			inherit_into_word(child, top_value)
		end
	end
end
--[=[
Propagate `property` (one of "animacy", "gender" or "number") from nouns to adjacent
adjectives. We proceed as follows:
1. We assume the properties in question are already set on all nouns. This should happen in
set_defaults_and_check_bad_indicators().
2. We first propagate properties upwards and sideways. We recurse downwards from the top. When we encounter a multiword
spec, we proceed left to right looking for a noun. When we find a noun, we fetch its property (recursing if the noun
is an alternant), and propagate it to any adjectives to its left, up to the next noun to the left. When we have
processed the last noun, we also propagate its property value to any adjectives to the right (to handle e.g.
[[anděl strážný]] "guardian angel", where the adjective [[strážný]] should inherit the 'masculine' and 'animate'
properties of [[anděl]]). Finally, we set the property value for the multiword spec itself by combining all the
non-nil properties of the individual elements. If all non-nil properties have the same value, the result is that
value, otherwise it is `mixed_value` (which is "mixed" for animacy and gender, but "both" for number).
3. When we encounter an alternant spec in this process, we recursively process each alternant (which is a multiword
spec) using the previous step, and combine any non-nil properties we encounter the same way as for multiword specs.
4. The effect of steps 2 and 3 is to set the property of each alternant and multiword spec based on its children or its
neighbors.
]=]
-- Run the whole propagation sequence for one `property` on `alternant_multiword_spec`:
-- two passes of propagate_multiword_properties() (the first with the "nouns only" flag, the second
-- with the flag off), then one pass of propagate_properties_downward() using `default_propval`.
-- `mixed_value` is handed through unchanged to the multiword passes.
local function propagate_properties(alternant_multiword_spec, property, default_propval, mixed_value)
	-- Flag value passed as the last argument of each multiword pass, in execution order.
	local pass_flags = {"nouns only", false}
	for pass = 1, 2 do
		propagate_multiword_properties(alternant_multiword_spec, property, mixed_value, pass_flags[pass])
	end
	propagate_properties_downward(alternant_multiword_spec, property, default_propval)
end
-- Record where the first regular noun sits.
--
-- For every alternant encountered, each of its multiword specs gets `first_noun` set to the index of
-- its first word spec for which is_regular_noun() is true. At the top level,
-- `alternant_multiword_spec.first_noun` is set to the index of the first element that either is a
-- regular noun itself or is an alternant containing one. Scanning stops at the first plain (non-alternant)
-- regular noun.
--
-- Previously an alternant containing a noun did not stop a later element from overwriting the
-- top-level `first_noun`, so the stored index could point at a noun that was not the first one.
local function determine_noun_status(alternant_multiword_spec)
	-- Whether the top-level `first_noun` has already been assigned during this scan.
	local top_level_found = false
	for i, alternant_or_word_spec in ipairs(alternant_multiword_spec.alternant_or_word_specs) do
		if alternant_or_word_spec.alternants then
			local has_noun = false
			for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
				for j, word_spec in ipairs(multiword_spec.word_specs) do
					if is_regular_noun(word_spec) then
						multiword_spec.first_noun = j
						has_noun = true
						break
					end
				end
			end
			-- Keep scanning so later alternants still get their own per-alternant `first_noun`, but never
			-- let them replace an earlier top-level hit.
			if has_noun and not top_level_found then
				alternant_multiword_spec.first_noun = i
				top_level_found = true
			end
		elseif is_regular_noun(alternant_or_word_spec) then
			if not top_level_found then
				alternant_multiword_spec.first_noun = i
			end
			return
		end
	end
end
-- Decide the part of speech for the whole spec and store it in `.pos`, with its pluralized form in
-- `.plpos`. An explicit `args.pos` wins; otherwise the saw_* flags are checked in the order
-- pronoun, determiner, numeral, and the noun label is the fallback.
local function set_pos(alternant_multiword_spec)
	local spec = alternant_multiword_spec
	local pos = spec.args.pos
	if not pos then
		if spec.saw_pron and not spec.saw_non_pron then
			pos = "သဗ္ဗနာမ်"
		elseif spec.saw_det and not spec.saw_non_det then
			pos = "ဖျေံလဝ်သန္နိဋ္ဌာန်"
		elseif spec.saw_num and not spec.saw_non_num then
			pos = "ဂၞန်သၚ်္ချာ"
		else
			pos = "နာမ်"
		end
	end
	spec.pos = pos
	spec.plpos = require(en_utilities_module).pluralize(pos)
end
-- Normalize the lemma of every word spec. For each base this fills in:
--   orig_lemma          - the lemma as given (a blank lemma is first replaced by `pagename`)
--   orig_lemma_no_links - the same with links removed
--   all_uppercase       - set to true when the link-free lemma equals its own uppercased form
--   actual_lemma        - the link-free lemma, lowercased in the all-uppercase case
--   lemma               - `decllemma` when present, otherwise `actual_lemma`
local function normalize_all_lemmas(alternant_multiword_spec, pagename)
	local function normalize(base)
		if base.lemma == "" then
			base.lemma = pagename
		end
		local given = base.lemma
		base.orig_lemma = given
		local plain = m_links.remove_links(given)
		base.orig_lemma_no_links = plain
		-- All-uppercase lemmas are handled in lowercase and flagged so that the case can be restored later.
		-- FIXME (carried over from the original): this may not make sense at all.
		if plain == uupper(plain) then
			base.all_uppercase = true
			plain = ulower(plain)
		end
		base.actual_lemma = plain
		base.lemma = base.decllemma or plain
	end
	iut.map_word_specs(alternant_multiword_spec, normalize)
end
-- Decline a single word spec `base`.
--
-- Runs the declension function registered in `decls` under `base.decl` once per stem set, then applies
-- derived slots and overrides. When `base.actual_number` differs from `base.number`, the forms generated
-- for the declined number are copied to the other number (including the linked nominative), and, unless
-- `actual_number` is "both", the slots of the number that does not actually exist are erased.
--
-- Raises an internal error if `base.decl` does not name a known declension. The message is built with
-- tostring() so that a missing (nil) `decl` reports this error rather than failing on the concatenation.
local function decline_noun(base)
	for _, stems in ipairs(base.stem_sets) do
		local decl_fn = decls[base.decl]
		if not decl_fn then
			error("Internal error: Unrecognized declension type '" .. tostring(base.decl) .. "'")
		end
		decl_fn(base, stems)
	end
	handle_derived_slots_and_overrides(base)
	local function copy(from_slot, to_slot)
		base.forms[to_slot] = base.forms[from_slot]
	end
	if base.actual_number ~= base.number then
		local source_num = base.number == "sg" and "_s" or "_p"
		local dest_num = base.number == "sg" and "_p" or "_s"
		for case, _ in pairs(cases) do
			copy(case .. source_num, case .. dest_num)
		end
		-- The linked nominative does not depend on the case being iterated, so copy it once.
		copy("nom" .. source_num .. "_linked", "nom" .. dest_num .. "_linked")
		if base.actual_number ~= "both" then
			local erase_num = base.actual_number == "sg" and "_p" or "_s"
			for case, _ in pairs(cases) do
				base.forms[case .. erase_num] = nil
			end
			base.forms["nom" .. erase_num .. "_linked"] = nil
		end
	end
end
-- Return the variant code carried by `form`. Variant detection is currently disabled, so this always
-- returns nil regardless of `form`.
local function get_variants(form)
	--[=[
	FIXME: disabled implementation, kept for reference:
	return
		form:find(com.VAR1) and "var1" or
		form:find(com.VAR2) and "var2" or
		form:find(com.VAR3) and "var3" or
		nil
	]=]
	return nil
end
-- Compute the categories to add the noun to, as well as the annotation to display in the
-- declension title bar. We combine the code to do these functions as both categories and
-- title bar contain similar information.
--
-- Results are stored on `alternant_multiword_spec`: `.annotation` (space-joined annotation parts) and
-- `.categories` (the `all_cats` list). NOTE: the body of the local insert() helper below is commented
-- out, so in this version nothing is ever added to `all_cats` and `.categories` is always an empty
-- list; the category-related code paths are retained but have no effect.
local function compute_categories_and_annotation(alternant_multiword_spec)
local all_cats = {}
-- Category inserter. Currently a no-op because its only statement is commented out.
local function insert(cattype)
-- m_table.insertIfNot(all_cats, "Czech " .. cattype)
end
if alternant_multiword_spec.pos == "နာမ်" then
if alternant_multiword_spec.actual_number == "sg" then
-- insert("uncountable nouns")
elseif alternant_multiword_spec.actual_number == "pl" then
-- insert("pluralia tantum")
end
end
-- NOTE(review): `annotation` is declared but never assigned or read in this function.
local annotation
-- Accumulators filled by do_word_spec() and assembled into the annotation at the end.
local annparts = {}
local decldescs = {}
local vowelalts = {}
local foreign = {}
local irregs = {}
local stemspecs = {}
-- nil until the first stem set is seen; afterwards that stem set's `reducible` value, or "mixed" once
-- two stem sets disagree.
local reducible = nil
-- Build a description such as "masculine animate" from gender and animacy codes. The animacy word is
-- only appended when the gender is neither "f" nor "n". Unknown or "none" codes contribute nothing,
-- so the result may be an empty string.
local function get_genanim(gender, animacy)
local gender_code_to_desc = {
m = "masculine",
f = "feminine",
n = "neuter",
none = nil,
}
local animacy_code_to_desc = {
an = "animate",
inan = "inanimate",
none = nil,
}
local descs = {}
table.insert(descs, gender_code_to_desc[gender])
if gender ~= "f" and gender ~= "n" then
-- masculine or "none" (e.g. certain pronouns and numerals)
table.insert(descs, animacy_code_to_desc[animacy])
end
return table.concat(descs, " ")
end
-- Collapse runs of spaces into a single space, then strip leading/trailing whitespace.
local function trim(text)
text = text:gsub(" +", " ")
return mw.text.trim(text)
end
-- Fold the information of one word spec (`base`) into the accumulators above, once per stem set.
local function do_word_spec(base)
local actual_genanim = get_genanim(base.actual_gender, base.actual_animacy)
local declined_genanim = get_genanim(base.gender, base.animacy)
local genanim
if actual_genanim ~= declined_genanim then
genanim = ("%s (declined as %s)"):format(actual_genanim, declined_genanim)
-- insert("nouns with actual gender different from declined gender")
else
genanim = actual_genanim
end
if base.actual_gender == "m" then
-- Insert a category for 'Czech masculine animate nouns' or 'Czech masculine inanimate nouns'; the base categories
-- [[:Category:Czech masculine nouns]], [[:Czech animate nouns]] are auto-inserted.
-- insert(actual_genanim .. " " .. alternant_multiword_spec.plpos)
end
for _, stems in ipairs(base.stem_sets) do
local props = declprops[base.decl]
-- `props.cat` may be a function (called with base and stems), a single string, or a list of strings.
local cats = props.cat
if type(cats) == "function" then
cats = cats(base, stems)
end
if type(cats) == "string" then
cats = {cats}
end
local default_desc
for i, cat in ipairs(cats) do
-- Expand the placeholders: a spec without any placeholder gets " GENPOS" appended; GENPOS becomes
-- "GENDER POS"; a spec still lacking POS gets " POS" appended.
if not cat:find("GENDER") and not cat:find("GENPOS") and not cat:find("POS") then
cat = cat .. " GENPOS"
end
cat = cat:gsub("GENPOS", "GENDER POS")
if not cat:find("POS") then
cat = cat .. " POS"
end
-- The last category spec, minus " POS" but with GENDER still unexpanded, is the fallback description.
if i == #cats then
default_desc = cat:gsub(" POS", "")
end
cat = cat:gsub("GENDER", actual_genanim)
cat = cat:gsub("POS", alternant_multiword_spec.plpos)
-- Need to trim `cat` because actual_genanim may be an empty string.
insert(trim(cat))
end
-- `props.desc` may be a function (called with base and stems) or a string; falls back to `default_desc`.
local desc = props.desc
if type(desc) == "function" then
desc = desc(base, stems)
end
desc = desc or default_desc
desc = desc:gsub("GENDER", genanim)
-- Need to trim `desc` because genanim may be an empty string.
m_table.insertIfNot(decldescs, trim(desc))
local vowelalt
if stems.vowelalt == "quant" then
vowelalt = "quant-alt"
-- insert("nouns with quantitative vowel alternation")
elseif stems.vowelalt == "quant-ě" then
vowelalt = "í-ě-alt"
-- insert("nouns with í-ě alternation")
end
if vowelalt then
m_table.insertIfNot(vowelalts, vowelalt)
end
if reducible == nil then
reducible = stems.reducible
elseif reducible ~= stems.reducible then
reducible = "mixed"
end
if stems.reducible then
-- insert("nouns with reducible stem")
end
if base.foreign then
m_table.insertIfNot(foreign, "foreign")
if not base.decllemma then
-- NOTE: there are nouns that use both 'foreign' and 'decllemma', e.g. [[Zeus]].
-- insert("nouns with regular foreign declension")
end
end
-- User-specified 'decllemma:' indicates irregular stem. Don't consider foreign nouns in -us/-os/-es, -um/-on or
-- silent -e (e.g. [[software]]) where this ending is simply dropped in oblique and plural forms as irregular;
-- there are too many of these and they are already categorized above as 'nouns with regular foreign declension'.
if base.decllemma then
m_table.insertIfNot(irregs, "irreg-stem")
-- insert("nouns with irregular stem")
end
m_table.insertIfNot(stemspecs, stems.vowel_stem)
end
end
-- Only the "key" element is described: the one at `first_noun`, or the first element when `first_noun`
-- is unset. If that element is an alternant, the key word of each of its alternatives is processed.
local key_entry = alternant_multiword_spec.first_noun or 1
if #alternant_multiword_spec.alternant_or_word_specs >= key_entry then
local alternant_or_word_spec = alternant_multiword_spec.alternant_or_word_specs[key_entry]
if alternant_or_word_spec.alternants then
for _, multiword_spec in ipairs(alternant_or_word_spec.alternants) do
key_entry = multiword_spec.first_noun or 1
if #multiword_spec.word_specs >= key_entry then
do_word_spec(multiword_spec.word_specs[key_entry])
end
end
else
do_word_spec(alternant_or_word_spec)
end
end
-- Assemble the annotation parts in display order.
if alternant_multiword_spec.actual_number == "sg" or alternant_multiword_spec.actual_number == "pl" then
-- not "both" or "none" (for [[sebe]])
table.insert(annparts, alternant_multiword_spec.actual_number == "sg" and "sg-only" or "pl-only")
end
if #decldescs == 0 then
table.insert(annparts, "indecl")
else
table.insert(annparts, table.concat(decldescs, " // "))
end
if #vowelalts > 0 then
table.insert(annparts, table.concat(vowelalts, "/"))
end
if reducible == "mixed" then
table.insert(annparts, "mixed-reducible")
elseif reducible then
table.insert(annparts, "reducible")
end
if #foreign > 0 then
table.insert(annparts, table.concat(foreign, " // "))
end
if #irregs > 0 then
table.insert(annparts, table.concat(irregs, " // "))
end
alternant_multiword_spec.annotation = table.concat(annparts, " ")
if #stemspecs > 1 then
-- insert("nouns with multiple stems")
end
if alternant_multiword_spec.actual_number == "both" and not m_table.deepEquals(alternant_multiword_spec.sg_genders, alternant_multiword_spec.pl_genders) then
-- insert("nouns that change gender in the plural")
end
alternant_multiword_spec.categories = all_cats
end
-- Hand the generated forms to iut.show_forms() for display formatting. The lemma list is taken from
-- the first slot in `potential_lemma_slots` that has any forms; canonicalization is the identity.
local function show_forms(alternant_multiword_spec)
	local all_forms = alternant_multiword_spec.forms
	local lemmas = {}
	for _, slot in ipairs(potential_lemma_slots) do
		local slot_forms = all_forms[slot]
		if slot_forms then
			-- FIXME, now can support footnotes as qualifiers in headwords?
			for _, formobj in ipairs(slot_forms) do
				table.insert(lemmas, formobj.form)
			end
			break
		end
	end
	-- Identity canonicalizer; the variant-code stripping (com.remove_variant_codes) is disabled.
	local function canonicalize(form)
		return form
	end
	iut.show_forms(all_forms, {
		lemmas = lemmas,
		slot_table = alternant_multiword_spec.output_noun_slots,
		lang = lang,
		canonicalize = canonicalize,
	})
end
-- Build the wikitext of the declension table for `alternant_multiword_spec`.
--
-- Three layouts exist: a two-column table when `actual_number` is "both", a one-number table with
-- stressed/clitic columns when `has_clitic` is set, and a plain one-number table otherwise. The
-- function also stores `title`, `annotation` and `notes_clause` in the forms table, which is then used
-- as the substitution source for m_string_utilities.format().
local function make_table(alternant_multiword_spec)
	local forms = alternant_multiword_spec.forms
	local actual_number = alternant_multiword_spec.actual_number

	-- Opening markup shared by all layouts; MINWIDTH is replaced by `min_width` (in em).
	local function template_prelude(min_width)
		return rsub([=[
<div>
<div class="NavFrame" style="max-width:MINWIDTHem">
<div class="NavHead" style="background:var(--wikt-palette-lighterblue, #ebf4ff);">{title}{annotation}</div>
<div class="NavContent" style="overflow:auto">
{\op}| style="min-width:MINWIDTHem" class="inflection-table inflection"
|- class="rowgroup"
]=], "MINWIDTH", min_width)
	end

	-- Closing markup shared by all layouts.
	local function template_postlude()
		return [=[
|{\cl}{notes_clause}</div></div></div>]=]
	end

	-- Layout with both singular and plural columns.
	local both_numbers_spec = template_prelude("45") .. [=[
! style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);" |
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | ကိုန်ဨကဝုစ်
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | ကိုန်ဗဟုဝစ်
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|မဒုၚ်ယၟု
| {nom_s}
| {nom_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ဗဳဇဂကူ
| {gen_s}
| {gen_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပြကမ္မကာရက
| {dat_s}
| {dat_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ကမ္မကာရက
| {acc_s}
| {acc_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပရေၚ်ဂယိုၚ်လမျီု
| {voc_s}
| {voc_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ခၞံဗဒှ်ဌာန်မတန်တဴ
| {loc_s}
| {loc_p}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်
| {ins_s}
| {ins_p}
]=] .. template_postlude()

	-- Layout with a single number column; NUMBER is the column heading, CODE the slot suffix.
	local function get_table_spec_one_number(number, numcode)
		local rows = [=[
! style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);" |
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | NUMBER
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|မဒုၚ်ယၟု
| {nom_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ဗဳဇဂကူ
| {gen_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပြကမ္မကာရက
| {dat_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ကမ္မကာရက
| {acc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပရေၚ်ဂယိုၚ်လမျီု
| {voc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ခၞံဗဒှ်ဌာန်မတန်တဴ
| {loc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်
| {ins_CODE}
]=]
		local opening = template_prelude("30")
		local filled = rows:gsub("NUMBER", number)
		filled = filled:gsub("CODE", numcode)
		return opening .. filled .. template_postlude()
	end

	-- Layout with a single number split into stressed and clitic columns.
	local function get_table_spec_one_number_clitic(number, numcode)
		local rows = [=[
! rowspan=2 style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);"|
! colspan=2 style="background:var(--wikt-palette-lightblue, #d9ebff);" | NUMBER
|-
! style="width:33%;background:var(--wikt-palette-lightblue, #d9ebff);" | stressed
! style="background:var(--wikt-palette-lightblue, #d9ebff);" | clitic
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|မဒုၚ်ယၟု
| colspan=2 | {nom_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ဗဳဇဂကူ
| {gen_CODE}
| {clitic_gen_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပြကမ္မကာရက
| {dat_CODE}
| {clitic_dat_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ကမ္မကာရက
| {acc_CODE}
| {clitic_acc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ပရေၚ်ဂယိုၚ်လမျီု
| colspan=2 | {voc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|ခၞံဗဒှ်ဌာန်မတန်တဴ
| colspan=2 | {loc_CODE}
|-
!style="background:var(--wikt-palette-lighterblue, #ebf4ff);"|တိၚ်တိုက်ကပေါတ်ကွိၚ်ကွိုက်
| colspan=2 | {ins_CODE}
]=]
		local opening = template_prelude("40")
		local filled = rows:gsub("NUMBER", number)
		filled = filled:gsub("CODE", numcode)
		return opening .. filled .. template_postlude()
	end

	local notes_template = [=[
<div style="width:100%;text-align:left;background:var(--wikt-palette-lightblue, #d9ebff);">
<div style="display:inline-block;text-align:left;padding-left:1em;padding-right:1em">
{footnote}
</div></div>
]=]

	-- A user-supplied title wins; otherwise derive one from the lemma.
	forms.title = alternant_multiword_spec.title or
		('မလဟုတ်စှ်ေဆေၚ်စပ်ကဵု <i lang="cs">' .. forms.lemma .. '</i>')

	local annotation = alternant_multiword_spec.annotation
	if annotation ~= "" then
		forms.annotation = " (<span style=\"font-size: smaller;\">" .. annotation .. "</span>)"
	else
		forms.annotation = ""
	end

	-- Column heading and slot suffix for the one-number layouts, keyed by `actual_number`.
	-- "none" is used for [[sebe]]. Both stay nil for any other value (e.g. "both").
	local heading_and_code = {
		sg = {"singular", "s"},
		pl = {"plural", "p"},
		none = {"", "s"},
	}
	local number, numcode
	local selected = heading_and_code[actual_number]
	if selected then
		number, numcode = selected[1], selected[2]
	end

	local table_spec
	if actual_number == "both" then
		table_spec = both_numbers_spec
	elseif alternant_multiword_spec.has_clitic then
		table_spec = get_table_spec_one_number_clitic(number, numcode)
	else
		table_spec = get_table_spec_one_number(number, numcode)
	end

	-- Only emit the notes box when there is a footnote to show.
	if forms.footnote ~= "" then
		forms.notes_clause = m_string_utilities.format(notes_template, forms) or ""
	else
		forms.notes_clause = ""
	end
	return m_string_utilities.format(table_spec, forms)
end
-- Build the list of gender specs for the headword. Each word spec contributes
-- "<gender>-<animacy>" (with the animacy code "inan" shortened to "in"), suffixed with "-p" when the
-- overall `actual_number` is "pl". Duplicates are skipped via m_table.insertIfNot().
local function compute_headword_genders(alternant_multiword_spec)
	local number_suffix = alternant_multiword_spec.actual_number == "pl" and "-p" or ""
	local genders = {}
	local function add_gender(base)
		local animacy_code = base.animacy
		if animacy_code == "inan" then
			animacy_code = "in"
		end
		local gender_spec = base.gender .. "-" .. animacy_code .. number_suffix
		m_table.insertIfNot(genders, gender_spec)
	end
	iut.map_word_specs(alternant_multiword_spec, add_gender)
	return genders
end
-- Externally callable function to parse and decline a noun given user-specified arguments.
-- Return value is ALTERNANT_MULTIWORD_SPEC, an object where the declined forms are in
-- `ALTERNANT_MULTIWORD_SPEC.forms` for each slot. If there are no values for a slot, the
-- slot key will be missing. The value for a given slot is a list of objects
-- {form=FORM, footnotes=FOOTNOTES}.
--
-- `parent_args` is the raw argument table; `from_headword`, when truthy, additionally accepts the
-- headword-only parameters listed below. When the `json` argument is set, the return value is
-- instead a JSON string of the spec (with its `args` field removed).
function export.do_generate_forms(parent_args, from_headword)
-- Parameters accepted in all cases.
local params = {
[1] = {required = true, template_default = "bůh<m.an.#.voce>"},
title = true,
pagename = true,
json = {type = "boolean"},
pos = true,
}
-- Extra parameters accepted only when called from the headword code.
if from_headword then
params["head"] = {list = true}
params["lemma"] = {list = true}
params["g"] = {list = true}
params["f"] = {list = true}
params["m"] = {list = true}
params["adj"] = {list = true}
params["dim"] = {list = true}
params["id"] = {}
end
local args = m_para.process(parent_args, params)
local parse_props = {
parse_indicator_spec = parse_indicator_spec,
angle_brackets_omittable = true,
allow_blank_lemma = true,
}
local alternant_multiword_spec = iut.parse_inflected_text(args[1], parse_props)
alternant_multiword_spec.title = args.title
alternant_multiword_spec.args = args
-- Page name precedence: explicit pagename=, then the first head= (headword calls only), then the
-- page name from the headword data module.
local pagename = args.pagename or from_headword and args.head[1] or mw.loadData("Module:headword/data").pagename
normalize_all_lemmas(alternant_multiword_spec, pagename)
set_all_defaults_and_check_bad_indicators(alternant_multiword_spec)
-- These need to happen before detect_all_indicator_specs() so that adjectives get their genders and numbers set
-- appropriately, which are needed to correctly synthesize the adjective lemma.
propagate_properties(alternant_multiword_spec, "animacy", "inan", "mixed")
propagate_properties(alternant_multiword_spec, "number", "both", "both")
-- FIXME, the default value (third param) used to be 'm' with a comment indicating that this applied only to
-- plural adjectives, where it didn't matter; but in Czech, plural adjectives are distinguished for gender and
-- animacy. Make sure 'mixed' works.
propagate_properties(alternant_multiword_spec, "gender", "mixed", "mixed")
detect_all_indicator_specs(alternant_multiword_spec)
-- Propagate 'actual_number' after calling detect_all_indicator_specs(), which sets 'actual_number' for adjectives.
propagate_properties(alternant_multiword_spec, "actual_number", "both", "both")
-- set_pos() reads `.args`, `.saw_*`; get_output_noun_slots() is computed before inflection because
-- the inflection properties below refer to the resulting slot table.
determine_noun_status(alternant_multiword_spec)
set_pos(alternant_multiword_spec)
alternant_multiword_spec.output_noun_slots = get_output_noun_slots(alternant_multiword_spec)
local inflect_props = {
skip_slot = function(slot)
return skip_slot(alternant_multiword_spec.actual_number, slot)
end,
slot_table = alternant_multiword_spec.output_noun_slots,
get_variants = get_variants,
inflect_word_spec = decline_noun,
}
iut.inflect_multiword_or_alternant_multiword_spec(alternant_multiword_spec, inflect_props)
compute_categories_and_annotation(alternant_multiword_spec)
alternant_multiword_spec.genders = compute_headword_genders(alternant_multiword_spec)
if args.json then
-- Drop the processed arguments from the spec before serializing it.
alternant_multiword_spec.args = nil
return require("Module:JSON").toJSON(alternant_multiword_spec)
end
return alternant_multiword_spec
end
-- Entry point for {{cs-ndecl}}: template-callable function that reads the arguments of the calling
-- template, declines the noun and returns the wikitext of the declension table followed by the
-- formatted categories. If do_generate_forms() returns a string (its JSON output), that string is
-- returned unchanged.
function export.show(frame)
	local result = export.do_generate_forms(frame:getParent().args)
	if type(result) == "string" then
		-- JSON return value
		return result
	end
	show_forms(result)
	local table_text = make_table(result)
	local category_text = require("Module:utilities").format_categories(result.categories, lang, nil, nil, force_cat)
	return table_text .. category_text
end
return export
mtrp29lr8sw4z1l56khbb3kp52tw710
ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ချက်ခ်ဂမၠိုၚ်
14
294765
395155
2026-05-19T18:05:13Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏချက်ခ်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ခ]]"
395155
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏချက်ခ်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ခ]]
jdik58on0bcwkwn1w8yxrtx43z2vlxz
ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏချက်ခ်ဂမၠိုၚ်
14
294766
395156
2026-05-19T18:06:40Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်ချက်ခ်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ခ]]"
395156
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်ချက်ခ်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ခ]]
pvcrm3xfckiy6mwm6u5jsmugwiagmtb
ထာမ်ပလိက်:cs-ndecl/documentation
10
294767
395157
2026-05-19T18:07:41Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} {{uses lua|cs-noun}} {{documentation needed}}<!-- Replace this with a short description of the purpose of the template, and how to use it. --> <includeonly> <!-- add a category of your choice --> {{tcat|ndecl:ndecl}} </includeonly>"
395157
wikitext
text/x-wiki
{{documentation subpage}}
{{uses lua|cs-noun}}
{{documentation needed}}<!-- Replace this with a short description of the purpose of the template, and how to use it. -->
<includeonly>
<!-- add a category of your choice -->
{{tcat|ndecl:ndecl}}
</includeonly>
8am43kkftaocyyeibn72ycnabaorv80
ထာမ်ပလိက်:agent noun of
10
294768
395159
2026-05-19T18:19:41Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{ {{#if:{{{lang|}}}|check deprecated lang param usage|no deprecated lang param usage}}|lang={{{lang|}}}|<!-- -->{{#invoke:form of/templates|form_of_t|သၞးပွမနာမ်နူဝေါဟာ |withencap=1|cat=သၞးပွမနာမ်}}<!-- -->}}<!-- --><noinclude>{{documentation}}</noinclude>"
395159
wikitext
text/x-wiki
{{ {{#if:{{{lang|}}}|check deprecated lang param usage|no deprecated lang param usage}}|lang={{{lang|}}}|<!--
-->{{#invoke:form of/templates|form_of_t|သၞးပွမနာမ်နူဝေါဟာ |withencap=1|cat=သၞးပွမနာမ်}}<!--
-->}}<!--
--><noinclude>{{documentation}}</noinclude>
dl5wtd6m8ghkpx0pyhmumge8ngjy4kz
ထာမ်ပလိက်:agent noun of/documentation
10
294769
395160
2026-05-19T18:21:42Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{form of/infldoc|sgdesc=the [[Appendix:Glossary#agent noun|agent noun]]|withencap=1|cat=agent nouns}} <includeonly> [[ကဏ္ဍ:ထာမ်ပလိက်ဗီုပြၚ်မဆေၚ်စပ်ဂမၠိုၚ်]] [[ကဏ္ဍ:ထာမ်ပလိက်ဗီုပြၚ်မဆေၚ်စပ်ကဵုသဒ္ဒာဂမၠိုၚ်]] </includeonly>"
395160
wikitext
text/x-wiki
{{form of/infldoc|sgdesc=the [[Appendix:Glossary#agent noun|agent noun]]|withencap=1|cat=agent nouns}}
<includeonly>
[[ကဏ္ဍ:ထာမ်ပလိက်ဗီုပြၚ်မဆေၚ်စပ်ဂမၠိုၚ်]]
[[ကဏ္ဍ:ထာမ်ပလိက်ဗီုပြၚ်မဆေၚ်စပ်ကဵုသဒ္ဒာဂမၠိုၚ်]]
</includeonly>
0y6ptjes4i5uyy9s1wcny3v8lwy1ghp
ကဏ္ဍ:သၞးပွမနာမ်ဂျာမာန်ဂမၠိုၚ်
14
294770
395162
2026-05-19T18:26:08Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ဘာသာဂျာမာန်]]"
395162
wikitext
text/x-wiki
[[ကဏ္ဍ:ဘာသာဂျာမာန်]]
i3xb1yaff9mj7cee5l0lfzy48inljn7
Hemmers
0
294771
395164
2026-05-19T18:30:59Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{also|hemmers}} =={{=en=}}== ===နာမ်မကိတ်ညဳ=== {{head|en|proper noun form}} # {{plural of|en|Hemmer}}"
395164
wikitext
text/x-wiki
{{also|hemmers}}
=={{=en=}}==
===နာမ်မကိတ်ညဳ===
{{head|en|proper noun form}}
# {{plural of|en|Hemmer}}
nmb9tbkrqvfvwy2d2j2by6wr6r1vjuv
hemmers
0
294772
395165
2026-05-19T18:31:38Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{also|Hemmers}} =={{=en=}}== ===နာမ်=== {{head|en|noun form}} # {{plural of|en|hemmer}}"
395165
wikitext
text/x-wiki
{{also|Hemmers}}
=={{=en=}}==
===နာမ်===
{{head|en|noun form}}
# {{plural of|en|hemmer}}
evk2vx403y3kfp2jk57se03cgo3o2w7
hemmer
0
294773
395166
2026-05-19T18:34:11Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{also|Hemmer}} =={{=en=}}== ===နာမ်=== {{en-noun}} # ညးမထိၚ်ဒက် ဝါ ညးမအုပ်ဓုပ်။ ==နဝ်ဝေ ဗော်ခ်မဝ်== ===ကြိယာ=== {{head|nb|verbf}} # {{infl of|nb|hemme||pres}}"
395166
wikitext
text/x-wiki
{{also|Hemmer}}
=={{=en=}}==
===နာမ်===
{{en-noun}}
# ညးမထိၚ်ဒက် ဝါ ညးမအုပ်ဓုပ်။
==နဝ်ဝေ ဗော်ခ်မဝ်==
===ကြိယာ===
{{head|nb|verbf}}
# {{infl of|nb|hemme||pres}}
jkxh0dxh3oimk4z4sw3q39pkfxoqn7z
hemme
0
294774
395168
2026-05-20T03:21:02Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{also|Hemme}} ==ဂျာမာန်== ===ဗွဟ်ရမ္သာၚ်=== * {{audio|de|De-hemme.ogg|a=<<Germany>> (<<Berlin>>)}} ===ကြိယာ=== {{head|de|verb form}} # {{verb form of|de|hemmen||1|s|pres|;|s|imp|;|1//3|s|sub|I}} ==အၚ်္ဂလိက် အဒေါဝ်== ===သဗ္ဗနာမ်=== {{head|enm|pronoun|altform=1}} # {{alt form|enm|hem|t=them|id=them}} ===နာမ်=== {..."
395168
wikitext
text/x-wiki
{{also|Hemme}}
==ဂျာမာန်==
===ဗွဟ်ရမ္သာၚ်===
* {{audio|de|De-hemme.ogg|a=<<Germany>> (<<Berlin>>)}}
===ကြိယာ===
{{head|de|verb form}}
# {{verb form of|de|hemmen||1|s|pres|;|s|imp|;|1//3|s|sub|I}}
==အၚ်္ဂလိက် အဒေါဝ်==
===သဗ္ဗနာမ်===
{{head|enm|pronoun|altform=1}}
# {{alt form|enm|hem|t=them|id=them}}
===နာမ်===
{{head|enm|noun|altform=1}}
# {{alt form|enm|hem|t=hem|id=hem}}
==နဝ်ဝေ ဗော်ခ်မဝ်==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{der|nb|nds|hemmen}}
===ကြိယာ===
{{head|nb|verb|မနွံကဵုပၟိက်အကာဲအရာ|hem|ကာလပစ္စုပ္ပန်|hemmer|ဟွံတဝ်စၞေဟ်|hemmes|အတိက်ဓမ္မတာကဵု လုပ်ကၠောန်စွံလဝ်နကဵုအတိက်|hemma|ဝါ|hemmet|လုပ်ကၠောန်စွံလဝ်ပစ္စုပ္ပန်|hemmende}}
# သကဵုစဵုဒၞာ၊ ကြတ်ဒၞာ၊ ဒၟံၚ်တန်တဴ၊ ဒေါံတန်အာ၊ ရပ်ဒက်၊ ထိၚ်ဒဝ်။
==နဝ်ဝေ နဳနိုတ်==
===ပွံၚ်နဲတၞဟ်===
* {{alt|nn|hemma}}
* {{alt|nn|hemje}}
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{der|nn|nds|hemmen}}
===ကြိယာ===
{{nn-verb-irreg|hemmar|hemma|hemma}}
# သကဵုစဵုဒၞာ၊ ကြတ်ဒၞာ၊ ဒၟံၚ်တန်တဴ၊ ဒေါံတန်အာ၊ ရပ်ဒက်၊ ထိၚ်ဒဝ်။
==သလာ==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{der|slr|fa-cls|هَمَه}}
===ဗွဟ်ရမ္သာၚ်===
* {{IPA|slr|[hæmmæ]|a=Jiezi,Gaizi,Qingshui,Xunhua,Qinghai,Ili,Yining,Xinjiang}}
* {{IPA|slr|[hæmæ]|a=Qingshui,Xunhua,Qinghai}}
* {{IPA|slr|[hemme]|a=Baizhuang,Xunhua,Qinghai}}
===ကြိယာဝိသေသန===
{{head|slr|adverb}}
# သီုဖအိုတ်။
==တာခ်မေန်==
{{tk-variant|c=хемме|l=hemme|a=همه}}
===နိရုတ်===
{{bor+|tk|fa-cls|هَمَه}}
===ဗွဟ်ရမ္သာၚ်===
* {{IPA|tk|[ˈhem.me]}}
===ဖျေံလဝ်သန္နိဋ္ဌာန်===
{{head|tk|determiner}}
# သီုဖအိုတ်၊ ဗွဲ။
====မလဟုတ်စှ်ေ====
{{tk-decl-noun-auto|e|v}}
ouhesz2pogab94mmps31h5qnxm40zdj
395175
395168
2026-05-20T03:34:12Z
咽頭べさ
33
395175
wikitext
text/x-wiki
{{also|Hemme}}
==ဂျာမာန်==
===ဗွဟ်ရမ္သာၚ်===
* {{audio|de|De-hemme.ogg|a=<<Germany>> (<<Berlin>>)}}
===ကြိယာ===
{{head|de|verb form}}
# {{verb form of|de|hemmen||1|s|pres|;|s|imp|;|1//3|s|sub|I}}
==အၚ်္ဂလိက် အဒေါဝ်==
===သဗ္ဗနာမ်===
{{head|enm|pronoun|altform=1}}
# {{alt form|enm|hem|id=them}}
===နာမ်===
{{head|enm|noun|altform=1}}
# {{alt form|enm|hem|id=hem}}
==နဝ်ဝေ ဗော်ခ်မဝ်==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{der|nb|nds|hemmen}}
===ကြိယာ===
{{head|nb|verb|မနွံကဵုပၟိက်အကာဲအရာ|hem|ကာလပစ္စုပ္ပန်|hemmer|ဟွံတဝ်စၞေဟ်|hemmes|အတိက်ဓမ္မတာကဵု လုပ်ကၠောန်စွံလဝ်နကဵုအတိက်|hemma|ဝါ|hemmet|လုပ်ကၠောန်စွံလဝ်ပစ္စုပ္ပန်|hemmende}}
# သကဵုစဵုဒၞာ၊ ကြတ်ဒၞာ၊ ဒၟံၚ်တန်တဴ၊ ဒေါံတန်အာ၊ ရပ်ဒက်၊ ထိၚ်ဒဝ်။
==နဝ်ဝေ နဳနိုတ်==
===ပွံၚ်နဲတၞဟ်===
* {{alt|nn|hemma}}
* {{alt|nn|hemje}}
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{der|nn|nds|hemmen}}
===ကြိယာ===
{{nn-verb-irreg|hemmar|hemma|hemma}}
# သကဵုစဵုဒၞာ၊ ကြတ်ဒၞာ၊ ဒၟံၚ်တန်တဴ၊ ဒေါံတန်အာ၊ ရပ်ဒက်၊ ထိၚ်ဒဝ်။
==သလာ==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ {{der|slr|fa-cls|هَمَه}}
===ဗွဟ်ရမ္သာၚ်===
* {{IPA|slr|[hæmmæ]|a=Jiezi,Gaizi,Qingshui,Xunhua,Qinghai,Ili,Yining,Xinjiang}}
* {{IPA|slr|[hæmæ]|a=Qingshui,Xunhua,Qinghai}}
* {{IPA|slr|[hemme]|a=Baizhuang,Xunhua,Qinghai}}
===ကြိယာဝိသေသန===
{{head|slr|adverb}}
# သီုဖအိုတ်။
==တာခ်မေန်==
{{tk-variant|c=хемме|l=hemme|a=همه}}
===နိရုတ်===
{{bor+|tk|fa-cls|هَمَه}}
===ဗွဟ်ရမ္သာၚ်===
* {{IPA|tk|[ˈhem.me]}}
===ဖျေံလဝ်သန္နိဋ္ဌာန်===
{{head|tk|determiner}}
# သီုဖအိုတ်၊ ဗွဲ။
====မလဟုတ်စှ်ေ====
{{tk-decl-noun-auto|e|v}}
5lyf9i3zrd4bq9yi10xl8wl4egy9cs4
ထာမ်ပလိက်:tk-decl-noun-auto
10
294775
395169
2026-05-20T03:22:37Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:checkparams|warn}}<!-- Validate template parameters -->{{#if:{{{noplural|}}}|{{tk-decl-noun-unc |{{{head|{{pagename}}}}} |{{#switch:{{{2}}}|v={{#switch:{{{1}}}|e={{string left|{{{head|{{pagename}}}}}|{{#expr:{{str len|{{{head|{{pagename}}}}}}} - 1}}}}ä|{{{head|{{pagename}}}}}}}|{{{stem|{{{head|{{pagename}}}}}}}}}}{{#switch:{{{2}}}|v=n|c=}}{{tk-v2-B|{{{1}}}}} |{{#switch:{{{2}}}|v={{#switch:{{{1}}}|e={{stri..."
395169
wikitext
text/x-wiki
{{#invoke:checkparams|warn}}<!-- Validate template parameters
-->{{#if:{{{noplural|}}}|{{tk-decl-noun-unc
|{{{head|{{pagename}}}}}
|{{#switch:{{{2}}}|v={{#switch:{{{1}}}|e={{string left|{{{head|{{pagename}}}}}|{{#expr:{{str len|{{{head|{{pagename}}}}}}} - 1}}}}ä|{{{head|{{pagename}}}}}}}|{{{stem|{{{head|{{pagename}}}}}}}}}}{{#switch:{{{2}}}|v=n|c=}}{{tk-v2-B|{{{1}}}}}
|{{#switch:{{{2}}}|v={{#switch:{{{1}}}|e={{string left|{{{head|{{pagename}}}}}|{{#expr:{{str len|{{{head|{{pagename}}}}}}} - 1}}}}ä|{{{head|{{pagename}}}}}}}|{{{stem|{{{head|{{pagename}}}}}}}}}}{{#switch:{{{2}}}|v=n|c=}}{{#if:{{{unr|}}}|{{tk-v2-A|{{{1}}}}}|{{tk-v4|{{{1}}}}}}}ň
|{{#switch:{{{2}}}|v={{string left|{{{head|{{pagename}}}}}|{{#expr:{{str len|{{{head|{{pagename}}}}}}} - 1}}}}{{#switch:{{{1}}}|e=ä|{{chars|{{tk-v2-A|{{{1}}}}}_}}}}|{{{stem|{{{head|{{pagename}}}}}}}}{{tk-v2-A|{{{1}}}}}}}
|{{{head|{{pagename}}}}}d{{tk-v2-A|{{{1}}}}}
|{{{head|{{pagename}}}}}d{{tk-v2-A|{{{1}}}}}n
}}<!--
-->|{{tk-decl-noun-man
|{{{head|{{pagename}}}}}|{{{head|{{pagename}}}}}l{{tk-v2-A|{{{1}}}}}r
|{{#switch:{{{2}}}|v={{#switch:{{{1}}}|e={{string left|{{{head|{{pagename}}}}}|{{#expr:{{str len|{{{head|{{pagename}}}}}}} - 1}}}}ä|{{{head|{{pagename}}}}}}}|{{{stem|{{{head|{{pagename}}}}}}}}}}{{#switch:{{{2}}}|v=n|c=}}{{tk-v2-B|{{{1}}}}}|{{{head|{{pagename}}}}}l{{tk-v2-A|{{{1}}}}}r{{tk-v2-B|{{{1}}}}}
|{{#switch:{{{2}}}|v={{#switch:{{{1}}}|e={{string left|{{{head|{{pagename}}}}}|{{#expr:{{str len|{{{head|{{pagename}}}}}}} - 1}}}}ä|{{{head|{{pagename}}}}}}}|{{{stem|{{{head|{{pagename}}}}}}}}}}{{#switch:{{{2}}}|v=n|c=}}{{tk-v4|{{{1}}}}}ň|{{{head|{{pagename}}}}}l{{tk-v2-A|{{{1}}}}}r{{tk-v2-B|{{{1}}}}}ň
|{{#switch:{{{2}}}|v={{string left|{{{head|{{pagename}}}}}|{{#expr:{{str len|{{{head|{{pagename}}}}}}} - 1}}}}{{#switch:{{{1}}}|i|e=ä|{{chars|{{tk-v2-A|{{{1}}}}}_}}}}|{{{stem|{{{head|{{pagename}}}}}}}}{{tk-v2-A|{{{1}}}}}}}|{{{head|{{pagename}}}}}l{{tk-v2-A|{{{1}}}}}r{{tk-v2-A|{{{1}}}}}
|{{{head|{{pagename}}}}}d{{tk-v2-A|{{{1}}}}}|{{{head|{{pagename}}}}}l{{tk-v2-A|{{{1}}}}}rd{{tk-v2-A|{{{1}}}}}
|{{{head|{{pagename}}}}}d{{tk-v2-A|{{{1}}}}}n|{{{head|{{pagename}}}}}l{{tk-v2-A|{{{1}}}}}rd{{tk-v2-A|{{{1}}}}}n
}}}}<noinclude>{{documentation}}
{{isAccelerated}}</noinclude>
7brl41xwdi7tbubpu38lmrga23w71br
ထာမ်ပလိက်:tk-decl-noun-auto/documentation
10
294776
395170
2026-05-20T03:25:32Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} This template uses [[Template:tk-decl-noun-man]] and [[Template:tk-decl-noun-unc]]. The syntax is based on [[Template:tr-infl-noun-c]] before converted to Lua module and [[Template:az-decl-noun]] for the parameters. Declensions are based on [[Special:WhatLinksHere/Template:tk-decl-noun-man|pages that use Template:tk-decl-noun-man]] which was unconverted to this automatic template. However, i..."
395170
wikitext
text/x-wiki
{{documentation subpage}}
This template uses [[Template:tk-decl-noun-man]] and [[Template:tk-decl-noun-unc]]. Its syntax is based on [[Template:tr-infl-noun-c]] (as it was before being converted to a Lua module), and its parameters are based on [[Template:az-decl-noun]].
Declensions are based on [[Special:WhatLinksHere/Template:tk-decl-noun-man|pages that use Template:tk-decl-noun-man]] and had not been converted to this automatic template. However, irregularly declined pronouns, including {{l|tk|men}}, {{l|tk|siz}}, and {{l|tk|ol}}, still use [[Template:tk-decl-noun-man]]. This template also does not support old spellings such as {{l|tk|ÿüz}}.
== Usage ==
=== Parameters ===
{|class=wikitable
!Parameter
!Value
!Function
|-
!rowspan=9|<code>1</code>
|<code>a</code>
|rowspan=9|The noun's last vowel: one of a, ä, e, i, o, ö, u, ü, or y
|-
|<code>ä</code>
|-
|<code>e</code>
|-
|<code>i</code>
|-
|<code>o</code>
|-
|<code>ö</code>
|-
|<code>u</code>
|-
|<code>ü</code>
|-
|<code>y</code>
|-
!rowspan=2|<code>2</code>
|<code>c</code>
|For nouns ending in consonants
|-
|<code>v</code>
|For nouns ending in vowels
|-
!<code>head</code>
|String
|Specify a different headword
|-
!<code>stem</code>
|String
|For singular accusative, genitive, and dative stems with voiced consonants, or irregular stems like {{l|tk|ogul}} — {{l|tk|ogly}} (ogl-)
|-
!<code>noplural</code>
|<code>yes</code>
|For nouns that do not have plural forms, including some proper nouns like {{l|tk|Türkiýe}} and {{l|tk|Özbegistan}}.<br>Sets default to none
|}
== Examples ==
*On the page {{m|tk|harp}}:
<nowiki>{{tk-decl-noun-auto|a|c}}</nowiki>
*On the page {{m|tk|köpek}}:
<nowiki>{{tk-decl-noun-auto|e|c|stem=köpeg}}</nowiki>
*On the page {{m|tk|ogul}}:
<nowiki>{{tk-decl-noun-auto|u|c|stem=ogl}}</nowiki>
*On the page {{m|tk|kaka}}:
<nowiki>{{tk-decl-noun-auto|a|v}}</nowiki>
*On the page {{m|tk|suw}}:
<nowiki>{{tk-decl-noun-auto|u|c|unr=1}}</nowiki>
<includeonly>
[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်တာခ်မေန်ဂမၠိုၚ်|auto]]
</includeonly>
n7jbm6yiv2s75e61q6tf65ujgelqp2a
ထာမ်ပလိက်:tk-decl-noun-man
10
294777
395171
2026-05-20T03:28:52Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{#invoke:checkparams|error}}<!-- -->{{inflection-table-top|title=မလဟုတ်စှ်ေဆေၚ်စပ်ကဵု {{m|tk||{{{1}}}|tr=-}}|palette=green|tall=yes}} ! ! ကိုန်ဨကဝုစ် ! ကိုန်ဗဟုဝစ် |- ! မဒုၚ်ယၟု | {{l-self|tk|{{{1}}}}} | {{l-self|tk|{{{2}}}|accel-form=pl}} |- ! ကမ္မကာရက | {{l-self|tk|{{{3}}}|accel-form=acc{{! }}sg}} |..."
395171
wikitext
text/x-wiki
{{#invoke:checkparams|error}}<!--
-->{{inflection-table-top|title=မလဟုတ်စှ်ေဆေၚ်စပ်ကဵု {{m|tk||{{{1}}}|tr=-}}|palette=green|tall=yes}}
!
! ကိုန်ဨကဝုစ်
! ကိုန်ဗဟုဝစ်
|-
! မဒုၚ်ယၟု
| {{l-self|tk|{{{1}}}}}
| {{l-self|tk|{{{2}}}|accel-form=pl}}
|-
! ကမ္မကာရက
| {{l-self|tk|{{{3}}}|accel-form=acc{{! }}sg}}
| {{l-self|tk|{{{4}}}|accel-form=acc{{! }}pl}}
|-
! ဗဳဇဂကူ
| {{l-self|tk|{{{5}}}|accel-form=gen{{! }}sg}}
| {{l-self|tk|{{{6}}}|accel-form=gen{{! }}pl}}
|-
! ပြကမ္မကာရက
| {{l-self|tk|{{{7}}}|accel-form=dat{{! }}sg}}
| {{l-self|tk|{{{8}}}|accel-form=dat{{! }}pl}}
|-
! ခၞံဗဒှ်ဌာန်မတန်တဴ
| {{l-self|tk|{{{9}}}|accel-form=loc{{! }}sg}}
| {{l-self|tk|{{{10}}}|accel-form=loc{{! }}pl}}
|-
! ပရေၚ်မလၚ်
| {{l-self|tk|{{{11}}}|accel-form=abl{{! }}sg}}
| {{l-self|tk|{{{12}}}|accel-form=abl{{! }}pl}}
{{inflection-table-bottom}}<noinclude> {{documentation}}</noinclude>
qjwu19jjy2jb6vex6o3k7yopy6vn1k6
ထာမ်ပလိက်:tk-decl-noun-man/documentation
10
294778
395172
2026-05-20T03:30:01Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} This template is also used for [[Template:tk-decl-noun-auto]]. Use [[Template:tk-decl-noun-auto]] instead except for irregularly declined pronouns or pre-1999 spellings as in {{l|tk|ÿüz}}. <includeonly>[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်တာခ်မေန်ဂမၠိုၚ်|man]]</includeonly>"
395172
wikitext
text/x-wiki
{{documentation subpage}}
This template is also used for [[Template:tk-decl-noun-auto]]. Use [[Template:tk-decl-noun-auto]] instead except for irregularly declined pronouns or pre-1999 spellings as in {{l|tk|ÿüz}}.
<includeonly>[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်တာခ်မေန်ဂမၠိုၚ်|man]]</includeonly>
dx0r626jr6513kferj4tfn6wlnpntxq
ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်တာခ်မေန်ဂမၠိုၚ်
14
294779
395173
2026-05-20T03:31:07Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏတာခ်မေန်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|တ]]"
395173
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏတာခ်မေန်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|တ]]
iflsfqfjeousmc8w1hs9zgn3akqnnzw
ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏတာခ်မေန်ဂမၠိုၚ်
14
294780
395174
2026-05-20T03:32:45Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်တာခ်မေန်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|တ]]"
395174
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်တာခ်မေန်ဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|တ]]
8lxvii3eskwd7keh2cut6k27d1g3x2b
ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ပါဠိဂမၠိုၚ်
14
294781
395177
2026-05-20T09:57:16Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏပါဠိဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ပ]]"
395177
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏပါဠိဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ပ]]
pf294lbu4ppyizfg45zuizk9apr63rc
395178
395177
2026-05-20T10:15:20Z
Intobesa.bot
1035
Bot: ပွမပလီုထောံကဏ္ဍ
395178
wikitext
text/x-wiki
{{delete|1=Bot: ပွမပလီုထောံကဏ္ဍ}}
[[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏပါဠိဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏနာမ်ဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ပ]]
19gjfjzuy8sqcjkl18vk2niubnur4ee
395179
395178
2026-05-20T10:21:05Z
Intobesa.bot
1035
Bot: blanking unused category before deletion
395179
wikitext
text/x-wiki
phoiac9h4m842xq45sp7s6u21eteeq1
395180
395179
2026-05-20T10:21:15Z
Intobesa.bot
1035
Bot: requesting deletion
395180
wikitext
text/x-wiki
{{delete|Unused empty category}}
k6ybiycf3co8z0bu533kahzvesr917u
ထာမ်ပလိက်:pi-decl-noun/documentation
10
294782
395182
2026-05-20T10:34:32Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation subpage}} ==Purpose== This template, which works for all the standard Pali scripts, generates a declension table for a noun. It may also be used for a pronoun or the gender of an adjective. The template itself is a front-end for [[Module:pi-decl/noun]]. The template will also work for 'non-standard' abugidic scripts whose implicit vowel is 'a' if: * [[Module:pi-Latn-translit]] supports transliterati..."
395182
wikitext
text/x-wiki
{{documentation subpage}}
==Purpose==
This template, which works for all the standard Pali scripts, generates a declension table for a noun. It may also be used for a pronoun or the gender of an adjective. The template itself is a front-end for [[Module:pi-decl/noun]].
The template will also work for 'non-standard' abugidic scripts whose implicit vowel is 'a' if:
* [[Module:pi-Latn-translit]] supports transliteration to that script;
* The declension data module Module:pi-decl/noun/<script> exists; it need only return an empty table; and
* There are no idiosyncratic interactions between stem and affix.
In these cases, the parameters {{para|sc}} and {{para|ending}} are required.
==Regular Declensions==
14 'regular' vocalic* patterns of declension and 9 consonantal patterns of declension are recognised:
{|class="wikitable" style=text-align:center
! Exemplar
!colspan=3|Gender
|-
! Ending
! Masculine
! Feminine
! Neuter
|-
! a || {{l|pi|deva}}<br>(devo) || - || {{l|pi|rūpa}}<br>(rūpaṃ)
|-
<!-- ! ā || {{l|pi|candimā}} || {{l|pi|kaññā}} || - -->
! ā || {{l|pi|candimā}} || {{l|pi|gāthā}} || -
|-
! i || {{l|pi|muni}} || {{l|pi|bodhi}} || {{l|pi|aṭṭhi}}
|-
! ī || {{l|pi|antevāsī}} || {{l|pi|nadī}} || -
|-
<!-- ! u || {{l|pi|bandhu}} || {{l|pi|dhenu}} || {{l|pi|cakkhu}} -->
! u || {{l|pi|bandhu}} || {{l|pi|jambu}} || {{l|pi|cakkhu}}
|-
! ū || {{l|pi|viññū}} || {{l|pi|sassū}} ||
|-
! in || {{l|pi|sāmin}} (sāmī) || - || {{l|pi|balin}} (bali)
|-
! ar || {{l|pi|satthar}}<br>(satthā) || - || -
|-
! as || - || - || {{l|pi|sotas}}<br>(soto)
|-
! an || {{l|pi|attan}}<br>(attā) || - || -
|-
<!-- ! ant || {{l|pi|pacant}}<br>(pacaṃ) || - || {{l|pi|pacant}}<br>(pacaṃ) -->
! ant || {{l|pi|arahant}}<br>(arahaṃ) || - || {{l|pi|arahant}}<br>(arahaṃ)
|-
<!-- ! mant/​vant || {{l|pi|guṇavant}}<br>(guṇavā) || - || {{l|pi|guṇavant}}<br>(guṇavaṃ) -->
! mant/​vant || {{l|pi|himavant}}<br>(himavā) || - || {{l|pi|himavant}}<br>(himavaṃ)
|}
If different from the stem, the commonest nominative singular is given in parentheses after it. The masculine -ī and -in declensions, and as another pair the neuter -i and -in declensions, are indistinguishable. The use of the masculine -ī declension and the neuter -in declension are deprecated; the masculine -in and neuter -i declensions should be used instead.
===Special Handling for Sinhala Script===
Two variants of endings in 'nt' are recognised for the Sinhala script - combinations with touching letters (thematic masculine nominative singular ending {{lang|pi|න්තො}}), the traditional form, and combinations with a bare al-lakuna (thematic nominative singular ending {{lang|pi|න්තො}}) as commonly used in writing the Sinhalese language. The simple codes such as 'ant' are used for bare al-lakuna: special codes with 'T' suffixed, such as 'antT', are used for touching letters.
This feature should be hidden from the user interface - that will be a future enhancement. Note that this feature need only be used for present participles whose stem ends in {{lang|pi|මන්ත්}} or {{lang|pi|වන්ත්}}; automatic stem recognition handles the other cases.
==Irregular Declension==
Irregular declension is treated as a deviation from regular declension. Parameters are provided to suppress cases and override or supplement case forms.
==Usage==
The simplest, and commonest, invocation is of the form {{tl|pi-decl-noun|g=m}} for the declension of the head word. To display the declension of the feminine of {{l|pi|pāpa}} within that page, one can simply write {{tl|pi-decl-noun|pāpī|g=f}}. To display the masculine of the present participle of {{l|pi|kilamati}}, one writes {{tl|pi-decl-noun|kilamant|ant|g=m}}:
{{pi-decl-noun|kilamant|ant|g=m}}
Simply writing {{tl|pi-decl-noun|kilamant|g=m}} would generate an incorrect nominative singular masculine:
{{pi-decl-noun|kilamant|g=m}}
===Parameters===
{| class="wikitable"
! Parameter !! Description !! Type !! Default
|-
! stem
| The stem to which the inflectional endings are added. Note that the stem's ending may be replaced as part of the process.
| string
| The name of the page. The first anonymous parameter is used in preference to the named parameter.
|-
! ending
| This is the ending of the stem that, along with the gender, defines the declension. The ending is given in lower case in the Roman script.
The second anonymous parameter is used in preference to the named parameter.
| string
| If the parameter is omitted, the ending is taken from the end of the stem. The parameter is only required in three cases:
# to distinguish a participle coincidentally ending in -mant or -vant from an adjective in -mant or -vant. The former may be indicated by specifying this parameter as 'ant'.
# to distinguish -in from the coincidental ending in -ina when a script is used as both an abugida and as an alphabet.
# For a non-standard script; the script is given by the parameter {{para|sc}}.
|-
! gender
| The gender of the noun; more precisely, the gender according to which it is declined. Four values are allowed: m/f/n/no. The special value 'no' suppresses the display of the gender; the endings are selected as though 'm' had been specified. The third anonymous parameter is used in preference to the named parameter.
| string
| required
|-
! g
|colspan=3| Alias for parameter 'gender'.
|-
! label
| This is the label to use in the table heading instead of the stem.
| string
| If the parameter is omitted, the table heading is based on the stem.
|-
! number
| Which numbers to show the endings for - 's' for singular, 'p' for plural, or 'both' for both. This is intended for use with substantives and numerals that are only used or attested in the singular or in the plural.
| string
| both
|-
! showtr
| How to display transliterations to Roman scripts. There are three options - 'none' for no transliterations, 'plain' for transliterations in plain text and 'link' for transliterations as links. The use of 'link' is not recommended for general use, as the transliteration does not always match the Roman script usage, e.g. Tai Tham {{m|pi|ᨾᩘᩈ|t=flesh}} compared to Roman script equivalent {{m|pi|maṃsa}}.
| string
| plain
|-
! subst
| Substitutions to be applied to handle anomalous or unimplemented transliteration. If specified, should be one or more substitution expressions separated by commas. Each substitution expression is of the form <code>FROM//TO</code> (<code>FROM/TO</code> is also accepted), where <code>FROM</code> is the source text as found in the example, and <code>TO</code> is the corresponding respelling in the word's script (e.g. Burmese or Lao). The substitutions are applied in order and before transliteration. The idea is to respell words with irregular transliteration so that the transliteration comes out correctly; there is no explicit mechanism to transliterate a single item. Note that <code>FROM</code> and <code>TO</code> are actually Lua patterns (see [[WT:LUA]]). This means, for example, that a hyphen in the source text needs to be "escaped" by writing it as <code>%-</code>.
One can address a single spelling by the use of beginning and end of string anchors <code>^</code> and <code>$</code>.
| string
|
|-
! sc
| If the word to be inflected is not in one of the standard scripts, this is the code of the script it is in. In this case, {{para|ending}} is also required.
| string
|
|-
! impl
| In the Thai and Lao scripts, there are two main writing systems for Pali. One is an abugida, i.e. has an implicit vowel, and the other always indicates the vowel ('alphabetic'). This parameter, which defaults to 'yes', commands whether implicit vowels are used. The value 'both' indicates that forms for both writing systems should be shown in the declension table. If the lemma ends in the visual representation of the implicit vowel, the declension will only be shown with alphabetic spellings. The response to requesting the declension in a writing system formally inconsistent with the lemma is undefined.
The use of the value 'both' causes incorrect transliteration for most masculine and neuter nouns, and therefore should not be used for them.
This option is only valid for the Thai and Lao scripts.
| string
| yes
|-
! aa
| In the Burmese and Tai Tham scripts, there are two different vowels corresponding to ā. The choice depends on the preceding consonants, but there are different conventions. This parameter selects which value is used in the inflectional ending if the attested conventions may make different choices. The valid values are 'round', 'tall', 'both' and 'default'.
Note that the stem may limit the number of applicable conventions; it is the responsibility of the editor to avoid impossible combinations.
So far, this has only been implemented for the Tai Tham script.
| string
| The value defaults to 'default'
|-
! liap
| This parameter is only meaningful for the Lao script. Because the Lao script has had poor support for Pali, there are three different ways of writing the instrumental and ablative plural ending in -bhi. Not all stem forms are compatible with all three endings, and for a given stem form, which endings are compatible with it may also depend on its etymology. This parameter lists the forms to be used using a rough Romanisation, resulting in seven supported combinations: 'b' for -ພິ, 'bh' for -ຠິ, 'b.' for -ພ຺ິ, 'bbh' for -ພິ and -ຠິ, 'bb.' for -ພິ and -ພ຺ິ, 'bhb.' for -ຠິ and -ພ຺ິ, and 'all' or 'bbhb.' for all three. The value 'none' is also permitted as a way to specify that none be used.
| string
| bh
|-
! y
| This parameter is only meaningful for the Lao script. Some Lao script writing systems use yo yung (ຍ) for Pali <y>, while others use yo ya (ຢ). This option selects which value is used in the case endings. The allowed values are: yung/yaa/ຍ/ຢ/both.
| string
| (Accept choice of Lao script declension table or transliteration routine if no table.)
|-
! nonom
| Whether to suppress the nominative case.
| boolean
| false
|-
! noms_mod
| Whether the alternative case forms for the nominative singular are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! noms
| Alternative case form of nominative singular. Additional alternatives may be specified by noms2, noms3,..
| string
|-
! nomp_mod
| Whether the alternative case forms for the nominative plural are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! nomp
| Alternative case form of nominative plural. Additional alternatives may be specified by nomp2, nomp3,..
| string
|-
! noacc
| Whether to suppress the accusative case.
| boolean
| false
|-
! accs_mod
| Whether the alternative case forms for the accusative singular are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! accs
| Alternative case form of accusative singular. Additional alternatives may be specified by accs2, accs3,..
| string
|-
! accp_mod
| Whether the alternative case forms for the accusative plural are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! accp
| Alternative case form of accusative plural. Additional alternatives may be specified by accp2, accp3,..
| string
|-
! noins
| Whether to suppress the instrumental case.
| boolean
| false
|-
! inss_mod
| Whether the alternative case forms for the instrumental singular are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! inss
| Alternative case form of instrumental singular. Additional alternatives may be specified by inss2, inss3,..
| string
|-
! insp_mod
| Whether the alternative case forms for the instrumental plural are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! insp
| Alternative case form of instrumental plural. Additional alternatives may be specified by insp2, insp3,..
| string
|-
! nodat
| Whether to suppress the dative case.
| boolean
| false
|-
! dats_mod
| Whether the alternative case forms for the dative singular are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! dats
| Alternative case form of dative singular. Additional alternatives may be specified by dats2, dats3,..
| string
|-
! datp_mod
| Whether the alternative case forms for the dative plural are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! datp
| Alternative case form of dative plural. Additional alternatives may be specified by datp2, datp3,..
| string
|-
! noabl
| Whether to suppress the ablative case.
| boolean
| false
|-
! abls_mod
| Whether the alternative case forms for the ablative singular are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! abls
| Alternative case form of ablative singular. Additional alternatives may be specified by abls2, abls3,..
| string
|-
! ablp_mod
| Whether the alternative case forms for the ablative plural are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! ablp
| Alternative case form of ablative plural. Additional alternatives may be specified by ablp2, ablp3,..
| string
|-
! nogen
| Whether to suppress the genitive case.
| boolean
| false
|-
! gens_mod
| Whether the alternative case forms for the genitive singular are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! gens
| Alternative case form of genitive singular. Additional alternatives may be specified by gens2, gens3,..
| string
|-
! genp_mod
| Whether the alternative case forms for the genitive plural are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! genp
| Alternative case form of genitive plural. Additional alternatives may be specified by genp2, genp3,..
| string
|-
! noloc
| Whether to suppress the locative case.
| boolean
| false
|-
! locs_mod
| Whether the alternative case forms for the locative singular are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! locs
| Alternative case form of locative singular. Additional alternatives may be specified by locs2, locs3,..
| string
|-
! locp_mod
| Whether the alternative case forms for the locative plural are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! locp
| Alternative case form of locative plural. Additional alternatives may be specified by locp2, locp3,..
| string
|-
! novoc
| Whether to suppress the vocative case.
| boolean
| false
|-
! vocs_mod
| Whether the alternative case forms for the vocative singular are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! vocs
| Alternative case form of vocative singular. Additional alternatives may be specified by vocs2, vocs3,..
| string
|-
! vocp_mod
| Whether the alternative case forms for the vocative plural are listed before, after, or replace, the forms for the regular declension. Three values are allowed: before/after/replace
| string
| after
|-
! vocp
| Alternative case form of vocative plural. Additional alternatives may be specified by vocp2, vocp3,..
| string
|
|}
d9o0ppmd8bi3w8br4pfzo496utarh07
မဝ်ဂျူ:pi-decl/noun/doc
828
294783
395184
2026-05-20T10:45:01Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "==Purpose== This module provides inflection tables for Pali for nouns, adjectives and pronouns. For pronouns, one currently uses the interface for nouns, while for adjectives one uses separate invocations for each gender. Some functions are exported from this module to service the testing of noun inflection. The module also provides utility functions for the conjugation of verbs. ==Normal Use== The normal way to..."
395184
wikitext
text/x-wiki
==Purpose==
This module provides inflection tables for Pali for nouns, adjectives and pronouns. For pronouns, one currently uses the interface for nouns, while for adjectives one uses separate invocations for each gender.
Some functions are exported from this module to service the testing of noun inflection. The module also provides utility functions for the conjugation of verbs.
==Normal Use==
The normal way to use this module is to invoke the template {{tl|pi-decl-noun}}, which see for the interface. This invokes the exported function {{code|lua|show}}.
==Data tables==
The primary data table for the inflections is the data module [[Module:pi-decl/noun/Latn]], which contains the Latin script tables. These are supplemented by identically structured tables for each of the other supported scripts. If the table for a particular paradigm is missing from one of these, the table will be generated using the transliteration functions in [[Module:pi-Latn-translit]]. The data modules for the other scripts are:
* [[Module:pi-decl/noun/Thai]]
* [[Module:pi-decl/noun/Deva]]
* [[Module:pi-decl/noun/Brah]]
* [[Module:pi-decl/noun/Beng]]
* [[Module:pi-decl/noun/Sinh]]
* [[Module:pi-decl/noun/Mymr]]
* [[Module:pi-decl/noun/Lana]]
* [[Module:pi-decl/noun/Laoo]]
* [[Module:pi-decl/noun/Khmr]]
* [[Module:pi-decl/noun/Latn]]
With the exception of the masculine and neuter thematic nouns, the Thai and Lao tables are not used for declension with explicit vowels.
There is no such redundant table for the Chakma script.
<!-- TODO: Mention consistency check. -->
<!-- TODO: An explanation of these tables' other uses would be good. -->
==Deliberately Exported Functions==
The following Lua functions are exported by this module:
* {{code|lua|orJoin()}}
* {{code|lua|joinSuffix}}
* {{code|lua|arrcat_nodup}}
* {{code|lua|present}}
* {{code|lua|show}}
===Function orJoin===
<!--TODO: Document it-->
===Function joinSuffix===
The original idea was to share this function with the code for verb conjugation. However, the conjugation of verbs in the Thai and Lao scripts is more complicated, and there is therefore a more general function in use for verbs.
<!--TODO: Document it-->
===Function arrcat_nodup===
<!--TODO: Document it-->
===Function present()===
<!--TODO: Document it-->
===Function show()===
<!--TODO: Document it-->
==Other exported functions==
<!-- Check whether these are used elsewhere! -->
* {{code|lua|detectEnding()}}
* {{code|lua|joinSuffixes}}
* {{code|lua|getSuffixes}}
* {{code|lua|modify}}
==Algorithm==
The paradigm to use is determined using the script of the stem, the ending of the stem (for which there are a few conventional values - see {{temp|pi-decl-noun}}) and gender of the stem. The script is always deduced from the script of the stem, while the ending may be supplied explicitly (in Latin script) or deduced from the stem. The gender is always supplied explicitly. The deduction of the ending from the stem is performed by function {{code|lua|detectEnding}}.
The set of suffixes is obtained by function {{code|lua|getSuffixes}}. This first attempts to load the paradigm from the data files. However, if the paradigm is unacceptable or missing, it will generate it itself. Paradigms from data files are only acceptable for some combinations of settings. At present, they are not acceptable for non-Roman scripts when using explicit vowels, except for the conventional ending 'ah', which denotes masculine or neuter nouns with stems in explicit -a. (The convention was chosen because the explicit vowel also represents the Sanskrit ending -aḥ.)
When paradigms are generated internally, they are converted from Latin script to the required script and implicit vowel settings. This is implemented in function {{code|lua|convert_suffixes}}.
The second stage of the generation, applicable to the Lao script only, is to, where needed, convert the ablative and instrumental plural in -bhi to the correct forms. The editor specifies the correct form using the parameter {{para|liap}}.
The third stage of the generation, applicable to Lao script only, is to, where needed, convert the letter corresponding to <y> in the suffixes to the correct letter. This setting is treated as orthogonal to the choice between using or not using implicit vowels.
The endings are then attached to the stem using the function {{code|lua|joinSuffixes}}. This invokes function {{code|lua|joinSuffix}} to apply the writing system-dependent rules for the attachment of suffixes. There is one user-controlled input to this process, the parameter {{para|aa}}, which is applicable to the Burmese and Tai Tham scripts.
Next, the function {{code|lua|modify}} is applied to add, remove or replace the forms generated so far in accordance with a list of modifications included in the invocation of {{temp|pi-decl-noun}}.
Finally, the function {{code|lua|present}} formats the list of forms for each combination of case and number. This formatting includes adding the transliteration, which is done in function {{code|lua|orJoin}}. Function {{code|lua|show}} then returns the inflection table for display on the page.
<includeonly>
{{module cat|pi,ပွမပြံၚ်လှာဲ}}
</includeonly>
rp86lcbkblir7ebf6wko6gmlcpyzl8z
395185
395184
2026-05-20T10:46:12Z
咽頭べさ
33
395185
wikitext
text/x-wiki
==Purpose==
This module provides inflection tables for Pali for nouns, adjectives and pronouns. For pronouns, one currently uses the interface for nouns, while for adjectives one uses separate invocations for each gender.
Some functions are exported from this module to service the testing of noun inflection. The module also provides utility functions for the conjugation of verbs.
==Normal Use==
The normal way to use this module is to invoke the template {{tl|pi-decl-noun}}, which see for the interface. This invokes the exported function {{code|lua|show}}.
==Data tables==
The primary data table for the inflections is the data module [[Module:pi-decl/noun/Latn]], which contains the Latin script tables. These are supplemented by identically structured tables for each of the other supported scripts. If the table for a particular paradigm is missing from one of these, the table will be generated using the transliteration functions in [[Module:pi-Latn-translit]]. The data modules for the other scripts are:
* [[Module:pi-decl/noun/Thai]]
* [[Module:pi-decl/noun/Deva]]
* [[Module:pi-decl/noun/Brah]]
* [[Module:pi-decl/noun/Beng]]
* [[Module:pi-decl/noun/Sinh]]
* [[Module:pi-decl/noun/Mymr]]
* [[Module:pi-decl/noun/Lana]]
* [[Module:pi-decl/noun/Laoo]]
* [[Module:pi-decl/noun/Khmr]]
* [[Module:pi-decl/noun/Latn]]
With the exception of the masculine and neuter thematic nouns, the Thai and Lao tables are not used for declension with explicit vowels.
There is no such redundant table for the Chakma script.
<!-- TODO: Mention consistency check. -->
<!-- TODO: An explanation of these tables' other uses would be good. -->
==Deliberately Exported Functions==
The following Lua functions are exported by this module:
* {{code|lua|orJoin()}}
* {{code|lua|joinSuffix}}
* {{code|lua|arrcat_nodup}}
* {{code|lua|present}}
* {{code|lua|show}}
===Function orJoin===
<!--TODO: Document it-->
===Function joinSuffix===
The original idea was to share this function with the code for verb conjugation. However, the conjugation of verbs in the Thai and Lao scripts is more complicated, and there is therefore a more general function in use for verbs.
<!--TODO: Document it-->
===Function arrcat_nodup===
<!--TODO: Document it-->
===Function present()===
<!--TODO: Document it-->
===Function show()===
<!--TODO: Document it-->
==Other exported functions==
<!-- Check whether these are used elsewhere! -->
* {{code|lua|detectEnding()}}
* {{code|lua|joinSuffixes}}
* {{code|lua|getSuffixes}}
* {{code|lua|modify}}
==Algorithm==
The paradigm to use is determined using the script of the stem, the ending of the stem (for which there are a few conventional values - see {{temp|pi-decl-noun}}) and gender of the stem. The script is always deduced from the script of the stem, while the ending may be supplied explicitly (in Latin script) or deduced from the stem. The gender is always supplied explicitly. The deduction of the ending from the stem is performed by function {{code|lua|detectEnding}}.
The set of suffixes is obtained by function {{code|lua|getSuffixes}}. This first attempts to load the paradigm from the data files. However, if the paradigm is unacceptable or missing, it will generate it itself. Paradigms from data files are only acceptable for some combinations of settings. At present, they are not acceptable for non-Roman scripts when using explicit vowels, except for the conventional ending 'ah', which denotes masculine or neuter nouns with stems in explicit -a. (The convention was chosen because the explicit vowel also represents the Sanskrit ending -aḥ.)
When paradigms are generated internally, they are converted from Latin script to the required script and implicit vowel settings. This is implemented in function {{code|lua|convert_suffixes}}.
The second stage of the generation, applicable to the Lao script only, is to, where needed, convert the ablative and instrumental plural in -bhi to the correct forms. The editor specifies the correct form using the parameter {{para|liap}}.
The third stage of the generation, applicable to Lao script only, is to, where needed, convert the letter corresponding to <y> in the suffixes to the correct letter. This setting is treated as orthogonal to the choice between using or not using implicit vowels.
The endings are then attached to the stem using the function {{code|lua|joinSuffixes}}. This invokes function {{code|lua|joinSuffix}} to apply the writing system-dependent rules for the attachment of suffixes. There is one user-controlled input to this process, the parameter {{para|aa}}, which is applicable to the Burmese and Tai Tham scripts.
Next, the function {{code|lua|modify}} is applied to add, remove or replace the forms generated so far in accordance with a list of modifications included in the invocation of {{temp|pi-decl-noun}}.
Finally, the function {{code|lua|present}} formats the list of forms for each combination of case and number. This formatting includes adding the transliteration, which is done in function {{code|lua|orJoin}}. Function {{code|lua|show}} then returns the inflection table for display on the page.
<includeonly>
{{module cat|ပွမပြံၚ်လှာဲ}}
</includeonly>
cr9ou8laaaub20xh7a4sf70niatw5dj
395186
395185
2026-05-20T10:47:03Z
咽頭べさ
33
395186
wikitext
text/x-wiki
==Purpose==
This module provides inflection tables for Pali for nouns, adjectives and pronouns. For pronouns, one currently uses the interface for nouns, while for adjectives one uses separate invocations for each gender.
Some functions are exported from this module to service the testing of noun inflection. The module also provides utility functions for the conjugation of verbs.
==Normal Use==
The normal way to use this module is to invoke the template {{tl|pi-decl-noun}}, which see for the interface. This invokes the exported function {{code|lua|show}}.
==Data tables==
The primary data table for the inflections is the data module [[Module:pi-decl/noun/Latn]], which contains the Latin script tables. These are supplemented by identically structured tables for each of the other supported scripts. If the table for a particular paradigm is missing from one of these, the table will be generated using the transliteration functions in [[Module:pi-Latn-translit]]. The data modules for the other scripts are:
* [[Module:pi-decl/noun/Thai]]
* [[Module:pi-decl/noun/Deva]]
* [[Module:pi-decl/noun/Brah]]
* [[Module:pi-decl/noun/Beng]]
* [[Module:pi-decl/noun/Sinh]]
* [[Module:pi-decl/noun/Mymr]]
* [[Module:pi-decl/noun/Lana]]
* [[Module:pi-decl/noun/Laoo]]
* [[Module:pi-decl/noun/Khmr]]
* [[Module:pi-decl/noun/Latn]]
With the exception of the masculine and neuter thematic nouns, the Thai and Lao tables are not used for declension with explicit vowels.
There is no such redundant table for the Chakma script.
<!-- TODO: Mention consistency check. -->
<!-- TODO: An explanation of these tables' other uses would be good. -->
==Deliberately Exported Functions==
The following Lua functions are exported by this module:
* {{code|lua|orJoin()}}
* {{code|lua|joinSuffix}}
* {{code|lua|arrcat_nodup}}
* {{code|lua|present}}
* {{code|lua|show}}
===Function orJoin===
<!--TODO: Document it-->
===Function joinSuffix===
The original idea was to share this function with the code for verb conjugation. However, the conjugation of verbs in the Thai and Lao scripts is more complicated, and there is therefore a more general function in use for verbs.
<!--TODO: Document it-->
===Function arrcat_nodup===
<!--TODO: Document it-->
===Function present()===
<!--TODO: Document it-->
===Function show()===
<!--TODO: Document it-->
==Other exported functions==
<!-- Check whether these are used elsewhere! -->
* {{code|lua|detectEnding()}}
* {{code|lua|joinSuffixes}}
* {{code|lua|getSuffixes}}
* {{code|lua|modify}}
==Algorithm==
The paradigm to use is determined using the script of the stem, the ending of the stem (for which there are a few conventional values - see {{temp|pi-decl-noun}}) and gender of the stem. The script is always deduced from the script of the stem, while the ending may be supplied explicitly (in Latin script) or deduced from the stem. The gender is always supplied explicitly. The deduction of the ending from the stem is performed by function {{code|lua|detectEnding}}.
The set of suffixes is obtained by function {{code|lua|getSuffixes}}. This first attempts to load the paradigm from the data files. However, if the paradigm is unacceptable or missing, it will generate it itself. Paradigms from data files are only acceptable for some combinations of settings. At present, they are not acceptable for non-Roman scripts when using explicit vowels, except for the conventional ending 'ah', which denotes masculine or neuter nouns with stems in explicit -a. (The convention was chosen because the explicit vowel also represents the Sanskrit ending -aḥ.)
When paradigms are generated internally, they are converted from Latin script to the required script and implicit vowel settings. This is implemented in function {{code|lua|convert_suffixes}}.
The second stage of the generation, applicable to the Lao script only, is to, where needed, convert the ablative and instrumental plural in -bhi to the correct forms. The editor specifies the correct form using the parameter {{para|liap}}.
The third stage of the generation, applicable to Lao script only, is to, where needed, convert the letter corresponding to <y> in the suffixes to the correct letter. This setting is treated as orthogonal to the choice between using or not using implicit vowels.
The endings are then attached to the stem using the function {{code|lua|joinSuffixes}}. This invokes function {{code|lua|joinSuffix}} to apply the writing system-dependent rules for the attachment of suffixes. There is one user-controlled input to this process, the parameter {{para|aa}}, which is applicable to the Burmese and Tai Tham scripts.
Next, the function {{code|lua|modify}} is applied to add, remove or replace the forms generated so far in accordance with a list of modifications included in the invocation of {{temp|pi-decl-noun}}.
Finally, the function {{code|lua|present}} formats the list of forms for each combination of case and number. This formatting includes adding the transliteration, which is done in function {{code|lua|orJoin}}. Function {{code|lua|show}} then returns the inflection table for display on the page.
<includeonly>
{{module cat|pi}}
</includeonly>
ckxs6sjefiohvwams17kdduocwogf4d
ကဏ္ဍ:မဝ်ဂျူပါဠိဂမၠိုၚ်
14
294784
395187
2026-05-20T10:52:33Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာပါဠိ|ပါဠိ]] » '''မဝ်ဂျူဂမၠိုၚ..."
395187
wikitext
text/x-wiki
[[:ကဏ္ဍ:ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်|ဒၞာဲလုပ်အဝေါၚ်ကဵုပၟိက်]] » [[:ကဏ္ဍ:အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်|အရေဝ်ဘာသာအိုတ်သီုဂမၠိုၚ်]] » [[:ကဏ္ဍ:ဘာသာပါဠိ|ပါဠိ]] » '''မဝ်ဂျူဂမၠိုၚ်'''
:[[:ကဏ္ဍ:မဝ်ဂျူဂမၠိုၚ်|မဝ်ဂျူ]]ဘာသာပါဠိ၊ မနွံကဵုလုပ်အဝေါၚ်ကုဒ် Lua နကဵုမကၠောန်ဗဒှ် ကဵု မစဳရေၚ်ယဵုဒုၚ်သ္ပမာန်ဂမၠိုၚ်။
[[ကဏ္ဍ:ဘာသာပါဠိ]][[ကဏ္ဍ:မဝ်ဂျူဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ပ]]
jpookhrxmuk96pc98ogqhwqk18obzod
ကဏ္ဍ:မဝ်ဂျူပွမပြံၚ်လှာဲပါဠိဂမၠိုၚ်
14
294785
395188
2026-05-20T10:56:19Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:မဝ်ဂျူပါဠိဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏပါဠိဂမၠိုၚ်]][[ကဏ္ဍ:မဝ်ဂျူဗီုအပြံၚ်အလှာဲဝေါဟာဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ပ]]"
395188
wikitext
text/x-wiki
[[ကဏ္ဍ:မဝ်ဂျူပါဠိဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏပါဠိဂမၠိုၚ်]][[ကဏ္ဍ:မဝ်ဂျူဗီုအပြံၚ်အလှာဲဝေါဟာဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ပ]]
q5xy3z58bvikz90nud8k8ojs0rszrlb
ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏပါဠိဂမၠိုၚ်
14
294786
395189
2026-05-20T10:58:01Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ထာမ်ပလိက်ပါဠိဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ပ]]"
395189
wikitext
text/x-wiki
[[ကဏ္ဍ:ထာမ်ပလိက်ပါဠိဂမၠိုၚ်]][[ကဏ္ဍ:ထာမ်ပလိက်အပြံၚ်အလှာဲပ္တဝ်ထ္ၜးပမာဏဗက်အလိုက်အရေဝ်ဘာသာဂမၠိုၚ်|ပ]]
jwb98uag8y2op34eekb44f0reyqik1v
မဝ်ဂျူ:pi-decl/noun/Mymr/doc
828
294787
395190
2026-05-20T11:01:38Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|pi}} </includeonly>"
395190
wikitext
text/x-wiki
{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. -->
<includeonly>
{{module cat|pi}}
</includeonly>
s4hekremquhb87pv9wrkn39l0t3mt3x
မဝ်ဂျူ:pi-decl/noun/Thai/doc
828
294788
395191
2026-05-20T11:01:59Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|pi}} </includeonly>"
395191
wikitext
text/x-wiki
{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. -->
<includeonly>
{{module cat|pi}}
</includeonly>
s4hekremquhb87pv9wrkn39l0t3mt3x
မဝ်ဂျူ:pi-decl/noun/Deva/doc
828
294789
395192
2026-05-20T11:02:18Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|pi}} </includeonly>"
395192
wikitext
text/x-wiki
{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. -->
<includeonly>
{{module cat|pi}}
</includeonly>
s4hekremquhb87pv9wrkn39l0t3mt3x
မဝ်ဂျူ:pi-decl/noun/Brah/doc
828
294790
395193
2026-05-20T11:02:49Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|pi}} </includeonly>"
395193
wikitext
text/x-wiki
{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. -->
<includeonly>
{{module cat|pi}}
</includeonly>
s4hekremquhb87pv9wrkn39l0t3mt3x
မဝ်ဂျူ:pi-decl/noun/Beng/doc
828
294791
395194
2026-05-20T11:03:06Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|pi}} </includeonly>"
395194
wikitext
text/x-wiki
{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. -->
<includeonly>
{{module cat|pi}}
</includeonly>
s4hekremquhb87pv9wrkn39l0t3mt3x
မဝ်ဂျူ:pi-decl/noun/Sinh/doc
828
294792
395195
2026-05-20T11:03:37Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|pi}} </includeonly>"
395195
wikitext
text/x-wiki
{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. -->
<includeonly>
{{module cat|pi}}
</includeonly>
s4hekremquhb87pv9wrkn39l0t3mt3x
မဝ်ဂျူ:pi-decl/noun/Lana/doc
828
294793
395196
2026-05-20T11:03:58Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|pi}} </includeonly>"
395196
wikitext
text/x-wiki
{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. -->
<includeonly>
{{module cat|pi}}
</includeonly>
s4hekremquhb87pv9wrkn39l0t3mt3x
မဝ်ဂျူ:pi-decl/noun/Laoo/doc
828
294794
395197
2026-05-20T11:04:15Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|pi}} </includeonly>"
395197
wikitext
text/x-wiki
{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. -->
<includeonly>
{{module cat|pi}}
</includeonly>
s4hekremquhb87pv9wrkn39l0t3mt3x
မဝ်ဂျူ:pi-decl/noun/Khmr/doc
828
294795
395198
2026-05-20T11:04:32Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|pi}} </includeonly>"
395198
wikitext
text/x-wiki
{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. -->
<includeonly>
{{module cat|pi}}
</includeonly>
s4hekremquhb87pv9wrkn39l0t3mt3x
မဝ်ဂျူ:pi-decl/noun/Latn/doc
828
294796
395199
2026-05-20T11:04:49Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. --> <includeonly> {{module cat|pi}} </includeonly>"
395199
wikitext
text/x-wiki
{{documentation needed}}<!-- Replace this with a short description of the purpose of the module, and how to use it. -->
<includeonly>
{{module cat|pi}}
</includeonly>
s4hekremquhb87pv9wrkn39l0t3mt3x
hemmes
0
294797
395201
2026-05-20T11:39:43Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{also|Hemmes}} ==နဝ်ဝေ ဗော်ခ်မဝ်== ===ကြိယာ=== {{head|nb|verbf}} # {{form of|nb|passive form|hemme}}"
395201
wikitext
text/x-wiki
{{also|Hemmes}}
==နဝ်ဝေ ဗော်ခ်မဝ်==
===ကြိယာ===
{{head|nb|verbf}}
# {{form of|nb|passive form|hemme}}
6rwapjwb97nnbtt16p00d0xlaeadrg0
Hemmes
0
294798
395202
2026-05-20T11:41:32Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{also|hemmes}} =={{=en=}}== ===နာမ်မကိတ်ညဳ=== {{head|en|proper noun form}} # {{plural of|en|Hemme}} ===ဝေါဟာလွာ=== * {{anagrams|en|a=eehmms|emmesh}}"
395202
wikitext
text/x-wiki
{{also|hemmes}}
=={{=en=}}==
===နာမ်မကိတ်ညဳ===
{{head|en|proper noun form}}
# {{plural of|en|Hemme}}
===ဝေါဟာလွာ===
* {{anagrams|en|a=eehmms|emmesh}}
s3knvtpo0lm24kkfuwo4ce8mn99ygdy
Hemme
0
294799
395203
2026-05-20T11:44:38Z
咽頭べさ
33
ခၞံကၠောန်လဝ် မုက်လိက် နကု "{{also|hemme}} =={{=en=}}== ===နိရုတ်=== ဝေါဟာကၠုၚ်နူ{{der|en|nds|-}} ကဵု ယၟုဂကူ{{der|en|nl|-}}၊ နူကဵုဝေါဟာယၟုဂကူ {{der|en|de|-}} ===နာမ်မကိတ်ညဳ=== {{en-proper noun|s}} # {{surname|en}}"
395203
wikitext
text/x-wiki
{{also|hemme}}
=={{=en=}}==
===နိရုတ်===
ဝေါဟာကၠုၚ်နူ{{der|en|nds|-}} ကဵု ယၟုဂကူ{{der|en|nl|-}}၊ နူကဵုဝေါဟာယၟုဂကူ {{der|en|de|-}}
===နာမ်မကိတ်ညဳ===
{{en-proper noun|s}}
# {{surname|en}}
fynftzlgitxfeqkkxeee1na3v0vsncm