ဝိက်ရှေန်နရဳ mnwwiktionary https://mnw.wiktionary.org/wiki/%E1%80%9D%E1%80%AD%E1%80%80%E1%80%BA%E1%80%9B%E1%80%BE%E1%80%B1%E1%80%94%E1%80%BA%E1%80%94%E1%80%9B%E1%80%B3:%E1%80%99%E1%80%AF%E1%80%80%E1%80%BA%E1%80%9C%E1%80%AD%E1%80%80%E1%80%BA%E1%80%90%E1%80%99%E1%80%BA MediaWiki 1.47.0-wmf.3 case-sensitive မဳဒဳယာ တၟေင် ဓရီုကျာ ညးလွပ် ညးလွပ် ဓရီုကျာ ဝိက်ရှေန်နရဳ ဝိက်ရှေန်နရဳ ဓရီုကျာ ဝှာင် ဝှာင် ဓရီုကျာ မဳဒဳယာဝဳကဳ မဳဒဳယာဝဳကဳ ဓရီုကျာ ထာမ်ပလိက် ထာမ်ပလိက် ဓရီုကျာ ရီု ရီု ဓရီုကျာ ကဏ္ဍ ကဏ္ဍ ဓရီုကျာ အဆက်လက္ကရဴ အဆက်လက္ကရဴ ဓရီုကျာ ကာရန် ကာရန် ဓရီုကျာ အဘိဓာန် အဘိဓာန် ဓရီုကျာ ဗီုပြၚ်သိုၚ်တၟိ ဗီုပြၚ်သိုၚ်တၟိ ဓရီုကျာ TimedText TimedText talk မဝ်ဂျူ မဝ်ဂျူ ဓရီုကျာ Event Event talk မဝ်ဂျူ:headword utilities 828 18102 395273 381444 2026-05-22T11:41:14Z 咽頭べさ 33 395273 Scribunto text/plain local export = {} local fun_is_callable_module = "Module:fun/isCallable" local languages_module = "Module:languages" local links_module = "Module:links" local parse_utilities_module = "Module:parse utilities" local string_pattern_escape_module = "Module:string/patternEscape" local string_replacement_escape_module = "Module:string/replacementEscape" local string_utilities_module = "Module:string utilities" local table_module = "Module:table" local dump = mw.dumpObject local unpack = unpack or table.unpack -- Lua 5.2 compatibility local insert = table.insert local concat = table.concat local remove = table.remove local sort = table.sort local function deepEquals(...) deepEquals = require(table_module).deepEquals return deepEquals(...) end local function escape_wikicode(...) escape_wikicode = require(parse_utilities_module).escape_wikicode return escape_wikicode(...) end local function extend(...) extend = require(table_module).extend return extend(...) end local function get_lang(...) get_lang = require(languages_module).getByCode return get_lang(...) end local function insert_if_not(...) insert_if_not = require(table_module).insertIfNot return insert_if_not(...) end local function is_callable(...) 
is_callable = require(fun_is_callable_module) return is_callable(...) end local function parse_inline_modifiers(...) parse_inline_modifiers = require(parse_utilities_module).parse_inline_modifiers return parse_inline_modifiers(...) end local function pattern_escape(...) pattern_escape = require(string_pattern_escape_module) return pattern_escape(...) end local function replacement_escape(...) replacement_escape = require(string_replacement_escape_module) return replacement_escape(...) end local function shallow_copy(...) shallow_copy = require(table_module).shallowCopy return shallow_copy(...) end local function split(...) split = require(string_utilities_module).split return split(...) end local function term_contains_top_level_html(...) term_contains_top_level_html = require(parse_utilities_module).term_contains_top_level_html return term_contains_top_level_html(...) end local function ugsub(...) ugsub = require(string_utilities_module).gsub return ugsub(...) end local function umatch(...) umatch = require(string_utilities_module).match return umatch(...) end local param_mods = { id = {}, -- disabled when `is_head = true` q = {type = "qualifier"}, qq = {type = "qualifier"}, l = {type = "labels"}, ll = {type = "labels"}, -- [[Module:headword]] expects part references in `.refs`. ref = {item_dest = "refs", type = "references", store = "insert-flattened"}, } local optional_param_mods = { g = {item_dest = "genders", type = "genders"}, alt = {}, lang = {type = "language"}, sc = {type = "script"}, t = {item_dest = "gloss"}, gloss = {}, pos = {}, lit = {}, tr = {}, ts = {}, face = {}, nolinkinfl = {type = "boolean"}, } local optional_headword_param_mods = { sc = {type = "script"}, tr = {}, ts = {}, } --[==[ Parse a single inflection or headword form or list of such forms. In either case, inline modifiers may be attached. `data` is an object with the following fields: * `val`: The raw value to parse. Required. 
* `paramname`: The name of the parameter from which the value was taken; used in error messages. Required.
* `is_head`: We are parsing a headword parameter (a value which goes into the `heads` field of `data`). This changes the
  allowed modifiers, disabling the `id` modifier and only allowing a subset of optional modifiers.
* `frob`: An optional function of one value to apply to the form after inline modifiers have been removed (i.e. to apply
  to the `.term` field of the returned object).
* `include_mods`: List of extra inline modifiers to include, besides the default ones (see below). Each list item is
  either a string specifying a recognized extra inline modifier (see `optional_param_mods` in the code), or a two-item
  list of modifier name and modifier spec, where the spec should follow the syntax for modifier specs in
  `parse_inline_modifiers` in [[Module:parse utilities]].
* `exclude_mods`: List of default inline modifiers to not include.
* `splitchar`: If specified, the value in `val` can be a list of forms to parse, separated by the value of `splitchar`
  (which is a Lua pattern, as in `parse_inline_modifiers` in [[Module:parse utilities]]). Most commonly, `splitchar` is
  a single comma and the values are comma-separated (in this case, splitting will not happen if a space follows the
  comma).
* `preserve_splitchar`, `delimiter_key`, `escape_fun`, `unescape_fun`, `pre_normalize_modifiers`: As in
  `parse_inline_modifiers` in [[Module:parse utilities]].

Returns an object suitable for storing as one element of one of the lists in `headdata.inflections`, where `headdata` is
the structure passed to [[Module:headword]]. If `splitchar` is specified, however, the return value is a list of such
objects.

The following default inline modifiers are currently recognized:
* `q`: Left qualifier.
* `qq`: Right qualifier.
* `l`: Comma-separated list of left labels. No space should follow the comma.
* `ll`: Comma-separated list of right labels. No space should follow the comma.
* `ref`: Reference or references. See {{tl|IPA}} for the syntax.
* `id`: Sense ID, in case there are multiple senses. See {{tl|l}}.

The following are the recognized additional inline modifiers:
* `g`: Comma-separated list of genders.
* `alt`: Display text.
* `lang`: Language code of language of the form, if different from the language of the headword.
* `sc`: Script code of script of the form. Almost never needed.
* `t`: Gloss for the form.
* `gloss`: Gloss for the form (alias for `t`).
* `pos`: Part of speech of the form.
* `lit`: Literal meaning of the form.
* `tr`: Manual transliteration of the form.
* `ts`: Transcription of the form, for languages where the transliteration differs markedly from the pronunciation.
* `face`: Face to display the form in, e.g. {"hypothetical"} for a hypothetical form (unlinkable and displayed in
  italics).
* `nolinkinfl`: Make the form unlinkable.
]==]
function export.parse_term_with_modifiers(data)
	local raw_value = data.val
	local is_head = data.is_head
	local transform = data.frob

	-- Wrap a bare term in a term object, first running it through `frob` when one was supplied.
	local function generate_obj(term, parse_err)
		if transform then
			term = transform(term, parse_err)
		end
		return {term = term}
	end

	-- Inline modifiers (e.g. מרים<tr:Miryem>) are only parsed when the value contains a "<" or a split character was
	-- given, the value is not top-level HTML (<span ...>, <sup> or similar), and -- for headwords -- the value does
	-- not begin with "~" (link modifier syntax).
	local may_have_modifiers = raw_value:find("<", nil, true) or data.splitchar
	local should_parse = may_have_modifiers and not term_contains_top_level_html(raw_value) and
		not (is_head and raw_value:find("^~"))

	if not should_parse then
		local obj = generate_obj(raw_value)
		if data.splitchar then
			-- With `splitchar`, callers always get a list back.
			return {obj}
		end
		return obj
	end

	local include_mods, exclude_mods = data.include_mods, data.exclude_mods
	local mods = param_mods
	if is_head or include_mods or exclude_mods then
		-- Never modify the shared module-level table; work on a private copy.
		mods = shallow_copy(param_mods)
	end
	if is_head then
		mods.id = nil
	end

	if include_mods then
		local optional_mods = is_head and optional_headword_param_mods or optional_param_mods
		for _, mod in ipairs(include_mods) do
			if type(mod) == "table" then
				-- {name, spec} pair supplied directly by the caller.
				if #mod ~= 2 then
					error(("Internal error: Modifier spec %s in `include_mods` should be of length 2"):format(
						dump(mod)))
				end
				mods[mod[1]] = mod[2]
			else
				-- Name of one of the predefined optional modifiers.
				local spec = optional_mods[mod]
				if not spec then
					error(("Internal error: Unrecognized modifier spec %s in `include_mods`"):format(
						dump(mod)))
				end
				mods[mod] = spec
			end
		end
	end

	if exclude_mods then
		for _, mod in ipairs(exclude_mods) do
			if not mods[mod] then
				error(("Internal error: Modifier spec %s in `exclude_mods` not found among existing modifiers"
					):format(dump(mod)))
			end
			mods[mod] = nil
		end
	end

	return parse_inline_modifiers(raw_value, {
		paramname = data.paramname,
		param_mods = mods,
		generate_obj = generate_obj,
		splitchar = data.splitchar,
		preserve_splitchar = data.preserve_splitchar,
		delimiter_key = data.delimiter_key,
		escape_fun = data.escape_fun,
		unescape_fun = data.unescape_fun,
		pre_normalize_modifiers = data.pre_normalize_modifiers,
	})
end

--[==[
Parse a list of inflection forms that may have inline modifiers attached. `data` is an object with the following fields:
* `forms`: The list of raw values to parse. Required.
* `paramname`: The name of the first parameter from which the value was taken; used in error messages. If this is a
  two-element list, the first element is the first parameter and the second element is the prefix of the remaining
  parameters. Parameter names that are numbers are handled correctly, as are those with \1 in it marking where the
  parameter index goes. Required.
* `qualifiers`: If specified, a possibly gappy list of left qualifiers to add to the parsed terms (for compatibility
  purposes).
* `splitchar`: As in `parse_term_with_modifiers()`. The resulting per-term lists will be flattened.
* `frob`, `include_mods`, `exclude_mods`, `is_head`, `preserve_splitchar`, `delimiter_key`, `escape_fun`, `unescape_fun`,
  `pre_normalize_modifiers`: As in `parse_term_with_modifiers()`.

Returns a list of objects, suitable for storing as one of the lists in `headdata.inflections` (once a label is added),
where `headdata` is the structure passed to [[Module:headword]].
]==]
function export.parse_term_list_with_modifiers(data)
	local paramname, forms, qualifiers = data.paramname, data.forms, data.qualifiers

	-- `paramname` is either a single name (used both for the first parameter and as the prefix of the rest) or a
	-- {first, prefix} pair.
	local first, restpref = paramname, paramname
	if type(paramname) == "table" then
		first, restpref = paramname[1], paramname[2]
	end

	-- Name of the parameter holding the form at position `index`, for use in error messages.
	local function paramname_for(index)
		if index == 1 and first then
			return first
		end
		if type(restpref) == "number" then
			-- Numeric parameters simply count upward.
			return restpref + index - 1
		end
		if restpref:find("\1", nil, true) then
			-- \1 marks where the index goes.
			return (restpref:gsub("\1", tostring(index)))
		end
		return restpref .. index
	end

	local terms = {}
	-- Work on a copy so the caller's table is not modified when `paramname` and `val` are set below.
	local parse_args = shallow_copy(data)
	for i, val in ipairs(forms) do
		parse_args.paramname = paramname_for(i)
		parse_args.val = val
		local parsed = export.parse_term_with_modifiers(parse_args)
		local qualifier = qualifiers and qualifiers[i]
		if parse_args.splitchar then
			-- `parsed` is a list of term objects; tag each one and flatten into the result.
			if qualifier then
				for _, term in ipairs(parsed) do
					term.q = {qualifier}
				end
			end
			extend(terms, parsed)
		else
			-- `parsed` is a single term object.
			if qualifier then
				parsed.q = {qualifier}
			end
			terms[i] = parsed
		end
	end
	return terms
end

--[==[
Construct a link to [[Appendix:Glossary]] for `entry`. If `text` is specified, it is the display text; otherwise,
`entry` is used.
]==]
function export.glossary_link(entry, text)
	text = text or entry
	return "[[အဆက်လက္ကရဴ:မသောၚ်ကၠးဝေါဟာ#" .. entry .. "|" .. text .. "]]"
end

--[==[
Replace every `<<entry>>` or `<<entry|text>>` span in `label` with a glossary link built by `glossary_link()`. Labels
without any `<<` are returned unchanged.
]==]
function export.replace_glossary_links_in_label(label)
	if label:find("<<", nil, true) then
		-- Match one <<...>> span at a time and only then look for a | inside it. Matching the two-part form directly
		-- with "<<(.-)|(.-)>>" lets the first capture run across an earlier one-part link, e.g. in
		-- "<<a>> and <<b|c>>".
		label = label:gsub("<<(.-)>>", function(inner)
			local entry, text = inner:match("^(.-)|(.*)$")
			if entry then
				return export.glossary_link(entry, text)
			end
			return export.glossary_link(inner)
		end)
	end
	return label
end

--[==[
Insert a fixed (term-less) inflection consisting only of a label. `data` is an object with the following fields:
* `headdata`: The headword structure; used as the insertion target when `inflobj` is not given.
* `inflobj`: The object whose `inflections` field receives the entry. Defaults to `headdata`.
* `label`: The label text; <<...>> spans are converted to glossary links.
* `originating_term`: Optional term object that triggered the fixed label. If given, it is reused (and modified in
  place) as the inserted entry so that its qualifiers, labels and references are preserved; its `term` field is
  cleared. It must not carry an `id`.
]==]
function export.insert_fixed_inflection(data)
	local headdata, origterm, label = data.headdata, data.originating_term, data.label
	local inflobj = data.inflobj or headdata
	inflobj.inflections = inflobj.inflections or {}
	if not origterm then
		insert(inflobj.inflections, {
			label = export.replace_glossary_links_in_label(label)
		})
	else
		if origterm.id then
			error(("It doesn't make sense to pass in an ID '%s' for label '%s' in conjunction with a term value '%s'"
				):format(origterm.id, label, origterm.term))
		end
		-- Preserve qualifiers, labels, references
		origterm.term = nil
		origterm.label = export.replace_glossary_links_in_label(label)
		insert(inflobj.inflections, origterm)
	end
end

--[==[
Insert previously-parsed terms into an `inflections` field. The `inflections` field will be initialized if needed.
`data` is an object with the following fields:
* `headdata`: The headword structure passed to [[Module:headword]]. Required.
* `inflobj`: The object whose `inflections` field the terms are inserted into. Defaults to `headdata`. Only needs to be
  set for nested inflections, which are specified for an inflection object rather than the headword data structure as a
  whole.
* `terms`: The list of parsed terms. If {nil} or omitted, nothing happens unless `request` is set.
* `label`: The label that the inflections are given; any parts of the label surrounded in <<...>> are linked to the
  glossary. (If the contents of <<...>> contain a | in them, they are a two-part link.) Required.
* `no_label`: If the term is {"-"} and there are no other terms, insert a fixed label with this value. Defaults to
  {"no "} plus the label.
* `usually_no_label`: If the term is {"-"} and there are other terms, insert a fixed label with this value. Defaults to
  {"usually no "} plus the label.
* `accel`: If specified, a full accelerator object to add to the inflections.
* `request`: If specified and no terms are given, insert a label with a request for inflections to be given.
* `enable_auto_translit`: If specified and terms are given, display automatic transliteration of the terms.
]==]
function export.insert_inflection(data)
	local headdata, label = data.headdata, data.label
	local terms = data.terms
	local target = data.inflobj or headdata

	-- No terms at all: at most insert a request for the inflection to be supplied.
	if not (terms and terms[1]) then
		if data.request then
			target.inflections = target.inflections or {}
			insert(target.inflections, {
				label = export.replace_glossary_links_in_label(label),
				request = true,
			})
		end
		return
	end

	-- A leading "-" means "no such form" (if it is the only term) or "usually no such form" (if more terms follow).
	-- The default label prefixes are the localized string literals below.
	local first_term = terms[1]
	if first_term.term == "-" then
		local is_only_term = not terms[2]
		local fixed_label
		if is_only_term then
			fixed_label = data.no_label or "ဟၟဲ " .. label
		else
			fixed_label = data.usually_no_label or "ဗွဲတၟေၚ်ဟၟဲ " .. label
		end
		export.insert_fixed_inflection {
			headdata = headdata,
			inflobj = target,
			originating_term = first_term,
			label = fixed_label,
		}
		if is_only_term then
			return
		end
		-- Drop the "-" so that only the real terms are inserted below.
		remove(terms, 1)
	end

	if data.check_missing then
		error("check_missing support removed; use checkredlinks=true in [[Module:headword]]")
	end

	-- The list of terms itself becomes the inflection entry, with the extra properties stored as named fields on it.
	terms.label = export.replace_glossary_links_in_label(label)
	if data.accel then
		terms.accel = data.accel
	end
	terms.enable_auto_translit = data.enable_auto_translit
	target.inflections = target.inflections or {}
	insert(target.inflections, terms)
end

--[==[
Parse raw arguments from `forms` for inline modifiers, and insert the resulting terms (which should not require
significant additional processing) into `headdata.inflections`. `data` is an object with the following fields:
* `forms`: The list of raw values to parse. If {nil} or omitted, nothing happens.
* `headdata`: The headword structure passed to [[Module:headword]]. Required.
* `paramname`: As in `parse_term_list_with_modifiers()`. Required.
* `label`: As in `insert_inflection()`. Required.
* `qualifiers`, `frob`, `include_mods`, `exclude_mods`, `is_head`, `splitchar`, `preserve_splitchar`, `delimiter_key`,
  `escape_fun`, `unescape_fun`, `pre_normalize_modifiers`: As in `parse_term_list_with_modifiers()`.
* `accel`: As in `insert_inflection()`.
]==]
function export.parse_and_insert_inflection(data)
	local forms = data.forms
	if not forms or not forms[1] then
		-- Nothing to parse, nothing to insert.
		return
	end
	-- Work on a copy so the caller's table does not gain a `terms` field.
	local args = shallow_copy(data)
	args.forms = forms
	args.terms = export.parse_term_list_with_modifiers(args)
	export.insert_inflection(args)
end

--[==[
Merge two lists of qualifiers or labels. When only one list is given, that list itself is returned (not a copy); when
neither is given, the result is {nil}. Otherwise the result is a new list holding the items of `quals1` followed by
those items of `quals2` not already present.
]==]
function export.combine_qualifiers_or_labels(quals1, quals2)
	if quals1 and quals2 then
		local merged = shallow_copy(quals1)
		for _, item in ipairs(quals2) do
			insert_if_not(merged, item)
		end
		return merged
	end
	return quals1 or quals2 or nil
end

--[==[
Combine the qualifiers, labels, references and ID's of two term objects. `destobj` is the "destination term object"
into which the combined properties are written, and `srcobj` is the "source object" from which the properties are
merged. `destobj` is side-effected (but the lists inside of `destobj` are not); if this is undesirable, make sure to
shallow-copy `destobj` first. If both objects have values for a given qualifier, label or reference, the values of
`destobj` come first. If both objects have a value for `id`, the values must match or an error is thrown; otherwise,
the resulting value of `id` comes from whichever one is defined.

'''NOTE:''' This may not be the correct behavior when deduplicating a list of term objects. See
`insert_termobj_combining_duplicates` for a different approach.
]==]
function export.combine_termobj_qualifiers_labels(destobj, srcobj)
	-- Each list-valued property is merged via combine_qualifiers_or_labels(), with `destobj`'s values passed first.
	destobj.q = export.combine_qualifiers_or_labels(destobj.q, srcobj.q)
	destobj.qq = export.combine_qualifiers_or_labels(destobj.qq, srcobj.qq)
	destobj.l = export.combine_qualifiers_or_labels(destobj.l, srcobj.l)
	destobj.ll = export.combine_qualifiers_or_labels(destobj.ll, srcobj.ll)
	destobj.refs = export.combine_qualifiers_or_labels(destobj.refs, srcobj.refs)
	if destobj.id and srcobj.id and destobj.id ~= srcobj.id then
		-- FIXME: We probably want to pass in an error function
		error(("Can't specify two different ID's %s and %s when combining objects"):format(srcobj.id, destobj.id))
	end
	destobj.id = destobj.id or srcobj.id
	return destobj
end

--[==[
Return a truthy value if `obj` has at least one qualifier (`q`, `qq`), label (`l`, `ll`) or reference (`refs`). The
value returned is the first element found, not a boolean. The `id` field is not considered.
]==]
function export.termobj_has_qualifiers_or_labels(obj)
	return obj.q and obj.q[1] or obj.qq and obj.qq[1] or obj.l and obj.l[1] or obj.ll and obj.ll[1] or
		obj.refs and obj.refs[1]
end

-- Compare one list-valued ancillary property of two term objects. A missing list and an empty list count as equal;
-- otherwise the lists are compared with deepEquals().
local function one_ancillary_property_equal(prop1, prop2)
	local prop1_is_nil = not prop1 or not prop1[1]
	local prop2_is_nil = not prop2 or not prop2[1]
	if prop1_is_nil and prop2_is_nil then
		return true
	end
	if prop1_is_nil or prop2_is_nil then
		return false
	end
	return deepEquals(prop1, prop2)
end

--[==[
Return true if the qualifiers, labels, references and `id` of `obj1` and `obj2` are all equal. Missing and empty lists
are treated alike; `id` values are compared with `==`.
]==]
function export.termobj_ancillary_properties_equal(obj1, obj2)
	return one_ancillary_property_equal(obj1.q, obj2.q) and
		one_ancillary_property_equal(obj1.qq, obj2.qq) and
		one_ancillary_property_equal(obj1.l, obj2.l) and
		one_ancillary_property_equal(obj1.ll, obj2.ll) and
		one_ancillary_property_equal(obj1.refs, obj2.refs) and
		obj1.id == obj2.id
end

--[==[
Convert a term object (fields `term`, `tr`, `q`, `qq`, `l`, `ll`, `refs`, `id`) into a form object with fields `form`,
`translit` and `footnotes`. Each qualifier, label, reference and the ID becomes a footnote string of the form
`[PREFIX:VALUE]`, where the prefix is `q`, `qq`, `l`, `ll`, `ref` or `id`. `footnotes` is {nil} if there is nothing to
record.
]==]
function export.convert_termobj_to_formobj(termobj)
	local formobj = {
		form = termobj.term,
		translit = termobj.tr,
	}
	local footnotes
	-- Append one "[prefix:value]" footnote per value; the footnote list is created on first use.
	local function mods_to_footnote(mod_prefix, mod_vals)
		if mod_vals and mod_vals[1] then
			footnotes = footnotes or {}
			for _, val in ipairs(mod_vals) do
				insert(footnotes, "[" .. mod_prefix .. ":" .. val .. "]")
			end
		end
	end
	mods_to_footnote("q", termobj.q)
	mods_to_footnote("qq", termobj.qq)
	mods_to_footnote("l", termobj.l)
	mods_to_footnote("ll", termobj.ll)
	mods_to_footnote("ref", termobj.refs)
	mods_to_footnote("id", termobj.id and {termobj.id} or nil)
	formobj.footnotes = footnotes
	return formobj
end

-- Footnote prefixes that map onto list-valued term object fields (prefix -> field name).
local recognized_multi_mods = {
	q = "q",
	qq = "qq",
	l = "l",
	ll = "ll",
	ref = "refs",
}

-- Footnote prefixes that map onto single-valued term object fields (prefix -> field name).
local recognized_single_mods = {
	id = "id",
}

--[==[
Store a bracketed footnote such as `[q:archaic]` into the appropriate field of `termobj`. A recognized lowercase prefix
(`q`, `qq`, `l`, `ll`, `ref`, `id`) selects the field; a footnote with no prefix, or with an unrecognized one, is stored
whole (minus the brackets) as a left label in `l`. Throws an error if the footnote is not surrounded by brackets, or if
a single-valued field (`id`) would receive two different values.
]==]
function export.add_footnote_to_termobj(termobj, footnote)
	local stripped_footnote = footnote:match("^%[(.*)%]$")
	if not stripped_footnote then
		error("Internal error: Footnote should be surrounded by brackets at this stage: " .. footnote)
	end
	local prefix, rest = stripped_footnote:match("^([a-z]+):(.+)$")
	local field, is_multi
	if prefix then
		if recognized_multi_mods[prefix] then
			field = recognized_multi_mods[prefix]
			is_multi = true
		elseif recognized_single_mods[prefix] then
			field = recognized_single_mods[prefix]
			is_multi = false
		end
	end
	if not field then
		-- No usable prefix: keep the entire footnote text, including any unrecognized "xxx:" part.
		rest = stripped_footnote
		field = "l"
		is_multi = true
	end
	if is_multi then
		if not termobj[field] then
			termobj[field] = {}
		end
		insert(termobj[field], rest)
	else
		if termobj[field] and termobj[field] ~= rest then
			error(("Can't set two values for '%s': '%s' and '%s'"):format(field, termobj[field], rest))
		end
		termobj[field] = rest
	end
end

--[==[
Inverse of `convert_termobj_to_formobj()`: build a term object from a form object (fields `form`, `translit`,
`footnotes`), distributing each footnote with `add_footnote_to_termobj()`.
]==]
function export.convert_formobj_to_termobj(formobj)
	local termobj = {
		term = formobj.form,
		tr = formobj.translit,
	}
	if formobj.footnotes then
		for _, footnote in ipairs(formobj.footnotes) do
			export.add_footnote_to_termobj(termobj, footnote)
		end
	end
	return termobj
end

-- Split a field value into its leading modifier ("*", "+" or "" if there is none) and the remaining text.
local function extract_termobj_field_modifiers(fieldval)
	return fieldval:match("^([*+]?)(.*)$")
end

--[==[
Strip the leading `*` or `+` modifier from every value in the `q`, `qq`, `l`, `ll` and `refs` fields of `termobj`,
modifying `termobj` in place. A field is only rewritten if at least one of its values carries a modifier; when it is
rewritten, values that become identical after stripping are collapsed into one.
]==]
function export.remove_termobj_field_modifiers(termobj)
	local function remove_field_modifiers(field)
		if termobj[field] and termobj[field][1] then
			local any_field_modifiers = false
			for _, val in ipairs(termobj[field]) do
				local field_mods = extract_termobj_field_modifiers(val)
				if field_mods ~= "" then
					any_field_modifiers = true
					break
				end
			end
			if any_field_modifiers then
				local new_field = {}
				for _, val in ipairs(termobj[field]) do
					local _, field_without_mods = extract_termobj_field_modifiers(val)
					insert_if_not(new_field, field_without_mods)
				end
				termobj[field] = new_field
			end
		end
	end

	remove_field_modifiers("q")
	remove_field_modifiers("qq")
	remove_field_modifiers("l")
	remove_field_modifiers("ll")
	remove_field_modifiers("refs")
end

--[==[
Insert `termobj` into the list `destobjs`, unless the list already holds an object with the same `term` and `tr`; in
that case the existing object is updated instead and nothing is appended. For each of the fields `q`, `qq`, `l`, `ll`
and `refs` for which `termobj` has values:
* existing values whose leading modifier is `*` are removed from the existing object;
* values of `termobj` whose leading modifier is `+` are appended (modifier included), unless a value with the same text
  after stripping modifiers is already present. Values of `termobj` without a `+` modifier are not copied over.
The existing object's field list is replaced by a copy before values are appended to it. If both objects have an `id`,
the two must match or an error is thrown; otherwise the resulting `id` is whichever one is defined.
]==]
function export.insert_termobj_combining_duplicates(destobjs, termobj)
	for _, destobj in ipairs(destobjs) do
		if destobj.term == termobj.term and destobj.tr == termobj.tr then
			-- Form already present; maybe combine footnotes.
			local function combine_field_values(field)
				local new_vals = termobj[field]
				if not (new_vals and new_vals[1]) then
					return
				end

				-- Check to see if there are existing values with *; if so, remove them. Only the leading
				-- modifier counts; a * elsewhere in the text is ordinary content.
				local existing_vals = destobj[field]
				if existing_vals and existing_vals[1] then
					local any_values_with_asterisk = false
					local filtered_values = {}
					for _, val in ipairs(existing_vals) do
						local field_mods = extract_termobj_field_modifiers(val)
						if field_mods == "*" then
							any_values_with_asterisk = true
						else
							insert(filtered_values, val)
						end
					end
					if any_values_with_asterisk then
						destobj[field] = filtered_values[1] and filtered_values or nil
					end
				end

				-- This flag must be a local: as a global it would stay set for every later field and call.
				local any_values_with_plus = false
				for _, val in ipairs(new_vals) do
					local field_mods = extract_termobj_field_modifiers(val)
					if field_mods == "+" then
						any_values_with_plus = true
						break
					end
				end

				if any_values_with_plus then
					-- Copy before appending so that a list shared with another object is not modified.
					destobj[field] = destobj[field] and shallow_copy(destobj[field]) or {}
					for _, val in ipairs(new_vals) do
						local field_mods, field_without_mods = extract_termobj_field_modifiers(val)
						if field_mods == "+" then
							local already_seen = false
							for _, existing_val in ipairs(destobj[field]) do
								local _, existing_field_without_mods =
									extract_termobj_field_modifiers(existing_val)
								if existing_field_without_mods == field_without_mods then
									already_seen = true
									break
								end
							end
							if not already_seen then
								insert(destobj[field], val)
							end
						end
					end
				end
			end

			combine_field_values("q")
			combine_field_values("qq")
			combine_field_values("l")
			combine_field_values("ll")
			combine_field_values("refs")
			if destobj.id and termobj.id and destobj.id ~= termobj.id then
				-- FIXME: We probably want to pass in an error function
				error(("Can't specify two different ID's %s and %s when combining objects"):format(termobj.id,
					destobj.id))
			end
			destobj.id = destobj.id or termobj.id
			return
		end
	end
	insert(destobjs, termobj)
end

-- Set of recognized special indicators, without the leading "+".
export.allowed_special_indicators = {
	["first"] = true,
	["first-second"] = true,
	["first-last"] = true,
	["second"] = true,
	["last"] = true,
	["each"] = true,
	["+"] = true, -- requests the default behavior with preposition handling
}

--[==[
Check for special indicators (values such as {"+first"} or {"+first-last"} that are used in a `pl`, `f`, etc. argument
and indicate how to inflect a multiword term). If `form` is such an indicator, the return value is `form` minus the
initial `+` sign; otherwise, if form begins with a `+` sign, an error is thrown (or, if `noerror` is set, {nil} is
returned instead); otherwise the return value is nil.
]==]
function export.get_special_indicator(form, noerror)
	if form:find("^%+") then
		form = form:gsub("^%+", "")
		if not export.allowed_special_indicators[form] then
			if noerror then
				return nil
			end
			-- List the allowed indicators in sorted order so that the message is stable.
			local indicators = {}
			for indic, _ in pairs(export.allowed_special_indicators) do
				insert(indicators, "+" .. indic)
			end
			sort(indicators)
			error("Special inflection indicator beginning with '+' can only be " ..
				mw.text.listToText(indicators) .. ": +" .. form)
		end
		return form
	end
	return nil
end

-- Return the list of all concatenations BASE .. ENDING, with bases varying slowest. Either argument may be a single
-- value instead of a list.
local function add_endings(bases, endings)
	local retval = {}
	if type(bases) ~= "table" then
		bases = {bases}
	end
	if type(endings) ~= "table" then
		endings = {endings}
	end
	for _, base in ipairs(bases) do
		for _, ending in ipairs(endings) do
			insert(retval, base .. ending)
		end
	end
	return retval
end

--[==[
Inflect a possibly multiword or hyphenated term `form` using the function `inflect`, which is a function of one argument
that is called on a single word to inflect and should return either the inflected word or a list of inflected words.
`special` indicates how to inflect the multiword term and should be e.g. {"first"} to inflect only the first word,
{"first-last"} to inflect the first and last words, {"each"} to inflect each word, etc. See `allowed_special_indicators`
above for the possibilities. If `special` is `+`, or is omitted and the term is multiword (i.e. containing a space
character), and `prepositions` is supplied, the function checks for multiword or hyphenated terms containing the
prepositions in `prepositions`, e.g. Italian [[senso di marcia]] or [[medaglia d'oro]] or Portuguese
[[tartaruga-do-mar]]. If such a term is found, only the first word is inflected. Otherwise, the default is
{"first-last"}. `prepositions` is a list of Lua patterns matching prepositions. The patterns will automatically have the
separator character (space or hyphen) added to the left side but not the right side, so they should contain a space
character (which will automatically be converted to the appropriate separator) on the right side unless the preposition
is joined on the right side with an apostrophe. Examples of preposition patterns for Italian are {"di "}, {"sull'"} and
{"d?all[oae] "} (which matches {"dallo "}, {"dalle "}, {"alla "}, etc.).

The return value is always either a list of inflected multiword or hyphenated terms, or nil if `special` is omitted and
`form` is not multiword.
(If `special` is specified and `form` is not multiword or hyphenated, an error results.)

`sep`, if given, is the separator as a Lua pattern ({" "} or {"%-"}); when omitted, a space is used if `form` contains
one, and a hyphen otherwise.
]==]
function export.handle_multiword(form, special, inflect, prepositions, sep)
	sep = sep or form:find(" ") and " " or "%-"
	local raw_sep = sep == " " and " " or "-"
	-- Used to add regex version of separator in the replacement portion of ugsub() or :gsub()
	local sep_replacement = sep == " " and " " or "%%-"
	-- Given a Lua pattern, replace space with the appropriate separator.
	local function hack_re(re)
		if sep == " " then
			return re
		end
		return (re:gsub(" ", sep_replacement))
	end

	if special == "first" then
		local first, rest = form:match(hack_re("^(.-)( .*)$"))
		if not first then
			error("Special indicator 'first' can only be used with a multiword term: " .. form)
		end
		return add_endings(inflect(first), rest)
	elseif special == "second" then
		local first, second, rest = form:match(hack_re("^([^ ]+ )([^ ]+)( .*)$"))
		if not first then
			error("Special indicator 'second' can only be used with a term with three or more words: " .. form)
		end
		return add_endings(add_endings({first}, inflect(second)), rest)
	elseif special == "first-second" then
		local first, space, second, rest = form:match(hack_re("^([^ ]+)( )([^ ]+)( .*)$"))
		if not first then
			error("Special indicator 'first-second' can only be used with a term with three or more words: " .. form)
		end
		return add_endings(add_endings(add_endings(inflect(first), space), inflect(second)), rest)
	elseif special == "each" then
		local terms = split(form, sep)
		if #terms < 2 then
			error("Special indicator 'each' can only be used with a multiword term: " .. form)
		end
		for i, term in ipairs(terms) do
			terms[i] = inflect(term)
			if i > 1 then
				-- Put the separator back in front of every word but the first.
				terms[i] = add_endings(raw_sep, terms[i])
			end
		end
		-- Build every combination of the per-word inflections.
		local result = ""
		for _, term in ipairs(terms) do
			result = add_endings(result, term)
		end
		return result
	elseif special == "first-last" then
		local first, middle, last = form:match(hack_re("^(.-)( .* )(.-)$"))
		if not first then
			-- Only two words: the "middle" is just the separator.
			first, middle, last = form:match(hack_re("^(.-)( )(.*)$"))
		end
		if not first then
			error("Special indicator 'first-last' can only be used with a multiword term: " .. form)
		end
		return add_endings(add_endings(inflect(first), middle), inflect(last))
	elseif special == "last" then
		local rest, last = form:match(hack_re("^(.* )(.-)$"))
		if not rest then
			error("Special indicator 'last' can only be used with a multiword term: " .. form)
		end
		return add_endings(rest, inflect(last))
	elseif special and special ~= "+" then
		error("Unrecognized special=" .. special)
	end

	-- Only do default behavior if special indicator '+' explicitly given or separator is space; otherwise we will
	-- break existing behavior with hyphenated words.
	if (special == "+" or sep == " ") and form:find(sep) then
		if prepositions then
			-- check for prepositions in the middle of the word; do it this way so we can handle
			-- more than one word before the preposition (and usually inflect each word)
			for _, prep in ipairs(prepositions) do
				local first, space_prep_rest = umatch(form, hack_re("^(.-)( " .. prep .. ".*)$"))
				if first then
					return add_endings(inflect(first), space_prep_rest)
				end
			end
		end

		-- multiword or hyphenated expressions default to first-last; we need to pass in the separator to avoid
		-- problems with multiword terms containing hyphens in the individual words
		return export.handle_multiword(form, "first-last", inflect, prepositions, sep)
	end

	return nil
end

-- Link a single component of a split word: use `data.link_hyphen_split_component` if given, else wrap the word in
-- double brackets.
local function link_hyphen_split_component(word, data)
	if data.link_hyphen_split_component then
		return data.link_hyphen_split_component(word)
	else
		return "[[" .. word .. "]]"
	end
end

-- Default function to split a word on apostrophes. Don't split apostrophes at the beginning or end of a word (e.g.
-- [['ndrangheta]] or [[po']]). Handle multiple apostrophes correctly, e.g. [[l'altr'ieri]] -> [[l']][[altr']][[ieri]].
-- Each apostrophe is kept in the link to its left. `data` is only used to link the individual pieces (see
-- link_hyphen_split_component()). Returns the concatenation of the linked pieces.
function export.default_split_apostrophe(word, data)
	local apostrophe_parts = split(word, "'", true, true)
	local linked_apostrophe_parts = {}
	local apostrophes_at_beginning = ""
	local i = 1
	-- Apostrophes at beginning get attached to the first word after (which will always exist but may
	-- be blank if the word consists only of apostrophes).
	while i < #apostrophe_parts do -- <, not <=, in case the word consists only of apostrophes
		-- Test the part before advancing, so that a non-blank first part (the "l" of "l'eau") is not skipped.
		if apostrophe_parts[i] ~= "" then
			break
		end
		apostrophes_at_beginning = apostrophes_at_beginning .. "'"
		i = i + 1
	end
	apostrophe_parts[i] = apostrophes_at_beginning .. apostrophe_parts[i]
	-- Now, do the remaining parts. A blank part indicates more than one apostrophe in a row; we join
	-- all of them to the preceding word.
	local num_parts = #apostrophe_parts
	while i <= num_parts do
		local apostrophe_part = apostrophe_parts[i]
		if apostrophe_part == "" then
			-- A blank part in the middle stands for one more apostrophe in a row. A blank part at the very end
			-- only means the word ends in an apostrophe, which the preceding part has already received.
			if i < num_parts then
				linked_apostrophe_parts[#linked_apostrophe_parts] =
					linked_apostrophe_parts[#linked_apostrophe_parts] .. "'"
			end
		elseif i == num_parts then
			insert(linked_apostrophe_parts, apostrophe_part)
		else
			insert(linked_apostrophe_parts, apostrophe_part .. "'")
		end
		i = i + 1
	end
	for j, tolink in ipairs(linked_apostrophe_parts) do
		linked_apostrophe_parts[j] = link_hyphen_split_component(tolink, data)
	end
	return concat(linked_apostrophe_parts)
end

--[=[
Auto-add links to a word that should not have spaces but may have hyphens and/or apostrophes. We split off final
punctuation, then split on hyphens if `data.split_hyphen` is given, and also split on apostrophes if
`data.split_apostrophe` is given.
We only split on hyphens if they are in the middle of the word, not at the beginning or end (hyphens at the beginning or
end indicate suffixes or prefixes, respectively). `include_hyphen_prefixes`, if given, is a set of prefixes (not
including the final hyphen) where we should include the final hyphen in the prefix. Hence, e.g. if "anti" is in the
set, a Portuguese word like [[anti-herói]] "anti-hero" will be split [[anti-]][[herói]] (whereas a word like
[[código-fonte]] "source code" will be split as [[código]]-[[fonte]]).

If `data.split_apostrophe` is specified, we split on apostrophes unless `data.no_split_apostrophe_words` is given and
the word is in the specified set, such as French [[c'est]] and [[quelqu'un]]. If `data.split_apostrophe` is true, the
default algorithm applies, which splits on all apostrophes except those at the beginning and end of a word (as in
Italian [['ndrangheta]] or [[po']]), and includes the apostrophe in the link to its left (so we auto-split French
[[l'eau]] as [[l']][[eau]] and [[l'altr'ieri]] as [[l']][[altr']][[ieri]]). If `data.split_apostrophe` is specified but
not `true`, it should be a function of one argument that does custom apostrophe-splitting. The argument is the word to
split, and the return value should be the split and linked word.

Arguments: `space_word` is the word to link; `data` holds the settings; `term_has_spaces` selects between
`data.split_hyphen_when_space` and `data.split_hyphen_when_no_space`. Returns the linked word as a string, with any
split-off punctuation appended.
]=]
local function add_single_word_links(space_word, data, term_has_spaces)
	local space_word_no_punct, punct
	-- `data.punctuation` is either a callable returning (word without punctuation, punctuation) or a pattern
	-- matching the final punctuation; {nil} selects the default pattern.
	local punct_pattern = data.punctuation
	if punct_pattern and is_callable(punct_pattern) then
		space_word_no_punct, punct = punct_pattern(space_word)
	else
		if punct_pattern == nil then
			punct_pattern = "[,;:?!]"
		end
		space_word_no_punct, punct = umatch(space_word, "^(.*)(" .. punct_pattern .. ")$")
	end
	-- No final punctuation found: link the whole word.
	space_word_no_punct = space_word_no_punct or space_word
	punct = punct or ""
	local words
	if space_word_no_punct:sub(1, 1) == "-" or space_word_no_punct:sub(-1) == "-" then
		-- don't split prefixes and suffixes
		words = {space_word_no_punct}
	else
		local splitter
		if term_has_spaces then
			splitter = data.split_hyphen_when_space
		else
			splitter = data.split_hyphen_when_no_space
		end
		if is_callable(splitter) then
			-- A custom splitter may return a finished string, which is returned as-is (plus punctuation), or a
			-- value that is used as the list of words to link below.
			words = splitter(space_word_no_punct)
			if type(words) == "string" then
				return words .. punct
			end
		end
	end

	-- Reached when no list of words was produced above.
	if not words then
		local split_hyphen
		if term_has_spaces then
			split_hyphen = data.split_hyphen_when_space
		else
			split_hyphen = data.split_hyphen_when_no_space
			if split_hyphen == nil then
				-- default to true; use `false` to avoid this
				split_hyphen = true
			end
		end
		if split_hyphen then
			words = split(space_word_no_punct, "-", true, true)
		else
			words = {space_word_no_punct}
		end
	end

	local linked_words = {}
	for j, word in ipairs(words) do
		if j < #words and data.include_hyphen_prefixes and data.include_hyphen_prefixes[word] then
			-- Known prefix (any part but the last): the hyphen goes inside the link.
			word = "[[" .. word .. "-]]"
		elseif j > 1 and data.include_hyphen_suffixes and data.include_hyphen_suffixes[word] then
			-- Known suffix (any part but the first): the hyphen goes inside the link.
			word = "[[-" .. word .. "]]"
		else
			-- Don't split on apostrophes if the word is in `no_split_apostrophe_words`.
			if (not data.no_split_apostrophe_words or not data.no_split_apostrophe_words[word]) and
				data.split_apostrophe and word:find("'", nil, true) then
				if data.split_apostrophe == true then
					word = export.default_split_apostrophe(word, data)
				else -- custom apostrophe splitter/linker
					word = data.split_apostrophe(word)
				end
			elseif word ~= "" then -- avoid -[[]]- (e.g. f--k)
				word = link_hyphen_split_component(word, data)
			end
			-- Put back the hyphen that the split removed, outside the link.
			if j < #words then
				word = word .. "-"
			end
		end
		insert(linked_words, word)
	end
	return concat(linked_words) .. punct
end

--[=[
Auto-add links to a multiword term. `data` contains fields customizing how to do this.
By default we proceed as follows: (1) If the term already has embedded links in it, they are left unchanged. (2) Otherwise, if there are spaces present, we split on spaces and link each word separately. (3) If a given space-separated component ends in punctuation (defaulting to [,;:?!]), it is separated off, the remainder of the algorithm is run, and the punctuation pasted back on. (4) If there are hyphens in a given space-separated component, we may link each hyphenated term separately depending on the settings in `data`. Normally the hyphens are not included in the linked terms, but this can be overridden for specific prefixes and/or suffixes. By default, if there are spaces in the multiword term, we do not link hyphenated components (because of cases like "boire du petit-lait" where "petit-lait" should be linked as a whole), but do so otherwise (e.g. for "avant-avant-hier"); this can be overridden for cases like "croyez-le ou non". Cases where only some of the hyphens should be split can always be handled by explicitly specifying the head (e.g. "Nord-Pas-de-Calais" given as head=[[Nord]]-[[Pas-de-Calais]]). (5) If there are apostrophes in a given component, we may link each apostrophe-separated term separately depending on the settings in `data`, including the apostrophe in the link to its left (so we split "de l'eau" as "[[de]] [[l']][[eau]]"). The settings in `data` are as follows: `split_hyphen_when_no_space`: Whether to split on hyphens when the term has no spaces. Defaults to true if set to `nil`. This can be a function of one argument, to implement a custom splitting algorithm for hyphen-separated terms. If this returns a string, that string is used as-is as the linked result (with any final punctuation pasted back on); if it returns a list of words, each word is processed as if it had been split off at a hyphen; if it returns `nil`, the word is split on hyphens in the default fashion. If `data.split_apostrophe` is specified, we split on apostrophes unless `data.no_split_apostrophe_words` is given and the word is in the specified set, such as French [[c'est]] and [[quelqu'un]].
If `data.split_apostrophe` is true, the default algorithm applies, which splits on all apostrophes except those at the
beginning and end of a word (as in Italian [['ndrangheta]] or [[po']]), and includes the apostrophe in the link to its
left (so we auto-split French [[l'eau]] as [[l']][[eau]] and [[l'altr'ieri]] as [[l']][[altr']][[ieri]]). If
`data.split_apostrophe` is specified but not `true`, it should be a function of one argument that does custom
apostrophe-splitting. The argument is the word to split, and the return value should be the split and linked word.

We don't always split on hyphens because of cases like "boire du petit-lait" where "petit-lait" should be linked as a
whole, but provide the option to do it for cases like "croyez-le ou non". If there's no space, however, then it makes
sense to split on hyphens by default.

`no_split_apostrophe_words` and `include_hyphen_prefixes` allow for special-case handling of particular words and are
as described in the comment above add_single_word_links().
]=]
function export.add_links_to_multiword_term(term, data)
	-- A term that already contains a bracket is taken to be linked by hand; hand it back untouched.
	if term:match("[%[%]]") then
		return term
	end

	local space_words = split(term, " ", true, true)
	local has_spaces = #space_words > 1

	-- Link every space-separated word on its own, keeping the original order.
	local pieces = {}
	for idx, space_word in ipairs(space_words) do
		pieces[idx] = add_single_word_links(space_word, data, has_spaces)
	end
	local joined = concat(pieces, " ")

	-- If the result is nothing but one link around the whole term, return the bare term instead of the link.
	local sole_link_target = joined:match("^%[%[([^%[%]]*)%]%]$")
	if sole_link_target then
		return sole_link_target
	end
	return joined
end

-- Split a FROM:TO spec at its first colon and return FROM and TO. A spec without a colon is returned whole as FROM,
-- with the empty string as TO.
local function canonicalize_begin_end_spec(spec)
	local colon_pos = spec:find(":", 1, true)
	if colon_pos then
		return spec:sub(1, colon_pos - 1), spec:sub(colon_pos + 1)
	end
	return spec, ""
end

--[==[
Given a `linked_term` that is the output of add_links_to_multiword_term(), apply modifications as given in `modifier_spec` to change the link destination of subterms (normally single-word non-lemma forms; sometimes collections of adjacent words).
This is usually used to link non-lemma forms to their corresponding lemma, but can also be used to replace a span of adjacent separately-linked words with a single multiword lemma. The format of `modifier_spec` is one or more semicolon-separated subterm specs, where each such spec is of the form SUBTERM:DEST, where SUBTERM is one or more words in the `linked_term` but without brackets in them, and DEST is the corresponding link destination to link the subterm to. Any occurrence of ~ in DEST is replaced with SUBTERM. Alternatively, a single modifier spec can be of the form BEGIN[FROM:TO], which is equivalent to writing BEGINFROM:BEGINTO (see example below). For example, given the source phrase [[il bue che dice cornuto all'asino]] "the pot calling the kettle black" (literally "the ox that calls the donkey horned/cuckolded"), the result of calling add_links_to_multiword_term() is [[il]] [[bue]] [[che]] [[dice]] [[cornuto]] [[all']][[asino]]. With a modifier_spec of 'dice:dire', the result is [[il]] [[bue]] [[che]] [[dire|dice]] [[cornuto]] [[all']][[asino]]. Here, based on the modifier spec, the non-lemma form [[dice]] is replaced with the two-part link [[dire|dice]]. Another example is the source phrase [[chi semina vento raccoglie tempesta]] "sow the wind, reap the whirlwind" (literally "(he) who sows wind gathers [the] tempest"). The result of calling add_links_to_multiword_term() is [[chi]] [[semina]] [[vento]] [[raccoglie]] [[tempesta]], and with a modifier_spec of 'semina:~re; raccoglie:~re', the result is [[chi]] [[seminare|semina]] [[vento]] [[raccogliere|raccoglie]] [[tempesta]]. Here we use the ~ notation to stand for the non-lemma form in the destination link. A more complex example is [[se non hai altri moccoli puoi andare a letto al buio]], which becomes [[se]] [[non]] [[hai]] [[altri]] [[moccoli]] [[puoi]] [[andare]] [[a]] [[letto]] [[al]] [[buio]] after calling add_links_to_multiword_term().
With the following modifier_spec: 'hai:avere; altr[i:o]; moccol[i:o]; puoi: potere; andare a letto:~; al buio:~', the
result of applying the spec is [[se]] [[non]] [[avere|hai]] [[altro|altri]] [[moccolo|moccoli]] [[potere|puoi]]
[[andare a letto]] [[al buio]]. Here, we rely on the alternative notation mentioned above for e.g. 'altr[i:o]', which
is equivalent to 'altri:altro', and link multiword subterms using e.g. 'andare a letto:~'. (The code knows how to
handle multiword subexpressions properly, and if the link text and destination are the same, only a single-part link
is formed.)

`lang` is the language object used for links built through [[Module:links]] when a spec carries an <id:...> but no
language prefix of its own. The return value is `linked_term` with all modifier specs applied in order; an error is
thrown if a subterm cannot be found.
]==]
function export.apply_link_modifiers(linked_term, modifier_spec, lang)
	local modspecs = split(modifier_spec, "%s*;%s*")
	for spec_index, modspec in ipairs(modspecs) do
		-- Peel off a trailing <id:...>, if present.
		local id
		if modspec:find("<") then
			local without_id, found_id = modspec:match("^(.*)<id:(.-)>$")
			id = found_id
			if without_id then
				modspec = without_id
			end
		end

		local subterm, dest, otherlang

		-- Form [FROM:TO]MIDDLE[FROM:TO]: both ends of the subterm differ from the destination.
		do
			local head_spec, middle, tail_spec = modspec:match("^%[(.-)%]([^:]*)%[(.-)%]$")
			if head_spec then
				local head_from, head_to = canonicalize_begin_end_spec(head_spec)
				local tail_from, tail_to = canonicalize_begin_end_spec(tail_spec)
				subterm = head_from .. middle .. tail_from
				dest = head_to .. middle .. tail_to
			end
		end

		-- Form BEGIN[FROM:TO]: only the end differs.
		if not subterm then
			local stem, tail_spec = modspec:match("^([^:]*)%[(.-)%]$")
			if stem then
				local tail_from, tail_to = canonicalize_begin_end_spec(tail_spec)
				subterm = stem .. tail_from
				dest = stem .. tail_to
			end
		end

		-- Form [FROM:TO]END: only the beginning differs.
		if not subterm then
			local head_spec, stem = modspec:match("^%[(.-)%]([^:]*)$")
			if head_spec then
				local head_from, head_to = canonicalize_begin_end_spec(head_spec)
				subterm = head_from .. stem
				dest = head_to .. stem
			end
		end

		-- Form SUBTERM:DEST.
		if not subterm then
			subterm, dest = modspec:match("^(.-)%s*:%s*(.*)$")
			local is_anchor = subterm == "^" or subterm == "$"
			if subterm and not is_anchor then
				-- Parse off an initial language code (e.g. 'en:Higgs', 'la:minūtia' or 'grc:σκατός'). Also handle
				-- Wikipedia prefixes ('w:Abatemarco' or 'w:it:Colle Val d'Elsa').
				local prefix, remainder = dest:match("^([A-Za-z0-9._-]+):([^ ].*)$")
				if prefix == "w" then
					local wiki_code, wiki_title = remainder:match("^([A-Za-z0-9._-]+):([^ ].*)$")
					if wiki_code then
						prefix = prefix .. ":" .. wiki_code
						remainder = wiki_title
					end
					-- Wikipedia destinations keep their prefix in the link target and carry no language object.
					dest = ("%s:%s"):format(prefix, remainder)
				elseif prefix then
					otherlang = get_lang(prefix, true, "allow etym")
					dest = remainder
				end
			end
		end

		-- Bare spec with no colon and no brackets.
		if not subterm then
			if modspec == "?" or modspec == "!" then
				subterm, dest = "$", modspec
			elseif modspec == "..." or modspec == "...?" then
				subterm, dest = "$", " " .. modspec
			elseif modspec:find("^[A-Z]$") then
				-- X, Y, etc. by themselves are unlinked, to help with snowclones
				subterm, dest = modspec, "_"
			else
				subterm, dest = modspec, "~"
			end
		end

		if subterm == "^" then
			-- Text to prepend; underscores stand for spaces.
			local addition = dest:gsub("_", " ")
			linked_term = addition .. linked_term
		elseif subterm == "$" then
			-- Text to append; underscores stand for spaces.
			local addition = dest:gsub("_", " ")
			linked_term = linked_term .. addition
		else
			if subterm:find("[", nil, true) then
				error(("Subterm '%s' in modifier spec '%s' cannot have brackets in it"):format(
					escape_wikicode(subterm), escape_wikicode(modspec)))
			end

			-- Build a pattern matching the subterm inside `linked_term`: allow link brackets to close before and
			-- reopen after every space, apostrophe, comma or hyphen, so that a multiword subterm matches its
			-- separately-linked words.
			local loose_subterm = pattern_escape(subterm):gsub("(%%?[ ',%-])", "%%]*%1%%[*")
			local subterm_re = "%[%[" .. loose_subterm .. "%]%]"

			-- Expand ~ in the destination to the subterm itself.
			local expanded_dest = dest
			if dest:find("~", nil, true) then
				expanded_dest = dest:gsub("~", replacement_escape(subterm))
			end
			if otherlang then
				expanded_dest = expanded_dest .. "#" .. otherlang:getCanonicalName()
			end

			local subterm_replacement
			if expanded_dest == "_" then
				-- Destination _ means: leave the subterm unlinked.
				subterm_replacement = subterm
				if id then
					error("Can't supply <id:...> with an unlinked subterm")
				end
				if otherlang then
					error("Can't supply prefixed language with an unlinked subterm")
				end
			elseif id or otherlang then
				if id and expanded_dest:find("[", nil, true) then
					error("Can't supply <id:...> with destination with embedded brackets")
				end
				subterm_replacement = require(links_module).language_link {
					lang = otherlang or lang,
					term = expanded_dest,
					alt = subterm,
					id = id,
				}
			elseif expanded_dest:find("[", nil, true) then
				-- Use the destination directly if it has brackets in it (e.g. to put brackets around parts of a
				-- word).
				subterm_replacement = expanded_dest
			elseif expanded_dest == subterm then
				subterm_replacement = "[[" .. subterm .. "]]"
			else
				subterm_replacement = "[[" .. expanded_dest .. "|" .. subterm .. "]]"
			end

			local escaped_subterm_replacement = replacement_escape(subterm_replacement)
			local new_linked_term = ugsub(linked_term, subterm_re, escaped_subterm_replacement)
			-- An unchanged result means either that the subterm was not found or that the replacement is identical
			-- to what was already there; both are reported as an error.
			if new_linked_term == linked_term then
				mw.log(("Attempted to replace %s with %s in %s"):format(subterm_re, escaped_subterm_replacement,
					linked_term))
				error(("Subterm '%s' could not be located in %slinked expression %s, or replacement same as subterm"):format(
					subterm, spec_index > 1 and "intermediate " or "", escape_wikicode(linked_term)))
			end
			linked_term = new_linked_term
		end
	end
	return linked_term
end

return export
babk1tf6hzii7n0v9tfb26mix17q3bq ကဏ္ဍ:ဝေါဟာအာရဗဳမဆေၚ်စပ်ကဵုတံရိုဟ်နကဵုဝေါဟာ ض ر ر 14 114488 395275 290602 2026-05-22T11:54:56Z 咽頭べさ 33 395275 wikitext text/x-wiki [[ကဏ္ဍ:မအရေဝ်အာရဗဳဗက်အလိုက်တံရိုဟ်ဂမၠိုၚ်|ض ر ر]][[ကဏ္ဍ:တံရိုဟ်အာရဗဳ မလိက်၃-မဂမၠိုၚ်| ض ر ر ]] phc8z7o1hbk0o13di77t6t140tmsuvc ကဏ္ဍ:ဝေါဟာအာရဗဳမဆေၚ်စပ်ကဵုတံရိုဟ်နကဵုဝေါဟာ ه م م 14 294834 395274 2026-05-22T11:54:07Z 咽頭べさ 33 ခၞံကၠောန်လဝ် မုက်လိက် နကု "[[ကဏ္ဍ:ဘာသာအာရဗဳ]]" 395274 wikitext text/x-wiki [[ကဏ္ဍ:ဘာသာအာရဗဳ]]
as7f28r0bi3z0teqzderhbox9oqqqop 395276 395274 2026-05-22T11:58:01Z 咽頭べさ 33 395276 wikitext text/x-wiki [[ကဏ္ဍ:မအရေဝ်အာရဗဳဗက်အလိုက်တံရိုဟ်ဂမၠိုၚ်|ه م م]][[ကဏ္ဍ:တံရိုဟ်အာရဗဳ မလိက်၃-မဂမၠိုၚ်| ه م م]] 2efcni0i6la3dqgtdv2k9s68yim90ox