模組:zh-translit
呢個模組嘅解說可以喺模組:zh-translit/doc度開
local export = {}
--- Build the failure return values used by `export.tr`.
-- @param lang     Language code, passed to `Module:languages`' `getByCode`
--                 when a request category has to be built.
-- @param request  When truthy, a one-element list naming the
--                 "Requests for transliteration of <language> terms"
--                 category is returned as the third value.
-- @return nil, true, and either nil or the category list described above.
local function fail(lang, request)
	local cat
	if request then
		-- The language object is only needed for its canonical name, so it is
		-- looked up only on this path.
		local lang_obj = require("Module:languages").getByCode(lang)
		cat = {"Requests for transliteration of " .. lang_obj:getCanonicalName() .. " terms"}
	end
	return nil, true, cat
end
-- Maps a language code to the name of the {{zh-pron}} parameter that holds its
-- romanisation. Each value is spliced straight into a Lua pattern, so pattern
-- magic characters must be escaped here (e.g. "-" is written "%-").
local zhpron_lect_code = {
	cdo = "md", -- Eastern Min (Min-dong)
	cjy = "j", -- Jin
	cmn = "m", -- Mandarin
	["cmn-sic"] = "m%-s", -- Sichuanese
	gan = "g", -- Gan
	hak = "h", -- Hakka
	hsn = "x", -- Xiang
	ltc = "mc", -- Middle Chinese
	mnp = "mb", -- Northern Min (Min-bei)
	nan = "mn", -- Hokkien Southern Min (Min-nan)
	och = "oc", -- Old Chinese
	wuu = "w", -- Wu
	yue = "c", -- Cantonese
	zh = "m", -- Chinese (general); uses Mandarin pinyin
	["zhx-tai"] = "c%-t", -- Taishanese
	["zhx-teo"] = "mn%-t", -- Teochew
}
--- Transliterate a term by reading the romanisation stored in the {{zh-pron}}
-- template on the page whose title is `text`.
--
-- If the page has no usable {{zh-pron}} value for the lect, pages referenced
-- through {{zh-see}} are searched as well. For "ltc" and "och" the value found
-- (if any) is treated as per-character reading indices into the
-- "Module:zh/data/<lang>-pron..." data modules.
--
-- @param text  Page title to look up. nil or "" is returned unchanged.
-- @param lang  Language code. A code that is not a key of `zhpron_lect_code`
--              is first replaced by its `getNonEtymologicalCode()`.
-- @param sc    Not used by this function.
-- @return On success, the transliteration followed by one space. On failure,
--         the values produced by `fail`: nil, true and an optional category
--         list.
function export.tr(text, lang, sc)
	if (not text) or text == "" then
		return text
	end

	if not zhpron_lect_code[lang] then
		lang = require("Module:languages").getByCode(lang, nil, true):getNonEtymologicalCode()
	end

	-- Middle Chinese and Old Chinese readings come from per-character data
	-- modules, so they can still be produced when the entry page is missing.
	local is_historical = lang == "ltc" or lang == "och"

	-- Normalises raw wikitext so the patterns below can work on it.
	-- Returns the processed string; nil when there is no wikitext but the lect
	-- does not need any ("ltc"/"och"); false when there is no wikitext and the
	-- lect cannot be transliterated without it.
	local function process_content(raw)
		if not raw then
			if is_historical then
				return nil
			end
			return false
		end
		-- Remove HTML comments, convert template brackets to individual
		-- characters ("\1" for "{{", "\2" for "}}").
		raw = raw
			:gsub("<!%-%-", "\1")
			:gsub("%-%->", "\2")
			:gsub("\1[^\2]*%f[%z\2]\2?", "")
			:gsub("\2", "-->")
			:gsub("{{", "\1")
			:gsub("}}", "\2")
		-- Remove any templates nested within {{zh-pron}}, innermost first.
		local subs
		repeat
			raw, subs = raw:gsub("(\1zh%-pron[^\2]*)\1[^\2]*\2", "%1")
		until subs == 0
		-- Unify the separator between alternative readings to ";". These
		-- lects use "," in the source; every other lect uses "/".
		local separator = "/([^ ])"
		if lang == "cmn" or lang == "wuu" or lang == "yue" or lang == "zh" or lang == "zhx-tai" then
			separator = ",([^ ])"
		end
		-- Parentheses drop gsub's substitution count.
		return (raw:gsub(separator, ";%1"))
	end

	local tr
	local title = mw.title.new(text)
	local content = process_content(title and title:getContent())
	if content == false then
		return fail(lang)
	end

	if content then
		-- Pattern prefix matching "{{zh-pron ... |<param>=" for this lect.
		local param = "\1zh%-pron[^\2]*|%s?" .. zhpron_lect_code[lang] .. "="

		if is_historical then
			-- All {{zh-pron}} sections on the page must agree.
			local pron
			for pron_sect in content:gmatch(param .. "[^|\2\n]-([^=|\2\n]+)") do
				if pron and pron ~= pron_sect then
					return fail(lang)
				end
				pron = pron_sect
			end
			tr = pron
		else
			-- A reading explicitly marked "{default}" wins outright.
			tr = content:match(param .. "[^|\2\n]-;*([^;=|\2\n]+){default}")
			if not tr then
				-- Returns the single reading shared by every {{zh-pron}} in
				-- `wikitext`, nil if there is none, or false if the readings
				-- conflict.
				local function get_prons(wikitext)
					local lower = mw.ustring.lower
					local ret
					for pron_sect in wikitext:gmatch(param .. "([^\2|\n]*[^%s\2|\n][^\2|\n]*)") do
						-- Keep only real readings; items containing "=" are
						-- modifiers. A fresh list is built so that "#" is
						-- well defined (no holes).
						local prons = {}
						for _, pron in ipairs(mw.text.split(mw.text.trim(pron_sect), "%s*;%s*")) do
							if not pron:match("=") then
								prons[#prons + 1] = pron
							end
						end
						-- NOTE(review): "^." matches one byte, so a multi-byte
						-- initial letter is not passed to `lower` as a whole
						-- character - confirm whether that is intended.
						if #prons > 2 then
							return false
						elseif #prons == 2 then
							-- Two readings are acceptable only if they differ
							-- in the case of the first letter alone.
							if prons[1]:gsub("^.", lower) == prons[2]:gsub("^.", lower) then
								prons[1] = prons[1]:gsub("^.", lower)
							else
								return false
							end
						elseif #prons == 1 and ret and prons[1] ~= ret then
							if prons[1]:gsub("^.", lower) == ret:gsub("^.", lower) then
								prons[1] = prons[1]:gsub("^.", lower)
							else
								return false
							end
						end
						-- A section that held only modifiers contributes nothing.
						if #prons > 0 then
							ret = prons[1]
						end
					end
					return ret
				end

				tr = get_prons(content)
				if tr == false then
					return fail(lang, true)
				elseif not tr then
					local pages, seen_pages = {}, {}
					-- Follows {{zh-see}} links recursively, collecting the
					-- processed wikitext of every linked page that has a
					-- non-blank {{zh-pron}} value for this lect.
					local function get_sees(wikitext)
						for pron_see in wikitext:gmatch("\1zh%-see|[^\2]*\2") do
							local page = pron_see:match("|1=([^|\2]+)[|\2]") or pron_see:match("\1zh%-see|([^|\2]+)[|\2]")
							-- Skip links with no target, and pages we have
							-- already seen.
							if page and not seen_pages[page] then
								seen_pages[page] = true
								-- Otherwise, get the page content and repeat.
								local see_title = mw.title.new(page)
								local see_content = see_title and process_content(see_title:getContent())
								-- nil/false here means an invalid title or a
								-- page without content: nothing to inspect.
								if see_content then
									if see_content:match(param .. "[^\2|\n]*[^%s\2|\n][^\2|\n]*") then
										table.insert(pages, see_content)
									else
										get_sees(see_content)
									end
								end
							end
						end
					end
					get_sees(content)
					content = table.concat(pages)
					tr = get_prons(content)
					if tr == false then
						return fail(lang, true)
					end
				end
				if not tr then
					return fail(lang)
				end
			end
		end
	end

	if lang == "cmn" or lang == "zh" then
		local Hani = require("Module:scripts").getByCode("Hani"):getCharacters()
		tr = tr:gsub("#", "")
		-- If the value still contains Han characters, replace each one with
		-- the first entry listed for it in the tag data.
		if mw.ustring.match(tr, "[" .. Hani .. "]") then
			local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
			local tag = mw.loadData("Module:zh/data/cmn-tag").MT
			tr = tr
				:gsub("一", "yī")
				:gsub("不", "bù")
				:gsub(UTF8_char, function(c1)
					if tag[c1] then
						return tag[c1][1]:gsub("^([aāáǎàeēéěèoōóǒò])", "'%1")
					else
						return c1
					end
				end)
				:gsub("^'", "") --remove initial apostrophe inserted by previous function
		end
	elseif lang == "cmn-sic" then
		tr = tr
			:gsub("([%d-])(%a)", "%1 %2")
			:gsub("[%d-]+", "<sup>%0</sup>")
	elseif lang == "hak" then
		-- TODO
	elseif lang == "ltc" or lang == "och" then
		if tr == "n" then
			return fail(lang)
		end
		-- `index` starts as the list of per-character reading selectors and
		-- is overwritten, entry by entry, with the resulting romanisations.
		local index = {}
		if tr then
			if lang == "ltc" then
				index = mw.text.split(tr, ",")
			else
				index = mw.text.split(tr, ";")
			end
		end
		local module_type = lang .. "-pron"
		if lang == "och" then
			module_type = module_type .. "-ZS"
		end
		for i = 1, mw.ustring.len(text) do
			local success, data_module = pcall(require, "Module:zh/data/" .. module_type .. "/" .. mw.ustring.sub(text, i, i))
			-- Fail when the character has no data module, or when it has
			-- several readings and none was selected explicitly.
			if not success or (((not index[i]) or index[i] == "y") and #data_module > 1) then
				return fail(lang)
			end
			if index[i] == "y" then
				index[i] = 1
			elseif index[i] then
				index[i] = tonumber(index[i])
			end
			index[i] = index[i] and data_module[index[i]] or data_module[1]
			if lang == "ltc" then
				local data = mw.loadData("Module:ltc-pron/data")
				local initial, final, tone = require("Module:ltc-pron").infer_categories(index[i])
				index[i] = data.initialConv["Zhengzhang"][initial] .. data.finalConv["Zhengzhang"][final] .. tone
			else
				index[i] = index[i][6]
			end
		end
		tr = table.concat(index, " ")
		if lang == "och" then
			tr = "*" .. tr
		end
	elseif lang == "nan" then
		-- TODO
	elseif lang == "yue" then
		tr = tr:gsub("[%d-]+", "<sup>%0</sup>")
	elseif lang == "zhx-tai" then
		tr = tr:gsub("[%d*]+%-?[%d*]*", "<sup>%0</sup>")
	elseif lang == "zhx-teo" then
		-- TODO
	elseif lang == "wuu" then
		tr = require("Module:wuu-pron").wugniu_format(tr)
	else
		tr = require("Module:" .. lang .. "-pron").rom(tr)
	end

	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return tr .. " "
end
return export