Модул:ru-pron
Documentation for this module may be created at Модул:ru-pron/doc
local export = {}
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local find = mw.ustring.find
local len = mw.ustring.len
local match = mw.ustring.match
-- Per-character Cyrillic-to-Latin transliteration table. translit() applies it
-- with gsub(text, '.', translit_tab), so any character that is not a key here
-- (stress marks, spaces, Latin letters, ...) is left unchanged.
-- Only lower-case Cyrillic letters are listed.
local translit_tab = {
['а']='a', ['б']='b', ['в']='v', ['г']='g', ['д']='d', ['е']='e', ['ё']='jó', ['ж']='ž', ['з']='z', ['и']='i', ['й']='j',
['к']='k', ['л']='l', ['м']='m', ['н']='n', ['о']='o', ['п']='p', ['р']='r', ['с']='s', ['т']='t', ['у']='u', ['ф']='f',
['х']='x', ['ц']='c', ['ч']='č', ['ш']='š', ['щ']='šč', ['ъ']='ʺ', ['ы']='y', ['ь']='ʹ', ['э']='e', ['ю']='ju', ['я']='ja',
}
-- gsub callback used by translit() to decide whether a run of е/ѣ letters is
-- written with a leading "j".
--
-- Called either with one capture (the е-run at the start of the text) or with
-- two captures (the preceding character and the е-run). Every letter of the
-- run is first rewritten to its iotated form (Je/je/Jě/jě). If there is no
-- preceding character, or the preceding character belongs to the set below,
-- the run is returned unchanged; otherwise the first character of the run
-- (the "j" or "J") is dropped.
--
-- NOTE(review): inside the set, "%A" is the Lua pattern class for non-letter
-- characters rather than a literal "A", so a preceding space or hyphen also
-- keeps the "j" -- confirm this is the intended reading.
local function translit_replace_e(pre, e)
	-- One-argument form: the only capture is the е-run itself.
	if e == nil then
		pre, e = "", pre
	end
	local iotated = gsub(e, '.', {["Е"] = "Je", ["е"] = "je", ["Ѣ"] = "Jě", ["ѣ"] = "jě"})
	local keeps_j = pre == "" or find(pre, "[АОУҮЫЭЯЁЮИЕЪЬІѢѴаоуүыэяёюиеъьіѣѵ%AEIOUYĚaeiouyě]")
	if not keeps_j then
		-- After any other character the leading j/J is not written.
		iotated = sub(iotated, 2)
	end
	return pre .. iotated
end
-- Transliterates text, which should be a single word or phrase. It should
-- include stress marks, which are then preserved in the transliteration
-- (characters without an entry in translit_tab pass through unchanged).
--
-- text: Cyrillic string to transliterate.
-- lang, sc: accepted for interface compatibility; not used by this function.
-- Returns the transliterated string.
local function translit(text, lang, sc)
	-- Remove word-final hard sign (at the end of the text, or before a
	-- hyphen or space).
	text = gsub(text, "[Ъъ]$", "")
	text = gsub(text, "[Ъъ]([- ])", "%1")

	-- Ё needs converting if it is decomposed (Е/е followed by U+0308 COMBINING
	-- DIAERESIS). The code points are written as byte escapes because the
	-- decomposed and precomposed spellings look identical in source, and
	-- Unicode NFC normalisation of the source text would turn a literal
	-- decomposed "ё" into the precomposed one, making this step a no-op.
	--   "\208\181\204\136" = U+0435 U+0308  ->  "\209\145" = U+0451 (ё)
	--   "\208\149\204\136" = U+0415 U+0308  ->  "\208\129" = U+0401 (Ё)
	text = text:gsub("\208\181\204\136", "\209\145"):gsub("\208\149\204\136", "\208\129")

	-- ё after a "hushing" consonant becomes ó (ё is mostly stressed)
	text = gsub(text, "([жшчщЖШЧЩ])ё","%1ó")
	-- ю after ж and ш becomes u (e.g. брошюра, жюри)
	text = gsub(text, "([жшЖШ])ю","%1u")

	-- е after a vowel or at the beginning of a word becomes je; the decision
	-- is made by translit_replace_e from the preceding character.
	text = gsub(text, "^([ЕеѢѣ]+)", translit_replace_e)
	text = gsub(text, "(.)([ЕеѢѣ]+)",translit_replace_e)

	-- Parentheses drop gsub's second return value (the substitution count).
	return (gsub(text,'.',translit_tab))
end
-- Pattern fragments shared by the rules below. `vowels` / `non_vowels` are
-- character sets over the transliterated vowel symbols; the `_c` variants are
-- the same sets wrapped in a capture. `cyr_vowels_c` is a captured set of
-- lower-case Cyrillic vowel letters.
local vowels, vowels_c, non_vowels, non_vowels_c, cyr_vowels_c = '[aäeëɛəiyoöuü]', '([aäeëɛəiyoöuü])', '[^aäeëɛəiyoöuü]', '([^aäeëɛəiyoöuü])', '([аяеёэиыоую])'
-- Consonant clusters (in transliteration) that export.ipa keeps together at
-- the start of a syllable. During syllabification the consonants around a
-- syllable boundary are looked up here, first as a three-consonant and then as
-- a two-consonant sequence, and the boundary is moved in front of a cluster
-- that is found.
local perm_syl_onset = {
['str'] = true,
['sp'] = true, ['st'] = true, ['sk'] = true, ['sf'] = true, ['sx'] = true, ['sc'] = true,
['pr'] = true, ['kr'] = true, ['fr'] = true, ['xr'] = true,
['pl'] = true, ['tl'] = true, ['kl'] = true, ['gl'] = true, ['fl'] = true, ['xl'] = true,
['ml'] = true, ['mn'] = true,
['šč'] = true, ['dž'] = true,
}
-- Final conversion of the internal one-character consonant symbols to IPA.
-- export.ipa applies it as the last per-word step with
-- gsub(pron, '[cčgĵǰšžǯӂχ]', translit_conv).
local translit_conv = {
['c'] = 't͡s', ['č'] = 't͡ɕ', ['g'] = 'ɡ', ['ĵ'] = 'd͡z', ['ǰ'] = 'd͡ʑ', ['ǯ'] = 'ɕ', ['ӂ'] = 'ʑ', ['š'] = 'ʂ', ['ž'] = 'ʐ', ['χ'] = 'ɣ'
}
-- Vowel realisations, indexed by the internal vowel symbol. Each entry holds
-- three outputs chosen by export.ipa per syllable:
--   [1] the syllable is treated as stressed;
--   [2] the syllable is unstressed and either the next syllable is stressed
--       or it is the first syllable and begins with a vowel;
--   [3] any other unstressed syllable.
local allophones = {
['a'] = { 'a', 'ɐ', 'ə' },
['e'] = { 'e', 'ɪ', 'ɪ' },
['i'] = { 'i', 'ɪ', 'ɪ' },
['o'] = { 'o', 'ɐ', 'ə' },
['u'] = { 'u', 'ʊ', 'ʊ' },
['y'] = { 'ɨ', 'ɨ', 'ɨ' },
['ɛ'] = { 'ɛ', 'ɨ', 'ɨ' },
['ä'] = { 'æ', 'ɪ', 'ɪ' },
['ë'] = { 'e', 'ɪ', 'ɪ' },
['ö'] = { 'ɵ', 'ɪ', 'ɪ' },
['ü'] = { 'ʉ', 'ʉ', 'ʉ' },
['ə'] = { 'ə', 'ə', 'ə' },
}
-- Voiced -> voiceless consonant map (internal symbols). Used by export.ipa
-- both character-by-character (assimilation before a voiceless consonant) and
-- as a direct lookup for word-final devoicing, where the key may carry a
-- trailing ʲ -- hence the two-character entries.
-- NOTE(review): there is no 'gʲ' or 'χʲ' entry, so the word-final lookup
-- yields nil for those and the consonant is left as it is.
local devoicing = {
['b'] = 'p', ['d'] = 't', ['g'] = 'k',
['z'] = 's', ['v'] = 'f',
['ž'] = 'š', ['χ'] = 'x',
['bʲ'] = 'pʲ', ['dʲ'] = 'tʲ',
['zʲ'] = 'sʲ', ['vʲ'] = 'fʲ',
['žʲ'] = 'šʲ'
}
-- Voiceless -> voiced consonant map (internal symbols). export.ipa applies it
-- character-by-character to a run of voiceless consonants that stands before
-- one of b, d, g, z, ž.
local voicing = {
['p'] = 'b', ['t'] = 'd', ['k'] = 'g',
['s'] = 'z', ['f'] = 'v',
['š'] = 'ž', ['c'] = 'ĵ', ['č'] = 'ǰ', ['x'] = 'χ', ['ǯ'] = 'ӂ'
}
-- Word-initial patterns (in transliteration, each ending in the length mark ː)
-- that export.ipa matches against the de-accented word. When one matches, the
-- length marks in the syllable being processed are kept instead of being
-- stripped.
local geminate_pref = {
--'^abː',
'^adː', '^bezː', '^braomː', '^vː', '^voszː', '^izː', '^inː', '^kontrː', '^nadː', '^niszː',
'^o[cdmtč]ː', '^podː', '^predː', '^paszː', '^pozː', '^sː', '^sverxː', '^subː', '^tröxː', '^čeresː', '^četyröxː', '^črezː',
}
-- Phonetic respelling rules applied to the lower-cased Cyrillic text before
-- transliteration. Each key is a pattern and each value is the replacement
-- passed to gsub: a string, or a function that returns the replacement (or
-- nil to leave the match unchanged).
--
-- NOTE(review): export.ipa walks this table with pairs(), whose traversal
-- order is unspecified, so rules whose matches overlap may be applied in any
-- order. Verify that no rule depends on another having run first.
local phon_respellings = {
['-'] = ' ', --replace hyphens with a space; needs handling for prefixes (e.g. по-) and suffixes (e.g. -то), which are reduced
[vowels_c .. '([шж])ю'] = '%1%2у', [vowels_c .. '([шжц])е'] = '%1%2э', [vowels_c .. '([шжц])и'] = '%1%2ы', [vowels_c .. '([шж])ё'] = '%1%2о́',
['́ть?ся'] = '́цца', ['([^́])ть?ся'] = '%1ца',
-- д/т (+ optional ь) + с: rewritten with ц unless the following character is я
-- (in which case nothing is returned and the match is left alone).
['[дт](ь?)с(.?)'] = function(a, b)
if b ~= 'я' then
if gsub(b, cyr_vowels_c, '') == '' then
return 'ц' .. a .. 'с' .. b
else
return 'ц' .. a .. b
end
end end,
['[дт]з' .. cyr_vowels_c] = 'ĵз%1', ['^о[дт]с'] = 'оцс',
['([щч])о'] = '%1ё', ['([щч])а'] = '%1я', ['([щч])у'] = '%1ю',
['([^рн])[дт]ц'] = '%1цц', ['[тд]ч'] = 'чч',
['йо́'] = 'ё',
['стг'] = 'сг',
['([шжщч])ь$'] = '%1',
['сверхи'] = 'сверхы',
['стьд'] = 'зд',
['тьд'] = 'дд',
['р[дт]ц'] = 'рц', ['р[дт]ч'] = 'рч',
['здн'] = 'зн', ['[сз][дт]ц'] = 'сц',
['лнц'] = 'нц', ['н[дт]ц'] = 'нц',
['[сз]тл'] = 'сл', ['[сз]тн'] = 'сн',
['[сзшж]ч'] = 'щ', ['[сзшж]щ'] = 'щ',
['[зс]ш'] = 'шш', ['[зс]ж'] = 'жж',
['н[ндт]ск'] = 'нск',
['[сз]ск'] = 'ск',
['с[дт]ск'] = 'ск',
['гк'] = 'хк',
['н[дт]ш'] = 'нш',
['н[дт]г'] = 'нг',
-- Text-final -ого/-его (optionally with an acute accent, \204\129) -> -ово/-ево.
-- NOTE(review): `adj` here is not a local in scope at this point of the file,
-- so it resolves to a global variable and not to the `adj` parameter of
-- export.ipa. While that global is unset the test `adj ~= false` is true and
-- the replacement is always made.
['(.[ое])го(\204\129?)$'] = function(a, b)
if adj ~= false then
return a .. 'во' .. b
end end,
['э'] = 'ɛ',
}
-- Consonant pairs subject to assimilative palatalisation of the first
-- consonant. export.ipa looks up the first consonant concatenated with the
-- following consonant (including its ʲ, if any):
--   'compulsory' pairs get ʲ inserted after the first consonant;
--   'optional' pairs get the parenthesised superscript ⁽ʲ⁾ instead.
local cons_assim_palatal = {
['compulsory'] = {
['stʲ'] = true, ['zdʲ'] = true,
['nč'] = true, ['nǯ'] = true
},
['optional'] = {
['slʲ'] = true, ['zlʲ'] = true, ['snʲ'] = true, ['znʲ'] = true,
['tnʲ'] = true, ['dnʲ'] = true,
['nsʲ'] = true, ['nzʲ'] = true, ['ntʲ'] = true, ['ndʲ'] = true,
}
}
-- Words that export.ipa attaches to a neighbouring word instead of
-- transcribing on their own:
--   'prep': joined to the FOLLOWING word with the tie character ‿
--           (unless it is the last word);
--   'post': appended directly to the PRECEDING word
--           (unless it is the first word).
-- Lookup is by exact match on the space-separated, lower-cased word.
-- NOTE(review): export.ipa replaces hyphens with spaces before splitting, so
-- the hyphenated keys 'из-под' and 'из-за' can never match as written.
--@Wyang - they may carry the stress too, as alternatives - по́ небу/по не́бу, etc.
local accentless = {
['prep'] = {
['без'] = true, ['близ'] = true,
['в'] = true, ['во'] = true,
['до'] = true,
['из-под'] = true, ['из-за'] = true,
['за'] = true,
['из'] = true, ['изо'] = true,
['к'] = true, ['ко'] = true,
['меж'] = true,
['на'] = true, ['над'] = true, ['надо'] = true,
['о'] = true, ['об'] = true, ['обо'] = true, ['от'] = true,
['по'] = true, ['под'] = true, ['подо'] = true, ['пред'] = true, ['предо'] = true, ['при'] = true, ['про'] = true,
['перед'] = true, ['передо'] = true,
['через'] = true,
['с'] = true, ['со'] = true,
['у'] = true,
['не'] = true
},
['post'] = {
['то'] = true, ['либо'] = true, ['нибудь'] = true,
['бы'] = true, ['б'] = true,
['же'] = true, ['ж'] = true,
['ка'] = true, ['тка'] = true,
['ли'] = true
}
}
--- Produce an IPA-style phonetic transcription of Russian text.
--
-- Can be called from Lua with plain arguments, or with a frame-like table as
-- the first argument. In the latter case the values are read from `text.args`:
-- `phon` (or positional 1) is the text, and `adj`, `gem`, `pal` come from the
-- same-named args; an empty text falls back to the current page title.
--
-- @param text  string to transcribe (stress marked with combining acute/grave
--              accents), or a table with an `args` field.
-- @param adj   accepted and normalised, but never read inside this function.
--              NOTE(review): the -го rule in `phon_respellings` tests a global
--              named `adj`, not this parameter.
-- @param gem   only the first character is used: 'y' leaves all length marks
--              in place; 'n' strips them (including from n); otherwise they
--              are kept or stripped per syllable by the rules below.
-- @param pal   'y' runs the palatalisation step on every syllable and widens
--              the set of consonants it affects.
-- @return      the transcription as a string.
--
-- All working variables are declared local so that nothing leaks into the
-- global environment shared with other modules.
function export.ipa(text, adj, gem, pal)
	if type(text) == 'table' then
		text, adj, gem, pal = (text.args.phon or text.args[1] or ""), ((text.args.adj or "") ~= ""), (text.args.gem or ""), (text.args.pal or "")
		if text == "" then
			text = mw.title.getCurrentTitle().text
		end
	end
	gem = mw.ustring.sub(gem or '', 1, 1)
	text = gsub(mw.ustring.lower(text), '-', ' ')
	-- precomposed е-with-grave -> plain е followed by a combining grave accent
	text = gsub(text, 'ѐ', 'е' .. '̀')

	--phonetic respellings
	-- NOTE(review): pairs() gives no ordering guarantee for these rules.
	for a, b in pairs(phon_respellings) do
		text = gsub(text, a, b)
	end

	--make monosyllabic prepositions liaise with the following word
	local word = mw.text.split(text, " ", true)
	for i = 1, #word do
		if accentless['prep'][word[i]] and i ~= #word then
			word[i+1] = word[i] .. '‿' .. word[i+1]
			word[i+1] = gsub(word[i+1], '([бдкствхзж])‿и', '%1‿ы')
			word[i] = ''
		elseif accentless['post'][word[i]] and i ~= 1 then
			word[i-1] = word[i-1] .. word[i]
			word[i] = ''
		end
	end
	-- re-join and squeeze out the empty slots left by merged words
	text = table.concat(word, " ")
	text = gsub(text, '^ ', '')
	text = gsub(text, ' $', '')
	text = gsub(text, ' +', ' ')

	--transliterate and tidy up
	text = translit(text)
	text = gsub(text, 'šč', 'ǯː')
	text = gsub(text, 'ó', 'o' .. '́')

	--rewrite iotated vowels
	text = gsub(text, 'j[aeou]', {
		['ja'] = 'ä',
		['je'] = 'ë',
		['jo'] = 'ö',
		['ju'] = 'ü'})

	--voicing/devoicing assimilations
	text = gsub(text, '([bdgzvž]+)([ %-%‿%ː]?[ptksčšǯcx])', function(a, b)
		return gsub(a, '.', devoicing) .. b end)
	text = gsub(text, '([ptksfšcčxǯ]+)([ %-%‿ʹ%ː]?[bdgzž])', function(a, b)
		return gsub(a, '.', voicing) .. b end)

	--re-notate orthographic geminate consonants
	text = gsub(text, (non_vowels_c) .. '%1', '%1ː')

	word = mw.text.split(text, " ", true)
	for i = 1, #word do
		local syllable, syl_conv, pos, stress = {}, {}, {}, {}
		local count = 0
		local pron = word[i]

		--optional iotation of 'e' in a two-vowel sequence and reduction of word-final 'e'
		pron = gsub(pron, '([aäeëɛiyoöuü]́?)ë([^́])', '%1(j)ë%2')
		pron = gsub(pron, 'e$', 'ə')
		pron = gsub(pron, '([aäeëɛəiyoöuüʹ])(́?)[äë]$', '%1%2jə')
		pron = gsub(pron, non_vowels_c .. 'ä$', '%1ʲə')
		pron = gsub(pron, '%(j%)jə', 'jə')

		--syllabify ('/' marks a syllable boundary)
		pron = gsub(pron, 'ʹ([äëöü])', 'ʹ/%1')
		pron = gsub(pron, 'ʹi', 'ʹji')
		pron = gsub(pron, '([aäeëɛəiyoöuü]́?)', '%1/')
		pron = gsub(pron, '/+$', '')
		pron = gsub(pron, '/([^‿/aäeëɛəiyoöuü]*)([^‿/aäeëɛəiyoöuüʹːʲ])(ʹ?ʲ?ː?[aäeëɛəiyoöuü])', '%1/%2%3')
		-- move the boundary in front of a permitted onset cluster, if any
		pron = gsub(pron, '([^‿/aäeëɛəiyoöuü]?)([^‿/aäeëɛəiyoöuü])/([^‿/aäeëɛəiyoöuüʹːʲ])(ʹ?ʲ?ː?[aäeëɛəiyoöuü])', function(a, b, c, d)
			if perm_syl_onset[a .. b .. c] then
				return '/' .. a .. b .. c .. d
			elseif perm_syl_onset[b .. c] then
				return a .. '/' .. b .. c .. d
			end end)
		pron = gsub(pron, '/([^‿/aäeëɛəiyoöuü]+)$', '%1')
		pron = gsub(pron, '/‿', '‿/')

		--remove accent marks from monosyllables
		if len(gsub(pron, non_vowels_c, '')) == 1 and match(pron, 'o' .. '́') then
			pron = gsub(pron, '\204\129', '')
		end

		--write syllable indexes of stressed syllables to a table
		local trimmed_pron = pron
		while match(trimmed_pron, '[́̀]') do -- U+0301 COMBINING ACUTE ACCENT
			local accent_pos = find(trimmed_pron, '[́̀]')
			-- number of '/' before the accent = number of completed syllables
			count = count + len(gsub(sub(trimmed_pron, 1, accent_pos - 1), '[^%/]', ''))
			table.insert(pos, count + 1)
			trimmed_pron = sub(trimmed_pron, accent_pos + 1, -1)
		end

		--treated monosyllabic non-prepositions as if accented
		-- (the callback returns nothing, so pron itself is left unchanged)
		pron = gsub(pron, '(.*)' .. vowels_c .. '(.*)', function(a, b, c)
			if not match(a .. c, vowels) then
				table.insert(pos, 1)
			end end)

		--split by syllable
		syllable = mw.text.split(pron, '/', true)
		if #syllable == 1 then
			table.insert(pos, 1)
		end

		--transform the table of stress positions
		for _, stressed_index in ipairs(pos) do
			stress[stressed_index] = true
		end

		for j = 1, #syllable do
			local syl = syllable[j]

			--remove consonant geminacy if non-initial and non-post-tonic
			if match(syl, 'ː') and gem ~= 'y' then
				local no_replace = false
				if (j == 1 and not match(syl, 'ː$')) or stress[j-1] then
					no_replace = true
				else
					local de_accent = gsub(word[i], '[̀́]', '')
					for k = 1, #geminate_pref do
						if not no_replace and match(de_accent, geminate_pref[k]) then
							no_replace = true
						end
					end
				end
				if gem == 'n' then
					no_replace = false
				end
				if not no_replace then
					syl = gsub(syl, '([^ǯӂn])ː', '%1')
					if gem == 'n' then
						syl = gsub(syl, 'nː', 'n')
					end
				end
				if match(word[i], '[^̀́]nːyj$') then
					syl = gsub(syl, 'nːyj', 'n(ː)yj')
				end
			end

			--assimilative palatalisation of consonants when followed by front vowels
			if pal == 'y' or match(syl, '^[^cĵšžaäeëɛiyoöuü]*[eiəäëöüʹ]') or match(syl, '^[cĵšž][^cĵšžaäeëɛiyoöuüː]+[eiəäëöüʹ]') or match(syl, '^[cĵ][äëü]') then
				syl = gsub(syl, '^([ʺʹ]?)([äëöü])', '%1j%2')
				if not match(syl, 'ʺ') then
					syl = gsub(syl, non_vowels_c .. '([ʹːj]?[aäeëɛəiyoöuüʹ])', function(a, b)
						local set = '[mnpbtdkgcfvszxrl]'
						if pal == 'y' then
							set = '[mnpbtdkgcfvszxrlǯӂšž]'
						end
						set = '(' .. set .. ')'
						return gsub(a, set, '%1ʲ') .. b end)
				end
			end
			syl = gsub(syl, 'ʺ([äëöü])', 'j%1')
			syl = gsub(syl, 'ʺj', 'j')
			syl = gsub(syl, 'ʺ([aɛiouy])', 'ʔ%1')
			-- resolve the soft-sign marker ʹ into ʲ (or drop it after č ǰ š ǯ)
			syl = gsub(syl, '(.?ː?)ʹ', function(a)
				if match(a, '[čǰšǯ]') then
					return a
				elseif a ~= 'ʲ' then
					return a .. 'ʲ'
				else
					return 'ʲ'
				end end)

			--retraction of front vowels in syllables blocking assimilative palatalisation
			if not match(syl, 'ʲː?' .. vowels) and not match(syl, '[čǰǯӂ]ː?[ei]') and not match(syl, '^j?i') then
				syl = gsub(syl, '[ei]', {['e'] = 'ɛ', ['i'] = 'y'})
			end

			--vowel allophony
			if stress[j] or (j == #syllable and match(syllable[j-1] .. syllable[j], '[aieäëü]́?o')) or match(syllable[j], '̀') then
				-- move the stress mark to the front of the syllable
				syl = gsub(syl, '(.*)́', 'ˈ%1')
				syl = gsub(syl, '(.*)̀', 'ˌ%1')
				syl = gsub(syl, '([ʲčǰǯ]ː?)o', '%1ö')
				syl = gsub(syl, vowels_c, function(a)
					if a ~= '' then
						return allophones[a][1]
					end end)
			else
				if not match((syllable[j-1] or '') .. syllable[j], '[ʺʹ]') and (j ~= #syllable or (j == #syllable and not match(syl, 'jə$'))) then
					syl = gsub(syl, 'j' .. gsub(vowels_c, 'ü', ''), '(j)%1')
				end
				if stress[j+1] or (j == 1 and match(syl, '^' .. vowels)) then
					syl = gsub(syl, vowels_c, function(a)
						if a ~= '' then
							return allophones[a][2]
						end end)
				else
					syl = gsub(syl, vowels_c, function(a)
						if a ~= '' then
							return allophones[a][3]
						end end)
				end
			end
			syl_conv[j] = syl
		end

		pron = table.concat(syl_conv, "")

		--consonant assimilative palatalisation
		pron = gsub(pron, '([szntd])(ˈ?)([tdčǰǯlnsz]ʲ?)', function(a, b, c)
			if cons_assim_palatal['compulsory'][a..c] then
				return a .. 'ʲ' .. b .. c
			elseif cons_assim_palatal['optional'][a..c] then
				return a .. '⁽ʲ⁾' .. b .. c
			end end)

		--fronting of stressed 'a' between soft consonants
		pron = gsub(pron, 'ˈ(..?.?)a(.?.?.?)', function(a, b)
			if match(a, '[ʲčǰǯӂ]') and (b == '' or match(b, '[ʲčǰǯӂ]')) then
				return 'ˈ' .. a .. 'æ' .. b
			end end)

		--final devoicing and devoicing assimilation
		pron = gsub(pron, '([bdgzvžχ]ʲ?)$', function(a)
			-- skipped when the next word begins with one of b d g z v ž n
			if not match(word[i+1] or '', '^[bdgzvžn]') then
				return devoicing[a]
			end end)
		pron = gsub(pron, '([bdgzvž])([ %-%‿]?[ptksčšǯcx])', function(a, b)
			return devoicing[a] .. b end)

		if match(word[i], 'sä$') then
			pron = gsub(pron, 'sʲə$', 's⁽ʲ⁾ə')
		end

		-- convert the remaining internal consonant symbols to IPA
		pron = gsub(pron, '[cčgĵǰšžǯӂχ]', translit_conv)
		word[i] = pron
	end

	text = table.concat(word, " ")

	--long vowels
	text = gsub(text, '[ɐə]([ ]?)ɐ(%l?)ˈ', '%1ɐː%2ˈ')
	text = gsub(text, 'ə([ ]?)[ɐə]', '%1əː')
	return text
end
return export