Модул:fa-translit
Шаблон:status Шаблон:translit module documentation
Test cases
вироиш 16 tests failed. (refresh)
Text | Expected | Actual | Differs at | |
---|---|---|---|---|
سَرانجام | saranjâm | саканҷам | 1 | |
سَرانْجام | saranjâm | саканҷам | 1 | |
سَرَانْجَام | saranjâm | сакаанҷаам | 1 | |
کُروز | koruz | кокӯз | 1 | |
کُرُوز | korouz | кокuз | 1 | |
طَنین | tanin | еанин | 1 | |
طَنِین | taneyn | еанeyн | 1 | |
عَصاً | ’asan | ’асan | 2 | |
خانه | xâne | ханe | 1 | |
خانِه | xâne | ханиҳ | 1 | |
کُرِۀ شُمالی | kore-ye šomâli | коки-ye шомали | 1 | |
ضَمّه | zamme | заммe | 1 | |
ضَمِّه | zamme | замииҳ | 1 | |
وُدکا | vodkâ | ӯодка | 1 | |
اَرمَنِستان | armanestân | аакманистан | 1 | |
باکو | bâku | бакӯ | 1 |
-- Module table returned to the caller; export.tr is attached to it further down.
local export = {}
-- Shorthand for building a string from Unicode code points.
local U = mw.ustring.char
-- Arabic-script combining marks, named after their Persian terms.
local fatHatan = U(0x64B) -- U+064B ARABIC FATHATAN (a tanwin mark)
local fathe = U(0x64E) -- U+064E ARABIC FATHA; also zabar
local zamme = U(0x64F) -- U+064F ARABIC DAMMA; also piš
local kasre = U(0x650) -- U+0650 ARABIC KASRA; also zir
local tashdid = U(0x651) -- U+0651 ARABIC SHADDA; marks a doubled consonant
local jazm = U(0x652) -- U+0652 ARABIC SUKUN; no vowel
-- Letters that the context rules in export.tr match by code point.
local waw = U(0x0648) -- U+0648 ARABIC LETTER WAW
local ye = U(0x06CC) -- U+06CC ARABIC LETTER FARSI YEH
-- Letters spliced into Lua character classes ("[" .. group .. "]") by
-- export.tr to recognise a neighbouring consonant letter.
local group = "بپتثجچحخدذرزژسشصضطظغفقکگلمنوهی"
-- Per-character transliteration table. export.tr passes it to
-- mw.ustring.gsub as the replacement table, so any character that has no
-- entry here is left unchanged in the output.
--
-- Fixed: "ر" used to map to 'к' and "ط" to 'е'. Those are the Cyrillic
-- letters found on the Latin r and t keys of a Russian keyboard layout;
-- 'к' also collided with "ک" and 'е' is a vowel. They now map to 'р' and 'т'.
local mapping = {
	-- letters
	["ا"] = 'а', ["ب"] = 'б', ["پ"] = 'п', ["ت"] = 'т', ["ث"] = 'с', ["ج"] = 'ҷ', ["چ"] = 'ч', ["ح"] = 'ҳ', ["خ"] = 'х',
	["د"] = 'д', ["ذ"] = 'з', ["ر"] = 'р', ["ز"] = 'з', ["ژ"] = 'ж', ["س"] = 'с', ["ش"] = 'ш', ["ص"] = 'с', ["ض"] = 'з',
	["ط"] = 'т', ["ظ"] = 'з', ["غ"] = 'ғ', ["ف"] = 'ф', ["ق"] = 'қ', ["ک"] = 'к', ["گ"] = 'г', ["ل"] = 'л',
	["م"] = 'м', ["ن"] = 'н', ["و"] = 'ӯ', ["ه"] = 'ҳ', ["ی"] = 'и', ["آ"] = 'о',
	-- displaying on separate lines as the viewing becomes distorted on these combinations
	["ع"] = "’",
	["ء"] = "’",
	["ئ"] = "’",
	["ؤ"] = "’",
	["أ"] = "’",
	-- diacritics
	[fathe] = "а",
	[kasre] = "и",
	[zamme] = "о",
	[jazm] = "", -- also sokun - no vowel
	[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)
	[fatHatan] = "н",
	-- ligatures
	["ﻻ"] = "ло",
	["ﷲ"] = "ллоҳ",
	-- kashida
	["ـ"] = "", -- kashida, no sound
	-- numerals
	["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4", ["۵"] = "5",
	["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9", ["۰"] = "0",
	-- normal arabic variants to numerals
	["١"] = "1", ["٢"] = "2", ["٣"] = "3", ["٤"] = "4", ["٥"] = "5",
	["٦"] = "6", ["٧"] = "7", ["٨"] = "8", ["٩"] = "9", ["٠"] = "0",
	-- punctuation (leave on separate lines)
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = '“', -- quotation mark
	["»"] = '”', -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
	-- NOTE(review): this value is still Latin while the letters above are
	-- Cyrillic; confirm the intended ezâfe spelling for the target script.
	["ۀ"] = "-ye" -- he ye (in ezâfe)
}
--- Transliterate vocalised Persian (Arabic-script) text.
--
-- The work is done in three stages:
--   1. context-dependent rewrites on the Arabic-script input (word-final
--      he, zamme+waw, kasre+ye, dagger alef, alef+tanvin, tashdid order);
--   2. a per-character lookup in `mapping`;
--   3. clean-up rewrites on the transliterated text.
--
-- NOTE(review): `mapping` emits mostly Cyrillic letters, while the
-- replacement strings of stage 1 and all patterns of stage 3 are Latin.
-- Stage 3 therefore only ever sees Latin letters that stage 1 inserted,
-- that come from the "-ye" mapping value, or that were already present in
-- the input. Decide on one target script and align these rules with it.
--
-- @param text  string to transliterate
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return      the transliterated string
function export.tr(text, lang, sc)
	-- Stage 1: rewrites on the Arabic-script input ------------------------

	-- Word-final he directly after a consonant (optionally geminated)
	-- is written as the vowel "e".
	text = mw.ustring.gsub(text, '([' .. group .. ']' .. tashdid .. '?)ه$', '%1e')
	-- ou
	-- NOT WORKING
	-- NOTE(review): the "ou" inserted here is collapsed to "u" again by the
	-- 'ou' -> 'u' clean-up in stage 3, so this rule never shows in the output.
	text = mw.ustring.gsub(text, zamme .. waw .. '([' .. group .. '])', "ou%1")
	-- ey
	-- WORKING
	text = mw.ustring.gsub(text, kasre .. ye .. '([' .. group .. '])', "ey%1")
	-- alef maqsura + dagger alef
	text = mw.ustring.gsub(text, 'ىٰ', "â")
	-- alef carrying tanvin
	text = mw.ustring.gsub(text, 'ا' .. fatHatan, "an")
	-- text = mw.ustring.gsub(text, 'الله', "ﷲ")
	-- text = mw.ustring.gsub(text, 'لا', "ﻻ")

	-- Fix: Unicode canonical ordering stores a short-vowel mark *before*
	-- tashdid (consonant, vowel, tashdid). The Latin-only vowel swap in
	-- stage 3 cannot recognise the Cyrillic vowels produced by `mapping`, so
	-- the gemination step used to double the vowel instead of the consonant
	-- (e.g. "...мии..." instead of "...мми..."). Reordering the two marks
	-- here, before mapping, puts tashdid right after its consonant.
	text = mw.ustring.gsub(text, '([' .. fathe .. kasre .. zamme .. '])' .. tashdid, tashdid .. '%1')

	-- Stage 2: per-character lookup; characters without an entry are kept.
	text = mw.ustring.gsub(text, '.', mapping)

	-- Stage 3: clean-up on the transliterated text ------------------------
	text = mw.ustring.gsub(text, 'ou', "u")
	text = mw.ustring.gsub(text, 'aâ', "â")
	text = mw.ustring.gsub(text, 'âa', "a")
	text = mw.ustring.gsub(text, 'ei', "i")
	text = mw.ustring.gsub(text, 'ai', "ay")
	text = mw.ustring.gsub(text, 'au', "aw")
	-- u / i directly before a vowel are read as the consonants v / y
	text = mw.ustring.gsub(text, 'u([aâeiou])', "v%1")
	text = mw.ustring.gsub(text, 'i([aâeiou])', "y%1")
	text = mw.ustring.gsub(text, "([aâeiou])(" .. tashdid .. ")", "%2%1") -- swapping tašdid with vowels
	text = mw.ustring.gsub(text, "(.)" .. tashdid, "%1%1") -- implementing tašdid
	-- silent he after "e": at the end of the text, or before any character
	-- that is not one of the listed letters
	text = mw.ustring.gsub(text, 'eh$', "e")
	text = mw.ustring.gsub(text, 'eh([^aâeiouy’bdfghjklmnpqrstvyxzčğšž])', "e%1")
	return text
end
return export