মডিউল:fa-IPA

এই মডিউলের জন্য মডিউল:fa-IPA/নথি-এ নথিপত্র তৈরি করা হয়ে থাকতে পারে
--[=[ 

FIXME:

1. (ir) q and ğ should both be ɣ intervocalically (āqā should give ɒːɣɒː)
2. (prs) disable auto lowering of long vowels before /h/ & /ʔ/, (causes too many issues).
3. (tg, prs) change /q/ to /ɢ/ before a voiced consonant
4. (cls) prevent the appearance of β after a final consonant (e.g -atb should not give β)
5. (cls) geminated β and ð should be transcribed as plain b and d
6. FIXED // (ir) [[محوطه]] gives [mo.ɦav.væ.t̪ʰé] instead of [mo.ɦæv.væ.t̪ʰé]
7. (all except cls) final geminates such as [[خط]] should transcribe as [xat(ː)]
8. (ir) add support for Shirazi dialect

]=]

-- Table of functions and data exported by this module.
local export = {}

local m_str_utils = require("Module:string utilities")

-- Builds a string from Unicode code points (used for the combining marks below).
local U = m_str_utils.char
local lang = require("Module:languages").getByCode("fa")
local m_IPA = require("Module:IPA")
local m_table = require("Module:table")
local m_qual = require("Module:qualifier")
-- Character inventories of the romanisation. They are spliced into "[...]"
-- character classes by the functions below, so each one is a bare list of
-- characters, not a complete pattern.
local all_consonants = "bptTjčhxdDðrzžsšʔʾğGfqkglmnŋhɦwvy'" --needed for syllables
local stop_cons = "bptTjčdDðqkg"
local non_stopc = "hxrzžsšʔğGflmhɦwvy'"

-- Short aliases for the Unicode-aware string helpers.
local rsplit = m_str_utils.split
local rsubn = m_str_utils.gsub -- returns the new string AND the substitution count
local toNFC = mw.ustring.toNFC
local ulen = m_str_utils.len
local usub = m_str_utils.sub
-- Combining diacritics and marks inserted into the IPA output.
local pitchaccent = U(0x301) -- U+0301 COMBINING ACUTE ACCENT
local devoice = U(0x325) -- U+0325 COMBINING RING BELOW
local dtack = U(0x31E) -- U+031E COMBINING DOWN TACK BELOW
local gstop = U(0x027) -- U+0027 APOSTROPHE
local dental = U(0x32A) -- U+032A COMBINING BRIDGE BELOW

-- Single-result wrapper around rsubn(): the parenthesised call truncates the
-- result list, so only the rewritten string (not the substitution count) is
-- handed back to the caller.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end

-- Every individual style (dialect) code understood by this module, in the
-- order the list is iterated by callers.
export.all_styles = {"cls", "prs", "kbl", "haz", "fa", "teh", "tg"}
-- Named groups of styles: each key maps to the list of style codes it covers.
-- "all" is the complete list above.
export.all_style_groups = {
	all = export.all_styles,
	cls = {"cls"},
	dari = {"prs", "kbl", "haz"},
	ir = {"fa", "teh"},
	tg = {"tg"}
}

-- Human-readable description of each style, keyed by the style codes listed
-- in export.all_styles.
export.all_style_descs = {
	cls = "ধ্রুপদী ফার্সি",
	prs = "Dari Persian",
	-- The style code used everywhere else in this module is "kbl"; the entry
	-- was previously only reachable under "kabul", so a lookup by style code
	-- returned nil.
	kbl = "Kabuli",
	-- Legacy key, kept so existing lookups by "kabul" keep working.
	kabul = "Kabuli",
	haz = "Hazaragi",
	fa = "Iranian Persian",
	teh = "Tehrani",
	tg = "Tajik"
}

-- Map every element of the sequence `items` through `fun` (which must return
-- a sequence) and concatenate all returned sequences, in order, into one new
-- flat sequence.
local function flatmap(items, fun)
	local flattened = {}
	for _, element in ipairs(items) do
		for _, produced in ipairs(fun(element)) do
			flattened[#flattened + 1] = produced
		end
	end
	return flattened
end

-- Romanisation character -> IPA symbol, shared by every dialect. Applied
-- with rsubn(text, ".", common_consonants), i.e. one character at a time;
-- characters without an entry are left unchanged.
local common_consonants = {
	["j"] = "d͡ʒ",
	["'"] = "ʔ",
	["ḍ"] = "z",
	-- "D" and "T" exist for Hazaragi: prs_haz_IPA turns them into the
	-- retroflexes ɖ / ʈ before this table is applied; everywhere else they
	-- fall through to plain d / t here.
	["D"] = "d",
	["T"] = "t",
	["ğ"] = "ɣ",
	["G"] = "ɣ",
	["ḥ"] = "h",
	["r"] = "ɾ",
	["ṣ"] = "s",
	["š"] = "ʃ",
	["ṯ"] = "s",
	["ṭ"] = "t",
	["y"] = "j",
	["ž"] = "ʒ",
	["ẓ"] = "z",
	["č"] = "t͡ʃ",
	["g"] = "ɡ",
	-- A stress marker that survived to this point becomes the IPA stress mark.
	["`"] = "ˈ"
}

-- Per-dialect replacement tables. Each is applied character by character
-- with rsubn(text, ".", <table>); characters without an entry are kept.

-- Iranian Persian (used by export.fa_IPA).
local iranian_persian_short_vowels = {["a"] = "æ", ["i"] = "e", ["u"] = "o"}

local iranian_persian_long_vowels = {
	["ā"] = "ɒː",
	["ī"] = "iː",
	["ū"] = "uː",
	-- ō and ē receive the same output as ū and ī in this table.
	["ō"] = "uː",
	["ē"] = "iː"
}

local iranian_persian_consonants = {["ḏ"] = "z", ["q"] = "ɢ", ["ğ"] = "ɢ", ["k"] = "c", ["g"] = "ɟ"}

-- Dari (used by prs_IPA, prs_kbl_IPA and, for the long vowels and
-- consonants, prs_haz_IPA).
local dari_persian_short_vowels = {["a"] = "ä", ["i"] = "ɪ", ["u"] = "ʊ"}

local dari_persian_long_vowels = {
	["ā"] = "ɑː",
	["ī"] = "iː",
	["ū"] = "uː",
	["ō"] = "oː",
	["ē"] = "eː"
}

local dari_persian_consonants = {["ḏ"] = "z", ["v"] = "w"}

-- Tajik (used by export.tg_IPA). No length mark is emitted for any vowel.
local tajik_short_vowels = {["a"] = "ä", ["i"] = "i", ["u"] = "u"}

local tajik_long_vowels = {
	["ā"] = "ɔ",
	["ī"] = "i",
	["ū"] = "u",
	["ō"] = "ɵ",
	["ē"] = "e"
}

-- Vowel characters spliced into a negated "[^...]" class by tg_IPA.
local tajik_vowels = "aieuɵɔ"

-- Keys here are IPA symbols: tg_IPA applies this table after
-- common_consonants has already produced ɣ.
local tajik_consonants = {["ḏ"] = "z", ["ɣ"] = "ʁ", ["x"] = "χ"}

-- Classical Persian (used by export.fa_cls_IPA). The short-vowel table maps
-- each vowel to itself.
local classical_persian_short_vowels = {["a"] = "a", ["i"] = "i", ["u"] = "u"}

local classical_persian_long_vowels = {
	["ā"] = "ɑː",
	["ī"] = "iː",
	["ū"] = "uː",
	["ō"] = "oː",
	["ē"] = "eː"
}

local classical_persian_consonants = {["ḏ"] = "ð", ["v"] = "w"}

-- Vowel inventories of the romanisation, as bare character lists for use
-- inside "[...]" classes.
local vowels_minus_a = "iuāīūüēōːʷ"
local vowels = "aiuāīūüēōːʷ"
--ʷ and ː are counted as vowels to prevent them from being put in the next syllable
-- A "consonant" is any character that is not a vowel, a dot, a space or a hyphen.
local consonant = "[^" .. vowels .. ". -]"
local vowel = "[" .. vowels .. "]"
-- vowel, consonant, optional second consonant, vowel: the context in which
-- syllabify() inserts a syllable break.
local syllabify_pattern = "(" .. vowel .. ")(" .. consonant .. ")(" .. consonant .. "?)(" .. vowel .. ")"

-- Insert "." syllable boundaries into a "#"-delimited romanised string and
-- give every vowel that follows a boundary (or a word edge "#") a glottal
-- stop onset. Returns the rewritten string.
local function syllabify(text)
	local cons_set = "[" .. all_consonants .. "]"
	local segment_set = "[" .. all_consonants .. vowels .. "]"

	-- "-C-C" written with hyphens becomes "C.C".
	text = rsubn(text, "%-(" .. consonant .. ")%-(" .. consonant .. ")", "%1.%2")
	-- A stress mark always opens a new syllable.
	text = rsubn(text, "(" .. segment_set .. ")`", "%1.`")

	-- V C (C) V: a lone consonant goes to the following syllable, a pair is
	-- split between the two syllables.
	local function break_cluster(left_vowel, first_cons, second_cons, right_vowel)
		if second_cons == "" and first_cons ~= "" then
			first_cons, second_cons = "", first_cons
		end
		return left_vowel .. first_cons .. "." .. second_cons .. right_vowel
	end
	-- Two passes, presumably because adjacent matches share a vowel and the
	-- second one is skipped by a single pass.
	for _ = 1, 2 do
		text = rsubn(text, syllabify_pattern, break_cluster)
	end

	-- Consonants at a syllable boundary: non-stop + stop stay together, the
	-- break goes before the following non-stop run.
	local stop_then_continuants =
		"([" .. non_stopc .. "])([" .. stop_cons .. "])([" .. non_stopc .. "]+ʷ?)(" .. segment_set .. ")"
	text = rsubn(text, stop_then_continuants, "%1%2.%3%4")
	-- Any remaining run of three consonants is split after the second one.
	text = rsubn(text, "(" .. cons_set .. ")(" .. cons_set .. ")(" .. cons_set .. ")", "%1%2.%3")

	-- Every syllable is C-initial, so vowels need an initial consonant (ʔ).
	-- The first substitution (meant to exclude the izafa/ezafe) rewrites its
	-- match to itself and therefore leaves the text unchanged.
	text = rsubn(text, "(" .. cons_set .. ")([.])i#", "%1%2i#")
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1ʔ%2")
	text = rsubn(text, "#([" .. vowels .. "])", "#ʔ%1")

	return text
end

-- Remove the glottal consonants (h, ɦ, ') from a romanised string; only used
-- for the regional dialects. The rules are applied strictly in the order
-- listed, each to the output of the previous one.
local function remove_glottal_c(text)
	local rules = {
		-- between unlike vowels: replace the glottal consonant with a glide
		{"([aā](%.?))([hɦ'])([uū])", "%1w%4"},
		{"([iī])(%.?)([hɦ'])([auāēōū])", "i%2y%4"},
		{"([auāēōū](%.?))([hɦ'])([iīē])", "%1y%4"},
		{"([ē](%.?))([hɦ'])([auāēōū])", "%1y%4"},
		{"([uū])(%.?)([hɦ'])([aāiīēō])", "u%2w%4"},
		{"([ō](%.?))([hɦ'])([aāiīēō])", "%1w%4"},
		-- same or similar vowels around a stressed glottal consonant: merge
		-- everything into one long vowel
		{"([" .. all_consonants .. "])([uū])`([h'])([uū])", "`%1ū"},
		{"([" .. all_consonants .. "])([iī])`([h'])([iī])", "`%1ī"},
		{"([" .. all_consonants .. "])([aā])`([h'])([aā])", "`%1ā"},
		-- otherwise vowel + glottal consonant becomes a (majhul) long vowel
		{"([aā])((%.?)[h'])", "ā"},
		{"([iī])((%.?)['])", "ē"},
		{"([uū])((%.?)['])", "ō"},
		-- lastly, delete every remaining h and '
		{"(['h])", ""},
	}

	for _, rule in ipairs(rules) do
		text = rsubn(text, rule[1], rule[2])
	end
	return text
end

-- Apply consonant assimilation and allophone rules to a partly converted
-- transcription. Not to be used for Classical Persian. The rules run in the
-- order listed, each on the output of the previous one.
local function con_assimilation(text)
	local rules = {
		-- l / n before a retroflex (retroflexes only occur in Hazaragi) and
		-- before a dental
		{"l((%.?)[ʈɖ])", "ɭ%1"},
		{"([nl])((%.?)[td])", "%1" .. dental .. "%2"},
		{"n((%.?)[ʈɖ])", "ɳ%1"},
		-- every t / d is marked dental
		{"([td])", "%1" .. dental .. ""},
		-- place assimilation of n (and m before labiodentals)
		{"n((%.?)[kg])", "ŋ%1"},
		{"n((%.?)[cɟy])", "ɲ%1"},
		{"n((%.?)[bp])", "m%1"},
		{"n((%.?)[qɢ])", "ɴ%1"},
		{"([nm])((%.?)[fv])", "ɱ%2"},
		-- h after a vowel (with optional diacritics, length mark and dot)
		{
			"([āʌɑɒäæeēīioɔōuūʊɪ](%" .. dtack .. "?)(%" .. pitchaccent .. "?)(%ː?)(%.?))([h])",
			"%1ɦ"
		},
		{"r([tdszšlž])", "ɹ%1"},
		-- formally, f only assimilates within the same syllable
		{"f([bjdžğ])", "v%1"},
		-- geminate ɾ becomes rr; note that a "#" is inserted in front of it
		{"ɾ(%.?)ɾ", "#r%1r"},
		{"ä(" .. pitchaccent .. ")", "æ%1"},
		-- word-final ɦ / h become superscript modifiers
		{"([ɦ])#", "ʱ#"},
		{"([h])#", "ʰ#"},
	}

	for _, rule in ipairs(rules) do
		text = rsubn(text, rule[1], rule[2])
	end
	return text
end

-- Convert a romanised Persian string into an Iranian Persian IPA
-- transcription.
--
-- The conversion is a fixed pipeline of substitutions: each step rewrites the
-- output of the previous one, so the order of the statements matters. Word
-- edges are marked with "#" while the pipeline runs and removed at the end.
--
-- @param text  romanised input (may contain spaces, " | ", hyphens and the
--              stress marks "ˈ" or "`")
-- @return      the transcription as an NFC-normalised string
function export.fa_IPA(text)
	-- Hyphen handling that must see the raw input: "a-C-" loses its hyphens,
	-- any other "a-" becomes "e-".
	text = rsubn(text, "a[-]([" .. all_consonants .. "])[-]", "a%1")
	text = rsubn(text, "a%-", "e-")
	-- Use "`" internally as the stress marker.
	text = rsubn(text, "ˈ", "`")
	-- Mark word edges with "#".
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	-- Attach a hyphenated word-final "-i", drop the hyphens around a single
	-- segment, and turn the remaining hyphens into syllable dots.
	text = rsubn(text, "%-i#", "i#")
	text = rsubn(text, "[-]([" .. vowels .. all_consonants .. "])[-]", "%1")
	text = rsubn(text, "[-]", ".")
	text = rsubn(text, "v", "w")
	-- Replace xwa with xu
	text = rsubn(text, "xwa", "xu")
	-- Replace xwā with xā
	text = rsubn(text, "xwā", "xā")
	-- Replace xwē with xē
	text = rsubn(text, "xwē", "xē")

	-- Decide between w and v: v before a vowel, after any vowel except "a",
	-- and word-finally after a consonant.
	text = rsubn(text, "w(" .. vowel .. ")", "v%1")
	-- This substitution rewrites its match to itself (no effect on the text).
	text = rsubn(text, "w(" .. consonant .. ")", "w%1")
	text = rsubn(text, "([" .. vowels_minus_a .. "])w", "%1v")
	text = rsubn(text, "(" .. consonant .. ")w#", "%1v#")
	text = rsubn(text, "v%(w", "v(v")
	-- Replace diphthongs: aw -> uw, ay -> ey (the short-vowel table below then
	-- turns "u" into "o").
	-- The previous implementation guarded this with
	-- `next_char == "" or next_char:find(next_char)`, i.e. it used the next
	-- character as a Lua pattern against itself. That test is true for every
	-- ordinary character and raises a pattern error for "(", ")", "[" and "%",
	-- so the replacement is now unconditional: same output, no crash.
	local diphthongs = {w = "uw", y = "ey"}
	text =
		rsubn(
		text,
		"a([wy])",
		function(semivowel)
			return diphthongs[semivowel]
		end
	)
	--automatically denote syllables
	text = syllabify(text)
	-- then do pitch accent mark: move the stress marker onto the vowel
	text = rsubn(text, "`([" .. all_consonants .. "])([" .. vowels .. "])", "%1%2" .. pitchaccent .. "")
	-- Protect i/ī before y with the placeholder "E" so the vowel tables below
	-- do not touch it; it is restored to "i" afterwards.
	text = rsubn(text, "([iī])(" .. pitchaccent .. "?)(%.?)y", "E%2%3y")
	-- Aspirate voiceless stops/affricates before a vowel.
	text = rsubn(text, "([ptkč](%" .. dental .. "?))([" .. vowels .. "])", "%1ʰ%3")
	-- q after a vowel becomes ʁ.
	text = rsubn(text, "([" .. vowels .. "](%.?))q", "%1ʁ")
	-- Replace final a with e (can be overwritten by entering æ)
	text = rsubn(text, "([a])(%" .. pitchaccent .. "?)#", "e%2#")
	-- Replace short vowels
	text = rsubn(text, ".", iranian_persian_short_vowels)
	-- Replace long vowels
	text = rsubn(text, ".", iranian_persian_long_vowels)
	text = rsubn(text, "E(" .. pitchaccent .. "?)(%.?)y", "i%1%2y")
	-- Replace jj with dj
	text = rsubn(text, "jj", "dj")
	-- Replace čč with tč
	text = rsubn(text, "čč", "tč")
	-- Replace owv- with ævv- (the "." in the second pattern is unescaped and
	-- matches any single character)
	text = rsubn(text, "owv", "ævv")
	text = rsubn(text, "ow.v", "æv.v")
	-- Allophones: word-final g, b, d, j, l receive the devoicing mark
	text = rsubn(text, "([gbdjl](%" .. dental .. "?))#", "%1" .. devoice .. "#")
	-- Replace consonants
	text = rsubn(text, ".", iranian_persian_consonants)
	-- The palatals c / ɟ produced above go back to velars before ɒ, u, o.
	text = rsubn(text, "cʰ([ɒuo])", "kʰ%1")
	text = rsubn(text, "ɟ(%" .. devoice .. "?)([ɒuo])", "g%1%2")
	text = con_assimilation(text)
	-- Note: this consumes the "#" in front of the g.
	text = rsubn(text, "#(g)", "%1" .. devoice .. "")
	text = rsubn(text, ".", common_consonants)
	text = rsubn(text, "ɾ", "ɹ")
	--fix the pitch accent on long vowels: accent goes before the length mark
	text = rsubn(text, "([ɒiu])ː" .. pitchaccent .. "", "%1" .. pitchaccent .. "ː")
	-- Uvular allophones: x before t, devoiced ɢ word-finally, q word-initially.
	text = rsubn(text, "([ɢʁ])(%.?)t", "x%2t")
	text = rsubn(text, "([ʁɢ])#", "ɢ" .. devoice .. "#")
	text = rsubn(text, "#([ʁɢ])", "#q")
	text = rsubn(text, "ʁɢ", "ɢɢ")
	text = rsubn(text, "#g", "#k")

	-- Write the remaining palatals as palatalised velars, aspiration first.
	text = rsubn(text, "c", "kʲ")
	text = rsubn(text, "ɟ", "ɡʲ")
	text = rsubn(text, "ʲʰ", "ʰʲ")

	-- Drop the word-edge markers.
	text = rsubn(text, "#", "")

	text = toNFC(text)

	return text
end

-- Convert a romanised Persian string into a formal Dari IPA transcription.
-- The body is a fixed pipeline of substitutions; each step works on the
-- output of the previous one, so statement order matters. Word edges are
-- marked with "#" during the pipeline and removed at the end.
-- Returns the transcription as an NFC-normalised string.
function export.prs_IPA(text) --based on formal speech URBAN kabul
	-- Use "`" internally as the stress marker and mark word edges with "#".
	text = rsubn(text, "ˈ", "`")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	-- Attach a hyphenated final "-i", drop hyphens around a single segment,
	-- turn the remaining hyphens into syllable dots.
	text = rsubn(text, "%-i#", "i#")
	text = rsubn(text, "[-]([" .. vowels .. all_consonants .."])[-]", "%1")
	text = rsubn(text, "[-]", ".")
	text = rsubn(text, "v", "w")
	-- Replace xwa with xu
	text = rsubn(text, "xwa", "xu")
	-- Replace xwā with xā
	text = rsubn(text, "xwā", "xā")
	-- Replace xwē with xē
	-- (the statement is repeated; the second run cannot find anything new)
	text = rsubn(text, "xwē", "xē")
	text = rsubn(text, "xwē", "xē")
	-- i/ī before y becomes short i; a stress mark is kept, a syllable dot
	-- between them is dropped.
	text = rsubn(text, "([iī](%`?))(%.?)y", "i%2y")
	-- Iranian and Classical dictionaries list -iyy, 
	-- But Tajik and Dari ones dont
	text = rsubn(text, "([iī]y(%`?))(%.?)y", "i%2y")

	--automatically denote syllables
	text = syllabify(text)
	--pitch accent mark: move the stress marker onto the vowel
	text = rsubn(text, "`([" .. all_consonants .. "])([ʷ]?)([" .. vowels .. "])", "%1%2%3" .. pitchaccent .. "")
	-- Aspirate voiceless stops/affricates before a vowel.
	text = rsubn(text, "([ptkč])([" .. vowels .. "])", "%1ʰ%2")

	-- Before h, ɦ or ': i becomes e with a lowering mark, ī becomes ē
	text = rsubn(text, "i((%.?)[ɦh'])", "e" .. dtack .. "%1")
	text = rsubn(text, "ī((%.?)[ɦh'])", "ē%1")
	-- Before h, ɦ or ': u becomes o with a lowering mark, ū becomes ō
	text = rsubn(text, "u((%.?)[hɦ'])", "o" .. dtack .. "%1")
	text = rsubn(text, "ū((%.?)[hɦ'])", "ō%1")
	-- Replace short vowels
	text = rsubn(text, ".", dari_persian_short_vowels)
	-- Replace long vowels
	text = rsubn(text, ".", dari_persian_long_vowels)
	-- Replace jj with dj
	text = rsubn(text, "jj", "dj")
	-- Replace čč with tč
	text = rsubn(text, "čč", "tč")
	--fix the pitch accent on long vowels: accent goes before the length mark
	text = rsubn(text, "([ʌɑeiou])ː" .. pitchaccent .. "", "%1" .. pitchaccent .. "ː")
	-- Allophones
	text = con_assimilation(text)
	-- Replace consonants
	text = rsubn(text, ".", common_consonants)
	-- NOTE(review): con_assimilation already applies this same f-voicing
	-- rule, and common_consonants has by now rewritten j, ž and ğ, so only
	-- "fb"/"fd" could still match here - confirm whether this is needed.
	text = rsubn(text, "f([bjdžğ])", "v%1")
	text = rsubn(text, ".", dari_persian_consonants)
	-- Geminate and word-initial ɾ are written as r.
	text = rsubn(text, "ɾ(%.?)ɾ", "r%1r")
	-- ɪ before j is written as i.
	text = rsubn(text, "ɪ(" .. pitchaccent .. "?)(%.?)j", "i%1%2j")
	text = rsubn(text, "#ɾ", "#r")

	-- Drop the word-edge markers.
	text = rsubn(text, "#", "")

	text = toNFC(text)

	return text
end

-- Convert a romanised Persian string into a colloquial Kabuli IPA
-- transcription. Same overall shape as the other converters: a fixed,
-- order-dependent pipeline of substitutions with "#" marking word edges
-- until the end. Unlike prs_IPA, the glottal consonants are removed
-- (remove_glottal_c) before syllabification.
-- Returns the transcription as an NFC-normalised string.
function export.prs_kbl_IPA(text) --Colloquial dialect of Kabul
	-- Use "`" internally as the stress marker and mark word edges with "#".
	text = rsubn(text, "ˈ", "`")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	-- Attach a hyphenated final "-i", drop hyphens around a single segment,
	-- turn the remaining hyphens into syllable dots.
	text = rsubn(text, "%-i#", "i#")
	text = rsubn(text, "[-]([" .. vowels .. all_consonants .."])[-]", "%1")
	text = rsubn(text, "[-]", ".")

	text = rsubn(text, "v", "w")
	-- Replace xwa with xu
	text = rsubn(text, "xwa", "xu")
	-- Replace xwā with xā
	text = rsubn(text, "xwā", "xā")
	-- Replace xwē with xē
	-- (the statement is repeated; the second run cannot find anything new)
	text = rsubn(text, "xwē", "xē")
	text = rsubn(text, "xwē", "xē")
	-- i/ī before y becomes short i; a stress mark is kept, a syllable dot
	-- between them is dropped.
	text = rsubn(text, "([iī](%`?))(%.?)y", "i%2y")
	-- Iranian and Classical dictionaries list -iyy, 
	-- But Tajik and Dari ones dont
	text = rsubn(text, "([iī]y(%`?))(%.?)y", "i%2y")
	--pitch accent mark: move the stress marker onto the vowel
	text = rsubn(text, "`([" .. all_consonants .. "])([" .. vowels .. "])", "%1%2" .. pitchaccent .. "")
	-- remove glottal consonants for some dialects
	text = remove_glottal_c(text)
	--automatically denote syllables
	text = syllabify(text)
	-- universal aspiration
	text = rsubn(text, "([ptkč])([" .. vowels .. "])", "%1ʰ%2")

	-- Replace short vowels
	text = rsubn(text, ".", dari_persian_short_vowels)
	-- Replace long vowels
	text = rsubn(text, ".", dari_persian_long_vowels)
	-- Replace jj with dj
	text = rsubn(text, "jj", "dj")
	-- NOTE(review): the long-vowel table above has already rewritten every
	-- "ā", so this pattern may never match at this point - confirm.
	text = rsubn(text, "āw", "aw") -- lost colloquially
	-- Replace čč with tč
	text = rsubn(text, "čč", "tč")
	-- Allophones
	text = con_assimilation(text)
	-- Replace consonants
	text = rsubn(text, ".", dari_persian_consonants)
	text = rsubn(text, "f((%.?)[bjdžğ])", "v%1")
	text = rsubn(text, ".", common_consonants)
	--fix the pitch accent on long vowels: accent goes before the length mark
	text = rsubn(text, "([ʌɑeiou])ː" .. pitchaccent .. "", "%1" .. pitchaccent .. "ː")
	-- Geminate and word-initial ɾ are written as r (a "#" is inserted in
	-- front of a geminate; all "#" are removed below).
	text = rsubn(text, "ɾ(%.?)ɾ", "#r%1r")
	-- ɪ before j is written as i.
	text = rsubn(text, "ɪ(" .. pitchaccent .. "?)(%.?)j", "i%1%2j")
	text = rsubn(text, "#ɾ", "#r")

	-- Drop the word-edge markers.
	text = rsubn(text, "#", "")

	text = toNFC(text)

	return text
end

-- Convert a romanised Persian string into a Hazaragi IPA transcription.
-- A fixed, order-dependent pipeline of substitutions with "#" marking word
-- edges until the end. Specific to this converter: vowel-harmony rules run
-- before anything else touches the vowels, "D"/"T" become retroflexes, and
-- ī/ū lose their length.
-- Returns the transcription as an NFC-normalised string.
function export.prs_haz_IPA(text) --Hazaragi
	-- Use "`" internally as the stress marker and mark word edges with "#".
	text = rsubn(text, "ˈ", "`")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	-- i/ī before y becomes short i; a stress mark is kept, a syllable dot
	-- between them is dropped.
	text = rsubn(text, "([iī](%`?))(%.?)y", "i%2y")
	-- Iranian and Classical dictionaries list -iyy, 
	-- But Tajik and Dari ones dont
	text = rsubn(text, "([iī]y(%`?))(%.?)y", "i%2y")
	-- Attach a hyphenated final "-i", drop hyphens around a single segment,
	-- turn the remaining hyphens into syllable dots.
	text = rsubn(text, "%-i#", "i#")
	text = rsubn(text, "[-]([" .. vowels .. all_consonants .."])[-]", "%1")
	text = rsubn(text, "[-]", ".")

	--these conversions need to happen BEFORE EVERYTHING ELSE
	--pitch accent mark: move the stress marker onto the vowel
	text = rsubn(text, "`([" .. all_consonants .. "])([" .. vowels .. "])", "%1%2" .. pitchaccent .. "")
	--Vowel Harmony: a vowel is replaced by (a copy of, or a vowel chosen
	--from) the vowel of the next syllable; an accent on it is kept
	text = rsubn(text, "ē(" .. pitchaccent .. "?)([" .. all_consonants .. "])([ūiī])", "%3%1%2%3")
	text =
		rsubn(text, "ē(" .. pitchaccent .. "?)([" .. all_consonants .. "])([" .. all_consonants .. "])([ī])", "%4%1%2%3%4")
	text = rsubn(text, "i(" .. pitchaccent .. "?)([" .. all_consonants .. "])([ouū])", "%3%1%2%3")
	text = rsubn(text, "ī(" .. pitchaccent .. "?)([" .. all_consonants .. "])([ēōuūiī])", "%3%1%2%3")
	text = rsubn(text, "ō(" .. pitchaccent .. "?)([" .. all_consonants .. "])([uū])", "%3%1%2%3")
	text = rsubn(text, "ō(" .. pitchaccent .. "?)([" .. all_consonants .. "])([i])", "u%1%2%3")
	text = rsubn(text, "ō(" .. pitchaccent .. "?)([" .. all_consonants .. "])([ī])", "ū%1%2%3")
	-- Replace xwa with xu
	text = rsubn(text, "xwa", "xu")
	-- Replace xwā with xā
	text = rsubn(text, "xwā", "xā")
	-- Replace xwē with xē
	-- (the statement is repeated; the second run cannot find anything new)
	text = rsubn(text, "xwē", "xē")
	text = rsubn(text, "xwē", "xē")
	text = rsubn(text, "v", "w")
	-- remove glottal consonants for some dialects
	text = remove_glottal_c(text)
	--automatically denote syllables
	text = syllabify(text)

	-- universal aspiration
	text = rsubn(text, "([ptkč])([" .. vowels .. "])", "%1ʰ%2")
	--shorten ā before the glides y and w
	text = rsubn(text, "āy", "ay")
	text = rsubn(text, "āw", "aw")
	--retroflex consonants
	text = rsubn(text, "D", "ɖ")
	text = rsubn(text, "T", "ʈ")
	-- Replace short vowels 
	-- no consistent vowel length for i or u
	text = rsubn(text, "ī", "i")
	text = rsubn(text, "ū", "u")
	-- (this table maps a, i, u to themselves)
	text = rsubn(text, ".", classical_persian_short_vowels)
	--approximate vowels
	text = rsubn(text, "ā", "ɔː")
	text = rsubn(text, "ō", "ʊː")
	text = rsubn(text, "a", "ä")
	-- Replace long vowels (only those not already rewritten above remain)
	text = rsubn(text, ".", dari_persian_long_vowels)
	--fix the pitch accent on long vowels: accent goes before the length mark
	text = rsubn(text, "([ɔeiʊu])ː" .. pitchaccent .. "", "%1" .. pitchaccent .. "ː")
	-- Replace jj with dj
	text = rsubn(text, "jj", "dj")
	-- Replace čč with tč
	text = rsubn(text, "čč", "tč")
	--allophones
	text = con_assimilation(text)
	-- Word-final g, b, d, j, l receive the devoicing mark.
	text = rsubn(text, "([gbdjl](%" .. dental .. "?))#", "%1" .. devoice .. "#")
	--Terminal voicing is not phonetic
	-- NOTE(review): after the previous statement a devoicing mark stands
	-- between these consonants and "#", so this looks redundant - confirm.
	text = rsubn(text, "([gbdj])#", "%1" .. devoice .. "#")
	-- Replace consonants
	text = rsubn(text, ".", dari_persian_consonants)
	text = rsubn(text, "f((%.?)[bjdžğ])", "v%1")
	text = rsubn(text, ".", common_consonants)
	-- Geminate and word-initial ɾ are written as r (a "#" is inserted in
	-- front of a geminate; all "#" are removed below).
	text = rsubn(text, "ɾ(%.?)ɾ", "#r%1r")
	text = rsubn(text, "#ɾ", "#r")

	-- Drop the word-edge markers.
	text = rsubn(text, "#", "")

	text = toNFC(text)

	return text
end

-- Convert a romanised Persian string into a Tajik IPA transcription.
-- A fixed, order-dependent pipeline of substitutions with "#" marking word
-- edges until the end. The Tajik vowel tables emit no length marks.
-- Returns the transcription as an NFC-normalised string.
function export.tg_IPA(text)
	-- Use "`" internally as the stress marker and mark word edges with "#".
	text = rsubn(text, "ˈ", "`")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	-- Attach a hyphenated word-final "-i".
	text = rsubn(text, "%-i#", "i#")
	-- Iranian and Classical dictionaries list -iyy, 
	-- But Tajik and Dari ones dont
	text = rsubn(text, "([iī]y(%`?))(%.?)y", "i%2y")
	-- Drop hyphens around a single segment, turn the rest into syllable dots.
	text = rsubn(text, "[-]([" .. vowels .. all_consonants .."])[-]", "%1")
	text = rsubn(text, "[-]", ".")

	text = rsubn(text, "v", "w")
	-- Replace xwa with xu
	text = rsubn(text, "xwa", "xu")
	-- Replace xwā with xā
	text = rsubn(text, "xwā", "xā")
	-- Replace xwē with xē
	-- (the statement is repeated; the second run cannot find anything new)
	text = rsubn(text, "xwē", "xē")
	text = rsubn(text, "xwē", "xē")
	--automatically denote syllables
	text = syllabify(text)

	--pitch accent mark: move the stress marker onto the vowel
	text = rsubn(text, "`([" .. all_consonants .. "])([" .. vowels .. "])", "%1%2" .. pitchaccent .. "")

	-- Replace jj with dj
	text = rsubn(text, "jj", "dj")
	-- Replace čč with tč
	text = rsubn(text, "čč", "tč")
	-- w before a vowel is v.
	text = rsubn(text, "w([" .. vowels .. "])", "v%1")
	-- universal aspiration
	text = rsubn(text, "([ptkč])([" .. vowels .. "])", "%1ʰ%2")

	-- i/ī before a glottal consonant that is not followed by a vowel become e
	text = rsubn(text, "([iī])(%" .. pitchaccent .. "?)([h'ʔɦ])([^" .. tajik_vowels .. "])", "e%2%3%4")
	-- u/ū before a glottal consonant that is not followed by a vowel become ɵ
	text = rsubn(text, "([uū])(%" .. pitchaccent .. "?)([hʔ'ɦ])([^" .. tajik_vowels .. "])", "ɵ%2%3%4")
	--aspiration
	-- NOTE(review): the "universal aspiration" step above has already put ʰ
	-- between these consonants and a following vowel, so this looks
	-- redundant - confirm.
	text = rsubn(text, "([ptkč](%" .. dental .. "?))([" .. vowels .. "])", "%1ʰ%3")

	-- Replace short vowels
	text = rsubn(text, ".", tajik_short_vowels)
	-- Replace long vowels
	text = rsubn(text, ".", tajik_long_vowels)
	--allophones
	text = con_assimilation(text)
	-- Replace consonants
	text = rsubn(text, ".", common_consonants)
	text = rsubn(text, ".", tajik_consonants)
	-- An accented a/ä is written æ.
	text = rsubn(text, "([aä])(" .. pitchaccent .. ")", "æ%2")

	-- Drop the word-edge markers.
	text = rsubn(text, "#", "")

	text = toNFC(text)

	return text
end

-- Convert a romanised Persian string into a Classical Persian IPA
-- transcription. A fixed, order-dependent pipeline of substitutions with "#"
-- marking word edges until the end. No assimilation, aspiration or accent
-- placement is applied here; xw is kept as labialised xʷ.
-- Returns the transcription as an NFC-normalised string.
function export.fa_cls_IPA(text)
	-- Use "`" internally as the stress marker and mark word edges with "#".
	text = rsubn(text, "ˈ", "`")
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	-- i/ī before y becomes short i; a stress mark is kept, a syllable dot
	-- between them is dropped.
	text = rsubn(text, "([iī](%`?))(%.?)y", "i%2y")
	-- Attach a hyphenated final "-i", drop hyphens around a single segment,
	-- turn the remaining hyphens into syllable dots.
	text = rsubn(text, "%-i#", "i#")
	text = rsubn(text, "[-]([" .. vowels .. all_consonants .."])[-]", "%1")
	text = rsubn(text, "[-]", ".")

	text = rsubn(text, "v", "w")
	-- Replace xwa with xʷa
	text = rsubn(text, "xwa", "xʷa")
	-- Replace xwā with xʷā
	text = rsubn(text, "xwā", "xʷā")
	-- Replace xwē with xʷē
	text = rsubn(text, "xwē", "xʷē")
	--automatically denote syllables
	text = syllabify(text)
	-- Replace short vowels (this table maps a, i, u to themselves)
	text = rsubn(text, ".", classical_persian_short_vowels)
	-- Replace d with ḏ and b with β after vowels.
	-- NOTE(review): the ".?" in both patterns is an unescaped dot, so it
	-- matches any single character, not only a syllable dot - confirm intent.
	text = rsubn(text, "([" .. vowels .. "]+.?)(%`?)([d])", "%1%2ḏ")
	text = rsubn(text, "([" .. vowels .. "]+.?)(%`?)([b])", "%1%2β")
	-- Replace long vowels
	text = rsubn(text, ".", classical_persian_long_vowels)
	-- Replace jj with dj
	text = rsubn(text, "jj", "dj")
	-- Replace čč with tč
	text = rsubn(text, "čč", "tč")
	-- Replace consonants
	text = rsubn(text, ".", common_consonants)
	text = rsubn(text, ".", classical_persian_consonants)
	-- Drop the word-edge markers.
	text = rsubn(text, "#", "")

	text = toNFC(text)

	return text
end

-- ROMANIZATIONS

-- Produce the Classical Persian display romanisation from a respelling.
-- May be called directly with a string, or with a frame-like table, in which
-- case the text and script are read from its args[1] and args[2].
-- `script` and `options` are accepted but not used in this function.
-- The substitutions run in order, each on the output of the previous one.
function export.romanize_fa_cls(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end
	-- Stress marks are not shown in the romanisation.
	text = rsubn(text, "`", "")
	text = rsubn(text, "ˈ", "")
	-- Put a space after every comma.
	text = rsubn(text, "[,]", ", ")
	-- Mark word edges with "#" (removed again at the end).
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	-- i/ī before y becomes "iy"; a syllable dot between them is dropped.
	text = rsubn(text, "([iī])(%.?)y", "iy")

	--kill incorrect characters (IPA diacritics and symbols)
	text = rsubn(text, "([" .. dental .. pitchaccent .. devoice .. dtack .. "ʰ])", "")
	text = rsubn(text, "([ɴŋ])", "n")
	--remove v
	text = rsubn(text, "v", "w")
	--ensure vowels are paired to a consonant: a vowel right after a syllable
	--dot gets "'"; then all dots are removed
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1'%2")
	text = rsubn(text, "([.])", "")
	-- d/ḍ and b after a vowel are written ḏ and ḇ.
	text = rsubn(text, "([" .. vowels .. "])([dḍ])", "%1ḏ")
	text = rsubn(text, "([" .. vowels .. "](%-?))b", "%1ḇ")
	-- NOTE(review): pattern and replacement render identically; presumably
	-- they differ in Unicode normalisation form - confirm.
	text = rsubn(text, "ḏ", "ḏ")
	-- Geminates keep the same letter twice.
	text = rsubn(text, "ḏd", "ḏḏ")
	text = rsubn(text, "ḇb", "ḇḇ")
	-- Collapse the letters that are only distinguished in the respelling.
	text = rsubn(text, "G", "ğ")
	text = rsubn(text, "ḍ", "z")
	text = rsubn(text, "ṭ", "t")
	text = rsubn(text, "ṯ", "s")
	text = rsubn(text, "ṣ", "s")
	text = rsubn(text, "ḥ", "h")
	-- remove Hazaragi retroflexes
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	-- Drop a word-initial "'" and then the word-edge markers.
	text = rsubn(text, "#'", "#")
	text = rsubn(text, "#", "")
	return text
end

-- Produce the Dari display romanisation from a respelling.
-- May be called directly with a string, or with a frame-like table, in which
-- case the text and script are read from its args[1] and args[2].
-- `script` and `options` are accepted but not used in this function.
-- The substitutions run in order, each on the output of the previous one.
function export.romanize_prs(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end
	-- Stress marks are not shown in the romanisation.
	text = rsubn(text, "`", "")
	text = rsubn(text, "ˈ", "")
	-- Put a space after every comma.
	text = rsubn(text, "[,]", ", ")
	-- Mark word edges with "#" (removed again at the end).
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	-- i/ī before y becomes short i, and -iyy- is reduced to -iy-.
	text = rsubn(text, "([iī](%.?))y", "i%2y")
	text = rsubn(text, "([iī]y(%.?))y", "i%2y")
	-- Short i / u directly before ' or h are written e / o.
	text = rsubn(text, "i(['h])", "e%1")
	text = rsubn(text, "u(['h])", "o%1")

	--kill incorrect characters (IPA diacritics and symbols)
	text = rsubn(text, "([" .. dental .. pitchaccent .. devoice .. dtack .. "ʰ])", "")
	text = rsubn(text, "([ɴŋ])", "n")
	text = rsubn(text, "v", "w")
	-- Replace xw clusters
	text = rsubn(text, "xw([āē])", "x%1")
	text = rsubn(text, "xwa", "xu")
	-- for rare exceptions
	text = rsubn(text, "ʷ", "w")
	--ensure vowels are paired to a consonant: a vowel right after a syllable
	--dot gets "'"; then all dots are removed
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1'%2")
	text = rsubn(text, "([.])", "")
	-- Collapse the letters that are only distinguished in the respelling.
	text = rsubn(text, "ḍ", "z")
	text = rsubn(text, "ḏ", "z")
	text = rsubn(text, "ṯ", "s")
	text = rsubn(text, "ṭ", "t")
	text = rsubn(text, "G", "ğ")
	text = rsubn(text, "ṣ", "s")
	text = rsubn(text, "ḥ", "h")
	-- THIS SHOULD ONLY BE DONE FOR HAZARAGI RETROFLEX ENTRIES
	-- THEY SHOULD NEVER APPEAR IN A MAIN ENTRY
	text = rsubn(text, "D", "ḍ")
	text = rsubn(text, "T", "ṭ")
	text = rsubn(text, "ɖ", "ḍ")
	text = rsubn(text, "ʈ", "ṭ")
	-- Drop a word-initial "'".
	text = rsubn(text, "#'", "#")

	-- remove unnecessary marks
	text = rsubn(text, "#", "")
	return text
end

-- Produce the Iranian Persian display romanisation from a respelling.
-- May be called directly with a string, or with a frame-like table, in which
-- case the text and script are read from its args[1] and args[2].
-- `script` and `options` are accepted but not used in this function.
-- The substitutions run in order, each on the output of the previous one.
function export.romanize_ira(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end
	-- Stress marks are not shown in the romanisation.
	text = rsubn(text, "`", "")
	text = rsubn(text, "ˈ", "")
	-- Put a space after every comma.
	text = rsubn(text, "[,]", ", ")
	-- Mark word edges with "#" (removed again at the end).
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"

	--kill incorrect characters (IPA diacritics and symbols)
	text = rsubn(text, "([" .. dental .. pitchaccent .. devoice .. dtack .. "ʰ])", "")
	text = rsubn(text, "([ɴŋ])", "n")
	text = rsubn(text, "v", "w")
	-- Replace xw clusters
	text = rsubn(text, "xw([āē])", "x%1")
	text = rsubn(text, "xwa", "xu")
	text = rsubn(text, "ʷ", "")
	-- Decide between w and v, with the same rules as export.fa_IPA: v before
	-- a vowel, after any vowel except "a", and word-finally after a consonant.
	text = rsubn(text, "w(" .. vowel .. ")", "v%1")
	-- This substitution rewrites its match to itself (no effect on the text).
	text = rsubn(text, "w(" .. consonant .. ")", "w%1")
	text = rsubn(text, "([" .. vowels_minus_a .. "])w", "%1v")
	text = rsubn(text, "v%(w", "v(v")
	text = rsubn(text, "(" .. consonant .. ")w#", "%1v#")
	text = rsubn(text, "wv", "vv")
	text = rsubn(text, "wæ", "væ")
	--ensure vowels are paired to a consonant: a vowel right after a syllable
	--dot gets "'"; then all dots are removed
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1'%2")
	text = rsubn(text, "([.])", "")
	-- Diphthongs and i before y.
	text = rsubn(text, "iy", "īy")
	text = rsubn(text, "ay", "ey")
	text = rsubn(text, "aw", "ow")
	-- Collapse the letters that are only distinguished in the respelling.
	text = rsubn(text, "ḍ", "z")
	text = rsubn(text, "ḏ", "z")
	text = rsubn(text, "ṭ", "t")
	text = rsubn(text, "G", "ğ")
	text = rsubn(text, "q", "ğ")
	text = rsubn(text, "ṯ", "s")
	text = rsubn(text, "ṣ", "s")
	text = rsubn(text, "ḥ", "h")
	-- Vowel letters: ā is written â, short u / i are written o / e.
	text = rsubn(text, "ā", "â")
	text = rsubn(text, "u", "o")
	text = rsubn(text, "i", "e")
	-- remove Hazaragi retroflexes
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	-- The remaining long vowels are written without a length mark, merging
	-- ē with ī and ō with ū.
	text = rsubn(text, "([ēī])", "i")
	text = rsubn(text, "([ūō])", "u")
	-- terminal w is only possible in a diphthong
	-- NOTE(review): this pattern only matches a literal "o0w#" (with the
	-- digit zero) and the replacement drops the captured text; it looks like
	-- a typo - confirm the intended rule before changing it.
	text = rsubn(text, "([o]0)w#", "v#")
	-- a at the end of a word or before a hyphen is written e.
	text = rsubn(text, "a#", "e#")
	text = rsubn(text, "a%-", "e-")
	-- An explicitly entered æ is shown as a.
	text = rsubn(text, "æ", "a")
	-- Drop a word-initial "'" and then the word-edge markers.
	text = rsubn(text, "#'", "#")
	text = rsubn(text, "#", "")
	return text
end

-- Produce the Tajik display romanisation from a respelling.
-- May be called directly with a string, or with a frame-like table, in which
-- case the text and script are read from its args[1] and args[2].
-- `script` and `options` are accepted but not used in this function.
-- The substitutions run in order, each on the output of the previous one.
-- Unlike the other romanisers, the stress mark is kept until near the end
-- because one rule below depends on it.
function export.romanize_tg(text, script, options)
	if type(text) == "table" then
		options = {}
		text, script = text.args[1], text.args[2]
	end
	-- Put a space after every comma.
	text = rsubn(text, "[,]", ", ")
	-- Mark word edges with "#" (removed again at the end).
	text = rsubn(text, " | ", "# | #")
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	-- Short i / u directly before ' or h become ē / ō.
	text = rsubn(text, "i(['h])", "ē%1")
	text = rsubn(text, "u(['h])", "ō%1")
	-- -iyy- is reduced to -iy-, also across a stress mark.
	text = rsubn(text, "([iī]y(%.?))y", "i%2y")
	text = rsubn(text, "ˈ", "`")
	text = rsubn(text, "([iī]y`y)", "i`y")

	--kill incorrect characters (IPA diacritics and symbols)
	text = rsubn(text, "([" .. dental .. pitchaccent .. devoice .. dtack .. "ʰ])", "")
	text = rsubn(text, "([ɴŋ])", "n")
	text = rsubn(text, "w", "v")
	-- Replace xw clusters (w has already become v at this point)
	text = rsubn(text, "xv([āē])", "x%1")
	text = rsubn(text, "xva", "xu")
	text = rsubn(text, "ʷ", "")
	--ensure vowels are paired to a consonant: a vowel right after a syllable
	--dot gets "'"; then all dots are removed
	text = rsubn(text, "([.])([" .. vowels .. "])", "%1'%2")
	text = rsubn(text, "([.])", "")
	-- Collapse the letters that are only distinguished in the respelling.
	text = rsubn(text, "ḍ", "z")
	text = rsubn(text, "ḏ", "z")
	text = rsubn(text, "ṯ", "s")
	text = rsubn(text, "ṭ", "t")
	text = rsubn(text, "(['])", "ʾ")
	text = rsubn(text, "ṣ", "s")
	text = rsubn(text, "ḥ", "h")
	text = rsubn(text, "G", "ġ")
	text = rsubn(text, "ğ", "ġ")
	-- Vowel letters: ē is written e, ō is written ü, ā is written o.
	text = rsubn(text, "ē", "e")
	text = rsubn(text, "ō", "ü")
	text = rsubn(text, "ā", "o")
	-- remove Hazaragi retroflexes
	text = rsubn(text, "D", "d")
	text = rsubn(text, "T", "t")
	text = rsubn(text, "ɖ", "d")
	text = rsubn(text, "ʈ", "t")
	-- Tajik does not have vowel length
	text = rsubn(text, "([iī])", "i")
	-- ...except that a stressed word-final i is written ī.
	text = rsubn(text, "`([" .. all_consonants .. "])i#", "%1ī#")
	text = rsubn(text, "([ūu])", "u")
	-- Remove the stress marks, a word-initial ʾ (together with its "#"), and
	-- finally the remaining word-edge markers.
	text = rsubn(text, "`", "")
	text = rsubn(text, "#([ʾ])", "")
	text = rsubn(text, "#", "")
	return text
end

-- Run the converter that belongs to `style` on `text` and return its result.
-- "fa" and "teh" share export.fa_IPA. A style without a converter returns
-- `text` unchanged.
local function one_term_ipa(text, style)
	if style == "kbl" then
		--should ignore conversion if specified
		return export.prs_kbl_IPA(text) or export.prs_IPA(text)
	end

	-- Name of the exported converter for each remaining style; the function
	-- itself is looked up on `export` at call time.
	local converter_names = {
		cls = "fa_cls_IPA",
		prs = "prs_IPA",
		haz = "prs_haz_IPA",
		fa = "fa_IPA",
		teh = "fa_IPA",
		tg = "tg_IPA"
	}

	local converter_name = converter_names[style]
	if converter_name == nil then
		return text
	end

	local converted = export[converter_name](text)
	return converted
end

-- Transcribe one respelling in one style.
--
-- style == one of the following:
-- "cls": Classical Persian
-- "prs": Dari Persian
-- "kbl": Kabuli
-- "haz": Hazaragi
-- "fa": Iranian Persian
-- "teh": Tehrani
-- "tg": Tajik
--
-- @param text   the respelling to transcribe
-- @param style  one of the style codes above
-- @return       a list of pronunciation tables, each with a `phonemic` field
--               holding the transcription (currently always one entry)
--
-- Two local `apply_sub` helpers that were never called have been removed;
-- they referenced an undefined global `rfind`. The extra arguments that used
-- to be passed to one_term_ipa (one of them an undefined global `err`) were
-- ignored by that function and have been dropped as well.
function export.IPA(text, style)
	-- Only the input itself is transcribed; the list form is kept so that
	-- further variants can be added without changing the shape of the result.
	local variants = {text}

	local function call_one_term_ipa(variant)
		return {
			{
				phonemic = one_term_ipa(variant, style)
			}
		}
	end

	return flatmap(variants, call_one_term_ipa)
end

-- Transcribe every requested style and arrange the results for display.
--
-- @param inputs      table mapping a style code to a list of respellings
-- @param args_style  optional filter string: comma-separated alternatives,
--                    each a "+"-joined list of style codes that must all
--                    match; a leading "-" on a code negates it. nil means
--                    "no filter".
-- @return a list of groups `{tag = <group tag>, styles = {...}}`; each entry
--         of `styles` is `{tag, represented_styles, pronuns, indent}`.
--         Groups appear in the fixed order Classical, Dari, Iranian, Tajik.
--
-- Two locals that compared "prs" with "kbl" and with "haz" were computed but
-- never read; they have been removed. Kabuli and Hazaragi are still always
-- listed when available, exactly as before.
function export.express_styles(inputs, args_style)
	local pronuns_by_style = {}
	local expressed_styles = {}

	-- Transcribe all respellings given for one style.
	local function dostyle(style)
		pronuns_by_style[style] = {}
		for _, val in ipairs(inputs[style]) do
			local pronuns = export.IPA(val, style)
			for _, pronun in ipairs(pronuns) do
				table.insert(pronuns_by_style[style], pronun)
			end
		end
	end

	-- Keep only the styles for which pronunciations were generated.
	local function all_available(styles)
		local available_styles = {}
		for _, style in ipairs(styles) do
			if pronuns_by_style[style] then
				table.insert(available_styles, style)
			end
		end
		return available_styles
	end

	-- Add one display line for `styles` (a style code or a list of codes)
	-- under the group identified by `hidden_tag`. `hidden_tag == true` means
	-- "use `tag` as the group tag". Does nothing when none of the styles is
	-- available or when the args_style filter rejects them.
	local function express_style(hidden_tag, tag, styles, indent)
		indent = indent or 1
		if hidden_tag == true then
			hidden_tag = tag
		end
		if type(styles) == "string" then
			styles = {styles}
		end
		styles = all_available(styles)
		if #styles == 0 then
			return
		end
		local style = styles[1]

		-- If style specified, make sure it matches the requested style.
		local style_matches
		if not args_style then
			style_matches = true
		else
			-- "," separates alternatives (any may match) ...
			local or_styles = rsplit(args_style, "%s*,%s*")
			for _, or_style in ipairs(or_styles) do
				-- ... "+" joins requirements (all must match).
				local and_styles = rsplit(or_style, "%s*%+%s*")
				local and_matches = true
				for _, and_style in ipairs(and_styles) do
					local negate
					if and_style:find("^%-") then
						and_style = and_style:gsub("^%-", "")
						negate = true
					end
					local this_style_matches = false
					for _, part in ipairs(styles) do
						if part == and_style then
							this_style_matches = true
							break
						end
					end
					if negate then
						this_style_matches = not this_style_matches
					end
					if not this_style_matches then
						and_matches = false
					end
				end
				if and_matches then
					style_matches = true
					break
				end
			end
		end
		if not style_matches then
			return
		end

		local new_style = {
			tag = tag,
			represented_styles = styles,
			pronuns = pronuns_by_style[style],
			indent = indent
		}
		-- Append to an existing group with the same tag, if there is one ...
		for _, hidden_tag_style in ipairs(expressed_styles) do
			if hidden_tag_style.tag == hidden_tag then
				table.insert(hidden_tag_style.styles, new_style)
				return
			end
		end
		-- ... otherwise open a new group.
		table.insert(
			expressed_styles,
			{
				tag = hidden_tag,
				styles = {new_style}
			}
		)
	end

	for style, _ in pairs(inputs) do
		dostyle(style)
	end

	-- True when either style is missing or their pronunciations differ.
	local function diff(style1, style2)
		if not pronuns_by_style[style1] or not pronuns_by_style[style2] then
			return true
		end
		return not m_table.deepEquals(pronuns_by_style[style1], pronuns_by_style[style2])
	end

	local fa_teh_different = diff("fa", "teh")

	-- Classical Persian
	express_style("[[w:Classical Persian|Classical Persian]]", "[[w:Classical Persian|Classical Persian]]", "cls")

	-- Dari Persian
	express_style(
		"[[w:Dari Persian|Dari, formal]]",
		"[[w:Dari Persian|Dari, formal]]",
		"prs"
	)
	express_style("[[w:Dari Persian|Dari, formal]]", "[[w:Dari#South-Eastern|Kabuli]]", "kbl", 2)
	express_style("[[w:Dari Persian|Dari, formal]]", "[[w:Hazaragi dialect|Hazaragi]]", "haz", 2)

	-- Iranian Persian
	express_style(
		"[[w:Iranian Persian|Iran, formal]]",
		"[[w:Iranian Persian|Iran, formal]]",
		"fa"
	)
	-- Tehrani is only listed when it differs from the formal Iranian line.
	if fa_teh_different then
		express_style("[[w:Iranian Persian|Iran, formal]]", "[[w:Tehrani accent|Tehrani]]", "teh", 2)
	end

	-- Tajik
	express_style(
		"[[w:Tajik language|Tajik, formal]]",
		"[[w:Tajik language|Tajik, formal]]",
		"tg"
	)

	return expressed_styles
end

--- Template entry point: build the wikitext for the pronunciation section.
--
-- Reads the arguments of the parent frame (the invoking template). Accepted parameters:
--   1=       respelling applied to every style in `export.all_styles`
--   <group>= respelling for every style in that group of `export.all_style_groups` (except "all")
--   <style>= respelling for a single style; takes precedence over group and 1=
--   pre=, post=, ref=  text attached before/after the first output line only
--   style=   passed through unchanged to `export.express_styles`
--   bullets= number of "*" characters used for top-level lines (default 1)
-- If no respelling is supplied at all, the current page title is used for every style.
-- Each respelling may hold several comma-separated variants.
--
-- @param frame Scribunto frame object; only `frame:getParent().args` is read.
-- @return string of wikitext: one <div> per style group (a "vsSwitcher" toggle box when the
--   group holds more than one style), followed by an empty <span>.
function export.show(frame)
	-- Create parameter specs
	local params = {
		[1] = {}, -- this replaces style group 'all'
		["pre"] = {},
		["post"] = {},
		["ref"] = {},
		["style"] = {},
		["bullets"] = {type = "number", default = 1}
	}
	for group, _ in pairs(export.all_style_groups) do
		if group ~= "all" then
			params[group] = {}
		end
	end
	for _, style in ipairs(export.all_styles) do
		params[style] = {}
	end

	-- Parse arguments
	local parargs = frame:getParent().args
	local args = require("Module:parameters").process(parargs, params)

	-- Set inputs
	local inputs = {}
	-- If 1= specified, do all styles.
	if args[1] then
		for _, style in ipairs(export.all_styles) do
			inputs[style] = args[1]
		end
	end
	-- Then do remaining style groups other than 'all', overriding 1= if given.
	for group, styles in pairs(export.all_style_groups) do
		if group ~= "all" and args[group] then
			for _, style in ipairs(styles) do
				inputs[style] = args[group]
			end
		end
	end
	-- Then do individual style settings.
	for _, style in ipairs(export.all_styles) do
		if args[style] then
			inputs[style] = args[style]
		end
	end
	-- If no inputs given, set all styles based on current pagename.
	if not next(inputs) then
		local text = mw.title.getCurrentTitle().text
		for _, style in ipairs(export.all_styles) do
			inputs[style] = text
		end
	end

	-- Split each respelling into its comma-separated variants. Only existing keys are
	-- reassigned, which is permitted while traversing with pairs().
	for style, input in pairs(inputs) do
		inputs[style] = rsplit(input, ",")
	end
	local expressed_styles = export.express_styles(inputs, args.style)

	local lines = {}

	-- Format one output line for `expressed_style`, labelled with `tag` (may be nil).
	-- `is_first` controls whether pre=/ref=/post= are attached to this line.
	-- Returns the real wikitext line and a plain-text approximation used only for measuring.
	local function format_style(tag, expressed_style, is_first)
		local pronunciations = {}
		local formatted_pronuns = {}
		for _, pronun in ipairs(expressed_style.pronuns) do
			table.insert(
				pronunciations,
				{
					pron = "[" .. pronun.phonemic .. "]",
					qualifiers = pronun.qualifiers
				}
			)
			local formatted_phonemic = "[" .. pronun.phonemic .. "]"
			if pronun.qualifiers then
				formatted_phonemic = "(" .. table.concat(pronun.qualifiers, ", ") .. ") " .. formatted_phonemic
			end
			table.insert(formatted_pronuns, formatted_phonemic)
		end
		-- Number of bullets: When indent = 1, we want the number of bullets given by `args.bullets`,
		-- and when indent = 2, we want `args.bullets + 1`, hence we subtract 1.
		local bullet = string.rep("*", args.bullets + expressed_style.indent - 1) .. " "
		-- Here we construct the formatted line in `formatted`, and also try to construct the equivalent without HTML
		-- and wiki markup in `formatted_for_len`, so we can compute the approximate textual length for use in sizing
		-- the toggle box with the "more" button on the right.
		local user_pre = is_first and args.pre and args.pre .. " " or ""
		-- Plain "(tag) " stand-in for the qualifier, so the measurement does not depend on
		-- whatever markup format_qualifier() emits.
		local pre_for_len = user_pre .. (tag and "(" .. tag .. ") " or "")
		local pre = user_pre .. (tag and m_qual.format_qualifier(tag) .. " " or "")
		local post = is_first and (args.ref or "") .. (args.post and " " .. args.post or "") or ""
		local formatted = bullet .. pre .. m_IPA.format_IPA_full { lang = lang, items = pronunciations } .. post
		local formatted_for_len =
			bullet .. pre_for_len .. "IPA(key): " .. table.concat(formatted_pronuns, ", ") .. post
		return formatted, formatted_for_len
	end

	for i, style_group in ipairs(expressed_styles) do
		if #style_group.styles == 1 then
			style_group.formatted, style_group.formatted_for_len =
				format_style(style_group.styles[1].tag, style_group.styles[1], i == 1)
		else
			-- Collapsed view: the group's own tag with the first style's pronunciations.
			style_group.formatted, style_group.formatted_for_len = format_style(style_group.tag, style_group.styles[1], i == 1)
			-- Expanded view: one line per style in the group.
			for j, style in ipairs(style_group.styles) do
				style.formatted, style.formatted_for_len = format_style(style.tag, style, i == 1 and j == 1)
			end
		end
	end

	-- Approximate visible length of `text`: drop anything that looks like an HTML tag
	-- and count the remaining characters.
	local function textual_len(text)
		text = rsub(text, "<.->", "")
		return ulen(text)
	end

	-- Longest line over all groups (collapsed and expanded forms); used to size toggle boxes.
	local maxlen = 0
	for _, style_group in ipairs(expressed_styles) do
		local this_len = textual_len(style_group.formatted_for_len)
		if #style_group.styles > 1 then
			for _, style in ipairs(style_group.styles) do
				this_len = math.max(this_len, textual_len(style.formatted_for_len))
			end
		end
		maxlen = math.max(maxlen, this_len)
	end

	for _, style_group in ipairs(expressed_styles) do
		if #style_group.styles == 1 then
			table.insert(lines, "<div>\n" .. style_group.formatted .. "</div>")
		else
			local inline = '\n<div class="vsShow" style="display:none">\n' .. style_group.formatted .. "</div>"
			local full_prons = {}
			for _, style in ipairs(style_group.styles) do
				table.insert(full_prons, style.formatted)
			end
			local full = '\n<div class="vsHide">\n' .. table.concat(full_prons, "\n") .. "</div>"
			local em_length = math.floor(maxlen * 0.68) -- from [[Module:grc-pronunciation]]
			table.insert(
				lines,
				'<div class="vsSwitcher" data-toggle-category="pronunciations" style="width: ' ..
					em_length ..
						'em; max-width:100%;"><span class="vsToggleElement" style="float: right;">&nbsp;</span>' ..
							inline .. full .. "</div>"
			)
		end
	end

	-- major hack to get bullets working on the next line
	return table.concat(lines, "\n") .. "\n<span></span>"
end

return export