-- Page title: وحدة:Language ("Module:Language")

require('strict')

local p = {}

local gsub = mw.ustring.gsub
local find = mw.ustring.find
local match = mw.ustring.match
local lower = mw.ustring.lower
local upper = mw.ustring.upper
local U = mw.ustring.char

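--[[ Each entry is keyed by its language code. "name" is the canonical name used on Wiktionary, "article" is the title of the Wikipedia article, and "scripts" lists the ISO 15924 script codes. Optional fields: "replacements" maps patterns to the text substituted for them when an entry name is built, and "direction" is "rtl" for right-to-left languages. ]]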

-- Per-language data, keyed by language code. Every "article" value is a plain
-- string so that it can be concatenated directly into a wikilink, and every
-- entry uses the same "scripts" key for its list of script codes.
local languages = {
	["ang"] = {
		["name"] = "Old English",
		["article"] = "Old English",
		["scripts"] = { "Latn" },
		-- Remove macrons, acutes, and overdots
		["replacements"] = {
			["[ĀÁ]"] = "A",
			["[āá]"] = "a",
			["[ǢǼ]"] = "Æ",
			["[ǣǽ]"] = "æ",
			["Ċ"]    = "C",
			["ċ"]    = "c",
			["[ĒÉ]"] = "E",
			["[ēé]"] = "e",
			["Ġ"]    = "G",
			["ġ"]    = "g",
			["[ĪÍ]"] = "I",
			["[īí]"] = "i",
			["[ŌÓ]"] = "O",
			["[ōó]"] = "o",
			["[ŪÚ]"] = "U",
			["[ūú]"] = "u",
			["[ȲÝ]"] = "Y",
			["[ȳý]"] = "y",
			},
		},
	["ar"] = {
		["name"] = "Arabic",
		["article"] = "Arabic language",
		["scripts"] = { "Arab" },
		["direction"] = "rtl", -- Should be in the script data module.
		--[[ ālif with wasla is replaced by ālif;
		taṭwīl, fatḥatan, ḍammatan, kasratan,
		fatḥa, ḍamma, kasra,
		shadda, sukūn, and superscript (dagger) ālif are removed. ]]
		["replacements"] = {
			[U(0x0671)] = U(0x0627),
			["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
				..U(0x064E)..U(0x064F)..U(0x0650)
				..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
			},
		},
	["bn"] = {
		["name"] = "Bengali",
		["article"] = "Bengali language",
		["scripts"] = { "Beng" },
		},
	["de"] = {
		["name"] = "German",
		["article"] = "German language",
		["scripts"] = { "Latn" },
		-- Disabled: rewriting of digraphs (ae, oe, ue) to umlauted vowels.
		--[[
		["replacements"] = {
			["ae"]    = "ä",
			["oe"]    = "ö",
			["ue"]    = "ü",
			["A[Ee]"] = "Ä",
			["O[Ee]"] = "Ö",
			["U[Ee]"] = "Ü",
		},
		]]
		},
	["en"] = {
		["name"] = "English",
		["article"] = "English language",
		["scripts"] = { "Latn" },
		},
	["es"] = {
		["name"] = "Spanish",
		["article"] = "Spanish language",
		["scripts"] = { "Latn" },
		},
	["fr"] = {
		["name"] = "French",
		["article"] = "French language",
		["scripts"] = { "Latn" },
		},
	["frm"] = {
		["name"] = "Middle French",
		["article"] = "Middle French",
		["scripts"] = { "Latn" },
		},
	["grc"] = {
		["name"] = "Ancient Greek",
		["article"] = "Ancient Greek",
		["scripts"] = { "Grek" },
		["replacements"] = {
			-- Vowels with macrons or breves are replaced with plain letters.
			["[ᾱᾰ]"] = "α",
			["[ᾹᾸ]"] = "Α",
			["[ῑῐ]"] = "ι",
			["[ῙῘ]"] = "Ι",
			["[ῡῠ]"] = "υ",
			["[ῩῨ]"] = "Υ",
			-- Variant letterforms are replaced with the ordinary letters.
			["ϐ"]    = "β",
			["ϵ"]    = "ε",
			["ϑ"]    = "θ",
			["ϰ"]    = "κ",
			["ϱ"]    = "ρ",
			["ϲ"]    = "σ",
			["ϕ"]    = "φ",
			},
		},
	["hi"] = {
		["name"] = "Hindi",
		["article"] = "Hindi",
		["scripts"] = { "Deva" },
		},
	["ja"] = {
		["name"] = "Japanese",
		["article"] = "Japanese language",
		["scripts"] = { "Jpan" },
		},
	["la"] = {
		["name"] = "Latin",
		["article"] = "Latin",
		["scripts"] = { "Latn" },
		["replacements"] = {
			-- Vowels with macrons, breves, or diaereses are replaced with plain letters.
			["[ĀĂ]"]  = "A",
			["[āă]"]  = "a",
			["[ĒĔ]"]  = "E",
			["[ēĕë]"] = "e",
			["[ĪĬÏ]"] = "I",
			["[īĭï]"] = "i",
			["[ŌŎ]"]  = "O",
			["[ōŏ]"]  = "o",
			["[ŪŬÜ]"] = "U",
			["[ūŭü]"] = "u",
			["Ȳ"]     = "Y",
			["ȳ"]     = "y"
			},
		},
	["mul"] = {
		["name"] = "Translingual",
		["article"] = "",
		["scripts"] = { "" },
		},
	["pt"] = {
		["name"] = "Portuguese",
		["article"] = "Portuguese language",
		["scripts"] = { "Latn" },
		},
	["pa"] = {
		["name"] = "Punjabi",
		["article"] = "Punjabi language",
		["scripts"] = { "Guru", "Arab", }
		},
	["ru"] = {
		["name"] = "Russian",
		["article"] = "Russian language",
		["scripts"] = { "Cyrl" },
		-- Combining acute accent is removed.
		["replacements"] = { [U(0x0301)] = "", }
		},
	["ur"] = {
		["name"] = "Urdu",
		["article"] = "Urdu",
		["scripts"] = { "Arab" },
		},
	["zh"] = {
		["name"] = "Chinese",
		["article"] = "Chinese language",
		["scripts"] = { "Hani" },
		},
	}

--[[ Templates for adding a new language entry (copy one into the table above).

	[""] = {
		["name"] = "",
		["article"] = "",
		["scripts"] = { "" },
		},

	[""] = {
		["name"] = "",
		["article"] = "",
		["scripts"] = { "" },
		["replacements"] = {
			},
		},

]]

-- Normalizes an empty string to nil; any other value is passed through
-- unchanged.
local function ifNotEmpty(value)
	if value ~= "" then
		return value
	end
	return nil
end

-- Returns true when the argument is neither nil nor the empty string.
local function checkForString(variable)
	return not (variable == nil or variable == "")
end

-- Builds a label of the form "[[article|name]]: " for a language in the
-- languages table.
--   languageCode: key into the languages table.
-- Returns the wikitext label. When the entry has no article (empty string),
-- the plain name is returned without a link, because "[[|name]]" is not a
-- valid link.
-- Raises an error when the language code has no entry in the table.
local function makeLinkedName(languageCode)
	local data = languages[languageCode]
	if data == nil then
		error("makeLinkedName: no data for language code " .. tostring(languageCode))
	end
	local article = data["article"]
	-- Tolerate an article title wrapped in a list by using its first element.
	if type(article) == "table" then
		article = article[1]
	end
	local name = data["name"]
	if article == nil or article == "" then
		return name .. ": "
	end
	return "[[" .. article .. "|" .. name .. "]]: "
end

-- Converts a displayed word into the corresponding entry (page) name.
--   word: the word as displayed; converted with tostring().
--   languageCode: key into the languages table; may be nil or unknown, in
--     which case only the bold/italic markup is stripped.
-- Returns the entry name as a string ("" for an empty word).
-- Raises an error when word is nil.
local function makeEntryName(word, languageCode)
	-- The nil check has to come before tostring(): tostring(nil) is the
	-- string "nil", which would otherwise be treated as a real word.
	if word == nil then
		error("The function makeEntryName requires a string argument")
	end
	word = tostring(word)
	if word == "" then
		return ""
	end

	-- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping.
	word = gsub(word, "'''", "")
	word = gsub(word, "''", "")

	local data = languages[languageCode]
	local replacements = data and data["replacements"]
	if replacements then
		-- Each key is a pattern and each value its replacement text.
		for pattern, replacement in pairs(replacements) do
			word = gsub(word, pattern, replacement)
		end
	end
	return word
end

-- Parses the first template parameter into a language code and a script code.
--   codes: string of the form "xx", "xxx", "xx-Ssss" or "xxx-Ssss"
--     (surrounding whitespace allowed); may be nil or "".
--   text: the text being tagged; passed to isLatn() of
--     Module:Language/scripts when no script code is given.
-- Returns three values:
--   languageCode: lowercased two- or three-letter code, or nil if none found.
--   scriptCode: four-letter code with an uppercase initial ("Latn"),
--     "unknown", or nil if none was determined.
--   errorText: "" on success, otherwise an HTML snippet describing the problem.
local function getCodes(codes, text)
	local languageCode, scriptCode
	local errorText = ""
	if codes == nil or codes == "" then
		-- Checked first: the pattern functions below raise an error on nil.
		errorText = ' <span style="font-size: smaller">[no language or script code provided]</span>'
	elseif find(codes, "^%s*%a%a%a?%s*$") or find(codes, "^%s*%a%a%a?-%a%a%a%a%s*$") then
		-- Two or three letters at the beginning of the parameter, in any case.
		-- Capturing all the letters before lowercasing keeps a mixed-case
		-- code such as "enG" from being cut down to "en".
		languageCode = lower(match(codes, "^%s*(%a%a%a?)"))
		-- Four letters at the end of the parameter, normalized to one
		-- uppercase letter followed by three lowercase letters.
		local initial, rest = match(codes, "(%a)(%a%a%a)%s*$")
		if initial then
			scriptCode = upper(initial) .. lower(rest)
		elseif require("Module:Language/scripts").isLatn(text) then
			scriptCode = "Latn"
		else
			scriptCode = "unknown"
		end
	elseif find(codes, "^%s*%a%a%a?") then
		-- A language code followed by something that is not a script code.
		languageCode = lower(match(codes, "^%s*(%a%a%a?)"))
		local invalidCode = gsub(codes, "^%s*%a%a%a?%-?", "")
		errorText = ' <span style="font-size: smaller">[<code>'..invalidCode..'</code> is not a valid script code.]</span>'
	elseif find(codes, "%-?%a%a%a%a%s*$") then
		-- A script code preceded by something that is not a language code.
		-- The script code is taken from the end of the parameter and given
		-- the same capitalization as in the first branch.
		local initial, rest = match(codes, "(%a)(%a%a%a)%s*$")
		scriptCode = upper(initial) .. lower(rest)
		-- Strip the hyphen, script code and trailing whitespace so that only
		-- the offending language part is reported.
		local invalidCode = gsub(codes, "%-?%a%a%a%a%s*$", "")
		errorText = ' <span style="font-size: smaller">[<code>'..invalidCode..'</code> is not a valid language code.]</span>'
	else
		errorText = ' <span style="font-size: smaller">[<code>'..codes..'</code> is not a valid language or script code.]</span>'
	end
	return languageCode, scriptCode, errorText
end

-- Wraps text in an HTML element carrying language attributes.
--   text: the text to wrap; "[text?]" is used when it is nil or false.
--   languageCode: value for the lang and xml:lang attributes. When nil, the
--     attributes are omitted instead of raising a concatenation error.
--   script: script code; only "Latn" text is italicized.
--   italics: whether italics are wanted for Latin-script text.
-- Returns an <i> element for italicized Latin-script text, otherwise a
-- <span> element. Languages whose data has direction "rtl" also get
-- dir="rtl".
local function tag(text, languageCode, script, italics)
	local data = languageCode and languages[languageCode]
	if not text then
		text = "[text?]"
	end

	local langAttributes = ""
	if languageCode then
		langAttributes = ' lang="' .. languageCode .. '" xml:lang="' .. languageCode .. '"'
	end

	local isRtl = data and data["direction"] == "rtl"
	local dirAttribute = isRtl and ' dir="rtl"' or ""

	if script == "Latn" and italics then
		-- NOTE(review): no trailing &lrm; here; the mark has only ever been
		-- appended to the <span> form. Confirm whether that is intended.
		return "<i" .. langAttributes .. dirAttribute .. ">" .. text .. "</i>"
	end
	-- A left-to-right mark follows right-to-left text.
	return "<span" .. langAttributes .. dirAttribute .. ">" .. text .. "</span>" .. (isRtl and "&lrm;" or "")
end

-- Entry point: tags text with its language.
-- Arguments are read from the parent frame if it has a first positional
-- parameter, otherwise from the invoking frame.
--   1: language code, optionally followed by a hyphen and a script code.
--   2: the text to tag (required; an error is raised when it is missing).
--   italics / i: "n" or "-" turns off italics.
-- Returns the tagged text followed by any error text from getCodes.
function p.lang(frame)
	local parent = frame:getParent()
	local args = parent.args[1] and parent.args or frame.args
	local codes = args[1]
	local text = args[2] or error("Provide text in the second parameter")
	local languageCode, scriptCode, errorText = getCodes(codes, text)
	local italics = args.italics or args.i
	italics = not (italics == "n" or italics == "-")

	if not languageCode then
		-- No usable language code was found: show the text untagged together
		-- with the error text, as p.wiktlang does, rather than failing while
		-- the lang attribute is built.
		return text .. errorText
	end

	return tag(text, languageCode, scriptCode, italics) .. errorText
end

-- Builds a link to a Wiktionary entry.
--   entry: the Wiktionary page name (required).
--   linkText: the text displayed for the link (required).
--   languageCode: optional; when given, the link points at the section named
--     after the language ("[[wikt:entry#Name|text]]").
-- Returns the wikitext of the link.
-- Raises an error when entry or linkText is missing, whether or not a
-- language code was supplied.
local function linkToWiktionary(entry, linkText, languageCode)
	if not (entry and linkText) then
		error("linkToWiktionary needs both a Wiktionary entry and link text")
	end

	local target = "wikt:" .. entry
	if languageCode then
		local data = languages[languageCode]
		local name = data and data.name or mw.language.fetchLanguageName(languageCode, 'en') -- On other languages' wikis, use mw.getContentLanguage():getCode(), or replace with that wiki's language code.
		-- Leave out the section anchor when no language name is available,
		-- so the link does not end in a bare "#".
		if name and name ~= "" then
			target = target .. "#" .. name
		end
	end
	return "[[" .. target .. "|" .. linkText .. "]]"
end

-- Entry point: links a word to its Wiktionary entry and tags it with its
-- language.
-- Arguments are read from the parent frame if it has a first positional
-- parameter, otherwise from the invoking frame.
--   1: language code, optionally followed by a hyphen and a script code.
--   2: the word to link (required).
--   3: optional display text; the script is detected from it when present.
--   italics / i / italic: "n", "-" or "no" turns off italics.
function p.wiktlang(frame)
	local parentFrame = frame:getParent()
	local args = frame.args
	if parentFrame.args[1] then
		args = parentFrame.args
	end

	local codes = args[1]
	if codes then
		codes = mw.text.trim(codes)
	end
	local entryWord = ifNotEmpty(args[2])
	local displayWord = ifNotEmpty(args[3])

	if not args[2] then
		error("Parameter 2 is required")
	end

	local languageCode, scriptCode, errorText = getCodes(codes, displayWord or entryWord)

	local italicsOption = args.italics or args.i or args.italic
	local italics = italicsOption ~= "n" and italicsOption ~= "-" and italicsOption ~= "no"

	-- The entry name always comes from the second parameter; the third
	-- parameter, when present, only changes the displayed text.
	local entry, linkText
	if entryWord then
		entry = makeEntryName(entryWord, languageCode)
		linkText = displayWord or entryWord
	end

	local out
	if not (entry and linkText) then
		out = '<span style="font-size: smaller;">[text?]</span>'
	elseif languageCode then
		out = tag(linkToWiktionary(entry, linkText, languageCode), languageCode, scriptCode, italics)
	else
		out = linkToWiktionary(entry, linkText)
	end

	if not (out and errorText) then
		return errorText or error("The function wiktlang generated nothing")
	end
	return out .. errorText
end

-- Entry point: links a word to its Wiktionary entry and tags it with its
-- language.
-- Arguments are read from the parent frame if it has a first positional
-- parameter, otherwise from the invoking frame.
--   1: language code, optionally followed by a hyphen and a script code.
--   2: the word to link; the script is detected from this word.
--   3: optional display text.
--   italics / i: "n" or "-" turns off italics.
function p.wikt(frame)
	local parentFrame = frame:getParent()
	local args = frame.args
	if parentFrame.args[1] then
		args = parentFrame.args
	end

	local codes, entryWord, displayWord = args[1], args[2], args[3]
	local languageCode, scriptCode, errorMessage = getCodes(codes, entryWord)

	local italicsOption = args.italics or args.i
	local italics = italicsOption ~= "n" and italicsOption ~= "-"

	-- The entry name always comes from the second parameter; a non-empty
	-- third parameter only changes the displayed text.
	local entry, linkText
	if checkForString(entryWord) then
		entry = makeEntryName(entryWord, languageCode)
		linkText = checkForString(displayWord) and displayWord or entryWord
	end

	local out
	if not (entry and linkText) then
		out = '<span style="font-size: smaller;">[text?]</span>'
	elseif languageCode then
		out = tag(linkToWiktionary(entry, linkText, languageCode), languageCode, scriptCode, italics)
	else
		out = linkToWiktionary(entry, linkText)
	end

	if out then
		return out .. errorMessage
	end
	return errorMessage or error("The function wikt generated nothing")
end

return p