-- Module:NumberToChinese (模組:NumberToChinese)

local p = {} -- export table returned at the end of the module
local calc_lib = {} -- scientific-notation helper; replaced by Module:Complex_Number/Calculate the first time it is needed
local yesno = require('Module:Yesno')
--- Split a string into an array of its single-byte characters.
-- @param s  string to split
-- @return   array where element k is the k-th byte of `s` as a one-character string
local function stringToTable(s)
	local chars = {}
	local pos, len = 1, #s
	while pos <= len do
		chars[pos] = s:sub(pos, pos)
		pos = pos + 1
	end
	return chars
end
local ClearTenOne, NoClearOne, ClearAllOne = 0, 1, 2 -- handling of a leading "one": drop it only in front of "ten", never drop it, or drop every leading one
local Normal, Financial = 0, 1 -- ordinary (lowercase) and financial (uppercase) numerals
local Over, Ten, Hundred, Thousand, LargeStart = 10, 11, 12, 13, 14 -- Over is the ID that closes a four-digit (myriad) group; LargeStart is the first ID of the myriad unit names
local Data = { -- Chinese numeral data
	-- digit glyphs 0-9: [1] ordinary set, [2] financial set
	standard = {{'〇', '一', '二', '三', '四', '五', '六', '七', '八', '九'}, {'零', '壹', '貳', '參', '肆', '伍', '陸', '柒', '捌', '玖'}},
	-- units inside a four-digit group (ones, tens, hundreds, thousands): [1] ordinary, [2] financial
	decimal = {{'', '十', '百', '千'}, {'', '拾', '佰', '仟'}},
	-- unit name of each successive four-digit group; the first entry (lowest group) is empty
	large = {'', '萬', '億', '兆', '京', '垓', '秭', '穰', '溝', '澗', '正', '載', '極', '恆河沙', '阿僧祇', '那由他', '不可思議', '無量', '大數'},
	NotANumber = "這不是一個數字", Infinity = "無窮大", NumberingZero = '〇',
	Positive = '正', Negative = '負', Point = '點', Dash = '之'
}
local largeSize = #(Data.large) -- number of entries in Data.large

--- Clamp a numeric option to the integer range [low, high].
-- Values below `low` (and NaN, for which every comparison is false) become
-- `low`, values above `high` become `high`, anything else is floored.
local function clampOption(value, low, high)
	if not (value >= low) then
		return low
	end
	if value > high then
		return high
	end
	return math.floor(value)
end

--- Turn the #invoke arguments into the three values the converters expect.
-- @param frame  frame (or argument table) passed on to Module:Arguments
-- @return number      value of `num`, `number` or the first positional argument; 0 when none is given
-- @return numberType  Normal or Financial
-- @return clearOne    ClearTenOne, NoClearOne or ClearAllOne
local function argsToVariable(frame)
	local args = require('Module:Arguments').getArgs(frame)
	local number = args.num or args.number or args[1] or 0
	local numberType = args.b or args.daiji or args["大寫"] or args["大写"] or args.numberType or Normal
	-- A numeric value is used as is; otherwise any yes-like value selects the financial set.
	numberType = tonumber(numberType) or (yesno(numberType) and Financial or Normal)
	-- Clamp on both sides: the value is later used as an index into Data.standard / Data.decimal,
	-- so a negative or fractional value would otherwise raise an index-nil error.
	numberType = clampOption(numberType, Normal, Financial)
	local clearOne = args.ten or args.clearOne or ClearTenOne
	clearOne = clampOption(tonumber(clearOne) or ClearTenOne, ClearTenOne, ClearAllOne)
	return number, numberType, clearOne
end

--- Translate one ID into its Chinese text.
-- IDs below Over are the digits themselves, IDs from Over up to LargeStart are the
-- units inside a four-digit group, IDs from LargeStart on are the group unit names.
-- @param id          numeric ID as defined by the constants above
-- @param numberType  Normal or Financial
-- @return            the Chinese string for that ID
local function IDToChinese(id, numberType)
	if id >= LargeStart + largeSize then
		-- the name table is exhausted: build a compound unit name
		return p.LargeName(id - LargeStart)
	end
	if id >= LargeStart then
		return Data.large[id - LargeStart + 1]
	end
	if id >= Over then
		return Data.decimal[numberType + 1][id - Over + 1]
	end
	return Data.standard[numberType + 1][id + 1]
end

--- Convert one four-digit group into a list of IDs (Chinese numerals group by ten-thousands).
-- Only the first four characters of `number` are read.
-- @param number  four-digit string (or a value that concatenates to one)
-- @return        ID list ending in Over; an all-zero group yields {0, Over}
local function LessThan10000ToID(number)
	local digits = stringToTable(number .. '')
	local ids = {}
	table.insert(ids, 0) -- sentinel zero so the thousands digit can be tested like the others
	for pos = 1, 4 do
		local digit = digits[pos]
		if digit ~= '0' then
			-- regular case: the digit followed by the unit of its position
			table.insert(ids, tonumber(digit))
			table.insert(ids, Thousand - pos + 1)
		elseif ids[#ids] ~= 0 then
			-- first zero after a non-zero part: record a single zero without a unit
			table.insert(ids, 0)
		end
		-- a zero directly after another zero adds nothing
	end
	if digits[1] ~= '0' then
		-- the thousands digit is non-zero, so the sentinel is not needed
		table.remove(ids, 1)
	end
	if #ids > 1 then
		-- the last slot (ones unit or trailing zero) becomes the group terminator
		ids[#ids] = Over
	else
		-- only "0000" leaves a single element
		table.insert(ids, Over)
	end
	return ids
end
--- Convert the integer part of a number to Chinese numerals.
-- @param number      string of decimal digits (may be empty)
-- @param numberType  Normal or Financial
-- @param clearOne    ClearTenOne, NoClearOne or ClearAllOne
-- @return            Chinese text of the integer part
local function FrontNumberToChinese(number, numberType, clearOne)
	-- Pad to a multiple of four digits: prepend four zeros, then cut the surplus.
	-- A leading all-zero group is dropped by the loop below.
	number = '0000' .. number
	number = number:sub(#number % 4 + 1)
	local groupCount = #number / 4
	local id = {0} -- sentinel zero so the highest group can be tested like the others
	for i = 1, groupCount do
		-- take exactly the four digits of this group
		local data = LessThan10000ToID(number:sub(i * 4 - 3, i * 4))
		local previousIsZero = (id[#id] == 0)
		if #data == 2 then
			-- all-zero group ({0, Over}): emit one zero unless a zero is already pending
			if not previousIsZero then
				table.insert(id, 0)
			end
		else
			if previousIsZero and data[1] == 0 then
				-- avoid a doubled zero at the boundary between groups
				table.remove(data, 1)
			end
			for j = 1, #data do
				table.insert(id, data[j])
			end
			-- replace the group terminator with the ID of this group's unit name
			id[#id] = LargeStart + groupCount - i
		end
	end
	if #id == 1 then -- only the sentinel is left: the value is zero
		return IDToChinese(0, numberType)
	end
	table.remove(id, 1) -- drop the sentinel
	table.remove(id, #id) -- drop the last element: either the empty unit of the lowest group or a trailing zero
	if clearOne == ClearTenOne and id[1] == 1 and id[2] == Ten then
		-- drop the leading one only when it stands in front of "ten"
		table.remove(id, 1)
	elseif clearOne == ClearAllOne and id[1] == 1 and #id > 1 then
		-- drop any leading one, except when the one is the whole number
		table.remove(id, 1)
	end
	local parts = {}
	for i = 1, #id do
		parts[i] = IDToChinese(id[i], numberType)
	end
	return table.concat(parts)
end
--- Convert the fractional part of a number to Chinese numerals, digit by digit.
-- Trailing zeros are ignored, so an all-zero fraction yields the empty string.
-- @param number      string of digits after the decimal point
-- @param numberType  Normal or Financial
-- @return            Chinese digits of the fraction
local function BackNumberToChinese(number, numberType)
	local significant = #number
	local digits = stringToTable(number .. '')
	-- skip the trailing zeros
	while significant > 0 and digits[significant] == '0' do
		significant = significant - 1
	end
	local chinese = ''
	local pos = 1
	while pos <= significant do
		chinese = chinese .. IDToChinese(tonumber(digits[pos]), numberType)
		pos = pos + 1
	end
	return chinese
end

--- Convert a number to Chinese numerals in positional (quantity) notation.
-- Handles an optional sign, infinity, NaN, scientific notation and a decimal fraction.
-- @param number      number or numeric string
-- @param numberType  Normal or Financial
-- @param clearOne    ClearTenOne, NoClearOne or ClearAllOne
-- @return            Chinese text, or Data.NotANumber when the input is not a decimal number
local function NumberToChinese(number, numberType, clearOne)
	number = tostring(number) -- work on a string
	-- For quantities both numeral sets write zero as "零" (Lua arrays start at 1).
	Data.standard[Normal + 1][0 + 1] = Data.standard[Financial + 1][0 + 1]
	local chinese = ''
	number = mw.ustring.gsub(mw.text.trim(number), "%s+", '') -- remove all whitespace
	local check_char = mw.ustring.sub(number, 1, 1) -- first character decides the sign
	if check_char == '+' then -- "positive" is written only for an explicit plus sign
		chinese = chinese .. Data.Positive
		number = mw.ustring.sub(number, 2)
	elseif check_char == '-' or check_char == '−' then
		chinese = chinese .. Data.Negative
		number = mw.ustring.sub(number, 2) -- non-ASCII characters are still possible here, hence mw.ustring
	end
	if number == '∞' or number:lower() == 'inf' then
		return chinese .. Data.Infinity
	end
	if number:lower() == 'nan' then
		-- an unsigned NaN is reported with the "positive" prefix
		return ((chinese == '') and Data.Positive or chinese) .. Data.NotANumber
	end
	if number:find("[%d%.][Ee][%d%+%-]") then -- scientific notation present
		if type(calc_lib.scientific2number) ~= type(function()end) then calc_lib = require("Module:Complex_Number/Calculate")end
		number = calc_lib.scientific2number(number) -- expand to plain digits
	end
	if number:find("[Ee]") then return Data.NotANumber end -- an "e" left after expansion means invalid scientific notation

	local frontNumber, backNumber = '', ''
	local point = number:find('%.')
	if point == nil then -- no decimal point: integer part only
		frontNumber = number
	else -- split into integer and fractional parts
		frontNumber = number:sub(1, point - 1)
		backNumber = number:sub(point + 1, #number)
	end
	-- Both parts must be plain decimal digits (or empty). A tonumber() based check is not
	-- enough: '0' .. 'x1F' is a valid hexadecimal literal for Lua 5.1's tonumber.
	if frontNumber:find('%D') or backNumber:find('%D') then
		return Data.NotANumber
	end
	chinese = chinese .. FrontNumberToChinese(frontNumber, numberType, clearOne)
	local success, backChinese = pcall(BackNumberToChinese, backNumber, numberType)
	if not success then return Data.NotANumber end -- any failure while converting is treated as "not a number"
	if backChinese ~= '' then -- an empty fraction adds neither the point nor digits
		chinese = chinese .. Data.Point .. backChinese
	end
	return chinese
end
--- Convert a number to Chinese numerals for numbering purposes.
-- Each digit is read on its own with no units; "." and "-" become Data.Point and
-- Data.Dash and may occur several times. Every other character is skipped.
-- @param number      number or string to convert
-- @param numberType  Normal or Financial
-- @return            Chinese text
local function NumberToChineseNumbering(number, numberType)
	number = tostring(number) -- work on a string
	-- In numbering the ordinary set writes zero as "〇" (Lua arrays start at 1).
	Data.standard[Normal + 1][0 + 1] = Data.NumberingZero
	local symbolName = {['.'] = Data.Point, ['-'] = Data.Dash}
	local chinese = ''
	for _, ch in ipairs(stringToTable(number .. '')) do
		local spoken = symbolName[ch]
		if spoken == nil then
			local digit = tonumber(ch)
			if digit ~= nil then
				spoken = IDToChinese(digit, numberType)
			end
		end
		if spoken ~= nil then
			chinese = chinese .. spoken
		end
	end
	return chinese
end
--- Build the lookup tables used to parse Chinese numerals back into digits.
-- Fills Data.number (glyph -> digit), Data.otherNumber (combined glyphs),
-- Data.less1000 (unit within a four-digit group -> slot), Data.largeTable
-- (group unit name -> index) and Data.maxLarge (longest group unit name, in characters).
-- Also adds two more glyph sets to Data.standard.
local function loadInvChineseData()
	Data.number={['叄'] = 3, ['叁'] = 3, ['䦉'] = 4, ['兩'] = 2, ['两'] = 2}
	Data.standard[3] = {'零', '壹', '贰', '参', '肆', '伍', '陆', '柒', '捌', '玖'}
	Data.standard[4] = {'洞', '么', '兩', '叄', '肆', '伍', '六', '拐', '八', '勾'}
	Data.otherNumber = {
		['廿'] = {addAt = 2, value = 2}, ['念'] = {addAt = 2, value = 2},
		['卅'] = {addAt = 2, value = 3},
		['卌'] = {addAt = 2, value = 4},
		['皕'] = {addAt = 3, value = 2},
	}
	local all_large = {Data.large, {'', '万', '亿', '兆', '京', '垓', '秭', '穰', '沟', '涧', '正', '载', '极', '恒河沙', '阿僧祇', '那由他', '不可思议', '无量', '大数'}}
	Data.less1000={}
	Data.largeTable = {['$'] = 1, [Data.Point] = 1, ['点'] = 1}
	for i=1,#(Data.standard) do -- digit glyph lookup
		for j=1,#(Data.standard[i]) do Data.number[Data.standard[i][j]] = j - 1 end
	end
	-- The zero slot of the ordinary set is overwritten by the number-to-Chinese converters
	-- ("零" for quantities), so "〇" must be registered explicitly or it becomes unknown
	-- once such a conversion has run in the same invocation.
	Data.number[Data.NumberingZero] = 0
	for i=0,9 do Data.number[tostring(i)] = i end
	for i=1,#(Data.decimal) do -- unit lookup
		for j=1,#(Data.decimal[i]) do Data.less1000[Data.decimal[i][j]] = j end
	end
	Data.maxLarge = 0
	for i=1,#all_large do -- group unit name lookup
		for j=1,#(all_large[i]) do
			local nameLength = mw.ustring.len(all_large[i][j])
			if nameLength > Data.maxLarge then Data.maxLarge = nameLength end
			Data.largeTable[all_large[i][j]] = j
		end
	end
end
--- Read the token that starts at character position `i` of `str`.
-- The single character at `i` is the default token; it is replaced by the shortest
-- longer substring starting at `i` that is a key of Data.largeTable, if there is one.
-- @param str  string being scanned
-- @param i    character position (mw.ustring indexing)
-- @return     the token, or 'null' when it is the empty string
-- @return     position of the last character consumed
local function FindChineseToken(str, i)
	local token = mw.ustring.sub(str, i, i)
	local extra = 1
	local limit = Data.maxLarge
	while extra <= limit do -- try every candidate length the table can contain
		local candidate = mw.ustring.sub(str, i, i + extra)
		if Data.largeTable[candidate] then -- table hit: this is the token
			token = candidate
			i = i + extra
			break
		end
		extra = extra + 1
	end
	if token == '' then
		return 'null', i -- 'null' lets callers recognise "nothing found"
	end
	return token, i
end
-- Parse the integer part of a Chinese numeral into four-digit groups.
-- Phase 1 scans the string into token_list, a mix of buffers (4-slot tables:
-- slot 1 ones, 2 tens, 3 hundreds, 4 thousands) and group unit indices (numbers
-- taken from Data.largeTable). Phase 2 assigns each buffer to the group unit that follows it.
-- Returns: result (table keyed by group unit index, value = buffer) and maxId (largest key used).
-- Raises an error (level 2) for unknown characters or invalid digit/unit sequences.
local function LoadChineseToken(input_str)
	local str = input_str..'$' -- append the end marker
	local token_list = {}
	local i, strlen, it, uit = 1, mw.ustring.len(str), '', ''
	local buffer, buffer_flag = {0,0,0,0}, false -- one buffer per four digits; the flag marks a buffer that is not stored yet
	while i <= strlen do -- look up the numeral tokens one by one
		it, i = FindChineseToken(str, i)
		uit = it
		if not(Data.number[it] or Data.otherNumber[it] or Data.less1000[it] or Data.largeTable[it])then
			if it~='null'then error(mw.ustring.format("未知的字元 '%s'", it), 2)end
		end
		if Data.less1000[it] then -- a unit on its own (ten, hundred, thousand) is read as having an omitted "one"
			buffer[Data.less1000[it]] = 1
			local check_error, err_i = FindChineseToken(str, i + 1)
			if Data.less1000[check_error] then -- two units in a row are rejected
				error(mw.ustring.format("錯誤的用法 '%s'", it..check_error), 2)
			end
			buffer_flag = true
		elseif Data.otherNumber[it] then
			-- combined glyph: store its value in the slot given by addAt
			buffer[Data.otherNumber[it].addAt] = Data.otherNumber[it].value
			local check_error, err_i = FindChineseToken(str, i + 1)
			if Data.less1000[check_error] then
				error(mw.ustring.format("錯誤的用法 '%s'", it..check_error), 2)
			end
			buffer_flag = true
		elseif Data.number[it] then -- a digit was read
			if Data.number[it] ~= 0 then
				i = i + 1
				uit, i = FindChineseToken(str, i) -- check whether the next token is a unit
				if not(Data.number[uit] or Data.less1000[uit] or Data.largeTable[uit])then
					if uit~='null'then error(mw.ustring.format("未知的字元 '%s'", uit), 2)end
				end
				local check_error, err_i = FindChineseToken(str, i + 1)
				if Data.less1000[check_error] then
					error(mw.ustring.format("錯誤的用法 '%s'", uit..check_error), 2)
				end
				if Data.less1000[uit] then -- digit + unit: record the digit in that unit's slot
					buffer[Data.less1000[uit]] = Data.number[it]
					buffer_flag = true
				elseif Data.number[uit] then -- two digits in a row are rejected
					error(mw.ustring.format("錯誤的用法 '%s'", it..uit), 2)
				else -- digit + group unit: this buffer is complete; store it as a token and start a new buffer
					buffer[1] = Data.number[it]
					table.insert(token_list, buffer)
					buffer = {0,0,0,0}
					buffer_flag = false
				end
			else
				-- a zero digit: when the following token is a group unit and no buffer is pending,
				-- the ones slot is set to 1 (original comment: "omitted one" notation)
				-- NOTE(review): the intent of this case is not evident from the code - confirm
				local check_next, next_i = FindChineseToken(str, i + 1)
				if (Data.largeTable[(check_next == '')and'null'or check_next]) and (not buffer_flag) then
					buffer[1] = 1 -- "omitted one" notation
					buffer_flag = true
				end
			end
		end
		if Data.largeTable[(uit == '')and'null'or uit] then -- the token read is a group unit
			if buffer_flag then -- is there a buffer that has not been stored yet?
				table.insert(token_list, buffer) -- store it as a token and start a new buffer
				buffer = {0,0,0,0}
				buffer_flag = false
			end
			table.insert(token_list, Data.largeTable[uit]) -- store the group unit index in the token list
		end
		i = i + 1
	end
	if buffer_flag then -- a buffer is still pending: there is a part below ten thousand
		table.insert(token_list, buffer) -- store it as a token
	end
	if type(token_list[1]) ~= type({}) then -- the list starts with a group unit: read it as having an omitted "one"
		table.insert(token_list, 1, {1,0,0,0})
	end
	
	local loop_count, maxId = #token_list, 0 -- phase 2: arrange the tokens
	local result = {}
	i, buffer_flag = 1, false
	local last_it = {}

	while i <= loop_count do
		it = token_list[i]
		if type(it) == type(0) then -- a group unit index
			it = it - 1
			uit = token_list[i + 1] -- consecutive group unit indices are added together (indices are 1-based, hence the minus one)
			while type(uit) == type(0) and i <= loop_count do
				it = it + uit - 1
				i = i + 1
				uit = token_list[i + 1]
			end
			it = it + 1 -- add one back to get an index again
			if it > maxId then maxId = it end -- remember the largest index seen
			result[it] = last_it -- store the most recent four-digit buffer under this index
			buffer_flag = false
		else
			last_it = it
			buffer_flag = true
		end
		i = i + 1
	end
	if buffer_flag then -- a buffer was left without a following group unit: it is the lowest group
		result[1] = last_it 
		if 1 > maxId then maxId = 1 end
	end
	return result, maxId
end
--{{#invoke:NumberToChinese|Chinese_To_Number}}
--- Convert a Chinese numeral string into a string of Arabic digits.
-- @param frame  frame, plain argument table, or the value itself; the text is the first positional argument
-- @return       digit string with an optional sign and decimal point; 'nan' / 'inf' for the
--               corresponding words; '' for empty input
-- Raises an error (level 2) when the text contains characters that are not numerals.
function p.Chinese_To_Number(frame)
	-- A missing argument (or a nil frame) is treated as empty input instead of
	-- passing nil on to mw.text.trim.
	local input_str
	if type(frame) == type({}) then
		input_str = (frame.args or frame)[1]
	else
		input_str = frame
	end
	if input_str == nil then input_str = '' end
	input_str = tostring(input_str)
	loadInvChineseData() -- build the Chinese numeral lookup tables
	local str = mw.ustring.gsub(mw.text.trim(input_str), "%s", '')
	if str == '' then return '' end
	local number_pattern = Data.Point..'点%.'
	local large_pattern = ''
	local number, sign = '', ''

	local check_char = mw.ustring.sub(str, 1, 1) -- first character decides the sign
	if (check_char == Data.Positive or check_char == '+') and not (str == Data.Positive) then
		-- Data.Positive is also a group unit name, so a string consisting of it alone is not a sign
		sign = '+'
		str = mw.ustring.sub(str, 2, -1)
	elseif check_char == Data.Negative or check_char == '负' or check_char == '-' or check_char == '−' then
		sign = '-'
		str = mw.ustring.sub(str, 2, -1)
	elseif check_char == "零" then -- forms such as "零下三" (three below zero)
		local next_char = mw.ustring.sub(str..'$$', 2, 2)
		if next_char == "上" then
			sign = '+'
			str = mw.ustring.sub(str, 3, -1)
		elseif next_char == "下" then
			sign = '-'
			str = mw.ustring.sub(str, 3, -1)
		end
		if str == '' then return "0" end
	end
	if str == '' then return '' end -- nothing left after the sign: empty input
	if str == p.NotANumber() then return sign .. 'nan' end
	if mw.ustring.match(str,'[無无][穷窮限]') then return sign .. 'inf' end

	-- Build character classes from the lookup tables.
	for name in pairs(Data.number) do if type(name) == type("string") then number_pattern = number_pattern .. name end end
	for name in pairs(Data.less1000) do if type(name) == type("string") then large_pattern = large_pattern .. name end end
	for name in pairs(Data.largeTable) do if type(name) == type("string") then large_pattern = large_pattern .. name end end
	for name in pairs(Data.otherNumber) do if type(name) == type("string") then large_pattern = large_pattern .. name end end
	if mw.ustring.match(str, "^["..number_pattern.."]+$") then -- digits and points only: translate character by character
		for i=1,mw.ustring.len(str) do
			local it = mw.ustring.sub(str,i,i)
			if it == '点' or it == Data.Point or it == '.' then
				number = number .. '.'
			else
				number = number .. (Data.number[it] or 0)
			end
		end
		return sign .. number
	end
	local find_invalid, find_invalid_end = mw.ustring.find(str, "[^"..number_pattern..large_pattern.."]+")
	if find_invalid then
		error(mw.ustring.format("輸入的內容含有無效的詞彙 '%s'", mw.ustring.sub(str, find_invalid, find_invalid_end)), 2)
	end
	local point = mw.ustring.find(str, "[点"..Data.Point.."%.]")
	local frontStr, backStr = '', ''
	if point == nil then -- no decimal point: integer part only
		frontStr = str
	else -- split into integer and fractional parts
		frontStr = mw.ustring.sub(str, 1, point - 1)
		backStr = mw.ustring.sub(str, point + 1, -1)
	end

	local tokens, token_len = LoadChineseToken(frontStr)
	local zero_flag = true -- true while only leading zeros have been seen

	-- Emit the groups from the highest unit down, thousands slot first, skipping leading zeros.
	for i=token_len,1,-1 do
		for j = 4,1,-1 do
			local it = (tokens[i]or{})[j] or 0
			if zero_flag then
				if it ~= 0 then
					zero_flag = false
					number = number .. it
				end
			else
				number = number .. it
			end
		end
	end
	if backStr ~= '' then
		number = number .. '.'
		for i=1,mw.ustring.len(backStr) do
			local it = mw.ustring.sub(backStr,i,i)
			if Data.number[it] then -- characters that are not digits are skipped
				number = number .. Data.number[it]
			end
		end
	end
	return sign .. number
end
-- Accessors for the fixed result strings, so templates can compare against them.
function p.NotANumber() return Data.NotANumber end -- the "not a number" text
function p.Infinity() return Data.Infinity end -- the "infinity" text
--- Name of the group unit with the given index, compounding once the name table is exhausted.
-- Index 0 is the empty unit; an index past the end of Data.large is written as the
-- remainder's name followed by the last name repeated once per full cycle.
-- @param frame  frame, plain argument table, or the index itself; the index is the first
--               positional argument and defaults to 1 when missing or not numeric
-- @return       the unit name
function p.LargeName(frame)
	local raw
	if type(frame) == type({}) then
		raw = (frame.args or frame)[1]
	else
		raw = frame
	end
	-- Coerce in every branch (frame arguments arrive as strings) and drop any fraction,
	-- because the value is used as a table index below.
	local index = math.floor(tonumber(raw) or 1)
	local cycle = largeSize - 1
	local remainder = index % cycle
	local largecount = (index - remainder) / cycle
	return Data.large[remainder + 1] .. string.rep(Data.large[largeSize], largecount)
end
--{{#invoke:NumberToChinese|Number_to_Chinese}}
-- Template entry point: Chinese numerals in positional notation.
-- Reads number, numberType and clearOne from the frame arguments via argsToVariable.
function p.Number_to_Chinese(frame)
	return NumberToChinese(argsToVariable(frame))
end
-- Template entry point: Chinese numerals for numbering; same body as p.Number_to_Numbering.
-- argsToVariable returns three values; NumberToChineseNumbering uses the first two.
function p.Number_to____(frame)
	return NumberToChineseNumbering(argsToVariable(frame))
end
--{{#invoke:NumberToChinese|Number_to_Numbering}}
-- Template entry point: Chinese numerals for numbering (digits read one by one).
-- argsToVariable returns three values; NumberToChineseNumbering uses the first two.
function p.Number_to_Numbering(frame)
	return NumberToChineseNumbering(argsToVariable(frame))
end
-- For use by other modules; exposed as a function call so that templates already in use are not affected.
-- The name therefore follows [[Wikipedia:Lua代码风格#命名常规]].
-- number is converted to a string; numberType and clearOne default to 0 when omitted.
function p._numberToChinese(number, numberType, clearOne)
	return NumberToChinese(tostring(tostring(number)), numberType or 0, clearOne or 0) -- convert to Chinese
end
--- Numbering-style conversion for use by other modules.
-- Each digit is read on its own, with no units.
-- @param number      number or string to convert
-- @param numberType  Normal (0, default) or Financial (1)
-- @return            Chinese text
function p._numberToNumbering(number, numberType)
	-- Must use the numbering converter; the positional converter would insert units.
	return NumberToChineseNumbering(tostring(number), numberType or 0)
end

return p