Module:Format for sorting

Description
Test using main:

hallo hallo d d hello this is 45567 ff

tests using textString

series 45.567.1 ? = series 00045.00567.00001 ?

Roman numeral

00017

00011

minimalStrip

Drukkerij - Uitgeverij sdfsdf De Eik Antwerpen

main

drukkerij uitgeverij sdfsdf de eik antwerpen



--[[
Note that the name 'Format for sorting' has become a misnomer. It's also used for strings other than sorting labels.

-- {{#invoke:Strip title for sorting|main|...}}

]]--

local mDiacritics = require('Module:Diacritics')
local mConvertNumerals = require('Module:ConvertNumerals')
local mPadNumbers = require('Module:Pad numbers')

local p = {}

-- Debugging helper: runs a single mw.ustring.gsub over the first argument,
-- substituting every alphanumeric or whitespace character.
function p.test(frame)
	local input = frame.args[1] or ""
	-- Experiments tried earlier (all disabled), one character class at a time:
	--   "%a" -> "L", "%c" -> "C", "%d" -> "D", "%p" -> "P", "%s" -> "S"
	--   "%W" -> " ", with " (not w)" appended to the result
	-- NOTE(review): the replacement is a lone "%"; presumably meant as a
	-- literal percent sign - confirm how mw.ustring.gsub treats it.
	local replaced = mw.ustring.gsub( input, "[%w%s]", "%" )
	return replaced
end

--[[
Currently used for strings of type Page with appropriate collation.

Template entry point: unstrips the first argument and returns its flattened
form (see p.flatten). A missing argument is treated as the empty string.
]]--
function p.main( frame )
	-- Apply the default BEFORE unstripping: the fallback used to sit after the
	-- call, so a missing argument was handed to mw.text.unstrip as nil.
	local str = mw.text.unstrip( frame.args[1] or "" )
	return p.flatten( str )
end

--[[
Replacement for Template:Strip title for sorting.

Template entry point: passes the first argument (or "" when it is missing)
through p.minimalStripForSorting and returns the result.
]]--
function p.minimalStrip( frame )
	local input = frame.args[1] or ""
	-- Parentheses keep the return down to a single value
	return ( p.minimalStripForSorting( input ) )
end

--[[
Used for strings of type Text, hence the need for padding with leading zeroes.

Template entry point: formats the first argument with p.textStringFormat,
falling back to "0" when no argument is given.
]]--
function p.textString( frame )
	local input = frame.args[1]
	if not input then
		input = "0"
	end
	return p.textStringFormat( input )
end

-- Template entry point for p.removeExtraSpace; a missing first argument is
-- treated as the empty string.
function p.doRemoveExtraSpace(frame)
	return p.removeExtraSpace( frame.args[1] or "" )
end

--[[
Template entry point: flattens the first argument, tidies its whitespace,
drops the "¶" marker and replaces every remaining whitespace character with
a hyphen.
]]--
function p.formatTitleForFileUpload(frame)
	-- Earlier attempts to strip "\u{0308}" and "\u{00f6}" with string.gsub at
	-- this point had no effect and remain disabled.
	local title = p.removeExtraSpace( p.flatten( frame.args[1] or "" ) )
	-- "¶" is used in sort labels but is not needed here
	title = string.gsub( title, "¶", "" )
	local hyphenated = string.gsub( title, "%s", "-" )
	return hyphenated
end

-- Remove punctuation from a string.
-- Strips every run of characters matched by %p and, in addition, the curly
-- quotation marks ‘ ’ “ ”.
-- @param str the text to clean; must not be nil
-- @return the text with punctuation removed
-- Raises " No string received. " when str is nil.
local function removePunctuation( str )
	-- Validate up front: string.gsub raises its own "bad argument" error on
	-- nil, so a check placed after the substitutions can never fire.
	if str == nil then
		error(" No string received. " )
	end
	-- %p already includes "-" and ".", so a plain run of %p matches exactly
	-- what the former pattern "%-?[%p%.]+" matched.
	local stripped = string.gsub( str, "%p+", "" )
	-- Curly quotes are listed explicitly; they are multi-byte in UTF-8 and
	-- the byte-oriented %p class is not expected to match them.
	local curlyQuotes = { "‘", "’", "“", "”" }
	for _, quote in ipairs( curlyQuotes ) do
		stripped = string.gsub( stripped, quote, "" )
	end
	return stripped
end

-- Thin wrapper around Module:Diacritics: hands str to strip_diacrits and
-- returns only the first value it produces.
local function removeDiacritics( str )
	return ( mDiacritics.strip_diacrits( str ) )
end

-- Thin wrapper around Module:Pad numbers: forwards str and padlength to
-- replaceNumbers and returns only the first value it produces.
local function padNumbers( str, padlength )
	return ( mPadNumbers.replaceNumbers( str, padlength ) )
end

-- Run str through Module:ConvertNumerals, passing str itself as the default.
-- When the conversion changes the string, the result is additionally passed
-- through mPadNumbers.replaceNumbers with a length of 5; otherwise str is
-- returned untouched.
local function convertToArabic( str )
	local converted = mConvertNumerals.runToArabicNum( str, str )
	if converted ~= str then
		-- Something was converted, so pad the new string
		return ( mPadNumbers.replaceNumbers( converted, 5 ) )
	end
	-- Result equals the default: there was nothing to convert
	return str
end

--[[
Extreme munging, hence 'flatten'.

Strips diacritics, lowercases the text and removes punctuation, in that
order. A nil (or false) argument is treated as the empty string.
]]--
p.flatten = function( str )
	local flat = removeDiacritics( str or "" )
	flat = string.lower( flat )
	-- No nil check afterwards: string.lower and removePunctuation either
	-- return a string or raise, so the former error("...") branch could
	-- never be reached.
	return ( removePunctuation( flat ) )
end

--[[
Remove superfluous space created by: double spaces, newlines, carriage
returns and tabs. The result is trimmed with mw.text.trim.
]]--
p.removeExtraSpace = function(str)
	-- Step 1: each tab, newline or carriage return becomes a plain space
	local spaced = string.gsub( str, "[\t\n\r]", " " )
	-- Step 2: any run of two or more whitespace characters becomes one space
	local collapsed = string.gsub( spaced, "%s%s+", " " )
	return mw.text.trim( collapsed )
end

--[[
Minimal munging: deletes a fixed set of single characters
( ' , " ( ) > < [ ] . ) and leaves everything else alone.
]]--
p.minimalStripForSorting = function( str )
	-- Each entry is a Lua pattern, so magic characters carry a % escape.
	-- Magic characters are: ( ) . % + - * ? [ ^ $
	local unwanted = { "'", ",", '"', "%(", "%)", ">", "<", "%[", "]", "%." }
	local stripped = str
	for index = 1, #unwanted do
		stripped = string.gsub( stripped, unwanted[index], "" )
	end
	return stripped
end

--[[
Formats a string of type Text: unstrip, lowercase, strip diacritics, pad
numbers to a length of 5 and finally run convertToArabic.
Punctuation is deliberately left in place (removePunctuation is not called).
Returns "0" when the result is the empty string.
]]--
p.textStringFormat = function( str )
	local text = string.lower( mw.text.unstrip( str ) )
	text = removeDiacritics( text )
	text = convertToArabic( padNumbers( text, 5 ) )

	if text == nil then
		error("...")
	end
	if text == "" then
		return "0"
	end
	return text
end

return p