Module:Format for sorting

Description
Test using main:

hallo hallo d d hello this is 45567 ff

Tests using textString:

series 45.567.1 ? = series 00045.00567.00001 ?

Roman numeral

00017

00011

minimalStrip

Drukkerij - Uitgeverij sdfsdf De Eik Antwerpen

main

drukkerij uitgeverij sdfsdf de eik antwerpen



--[[
-- {{#invoke:Strip title for sorting|main|...}}
]]--

local mDiacritics = require('Module:Diacritics')
local mConvertNumerals = require('Module:ConvertNumerals')
local mPadNumbers = require('Module:Pad numbers')

-- Export table: every function stored on p is part of the module's public
-- interface and is returned at the end of the file.
local p = {}

--[[
Strip punctuation from a string.

First deletes every run of characters matched by Lua's %p class (optionally
preceded by a hyphen), then deletes the four typographic quotation marks
listed in the body.

@param str  the string to clean (required)
@return     the string with the punctuation removed (a single value)
@raise      " No string received. " when str is nil
]]--
local function removePunctuation( str )
	-- Validate before the first gsub. The check used to come last, so a nil
	-- argument failed inside string.gsub and this message could never fire.
	if str == nil then
		error(" No string received. " )
	end

	-- Assigning to a single local discards gsub's second result (the count).
	local cleaned = string.gsub( str, "%-?[%p%.]+", "" )

	local curlyQuotes = { "‘", "’", "“", "”" }
	for _, quote in ipairs( curlyQuotes ) do
		cleaned = string.gsub( cleaned, quote, "" )
	end
	return cleaned
end

--[[
Delegate diacritic stripping to Module:Diacritics.

@param str  value handed unchanged to mDiacritics.strip_diacrits
@return     the first value returned by strip_diacrits
]]--
local function removeDiacritics( str )
	-- The parentheses truncate the call to exactly one return value.
	return ( mDiacritics.strip_diacrits( str ) )
end

--[[
Delegate number padding to Module:Pad numbers.

@param str        value handed unchanged to mPadNumbers.replaceNumbers
@param padlength  forwarded as the second argument of replaceNumbers
@return           the first value returned by replaceNumbers
]]--
local function padNumbers( str, padlength )
	-- The parentheses truncate the call to exactly one return value.
	return ( mPadNumbers.replaceNumbers( str, padlength ) )
end

--[[
Run the string through Module:ConvertNumerals and, when that changed
anything, pad the numbers in the result to five characters with
Module:Pad numbers.

@param str  the string to convert
@return     str itself when runToArabicNum returned it unchanged,
            otherwise the padded conversion result

NOTE(review): str is also passed as the second argument of runToArabicNum.
The original comment suggests this is the default returned when there is
nothing to convert; confirm against Module:ConvertNumerals.
]]--
local function convertToArabic( str )
	-- Both intermediates are now declared local; they used to be assigned
	-- without `local` and therefore leaked into the global environment.
	local converted = mConvertNumerals.runToArabicNum( str, str )
	if converted == str then
		-- The result equals the default, so there is nothing to convert.
		return str
	end

	local padded = mPadNumbers.replaceNumbers( converted, 5 )
	return padded
end

--[[
Entry point for #invoke.
Currently used for strings of type Page with appropriate collation.

Passes the first positional argument through mw.text.unstrip and returns
the result of p.flatten on it. A missing argument is treated as "".

@param frame  Scribunto frame object; only frame.args[1] is read
@return       the flattened string
]]--
function p.main( frame )
	-- The default must be applied to the argument, not to the result: the
	-- original `mw.text.unstrip(frame.args[1]) or ""` only substituted ""
	-- after unstrip had already been called with a possibly-nil value.
	local str = mw.text.unstrip( frame.args[1] or "" )
	return p.flatten( str )
end

--[[
Extreme munging, hence 'flatten'.

Applies, in this order: removeDiacritics, string.lower, removePunctuation.
A nil argument is treated as the empty string.

@param str  the string to flatten (may be nil)
@return     the flattened string
@raise      "..." if the pipeline yields nil
]]--
p.flatten = function( str )
	local flattened = removePunctuation( string.lower( removeDiacritics( str or "" ) ) )
	if flattened == nil then
		error("...")
	end
	return flattened
end

--[[
#invoke test entry point for p.removeDoubleSpaces.

@param frame  Scribunto frame object; frame.args[1] defaults to ""
@return       exactly one value: the string produced by p.removeDoubleSpaces
]]--
p.testRemoveDoubleSpaces = function(frame)
	local str = frame.args[1] or ""
	-- The parentheses keep only the first return value. A bare tail call
	-- would forward every value the helper returns, and Scribunto
	-- concatenates all values returned to #invoke, so a substitution count
	-- from gsub would end up appended to the visible output.
	return ( p.removeDoubleSpaces( str ) )
end
--[[
Collapse every run of two or more whitespace characters into one space.

Two things changed from the earlier one-line version:
  * only the string is returned; gsub's substitution count is no longer
    passed on as a second return value;
  * the pattern matches whole runs ("%s%s+") instead of pairs ("%s%s"),
    so three or more consecutive whitespace characters no longer leave a
    double space behind.

@param str  the string to clean
@return     the string with whitespace runs collapsed (a single value)
]]--
p.removeDoubleSpaces = function(str)
	-- Assigning to a single local discards gsub's second result (the count).
	local collapsed = str:gsub("%s%s+", " ")
	return collapsed
end

--[[
Replacement for Template:Strip title for sorting.

#invoke entry point: hands the first positional argument to
p.minimalStripForSorting and returns its result.

@param frame  Scribunto frame object; frame.args[1] defaults to ""
@return       the stripped string
]]--
function p.minimalStrip( frame )
	-- Default a missing argument to "" (as p.testRemoveDoubleSpaces does)
	-- so an #invoke without a parameter no longer fails inside string.gsub.
	local str = frame.args[1] or ""
	local newStr = p.minimalStripForSorting( str )
	return newStr
end

--[[
Minimal munging.

Deletes every occurrence of these characters and leaves the rest of the
string untouched:  ' , " ( ) > < [ ] .

@param str  the string to strip
@return     the stripped string (a single value)
]]--
p.minimalStripForSorting = function( str )
	-- One character class covers all the characters to drop. Inside the set
	-- the Lua magic characters ( ) [ ] . are escaped with %.
	local stripped = string.gsub( str, "[',\"%(%)<>%[%]%.]", "" )
	return stripped
end

--[[
Used for strings of type Text, hence the need for padding with leading
zeroes.

#invoke entry point: forwards the first positional argument, or "0" when
it is absent, to p.textStringFormat.

@param frame  Scribunto frame object; only frame.args[1] is read
@return       whatever p.textStringFormat returns
]]--
function p.textString( frame )
	local raw = frame.args[1]
	if not raw then
		raw = "0"
	end
	return p.textStringFormat( raw )
end

--[[
Normalise a Text-type string for sorting.

Steps, in order: mw.text.unstrip, string.lower, removeDiacritics,
padNumbers (width 5), convertToArabic.

@param str  the string to format
@return     the formatted string, or "0" when the result is empty
@raise      "..." if the pipeline yields nil
]]--
p.textStringFormat = function( str )
	local text = string.lower( mw.text.unstrip( str ) )
	text = removeDiacritics( text )
	-- Punctuation is not removed on this path; the call is disabled:
	-- text = removePunctuation( text )
	text = convertToArabic( padNumbers( text, 5 ) )

	if text == nil then
		error("...")
	end
	if text == "" then
		return "0"
	end
	return text
end

-- Hand the export table back to the caller that loads this module.
return p