-- Module:Ref info/data

-- MyWikiBiz, Author Your Legacy — Friday January 10, 2025
-- Jump to navigation · Jump to search

-- Documentation for this module may be created at Module:Ref info/data/doc

--[[--------------------------< C S 1 _ T E M P L A T E _ P A T T E R N S >------------------------------------

These are patterns for cs1 templates and their redirects.  These patterns exclude vcite-like redirects; those
redirects should be deleted because vcite is not cs1.

]]

-- Flat sequence of Lua patterns.  Layout convention used in this table: a template's primary name sits at the
-- outer indent and the patterns for its redirects follow at the inner indent.  The table is read when
-- cs12_stripped_list is built and is also exported as-is.
local cs1_template_patterns = {													-- lua patterns of the canonical names and redirects
	'[Cc]ite ar[Xx]iv',															-- arXiv is the canonical name
		'[Cc]ite ArXiv',

	'[Cc]ite AV media',															-- canonical
		'[Cc]ite audio', '[Cc]ite AV', '[Cc]ite AV ?Media', '[Cc]ite av media', 
		'[Cc]ite cd', '[Cc]ite DVD', '[Cc]ite dvd', '[Cc]ite film',
		'[Cc]ite image', '[Cc]ite media', '[Cc]ite movie',
		'[Cc]ite music video', '[Cc]ite radio', '[Cc]ite song',
		'[Cc]ite ?video', '[Cc]ite visual', '[Cc]ite You[Tt]ube',
		'[Cc]ita vídeo',														-- non-English redirect; TODO: tally separately?

	'[Cc]ite AV media notes',													-- canonical
		'[Cc]ite album[ %-]notes', '[Cc]ite av media notes',
		'[Cc]ite DVD[ %-]notes', '[Cc]ite dvd%-notes', '[Cc]ite liner notes',
		'[Cc]ite music release notes', '[Ll]iner notes',

	'[Cc]ite bio[Rr]xiv',														-- bioRxiv is the canonical form
	'[Cc]ite [Bb]ook',															-- book is the canonical form
		'[Bb]ook cite', '[Bb]ook reference', '[Bb]ook reference url',
		'[Cc] book', '[Cc]it book', '[Cc]ite books', '[Cc]ite chapter',
		'[Cc]ite ebook', '[Cc]ite manual', '[Cc]ite page',
		'[Cc]ite publication', '[Cc]ite score',
		'[Cc]ite work', '[Cc]ite%-?book', 
		'[Bb]okref', '[Cc]itace monografie', '[Cc]itar livro',					-- non-English redirects; TODO: tally separately?
		'[Cc]iteer boek', '[Oo]uvrage', '[Rr]ef%-llibre', '서적 인용',

	'[Cc]ite citeseerx',														-- canonical
	'[Cc]ite conference',														-- canonical
		'[Cc]ita conferenza', '[Cc]ite proceedings', '[Cc]onference reference',	-- cita conferenza is non-English; TODO: tally separately?
		
	'[Cc]ite ?encyclopedia',													-- cite encyclopedia is the canonical name
		'[Cc]ite contribution', '[Cc]ite dic', '[Cc]ite dictionary',
		'[Cc]ite encyclopaedia', '[Cc]ite encyclopædia', '[Ee]ncyclopedia',

	'[Cc]ite [Ee]pisode',														-- episode is the canonical form
		'[Cc]ite show',

	'[Cc]ite interview',														-- canonical
	'[Cc]ite ?journal',															-- cite journal is the canonical form
		'[Cc] journal', '[Cc]itation journal', '[Cc]ite abstract',
		'[Cc]ite document', '[Cc]ite Journal', '[Cc]ite journal zh',
		'[Cc]ite ?paper', '[Vv]cite2 journal',
		'[Cc]ita pubblicazione', '[Cc]itace periodika', '[Cc]itar jornal',		-- non-English redirects; TODO: tally separately?
		'[Cc]itar publicació', '[Cc]ytuj pismo', '[Tt]idskriftsref',
		'Навод из стручног часописа', '저널 인용',

	'[Cc]ite [Mm]agazine',														-- magazine is the canonical form
		'[Cc]ite mag', '[Cc]ite magazine article', '[Cc]ite newsletter',
		'[Cc]ite periodical',

	'[Cc]ite mailing ?list',													-- mailing list is the canonical form
		'[Cc]ite list',

	'[Cc]ite maps?',															-- map is the canonical form
	'[Cc]ite[ %-]?news',														-- cite news is the canonical form
		'[Cc] news', '[Cc]it news', '[Cc]itation news', '[Cc]ite article',
		'[Cc]ite n', '[Cc]ite new', '[Cc]ite newspaper', '[Cc]ite News',
		'[Cc]ite news%-q', '[Cc]ite news2', '[Cc]itenewsauthor', '[Cc]ute news',
		'[Cc]itar notícia', '[Hh]aber kaynağı', '[Tt]idningsref', 'استشهاد بخبر',	-- non-English redirects; TODO: tally separately?
		'뉴스 인용',

	'[Cc]ite newsgroup',														-- canonical
		'[Cc]ite usenet',
		
	'[Cc]ite podcast',															-- canonical
	'[Cc]ite [Pp]ress release',													-- press release is the canonical form
		'[Cc]ite media release', '[Cc]ite news release', '[Cc]ite pr',
		-- NOTE(review): '\.' below is not a documented Lua escape sequence; confirm the target interpreter
		-- accepts it and that the resulting string (presumably ending in a plain '.') is what is intended
		'[Cc]ite press', '[Cc]ite press release\.', '[Cc]ite press[%-]?release',

	'[Cc]ite report',															-- canonical
	'[Cc]ite serial',															-- canonical
	'[Cc]ite sign',																-- canonical
		'[Cc]ite plaque',

	'[Cc]ite speech',															-- canonical
	'[Cc]ite ssrn',																-- canonical
		'[Cc]ite SSRN',
		
	'[Cc]ite tech ?report',														-- techreport is the canonical form
		'[Cc]ite standard', '[Cc]ite technical report', '[Tt]echrep reference',

	'[Cc]ite thesis',															-- canonical
		'[Cc]ite dissertation',
		'[Cc]itar tese',														-- non-English redirect; TODO: tally separately?

	'[Cc]ite [Ww]eb',															-- web is the canonical form
		'[Cc] web', '[Cc]it web', '[Cc]ite blog', '[Cc]ite URL', '[Cc]ite url',
		-- NOTE(review): same '\.' escape question as for 'press release\.' — confirm
		'[Cc]ite w', '[Cc]ite wb', '[Cc]ite we', '[Cc]ite web\.',
		'[Cc]ite webpage', '[Cc]ite website', '[Cc]ite website article',
		'[Cc]ite%-?web', '[Cc]itweb', '[Cc]w', '[Rr]ef web', '[Ww]eb citation',
		'[Ww]eb cite', '[Ww]eb link', '[Ww]eb[ %-]reference', '[Ww]eblink',
		'[Cc]hú thích web', '[Cc]ita web', '[Cc]itace elektronické monografie',	-- non-English redirects; TODO: tally separately?
		'[Cc]itat web', 'مرجع ويب', 'یادکرد وب', '웹 인용',
	}


--[[--------------------------< C S 2 _ T E M P L A T E _ P A T T E R N S >------------------------------------

These are patterns for cs2 templates and their redirects.

]]

local cs2_template_patterns = {
	-- Lua patterns of the canonical name and its redirects, one per line
	'[Cc]itation',
	'[Cc]ite',
	'[Cc]ite citation',
	'[Cc]ite study',
	'[Cc]ite [Tt]echnical standard',
}


--[[--------------------------< C S 1 2 _ S T R I P P E D _ L I S T >------------------------------------------

This table is created from cs1_template_patterns and cs2_template_patterns.  To make this table, entries in
the source tables are evaluated to replace lua patterns with the appropriate characters and create names for
this list.  This list is used to identify cs1|2 templates when counting and listing cs1-like templates.

For example:
	[Cc]ite ar[Xx]iv
becomes
	Cite arXiv
	Cite arxiv
	cite arXiv
	cite arxiv

]]

local cs12_stripped_list = {};													-- set of literal template names: cs12_stripped_list[name] = true

	--[[
	Records <name> as a key of cs12_stripped_list.  Recording a name that is already present is harmless.
	]]
	local function add_stripped (name)
		cs12_stripped_list[name] = true;
	end


	--[[
	Expands the first pattern construct recognised in <name> into the literal names it stands for and records
	each of them with add_stripped().  The constructs are tried in the order below and only the first one that
	matches is expanded; a name with no recognised construct is recorded unchanged.

	<name> is a pattern whose leading [Xx] set, if any, has already been reduced to a single letter by the caller.

	All captures are held in locals; nothing is written to the global table.
	]]
	local function pattern_convert (name)
		local lead, c, l, tail;													-- <c> also receives the text of the matched construct where that text is not needed

		lead, c, l, tail = name:match ('(.-)%[(%a)(%a)%](.*)');					-- [Xx]: mixed case optional letters
		if lead then
			add_stripped (lead .. c .. tail);									-- first letter of the set (usually uppercase)
			add_stripped (lead .. l .. tail);									-- second letter of the set (usually lowercase)
			return;
		end

		lead, c, tail = name:match ('^([^%[]+)(%[ %%%-%]%?)(.+)$');				-- [ %-]?
		if lead then
			add_stripped (lead .. tail);										-- neither char
			add_stripped (lead .. ' ' .. tail);									-- space
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, c, tail = name:match ('^([^%[]+)(%[%%%-%]%?)(.+)$');				-- [%-]?
		if lead then
			add_stripped (lead .. tail);										-- no hyphen
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, c, tail = name:match ('^([^%[]+)(%[ %%%-%])(.+)$');				-- [ %-]
		if lead then
			add_stripped (lead .. ' ' .. tail);									-- space
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, c, tail = name:match ('^([^%?]+)(%%%-%?)(.+)$');					-- %-?
		if lead then
			add_stripped (lead .. tail);										-- no hyphen
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, c, tail = name:match ('^(.-)(%%%-)(.+)$');						-- %-
		if lead then
			add_stripped (lead .. '-' .. tail);									-- hyphen
			return;
		end

		lead, c, tail = name:match ('^(.-)(.)%?(.*)$');							-- .?  (any single optional character)
		if lead then
			add_stripped (lead .. tail);										-- no character
			add_stripped (lead .. c .. tail);									-- character
			return;
		end

		add_stripped (name);													-- no patterns so save as is
	end


	-- Every cs1 and cs2 pattern is expanded twice: once with its leading [Xx] set reduced to the first letter
	-- (usually uppercase) and once with it reduced to the second letter (usually lowercase).  A pattern that
	-- has no leading set is passed through unchanged both times.
	local leading_set_patterns = {'^%[(%a)%a%]', '^%[%a(%a)%]'};

	for _, list in ipairs ({cs1_template_patterns, cs2_template_patterns}) do
		for i = 1, #list do
			for _, leading_set in ipairs (leading_set_patterns) do
				pattern_convert ((list[i]:gsub (leading_set, '%1')));			-- extra parentheses drop gsub's substitution count
			end
		end
	end


--[[--------------------------< V C I T E _ T E M P L A T E _ P A T T E R N S >--------------------------------

These are patterns for Vcite-family templates and their redirects.

]]

local vcite_template_patterns = {
	-- vcite book (canonical) followed by its redirects
	'[Vv]cite book',
	'[Vv]ancite book',
	'[Vv]ancite report',
	'[Vv]cite encyclopedia',
	'[Vv]cite report',

	-- vcite journal (canonical) followed by its redirects
	'[Vv]cite journal',
	'[Cc]it journal',
	'[Cc]it paper',
	'[Vv]ancite journal',
	'[Vv]cite paper',

	-- vcite news (canonical) followed by its redirect
	'[Vv]cite news',
	'[Vv]ancite news',

	-- vcite web (canonical) followed by its redirect
	'[Vv]cite web',
	'[Vv]ancite web',
	}


--[[--------------------------< H A R V _ T E M P L A T E _ P A T T E R N S >----------------------------------

These are patterns for the harv family of templates and their redirects.

]]

local harv_template_patterns = {
	-- Lua patterns; each canonical name is followed by the patterns for its redirects

	-- Harvard citation no brackets (canonical)
	'[Hh]arvard citation no brackets',
	'[Hh]arnvb',
	'[Hh]arvardnb',
	'[Hh]arvnb',

	-- Harvard citation (canonical)
	'[Hh]arvard citation',
	'[Hh]arv',
	'[Hh]arvsp',

	-- Harvard citation text (canonical)
	'[Hh]arvard citation text',
	'[Hh]arvtxt',

	-- Harvcoltxt (canonical)
	'[Hh]arvcoltxt',

	-- Harvcol (canonical)
	'[Hh]arvcol',
	'[Hh]rvcoln',

	-- Harvcolnb (canonical)
	'[Hh]arvcolnb',

	-- Harvard citations (canonical)
	'[Hh]arvard citations',
	'[Hh]arvs',

	-- Harvp (canonical)
	'[Hh]arvp',
	};


--[[--------------------------< S F N _ T E M P L A T E _ P A T T E R N S >------------------------------------

These are patterns for the sfn family of templates and their redirects.

]]

local sfn_template_patterns = {
	-- Lua patterns; each canonical name is followed by the patterns for its redirects

	-- Sfn (canonical)
	'[Ss]fn',
	'[Hh]f',
	'[Ss]f',
	'[Ss]hortened footnote',
	'[Ss]hortened footnote template',
	'[Ss]nf',

	-- Sfnp (canonical)
	'[Ss]fnp',
	'[Ss]fb',
	'[Ss]fnb',

	-- Sfnm (canonical)
	'[Ss]fnm',

	-- Sfnmp (canonical)
	'[Ss]fnmp',
	};


--[[--------------------------< R E F L I S T _ T E M P L A T E _ P A T T E R N S >----------------------------

These are patterns for the reflist template and its redirects.

]]

-- Flat sequence of Lua patterns: the reflist name first, then the patterns for its redirects.
local reflist_template_patterns = {
	'[Rr]ef[Ll]ist',															-- reflist is the canonical form
		'[Ff]ootnotes?', '[Ff]ootnotesSmall', '[Rr]ealist', '[Rr]ef [Ll]ist',
		'[Rr]ef%-list', '[Rr]eference', '[Rr]eference list', '[Rr]efIist',
		'[Rr]EFLIST', '[Rr]efs', '[Rr]FS', '[Rr]fs',
		-- NOTE(review): the set '[eê]' contains a multi-byte character; Lua's standard string library matches
		-- byte by byte, so this presumably relies on a UTF-8-aware matcher in the consumer — confirm
		'[Jj]egyzetek', '[Ll]istaref', '[Rr]efer[eê]ncias', '[Rr]éférences',	-- non-English redirects
		'[Tt]ham khảo', 'Примечания', 'مراجع', 'پانویس',
	};


--[[--------------------------< C L E A N U P _ T E M P L A T E _ P A T T E R N S >----------------------------

These are patterns for some of the cleanup templates and their redirects.

]]

-- Flat sequence of Lua patterns covering four cleanup templates (citation needed, disputed inline, dubious,
-- failed verification).  Each canonical name is at the outer indent; its redirects follow at the inner indent.
local cleanup_template_patterns = {
	'[Cc]itation [Nn]eeded',													-- Citation needed is the canonical form
		'[Aa]re you sure%?', '[Cc][Bb]', '[Cc]cn', '[Cc]iation needed', '[Cc]it',
		'[Cc]itaiton needed', '[Cc]itation missing', '[Cc]itation need',
		'[Cc]itation [Rr]equested', '[Cc]itation ?required',
		-- NOTE(review): '[-Nn]?' matches at most ONE of '-', 'N', 'n', so the next pattern covers
		-- 'Citationeeded', 'Citation-eeded', 'CitationNeeded' and 'Citationneeded' but not 'Citation-needed';
		-- it also subsumes the '[Cc]itationeeded' entry that follows it — confirm this is intended
		'[Cc]itation[-Nn]?eeded', '[Cc]itationeeded', '[Cc]ite missing',
		'[Cc]ite[ %-]?needed', '[Cc]ite source', '[Cc]itesource', '[Cc]itn',
		'[Cc]N', '[Cc]n', '[Cc]tn', '[Ff]ACT', '[Ff]act[s%?]?',
		'[Ff]citation needed', '[Mm]e%-fact', '[Nn]eed [Cc]itation',
		'[Nn]eed sources', '[Nn]eed%-ref', '[Nn]eedcitation', '[Nn]eedcite',
		'[Nn]eeds citations?', '[Nn]eeds reference', '[Nn]eedsref',
		'[Nn]o source given', '[Pp]ROV%-statement', '[Pp]rove ?it',
		'[Rr]ef%-?needed', '[Rr]ef%?', '[Rr]eference needed', '[Rr]efplease',
		'[Rr]equest [Cc]itation', '[Rr]éférence nécessaire', '[Ss]ource needed',
		'[Ss]ource%?', '[Ss]ourceme', '[Uu]ncited', '[Uu]nreferenced inline',
		'[Uu]nsourced%-inline',
		'[Cc]ita requerida', '[Cc]itazione necessaria', '[Kk]älla behövs',		-- non-English redirects

	'[Dd]isputed[ %-]inline',													-- Disputed inline is canonical form
		'[Dd]ispute[ %-]inline',

	'[Dd]ubious',																-- canonical
		'[Dd]ebatable', '[Dd]isputable', '[Dd]isputed?Assertion',
		'[Dd]isputed Point', '[Dd]oubtful', '[Dd]UB', '[Dd]ub',
		'[Dd]ubious %- [Dd]iscuss', '[Dd]ubious [Ii]nline', 
		'[Dd]ubious%-inline', '[Oo]dd',
		'[Ii]frågasatt uppgift',												-- non-English redirect

	'[Ff]ailed ?verification',													-- Failed verification is the canonical form
		'[Bb]adref', '[Ff]ailed ref', '[Ff]ailed reference',
		'[Ff]ails verification', '[Ff][Vv]', '[Nn]cg', '[Nn]ICG', '[Nn]icg', 
		'[Nn]IGC', '[Nn]igc', '[Nn]ot in citation', '[Nn]ot in citation given',
		'[Nn]ot in cited source', '[Nn]ot in ref', '[Nn]ot in ref given',
		'[Nn]ot in reference', '[Nn]ot in reference given', '[Nn]ot in source',
		'[Nn]ot in source given', '[Nn]ot specifically in source', 
		'[Nn]otincitation', '[Nn]otincitationgiven', '[Nn]otinref', 
		'[Nn]otinsource', '[Nn]otinsourcegiven', '[Vv]erification[ %-]failed',
	};


--[[--------------------------< D E A D _ L I N K _ T E M P L A T E _ P A T T E R N S >------------------------

These are patterns for the dead link template and its redirects.

]]

-- Flat sequence of Lua patterns: the dead link name first, then the patterns for its redirects.
local dead_link_template_patterns = {
	'[Dd]ead [Ll]ink',															-- Dead link is the canonical form
		'404', '[Bb]ad ?link', '[Bb]roken', '[Bb]roken ?link', '[Dd]ead',
		'[Dd]ead cite', '[Dd]ead link%-now', '[Dd]ead links', '[Dd]ead page',
		'[Dd]ead URL', '[Dd]ead[ %-]?url', '[Dd]ead%-inline', '[Dd]ead%-link',
		'[Dd]eadcite', '[Dd]eadlinks?', '[Dd][Ll]', '[Dd]l%-now',
		'[Ll]ink ?broken',
		'[Cc]ollegamento interrotto', '[Tt]oter Link', 'Недоступная ссылка',	-- non-English redirects
	};


--[[--------------------------< W E B A R C H I V E _ T E M P L A T E _ P A T T E R N S >----------------------

These are patterns for the webarchive template and its redirects.

]]

local webarchive_template_patterns = {
	-- Webarchive is the canonical form
	'[Ww]eb ?archive',

	-- redirects
	'[Aa]rchive url',
	'[Ii]AWM',
	'[Ii]awm',
	'[Uu]rl archive',
	'[Ww]ayBack',
	'[Ww]aybackdate',
	'[Ww]ebarchiv',
	'[Ww]ebcitation',
	};


--[[--------------------------< R E F B E G I N _ T E M P L A T E _ P A T T E R N S >--------------------------

These are patterns for the refbegin template and its redirects.

]]

local refbegin_template_patterns = {
	-- Refbegin is the canonical form
	'[Rr]ef ?begin',

	-- redirects
	'[Bb]eginref',
	'[Ss]ourcesstart',
	'[Ss]ourcestart',
	}


--[[--------------------------< R P _ T E M P L A T E _ P A T T E R N S >--------------------------------------

These are patterns for the rp template and its redirects.

]]

local rp_template_patterns = {
	-- Rp is the canonical form
	'[Rr][Pp]',

	-- redirects
	'[Pp]age reference',
	'[Rr]efpages?',
	}


--[[--------------------------< C S 1 | 2   A U T H O R - N A M E _ P A T T E R N S >--------------------------

These are patterns for the cs1|2 author-name parameters.

]]

-- Lua patterns for parameter names; a trailing '1?' makes the enumerator digit 1 optional.
local authors_param_patterns = {
	'authors',
	'people',
	'credits',
};

local author_param_patterns = {
	'author1?',
	'host1?',
	'subject1?',
};

local last_param_patterns = {
	'last1?',
	'author1?%-last1?',
	'surname1?',
};


--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]

-- Module result: every pattern table defined above plus the derived cs12_stripped_list, keyed by its own name.
local exports = {
	author_param_patterns = author_param_patterns,
	authors_param_patterns = authors_param_patterns,
	cleanup_template_patterns = cleanup_template_patterns,
	cs1_template_patterns = cs1_template_patterns,
	cs12_stripped_list = cs12_stripped_list,
	cs2_template_patterns = cs2_template_patterns,
	dead_link_template_patterns = dead_link_template_patterns,
	harv_template_patterns = harv_template_patterns,
	last_param_patterns = last_param_patterns,
	refbegin_template_patterns = refbegin_template_patterns,
	reflist_template_patterns = reflist_template_patterns,
	rp_template_patterns = rp_template_patterns,
	sfn_template_patterns = sfn_template_patterns,
	vcite_template_patterns = vcite_template_patterns,
	webarchive_template_patterns = webarchive_template_patterns,
};

return exports;