Module:Convert/makeunits
The purpose of this module is to prepare the data used by Module:Convert to allow conversion between units of measurement.
Usage: Put one of the following lines (with nothing else) in a sandbox:
{{#invoke:convert/makeunits|makeunits}}
{{subst:#invoke:convert/makeunits|makeunits}}
Previewing the sandbox should display the wikitext that needs to be copied and pasted into Module:Convert/data. If a problem occurs, warning messages will be displayed to indicate that the unit definitions need to be fixed.
By default, the module reads the unit definitions from Module:Convert/documentation/conversion data. For testing purposes, it is possible to specify that the definitions are read from another page, for example, User:Johnuniq/sandbox2, by specifying the wanted title:
{{#invoke:convert/makeunits|makeunits|User:Johnuniq/sandbox2}}
{{subst:#invoke:convert/makeunits|makeunits|User:Johnuniq/sandbox2}}
The module contains table specials
which is used to insert a small amount of "built-in" data that is not currently defined in the input wikitext.
The module reads data from Module:Convert/text to allow localization of the table of units for use on another wiki.
-- This module generates the wikitext required at Module:Convert/data -- by reading and processing the wikitext of the master list of units -- (see conversion_data for the page title). -- -- Script method: -- * Read lines, ignoring everything before "== Conversions ==". -- * Process the following lines: -- * Find next level-3 heading like "=== Length ===". -- * Parse each following line starting with "|" -- (but ignore lines starting with "|-" or "|}". -- * Split such lines into fields (delimiter "||") and trim -- leading/trailing whitespace from each field. -- Remove any "colspan" at front of second field (symbol). -- * Remove thousand separators (commas) from the scale field. -- If the scale is a number, do not change it. -- Otherwise, it should be an expression like "5/9", in -- which case it is replaced by the value of the expression. -- * Remove wiki formatting '[[...]]' from the link field. -- * Remove redundant fields from the unit to reduce size of data table. -- * Create alternative forms of a unit such as an alias or a combination. -- * Stop processing when encounter end of text or a line starting -- with a level-2 heading ("==" but not "==="). -- * Repeat above for each heading listed at prepare_data(). -- * Output Lua source for the units table. -- -- -- Output has the following form. -- local all_units = { -- ["unitcode"] = { -- standard format -- name1 = "singular name", -- omitted if redundant -- name1_us = "singular name sp=us", -- omitted if redundant -- name2 = "plural name", -- omitted if redundant -- name2_us = "plural name sp=us", -- omitted if redundant -- symbol = "symbol", -- sym_us = "symbol sp=us", -- omitted if redundant -- usename = 1, -- omitted if empty -- utype = "unit type", -- from level-3 heading -- scale = 1, -- a value, if necessary from evaluating an expression -- subdivs = { ["ft"] = { 5280, default = "km" }, ["yd"] = { 1760 } } -- composite input; omitted if empty -- link = "title of article for wikilink", -- omitted if empty or redundant -- ... -- other values -- }, -- ["unitcode"] = { -- alternative format to generate an alias -- target = "unit code", -- ... -- optional values to override those of target -- }, -- ["unitcode"] = { -- alternative format to generate a "per" unit like $/acre or BTU/h -- per = {u1, u2}, -- numbered table of unitcodes (u1 may be a currency symbol) -- ... -- optional values -- }, -- ["unitcode"] = { -- alternative format to generate an error message -- shouldbe = "message that some other unit code should be used", -- }, -- ["unitcode"] = { -- alternative format for combination outputs (like 'm ft') -- combination = {u1, u2, ...}, -- numbered table of unitcodes -- utype = "unit type", -- as for standard format -- }, -- ["unitcode"] = { -- alternative format for output multiples (like 'ftin') -- combination = {u1, u2, ...}, -- numbered table of unitcodes -- multiple = {f1, f2, ...}, -- numbered table of integer factors -- utype = "unit type", -- as for standard format -- }, -- ... -- } local ulower = mw.ustring.lower local usub = mw.ustring.sub local text_code local specials = { -- This table is used to add extra fields when defining some units which -- require exceptions to normal processing. -- Each key is in the local language, while each value is fixed text. -- However, this script should NOT be edited. -- Instead, the translation_table in Module:Convert/text can be edited, -- and this script will replace sections of the following with localized -- definitions from Module:Convert/text, if given. -- Ask for assistance at [[:en:Module talk:Convert]]. -- LATER: It would be better if this was defined in the conversion data. utype = { -- ["unit type in local language"] = "name_used_in_this_script" ["fuel efficiency"] = "type_fuel_efficiency", ["length"] = "type_length", ["temperature"] = "type_temperature", ["volume"] = "type_volume", }, ucode = { exception = { -- ["unit code in local language"] = "name_used_in_module_convert" ["ft"] = "integer_more_precision", ["in"] = "subunit_more_precision", ["lb"] = "integer_more_precision", }, istemperature = { -- Common temperature scales (not keVT or MK). -- ["unit code in local language"] = true ["C"] = true, ["F"] = true, ["K"] = true, ["R"] = true, }, usesymbol = { -- Use unit symbol not name if abbr not specified. -- ["unit code in local language"] = 1 ["C"] = 1, ["F"] = 1, ["K"] = 1, ["R"] = 1, ["C-change"] = 1, ["F-change"] = 1, ["K-change"] = 1, }, alttype = { -- Unit has an alternate type that is a valid conversion. -- ["unit code in local language"] = "alternate type in local language" ["Nm"] = "energy", ["ftlb"] = "torque", ["ftlb-f"] = "torque", ["ftlbf"] = "torque", ["inlb"] = "torque", ["inlb-f"] = "torque", ["inlbf"] = "torque", ["inoz-f"] = "torque", ["inozf"] = "torque", }, }, } -- Module text for the local language (localization). -- A default table of text for enwiki is provided here. -- If needed for another wiki, wanted sections from the table can be -- copied into translation_table in Module:Convert/text. -- For example, copying and modifying only the titles section may give: -- -- local translation_table = { -- ... -- other items -- mtext = { -- titles = { -- -- name_used_in_this_script = 'Title of page' -- conversion_data = 'Modul:Convert/documentation/conversion data/dok', -- }, -- }, -- } local mtext = { section_names = { -- name_used_in_this_script = 'Section title used in conversion data' overrides = 'Overrides', conversions = 'Conversions', outmultiples = 'Output multiples', combinations = 'Combinations', inmultiples = 'Input multiples', defaults = 'Defaults', links = 'Links', perunits = 'Automatic per units', varnames = 'Variable names', }, titles = { -- name_used_in_this_script = 'Title of page' conversion_data = 'Module:Convert/documentation/conversion data', }, messages = { -- name_used_in_this_script = 'Error message ($1 = first parameter, $2 = second)' m_als_bad = 'Alias has invalid text in field "$1".', m_als_dup = 'Alias "$1" already defined.', m_als_link = 'Alias "$1" must include a wikilink ("[[...]]") in the symlink text.', m_als_mul = 'Alias "$1" has multiplier "$2" which is not a number.', m_als_same = 'Should omit "$1" for alias "$2" because it is the same as its target.', m_als_type = 'Target of alias "$1" has wrong type.', m_als_undef = 'Primary unit must be defined before alias "=$1"', m_cmb_miss = 'Missing unit code for a combination.', m_cmb_none = 'No units specified for combination "$1"', m_cmb_one = 'Only one unit specified for combination "$1"', m_cmb_type = 'Unit "$1" in combination "$2" has wrong type.', m_cmb_undef = 'Unit "$1" in combination "$2" not defined.', m_cmp_def = 'Composite "$1" must specify a default unit code.', m_cmp_int = 'Composite "$1" has components where scale ratios are not integers.', m_cmp_inval = 'Composite "$1" has a component with an invalid scale, "$2".', m_cmp_many = 'Composite "$1" has too many fields.', m_cmp_miss = 'Missing unit code for a composite.', m_cmp_order = 'Composite "$1" has components in wrong order or with invalid scales.', m_cmp_scale = 'Alternate unit "$1" in composite "$2" has wrong scale.', m_cmp_two = 'Composite "$1" must specify exactly two unit codes.', m_cmp_type = 'Unit "$1" in composite "$2" has wrong type.', m_cmp_undef = 'Unit "$1" in composite "$2" not defined.', m_def_cond = 'Invalid condition in default "$1" for unit "$2".', m_def_fmt = 'Default output "$1" for unit "$2" should have 2 or 3 "!".', m_def_rpt = 'Default output "$1" for unit "$2" is repeated.', m_def_same = 'Default output for unit "$1" is the same unit.', m_def_type = 'Default output "$1" for unit "$2" has wrong type.', m_def_undef = 'Default output "$1" for unit "$2" is not defined.', m_dfs_code = 'Defaults section: no unit code specified.', m_dfs_dup = 'Defaults section: unit "$1" has already been specified.', m_dfs_none = 'Defaults section: unit "$1" has no default specified.', m_dfs_sym = 'Defaults section: unit "$1" must have a symbol.', m_dfs_two = 'Defaults section: unit "$1" should have two fields only.', m_dfs_undef = 'Defaults section: unit "$1" is not defined.', m_dup_code = 'Unit code "$1" has already been defined.', m_error = 'Error:', m_ftl_read = 'Could not read wikitext from "[[$1]]".', m_ftl_table = '[[$1]] should export table "$2".', m_ftl_type = 'Fatal error: unknown data type for "$1"', m_hdg_lev2 = 'Level 2 heading "$1" not found.', m_hdg_lev3 = 'No level 3 heading before: $1', m_line_num = ' (line $1).', m_lnk_brack = 'Link "$1" has wrong number of brackets.', m_lnk_dup = 'Link exception "$1" is already defined.', m_lnk_miss = 'Missing unit code for a link.', m_lnk_none = 'No link defined for unit "$1".', m_lnk_sym = 'Unit code "$1" for a link must have a symbol.', m_lnk_two = 'Row for unit "$1" link should have two fields only.', m_lnk_type = 'Link exception "$1" has wrong type.', m_lnk_undef = 'Unit code "$1" for a link is not defined.', m_miss_code = 'Missing unit code.', m_miss_sym = 'Missing symbol.', m_miss_type = 'Missing unit type.', m_mul_int = 'Multiple "$1" has components where scale ratios are not integers.', m_mul_miss = 'Missing unit code for a multiple.', m_mul_none = 'No units specified for multiple "$1"', m_mul_one = 'Only one unit specified for multiple "$1"', m_mul_order = 'Multiple "$1" has components in wrong order or with invalid scales.', m_mul_scale = 'Multiple "$1" has a component with an invalid scale, "$2".', m_mul_std = 'Unit "$1" in multiple "$2" must be a standard unit.', m_mul_type = 'Unit "$1" in multiple "$2" has wrong type.', m_mul_undef = 'Unit "$1" in multiple "$2" not defined.', m_no_title = 'Need title of page with unit definitions.', m_ovr_dup = 'Override "$1" is already defined.', m_ovr_miss = 'Missing unit code for an override.', m_per_dup = 'Per unit "$1" already defined.', m_per_empty = 'Unit "$1" has an empty field in the "per".', m_per_fuel = 'Unit "$1" has invalid unit types for fuel efficiency.', m_per_inv = 'Invalid field for a "per".', m_per_two = 'Unit "$1" does not have exactly 2 fields in the "per".', m_per_undef = 'Unit "$1" has undefined unit code "$2" in the "per".', m_percent_s = 'Field "$1" must not contain "%s".', m_pfx_bad = 'Unknown prefix: "$1".', m_pfx_name = 'Unit with Prefix set must include Name.', m_scl_bad = 'Scale expression is invalid: "$1".', m_scl_miss = 'Missing scale.', m_scl_oflow = 'Scale expression gives an invalid value: "$1".', m_var_cnt = 'Variable names section: each row must have the configured number of columns.', m_var_dup = 'Unit "$1" already has a variable name.', m_var_miss = 'Missing field for a variable name.', m_var_undef = 'Unit "$1" in variable names is not defined.', m_warning = 'Warning:', m_wrn_more = ' (and more not shown)', m_wrn_nbsp = 'Line $1 contains a nonbreaking space.', m_wrn_nodef = 'Units with the following unit codes have no default output.', m_wrn_ucode = ' $1', }, } local function message(key, ...) -- Return a message from the message table, which can be localized. -- '$1', '$2', ... are replaced with the first, second, ... parameters, -- each of which must be a string or a number. -- The global variable is_test_run can be set by a testing program to -- check the messages generated by this program. local rep = {} for i, v in ipairs({...}) do rep['$' .. i] = v end key = key or '???' local extra if is_test_run and key ~= 'm_line_num' then extra = key .. ': ' else extra = '' end return extra .. string.gsub(mtext.messages[key] or key, '$%d+', rep) end local function quit(key, ...) -- Use error() to pass an error message to the surrounding pcall(). error(message(key, ...), 0) end local function quit_no_message() -- Throw an error. -- This is used in some functions which can throw an error with a message, -- but where the message is in fact never displayed because the calling -- function uses pcall to catch errors, and any message is ignored. -- Using this function documents that the message (which may be useful in -- some other application) does not need translation as it never appears. error('this message is not displayed', 0) end local function collection() -- Return a table to hold items. return { n = 0, add = function (self, item) self.n = self.n + 1 self[self.n] = item end, pop = function (self, item) if self.n > 0 then local top = self[self.n] self.n = self.n - 1 return top end end, join = function (self, sep) return table.concat(self, sep or '\n') end, } end local warnings = collection() local function add_warning(key, ...) -- Add a warning that will be inserted before the final result. warnings:add(message(key, ...)) end ---Begin code to evaluate expressions----------------------------------- -- This is needed because Lua's loadstring() is not available in Scribunto, -- and each scale value can be specifed as an expression such as "5/9". -- More complex expressions are supported, including use of parentheses -- and the binary operators: + - * / ^ local operators = { ['+'] = { precedence = 1, associativity = 1, func = function (a, b) return a + b end }, ['-'] = { precedence = 1, associativity = 1, func = function (a, b) return a - b end }, ['*'] = { precedence = 2, associativity = 1, func = function (a, b) return a * b end }, ['/'] = { precedence = 2, associativity = 1, func = function (a, b) return a / b end }, ['^'] = { precedence = 3, associativity = 2, func = function (a, b) return a ^ b end }, ['('] = '(', [')'] = ')', } local function tokenizer(text) -- Function 'next' returns the next token which is one of: -- number -- table (operator) -- string ('(' or ')') -- nil (end of text) -- If invalid, an error is thrown. -- The number is unsigned (unary operators are not supported). return { pos = 1, maxpos = #text, text = text, next = function(self) if self.pos <= self.maxpos then local p1, p2, hit = self.text:find('^%s*([+%-*/^()])', self.pos) if hit then self.pos = p2 + 1 return operators[hit] end p1, p2, hit = self.text:find('^%s*(%d*%.?%d*[eE][+-]?%d*)', self.pos) if not hit then p1, p2, hit = self.text:find('^%s*(%d*%.?%d*)', self.pos) end local value = tonumber(hit) if value then self.pos = p2 + 1 return value end quit_no_message('invalid number "' .. self.text:sub(self.pos) .. '"') end end } end local function evaluate_tokens(tokens, inparens) -- Return the value from evaluating tokenized expression, or throw an error. local numstack, opstack = collection(), collection() local function perform_ops(precedence, associativity) while opstack.n > 0 and (opstack[opstack.n].precedence > precedence or (opstack[opstack.n].precedence == precedence and associativity == 1)) do local rhs = numstack:pop() local lhs = numstack:pop() if not (rhs and lhs) then quit_no_message('missing number') end local op = opstack:pop() numstack:add(op.func(lhs, rhs)) end end local token_last local function set_state(token_type) if token_last == token_type then local missing = (token_type == 'number') and 'operator' or 'number' quit_no_message('missing ' .. missing) end token_last = token_type end while true do local token = tokens:next() if type(token) == 'number' then set_state('number') numstack:add(token) elseif type(token) == 'table' then set_state('operator') perform_ops(token.precedence, token.associativity) opstack:add(token) elseif token == '(' then set_state('number') numstack:add(evaluate_tokens(tokens, true)) elseif token == ')' then if inparens then break end quit_no_message('unbalanced parentheses') else break end end perform_ops(0) if numstack.n > 1 then quit_no_message('missing operator') end if numstack.n < 1 then quit_no_message('missing number') end return numstack:pop() end local function evaluate(expression) -- Return value (a number) from evaluating expression (a string), -- or throw an error if invalid. -- This is not bullet proof, but it should support the expressions used. return evaluate_tokens(tokenizer(expression)) end ---End code to evaluate expressions------------------------------------- ---Begin code adapted from Module:Convert------------------------------- local plural_suffix = 's' -- may be changed from translation.plural_suffix below local function shallow_copy(t) -- Return a shallow copy of t. -- Do not need the features and overhead of mw.clone() provided by Scribunto. local result = {} for k, v in pairs(t) do result[k] = v end return result end local function split(text, delimiter) -- Return a numbered table with fields from splitting text. -- The delimiter is used in a regex without escaping (for example, '.' would fail). -- Each field has any leading/trailing whitespace removed. local t = {} text = text .. delimiter -- to get last item for item in text:gmatch('%s*(.-)%s*' .. delimiter) do table.insert(t, item) end return t end local unit_mt = { -- Metatable to get missing values for a unit that does not accept SI prefixes. -- Warning: The boolean value 'false' is returned for any missing field -- so __index is not called twice for the same field in a given unit. __index = function (self, key) local value if key == 'name1' or key == 'sym_us' then value = self.symbol elseif key == 'name2' then value = self.name1 .. plural_suffix elseif key == 'name1_us' then value = self.name1 if not rawget(self, 'name2_us') then -- If name1_us is 'foot', do not make name2_us by appending plural_suffix. self.name2_us = self.name2 end elseif key == 'name2_us' then local raw1_us = rawget(self, 'name1_us') if raw1_us then value = raw1_us .. plural_suffix else value = self.name2 end elseif key == 'link' then value = self.name1 else value = false end rawset(self, key, value) return value end } local function prefixed_name(unit, name, index) -- Return unit name with SI prefix inserted at correct position. -- index = 1 (name1), 2 (name2), 3 (name1_us), 4 (name2_us). -- The position is a byte (not character) index, so use Lua's sub(). local pos = rawget(unit, 'prefix_position') if type(pos) == 'string' then pos = tonumber(split(pos, ',')[index]) end if pos then return name:sub(1, pos - 1) .. unit.si_name .. name:sub(pos) end return unit.si_name .. name end local unit_prefixed_mt = { -- Metatable to get missing values for a unit that accepts SI prefixes. -- Before use, fields si_name, si_prefix must be defined. -- The unit must define _symbol, _name1 and -- may define _sym_us, _name1_us, _name2_us -- (_sym_us, _name2_us may be defined for a language using sp=us -- to refer to a variant unrelated to U.S. units). __index = function (self, key) local value if key == 'symbol' then value = self.si_prefix .. self._symbol elseif key == 'sym_us' then value = rawget(self, '_sym_us') if value then value = self.si_prefix .. value else value = self.symbol end elseif key == 'name1' then value = prefixed_name(self, self._name1, 1) elseif key == 'name2' then value = rawget(self, '_name2') if value then value = prefixed_name(self, value, 2) else value = self.name1 .. plural_suffix end elseif key == 'name1_us' then value = rawget(self, '_name1_us') if value then value = prefixed_name(self, value, 3) else value = self.name1 end elseif key == 'name2_us' then value = rawget(self, '_name2_us') if value then value = prefixed_name(self, value, 4) elseif rawget(self, '_name1_us') then value = self.name1_us .. plural_suffix else value = self.name2 end elseif key == 'link' then value = self.name1 else value = false end rawset(self, key, value) return value end } local function lookup(units, unitcode, sp, what) -- Return a copy of the unit if found, or return nil. -- In this cut-down code, sp is always nil, and what is ignored. local t = units[unitcode] if t then if t.shouldbe then return nil end local result = shallow_copy(t) if result.prefixes then result.si_name = '' result.si_prefix = '' return setmetatable(result, unit_prefixed_mt) end return setmetatable(result, unit_mt) end local SIprefixes = text_code.SIprefixes for plen = SIprefixes[1] or 2, 1, -1 do -- Look for an SI prefix; should never occur with an alias. -- Check for longer prefix first ('dam' is decametre). -- SIprefixes[1] = prefix maximum #characters (as seen by mw.ustring.sub). local prefix = usub(unitcode, 1, plen) local si = SIprefixes[prefix] if si then local t = units[usub(unitcode, plen+1)] if t and t.prefixes then local result = shallow_copy(t) if (sp == 'us' or t.sp_us) and si.name_us then result.si_name = si.name_us else result.si_name = si.name end result.si_prefix = si.prefix or prefix -- In this script, each scale is a string. result.scale = tostring(tonumber(t.scale) * 10 ^ (si.exponent * t.prefixes)) result.prefixes = nil -- a prefixed unit does not take more prefixes (in this script, the returned unit may be added to the list of units) return setmetatable(result, unit_prefixed_mt) end end end local exponent, baseunit = unitcode:match('^e(%d+)(.*)') if exponent then local engscale = text_code.eng_scales[exponent] if engscale then local result = lookup(units, baseunit, sp, 'no_combination') if not result then return nil end if not (result.offset or result.builtin or result.engscale) then result.defkey = unitcode -- key to lookup default exception result.engscale = engscale -- Do not set result.scale as this code is called for units where that is not set. return result end end end return nil end local function evaluate_condition(value, condition) -- Return true or false from applying a conditional expression to value, -- or throw an error if invalid. -- A very limited set of expressions is supported: -- v < 9 -- v * 9 < 9 -- where -- 'v' is replaced with value -- 9 is any number (as defined by Lua tonumber) -- '<' can also be '<=' or '>' or '>=' -- In addition, the following form is supported: -- LHS and RHS -- where -- LHS, RHS = any of above expressions. local function compare(value, text) local arithop, factor, compop, limit = text:match('^%s*v%s*([*]?)(.-)([<>]=?)(.*)$') if arithop == nil then quit_no_message('Invalid default expression.') elseif arithop == '*' then factor = tonumber(factor) if factor == nil then quit_no_message('Invalid default expression.') end value = value * factor end limit = tonumber(limit) if limit == nil then quit_no_message('Invalid default expression.') end if compop == '<' then return value < limit elseif compop == '<=' then return value <= limit elseif compop == '>' then return value > limit elseif compop == '>=' then return value >= limit end quit_no_message('Invalid default expression.') -- should not occur end local lhs, rhs = condition:match('^(.-%W)and(%W.*)') if lhs == nil then return compare(value, condition) end return compare(value, lhs) and compare(value, rhs) end ---End adapted code----------------------------------------------------- local function strip(text) -- Return text with no leading/trailing whitespace. return text:match("^%s*(.-)%s*$") end local function empty(text) -- Return true if text is nil or empty (assuming a string). return text == nil or text == '' end -- Tables of units: k = unit code, v = unit table. local units_index = {} -- all units: normal, alias, per, combination, or multiple local alias_index = {} -- all aliases (to detect attempts to define more than once) local per_index = {} -- all "per" units (to detect attempts to define more than once) local function get_unit(ucode, utype) -- Look up unit code in our cache of units. -- If utype == nil, the unit should already have been defined. -- Otherwise, ucode may represent an automatically generated combination -- where each component must have the given utype; a dummy unit is returned. if empty(ucode) then return nil end local unit = lookup(units_index, ucode) if unit or not utype then return unit end local combo = collection() if ucode:find('+', 1, true) then for item in (ucode .. '+'):gmatch('%s*(.-)%s*%+') do if item ~= '' then combo:add(item) end end elseif ucode:find('%s') then for item in ucode:gmatch('%S+') do combo:add(item) end end if combo.n > 1 then local result = setmetatable({ utype = utype }, { __index = function (self, key) error('Bug: invalid use of automatically generated unit') end }) for _, v in ipairs(combo) do local component = lookup(units_index, v) if not component or component.shouldbe or component.combination then return nil end if utype ~= component.utype then result.utype = component.utype -- set wrong type which caller will detect break end end return result end end local overrides = {} -- read from input for unit codes that should not be checked for a duplicate local function insert_unique_unit(data, unit, index) -- After inserting any required built-in data, insert the unit into the -- data table and (if index not nil) add to index, -- but not if the unit code is already defined. local ucode = unit.unitcode local known = get_unit(ucode) if known and not overrides[ucode] then quit('m_dup_code', ucode) end for item, t in pairs(specials.ucode) do unit[item] = t[ucode] end if index then index[ucode] = unit end table.insert(data, unit) end local function check_condition(condition) -- Return true if condition appears to be valid; otherwise return false. for _, value in ipairs({ 0, 0.1, 1, 1.1, 10, 100, 1000, 1e4, 1e5 }) do local success, result = pcall(evaluate_condition, value, condition) if not success then return false end end return true end local function check_default_expression(default, ucode) -- Return a numbered table of names present in param default -- (two names if an expression, or one name (param default) otherwise). -- Throw an error if a problem occurs. -- An expression uses pipe-delimited fields with 'v' representing -- the input value for the conversion. -- Example (suffix is optional): 'v < 120 ! small ! big ! suffix' -- returns { 'smallsuffix', 'bigsuffix' }. if not default:find('!', 1, true) then return { default } end local t = {} for item in (default .. '!'):gmatch('%s*(.-)%s*!') do t[#t+1] = item -- split on '!', removing leading/trailing whitespace end if not (#t == 3 or #t == 4) then quit('m_def_fmt', default, ucode) end local condition, default1, default2 = t[1], t[2], t[3] if #t == 4 then default1 = default1 .. t[4] default2 = default2 .. t[4] end if not check_condition(condition) then quit('m_def_cond', default, ucode) end return { default1, default2 } end local function check_default(default, ucode, utype, unit_table) -- Check the given name (or expression) of a default output. -- Normally a unit must not define itself as its default. However, -- some units are defined merely for use in per units, and they have -- the same ucode, utype and default. -- Example: unit cent which cannot be converted to anything other than -- a cent, but which can work, for example, in cent/km and cent/mi. -- Throw an error if a problem occurs. local done = {} for _, default in ipairs(check_default_expression(default, ucode)) do if done[default] then quit('m_def_rpt', default, ucode) end if default == ucode and ucode ~= utype then quit('m_def_same', ucode) end local default_table = get_unit(default, utype) if not default_table then quit('m_def_undef', default, ucode) end if not (utype == unit_table.utype and utype == default_table.utype) then quit('m_def_type', default, ucode) end done[default] = true end end local function check_all_defaults(cfg, units) -- Check each default in units and warn if needed. -- This is done after all input data has been processed. -- Throw an error if a problem occurs. local errors = collection() local missing = collection() -- unitcodes with missing defaults for _, unit in ipairs(units) do if not unit.shouldbe and not unit.combination then -- This is a standard unit or an alias/per (not shouldbe, combo). -- An alias may have a default defined, but it is optional. local default = unit.default local ucode = unit.unitcode if empty(default) then if not unit.target then -- unit should have a default missing:add(ucode) end else local ok, msg = pcall(check_default, default, ucode, unit.utype, unit) if not ok then errors:add(msg) if errors.n >= cfg.maxerrors then break end end end end end if errors.n > 0 then error(errors:join(), 0) end if missing.n > 0 then add_warning('m_wrn_nodef') local limit = cfg.maxerrors for _, v in ipairs(missing) do limit = limit - 1 if limit < 0 then add_warning('m_wrn_more') break end add_warning('m_wrn_ucode', v) end end end local function check_all_pers(cfg, units) -- Check each component of each "per" unit and warn if needed. -- In addition, add any required extra fields for some types of units. -- This is done after all input data has been processed. -- Throw an error if a problem occurs. local errors = collection() local function errmsg(key, ...) errors:add(message(key, ...)) end for _, unit in ipairs(units) do local per = unit.per if per then local ucode = unit.unitcode if #per ~= 2 then errmsg('m_per_two', ucode) else local types = {} for i, v in ipairs(per) do if empty(v) then errmsg('m_per_empty', ucode) end if not text_code.currency[v] then local t = get_unit(v) if t then types[i] = t.utype else errmsg('m_per_undef', ucode, v) end end end if specials.utype[unit.utype] == 'type_fuel_efficiency' then local expected = { type_volume = 1, type_length = 2 } local top_type = expected[specials.utype[types[1]]] local bot_type = expected[specials.utype[types[2]]] if top_type and bot_type and top_type ~= bot_type then unit.iscomplex = true if top_type == 1 then unit.invert = 1 else unit.invert = -1 end else errmsg('m_per_fuel', ucode) end end end end if errors.n >= cfg.maxerrors then break end end if errors.n > 0 then error(errors:join(), 0) end end local function update_units(units, composites, varnames) -- Update some unit definitions with extra data defined in other sections. -- This is done after all input data has been processed. for _, unit in ipairs(units) do local comp = composites[unit.unitcode] if comp then unit.subdivs = '{ ' .. table.concat(comp.subdivs, ', ') .. ' }' end local vn = varnames[unit.unitcode] if vn then unit.varname = vn end end end local function make_override(cfg, data) -- Return a function which, when called, stores a unit code that is not to be -- checked for a duplicate. The table is stored in data (also a table). return function (utype, fields) local ucode = fields[1] if empty(ucode) then quit('m_ovr_miss') end if data[ucode] then quit('m_ovr_dup', ucode) end data[ucode] = true end end local function make_default(cfg, data) -- Return a function which, when called, stores a table that defines a -- default output unit. The table is stored in data (also a table). local defaults_index = {} -- to detect attempts to define a default twice return function (utype, fields) -- Store a table defining a unit. -- This is for a unit such as 'kg' that has a default output unit -- different from what is defined for the base unit ('g'). -- Throw an error if a problem occurs. local ucode = fields[1] local default = fields[2] if empty(ucode) then quit('m_dfs_code') end if empty(default) then quit('m_dfs_none', ucode) end if #fields ~= 2 then quit('m_dfs_two', ucode) end local unit_table = get_unit(ucode) if not unit_table then quit('m_dfs_undef', ucode) end local symbol = unit_table.defkey or unit_table.symbol if empty(symbol) then quit('m_dfs_sym', ucode) end check_default(default, ucode, utype, unit_table) if defaults_index[ucode] then quit('m_dfs_dup', ucode) end defaults_index[ucode] = default table.insert(data, { symbol = symbol, default = default }) end end local function clean_link(link, name) -- Return link, customary where: -- link = given link after removing any '[[...]]' wiki formatting -- and removing any leading '+' or '*' or '@'; -- customary = 1 if leading '+', or 2 if '*' or 3 if '@', or nil -- (for extra "US" or "U.S." or "Imperial" customary units link). -- Result has leading/trailing whitespace removed, and is nil if empty -- or if link matches the name, if a name is specified. -- Exception: If the link is empty and the name starts with '[[', -- the link is stored as '' (for a unit name which is always linked). -- If the resulting link is nil, no link field is stored, and -- if a link is required, it will be set from the unit's name. local original = link if empty(link) then return (name and name:sub(1, 2) == '[[') and '' or nil end local prefixes = { ['+'] = 1, ['*'] = 2, ['@'] = 3 } local customary = prefixes[link:sub(1, 1)] if customary then link = strip(link:sub(2)) end if link:sub(1, 2) == '[[' then link = link:sub(3) end if link:sub(-2) == ']]' then link = link:sub(1, -3) end link = strip(link) if link:sub(1, 1) == '[' or link:sub(-1) == ']' then quit('m_lnk_brack', original) end if link == '' then link = nil elseif name then local l = ulower(usub(link, 1, 1)) .. usub(link, 2) local n = ulower(usub(name, 1, 1)) .. usub(name, 2) if l == n then link = nil -- link == name, ignoring case of first letter end end return link, customary end local function make_link(cfg, data) -- Return a function which, when called, stores a table that defines a -- link exception. The table is stored in data (also a table). local links_index = {} -- to detect attempts to define a link twice return function (utype, fields) -- Store a table defining a unit. -- This is for a unit such as 'kg' that has a linked article -- different from what is defined for the base unit ('g'). -- Throw an error if a problem occurs. local ucode = fields[1] local link = clean_link(fields[2]) if empty(ucode) then quit('m_lnk_miss') end if empty(link) then quit('m_lnk_none', ucode) end if #fields ~= 2 then quit('m_lnk_two', ucode) end local unit_table = get_unit(ucode) if not unit_table then quit('m_lnk_undef', ucode) end if utype ~= unit_table.utype then quit('m_lnk_type', ucode) end local symbol = unit_table.symbol if empty(symbol) then quit('m_lnk_sym', ucode) end if links_index[ucode] then quit('m_lnk_dup', ucode) end links_index[ucode] = link table.insert(data, { symbol = symbol, link = link }) end end local function clean_scale(scale) -- Return cleaned scale as a string, after evaluating any expression. -- It would be better to retain scale expressions like "5/9" so that -- the expression is evaluated on the server and maintains the full -- resolution of the server. However, there are many such expressions -- in the table of all units, and it seems pointless to require the -- server to evaluate all of them just to do one convert. if empty(scale) then quit('m_scl_miss') end assert(type(scale) == 'string', 'Bug: scale has an unexpected type') scale = string.gsub(scale, ',', '') -- remove comma separators if tonumber(scale) then -- not an expression return scale end local status, value = pcall(evaluate, scale) if not (status and type(value) == 'number') then quit('m_scl_bad', scale) end local result = string.format('%.17g', value) if result:find('[#n]') then -- Lua can give results like "#INF" while Scribunto gives "inf". Either is an error. quit('m_scl_oflow', scale) end -- Omit redundant zeros from results like '1.2e-005'. -- Do not bother looking for results like '1.2e+005' as none occur in practice. local lhs, zeros, rhs = result:match('^(.-e%-)(0+)(.*)') if zeros then result = lhs .. rhs end return result end local function add_alias_optional_fields(unit, start, fields, target) -- Inspect fields[i] for i = start, start+1 ..., and extract any -- definitions appropriate for an alias or "per", and add them to unit. -- For an alias, target is a valid unit; for a "per", target is nil. -- Throw error if encounter an invalid entry. for i = start, #fields do local field = fields[i] if not empty(field) then local lhs, rhs = field:match('^%s*(.-)%s*=%s*(.-)%s*$') local good if not empty(rhs) then for _, item in ipairs({ 'sp', 'default', 'link', 'multiplier', 'symbol', 'symlink' }) do if lhs == item then if item == 'sp' then if rhs == 'us' then unit.sp_us = true good = true end elseif item == 'link' then local tlink if target then tlink = target[item] end local link, customary = clean_link(rhs, tlink) if link then unit[item] = link end if customary then unit.customary = customary end good = true elseif item == 'symlink' then local pos1 = rhs:find('[[', 1, true) local pos2 = rhs:find(']]', 1, true) if not (pos1 and pos2 and (pos1 < pos2)) then quit('m_als_link', unit.unitcode) end unit.symlink = rhs good = true elseif item == 'multiplier' then if not tonumber(rhs) then quit('m_als_mul', unit.unitcode, rhs) end unit[item] = rhs good = true else if target and rhs == target[item] then quit('m_als_same', item, unit.unitcode) end unit[item] = rhs good = true end break end end end if not good then quit('m_als_bad', field) end end end end local function make_alias(fields, ucode, utype, symbol) -- Return a new alias unit, or return nil if symbol is not already -- defined as the unit code of the target unit. -- Throw an error if invalid. local target = get_unit(symbol) if not target then return nil end local unit = { unitcode = ucode, utype = utype, target = symbol } add_alias_optional_fields(unit, 3, fields, target) if alias_index[ucode] then quit('m_als_dup', ucode) else alias_index[ucode] = unit end if target.utype ~= utype then quit('m_als_type', ucode) end return unit end local function make_per(fields, ucode, utype, symbol) -- Return a new "per" unit, or return nil if symbol is not of form "x/y". -- Throw an error if invalid. -- The top, bottom unit codes are checked later, after all units are defined. local top, bottom = symbol:match('^(.-)/(.*)$') if not top then return nil end local unit = { unitcode = ucode, utype = utype, per = { strip(top), strip(bottom) } } add_alias_optional_fields(unit, 3, fields) if per_index[ucode] then quit('m_per_dup', ucode) else per_index[ucode] = unit end return unit end local function make_unit(cfg, data) -- Return a function which, when called, stores a table that defines a -- single unit. The table is stored in data (also a table). local fieldnames = { -- Fields in the Conversions section are assumed to be in the following order. 'unitcode', 'symbol', 'sym_us', 'scale', 'extra', 'name1', 'name2', 'name1_us', 'name2_us', 'prefixes', 'default', 'link', } return function (utype, fields) -- Store a table defining a unit. -- Throw an error if a problem occurs. local ucode, symbol = fields[1], fields[2] if empty(utype) then quit('m_miss_type') end if empty(ucode) then quit('m_miss_code') end if empty(symbol) then quit('m_miss_sym') end local prefix = symbol:sub(1, 1) if prefix == '~' or prefix == '=' or prefix == '!' or prefix == '*' then if symbol:sub(1, 2) == '==' then prefix = symbol:sub(1, 2) end symbol = strip(symbol:sub(#prefix + 1)) -- omit prefix and any following whitespace fields[2] = symbol else prefix = nil -- not a valid prefix end if prefix == '=' or prefix == '==' then -- ucode is an alias (a fake unit code used in a convert template), or -- defines a "per" unit like "$/acre" or "BTU/h". -- For an alias, symbol is the unit code of the actual unit. -- For a "per", symbol is of form "x/y" where x and y are unit codes, -- or x is a recognized currency symbol and y is a unit code. -- Checking that x and y are valid is deferred until all units have -- been defined so, for example, "BTU/h" can be defined before "h". local unit if prefix == '=' then unit = make_alias(fields, ucode, utype, symbol) else unit = make_per(fields, ucode, utype, symbol) end if not unit then -- Do not define an alias in terms of another alias. quit('m_als_undef', symbol) end insert_unique_unit(data, unit, units_index) return elseif prefix == '!' then -- ucode may be incorrectly entered as a unit code. -- symbol is a message saying what unit code should be used. local unit = { unitcode = ucode, shouldbe = symbol } insert_unique_unit(data, unit, nil) return end -- Make the unit. local unit = { utype = utype } for i, name in ipairs(fieldnames) do if not empty(fields[i]) then unit[name] = fields[i] end end -- Remove redundancy from unit. if unit.sym_us == symbol then unit.sym_us = nil end local prefixes = unit.prefixes local name1, name2 = unit.name1, unit.name2 if name1 then if name1 == symbol and not prefixes then -- A unit which takes an SI prefix must not have a nil name because, -- for example, the name for "kW" = "kilo" .. "watt" (name for "W"). -- The "not prefixes" test is needed for bnwiki where the -- watt unit has the same name and symbol. unit.name1 = nil end else name1 = symbol end if name2 then if name2 == name1 .. plural_suffix then unit.name2 = nil end else name2 = name1 .. plural_suffix end local name1_us, name2_us = unit.name1_us, unit.name2_us if name1_us then if name1_us == name1 then unit.name1_us = nil end end if name2_us then if unit.name1_us then if name2_us == unit.name1_us .. plural_suffix then unit.name2_us = nil end elseif name2_us == name2 then unit.name2_us = nil end end -- Other changes to unit. unit.scale = clean_scale(unit.scale) local extra = unit.extra if not empty(extra) then -- Set appropriate fields for a unit that needs more than a simple -- multiplication by a ratio of unit scales to convert values. unit.iscomplex = true if extra == 'volume/length' then unit.invert = 1 elseif extra == 'length/volume' then unit.invert = -1 elseif specials.utype[utype] == 'type_temperature' then unit.offset = extra elseif extra == 'invert' then unit.invert = -1 else unit.builtin = extra end end if prefix == '~' then -- Magic code for units like "acre" where the symbol is not really a -- symbol, and output should use the singular or plural name instead. unit.usename = 1 elseif prefix == '*' then -- Magic code for units like "pitch" which have a symbol that is the same as -- another unit with entries defined in the default or link exceptions tables. unit.defkey = ucode -- key for default exceptions unit.linkey = ucode -- key for link exceptions end local name_for_link if prefixes then if prefixes == 'SI' then unit.prefixes = 1 elseif prefixes == 'SI2' then unit.prefixes = 2 elseif prefixes == 'SI3' then unit.prefixes = 3 else quit('m_pfx_bad', prefixes) end else -- Only units which do not accept SI prefixes have name_for_link set. -- That is because, for example, if set name_for_link = name1 for unit g, -- then the link is "kilogram" for kg, and "yottagram" for Yg, and so on -- for all prefixes. That might be desirable for some units, but not all. name_for_link = name1 end unit.link, unit.customary = clean_link(unit.link, name_for_link) if prefixes then -- The SI prefix is always at the start (position = 1) for symbol and sym_us. -- However, each name (name1, name2, name1_us, name2_us) can have the SI prefix -- at any position, and that position can be different for each name. -- For enwiki, the only units with names where the prefix is not at the start -- are "square metre" and "cubic metre" ("square meter" and "cubic meter" for sp=us). -- Some other wikis want the flexibility that the prefix position can be different -- so the position is stored as nil (if always 1), or N (an integer, if always N), -- or a string of four comma-separated numbers such as "5,7,9,11" which means the -- prefix position for (name1, name2, name1_us, name2_us) is (5, 7, 9, 11) -- respectively. local name1, name1_us = unit.name1, unit.name1_us -- after redundancy removed if not name1 then quit('m_pfx_name') end local positions = collection() for i, k in ipairs({ 'name1', 'name2', 'name1_us', 'name2_us' }) do local name = unit[k] local pos if name then pos = name:find('%s', 1, true) if pos then unit[k] = name:sub(1, pos - 1) .. name:sub(pos + 2) end elseif i == 2 or i == 3 then pos = positions[1] elseif i == 4 then pos = positions[unit.name1_us and 3 or 2] end positions:add(pos or 1) end local pos = positions[1] for i = 2, positions.n do if pos ~= positions[i] then pos = '"' .. positions:join(',') .. '"' break end end if pos ~= 1 then unit.prefix_position = pos end for _, name in ipairs({ 'symbol', 'sym_us', 'name1', 'name1_us', 'name2', 'name2_us' }) do unit['_' .. name] = unit[name] unit[name] = nil -- force call to __index metamethod so any SI prefix can be handled end end for name, v in pairs(unit) do -- Reject if a string field includes "%s" (should not occur after above). if type(v) == 'string' and v:find('%s', 1, true) then quit('m_percent_s', name) end end insert_unique_unit(data, unit, units_index) end end local function make_combination(cfg, data) -- Return a function which, when called, stores a table that defines a -- single combination unit. The table is stored in data (also a table). return function (utype, fields) -- Store a table defining a unit. -- This is for a combination unit that specifies more than one output. -- The target units must be defined first. -- Throw an error if a problem occurs. local unit = { utype = utype, combination = {} } for i, v in ipairs(fields) do if i == 1 then -- unitcode if v == '' then quit('m_cmb_miss') end unit.unitcode = v elseif v == '' then -- Ignore empty fields. else local target = get_unit(v) if not target then quit('m_cmb_undef', v, unit.unitcode) end if target.utype ~= utype then quit('m_cmb_type', v, unit.unitcode) end table.insert(unit.combination, v) end end if #unit.combination < 2 then quit(#unit.combination == 0 and 'm_cmb_none' or 'm_cmb_one', unit.unitcode) end insert_unique_unit(data, unit, units_index) end end local function make_perunit(cfg, data) -- Return a function which, when called, stores a table that defines a -- fixup for an automatic per unit. The table is stored in data (also a table). local pertype_index = {} -- to detect attempts to define a fixup twice return function (utype, fields) -- Store a table to define a fixup. -- Typos or other errors in the input are not detected! -- Parameter utype is ignored (it is nil). -- Throw an error if a problem occurs. local lhs, rhs, link, multiplier for i, v in ipairs(fields) do if v == '' then -- Ignore empty fields. elseif i == 1 then lhs = v -- like "length/time" elseif i == 2 then rhs = v -- like "speed" elseif i == 3 then link = v elseif i == 4 then if not tonumber(v) then quit('m_per_inv') end multiplier = v else quit('m_per_inv') end end if lhs and (rhs or link or multiplier) then if link or multiplier then local parts = collection() if rhs then parts:add('utype = "' .. rhs .. '"') end if link then parts:add('link = "' .. link .. '"') end if multiplier then parts:add('multiplier = ' .. multiplier) end rhs = '{ ' .. parts:join(', ') .. ' }' else rhs = '"' .. rhs .. '"' end if pertype_index[lhs] then quit('m_per_dup', lhs) end pertype_index[lhs] = rhs table.insert(data, { lhs = lhs, rhs = rhs }) else quit('m_per_inv') end end end local function make_varname(cfg, data) -- Return a function which, when called, stores a table that defines a -- variable name for a unit. The table is stored in data (also a table). return function (utype, fields) -- Set or update an entry in the data table to record that a unit has a variable name. -- This is for slwiki where a unit name depends on the value. -- The target units must be defined first. -- Parameter utype is ignored (it is nil). -- Throw an error if a problem occurs. local count = #fields if count ~= cfg.varcolumns then quit('m_var_cnt') end local ucode local names = {} for i = 1, count do local v = fields[i] if empty(v) then quit('m_var_miss') end if i == 1 then -- unitcode ucode = v if not get_unit(v) then quit('m_var_undef', v) end else table.insert(names, v) end end if data[ucode] then quit('m_var_dup', ucode) end data[ucode] = table.concat(names, '!') end end local function reversed(t) -- Return a numbered table in reverse order. local reversed, count = {}, #t for i = 1, count do reversed[i] = t[count + 1 - i] end return reversed end local function make_inputmultiple(cfg, data) -- Return a function which, when called, stores a table that defines a -- single composite (multiple input) unit. The table is stored in data (also a table). return function (utype, fields) -- Set or update an entry in the data table to record that a unit -- accepts subdivisions to make a composite input unit like '|2|ft|6|in'. -- The target units must be defined first. -- Throw an error if a problem occurs. local unitcode -- dummy code required for simplicity, but which is not used in output local alternate_code -- an alternative unit code can be specified to replace convert input local fixed_name -- a fixed name can be specified to replace the unit's normal symbol/name local default_code local ucodes, scales = {}, {} for i, v in ipairs(fields) do -- 1=composite, 2=ucode1, 3=ucode2, 4=default, 5=alternate, 6=name if i == 1 then if v == '' then quit('m_cmp_miss') end unitcode = v elseif 2 <= i and i <= 5 then if not (i == 5 and v == '') then local target = get_unit(v, (i == 4) and utype or nil) -- the default may be an auto combination if not target then quit('m_cmp_undef', v, unitcode) end if target.utype ~= utype then quit('m_cmp_type', v, unitcode) end if i < 4 then if not target.scale then quit('m_mul_std', v, unitcode) end table.insert(ucodes, v) table.insert(scales, target.scale) elseif i == 4 then default_code = v else if scales[#scales] ~= target.scale then quit('m_cmp_scale', v, unitcode) end alternate_code = v end end elseif i == 6 then if v ~= '' then fixed_name = v end else quit('m_cmp_many', unitcode) end end if #ucodes ~= 2 then quit('m_cmp_two', unitcode) end if not default_code then quit('m_cmp_def', unitcode) end -- Component units must be specified from most-significant to least-significant, -- and each ratio of a pair of scales must be very close to an integer. -- Currently, there will be exactly two scales and one ratio. local ratios, count = {}, #scales for i = 1, count do local scale = tonumber(scales[i]) if scale == nil or scale <= 0 then quit('m_cmp_inval', unitcode, scales[i]) end scales[i] = scale end for i = 1, count - 1 do local ratio = scales[i] / scales[i + 1] local rounded = math.floor(ratio + 0.5) if rounded < 2 then quit('m_cmp_order', unitcode) end if math.abs(ratio - rounded)/ratio > 1e-6 then quit('m_cmp_int', unitcode) end ratios[i] = rounded end local text = { tostring(ratios[1]) } local function add_text(key, value) table.insert(text, string.format('%s = %q', key, value)) end if default_code then add_text('default', default_code) end if alternate_code then add_text('unit', alternate_code) end if fixed_name then add_text('name', fixed_name) end local subdiv = string.format('["%s"] = { %s }', ucodes[2], table.concat(text, ', ')) local main_code = ucodes[1] local item = data[main_code] if item then table.insert(item.subdivs, subdiv) else data[main_code] = { subdivs = { subdiv } } end end end local function make_outputmultiple(cfg, data) -- Return a function which, when called, stores a table that defines a -- single multiple output unit. The table is stored in data (also a table). return function (utype, fields) -- Store a table defining a unit. -- This is for a multiple unit like 'ydftin' (result in yards, feet, inches). -- The target units must be defined first. -- Throw an error if a problem occurs. local unit = { utype = utype } local ucodes, scales = {}, {} for i, v in ipairs(fields) do if i == 1 then -- unitcode if v == '' then quit('m_mul_miss') end unit.unitcode = v elseif v == '' then -- Ignore empty fields. else local target = get_unit(v) if not target then quit('m_mul_undef', v, unit.unitcode) end if target.utype ~= utype then quit('m_mul_type', v, unit.unitcode) end if not target.scale then quit('m_mul_std', v, unit.unitcode) end table.insert(ucodes, v) table.insert(scales, target.scale) end end if #ucodes < 2 then quit(#ucodes == 0 and 'm_mul_none' or 'm_mul_one', unit.unitcode) end -- Component units must be specified from most-significant to least-significant -- (so scale values will be in descending order), -- and each ratio of a pair of scales must be very close to an integer. -- The componenets and ratios are stored in reverse order (least significant first). -- This script stores a unit scale as a string (might be an expression like "5/9"), -- but scales in a multiple are handled as numbers (should never be expressions). local ratios, count = {}, #scales for i = 1, count do local scale = tonumber(scales[i]) if scale == nil or scale <= 0 then quit('m_mul_scale', unit.unitcode, scales[i]) end scales[i] = scale end for i = 1, count - 1 do local ratio = scales[i] / scales[i + 1] local rounded = math.floor(ratio + 0.5) if rounded < 2 then quit('m_mul_order', unit.unitcode) end if math.abs(ratio - rounded)/ratio > 1e-6 then quit('m_mul_int', unit.unitcode) end ratios[i] = rounded end unit.combination = reversed(ucodes) unit.multiple = reversed(ratios) insert_unique_unit(data, unit, units_index) end end -- To make updating the data module easier, this script inserts a preamble -- and a postamble so the result can be used to replace the whole page. local data_preamble = [=[ -- Conversion data used by [[Module:Convert]] which uses mw.loadData() for -- read-only access to this module so that it is loaded only once per page. -- See [[:en:Template:Convert/Transwiki guide]] if copying to another wiki. -- -- These data tables follow: -- all_units all properties for a unit, including default output -- default_exceptions exceptions for default output ('kg' and 'g' have different defaults) -- link_exceptions exceptions for links ('kg' and 'g' have different links) -- -- These tables are generated by a script which reads the wikitext of a page that -- documents the required properties of each unit; see [[:en:Module:Convert/doc]]. ]=] local data_postamble = [=[ return { all_units = all_units, default_exceptions = default_exceptions, link_exceptions = link_exceptions, per_unit_fixups = per_unit_fixups, }]=] local out_unit_prefix = [[ --------------------------------------------------------------------------- -- Do not change the data in this table because it is created by running -- -- a script that reads the wikitext from a wiki page (see note above). -- --------------------------------------------------------------------------- local all_units = {]] local out_unit_suffix = [[ } ]] local out_default_prefix = [[ --------------------------------------------------------------------------- -- Do not change the data in this table because it is created by running -- -- a script that reads the wikitext from a wiki page (see note above). -- --------------------------------------------------------------------------- local default_exceptions = { -- Prefixed units with a default different from that of the base unit. -- Each key item is a prefixed symbol (unitcode for engineering notation).]] local out_default_suffix = [[ } ]] local out_default_item = [[ ["{symbol}"] = "{default}",]] local out_link_prefix = [[ --------------------------------------------------------------------------- -- Do not change the data in this table because it is created by running -- -- a script that reads the wikitext from a wiki page (see note above). -- --------------------------------------------------------------------------- local link_exceptions = { -- Prefixed units with a linked article different from that of the base unit. -- Each key item is a prefixed symbol (not unitcode).]] local out_link_suffix = [[ } ]] local out_link_item = [[ ["{symbol}"] = "{link}",]] local out_perunit_prefix = [[ --------------------------------------------------------------------------- -- Do not change the data in this table because it is created by running -- -- a script that reads the wikitext from a wiki page (see note above). -- --------------------------------------------------------------------------- local per_unit_fixups = { -- Automatically created per units of form "x/y" may have their unit type -- changed, for example, "length/time" is changed to "speed". -- Other adjustments can also be specified.]] local out_perunit_suffix = [[ } ]] local out_perunit_item = [[ ["{lhs}"] = {rhs},]] local combination_specification = { -- pure combination like 'm ft', or a multiple like 'ftin' 'combination', 'multiple', 'utype', } local alias_specification = { 'target', 'symbol', 'sp_us', 'default', 'link', 'symlink', 'customary', 'multiplier', } local per_specification = { 'per', 'symbol', 'sp_us', 'utype', 'invert', 'iscomplex', 'default', 'link', 'symlink', 'customary', 'multiplier', } local shouldbe_specification = { 'shouldbe', } local unit_specification = { '_name1', '_name1_us', '_name2', '_name2_us', '_symbol', '_sym_us', 'prefix_position', 'name1', 'name1_us', 'name2', 'name2_us', 'varname', 'symbol', 'sym_us', 'usename', 'usesymbol', 'utype', 'alttype', 'builtin', 'scale', 'offset', 'invert', 'iscomplex', 'istemperature', 'exception', 'prefixes', 'default', 'subdivs', 'defkey', 'linkey', 'link', 'customary', 'sp_us', } local no_quotes = { combination = true, customary = true, multiple = true, multiplier = true, offset = true, per = true, prefix_position = true, scale = true, subdivs = true, } local function add_unit_lines(results, unit, spec) -- Add lines of Lua source to define a unit to the results collection. local function add_line(line) -- Had planned to replace sequences of spaces with 4-column tabs here -- (because the CodeEditor now assumes the use of such tabs). -- However, 4-column tabs are only visible when editing a module -- with browser scripting and the CodeEditor enabled, and that is rare. -- A module is usually viewed (with 8-column tabs), and some indents -- would be messed up unless 8-column tabs are used. Therefore, -- have decided to simply replace 8 spaces at start of line with a single -- tab which reduces the size of the module, and is correct for viewing. if line:sub(1, 8) == string.rep(' ', 8) then line = '\t' .. line:sub(9) end results:add(line) end local first_item = ' ["' .. unit.unitcode .. '"] = {' local last_item = ' },' add_line(first_item) for _, k in ipairs(spec) do local v = unit[k] if v then local want_quotes = (type(v) == 'string' and not no_quotes[k]) if type(v) == 'boolean' then v = tostring(v) elseif type(v) == 'number' or k == 'scale' then -- Replace results like '1e-006' with '1e-6'. v = string.gsub(tostring(v), '(e[+-])0+([1-9].*)', '%1%2', 1) elseif type(v) ~= 'string' then quit('m_ftl_type', unit.unitcode) end local fmt = string.format('%8s%%-9s= %%%s,', '', want_quotes and 'q' or 's') add_line(fmt:format(k, v)) end end add_line(last_item) end local function numbered_table_as_string(data, unit) local t = {} for _, v in ipairs(data) do if type(v) == 'string' then table.insert(t, '"' .. v .. '"') elseif type(v) == 'number' then table.insert(t, tostring(v)) else quit('m_ftl_type', unit.unitcode) end end return '{ ' .. table.concat(t, ', ') .. ' }' end local function extract_heading(line) -- Return n, s where n = heading level number (nil if none), and -- s = heading text (with leading/trailing whitespace removed). local pattern = '^(==+)%s*(.-)%s*(==+)%s*$' local before, heading, after = line:match(pattern) if heading and #heading > 0 then -- Don't bother checking if before == after. return #before, heading end end local function fields(line) -- Return a numbered table of fields split from line. -- Items are delimited by "||". -- Each item has leading/trailing whitespace removed, and any encoded pipe -- characters are decoded. -- The second field (for symbol when processing units) is adjusted to -- remove any "colspan" at the front of lines like: -- "| unitcode || colspan="11" | !Text to display for an error message". local t = {} line = line .. "||" -- to get last field for item in line:gmatch("%s*(.-)%s*||") do table.insert(t, (item:gsub('|', '|'))) end if t[2] then local cleaned = t[2]:match('^%s*colspan%s*=.-|%s*(.*)$') if cleaned then t[2] = cleaned end end return t end local function prepare_section(cfg, maker, lines, section, need_section, need_utype) -- Process the first level-two section with the given section name -- in the given table of lines of wikitext. -- If successful, maker inserts each item into a table. -- Otherwise, an error is thrown. local skip = true local errors = collection() local utype -- unit type (from level-three heading) local nbsp = '\194\160' -- nonbreaking space is utf-8 encoded as hex c2 a0 for linenumber, line in ipairs(lines) do if skip then -- Skip down to and including the starting heading. local level, heading = extract_heading(line) if level == 2 and heading == section then skip = false end else -- Accummulate unit definitions. local c1 = line:sub(1, 1) local c2 = line:sub(2, 2) if c1 == '|' and not (c2 == '-' or c2 == '}') then if need_utype and empty(utype) then quit('m_hdg_lev3', line) end if line:find(nbsp, 1, true) then -- For example, "acre ft" does not work if it contains nbsp. add_warning('m_wrn_nbsp', linenumber) end local ok, msg = pcall(maker, utype, fields(line:sub(2))) if not ok then if msg:sub(-1) == '.' then msg = msg:sub(1, -2) end errors:add(msg .. message('m_line_num', linenumber)) if errors.n >= cfg.maxerrors then break end end else local level, heading = extract_heading(line) if level == 3 then utype = ulower(heading) elseif level == 2 then break end end end end if skip and need_section then quit('m_hdg_lev2', section) end if errors.n > 0 then error(errors:join(), 0) end end local function get_page_lines(page_title) -- Read the wikitext of the page at the given title; split the text into -- lines with leading and trailing space removed from each line. -- Return a numbered table of the lines, or throw an error. if empty(page_title) then quit('m_no_title') end local t = mw.title.new(page_title) if t then local content = t:getContent() if content then if content:sub(-1) ~= '\n' then content = content .. '\n' end local lines = collection() for line in string.gmatch(content, '[\t ]*(.-)[\t\r ]*\n') do lines:add(line) end return lines end end quit('m_ftl_read', page_title) end local function prepare_data(cfg, is_sandbox) -- Read the page of conversion data, and process the wikitext -- in the sections with wanted level-two headings. -- Return units, defaults, links (three tables). -- Throw an error if a problem occurs. local composites, defaults, links, units, perunits, varnames = {}, {}, {}, {}, {}, {} local sections = { { 'overrides' , make_override , overrides , 0 }, { 'conversions' , make_unit , units , 0 }, { 'outmultiples', make_outputmultiple, units , 0 }, { 'combinations', make_combination , units , 0 }, { 'inmultiples' , make_inputmultiple , composites, 0 }, -- after all units defined so default will be defined { 'defaults' , make_default , defaults , 0 }, { 'links' , make_link , links , 0 }, { 'perunits' , make_perunit , perunits , 1 }, { 'varnames' , make_varname , varnames , 1 }, } local lines = get_page_lines(cfg.data_title) for _, section in ipairs(sections) do local heading = mtext.section_names[section[1]] local maker = section[2](cfg, section[3]) local code = section[4] local need_section, need_utype if code == 0 and not is_sandbox then need_section = true end if code == 0 then need_utype = true end prepare_section(cfg, maker, lines, heading, need_section, need_utype) end check_all_defaults(cfg, units) check_all_pers(cfg, units) update_units(units, composites, varnames) return units, defaults, links, perunits end local function _makeunits(cfg, results) -- Read the wikitext for the conversion data. -- Append output to given results collection, or throw error if a problem. text_code = require(cfg.text_title) for _, name in ipairs({ 'SIprefixes', 'eng_scales', 'currency' }) do if type(text_code[name]) ~= 'table' then quit('m_ftl_table', cfg.text_title, name) end end local translation = text_code.translation_table if translation then if translation.plural_suffix then plural_suffix = translation.plural_suffix end local ts = translation.specials if ts then if ts.utype then specials.utype = ts.utype end if ts.ucode then specials.ucode = ts.ucode end end local tm = translation.mtext if tm then if tm.section_names then mtext.section_names = tm.section_names end if tm.titles then mtext.titles = tm.titles end if tm.messages then mtext.messages = tm.messages end end end local is_sandbox local conversion_data_title = mtext.titles.conversion_data if cfg.data_title and cfg.data_title ~= conversion_data_title then if is_test_run then is_sandbox = true data_preamble = nil data_postamble = nil out_unit_prefix = 'local all_units = {' out_unit_suffix = '}' out_default_prefix = '\nlocal default_exceptions = {' out_default_suffix = '}' out_default_item = '\t["{symbol}"] = "{default}",' out_link_prefix = '\nlocal link_exceptions = {' out_link_suffix = '}' out_link_item = '\t["{symbol}"] = "{link}",' out_perunit_prefix = '\nlocal per_unit_fixups = {' out_perunit_suffix = '}' out_perunit_item = '\t["{lhs}"] = {rhs},' end else cfg.data_title = conversion_data_title end local units, defaults, links, perunits = prepare_data(cfg, is_sandbox) if data_preamble then results:add(data_preamble) end results:add(out_unit_prefix) for _, unit in ipairs(units) do local spec if unit.target then spec = alias_specification elseif unit.per then spec = per_specification unit.per = numbered_table_as_string(unit.per, unit) elseif unit.shouldbe then spec = shouldbe_specification elseif unit.combination then spec = combination_specification unit.combination = numbered_table_as_string(unit.combination, unit) if unit.multiple then unit.multiple = numbered_table_as_string(unit.multiple, unit) end else spec = unit_specification end add_unit_lines(results, unit, spec) end results:add(out_unit_suffix) for _, t in ipairs({ { defaults, out_default_prefix, out_default_item, out_default_suffix }, { links , out_link_prefix , out_link_item , out_link_suffix }, { perunits, out_perunit_prefix, out_perunit_item, out_perunit_suffix } }) do local data, prefix, item, suffix = t[1], t[2], t[3], t[4] if #data > 0 or not is_sandbox then results:add(prefix) for _, unit in ipairs(data) do results:add((item:gsub('{([%w_]+)}', unit))) end results:add(suffix) end end if data_postamble then results:add(data_postamble) end end local function makeunits(frame) local args = frame.args local config = { data_title = args[1], text_title = args[2] or 'Module:Convert/text', varcolumns = tonumber(args.varcolumns) or 5, -- #columns in "Variable names" section; slwiki uses 5 maxerrors = 20, } local results = collection() local ok, msg = pcall(_makeunits, config, results) if not ok then results:add(message('m_error')) results:add('') results:add(msg) end local warn = '' if warnings.n > 0 then warn = message('m_warning') .. '\n\n' .. warnings:join() .. '\n\n' end -- Pre tags returned by a module are html tags, not like wikitext <pre>...</pre>. -- The following renders the text as is, and preserves tab characters. return '<pre>\n' .. mw.text.nowiki(warn .. results:join()) .. '\n</pre>\n' end return { makeunits = makeunits }