Module:String: Difference between revisions
Content added Content deleted
Enwikipedia>Mr. Stradivarius (add an escapePattern function for escaping Lua patterns) |
Enwikipedia>Mr. Stradivarius m (use tabs instead of spaces, and remove trailing whitespace) |
||
Line 1: | Line 1: | ||
--[[ |
--[[ |
||
This module is intended to provide access to basic string functions. |
This module is intended to provide access to basic string functions. |
||
Most of the functions provided here can be invoked with named parameters, |
Most of the functions provided here can be invoked with named parameters, |
||
unnamed parameters, or a mixture. If named parameters are used, Mediawiki will |
unnamed parameters, or a mixture. If named parameters are used, Mediawiki will |
||
automatically remove any leading or trailing whitespace from the parameter. |
automatically remove any leading or trailing whitespace from the parameter. |
||
Depending on the intended use, it may be advantageous to either preserve or |
Depending on the intended use, it may be advantageous to either preserve or |
||
remove such whitespace. |
remove such whitespace. |
||
Global options |
Global options |
||
ignore_errors: If set to 'true' or 1, any error condition will result in |
ignore_errors: If set to 'true' or 1, any error condition will result in |
||
an empty string being returned rather than an error message. |
an empty string being returned rather than an error message. |
||
error_category: If an error occurs, specifies the name of a category to |
error_category: If an error occurs, specifies the name of a category to |
||
include with the error message. The default category is |
include with the error message. The default category is |
||
[Category:Errors reported by Module String]. |
[Category:Errors reported by Module String]. |
||
no_category: If set to 'true' or 1, no category will be added if an error |
no_category: If set to 'true' or 1, no category will be added if an error |
||
is generated. |
is generated. |
||
Unit tests for this module are available at Module:String/tests. |
Unit tests for this module are available at Module:String/tests. |
||
]] |
]] |
||
Line 39: | Line 39: | ||
If invoked using named parameters, Mediawiki will automatically remove any leading or |
If invoked using named parameters, Mediawiki will automatically remove any leading or |
||
trailing whitespace from the target string. |
trailing whitespace from the target string. |
||
]] |
]] |
||
function str.len( frame )
	-- The target may arrive as the named parameter "s" or as the first
	-- positional parameter; a missing value is treated as the empty string.
	local params = str._getParameters( frame.args, { 's' } )
	local text = params.s or ''
	-- Length as reported by mw.ustring.len.
	return mw.ustring.len( text )
end
||
Line 61: | Line 61: | ||
i: The first index of the substring to return, defaults to 1. |
i: The first index of the substring to return, defaults to 1. |
||
j: The last index of the string to return, defaults to the last character. |
j: The last index of the string to return, defaults to the last character. |
||
The first character of the string is assigned an index of 1. If either i or j |
The first character of the string is assigned an index of 1. If either i or j |
||
is a negative value, it is interpreted the same as selecting a character by |
is a negative value, it is interpreted the same as selecting a character by |
||
counting from the end of the string. Hence, a value of -1 is the same as |
counting from the end of the string. Hence, a value of -1 is the same as |
||
selecting the last character of the string. |
selecting the last character of the string. |
||
If the requested indices are out of range for the given string, an error is |
If the requested indices are out of range for the given string, an error is |
||
reported. |
reported. |
||
]] |
]] |
||
function str.sub( frame )
	-- Parameters: s (target string), i (first index, default 1),
	-- j (last index, default -1, i.e. the last character).
	local params = str._getParameters( frame.args, { 's', 'i', 'j' } )
	local text = params.s or ''
	local first = tonumber( params.i ) or 1
	local last = tonumber( params.j ) or -1
	local total = mw.ustring.len( text )

	-- Translate end-relative (negative) indices into absolute positions so
	-- that both bounds can be validated against the string length.
	if first < 0 then first = total + first + 1 end
	if last < 0 then last = total + last + 1 end

	-- Both bounds must lie within 1..total.
	if first > total or last > total or first < 1 or last < 1 then
		return str._error( 'String subset index out of range' )
	end
	-- The range must not be reversed.
	if last < first then
		return str._error( 'String subset indices out of order' )
	end

	return mw.ustring.sub( text, first, last )
end
||
Line 101: | Line 101: | ||
]] |
]] |
||
function str.sublength( frame )
	-- Reads the named arguments "i" (zero-based offset, default 0),
	-- "len" (number of characters, optional) and "s" (the string).
	local args = frame.args
	local offset = tonumber( args.i ) or 0
	local count = tonumber( args.len )

	-- Without a usable "len" the end index stays nil, so the substring
	-- runs to the end of the string.
	local last
	if count then
		last = offset + count
	end

	return mw.ustring.sub( args.s, offset + 1, last )
end
||
Line 109: | Line 109: | ||
match |
match |
||
This function returns a substring from the source string that matches a |
This function returns a substring from the source string that matches a |
||
specified pattern. |
specified pattern. |
||
Line 123: | Line 123: | ||
start: The index within the source string to start the search. The first |
start: The index within the source string to start the search. The first |
||
character of the string has index 1. Defaults to 1. |
character of the string has index 1. Defaults to 1. |
||
match: In some cases it may be possible to make multiple matches on a single |
match: In some cases it may be possible to make multiple matches on a single |
||
string. This specifies which match to return, where the first match is |
string. This specifies which match to return, where the first match is |
||
match= 1. If a negative number is specified then a match is returned |
match= 1. If a negative number is specified then a match is returned |
||
counting from the last match. Hence match = -1 is the same as requesting |
counting from the last match. Hence match = -1 is the same as requesting |
||
the last match. Defaults to 1. |
the last match. Defaults to 1. |
||
Line 133: | Line 133: | ||
If invoked using named parameters, Mediawiki will automatically remove any leading or |
If invoked using named parameters, Mediawiki will automatically remove any leading or |
||
trailing whitespace from each string. In some circumstances this is desirable, in |
trailing whitespace from each string. In some circumstances this is desirable, in |
||
other cases one may want to preserve the whitespace. |
other cases one may want to preserve the whitespace. |
||
If the match_number or start_index are out of range for the string being queried, then |
If the match_number or start_index are out of range for the string being queried, then |
||
this function generates an error. An error is also generated if no match is found. |
this function generates an error. An error is also generated if no match is found. |
||
If one adds the parameter ignore_errors=true, then the error will be suppressed and |
If one adds the parameter ignore_errors=true, then the error will be suppressed and |
||
an empty string will be returned on any failure. |
an empty string will be returned on any failure. |
||
Line 149: | Line 149: | ||
]] |
]] |
||
function str.match( frame )
	-- Parameters (named or positional, in this order):
	--   s        source string to search
	--   pattern  pattern (or plain text when plain is set) to look for
	--   start    index at which to begin searching, default 1
	--   match    which match to return; negative counts from the last match
	--   plain    when true, pattern is escaped and matched literally
	--   nomatch  value returned instead of an error when nothing matches
	-- Returns the selected match, the nomatch value, or an error string
	-- produced by str._error.
	local new_args = str._getParameters( frame.args, {'s', 'pattern', 'start', 'match', 'plain', 'nomatch'} )
	local s = new_args['s'] or ''
	local start = tonumber( new_args['start'] ) or 1
	local plain_flag = str._getBoolean( new_args['plain'] or false )
	local pattern = new_args['pattern'] or ''
	local match_index = math.floor( tonumber( new_args['match'] ) or 1 )
	local nomatch = new_args['nomatch']

	if s == '' then
		return str._error( 'Target string is empty' )
	end
	if pattern == '' then
		return str._error( 'Pattern string is empty' )
	end
	-- Both positive and negative start values are accepted as long as
	-- their magnitude lies within the string.
	if math.abs( start ) < 1 or math.abs( start ) > mw.ustring.len( s ) then
		return str._error( 'Requested start is out of range' )
	end
	if match_index == 0 then
		return str._error( 'Match index is out of range' )
	end
	if plain_flag then
		pattern = str._escapePattern( pattern )
	end

	local result
	if match_index == 1 then
		-- Find first match is simple case
		result = mw.ustring.match( s, pattern, start )
	else
		-- gmatch has no start argument, so trim the string first. Using
		-- "~= 1" rather than "> 1" makes a negative start (already
		-- validated above) take effect here too, consistent with the
		-- first-match branch.
		if start ~= 1 then
			s = mw.ustring.sub( s, start )
		end

		local iterator = mw.ustring.gmatch( s, pattern )
		if match_index > 0 then
			-- Forward search: count matches down until the requested one.
			for w in iterator do
				match_index = match_index - 1
				if match_index == 0 then
					result = w
					break
				end
			end
		else
			-- Reverse search: collect every match, then index from the end.
			local result_table = {}
			local count = 1
			for w in iterator do
				result_table[count] = w
				count = count + 1
			end

			-- count is one past the number of matches, so
			-- count + (-1) addresses the last match.
			result = result_table[ count + match_index ]
		end
	end

	if result == nil then
		if nomatch == nil then
			return str._error( 'Match not found' )
		else
			return nomatch
		end
	else
		return result
	end
end
||
Line 231: | Line 231: | ||
If invoked using named parameters, Mediawiki will automatically remove any leading or |
If invoked using named parameters, Mediawiki will automatically remove any leading or |
||
trailing whitespace from the target string. In some circumstances this is desirable, in |
trailing whitespace from the target string. In some circumstances this is desirable, in |
||
other cases one may want to preserve the whitespace. |
other cases one may want to preserve the whitespace. |
||
The first character has an index value of 1. |
The first character has an index value of 1. |
||
If one requests a negative value, this function will select a character by counting backwards |
If one requests a negative value, this function will select a character by counting backwards |
||
from the end of the string. In other words pos = -1 is the same as asking for the last character. |
from the end of the string. In other words pos = -1 is the same as asking for the last character. |
||
Line 242: | Line 242: | ||
]] |
]] |
||
function str.pos( frame )
	-- Parameters: target (string to index into) and pos (1-based index;
	-- negative values count back from the end of the string).
	local params = str._getParameters( frame.args, { 'target', 'pos' } )
	local text = params.target or ''
	local index = tonumber( params.pos ) or 0

	-- Zero (which also covers a missing or non-numeric pos) and indices
	-- whose magnitude exceeds the string length are rejected.
	if index == 0 or math.abs( index ) > mw.ustring.len( text ) then
		return str._error( 'String index out of range' )
	end

	-- A one-character slice at the requested position.
	return mw.ustring.sub( text, index, index )
end
||
Line 257: | Line 257: | ||
This function duplicates the behavior of {{str_find}}, including all of its quirks. |
This function duplicates the behavior of {{str_find}}, including all of its quirks. |
||
This is provided in order to support existing templates, but is NOT RECOMMENDED for |
This is provided in order to support existing templates, but is NOT RECOMMENDED for |
||
new code and templates. New code is recommended to use the "find" function instead. |
new code and templates. New code is recommended to use the "find" function instead. |
||
Line 268: | Line 268: | ||
]] |
]] |
||
function str.str_find( frame )
	-- Parameters: source (string to search in) and target (text to find).
	local params = str._getParameters( frame.args, { 'source', 'target' } )
	local haystack = params.source or ''
	local needle = params.target or ''

	-- An empty target is reported as found at position 1.
	if needle == '' then
		return 1
	end

	-- Plain-text search from the first character; -1 signals "not found".
	local found_at = mw.ustring.find( haystack, needle, 1, true )
	return found_at or -1
end
||
Line 303: | Line 303: | ||
If invoked using named parameters, Mediawiki will automatically remove any leading or |
If invoked using named parameters, Mediawiki will automatically remove any leading or |
||
trailing whitespace from the parameter. In some circumstances this is desirable, in |
trailing whitespace from the parameter. In some circumstances this is desirable, in |
||
other cases one may want to preserve the whitespace. |
other cases one may want to preserve the whitespace. |
||
This function returns the first index >= "start" where "target" can be found |
This function returns the first index >= "start" where "target" can be found |
||
within "source". Indices are 1-based. If "target" is not found, then this |
within "source". Indices are 1-based. If "target" is not found, then this |
||
function returns 0. If either "source" or "target" are missing / empty, this |
function returns 0. If either "source" or "target" are missing / empty, this |
||
function also returns 0. |
function also returns 0. |
||
Line 314: | Line 314: | ||
]] |
]] |
||
function str.find( frame )
	-- Parameters (named or positional, in this order):
	--   source  string to search in
	--   target  text or pattern to look for
	--   start   index at which to begin searching, default 1
	--   plain   when true (the default) target is matched literally
	-- Returns the index of the first match, or 0 when nothing is found or
	-- when source or target is missing / empty.
	local new_args = str._getParameters( frame.args, {'source', 'target', 'start', 'plain' } )
	local source_str = new_args['source'] or ''
	local pattern = new_args['target'] or ''
	local start_pos = tonumber( new_args['start'] ) or 1

	-- Default to plain matching only when the parameter is absent.
	-- "x or true" would also override an explicit boolean false.
	local plain = new_args['plain']
	if plain == nil then
		plain = true
	end

	if source_str == '' or pattern == '' then
		return 0
	end

	plain = str._getBoolean( plain )

	local start = mw.ustring.find( source_str, pattern, start_pos, plain )
	if start == nil then
		start = 0
	end

	return start
end
||
Line 352: | Line 352: | ||
count: The number of occurrences to replace, defaults to all. |
count: The number of occurrences to replace, defaults to all. |
||
plain: Boolean flag indicating that pattern should be understood as plain |
plain: Boolean flag indicating that pattern should be understood as plain |
||
text and not as a Lua style regular expression, defaults to true |
text and not as a Lua style regular expression, defaults to true |
||
]] |
]] |
||
function str.replace( frame )
	-- Parameters (named or positional, in this order):
	--   source   string to operate on
	--   pattern  text or pattern to replace
	--   replace  replacement text
	--   count    maximum number of replacements, default all
	--   plain    when true (the default) pattern and replace are literal
	-- Returns the resulting string; source is returned unchanged when
	-- source or pattern is missing / empty.
	local new_args = str._getParameters( frame.args, {'source', 'pattern', 'replace', 'count', 'plain' } )
	local source_str = new_args['source'] or ''
	local pattern = new_args['pattern'] or ''
	local replace = new_args['replace'] or ''
	local count = tonumber( new_args['count'] )

	-- Default to plain matching only when the parameter is absent.
	-- "x or true" would also override an explicit boolean false.
	local plain = new_args['plain']
	if plain == nil then
		plain = true
	end

	if source_str == '' or pattern == '' then
		return source_str
	end
	plain = str._getBoolean( plain )

	if plain then
		pattern = str._escapePattern( pattern )
		replace = mw.ustring.gsub( replace, "%%", "%%%%" ) --Only need to escape replacement sequences.
	end

	-- Assigning to a single variable keeps only the resulting string and
	-- drops gsub's second result (the substitution count).
	local result
	if count ~= nil then
		result = mw.ustring.gsub( source_str, pattern, replace, count )
	else
		result = mw.ustring.gsub( source_str, pattern, replace )
	end

	return result
end
||
--[[ |
--[[ |
||
simple function to pipe string.rep to templates. |
simple function to pipe string.rep to templates. |
||
]] |
]] |
||
function str.rep( frame )
	-- Positional arguments: [1] the string to repeat, [2] the repeat count.
	local raw_count = frame.args[2]
	local times = tonumber( raw_count )

	if times then
		-- A missing first argument repeats the empty string.
		return string.rep( frame.args[1] or '', times )
	end

	-- The count was missing or not numeric; report what was received.
	return str._error( 'function rep expects a number as second parameter, received "' .. ( raw_count or '' ) .. '"' )
end
||
Line 410: | Line 410: | ||
]] |
]] |
||
function str.escapePattern( frame )
	-- Positional argument [1]: the pattern string to escape.
	local raw = frame.args[1]
	if raw then
		-- The parentheses keep only the first value returned by the helper.
		return ( str._escapePattern( raw ) )
	end
	return str._error( 'No pattern string specified' )
end
||
Line 425: | Line 425: | ||
]] |
]] |
||
function str._getParameters( frame_args, arg_list )
	-- Builds a table keyed by the names in arg_list. Each name is looked up
	-- in frame_args first; when it is absent, the next unused positional
	-- argument is taken instead.
	local resolved = {}
	local next_positional = 1

	for _, name in ipairs( arg_list ) do
		local found = frame_args[name]
		if found == nil then
			-- A positional slot is consumed even when it is empty.
			found = frame_args[next_positional]
			next_positional = next_positional + 1
		end
		resolved[name] = found
	end

	return resolved
end
||
--[[ |
--[[ |
||
Line 445: | Line 445: | ||
]] |
]] |
||
function str._error( error_str )
	-- Formats error_str as a wikitext error message, honouring three
	-- options read from the current frame's arguments:
	--   ignore_errors   when true, an empty string is returned instead
	--   error_category  category to attach, default
	--                   'Errors reported by Module String'
	--   no_category     when true, no category is attached
	local frame = mw.getCurrentFrame()
	local error_category = frame.args.error_category or 'Errors reported by Module String'
	local ignore_errors = frame.args.ignore_errors or false
	local no_category = frame.args.no_category or false

	if str._getBoolean( ignore_errors ) then
		return ''
	end

	-- Built in a separate local so the error_str parameter is not shadowed.
	local message = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'
	if error_category ~= '' and not str._getBoolean( no_category ) then
		message = '[[Category:' .. error_category .. ']]' .. message
	end

	return message
end
||
Line 466: | Line 466: | ||
]] |
]] |
||
function str._getBoolean( boolean_str )
	-- Interprets a string or boolean as a boolean. Any other type raises
	-- an error.
	local kind = type( boolean_str )

	-- Booleans pass through unchanged.
	if kind == 'boolean' then
		return boolean_str
	end
	if kind ~= 'string' then
		error( 'No boolean value found' )
	end

	-- Case-insensitively, 'false', 'no', '0' and the empty string are
	-- false; every other string is true.
	local lowered = boolean_str:lower()
	return not ( lowered == 'false' or lowered == 'no' or lowered == '0' or lowered == '' )
end
||
--[[ |
--[[ |
||
Helper function that escapes all pattern characters so that they will be treated |
Helper function that escapes all pattern characters so that they will be treated |
||
as plain text. |
as plain text. |
||
]] |
]] |
||
function str._escapePattern( pattern_str )
	-- Prefixes each pattern-magic character ( ) . % + - * ? [ ^ $ ] with
	-- '%' so the string is matched literally.
	-- The outer parentheses truncate gsub's two results (string, count) to
	-- the escaped string only, so the count cannot leak into a caller that
	-- forwards all results (e.g. as the next argument of another call).
	return ( mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ) )
end
||