Module:String: Difference between revisions
Content added Content deleted
Enwikipedia>Dragons flight (add description and whitespace handling option for len) |
Enwikipedia>Dragons flight (improves commenting, adds whitespace and error handling to sub, improvements to error handling generally.) |
||
Line 1: | Line 1: | ||
--[[ |
|||
This module is intended to provide access to basic string functions. |
|||
Most of the functions provided here can be invoked with named parameters, |
|||
unnamed parameters, or a mixture. If named parameters are used, Mediawiki will |
|||
automatically remove any leading or trailing whitespace from the parameter. |
|||
Depending on the intended use, it may be advantageous to either preserve or |
|||
remove such whitespace. |
|||
Global options |
|||
ignore_errors: If set to 'true' or 1, any error condition will result in |
|||
an empty string being returned rather than an error message. |
|||
error_category: If an error occurs, specifies the name of a category to |
|||
include with the error message. The default category is |
|||
[Category:Errors reported by Module String]. If set to an empty |
|||
string, no category will be added during an error. |
|||
]] |
|||
local str = {} |
local str = {} |
||
--[ |
--[[ |
||
len |
len |
||
Line 9: | Line 29: | ||
{{#invoke:String|len|target_string|}} |
{{#invoke:String|len|target_string|}} |
||
OR |
OR |
||
{{#invoke:String| |
{{#invoke:String|len|s=target_string}} |
||
Parameters |
Parameters |
||
Line 16: | Line 36: | ||
If invoked using named parameters, Mediawiki will automatically remove any leading or |
If invoked using named parameters, Mediawiki will automatically remove any leading or |
||
trailing whitespace from the target string. |
trailing whitespace from the target string. |
||
] |
]] |
||
function str.len( frame ) |
function str.len( frame ) |
||
local new_args = str._getParameters( frame.args, {'s'} ); |
local new_args = str._getParameters( frame.args, {'s'} ); |
||
Line 23: | Line 43: | ||
end |
end |
||
--[[ |
|||
sub |
|||
This function returns a substring of the target string at specified indices. |
|||
Usage: |
|||
{{#invoke:String|sub|target_string|start_index|end_index}} |
|||
OR |
|||
{{#invoke:String|sub|s=target_string|i=start_index|j=end_index}} |
|||
Parameters |
|||
s: The string to return a subset of |
|||
i: The first index of the substring to return, defaults to 1. |
|||
j: The last index of the string to return, defaults to the last character. |
|||
The first character of the string is assigned an index of 1. If either i or j |
|||
is a negative value, it is interpreted the same as selecting a character by |
|||
counting from the end of the string. Hence, a value of -1 is the same as |
|||
selecting the last character of the string. |
|||
If the requested indices are out of range for the given string, an error is |
|||
reported. |
|||
]] |
|||
--[[
Implementation of {{#invoke:String|sub}}.

Reads the arguments 's', 'i' and 'j' from frame.args through
str._getParameters and returns the substring of s running from character
index i to character index j, inclusive.

    s: target string, treated as '' when absent
    i: first index, 1 when absent or not numeric
    j: last index, -1 (the last character) when absent or not numeric

Negative indices are converted to positive ones by counting back from the
end of the string before any validation takes place.

Returns the value of str._error( 'String subset index out of range' ) when
either converted index lies outside 1..len, and the value of
str._error( 'String subset indices out of order' ) when both indices are in
range but j < i.
]]
function str.sub( frame )
    local new_args = str._getParameters( frame.args, { 's', 'i', 'j' } );
    local s = new_args['s'] or '';
    local i = tonumber( new_args['i'] ) or 1;
    local j = tonumber( new_args['j'] ) or -1;

    -- Length in characters (mw.ustring), not bytes, so that the indices
    -- line up with what mw.ustring.sub uses below.
    local len = mw.ustring.len( s );

    -- Convert negatives for range checking
    if i < 0 then
        i = len + i + 1;
    end
    if j < 0 then
        j = len + j + 1;
    end

    -- Validate the range before the ordering. Otherwise an out-of-range
    -- request is misreported as "out of order": for an empty string with the
    -- default indices, len = 0 gives i = 1 and j = 0, which is a range
    -- problem, not an ordering mistake by the caller.
    if i > len or j > len or i < 1 or j < 1 then
        return str._error( 'String subset index out of range' );
    end
    if j < i then
        return str._error( 'String subset indices out of order' );
    end

    return mw.ustring.sub( s, i, j )
end
||
Line 37: | Line 102: | ||
end |
end |
||
--[ |
--[[ |
||
pos |
pos |
||
Line 61: | Line 126: | ||
A requested value of zero, or a value greater than the length of the string returns an error. |
A requested value of zero, or a value greater than the length of the string returns an error. |
||
] |
]] |
||
function str.pos( frame ) |
function str.pos( frame ) |
||
local new_args = str._getParameters( frame.args, {'target', 'pos'} ); |
local new_args = str._getParameters( frame.args, {'target', 'pos'} ); |
||
Line 68: | Line 133: | ||
if pos == 0 or math.abs(pos) > mw.ustring.len( target_str ) then |
if pos == 0 or math.abs(pos) > mw.ustring.len( target_str ) then |
||
return |
return str._error( 'String index out of range' ); |
||
end |
end |
||
Line 74: | Line 139: | ||
end |
end |
||
--[ |
--[[ |
||
str_find |
str_find |
||
Line 87: | Line 152: | ||
value of "1", which is generally unexpected behavior, and must be accounted for |
value of "1", which is generally unexpected behavior, and must be accounted for |
||
separately. |
separately. |
||
] |
]] |
||
function str.str_find( frame ) |
function str.str_find( frame ) |
||
local new_args = str._getParameters( frame.args, {'source', 'target'} ); |
local new_args = str._getParameters( frame.args, {'source', 'target'} ); |
||
Line 105: | Line 170: | ||
end |
end |
||
--[ |
--[[ |
||
find |
find |
||
Line 133: | Line 198: | ||
This function should be safe for UTF-8 strings. |
This function should be safe for UTF-8 strings. |
||
] |
]] |
||
function str.find( frame ) |
function str.find( frame ) |
||
local new_args = str._getParameters( frame.args, {'source', 'target', 'start', 'plain' } ); |
local new_args = str._getParameters( frame.args, {'source', 'target', 'start', 'plain' } ); |
||
Line 155: | Line 220: | ||
end |
end |
||
--[ |
--[[ |
||
replace |
replace |
||
Line 174: | Line 239: | ||
plain: Boolean flag indicating that pattern should be understood as plain |
plain: Boolean flag indicating that pattern should be understood as plain |
||
text and not as a Lua style regular expression, defaults to true |
text and not as a Lua style regular expression, defaults to true |
||
] |
]] |
||
function str.replace( frame ) |
function str.replace( frame ) |
||
local new_args = str._getParameters( frame.args, {'source', 'pattern', 'replace', 'count', 'plain' } ); |
local new_args = str._getParameters( frame.args, {'source', 'pattern', 'replace', 'count', 'plain' } ); |
||
Line 204: | Line 269: | ||
end |
end |
||
--[ |
--[[ |
||
Helper function that populates the argument list given that user may need to use a mix of |
Helper function that populates the argument list given that user may need to use a mix of |
||
named and unnamed parameters. This is relevant because named parameters are not |
named and unnamed parameters. This is relevant because named parameters are not |
||
identical to unnamed parameters due to string trimming, and when dealing with strings |
identical to unnamed parameters due to string trimming, and when dealing with strings |
||
we sometimes want to either preserve or remove that whitespace depending on the application. |
we sometimes want to either preserve or remove that whitespace depending on the application. |
||
] |
]] |
||
function str._getParameters( frame_args, arg_list ) |
function str._getParameters( frame_args, arg_list ) |
||
local new_args = {}; |
local new_args = {}; |
||
Line 227: | Line 292: | ||
end |
end |
||
--[ |
--[[ |
||
Helper function to handle error messages. |
|||
]] |
|||
--[[
Builds the value a String function returns when it hits an error condition.

    error_str: plain description of the problem; it is embedded in the
               generated message.

The options are read from the args of the frame returned by
mw.getCurrentFrame():

    ignore_errors:  when str._getBoolean() evaluates it as true, an empty
                    string is returned instead of a message.
    error_category: category to prepend to the message; defaults to
                    'Errors reported by Module String'. An empty string
                    suppresses the category link.
]]
function str._error( error_str )
    local args = mw.getCurrentFrame().args;

    -- Caller asked for silence: report nothing at all.
    if str._getBoolean( args.ignore_errors or false ) then
        return '';
    end

    -- A separate local is used for the markup so the parameter is not shadowed.
    local message = '<strong class="error">String Module Error: ' .. error_str .. '</strong>';

    local category = args.error_category or 'Errors reported by Module String';
    if category == '' then
        return message;
    end

    return '[[Category:' .. category .. ']]' .. message;
end
|||
--[[ |
|||
Helper Function to interpret boolean strings |
Helper Function to interpret boolean strings |
||
] |
]] |
||
function str._getBoolean( boolean_str ) |
function str._getBoolean( boolean_str ) |
||
local boolean_value; |
local boolean_value; |
||
Line 248: | Line 333: | ||
end |
end |
||
--[ |
--[[ |
||
Helper function that escapes all pattern characters so that they will be treated |
Helper function that escapes all pattern characters so that they will be treated |
||
as plain text. |
as plain text. |
||
] |
]] |
||
function str._escapePattern( pattern_str ) |
function str._escapePattern( pattern_str ) |
||
return mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ); |
return mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ); |