Module:Template translation: Difference between revisions
Content added Content deleted
mNo edit summary |
m (there are other chars that generate exceptions in mw.language.isKnownLanguageTag, and some language codes still not validated correctly; check this correctly) |
||
Line 1: | Line 1: | ||
local this = {} |
local this = {} |
||
function this.checkLanguage(subpage |
function this.checkLanguage(subpage) |
||
--[[Check first if there's an |
--[[Check first if there's any invalid character that would cause the |
||
⚫ | |||
function. This test does not work with regexps, use plain search instead (no need |
|||
- all ASCII controls in [\000-\031\127], |
|||
to use Unicode parser, apostrophes can only appear isolated as one byte in UTF-8). |
|||
- double quote ("), sharp sign (#), ampersand (&), apostrophe ('), |
|||
⚫ | |||
- slash (/), colon (:), semicolon (;), lower than (<), greater than (>), |
|||
⚫ | |||
- brackets and braces ([, ], {, }), pipe (|), backslash (\\) |
|||
All other characters are accepted, including space and all non-ASCII |
|||
characters (including \192, which is invalid in UTF-8). |
|||
⚫ | |||
if mw.language.isValidCode(subpage) and mw.language.isKnownLanguageTag(subpage) |
|||
--[[However "SupportedLanguages" are too restrictive, as they discard many |
|||
valid BCP47 script variants (only because MediaWiki still does not |
|||
define automatic transliterators for them, e.g. "en-dsrt" or |
|||
"fr-brai" for French transliteration in Braille), and country variants, |
|||
(useful in localized data, even if they are no longer used for |
|||
translations, such as zh-cn, also useful for legacy codes). |
|||
We want to avoid matching subpagenames containing any uppercase letter, |
|||
(even if they are considered valid in BCP 47, in which they are |
|||
case-insensitive; they are not "SupportedLanguages" for MediaWiki, so |
|||
they are not "KnownLanguageTags" for MediaWiki). |
|||
To be more restrictive, we exclude any character that is not |
|||
ASCII and not a lowercase letter, minus-hyphen, or digit, and any code |
|||
that does not start with a letter or does not end with a letter or digit, |
|||
or that has more than 8 characters between hyphens, or has two hyphens. |
|||
--]] |
|||
or string.find(subpage, "^[%l][%-%d%l]*[%d%l]$") ~= nil |
|||
and string.find(subpage, "[%d%l][%d%l][%d%l][%d%l][%d%l][%d%l][%d%l][%d%l][%d%l]") == nil |
|||
⚫ | |||
then |
then |
||
return subpage |
|||
⚫ | |||
then |
|||
return subpage |
|||
end |
|||
end |
end |
||
-- Otherwise there's currently no known language subpage |
-- Otherwise there's currently no known language subpage |