-- Modul:ISO15924
-- (Wiki page header: the documentation for this module can be created at Modul:ISO15924/Doku)
-- Module table: metadata and, later on, all functions and cached data
local ISO15924 = { suite = "ISO15924",   -- base module name handed to foreignModule() by fetch()
serial = "2020-03-10",                   -- version string reported by failsafe()
item = 71584769,                         -- numeric Wikidata item ID; formatted as "Q%d" where used
statics = "codes" }                      -- subpage used by fetch() for the names in ISO15924.Commons;
                                         -- fetch() replaces it by "commons" if config.live is set
--[=[
ISO 15924 support for scripting systems
* fetch()
* getLanguageScript()
* getScripts()
* isCJK()
* isRTL()
* isScript()
* isTrans()
* scriptName()
* showScript()
* showScripts()
* testScripts()
* failsafe()
]=]
-- Failsafe and GlobalMod are aliases of the very same module table;
-- failsafe() and foreignModule() read and write their state through them
local Failsafe = ISO15924
local GlobalMod = ISO15924
-- Short local name for the Unicode helper table, assigned below
local Unicode
-- Sub-tables grouping the text related and the codepoint related helpers
ISO15924.Text = { }
ISO15924.Unicode = { }
Unicode = ISO15924.Unicode
-- Codepoint sets used by Unicode.romanNumbers():
--     bef -- codepoints accepted immediately before a roman number
--     dig -- codepoints accepted as roman digits
--     fol -- codepoints accepted immediately after a roman number
Unicode.RomanN = { bef = { [ 32 ] = true, -- space
[ 160 ] = true, -- U+00A0 no-break space
[ 8239 ] = true, -- U+202F narrow no-break space
[ 40 ] = true, -- (
[ 45 ] = true, -- hyphen-minus
[ 91 ] = true -- [
},
dig = { [ 73 ] = true, -- I
[ 86 ] = true, -- V
[ 88 ] = true, -- X
[ 76 ] = true, -- L
[ 67 ] = true, -- C
[ 68 ] = true, -- D
[ 77 ] = true -- M
},
fol = { [ 32 ] = true, -- space
[ 160 ] = true, -- U+00A0 no-break space
[ 8239 ] = true, -- U+202F narrow no-break space
[ 41 ] = true, -- )
[ 44 ] = true, -- ,
[ 46 ] = true, -- .
[ 93 ] = true -- ]
} }
-- Names of data tables which fetch() does not load from a subpage of
-- their own name, but from the subpage named by ISO15924.statics
ISO15924.Commons = { "cjk",
"iso639script",
"reverse",
"rtl",
"trans",
"unicodes" }
local foreignModule = function ( access, advanced, append, alt, alert )
    -- Fetch global module
    -- The local title "Module:<access>[/<append>]" is tried first;
    -- if that fails, the page linked from the Wikidata item is tried.
    -- The sitelink (or true, if none was found) is remembered in
    -- GlobalMod.globalModules[ access ], so Wikidata is asked only once.
    -- Precondition:
    --     access    -- string, with name of base module
    --     advanced  -- true, for require(); else mw.loadData()
    --     append    -- string, with subpage part, if any; or false
    --     alt       -- number, of wikidata item of root; or false
    --     alert     -- true, for throwing error on data problem
    -- Postcondition:
    --     Returns whatever, probably table;
    --     if loading failed and alert is not set, the second result
    --     of the last pcall() is returned (nil or an error message)
    --     Throws error, if loading failed and alert is set
    -- 2019-10-29
    local storage = access
    local finer = function ()
                      if append then
                          storage = string.format( "%s/%s",
                                                   storage,
                                                   append )
                      end
                  end
    local fun, lucky, r, suited
    if advanced then
        fun = require
    else
        fun = mw.loadData
    end
    GlobalMod.globalModules = GlobalMod.globalModules or { }
    suited = GlobalMod.globalModules[ access ]
    if type( suited ) ~= "string" then
        -- No remote title known: try the local title.
        -- This must also happen if a previous sitelink lookup failed
        -- (suited == true); otherwise one missing subpage would block
        -- all other subpages of the same base module.
        finer()
        lucky, r = pcall( fun, "Module:" .. storage )
    end
    if not lucky then
        if not suited and
           type( alt ) == "number" and
           alt > 0 and
           type( mw.wikibase ) == "table" then
            -- Ask Wikidata only once per base module
            suited = string.format( "Q%d", alt )
            suited = mw.wikibase.getSitelink( suited )
            GlobalMod.globalModules[ access ] = suited or true
        end
        if type( suited ) == "string" then
            -- The sitelink already carries the namespace prefix
            storage = suited
            finer()
            lucky, r = pcall( fun, storage )
        end
        if not lucky and alert then
            error( "Missing or invalid page: " .. storage, 0 )
        end
    end
    return r
end -- foreignModule()
local function fill( accumulate, assign, append )
    -- Append flat copies of all table entries of assign to accumulate
    -- Precondition:
    --     accumulate  -- table, sequence to be extended
    --     assign      -- table, whose table elements are to be copied;
    --                    anything else is ignored
    --     append      -- table, sequence of values to be added at the
    --                    end of every copy; or nil
    -- Postcondition:
    --     accumulate has been extended by one new table per table
    --     element of assign; other elements of assign are skipped
    if type( assign ) ~= "table" then
        return
    end
    for _, entry in pairs( assign ) do
        if type( entry ) == "table" then
            local copy = { }
            -- pairs() rather than # since the source may be a
            -- table whose length operator is not usable
            for _, item in pairs( entry ) do
                copy[ #copy + 1 ] = item
            end
            if append then
                for i = 1, #append do
                    copy[ #copy + 1 ] = append[ i ]
                end
            end
            accumulate[ #accumulate + 1 ] = copy
        end
    end
end -- fill()
local function fulfil( ask, attribute )
    -- Check whether script has a certain attribute
    -- Precondition:
    --     ask        -- string, with language or script code;
    --                   4 characters:       script code
    --                   up to 3 characters: language code
    --                   longer:             language tag, the script
    --                                       subtag is used if present,
    --                                       else the language's script
    --     attribute  -- string, with "cjk" or "rtl"
    -- Postcondition:
    --     Returns the entry of the attribute table for the script,
    --     if matching; else false (also if ask is not a string)
    local got = ISO15924.fetch( attribute )
    local r
    if type( got ) == "table" and type( ask ) == "string" then
        local n = #ask
        local script
        if n == 4 then
            script = ask
        elseif n < 4 then
            script = ISO15924.getLanguageScript( ask )
        else
            -- Explicit script subtag, like "sr-Cyrl" or "zh-Hant-TW"
            script = ask:match( "^%a%a%a?%-(%a%a%a%a)$" ) or
                     ask:match( "^%a%a%a?%-(%a%a%a%a)%-" )
            if not script then
                -- No script subtag: let getLanguageScript() derive
                -- the script from the language part of the tag,
                -- rather than treating unknown shapes as "Latn"
                script = ISO15924.getLanguageScript( ask )
            end
        end
        if script then
            -- Normalize to ISO 15924 casing: Xxxx
            script = script:sub( 1, 1 ):upper() ..
                     script:sub( 2 ):lower()
            r = got[ script ]
        end
    end
    return r or false
end -- fulfil()
ISO15924.Text.scriptName = function ( assigned, alien, add )
    -- Retrieve script name, hopefully linked
    -- Precondition:
    --     assigned  -- string, with script code (casing Xxxx)
    --     alien     -- string, with language code, or not
    --     add       -- arbitrary additional information;
    --                  for language "de" the number 2 requests the
    --                  second name form with a leading preposition
    -- Postcondition:
    --     Returns string;
    --     empty, if assigned is not a script code;
    --     the code itself, if no translation has been found
    -- The language is chosen in this order: alien, language code at
    -- the end of a subpage title (if translations exist for it),
    -- content language of the site; "en" if not translated.
    local r, trsl
    if type( assigned ) == "string" and
       assigned:match( "^%u%l%l%l$" ) then
        trsl = ISO15924.fetch( "translate" )
        r = assigned
    else
        r = ""
    end
    if type( trsl ) == "table" then
        local slang
        if type( alien ) == "string" and
           alien:match( "^%l%l%l?%-?" ) then
            slang = alien:lower()
        end
        if not slang then
            if not ISO15924.Text.sublang then
                local title = mw.title.getCurrentTitle()
                -- Capture the code only; without the capture the
                -- match would be "/xx" and never equal a language key.
                -- true is stored if there is no such subpage suffix,
                -- so the title is inspected only once.
                ISO15924.Text.sublang = title.text:match( "/(%l%l%l?)$" )
                ISO15924.Text.sublang = ISO15924.Text.sublang or true
            end
            if type( ISO15924.Text.sublang ) == "string" and
               type( trsl[ ISO15924.Text.sublang ] ) == "table" then
                slang = ISO15924.Text.sublang
            end
        end
        if not slang then
            if not ISO15924.Text.sitelang then
                local contLang = mw.language.getContentLanguage()
                ISO15924.Text.sitelang = contLang:getCode():lower()
            end
            slang = ISO15924.Text.sitelang
        end
        if type( trsl[ slang ] ) == "table" then
            trsl = trsl[ slang ]
        elseif type( trsl.en ) == "table" then
            trsl = trsl.en
            slang = "en"
        else
            trsl = false
        end
        if trsl then
            local pages = ISO15924.fetch( "pages" )
            -- From here on trsl is the entry of this script only:
            -- a string, or a table { name, second form, preposition }
            trsl = trsl[ assigned ]
            if type( trsl ) == "string" then
                r = trsl
            elseif type( trsl ) == "table" then
                if type( trsl[ 1 ] ) == "string" then
                    r = trsl[ 1 ]
                    if add and slang == "de" then
                        if tonumber( add ) == 2 and
                           type( trsl[ 2 ] ) == "string" then
                            r = trsl[ 2 ]
                        end
                    end
                end
            end
            if type( pages ) == "table" then
                local p
                for k, v in pairs( pages ) do
                    if type( v ) == "table" and v.lang == slang then
                        p = v
                        break -- for k, v
                    end
                end -- for k, v
                if p and type( p.targets ) == "table" then
                    p = p.targets[ assigned ]
                    if type( p ) == "string" then
                        -- Pipe the link only if target and label
                        -- differ by more than the case of the
                        -- first character
                        if mw.ustring.upper( mw.ustring.sub( p, 1, 1 ) )
                           ~=
                           mw.ustring.upper( mw.ustring.sub( r, 1, 1 ) )
                           or mw.ustring.sub( p, 2 ) ~=
                              mw.ustring.sub( r, 2 ) then
                            r = string.format( "%s|%s", p, r )
                        end
                        r = string.format( "[[%s]]", r )
                    end
                end
            end
            if add and slang == "de" then
                if tonumber( add ) == 2 then
                    -- Leading preposition; "in" unless defined
                    local s = "in "
                    if type( trsl ) == "table" and
                       type( trsl[ 3 ] ) == "string" then
                        s = trsl[ 3 ] .. " "
                    end
                    r = s .. r
                end
            end
        end
    end
    return r
end -- ISO15924.Text.scriptName()
Unicode.flat = function ( analyse )
    -- Reduce wikitext to plain text
    -- Precondition:
    --     analyse  -- string, with wikitext
    -- Postcondition:
    --     Returns string; entities decoded, strip markers removed,
    --     trimmed, tags removed, and links resolved by Module:WLink
    --     if that module is available
    local function holds( text, seek )
        -- Plain substring test, no pattern matching
        return text:find( seek, 1, true ) ~= nil
    end
    local plain = analyse
    if holds( plain, "&" ) then
        plain = mw.text.decode( plain, true )
    end
    plain = mw.text.trim( mw.text.unstrip( plain ) )
    if holds( plain, "<" ) and holds( plain, ">" ) then
        -- Parentheses: keep the string only, drop the count
        plain = ( plain:gsub( "(</?%l[^>]*>)", "" ) )
    end
    if holds( plain, "[" ) then
        local linked = ( holds( plain, "[[" ) and holds( plain, "]]" ) )
                       or holds( plain, "[http" )
                       or holds( plain, "[//" )
        if linked then
            -- On failure the second result is an error message,
            -- which is no table and is ignored
            local _, WLink = pcall( require, "Module:WLink" )
            if type( WLink ) == "table" then
                plain = WLink.WLink().getPlain( plain )
            end
        end
    end
    return plain
end -- Unicode.flat()
Unicode.getRanges = function ()
    -- Provide the collection of Unicode ranges, built on first call
    -- Postcondition:
    --     Returns table, sequence of flat copies of all entries of
    --     the "reverse" data table; cached as Unicode.ranges
    if type( Unicode.ranges ) == "table" then
        return Unicode.ranges
    end
    local collected = { }
    Unicode.ranges = collected
    for _, range in pairs( ISO15924.fetch( "reverse" ) ) do
        local entry = { }
        for _, v in pairs( range ) do
            table.insert( entry, v )
        end
        table.insert( collected, entry )
    end
    return collected
end -- Unicode.getRanges()
Unicode.getScripts = function ( allow, analyse, assume )
    -- Assign a script code to every codepoint of a text
    -- Precondition:
    --     allow    -- table, with permitted unspecific ranges,
    --                 sequence of { first, last }
    --     analyse  -- string or number or table, with text;
    --                 string:  wikitext, cleaned by Unicode.flat()
    --                 number:  single codepoint
    --                 table:   sequence of codepoints
    --                 else:    treated as empty text
    --     assume   -- string, or nil, with ID of expected script
    -- Postcondition:
    --     Returns table, sequence with one { codepoint, script }
    --     per codepoint; script is
    --         string  -- script code of the range; assume, if the
    --                    range lists it; "???" if no list available
    --         true    -- found in allow only
    --         false   -- not found at all
    -- Both range lists are searched in order; the search ends at
    -- the first range whose upper bound is not below the codepoint.
    local uc = Unicode.getRanges()
    local cp = type( analyse )
    local r = { }
    local e, n, p, s, v
    if cp == "string" then
        e = Unicode.flat( analyse )
        cp = { }
        n = mw.ustring.len( e )
        for i = 1, n do
            table.insert( cp, mw.ustring.codepoint( e, i, i ) )
        end -- for i
    elseif cp == "table" then
        cp = analyse
    elseif cp == "number" then
        cp = { analyse }
    else
        -- Nothing to analyse; do not iterate over the type name
        cp = { }
    end
    for i = 1, #cp do
        n = cp[ i ]
        p = { n, false }
        for k = 1, #uc do
            e = uc[ k ]
            if n <= e[ 2 ] then
                if n >= e[ 1 ] then
                    v = e[ 3 ]
                    if type( v ) == "table" then
                        s = v[ 1 ]
                        if assume then
                            -- v may be a sub-table of mw.loadData(),
                            -- where # does not work; ipairs() does
                            for j, code in ipairs( v ) do
                                if code == assume then
                                    s = code
                                    break -- for j, code
                                end
                            end -- for j, code
                        end
                    else
                        s = "???"
                    end
                    p[ 2 ] = s
                    n = false    -- settled, skip the allow list
                end
                break -- for k
            elseif n < e[ 1 ] then
                break -- for k
            end
        end -- for k
        if n and type( allow ) == "table" then
            for j = 1, #allow do
                e = allow[ j ]
                if n <= e[ 2 ] then
                    if n >= e[ 1 ] then
                        p[ 2 ] = true
                    end
                    break -- for j
                elseif n < e[ 1 ] then
                    break -- for j
                end
            end -- for j
        end
        table.insert( r, p )
    end -- for i
    return r
end -- Unicode.getScripts()
Unicode.isScript = function ( all, ask, analyse )
    -- Check whether every codepoint is covered by the ranges of a
    -- script, or by the unspecific ranges defined as "*"
    -- Precondition:
    --     all      -- table, with all definitions
    --     ask      -- string, with supposed script code
    --     analyse  -- string or number or table, with text
    -- Postcondition:
    --     Returns
    --         1. true, if all chars within
    --         2. table, with codepoints of the analysed text
    local function outside( ranges, count, code )
        -- Returns false if code is within one of the ranges,
        -- else the code itself; ranges are searched in order
        -- until one starts beyond the code
        for i = 1, count do
            local range = ranges[ i ]
            if not ( code >= range[ 1 ] ) then
                break
            end
            if code <= range[ 2 ] then
                return false
            end
        end
        return code
    end
    local kind = type( analyse )
    local cp
    if kind == "string" then
        local text = Unicode.flat( analyse )
        cp = { }
        for i = 1, mw.ustring.len( text ) do
            table.insert( cp, mw.ustring.codepoint( text, i, i ) )
        end
    elseif kind == "table" then
        cp = analyse
    elseif kind == "number" then
        cp = { analyse }
    else
        cp = { }
    end
    local own = { }
    local common = { }
    Unicode.merge( own, all, ask )
    Unicode.merge( common, all, "*" )
    local nOwn = #own
    local nCommon = #common
    for j = 1, #cp do
        local code = outside( own, nOwn, cp[ j ] )
        if code and outside( common, nCommon, code ) then
            -- Neither script specific nor unspecific
            return false, cp
        end
    end
    return true, cp
end -- Unicode.isScript()
Unicode.merge = function ( accumulate, all, ask, append )
    -- Collect the ranges of a script into a single list
    -- Precondition:
    --     accumulate  -- table, with collection to be extended
    --     all         -- table, with all definitions
    --     ask         -- string, with requested script code
    --     append      -- true, if code names to be appended to entries
    -- Postcondition:
    --     The accumulate table may have been extended
    -- The entry all[ ask ] is either a list of ranges, or a list of
    -- other script codes whose ranges are to be combined; the kind is
    -- decided by the type of the first element found.
    local group = all[ ask ]
    if type( group ) ~= "table" then
        return
    end
    local kind
    for _, first in pairs( group ) do
        kind = type( first )
        break
    end
    local codes
    if kind == "string" then
        for _, code in pairs( group ) do
            if append then
                codes = { ask, code }
                table.sort( codes )
            end
            fill( accumulate, all[ code ], codes )
        end
        -- Ranges of several scripts need to be put in order
        Unicode.sort( accumulate )
    elseif kind == "table" then
        if append then
            codes = { ask }
        end
        fill( accumulate, group, codes )
    end
end -- Unicode.merge()
Unicode.romanNumbers = function ( array, at )
    -- Check for possible roman numbers
    -- Precondition:
    --     array  -- table, with elements as sequence tables,
    --               codepoint as first component
    --     at     -- number, with position within array
    -- Postcondition:
    --     Returns number, which is identical or greater than at,
    --     to proceed: the position behind a run of roman digits
    --     which starts at at, is preceded by a RomanN.bef codepoint
    --     and is followed by a RomanN.fol codepoint or the end;
    --     else at
    local roman = Unicode.RomanN
    if not roman.dig[ array[ at ][ 1 ] ] or
       not ( at > 1 ) or
       not roman.bef[ array[ at - 1 ][ 1 ] ] then
        return at
    end
    local total = #array
    local last = at
    -- Extend over all directly following digits
    while last < total and roman.dig[ array[ last + 1 ][ 1 ] ] do
        last = last + 1
    end
    if last == total or roman.fol[ array[ last + 1 ][ 1 ] ] then
        return last + 1
    end
    return at
end -- Unicode.romanNumbers()
Unicode.showScripts = function ( analysed )
    -- Format codepoints and assigned script codes as one string
    -- Precondition:
    --     analysed  -- table, as returned by Unicode.getScripts()
    -- Postcondition:
    --     Returns string; every item is preceded by a space:
    --         hex-char-code  -- script code assigned
    --         hex            -- unspecific permitted codepoint
    --         hex-????       -- nothing assigned
    local items = { }
    for i = 1, #analysed do
        local code = analysed[ i ][ 1 ]
        local script = analysed[ i ][ 2 ]
        local item = string.format( "%X", code )
        if not script then
            item = item .. "-????"
        elseif type( script ) == "string" then
            item = string.format( "%s-%s-%s",
                                  item,
                                  mw.ustring.char( code ),
                                  script )
        end
        items[ i ] = " " .. item
    end
    return table.concat( items )
end -- Unicode.showScripts()
Unicode.sort = function ( apply )
    -- Sort code ranges in place, ascending by their first component
    -- Precondition:
    --     apply  -- table, sequence of sequence tables
    table.sort( apply,
                function ( left, right )
                    return left[ 1 ] < right[ 1 ]
                end )
end -- Unicode.sort()
Unicode.testScripts = function ( assume, analyse )
    -- Count chars matching and violating an expected script
    -- Precondition:
    --     assume   -- string, with expected script code
    --     analyse  -- string or number or table, with text
    -- Postcondition:
    --     Returns
    --         1. number, of chars assigned to assume
    --         2. number, of chars assigned to "Latn" instead,
    --            not counting possible roman numbers
    -- NOTE(review): chars of any script other than assume and "Latn"
    -- are counted in neither result -- confirm that this is intended.
    local matched = 0
    local foreign = 0
    local common = { }
    Unicode.merge( common, ISO15924.fetch( "unicodes" ), "*" )
    local cp = Unicode.getScripts( common, analyse, assume )
    local i = 1
    while i <= #cp do
        local script = cp[ i ][ 2 ]
        if type( script ) == "string" then
            if script == assume then
                matched = matched + 1
            elseif script == "Latn" then
                local beyond = Unicode.romanNumbers( cp, i )
                if beyond > i then
                    -- Roman number: continue at the returned
                    -- position, nothing counted
                    i = beyond
                else
                    foreign = foreign + 1
                end
            end
        end
        i = i + 1
    end
    return matched, foreign
end -- Unicode.testScripts()
ISO15924.fetch = function ( access, alert )
-- Fetch mw.loadData component, cached as ISO15924[ access ]
-- Precondition:
-- access -- string, with table name
-- alert -- true, for throwing error on data problem
-- Postcondition:
-- Returns table; empty table, if nothing appropriate could be loaded
local r = ISO15924[ access ]
if type( r ) ~= "table" then
local ext, s, sub
if not ISO15924.config then
-- First call ever: load the configuration before anything else.
-- The value true is a placeholder which keeps the recursive call
-- from entering this branch again; that call replaces it by a table.
ISO15924.config = true
ISO15924.fetch( "config", alert ) -- self
if ISO15924.config.live then
ISO15924.statics = "commons"
end
end
-- Tables listed in ISO15924.Commons share the subpage ISO15924.statics;
-- every other table is loaded from a subpage of its own name
for i = 1, #ISO15924.Commons do
s = ISO15924.Commons[ i ]
if s == access then
sub = ISO15924.statics
break -- for i
end
end -- for i
sub = sub or access
-- Every subpage is requested once only, as long as the result is truthy
ISO15924.loadData = ISO15924.loadData or { }
if ISO15924.loadData[ sub ] then
ext = ISO15924.loadData[ sub ]
else
ext = foreignModule( ISO15924.suite,
false,
sub,
ISO15924.item,
alert )
ISO15924.loadData[ sub ] = ext
end
if type( ext ) == "table" then
if type( ext[ access ] ) == "table" then
r = ext[ access ]
elseif sub == "config" then
-- The configuration page may be the requested table itself
r = ext
else
r = { }
end
else
r = { }
end
ISO15924[ access ] = r
end
return r
end -- ISO15924.fetch()
ISO15924.getLanguageScript = function ( ask )
    -- Retrieve primary script for language
    -- Precondition:
    --     ask  -- string, with language code, or language tag
    -- Postcondition:
    --     Returns string, with associated script code;
    --     "Latn" if nothing else could be detected
    -- A tag of 7 or 8 characters ending in a four-letter subtag
    -- ("sr-Cyrl", "kok-Deva") yields that subtag in casing Xxxx.
    -- Otherwise the "iso639script" table is asked, for longer tags
    -- by their leading language code.
    local r
    if type( ask ) == "string" then
        local s = ask
        local n = #s
        local base
        if n == 7 or n == 8 then
            r = s:match( "^%a%a%a?%-(%a%a%a%a)$" )
            if r then
                r = r:sub( 1, 1 ):upper() ..
                    r:sub( 2 ):lower()
            else
                -- No script subtag, like "be-x-old": the complete
                -- tag is looked up first; keep the language code
                -- as second choice rather than defaulting to "Latn"
                base = s:match( "^(%a%a%a?)%-" )
            end
        elseif n > 3 then
            s = s:match( "^(%a%a%a?)%-" )
        end
        if not r and s then
            local written = ISO15924.fetch( "iso639script" )
            if type( written ) == "table" then
                r = written[ s:lower() ]
                if not r and base then
                    r = written[ base:lower() ]
                end
                if type( r ) == "table" then
                    -- Several scripts: the first one is primary
                    r = r[ 1 ]
                end
            end
        end
    end
    return r or "Latn"
end -- ISO15924.getLanguageScript()
ISO15924.getScripts = function ( analyse )
    -- Retrieve codepoints and assigned script codes
    -- Precondition:
    --     analyse  -- string or number or table, with text
    -- Postcondition:
    --     Returns table, as returned by Unicode.getScripts(),
    --     with the "*" ranges permitted and no script expected
    local common = { }
    local unicodes = ISO15924.fetch( "unicodes" )
    Unicode.merge( common, unicodes, "*" )
    return Unicode.getScripts( common, analyse, false )
end -- ISO15924.getScripts()
ISO15924.isCJK = function ( ask )
-- Check whether script is Chinese-Japanese-Korean (CJK)
-- Precondition:
-- ask -- string, with language or script code
-- Postcondition:
-- Returns the result of fulfil() for the "cjk" table:
-- the entry found for the script, if CJK; else false
return fulfil( ask, "cjk" )
end -- ISO15924.isCJK()
ISO15924.isRTL = function ( ask )
-- Check whether script is right-to-left
-- Precondition:
-- ask -- string, with language or script code
-- Postcondition:
-- Returns the result of fulfil() for the "rtl" table:
-- the entry found for the script, if right-to-left; else false
return fulfil( ask, "rtl" )
end -- ISO15924.isRTL()
ISO15924.isScript = function ( assume, analyse )
-- Check all chars for expected script code ranges,
-- using the "unicodes" data table
-- Precondition:
-- assume -- string, with expected script code
-- analyse -- string or number or table, with text
-- Postcondition:
-- Returns both results of Unicode.isScript():
-- 1. true, if all chars within
-- 2. analyse as table of codepoints
return Unicode.isScript( ISO15924.fetch( "unicodes" ),
assume,
analyse )
end -- ISO15924.isScript()
ISO15924.isTrans = function ( ask, assign, about )
    -- Check whether valid transcription for context
    -- Precondition:
    --     ask     -- string, with transcription key
    --     assign  -- string, with language or scripting code
    --     about   -- string or nil, with site scripting code
    -- Postcondition:
    --     Returns boolean; true if ask is listed for assign in the
    --     "trans" table, or if about is "Latn" and ask is one of
    --     "BGN-PCGN" and "ALA-LC"
    local r = false
    if type( ask ) == "string" and assign ~= nil then
        local trans = ISO15924.fetch( "trans" )
        local t = trans[ assign ]
        if type( t ) == "table" then
            for k, v in pairs( t ) do
                if v == ask then
                    r = true
                    break -- for k, v
                end
            end -- for k, v
        end
    end
    if not r and about == "Latn" then
        r = ( ask == "BGN-PCGN" or ask == "ALA-LC" )
    end
    return r
end -- ISO15924.isTrans()
ISO15924.scriptName = function ( assigned, alien, add )
-- Retrieve script name, hopefully linked;
-- forwards all arguments to ISO15924.Text.scriptName()
-- Precondition:
-- assigned -- string, with script code
-- alien -- string, with language code, or not
-- add -- arbitrary additional information
-- Postcondition:
-- Returns string
return ISO15924.Text.scriptName( assigned, alien, add )
end -- ISO15924.scriptName()
ISO15924.showScript = function ( analyse )
    -- Retrieve assigned script code of first character
    -- Precondition:
    --     analyse  -- string or number or table, with text
    -- Postcondition:
    --     Returns string, with script code of the first codepoint;
    --     false, if text is empty or no code is assigned
    local common = { }
    Unicode.merge( common, ISO15924.fetch( "unicodes" ), "*" )
    local first = Unicode.getScripts( common, analyse, false )[ 1 ]
    if first and type( first[ 2 ] ) == "string" then
        return first[ 2 ]
    end
    return false
end -- ISO15924.showScript()
ISO15924.showScripts = function ( analyse )
    -- Retrieve codepoints and assigned script codes for and as string
    -- Precondition:
    --     analyse  -- string or number or table, with text
    -- Postcondition:
    --     Returns string, as formatted by Unicode.showScripts()
    local common = { }
    local unicodes = ISO15924.fetch( "unicodes" )
    Unicode.merge( common, unicodes, "*" )
    return Unicode.showScripts( Unicode.getScripts( common,
                                                    analyse,
                                                    false ) )
end -- ISO15924.showScripts()
ISO15924.testScripts = function ( assume, analyse )
-- Count chars matching and violating an expected script;
-- forwards all arguments to Unicode.testScripts()
-- Precondition:
-- assume -- string, with expected script code
-- analyse -- string or number or table, with text
-- Postcondition:
-- Returns both results of Unicode.testScripts():
-- 1. number, of chars matching assume
-- 2. number, of chars violating assume
return Unicode.testScripts( assume, analyse )
end -- ISO15924.testScripts()
Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version
    --                 or "wikidata" for the version in the item
    --                 or "~" to compare with the version in the item
    --                 or false
    -- Postcondition:
    --     Returns  string  -- with queried version, also if problem
    --              false   -- if appropriate: "~" and item version
    --                         equals local version, or required
    --                         version is later than local version
    -- 2019-10-15
    local last = ( atleast == "~" )
    local since = atleast
    local r
    if last or since == "wikidata" then
        local item = Failsafe.item
        local entity
        since = false
        if type( item ) == "number" and item > 0 then
            entity = mw.wikibase.getEntity( string.format( "Q%d",
                                                           item ) )
        end
        if type( entity ) == "table" then
            local seek = Failsafe.serialProperty or "P348"
            local vsn = entity:formatPropertyValues( seek )
            local value = type( vsn ) == "table" and vsn.value
            if type( value ) == "string" and value ~= "" then
                if last and value == Failsafe.serial then
                    r = false
                else
                    r = value
                end
            end
        end
    end
    if r == nil then
        -- Nothing learned from the item: compare with local version
        if since and not ( since <= Failsafe.serial ) then
            r = false
        else
            r = Failsafe.serial
        end
    end
    return r
end -- Failsafe.failsafe()
-- Export
-- Table of the functions callable from wikitext by #invoke;
-- returned at the end of the module
local p = { }
p.getLanguageScript = function ( frame )
    -- Retrieve primary script for language
    --     1  -- string, with language code
    -- Returns string, with script code
    local raw = frame.args[ 1 ] or ""
    return ISO15924.getLanguageScript( mw.text.trim( raw ) )
end -- p.getLanguageScript
p.isCJK = function ( frame )
    -- Check whether script is Chinese-Japanese-Korean (CJK)
    --     1  -- string, with language or script code
    -- Returns "1" if CJK, else empty string
    local code = mw.text.trim( frame.args[ 1 ] or "" )
    if ISO15924.isCJK( code ) then
        return "1"
    end
    return ""
end -- p.isCJK()
p.isRTL = function ( frame )
    -- Check whether script is right-to-left
    --     1  -- string, with language or script code
    -- Returns "1" if right-to-left, else empty string
    local code = mw.text.trim( frame.args[ 1 ] or "" )
    if ISO15924.isRTL( code ) then
        return "1"
    end
    return ""
end -- p.isRTL()
p.isScript = function ( frame )
    -- Check all chars for expected script code ranges
    --     1  -- string, with expected script code
    --     2  -- string, with text
    -- Returns "1" if all chars within, else empty string
    local code = mw.text.trim( frame.args[ 1 ] or "" )
    local text = mw.text.trim( frame.args[ 2 ] or "" )
    -- Only the first result is of interest here
    local matching = ISO15924.isScript( code, text )
    if matching then
        return "1"
    end
    return ""
end -- p.isScript
p.isTrans = function ( frame )
    -- Check whether valid transcription for context
    --     1     -- string, with transcription key
    --     2     -- string, with language or scripting code
    --     site  -- string or nil, with site scripting code
    -- Returns "1" if valid, else empty string
    local args = frame.args
    local key = mw.text.trim( args[ 1 ] or "" )
    local code = mw.text.trim( args[ 2 ] or "" )
    local site = mw.text.trim( args.site or "" )
    if ISO15924.isTrans( key, code, site ) then
        return "1"
    end
    return ""
end -- p.isTrans
p.scriptName = function ( frame )
    -- Retrieve script name, hopefully linked
    --     1     -- string, with script code
    --     2     -- string, with additional information
    --     lang  -- string, with language code
    -- Returns string
    local args = frame.args
    local code = mw.text.trim( args[ 1 ] or "" )
    local extra = mw.text.trim( args[ 2 ] or "" )
    local slang = mw.text.trim( args.lang or "" )
    return ISO15924.Text.scriptName( code, slang, extra )
end -- p.scriptName
p.showScript = function ( frame )
    -- Retrieve assigned script code of first character
    --     1  -- string, with text, or codepoint as U+ and hex digits
    -- Returns string, with script code; empty if none
    local s = frame.args[ 1 ]
    if not s then
        return ""
    end
    s = mw.text.trim( s )
    if s == "" then
        return ""
    end
    if s:sub( 1, 2 ) == "U+" then
        -- Codepoint notation; anything malformed yields nothing
        local hex = s:match( "^U%+(%x+)$" )
        s = hex and tonumber( hex, 16 )
    end
    if not s then
        return ""
    end
    return ISO15924.showScript( s ) or ""
end -- p.showScript
p.showScripts = function ( frame )
    -- Retrieve codepoints and assigned script codes as string
    --     1  -- string, with text
    -- Returns string; empty if no text provided
    local s = frame.args[ 1 ]
    if not s then
        return ""
    end
    return ISO15924.showScripts( mw.text.trim( s ) )
end -- p.showScripts
p.failsafe = function ( frame )
    -- Versioning interface
    --     frame  -- table, with requested version as args[ 1 ];
    --               or string, with requested version
    -- Returns string, with version; empty if request not fulfilled
    local since
    if type( frame ) == "table" then
        since = frame.args[ 1 ]
    elseif type( frame ) == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
    end
    if since == "" then
        -- Nothing requested
        since = false
    end
    return Failsafe.failsafe( since ) or ""
end -- p.failsafe()
-- Access for other Lua modules: provides the internal module table
-- with all functions and cached data
p.ISO15924 = function ()
return ISO15924
end -- p.ISO15924
-- Module result: the export table
return p