reworked regexp pattern handling...

Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
This commit is contained in:
Alex A. Naanou 2022-04-14 15:41:48 +03:00
parent c199f97802
commit a1bc80caf5
2 changed files with 51 additions and 41 deletions

View File

@ -8,6 +8,7 @@
"ig-actions": "*", "ig-actions": "*",
"ig-features": "*", "ig-features": "*",
"ig-object": "*", "ig-object": "*",
"ig-types": "^6.9.4",
"jszip": "*", "jszip": "*",
"requirejs": "*" "requirejs": "*"
}, },
@ -18,7 +19,6 @@
"showdown": "*", "showdown": "*",
"xss": "*" "xss": "*"
}, },
"devDependencies": {},
"scripts": { "scripts": {
"bootstrap": "node scripts/bootstrap.js" "bootstrap": "node scripts/bootstrap.js"
} }

View File

@ -234,30 +234,30 @@ module.page = {
//--------------------------------------------------------------------- //---------------------------------------------------------------------
// XXX add escaping... // XXX add escaping...
var _MACRO_PATTERN = var MACRO_PATTERN_STR =
[[ [[
// @macro(arg ..) // @macro(arg ..)
// XXX add support for '\)' in args... // XXX add support for '\)' in args...
'\\\\?@(?<nameInline>$MACROS)\\((?<argsInline>([^)])*)\\)', '\\\\?@(?<nameInline>MACROS)\\((?<argsInline>([^)])*)\\)',
// <macro ..> | <macro ../> // <macro ..> | <macro ../>
// XXX revise escaped > and /> // XXX revise escaped > and />
'<\\s*(?<nameOpen>$MACROS)(?<argsOpen>\\s+([^>/])*)?/?>', '<\\s*(?<nameOpen>MACROS)(?<argsOpen>\\s+([^>/])*)?/?>',
// </macro> // </macro>
'</\\s*(?<nameClose>$MACROS)\\s*>', '</\\s*(?<nameClose>MACROS)\\s*>',
].join('|'), 'smig'] ].join('|'), 'smig']
var MACRO_PATTERN_GROUPS = 8 var MACRO_PATTERN
var MACRO_PATTERN_GROUPS =
'<MACROS>'.split(new RegExp(`(${ MACRO_PATTERN_STR })`)).length-2
// XXX still buggy... // XXX still buggy...
var MACRO_ARGS_PATTERN = var MACRO_ARGS_PATTERN =
RegExp('('+[ RegExp('('+[
// named args... // named args...
'(?<nameQuoted>[a-zA-Z-_]+)\\s*=([\'"])(?<valueQupted>([^\\3]|\\\\3)*)\\3\\s*', '(?<nameQuoted>[a-zA-Z-_]+)\\s*=([\'"])(?<valueQuoted>([^\\3]|\\\\3)*)\\3\\s*',
'(?<nameUnquoted>[a-zA-Z-_]+)\\s*=(?<valueUnquoted>[^\\s]*)', '(?<nameUnquoted>[a-zA-Z-_]+)\\s*=(?<valueUnquoted>[^\\s]*)',
// positional args... // positional args...
'([\'"])(?<argQuoted>([^\\8]|\\\\8)*)\\8', '([\'"])(?<argQuoted>([^\\8]|\\\\8)*)\\8',
'(?<arg>[^\\s]+)', '(?<arg>[^\\s]+)',
].join('|') +')', 'smig') ].join('|') +')', 'smig')
//var MACRO_ARGS_PATTERN_GROUPS = 10
var MACRO_ARGS_PATTERN_GROUPS = 12
// XXX do we need basic inline and block comments a-la lisp??? // XXX do we need basic inline and block comments a-la lisp???
var COMMENT_PATTERN = var COMMENT_PATTERN =
RegExp('('+[ RegExp('('+[
@ -276,11 +276,10 @@ var clearComments =
module.clearComments = module.clearComments =
function(str){ function(str){
return str return str
.replace(COMMENT_PATTERN, function(...a){ .replace(COMMENT_PATTERN,
var groups = a.pop() function(...a){
return groups.uncomment ? return a.pop().uncomment
groups.uncomment || '' }) }
: ''}) }
// //
@ -300,10 +299,11 @@ function(str){
// match: <string>, // match: <string>,
// } // }
// //
// XXX need m and a to be calculated automatically rather than hardcoded... //
// ...can we use .replace(..) for its access to named groups??? // NOTE: this internally uses macros' keys to generate the lexing pattern.
//
// XXX closure: macros
// XXX feels a bit ugly... // XXX feels a bit ugly...
// XXX closure: macros...
var lex = var lex =
module.lex = module.lex =
function*(str){ function*(str){
@ -313,43 +313,47 @@ function*(str){
str = clearComments(str) str = clearComments(str)
var lst = str.split( var lst = str.split(
// XXX cache this??? module.MACRO_PATTERN
?? (MACRO_PATTERN = module.MACRO_PATTERN =
new RegExp( new RegExp(
'('+ _MACRO_PATTERN[0] '('+ MACRO_PATTERN_STR[0]
.replace(/\$MACROS/g, Object.keys(macros).join('|')) +')', .replace(/MACROS/g,
_MACRO_PATTERN[1])) Object.keys(macros).join('|')) +')',
MACRO_PATTERN_STR[1])))
var macro = false var macro = false
while(lst.length > 0){ while(lst.length > 0){
if(macro){ if(macro){
var cur = lst.splice(0, MACRO_PATTERN_GROUPS) var match = lst.splice(0, MACRO_PATTERN_GROUPS)[0]
var match = cur[0] // NOTE: we essentially are parsing the detected macro a
// special case: quoted inline macro -> text... // second time here, this gives us access to named groups
// avoiding maintaining match indexes with the .split(..)
// output...
// XXX for some reason .match(..) here returns a list with a string...
var cur = [...match.matchAll(MACRO_PATTERN)][0].groups
// special case: escaped inline macro -> keep as text...
if(match.startsWith('\\@')){ if(match.startsWith('\\@')){
yield match yield match
macro = false macro = false
continue } continue }
// group args... // args...
console.log('--- args:', cur[2] || cur[5] || '')
//var _args = (cur[2] || cur[4] || '')
var _args = (cur[2] || cur[5] || '')
.split(MACRO_ARGS_PATTERN)
var args = {} var args = {}
var i = -1 var i = -1
while(_args.length > 1){ for(var {groups}
of (cur.argsInline ?? cur.argsOpen ?? '')
.matchAll(MACRO_ARGS_PATTERN)){
i++ i++
var arg = _args.splice(0, MACRO_ARGS_PATTERN_GROUPS) args[groups.nameQuoted ?? groups.nameUnquoted ?? i] =
console.log(' -', arg) groups.valueQuoted
// NOTE: for positional args we use order (i) as key... ?? groups.valueUnquoted
//args[ arg[2] || arg[5] || i ] = ?? groups.argQuoted
// arg[4] || arg[6] || arg[8] || arg[9] } ?? groups.arg }
args[ arg[2] || arg[6] || i ] =
arg[4] || arg[7] || arg[9] || arg[11] }
// macro-spec... // macro-spec...
yield { yield {
//name: (cur[1] || cur[3] || cur[5]).toLowerCase(), name: (cur.nameInline
name: (cur[1] || cur[4] || cur[7]).toLowerCase(), ?? cur.nameOpen
?? cur.nameClose)
.toLowerCase(),
type: match[0] == '@' ? type: match[0] == '@' ?
'inline' 'inline'
: match[1] == '/' ? : match[1] == '/' ?
@ -386,6 +390,10 @@ function*(str){
// ... // ...
// } // }
// //
//
// NOTE: this internally uses macros to check for proper nesting
//
// XXX closure: macros
// XXX normalize lex to be a generator (???) // XXX normalize lex to be a generator (???)
var group = var group =
module.group = module.group =
@ -450,6 +458,8 @@ var WIKIWORD_PATTERN =
//--------------------------------------------------------------------- //---------------------------------------------------------------------
var filters = {
}
var macros = { var macros = {
now: function(){}, now: function(){},
filter: function(){}, filter: function(){},