diff --git a/pwiki2.js b/pwiki2.js
index cdd89f7..e0d4784 100755
--- a/pwiki2.js
+++ b/pwiki2.js
@@ -480,200 +480,289 @@ object.Constructor('BasePage', {
 
 //---------------------------------------------------------------------
 
-// XXX add escaping...
-var MACRO_PATTERN_STR =
-	[[
-		// @macro(arg ..)
-		// XXX add support for '\)' in args...
-		'\\\\?@(?<nameInline>MACROS)\\((?<argsInline>([^)])*)\\)',
-		// <macro ..> | <macro ../>
-		// XXX revise escaped > and />
-		'<\\s*(?<nameOpen>MACROS)(?<argsOpen>\\s+([^>/])*)?/?>',
-		// </macro>
-		'<\\/\\s*(?<nameClose>MACROS)\\s*>',
-	].join('|'), 'smig']
-var MACRO_PATTERN
-var MACRO_PATTERN_GROUPS =
-	''.split(new RegExp(`(${ MACRO_PATTERN_STR })`)).length-2
-// XXX still buggy...
-var MACRO_ARGS_PATTERN =
-	RegExp('('+[
-		// named args...
-		'(?<nameQuoted>[a-zA-Z-_]+)\\s*=([\'"])(?<valueQuoted>([^\\3]|\\\\3)*)\\3\\s*',
-		'(?<nameUnquoted>[a-zA-Z-_]+)\\s*=(?<valueUnquoted>[^\\s]*)',
-		// positional args...
-		'([\'"])(?<argQuoted>([^\\8]|\\\\8)*)\\8',
-		'(?<arg>[^\\s]+)',
-	].join('|') +')', 'smig')
-// XXX do we need basic inline and block commets a-la lisp???
-var COMMENT_PATTERN =
-	RegExp('('+[
-		// <!-- .. -->
-		'<!--.*-->',
+var parser =
+module.parser = {
+	// NOTE: the actual macro pattern is not stored as it depends on
+	//		the macro name list which is page dependant...
+	// XXX add escaping...
+	MACRO_PATTERN_STR: [[
+		// @macro(arg ..)
+		// XXX add support for '\)' in args...
+		'\\\\?@(?<nameInline>MACROS)\\((?<argsInline>([^)])*)\\)',
+		// <macro ..> | <macro ../>
+		// XXX revise escaped > and />
+		'<\\s*(?<nameOpen>MACROS)(?<argsOpen>\\s+([^>/])*)?/?>',
+		// </macro>
+		'<\\/\\s*(?<nameClose>MACROS)\\s*>',
+	].join('|'), 'smig'],
+	// NOTE: this depends on .MACRO_PATTERN_STR and thus is lazily generated...
+	__MACRO_PATTERN_GROUPS: undefined,
+	get MACRO_PATTERN_GROUPS(){
+		return this.__MACRO_PATTERN_GROUPS
+			?? (this.__MACRO_PATTERN_GROUPS =
+				''.split(new RegExp(`(${ this.MACRO_PATTERN_STR })`)).length-2) },
+	// XXX still buggy...
+	MACRO_ARGS_PATTERN: RegExp('('+[
+		// named args...
+		'(?<nameQuoted>[a-zA-Z-_]+)\\s*=([\'"])(?<valueQuoted>([^\\3]|\\\\3)*)\\3\\s*',
+		'(?<nameUnquoted>[a-zA-Z-_]+)\\s*=(?<valueUnquoted>[^\\s]*)',
+		// positional args...
+		'([\'"])(?<argQuoted>([^\\8]|\\\\8)*)\\8',
+		'(?<arg>[^\\s]+)',
+	].join('|') +')', 'smig'),
+	// XXX do we need basic inline and block commets a-la lisp???
+	COMMENT_PATTERN: RegExp('('+[
+		// <!-- .. -->
+		'<!--.*-->',
-		// <pwiki-comment> .. </pwiki-comment>
-		'<\\s*pwiki-comment[^>]*>.*<\\/\\s*pwiki-comment\\s*>',
-		// <pwiki-comment ../>
-		'<\\s*pwiki-comment[^\\/>]*\\/>',
-	].join('|') +')', 'smig')
+		// <pwiki-comment> .. </pwiki-comment>
+		'<\\s*pwiki-comment[^>]*>.*<\\/\\s*pwiki-comment\\s*>',
+		// <pwiki-comment ../>
+		'<\\s*pwiki-comment[^\\/>]*\\/>',
+	].join('|') +')', 'smig'),
+	// General parser API...
+	//
-var clearComments =
-module.clearComments =
-function(str){
-	return str
-		.replace(COMMENT_PATTERN,
-			function(...a){
-				return a.pop().uncomment
-					|| '' }) }
+	clearComments: function(str){
+		return str
+			.replace(this.COMMENT_PATTERN,
+				function(...a){
+					return a.pop().uncomment
+						|| '' }) },
+	//
+	//	<lex> ::=
+	//		<text>
+	//		| {
+	//			name: <string>,
+	//			type: 'inline'
+	//				| 'element'
+	//				| 'opening'
+	//				| 'closing',
+	//			args: {
+	//				<name>: <value>,
+	//				<index>: <value>,
+	//				...
+	//			}
+	//			match: <string>,
+	//		}
+	//
+	//
+	// NOTE: this internally uses macros' keys to generate the lexing pattern.
+	//
+	// XXX feels a bit ugly...
+	lex: function*(page, str){
+		str = str
+			?? page.raw
+		// NOTE: we are doing a separate pass for comments to completely
+		//		decouple them from the base macro syntax, making them fully
+		//		transparent...
+		str = this.clearComments(str)
-//
-//	<lex> ::=
-//		<text>
-//		| {
-//			name: <string>,
-//			type: 'inline'
-//				| 'element'
-//				| 'opening'
-//				| 'closing',
-//			args: {
-//				<name>: <value>,
-//				<index>: <value>,
-//				...
-//			}
-//			match: <string>,
-//		}
-//
-//
-// NOTE: this internally uses macros' keys to generate the lexing pattern.
-//
-// XXX feels a bit ugly...
-var lex =
-module.lex =
-function*(str, macros){
-	// NOTE: we are doing a separate pass for comments to completely
-	//		decouple them from the base macro syntax, making them fully
-	//		transparent...
-	str = clearComments(str)
+		// XXX should this be cached???
+		var MACRO_PATTERN = new RegExp(
+			'('+ this.MACRO_PATTERN_STR[0]
+				.replace(/MACROS/g, Object.keys(page.macros).join('|')) +')',
+			this.MACRO_PATTERN_STR[1])
-	var lst = str.split(
-		module.MACRO_PATTERN
-			?? (MACRO_PATTERN = module.MACRO_PATTERN =
-				new RegExp(
-					'('+ MACRO_PATTERN_STR[0]
-						.replace(/MACROS/g,
-							Object.keys(macros).join('|')) +')',
-					MACRO_PATTERN_STR[1])))
+		var lst = str.split(MACRO_PATTERN)
-	var macro = false
-	while(lst.length > 0){
-		if(macro){
-			var match = lst.splice(0, MACRO_PATTERN_GROUPS)[0]
-			// NOTE: we essentially are parsing the detected macro a
-			//		second time here, this gives us access to named groups
-			//		avoiding maintaining match indexes with the .split(..)
-			//		output...
-			// XXX for some reason .match(..) here returns a list with a string...
-			var cur = [...match.matchAll(MACRO_PATTERN)][0].groups
-			// special case: escaped inline macro -> keep as text...
-			if(match.startsWith('\\@')){
-				yield match
-				macro = false
-				continue }
-			// args...
-			var args = {}
-			var i = -1
-			for(var {groups}
-					of (cur.argsInline ?? cur.argsOpen ?? '')
-						.matchAll(MACRO_ARGS_PATTERN)){
-				i++
-				args[groups.nameQuoted ?? groups.nameUnquoted ?? i] =
-					groups.valueQuoted
-						?? groups.valueUnquoted
-						?? groups.argQuoted
-						?? groups.arg }
-			// macro-spec...
-			yield {
-				name: (cur.nameInline
-						?? cur.nameOpen
-						?? cur.nameClose)
-					.toLowerCase(),
-				type: match[0] == '@' ?
-						'inline'
-					: match[1] == '/' ?
-						'closing'
-					: match[match.length-2] == '/' ?
-						'element'
-						: 'opening',
-				args,
-				match,
-			}
-			macro = false
-		// normal text...
-		} else {
-			var str = lst.shift()
-			// skip empty strings from output...
-			if(str != ''){
-				yield str }
-			macro = true } } }
+		var macro = false
+		while(lst.length > 0){
+			if(macro){
+				var match = lst.splice(0, this.MACRO_PATTERN_GROUPS)[0]
+				// NOTE: we essentially are parsing the detected macro a
+				//		second time here, this gives us access to named groups
+				//		avoiding maintaining match indexes with the .split(..)
+				//		output...
+				// XXX for some reason .match(..) here returns a list with a string...
+				var cur = [...match.matchAll(MACRO_PATTERN)][0].groups
+				// special case: escaped inline macro -> keep as text...
+				if(match.startsWith('\\@')){
+					yield match
+					macro = false
+					continue }
+				// args...
+				var args = {}
+				var i = -1
+				for(var {groups}
+						of (cur.argsInline ?? cur.argsOpen ?? '')
+							.matchAll(this.MACRO_ARGS_PATTERN)){
+					i++
+					args[groups.nameQuoted ?? groups.nameUnquoted ?? i] =
+						groups.valueQuoted
+							?? groups.valueUnquoted
+							?? groups.argQuoted
+							?? groups.arg }
+				// macro-spec...
+				yield {
+					name: (cur.nameInline
+							?? cur.nameOpen
+							?? cur.nameClose)
+						.toLowerCase(),
+					type: match[0] == '@' ?
+							'inline'
+						: match[1] == '/' ?
+							'closing'
+						: match[match.length-2] == '/' ?
+							'element'
+							: 'opening',
+					args,
+					match,
+				}
+				macro = false
+			// normal text...
+			} else {
+				var str = lst.shift()
+				// skip empty strings from output...
+				if(str != ''){
+					yield str }
+				macro = true } } },
-
-//
-//	<group> ::=
-//		<lex>
-//		| {
-//			type: 'inline'
-//				| 'element'
-//				| 'block',
-//			body: [
-//				<group>,
-//				...
-//			],
-//
-//			// rest of items are the same as for lex(..)
-//			...
-//		}
-//
-//
-// NOTE: this internaly uses macros to check for propper nesting
-//
-// XXX normalize lex to be a generator (???)
-var group =
-module.group =
-function*(lex, to=false, macros){
-	// NOTE: we are not using for .. of .. here as it depletes the
-	//		generator even if the end is not reached...
-	while(true){
-		var {value, done} = lex.next()
-		// check if unclosed blocks remaining...
-		if(done){
-			if(to){
+	// Group block elements...
+	//
+	//	<group> ::=
+	//		<lex>
+	//		| {
+	//			type: 'inline'
+	//				| 'element'
+	//				| 'block',
+	//			body: [
+	//				<group>,
+	//				...
+	//			],
+	//
+	//			// rest of items are the same as for lex(..)
+	//			...
+	//		}
+	//
+	//
+	// NOTE: this internaly uses macros to check for propper nesting
+	//
+	// XXX normalize lex to be a generator (???)
+	group: function*(page, lex, to=false){
+		lex = lex
+			?? this.lex(page)
+		lex = typeof(lex) == 'string' ?
+			this.lex(page, lex)
+			: lex
+		// NOTE: we are not using for .. of .. here as it depletes the
+		//		generator even if the end is not reached...
+		while(true){
+			var {value, done} = lex.next()
+			// check if unclosed blocks remaining...
+			if(done){
+				if(to){
+					throw new Error(
+						'Premature end of unpit: Expected closing "'+ to +'"') }
+				return }
+			// assert nesting rules...
+			if(page.macros[value.name] instanceof Array
+					&& page.macros[value.name].includes(to)){
 				throw new Error(
-					'Premature end of unpit: Expected closing "'+ to +'"') }
-			return }
-		// assert nesting rules...
-		if(macros[value.name] instanceof Array
-				&& macros[value.name].includes(to)){
-			throw new Error(
-				'Unexpected "'+ value.name +'" macro'
-				+(to ?
-					' in "'+to+'"'
-					: '')) }
-		// open block...
-		if(value.type == 'opening'){
-			value.body = [...group(lex, value.name, macros)]
-			value.type = 'block'
-			yield value
-			continue
-		// close block...
-		} else if(value.type == 'closing'){
-			if(value.name != to){
-				throw new Error('Unexpected closing "'+ value.name +'"') }
-			// NOTE: we are intentionally not yielding the value here...
-			return }
-		yield value } }
+				'Unexpected "'+ value.name +'" macro'
+				+(to ?
+					' in "'+to+'"'
+					: '')) }
+			// open block...
+			if(value.type == 'opening'){
+				value.body = [...this.group(page, lex, value.name)]
+				value.type = 'block'
+				yield value
+				continue
+			// close block...
+			} else if(value.type == 'closing'){
+				if(value.name != to){
+					throw new Error('Unexpected closing "'+ value.name +'"') }
+				// NOTE: we are intentionally not yielding the value here...
+				return }
+			yield value } },
+
+	// Expand macros...
+	//
+	expand: function*(page, ast, state={}){
+		ast = ast == null ?
+				this.group(page)
+			: typeof(ast) == 'string' ?
+				this.group(page, ast)
+			: ast instanceof types.Generator ?
+				ast
+			: ast.iter()
+
+		while(true){
+			var {value, done} = ast.next()
+			if(done){
+				return }
+
+			// text block...
+			if(typeof(value) == 'string'){
+				yield value
+				continue }
+
+			// macro...
+			var {name, args, body} = value
+			var res =
+				page.macros[name].call(page, args, body, state)
+					?? ''
+			if(res instanceof Array
+					|| page.macros[name] instanceof types.Generator){
+				yield* res
+			} else {
+				yield res } } },
+
+	// Fully parse a page...
+	//
+	// This runs in two stages:
+	//	- expand the page text
+	//	- apply filters
+	//
+	// XXX add a special filter to clear pending filters... (???)
+	// XXX rename???
+	parse: function(page, ast, state={}){
+		var that = this
+		// XXX should we handle strings as input???
+		ast = ast
+			?? this.expand(page, null, state)
+
+		var _normalize = function(filters){
+			var skip = new Set()
+			return filters
+				.flat()
+				.tailUnique()
+				.filter(function(filter){
+					filter[0] == '-'
+						&& skip.add(filter.slice(1))
+					return filter[0] != '-' })
+				.filter(function(filter){
+					return !skip.has(filter) })}
+
+		return [...ast]
+			.map(function(section){
+				var filters = state.filters
+				// nested filters...
+				if(typeof(section) != 'string'){
+					state.filters = _normalize(section.filters)
+					var res = [...that.parse(page, section.data, state)]
+						.flat()
+						.join('')
+					state.filters = filters
+					return res
+				// local filters...
+				} else {
+					return filters ?
+						_normalize(filters)
+							.reduce(function(res, filter){
+								if(page.filters[filter] == null){
+									throw new Error(
+										'.parse(..): unsupported filter: '+ filter) }
+								return page.filters[filter].call(page, res)
+									?? res }, section)
+						: section } })
+			.flat()
+			.join('') },
+}
 
 
 // XXX PATH_VARS need to handle path variables...
@@ -770,7 +859,7 @@ object.Constructor('Page', BasePage, {
 			// serialize the block for later processing...
 			var res = {
 				filters: state.filters,
-				data: [...this.expand(body, state)],
+				data: [...this.parser.expand(this, body, state)],
 			}
 			// restore global filters...
 			state.filters = parent_filters
@@ -789,87 +878,9 @@ object.Constructor('Page', BasePage, {
 		'else': ['macro'],
 	},
 
-	parse: function*(str){
-		yield* group(
-			lex(
-				str
-					?? this.raw,
-				this.macros),
-			false,
-			this.macros) },
-	expand: function*(ast, state={}){
-		ast = ast
-			?? this.parse()
-		ast = ast instanceof types.Generator ?
-			ast
-			: ast.iter()
-		while(true){
-			var {value, done} = ast.next()
-			if(done){
-				return }
-
-			// text block...
-			if(typeof(value) == 'string'){
-				yield value
-				continue }
-
-			// macro...
-			var {name, args, body} = value
-			var res =
-				this.macros[name].call(this, args, body, state)
-					?? ''
-			if(res instanceof Array
-					|| this.macros[name] instanceof types.Generator){
-				yield* res
-			} else {
-				yield res } } },
-	// XXX add a special filter to clear pending filters... (???)
-	// XXX rename and use this instead of render...
-	postProcess: function(ast, state={}){
-		var that = this
-		ast = ast
-			?? this.expand(null, state)
-
-		var _normalize = function(filters){
-			var skip = new Set()
-			return filters
-				.flat()
-				.tailUnique()
-				.filter(function(filter){
-					filter[0] == '-'
-						&& skip.add(filter.slice(1))
-					return filter[0] != '-' })
-				.filter(function(filter){
-					return !skip.has(filter) })}
-
-		return [...ast]
-			.map(function(section){
-				var filters = state.filters
-				// nested filters...
-				if(typeof(section) != 'string'){
-					state.filters = _normalize(section.filters)
-					var res = [...that.postProcess(section.data, state)]
-						.flat()
-						.join('')
-					state.filters = filters
-					return res
-				// local filters...
-				} else {
-					return filters ?
-						_normalize(filters)
-							.reduce(function(res, filter){
-								if(that.filters[filter] == null){
-									throw new Error(
-										'.postProcess(..): unsupported filter: '+ filter) }
-								return that.filters[filter].call(that, res)
-									?? res }, section)
-						: section } })
-			.flat()
-			.join('') },
-
-	// XXX do we need boht this and .postProcess(..) ???
-	render: function(state={}){
-		return this.postProcess(null, state) },
+	parser: module.parser,
+	parse: function(state={}){
+		return this.parser.parse(this, null, state) },
 
 
 	// raw page text...
@@ -892,7 +903,7 @@ object.Constructor('Page', BasePage, {
 
 	// XXX FUNC handle functions as pages...
 	// XXX need to support pattern pages...
 	get text(){
-		return this.render() },
+		return this.parse() },
 	set text(value){
 		this.store.update(this.location, {text: value}) },
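
Editor's note (not part of the patch): a minimal usage sketch of the refactored parser API, assuming the code runs inside pwiki2.js (or that `module.parser` is otherwise reachable) and that `page` is a hypothetical, already-constructed Page instance providing the `.raw`, `.macros` and `.filters` members the diff relies on.

	// `page` is assumed/hypothetical; the parser methods are the ones added above...
	var tokens = [...parser.lex(page)]    // flat stream of text strings and macro specs
	var ast    = [...parser.group(page)]  // same stream with opening/closing pairs nested as 'block' items
	var text   = parser.parse(page)       // expand macros, then apply the accumulated filters

	// Page.parse(..) and the .text getter are now thin wrappers over the shared parser:
	//	page.parse()  <=>  parser.parse(page, null, {})
	//	page.text     ==   page.parse()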