/**********************************************************************
*
*
*
**********************************************************************/
((typeof define)[0]=='u'?function(f){module.exports=f(require)}:define)
(function(require){ var module={} // make module AMD/node compatible...
/*********************************************************************/

var types = require('ig-types')


//---------------------------------------------------------------------
// Parser...

// XXX should we warn about unknown/unsupported macros? -- currently
//		they will simply be ignored, i.e. passed through the parser
//		without change...
// XXX might be a good idea to both think of a good async parse and
//		create tools for sync parsing (get links etc.)...
var BaseParser =
module.BaseParser = {
	// patterns...
	//
	// The way the patterns are organized might seem a bit overcomplicated
	// and it has to be to be able to reuse the same pattern in different
	// contexts, e.g. the arguments pattern...
	//
	// Argument pattern template.
	//
	// needs:
	//	STOP	-- '\\/>' or ')' (characters that terminate a bare value)
	//	PREFIX	-- 'inline' or 'elem' (prepended to every group name so
	//				that two copies of this pattern can live in one regexp
	//				without group name collisions)
	//
	// Group names produced (all read by .lex(..)):
	//	<PREFIX>ArgName
	//	<PREFIX>SingleQuotedValue / <PREFIX>DoubleQuotedValue / <PREFIX>Value
	//	<PREFIX>SingleQuotedArg / <PREFIX>DoubleQuotedArg / <PREFIX>Arg
	//
	// NOTE(review): the group wrapping the three value alternatives is
	//		named <PREFIX>ArgValue here; nothing in this file reads it, the
	//		name only has to be unique -- confirm against upstream.
	//
	// XXX quote escaping???
	//		/(?<quote>['"])(\\\k<quote>|[^\1])*\k<quote>/
	//		...this will work but we'll also need to remove the \ in the
	//		final string...
	MACRO_ARGS: ['(\\s*(',[
		// arg='val' | arg="val" | arg=val
		'(?<PREFIXArgName>[a-z-]+)\\s*=\\s*(?<PREFIXArgValue>'+([
			"'(?<PREFIXSingleQuotedValue>[^']*)'",
			'"(?<PREFIXDoubleQuotedValue>[^"]*)"',
			'(?<PREFIXValue>[^\\sSTOP\'"]+)',
		].join('|'))+')',
		// "arg" | 'arg'
		'"(?<PREFIXDoubleQuotedArg>[^"]*)"',
		"'(?<PREFIXSingleQuotedArg>[^']*)'",
		// arg
		// NOTE: this is last because it could eat up parts of the above
		//		alternatives...
		'(?<PREFIXArg>[^\\sSTOP\'"]+)',
	].join('|'),
	'))'].join(''),
	// Optional pre-built argument regexp, if set .lex(..) uses it instead
	// of calling .buildArgsPattern()...
	MACRO_ARGS_PATTERN: undefined,

	// Build the argument pattern from the .MACRO_ARGS template...
	//
	//	.buildArgsPattern([<prefix>[, <stop>[, <flags>]]])
	//		-> <regexp>
	//
	//	.buildArgsPattern([<prefix>[, <stop>[, false]]])
	//		-> <string>
	//
	// NOTE: function replacers are used so that a '$' in <prefix>/<stop>
	//		is inserted literally and not interpreted by .replace(..)
	buildArgsPattern: function(prefix='elem', stop='', regexp='smig'){
		var pattern = this.MACRO_ARGS
			.replace(/PREFIX/g, function(){ return prefix })
			.replace(/STOP/g, function(){ return stop })
		return regexp ?
			new RegExp(pattern, regexp)
			: pattern },

	// Macro pattern template.
	//
	// needs:
	//	MACROS			-- '|' separated macro names
	//	INLINE_ARGS		-- MACRO_ARGS.replace(/STOP/, ')')
	//	ARGS			-- MACRO_ARGS.replace(/STOP/, '\\/>')
	//
	// Group names produced (all read by .lex(..)):
	//	nameInline, argsInline, nameOpen, argsOpen, nameClose
	//
	// XXX BUG: this fails to match inline macros with non-empty args @moo(a)
	//		...the problem seems to be with the lack of whitespace
	//		between ( and the first arg -- @moo( a) is matched fine...
	MACRO: '('+([
		// @macro(arg ..)
		// NOTE: the optional leading \ marks an escaped macro, .lex(..)
		//		passes those through as text...
		'\\\\?@(?<nameInline>MACROS)\\((?<argsInline>INLINE_ARGS)\\)',
		// <macro ..> | <macro ../>
		'<\\s*(?<nameOpen>MACROS)(?<argsOpen>ARGS)?\\s*/?>',
		// </macro>
		// NOTE: .lex(..) detects closing tags by match[1] == '/' so the
		//		slash must directly follow the '<'...
		'</\\s*(?<nameClose>MACROS)\\s*>',
	].join('|'))+')',
	// Optional pre-built macro regexp and its capture group count, if set
	// .lex(..) uses them instead of building/counting per call...
	MACRO_PATTERN: undefined,
	MACRO_PATTERN_GROUPS: undefined,

	// Build the macro pattern from the .MACRO template...
	//
	//	.buildMacroPattern([<macros>[, <flags>]])
	//		-> <regexp>
	//
	//	.buildMacroPattern([<macros>[, false]])
	//		-> <string>
	//
	// NOTE: INLINE_ARGS must be substituted before ARGS as the latter is
	//		a substring of the former.
	buildMacroPattern: function(macros=['MACROS'], regexp='smig'){
		var names = macros.join('|')
		var inline_args = this.buildArgsPattern('inline', ')', false) +'*'
		var elem_args = this.buildArgsPattern('elem', '\\/>', false) +'*'
		var pattern = this.MACRO
			.replace(/MACROS/g, function(){ return names })
			.replace(/INLINE_ARGS/g, function(){ return inline_args })
			.replace(/ARGS/g, function(){ return elem_args })
		return regexp ?
			new RegExp(pattern, regexp)
			: pattern },
	// Count the capture groups in the macro pattern...
	//
	// This splits a string that the default pattern (macro name 'MACROS')
	// matches as a whole, .split(..) then returns:
	//		['', <group>, .. , '']
	//
	// NOTE: the -2 here is to compensate for the leading and trailing ""'s...
	countMacroPatternGroups: function(){
		return '<MACROS>'.split(this.buildMacroPattern()).length - 2 },

	// Comment pattern...
	//
	// NOTE(review): the delimiter of the first ("uncomment") form was lost
	//		in the copy of the file this was restored from; .stripComments(..)
	//		reads a group named 'uncomment', the <!--[pwiki[ .. ]]--> syntax
	//		is assumed -- confirm against upstream.
	// NOTE: the paired form is matched lazily (.*?) so that two separate
	//		comments do not swallow the text between them...
	//
	// XXX should this be closer to .stripComments(..)
	// XXX do we need basic inline and block comments a-la lisp???
	COMMENT_PATTERN: RegExp('('+[
			// <!--[pwiki[ .. ]]-->
			'<!--\\[pwiki\\[(?<uncomment>.*?)\\]\\]-->',

			// <pwiki-comment> .. </pwiki-comment>
			'<\\s*pwiki-comment[^>]*>.*?<\\/\\s*pwiki-comment\\s*>',
			// <pwiki-comment .. />
			'<\\s*pwiki-comment[^\\/>]*\\/>',
		].join('|') +')', 'smig'),


	// helpers...
	//
	// Flatten a list of filter names, and drop both the '-<name>' entries
	// and the names they negate...
	//
	// NOTE: .tailUnique() is not a standard Array method, presumably it is
	//		provided by ig-types -- verify.
	normalizeFilters: function(filters){
		var skip = new Set()
		return filters
			.flat()
			.tailUnique()
			.filter(function(filter){
				filter[0] == '-'
					&& skip.add(filter.slice(1))
				return filter[0] != '-' })
			.filter(function(filter){
				return !skip.has(filter) })},

	// Match lexed args against an argument spec...
	//
	//	.parseArgs(<spec>, <args>)
	//		-> <res>
	//
	// Spec format:
	//		[<name>, ... [<bool-name>, ...]]
	//
	// i.e. a list of positional argument names optionally ending with a
	// list of boolean flag names.
	//
	// Rules:
	//	- keyword args (non-numeric keys) are copied as-is
	//	- a positional value naming a bool flag sets that flag to true
	//	- remaining positional values are assigned in order to the spec
	//		names that were not passed as keywords
	//	- positional values left after the spec names run out are set
	//		as keys with value true
	//
	// NOTE: the input to this is formatted by .lex(..)
	// NOTE: arg pre-parsing is done by .lex(..) but at that stage we do not
	//		yet touch the actual macros (we need them to get the .arg_spec)
	//		so the actual parsing is done in .expand(..)
	// NOTE: state is accepted for signature compatibility and is not used.
	parseArgs: function(spec, args, state){
		// spec...
		var order = spec.slice()
		var bools = new Set(
			order[order.length-1] instanceof Array ?
				order.pop()
				: [])
		order = order
			.filter(function(k){
				return !(k in args) })

		var res = {}
		// stage 1: populate res with explicit data and place the rest in pos...
		// NOTE: pos can be sparse, the .forEach(..) below skips the holes...
		var pos = []
		for(var [key, value] of Object.entries(args)){
			if(!/^[0-9]+$/.test(key)){
				// keyword...
				res[key] = value
			} else if(bools.has(value)){
				// bool...
				res[value] = true
			} else {
				// positional...
				pos[key*1] = value } }
		// stage 2: populate implicit values from pos...
		pos.forEach(function(e){
			order.length == 0 ?
				(res[e] = true)
				: (res[order.shift()] = e) })
		return res },

	// Strip comments...
	//
	// The contents of the "uncomment" form (see: .COMMENT_PATTERN) are
	// kept, all other comments are removed.
	//
	// NOTE: the last argument passed to the replacer is the named groups
	//		object as .COMMENT_PATTERN contains a named group.
	stripComments: function(str){
		return str
			.replace(this.COMMENT_PATTERN,
				function(...a){
					return a.pop().uncomment
						|| '' }) },

	// Lexically split the string...
	//
	//	<item> ::=
	//		<string>
	//		| {
	//			name: <string>,
	//			type: 'inline'
	//				| 'element'
	//				| 'opening'
	//				| 'closing',
	//			args: {
	//				<index>: <value>,
	//				<key>: <value>,
	//				...
	//			}
	//			match: <string>,
	//		}
	//
	//
	// NOTE: this internally uses page.macros' keys to generate the
	//		lexing pattern.
	// NOTE: escaped inline macros (\@macro(..)) are yielded as text
	//		including the leading backslash.
	// NOTE: <index> counts all the args of a macro, keyword args included.
	lex: function*(page, str){
		// XXX we can't get .raw from the page without going async...
		//str = str
		//	?? page.raw
		// NOTE: we are doing a separate pass for comments to completely
		//		decouple them from the base macro syntax, making them fully
		//		transparent...
		str = this.stripComments(str)

		// XXX should this be cached???
		var macro_pattern = this.MACRO_PATTERN
			?? this.buildMacroPattern(Object.keys(page.macros))
		var macro_pattern_groups = this.MACRO_PATTERN_GROUPS
			?? this.countMacroPatternGroups()
		// NOTE: this is built with the 'elem' prefix and no stop chars and
		//		is applied to both inline and element args, thus the
		//		inline* groups read below can only be set by a custom
		//		.MACRO_ARGS_PATTERN...
		var macro_args_pattern = this.MACRO_ARGS_PATTERN
			?? this.buildArgsPattern()

		// NOTE: .split(..) with capture groups produces:
		//		[<text>, <match>, <group>, .. , <text>, ...]
		//		i.e. text and macro chunks strictly alternate...
		var lst = str.split(macro_pattern)

		var macro = false
		while(lst.length > 0){
			// normal text...
			if(!macro){
				var text = lst.shift()
				// skip empty strings from output...
				if(text != ''){
					yield text }
				macro = true
				continue }

			macro = false
			// the first item of the chunk is the full match, the rest
			// are the group values which we drop...
			var match = lst.splice(0, macro_pattern_groups)[0]

			// special case: escaped inline macro -> keep as text...
			if(match.startsWith('\\@')){
				yield match
				continue }

			// NOTE: we essentially are parsing the detected macro a
			//		second time here, this gives us access to named groups
			//		avoiding maintaining match indexes with the .split(..)
			//		output...
			var cur = [...match.matchAll(macro_pattern)][0].groups

			// args...
			var args = {}
			var i = -1
			for(var {groups}
					of (cur.argsInline
							?? cur.argsOpen
							?? '')
						.matchAll(macro_args_pattern)){
				i++
				args[groups.elemArgName
						?? groups.inlineArgName
						?? i] =
					groups.elemSingleQuotedValue
						?? groups.inlineSingleQuotedValue
						?? groups.elemDoubleQuotedValue
						?? groups.inlineDoubleQuotedValue
						?? groups.elemValue
						?? groups.inlineValue
						?? groups.elemSingleQuotedArg
						?? groups.inlineSingleQuotedArg
						?? groups.elemDoubleQuotedArg
						?? groups.inlineDoubleQuotedArg
						?? groups.elemArg
						?? groups.inlineArg }

			// macro-spec...
			yield {
				name: (cur.nameInline
						?? cur.nameOpen
						?? cur.nameClose)
					.toLowerCase(),
				type: match[0] == '@' ?
						'inline'
					: match[1] == '/' ?
						'closing'
					: match[match.length-2] == '/' ?
						'element'
					: 'opening',
				args,
				match,
			} } },

	// Group block elements...
	//
	//	<item> ::=
	//		<string>
	//		| {
	//			type: 'inline'
	//				| 'element'
	//				| 'block',
	//			body: [
	//				<item>,
	//				...
	//			],
	//
	//			// rest of items are the same as for lex(..)
	//			...
	//		}
	//
	// lex can be a string, in which case it is passed through .lex(..),
	// or a .lex(..) generator.
	//
	// to is the name of the block being collected (false at the top
	// level), parent is the opening item of that block and is currently
	// not used.
	//
	// This will throw an Error if:
	//	- the input ends while a block is still open
	//	- a closing tag does not match the currently open block
	//	- a macro that restricts its parents is placed elsewhere
	//
	// NOTE: this internally uses page.macros to check for proper nesting,
	//		if page.macros[<name>] is an Array it is treated as the list
	//		of block names <name> can be directly nested in.
	//group: function*(page, lex, to=false){
	group: function*(page, lex, to=false, parent){
		// XXX we can't get .raw from the page without going async...
		//lex = lex
		//	?? this.lex(page)
		lex = typeof(lex) == 'string' ?
			this.lex(page, lex)
			: lex

		// either false or the list collecting the raw text of a quoting
		// block...
		var quoting = to
			&& (page.QUOTING_MACROS ?? []).includes(to)
			&& []

		// NOTE: we are not using for .. of .. here as it depletes the
		//		generator even if the end is not reached...
		while(true){
			var {value, done} = lex.next()
			// check if unclosed blocks remaining...
			if(done){
				if(to){
					throw new Error(
						'Premature end of input: Expected closing "'+ to +'"') }
				return }

			// special case: quoting -> collect text...
			// NOTE: we do not care about nesting here...
			if(quoting !== false){
				if(value.name == to
						&& value.type == 'closing'){
					yield quoting.join('')
					return

				} else {
					quoting.push(
						typeof(value) == 'string' ?
							value
							: value.match ) }
				continue }

			// assert nesting rules...
			// NOTE: we only check for direct nesting...
			// XXX might be a good idea to link nested block to the parent...
			if(page.macros[value.name] instanceof Array
					&& !page.macros[value.name].includes(to)
					// do not complain about closing nestable tags...
					&& !(value.name == to
						&& value.type == 'closing')){
				throw new Error(
					'Unexpected "'+ value.name +'" macro'
						+(to ?
							' in "'+to+'"'
							: '')) }
			// open block...
			if(value.type == 'opening'){
				//value.body = [...this.group(page, lex, value.name)]
				value.body = [...this.group(page, lex, value.name, value)]
				value.type = 'block'

			// close block...
			} else if(value.type == 'closing'){
				if(value.name != to){
					throw new Error('Unexpected closing "'+ value.name +'"') }
				// NOTE: we are intentionally not yielding the value here...
				return }

			// normal value...
			yield value } },

	// Expand macros...
	//
	//	<item> ::=
	//		<string>
	//		// returned by .macros.filter(..)
	//		| {
	//			filters: [
	//				'<filter>'
	//					| '-<filter>',
	//				...
	//			],
	//			data: [ <item>, .. ],
	//		}
	//
	// ast can be:
	//	- null/undefined		-- page.raw is awaited and grouped
	//	- a string				-- grouped via .group(..)
	//	- a types.Generator		-- used as-is
	//	- anything else			-- its .iter() is used
	//
	// Macros are called as:
	//		page.macros[<name>].call(page, <args>, <body>, <state>, <item>)
	// with <args> matched against the macro's .arg_spec by .parseArgs(..)
	//
	// NOTE: items whose page.macros entry is not a function are skipped.
	// NOTE: array results are yielded item by item.
	expand: async function*(page, ast, state={}){
		ast = ast == null ?
				//this.group(page)
				this.group(page, await page.raw)
			: typeof(ast) == 'string' ?
				this.group(page, ast)
			: ast instanceof types.Generator ?
				ast
			: ast.iter()

		while(true){
			var {value, done} = ast.next()
			if(done){
				return }

			// text block...
			if(typeof(value) == 'string'){
				yield value
				continue }

			// macro...
			var {name, args, body} = value
			// nested macro -- skip...
			if(typeof(page.macros[name]) != 'function'){
				continue }
			// args...
			args = this.parseArgs.call(page,
				page.macros[name].arg_spec
					?? [],
				args,
				state)
			// call...
			var res =
				await page.macros[name].call(page, args, body, state, value)
					?? ''
			// result...
			if(res instanceof Array
					|| page.macros[name] instanceof types.Generator){
				yield* res
			} else {
				yield res } } },

	// Fully parse a page...
	//
	// This runs in two stages:
	//	- expand the page
	//		- lex the page -- .lex(..)
	//		- group block elements -- .group(..)
	//		- expand macros -- .expand(..)
	//	- apply filters
	//
	// Resolves to the resulting string.
	//
	// NOTE: this has to synchronize everything between stage 1 (up to
	//		and including expand) and stage 2 (post-handlers, filters, ...)
	//		because the former need a fully loaded and expanded page if
	//		we want to do this in 2 stages and not 3...
	//		XXX might be fun to try a load-and-tweak approach the first
	//			version used -- i.e. setting placeholders and replacing
	//			them on demand rather than on encounter (as is now), e.g.
	//			a slot when loaded will replace the prior occurrences...
	// NOTE: Promise.iter(..) and .map(..) on the expanded ast are not
	//		standard JS, presumably both are provided by ig-types -- verify.
	//
	// XXX add a special filter to clear pending filters... (???)
	parse: async function(page, ast, state={}){
		var that = this
		ast = ast
			?? this.expand(page, null, state)
		ast = typeof(ast) == 'string' ?
			this.expand(page, ast, state)
			: ast

		// NOTE: we need to await for ast here as we need stage 2 of
		//		parsing to happen AFTER everything else completes...
		return await Promise.iter((await ast)
				// post handlers...
				.map(function(section){
					return typeof(section) == 'function' ?
						// NOTE: this can produce promises...
						section.call(page, state)
						: section }))
			.flat()
			// filters...
			// XXX if one of the post-handlers is a promise this will
			//		need to sync...
			.map(function(section){
				return (
					// expand section...
					typeof(section) != 'string' ?
						section.data
					// global filters...
					: state.filters ?
						that.normalizeFilters(state.filters)
							.reduce(function(res, filter){
								// unknown filter...
								// NOTE: we try not to break on user errors
								//		if we can help it...
								if(page.filters[filter] == null){
									console.warn(
										'.parse(..): unsupported filter: '+ filter)
									return res }
								// NOTE: if a filter returns null/undefined
								//		it will have no effect on the result...
								return page.filters[filter].call(page, res)
									?? res }, section)
					// no global filters...
					: section ) })
			.flat()
			.join('') },
}

var parser =
module.parser = {
	__proto__: BaseParser,
}




/**********************************************************************
* vim:set ts=4 sw=4 :                               */ return module })