From 5d6f7d908ff4fe7cd2e843248e906a3e5f345547 Mon Sep 17 00:00:00 2001 From: "Alex A. Naanou" Date: Wed, 26 Mar 2025 17:20:56 +0300 Subject: [PATCH] encoding + typo... Signed-off-by: Alex A. Naanou --- diff2.js | 2196 +++++++++++++++++++++++++++--------------------------- 1 file changed, 1098 insertions(+), 1098 deletions(-) diff --git a/diff2.js b/diff2.js index 058e62c..b7a5f00 100644 --- a/diff2.js +++ b/diff2.js @@ -1,1098 +1,1098 @@ -/********************************************************************** -* -* -* -**********************************************/ /* c8 ignore next 2 */ -((typeof define)[0]=='u'?function(f){module.exports=f(require)}:define) -(function(require){ var module={} // make module AMD/node compatible... -/*********************************************************************/ - -var object = require('ig-object') -var types = require('ig-types') - - - -/*********************************************************************/ -// -// -// XXX try using offset from prev element as an index for arrays... -// ...this should make it possible to use LCS on the linear diff -// directly... -// -// -// XXX thinks this needs to do: -// - path specification - DONE -// - walk object tree - DONE -// - generate a spec - DONE -// - serializable -// - support props - ??? -// - build object via spec - DONE -// - update object via spec -// - contextual updates -// - forward -// match context + pre state -// - reverse -// match context + post state -// - subtract specs (diff) -// - full -// - relaxed -- ignore item order -// - modes: -// - JSON -// - reconstructable -// - full -// - not reconstructable -- if some types are used (functions, ...) -// - compare protocol -// - reconstruct protocol -// -// -// XXX proposed diff format: -// [ -// // change... -// [ -// // pre-context... -// // XXX should '=' here be optional??? -// ['=', , ], -// ... -// ['=', , ], -// -// // changes... -// // addition... -// ['+', , ], -// // removal... -// ['-', , ], -// // unchanged context line.... -// ['=', , ], -// ... -// -// // post-context... -// ['=', , ], -// ... -// ['=', , ], -// ], -// -// ... -// ] -// -// -// -/*********************************************************************/ - -var STOP = -module.STOP = - types.STOP - - -var EMPTY = -module.EMPTY = -//Symbol.EMPTY = - Symbol('EMPTY') - - -// XXX need a way to uniquely serilaize this to a string path... -// ...or some other way to use it in a convinient manner... -var CONTENT = -module.CONTENT = -//Symbol.CONTENT = - Symbol('CONTENT') - - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -// XXX OFFSET: should we also store .index??? -var Offset = -module.Offset = -object.Constructor('Offset', { - value: undefined, - index: undefined, - - // XXX - toString: function(){ - //return `Offset(${this, value})` }, - return this.index == null ? - `Offset(${this.value})` - : `Offset(${this.value}, ${this.index})` }, - // XXX - valueOf: function(){ - return this.value }, - // XXX should we consistency check that index is >= offset ??? - __init__: function(value, index){ - this.value = value - this.index = index }, -}) - - -// XXX revise... -var Path = -module.Path = -object.Constructor('Path', Array, { - __str: undefined, - get str(){ - return this.__str - || (this.__str = path2str(this)) }, - set str(value){ - delete this.__str - this.splice(0, this.length, ...str2path(value)) }, - - valueOf: function(){ - return this.str }, - toString: function(){ - return 'Path: '+ this.valueOf() }, -}) - - -//--------------------------------------------------------------------- - -// -// Walk([, ]) -// -> -// -// Walk() -// -> -// -// -// NOTE: in the Walk() case, must contain a .handler -// function. -// -// -// XXX need a way to produce nested results... -// XXX should we move this to a separate lib??? -var Walk = -module.Walk = -object.Constructor('Walk', { - - // - // .handler(obj, path, next, type) - // -> - // -> [path, ...] - // - // XXX should there be a default here??? - handler: undefined, - - // - // Format: - // { - // : , - // - // : { - // list: , - // ... - // }, - // - // ... - // } - // - // XXX should there be a default here??? - listers: undefined, - - // - // .normalizePath() - // -> - // - // (optional) - normalizePath: undefined, - - // NOTE: handler argument always overwrites the value given in options... - __init__: function(handler, options){ - // handle Walk() case... - if(arguments.length == 1 && typeof(handler) != 'function'){ - options = handler - handler = options.handler } - // sanity check... - if(typeof(handler) != 'function'){ - throw new Error('Walk(..): a callable handler us required.') } - options - && Object.assign(this, options) - this.handler = - handler instanceof types.Generator ? - handler - : Object.assign( - function*(){ yield handler.call(this, ...arguments) }, - {toString: function(){ return handler.toString() }}) }, - // XXX STACK: might be a good idea to form the output of this like - // a stack language program... - // This would require actions to: - // - block type (BLOCK) - // - push sub-path (PUSH) - // - pop / level-end (POP) - // XXX add options... - __call__: function*(_, obj, path=[], type='root', seen=new Map()){ - var that = this - path = this.normalizePath ? - this.normalizePath(path) - : path - // handle reference loops... - if(seen.has(obj)){ - yield* this.handler(obj, path, seen.get(obj), 'LINK') - return } - typeof(obj) == 'object' - && seen.set(obj, path) - - // list... - // - // format: - // [ - // [, [ [, ], .. ]], - // .. - // ] - var next = - [...Object.entries(this.listers || {}) - // NOTE: we need this to support throwing STOP... - .iter() - .filter(function([n, h]){ - return (typeof(h) == 'function' || h.list) - && !that['no' + n.capitalize()] }) - .map(function([n, h]){ - var res = typeof(h) == 'function' ? - h.call(that.listers, obj) - : h.list(obj) - return res - && [n, res] }) - .filter(function(e){ - return !!e }) ] - // walk... - try { - // XXX should this be a yield* or a simple yield??? - yield* this.handler(obj, path, next, type) - /* XXX STACK: BLOCK... - .map(function([p, v]){ - v && typeof(v) == 'object' - && console.log(' BLOCK', v.type) - return [p, v] }) - //*/ - // next/children... - yield* next - .iter() - .map(function*([type, items]){ - yield* items - .iter() - .map(function*([key, value]){ - // XXX STACK: PUSH key - //console.log(' PUSH', key) - // XXX add relative path support... - // ...maybe [path, key] instead of path.concat(key) ??? - yield* that(value, path.concat(key), type, seen) }) }) - // handle STOP... - } catch(err){ - if(err === module.STOP){ - return - } else if(err instanceof module.STOP){ - yield err.value - return } - throw err } - - // XXX STACK: POP - //console.log(' POP') - }, -}) - - - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -// XXX PATH: might be a good idea to use a Path(..) object instead of -// current list/string paths and simply use both as a way to -// serialize... -// XXX we can treat the output as a stack language... -// ...need to clear out unneeded stuff... -// XXX add function support... -// XXX this needs .name set correctly... -var objectWalker = -module.objectWalker = -Walk({ - // support string paths... - normalizePath: function(path){ - return path instanceof Array ? - path - : typeof(path) == 'string' ? - str2path(path) - : [] }, - - handler: function(obj, path, next, type){ - return type == 'LINK' ? - [path, 'LINK', next] - : [ - path, - // null... - obj == null ? - obj - // objects... - : typeof(obj) == 'object' ? - { - type: obj.constructor.name, - source: obj, - } - // primitives... - : obj, - ] }, - - listers: { - // prevent dissecting null... - null: function(obj){ - if(obj === null){ - throw module.STOP } }, - - // XXX do we use offsets here or indexes... - set: function(obj){ - return obj instanceof Set - && [...obj.values()] - .entries() - .map(function([k, v]){ - return [[module.CONTENT, k], v] }) }, - map: function(obj){ - return obj instanceof Map - && obj.entries() - .map(function*([k, v], i){ - yield* [ - [[module.CONTENT, i+'@key'], k], - [[module.CONTENT, i], v], - ] }) }, - - //* XXX should we handle array elements differently??? - // ...these to simply mark attr type for the handler(..), not - // sure if the added complexity is worth it... (???) - array: function(obj){ - var prev = 0 - return obj instanceof Array - && [...Object.entries(obj)] - .filter(function([k, v]){ - return !isNaN(parseInt(k)) }) - // NOTE: we are outputting index offsets instead of - // actual indexes... - // XXX need to make this a syntax... - .map(function([k, v]){ - [prev, k] = [k, k-prev] - return [module.Offset(k, prev), v] }) }, - attr: function(obj){ - return obj instanceof Array ? - [...Object.entries(obj)] - .filter(function([k, v]){ - return isNaN(parseInt(k)) }) - : typeof(obj) == 'object' - && [...Object.entries(obj)] }, - /*/ - attr: function(obj){ - return typeof(obj) == 'object' - && Object.entries(obj) }, - //*/ - proto: function(obj){ - return typeof(obj) == 'object' - && obj.constructor.prototype !== obj.__proto__ - && [['__proto__', obj.__proto__]] }, - }, -}) - - - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -// like objectWalker(..) but with 'text' support... -// -var objectWalkerWithText = -module.objectWalkerWithText = -Object.assign( - object.create(objectWalker), - //object.create('objectWalkerWithText', objectWalker), - { - handler: function*(obj, path, next, type){ - yield* !this.noText - && typeof(obj) == 'string' - && obj.includes('\n') ? - [ [path, { - type: 'text', - source: obj, - }] ] - : objectWalker.handler.call(this, ...arguments) }, - listers: Object.assign( - {text: function(obj){ - return typeof(obj) == 'string' - && obj.includes('\n') - && obj.split(/\n/g).entries() - .map(function([k, v]){ - return [[module.CONTENT, k], v] }) }}, - objectWalker.listers), - }) - - - -//--------------------------------------------------------------------- - -// path2str(..) -// -// XXX need to figure out a way to avoid clashes with module.CONTENT in -// path with actual attribute keys... -// ways to do this: -// - serialize CONTENT in a cleaver way -// - add a different path separator to indicate content and quote -// it in strings -- ':'??? -// XXX Q: should there be a difference between: -// ['', module.CONTENT] -// and -// [module.CONTENT] ??? -// ...currently they are the same... -// A: there should be a difference.... -// [...d.handle({'':new Set([1,2,3]), x:123}).chain(d.serializePaths)] -// ...the problem is in path2str(..) -// XXX OFFSET: add option to either encode offset or index... -var serializePathElem = function(p, i, l){ - return ( - p instanceof Offset ? - // XXX OFFSET: add option to either encode offset or index... - // XXX OFFSET: should we use + or : ??? - // XXX OFFSET: should we use .toString() or something like .toKey()??? - ':'+ p.value - : typeof(p) == 'object' ? - JSON.stringify(p) - // quote special chars... - : typeof(p) == 'string' ? - // XXX OFFSET: should we use + or : ??? - //p.replace(/([\/:+])/g, '\\$1') - p.replace(/([\/:])/g, '\\$1') - : p )} -// XXX OFFSET: add option to either encode offset or index... -var path2str = -module.path2str = -function(p){ - return '/'+ p - // flatten lispPaths... - .flat(Infinity) - .map(serializePathElem) - .reduce(function(res, e){ - e = e === module.CONTENT ? - (res.length == 0 ? - '' - : res.pop()) - + ':CONTENT' - // special case: '' as key... - : e === '' ? - "''" - : e - res.push(e) - return res }, []) - .join('/') } - - -// str2path(..) -// -var unquote = function(str){ - return str - .replace(/^(['"])(.*)\1$/, '$2') } -var deserializePathElem = function(p){ - return p == ':CONTENT'? - [module.CONTENT] - : /[^\\]:CONTENT$/.test(p) ? - [unquote(p.replace(/(? ??? -// XXX add support for options.lispPaths -var str2path = -module.str2path = -function(str){ - return str instanceof Array ? - str - : str == '' || str == '/' ? - [] - : (str - // remove leading '/' - .replace(/^\//, '') - .split(/\//g) - .map(deserializePathElem) - .flat()) } - - -var serializePaths = -module.serializePaths = -types.generator.iter - .map(function([p, v, ...rest]){ - return rest.length > 0 && v == 'LINK' ? - // link... - [path2str(p), v, path2str(rest[0])] - : [path2str(p), v] }) - - -// Remove attributes from object metadata... -// -// stripAttr(attr, ...) -// -> -// -// () -// -> -// -// .chain() -// -var stripAttr = -module.stripAttr = -function(...attrs){ - return types.generator.iter - .map(function([p, v, ...rest]){ - if(v && typeof(v) == 'object'){ - // keep things non-destructive... - v = Object.assign({}, v) - attrs - .forEach(function(attr){ - attr in v - && (delete v[attr]) }) } - return [p, v, ...rest] }) } - - - -//--------------------------------------------------------------------- -// XXX construct... - - -// -// atPath(root, path) -// -> value -// -// atPath(root, path, value) -// -> value -// -// NOTE: to set this needs the full basepath to exist... -// -// XXX need to write a map key to an item that does not exist... -// XXX need a way to insert values after a specific index/key, this -// would require: -// - indicated this in spec: -// a) AFTER/BEFORE symbol(s) in path (???) -// here there are two ways to think about this: -// - AFTER means *replace* the item after -// - AFTER means *between* this and the item after -// b) context (a-la diff) -// - insert to array: -// .splice(i, 0, ) -// - insert into set: -// .splice(..) -// - insert into map -// ??? -// - insert into object -// ??? -// ...revise... -// XXX str2path and passing in a list path produce different results... -// XXX OFFSET: add offset support... -// ...get prev in context and add offset... -var atPath = -module.atPath = -function(root, path, value){ - path = str2path(path) - //console.log(' ', path, value) - // special case: get/set root... - if(path.length == 0 || path[0] == '/'){ - return arguments.length > 2 ? - value - : root } - // get value at path... - var mode = 'normal' - var base - var target = path - .reduce(function(cur, p){ - // CONTENT... - if(p === module.CONTENT){ - // NOTE: indicate to the next iteration that we'll need - // to use map/set API to get/set the value... - mode = 'content' - base = cur - return cur } - // value in content... - if(mode == 'content'){ - mode = 'content-item' - return ( - // set item... - cur instanceof Set ? - [...cur][p] - // map key... - : typeof(p) != 'number' - && p.slice(-4) == '@key' ? - // NOTE: we can write to a non-existant item... - ([...cur][p.slice(0, -4)] || [])[0] - // map value... - : [...cur][p][1] )} - // attr... - mode = 'normal' - base = cur - return cur[p] }, root) - - // get... - if(arguments.length <= 2){ - return target } - - // write attr... - if(mode != 'content-item'){ - value === module.EMPTY ? - (delete base[path.last()]) - : (base[path.last()] = value) - // XXX should we return value or base??? - // ...should we return target when writing EMPTY - return value } - - var index = path.last() - // write set item... - if(base instanceof Set){ - value === module.EMPTY ? - base.delete(target) - :index == base.size ? - base.add(value) - : base.replaceAt(index, value) - // XXX should we return value or base??? - // ...should we return target when writing EMPTY - return value } - - // write map item/key... - var isKey = typeof(index) != 'number' - && index.slice(-4) == '@key' - index = isKey ? - index.slice(0, -4) - : index - var key = atPath(root, - path - .slice(0,-1) - .concat([index +'@key'])) - value === module.EMPTY ? - base.delete(key) - : isKey ? - (base.has(key) ? - base.replaceKey(key, value) - : base.set(value, undefined)) - : base.set(key, value) - - // XXX should we return value or base??? - // ...should we return target when writing EMPTY - return value } - - -// XXX rename to patch(..) -var write = -module.write = -function(root, spec){ - return types.generator.iter(spec) - .reduce(function(root, [path, value, ...rest]){ - //console.log('>>>>', path2str(path), value) - // generate/normalize value... - value = - // XXX STUB... - typeof(value) == 'function' ? - value.source - : value == null || typeof(value) != 'object' ? - value - // XXX move this out / use HANDLERS... - : value.type == 'Object' ? - {} - : value.type == 'Array' ? - [] - : value.type == 'Set' ? - new Set() - : value.type == 'Map'? - new Map() - : undefined - // root value... - if(path.length == 0 - || (path.length == 1 - && (path[0] == '/' || path[0] == ''))){ - return value } - // link... - if(rest.length > 0 && value == 'LINK'){ - atPath(root, path, atPath(root, rest[0])) - return root } - // set value... - atPath(root, path, value) - return root }, root) } - - - -//--------------------------------------------------------------------- - -// Get common chunks (LCS)... -// -// Format: -// [ -// , -// -// // XXX should this be an obj??? -// [ -// , -// , -// , -// ], -// ... -// ] -// -var commonSections = -module.commonSections = -function(A, B, cmp){ - cmp = cmp - || function(a, b){ - return a === b || a == b } - - var LCS = function(a, b){ - // calculate length of chunk... - var l = 0 - while(a+l < A.length && b+l < B.length - && a+l in A && b+l in B - && cmp(A[a+l], B[b+l])){ - l++ } - // get next chunks... - var L = A.length > a+l ? - LCS(a+l+1, b+l) - : [0] - var R = B.length > b+l ? - LCS(a+l, b+l+1) - : [0] - // select the best chunk-set... - // NOTE: we maximize the number of elements in a chunk set then - // minimize the number of chunks per set... - var next = L[0] == R[0] ? - (L.length < R.length ? L : R) - : L[0] > R[0] ? - L - : R - return ( - // non-empty chunk and next... - next[0] > 0 && l > 0 ? - [l + next[0], [a, b, l]].concat(next.slice(1)) - // non-empty chunk and empty next... - : l > 0 ? - [l, [a, b, l]] - // empty chunk... - : next ) } - - return LCS(0, 0) } - - -// -// Format: -// [ -// // change... -// [ -// // XXX do we need the indexes??? -// , , -// , , -// ], -// -// ... -// ] -// -// NOTE: this is generic -- does not care what it compares... -// -// XXX alternativly try to implement the tree cmp approach from diff.js... -// XXX would be nice to be able to mixin stuff cmp -- to wrap container -// type and the like... -var diffSections = -module.diffSections = -function(A, B, cmp){ - var pa = 0 - var pb = 0 - return commonSections(A, B, cmp) - // remove length... - .slice(1) - // add rear block... - .concat([[A.length, B.length, 0]]) - .reduce(function(gaps, [a, b, l]){ - // only push changes of >0 length... - ;(pa == a && pb == b) - // NOTE: we are collecting changes before the current chunk... - || gaps.push([ - pa, - A.slice(pa, a), - pb, - B.slice(pb, b), - ]) - // next change start... - pa = a + l - pb = b + l - return gaps }, []) } - - - -// -// XXX if two sub-trees are different, do we treat them as a big set of -// atomic changes or replace X with Y??? -// XXX need: -// - skip/clear unchanged sub-trees when removing root node (optional) -// - link comparison -- should support linking from both sides... -// - item relative indexing (DONE) -// ...should calculate actual indexes for patching from context... -// - ... (anything else???) -var keyValueDiff = -function(A, B, options={}){ - return diffSections( - [...objectWalkerWithText(A)], - [...objectWalkerWithText(B)], - // XXX add link support... - function([ap, av], [bp, bv]){ - return ap.cmp(bp) - && (av == bv - || (typeof(av) == 'object' - && typeof(bv) == 'object' - && av.type - && av.type == bv.type )) }) } - - -// XXX this completely ignores the path/key... -// XXX this works great for arrays but is questionable on other stuff... -var valueDiff = -function(A, B, options={}){ - return diffSections( - [...objectWalkerWithText(A)], - [...objectWalkerWithText(B)], - // XXX add link support... - function([ap, av], [bp, bv]){ - return av == bv - || (typeof(av) == 'object' - && typeof(bv) == 'object' - && av.type - && av.type == bv.type ) }) } - - -var diff = keyValueDiff - - -// convert diff sections into diff... -// -// XXX so this is a bit more complicated than simply converting from one -// format to another... -// we need to: -// - convert offset changes to one of: -// - insertions of empty slots -// - ??? -// - ... -// XXX changes to a path should not affect the subtree... -// /a/b/.. -> /a/x/.. -// not sure how and where (here or in diff(..)) to handle this yet... -// XXX add context lines... -var toDiff = -module.toDiff = -function*(A, B, options={}){ - yield* diff(A, B, options) - .iter() - .map(function*([ia, ca, ib, cb]){ - // XXX add pre-context... - // ...if changes are adjacent, then skip context... - // ...use atPath(..) - yield ['='] - // XXX OFFSET: here we have to be a bit more cleaver, if an - // offset of an element changed then we need to insert - // a set of empty slots shifting the rest of the array... - // XXX convert offset to index... - yield* ca - .map(function(change){ - return ['-', ...change] }) - yield* cb - .map(function(change){ - return ['+', ...change] }) - // XXX add post-context... - // ...if changes are adjacent, then skip context... - yield ['='] }) } - - - - -//--------------------------------------------------------------------- -// XXX move to test... - -var o = module.o = { - number: 123, - string: 'abc', - - // XXX add a mode to unify these... - 'null': null, - 'undefined': undefined, - 'NaN': NaN, - - - empty_array: [], - array: [1, 2, 3,,,,'N'], - - // XXX set key is the object itself, is this what we want??? - set: new Set([ - 1, - [], - {a:1}, - ]), - map: new Map([ - [[9,8,7], 123], - [321, {x: 123}], - ]), - - object: { - x: {}, - }, - object_gen2: Object.assign( - Object.create({ - x: 'parent', - z: 'shadowed', - }), - { - y: 'local', - z: 'shadowing', - }), - - /*/ XXX - func: function(){}, - func_with_attrs: Object.assign( - function(){}, - { - x: 333, - }), - //*/ - - array_with_attrs: Object.assign( - [1, 2, 3], - { - a: 'some value', - b: 'some other value', - // will overwrite 2... - 1: 333, - }), - - 'special/character\\in:key': [], - - /* XXX EXPERIMENTAL - text: `this - is - a - multi-line - block of text...`, - //*/ -} - -// loop... -// NOTE: we are creating the loop before we pass it to JSON because JSON -// does not support loops in objects... -o.object.y = o.object - - -// generate spec... -console.log([ - ...objectWalker(o) - .chain( - serializePaths, - stripAttr('source'), - )]) - - -/*/ use spec to create a new object... -console.log('\n\n---\n', - [...objectWalker(write(null, objectWalker(o))) - .chain( - serializePaths, - // make the output a bit more compact... - stripAttr('source'), - )]) -//*/ - - -/* XXX -var A = ['x',1,2,3,1,2,3,4,5,,,4] -var B = ['x',1,2,3,4,5,6] - -console.log(A) -console.log(B) - -console.log( - commonSections(A, B)) -console.log( - diffSections(A, B)) - -console.log([...objectWalker(A).chain(serializePaths)]) -console.log([...objectWalker(B).chain(serializePaths)]) - -// XXX -console.log(diff(B, A)) -//*/ - -/* -console.log(JSON.stringify(diff( - [1,4,2,3], - [1,2,3], -), null, ' ')) -//*/ - - -console.log('---') - -console.log([ - ...objectWalker( - `This - is - a - multiline - text`) - .chain( - serializePaths, - stripAttr('source'), - ) ]) - -console.log('---') - -console.log([ - ...objectWalkerWithText( - `This - is - a - multiline - text`) - .chain( - serializePaths, - stripAttr('source'), - ) ]) - -/* XXX -console.log('---') - -console.log(valueDiff( - [1,2,3], - [1,2,4,3], -)) - -console.log('---') - -console.log(valueDiff( - new Set([1,2,3]), - [1,2,4,3], -)) - -//*/ -console.log('---') - -console.log([ - ...objectWalkerWithText( - [,,,1]) - .chain( - serializePaths, - stripAttr('source'), - ) ]) - - -console.log('---') - -console.dir( - [...toDiff( - keyValueDiff( - [['a', 'b'], 2, 3], - [1, 2, {x:1,y:2}], - ))], {depth: null}) - -console.log('---') - -console.dir( - keyValueDiff( - [,,,1,2,3], - [1,2,{x:1,y:2}], - ), {depth: null}) -console.log('') -// XXX BUG: the first change is wrong, it should be "add 3 empty"... -console.dir( - [...toDiff( - [,,,1,2,3], - [1,2,{x:1,y:2}], - )], {depth: null}) - -console.log('---') - - - - - -/********************************************************************** -* vim:set ts=4 sw=4 : */ return module }) +/********************************************************************** +* +* +* +**********************************************/ /* c8 ignore next 2 */ +((typeof define)[0]=='u'?function(f){module.exports=f(require)}:define) +(function(require){ var module={} // make module AMD/node compatible... +/*********************************************************************/ + +var object = require('ig-object') +var types = require('ig-types') + + + +/*********************************************************************/ +// +// +// XXX try using offset from prev element as an index for arrays... +// ...this should make it possible to use LCS on the linear diff +// directly... +// +// +// XXX think this needs to do: +// - path specification - DONE +// - walk object tree - DONE +// - generate a spec - DONE +// - serializable +// - support props - ??? +// - build object via spec - DONE +// - update object via spec +// - contextual updates +// - forward +// match context + pre state +// - reverse +// match context + post state +// - subtract specs (diff) +// - full +// - relaxed -- ignore item order +// - modes: +// - JSON +// - reconstructable +// - full +// - not reconstructable -- if some types are used (functions, ...) +// - compare protocol +// - reconstruct protocol +// +// +// XXX proposed diff format: +// [ +// // change... +// [ +// // pre-context... +// // XXX should '=' here be optional??? +// ['=', , ], +// ... +// ['=', , ], +// +// // changes... +// // addition... +// ['+', , ], +// // removal... +// ['-', , ], +// // unchanged context line.... +// ['=', , ], +// ... +// +// // post-context... +// ['=', , ], +// ... +// ['=', , ], +// ], +// +// ... +// ] +// +// +// +/*********************************************************************/ + +var STOP = +module.STOP = + types.STOP + + +var EMPTY = +module.EMPTY = +//Symbol.EMPTY = + Symbol('EMPTY') + + +// XXX need a way to uniquely serilaize this to a string path... +// ...or some other way to use it in a convinient manner... +var CONTENT = +module.CONTENT = +//Symbol.CONTENT = + Symbol('CONTENT') + + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +// XXX OFFSET: should we also store .index??? +var Offset = +module.Offset = +object.Constructor('Offset', { + value: undefined, + index: undefined, + + // XXX + toString: function(){ + //return `Offset(${this, value})` }, + return this.index == null ? + `Offset(${this.value})` + : `Offset(${this.value}, ${this.index})` }, + // XXX + valueOf: function(){ + return this.value }, + // XXX should we consistency check that index is >= offset ??? + __init__: function(value, index){ + this.value = value + this.index = index }, +}) + + +// XXX revise... +var Path = +module.Path = +object.Constructor('Path', Array, { + __str: undefined, + get str(){ + return this.__str + || (this.__str = path2str(this)) }, + set str(value){ + delete this.__str + this.splice(0, this.length, ...str2path(value)) }, + + valueOf: function(){ + return this.str }, + toString: function(){ + return 'Path: '+ this.valueOf() }, +}) + + +//--------------------------------------------------------------------- + +// +// Walk([, ]) +// -> +// +// Walk() +// -> +// +// +// NOTE: in the Walk() case, must contain a .handler +// function. +// +// +// XXX need a way to produce nested results... +// XXX should we move this to a separate lib??? +var Walk = +module.Walk = +object.Constructor('Walk', { + + // + // .handler(obj, path, next, type) + // -> + // -> [path, ...] + // + // XXX should there be a default here??? + handler: undefined, + + // + // Format: + // { + // : , + // + // : { + // list: , + // ... + // }, + // + // ... + // } + // + // XXX should there be a default here??? + listers: undefined, + + // + // .normalizePath() + // -> + // + // (optional) + normalizePath: undefined, + + // NOTE: handler argument always overwrites the value given in options... + __init__: function(handler, options){ + // handle Walk() case... + if(arguments.length == 1 && typeof(handler) != 'function'){ + options = handler + handler = options.handler } + // sanity check... + if(typeof(handler) != 'function'){ + throw new Error('Walk(..): a callable handler us required.') } + options + && Object.assign(this, options) + this.handler = + handler instanceof types.Generator ? + handler + : Object.assign( + function*(){ yield handler.call(this, ...arguments) }, + {toString: function(){ return handler.toString() }}) }, + // XXX STACK: might be a good idea to form the output of this like + // a stack language program... + // This would require actions to: + // - block type (BLOCK) + // - push sub-path (PUSH) + // - pop / level-end (POP) + // XXX add options... + __call__: function*(_, obj, path=[], type='root', seen=new Map()){ + var that = this + path = this.normalizePath ? + this.normalizePath(path) + : path + // handle reference loops... + if(seen.has(obj)){ + yield* this.handler(obj, path, seen.get(obj), 'LINK') + return } + typeof(obj) == 'object' + && seen.set(obj, path) + + // list... + // + // format: + // [ + // [, [ [, ], .. ]], + // .. + // ] + var next = + [...Object.entries(this.listers || {}) + // NOTE: we need this to support throwing STOP... + .iter() + .filter(function([n, h]){ + return (typeof(h) == 'function' || h.list) + && !that['no' + n.capitalize()] }) + .map(function([n, h]){ + var res = typeof(h) == 'function' ? + h.call(that.listers, obj) + : h.list(obj) + return res + && [n, res] }) + .filter(function(e){ + return !!e }) ] + // walk... + try { + // XXX should this be a yield* or a simple yield??? + yield* this.handler(obj, path, next, type) + /* XXX STACK: BLOCK... + .map(function([p, v]){ + v && typeof(v) == 'object' + && console.log(' BLOCK', v.type) + return [p, v] }) + //*/ + // next/children... + yield* next + .iter() + .map(function*([type, items]){ + yield* items + .iter() + .map(function*([key, value]){ + // XXX STACK: PUSH key + //console.log(' PUSH', key) + // XXX add relative path support... + // ...maybe [path, key] instead of path.concat(key) ??? + yield* that(value, path.concat(key), type, seen) }) }) + // handle STOP... + } catch(err){ + if(err === module.STOP){ + return + } else if(err instanceof module.STOP){ + yield err.value + return } + throw err } + + // XXX STACK: POP + //console.log(' POP') + }, +}) + + + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +// XXX PATH: might be a good idea to use a Path(..) object instead of +// current list/string paths and simply use both as a way to +// serialize... +// XXX we can treat the output as a stack language... +// ...need to clear out unneeded stuff... +// XXX add function support... +// XXX this needs .name set correctly... +var objectWalker = +module.objectWalker = +Walk({ + // support string paths... + normalizePath: function(path){ + return path instanceof Array ? + path + : typeof(path) == 'string' ? + str2path(path) + : [] }, + + handler: function(obj, path, next, type){ + return type == 'LINK' ? + [path, 'LINK', next] + : [ + path, + // null... + obj == null ? + obj + // objects... + : typeof(obj) == 'object' ? + { + type: obj.constructor.name, + source: obj, + } + // primitives... + : obj, + ] }, + + listers: { + // prevent dissecting null... + null: function(obj){ + if(obj === null){ + throw module.STOP } }, + + // XXX do we use offsets here or indexes... + set: function(obj){ + return obj instanceof Set + && [...obj.values()] + .entries() + .map(function([k, v]){ + return [[module.CONTENT, k], v] }) }, + map: function(obj){ + return obj instanceof Map + && obj.entries() + .map(function*([k, v], i){ + yield* [ + [[module.CONTENT, i+'@key'], k], + [[module.CONTENT, i], v], + ] }) }, + + //* XXX should we handle array elements differently??? + // ...these to simply mark attr type for the handler(..), not + // sure if the added complexity is worth it... (???) + array: function(obj){ + var prev = 0 + return obj instanceof Array + && [...Object.entries(obj)] + .filter(function([k, v]){ + return !isNaN(parseInt(k)) }) + // NOTE: we are outputting index offsets instead of + // actual indexes... + // XXX need to make this a syntax... + .map(function([k, v]){ + [prev, k] = [k, k-prev] + return [module.Offset(k, prev), v] }) }, + attr: function(obj){ + return obj instanceof Array ? + [...Object.entries(obj)] + .filter(function([k, v]){ + return isNaN(parseInt(k)) }) + : typeof(obj) == 'object' + && [...Object.entries(obj)] }, + /*/ + attr: function(obj){ + return typeof(obj) == 'object' + && Object.entries(obj) }, + //*/ + proto: function(obj){ + return typeof(obj) == 'object' + && obj.constructor.prototype !== obj.__proto__ + && [['__proto__', obj.__proto__]] }, + }, +}) + + + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +// like objectWalker(..) but with 'text' support... +// +var objectWalkerWithText = +module.objectWalkerWithText = +Object.assign( + object.create(objectWalker), + //object.create('objectWalkerWithText', objectWalker), + { + handler: function*(obj, path, next, type){ + yield* !this.noText + && typeof(obj) == 'string' + && obj.includes('\n') ? + [ [path, { + type: 'text', + source: obj, + }] ] + : objectWalker.handler.call(this, ...arguments) }, + listers: Object.assign( + {text: function(obj){ + return typeof(obj) == 'string' + && obj.includes('\n') + && obj.split(/\n/g).entries() + .map(function([k, v]){ + return [[module.CONTENT, k], v] }) }}, + objectWalker.listers), + }) + + + +//--------------------------------------------------------------------- + +// path2str(..) +// +// XXX need to figure out a way to avoid clashes with module.CONTENT in +// path with actual attribute keys... +// ways to do this: +// - serialize CONTENT in a cleaver way +// - add a different path separator to indicate content and quote +// it in strings -- ':'??? +// XXX Q: should there be a difference between: +// ['', module.CONTENT] +// and +// [module.CONTENT] ??? +// ...currently they are the same... +// A: there should be a difference.... +// [...d.handle({'':new Set([1,2,3]), x:123}).chain(d.serializePaths)] +// ...the problem is in path2str(..) +// XXX OFFSET: add option to either encode offset or index... +var serializePathElem = function(p, i, l){ + return ( + p instanceof Offset ? + // XXX OFFSET: add option to either encode offset or index... + // XXX OFFSET: should we use + or : ??? + // XXX OFFSET: should we use .toString() or something like .toKey()??? + ':'+ p.value + : typeof(p) == 'object' ? + JSON.stringify(p) + // quote special chars... + : typeof(p) == 'string' ? + // XXX OFFSET: should we use + or : ??? + //p.replace(/([\/:+])/g, '\\$1') + p.replace(/([\/:])/g, '\\$1') + : p )} +// XXX OFFSET: add option to either encode offset or index... +var path2str = +module.path2str = +function(p){ + return '/'+ p + // flatten lispPaths... + .flat(Infinity) + .map(serializePathElem) + .reduce(function(res, e){ + e = e === module.CONTENT ? + (res.length == 0 ? + '' + : res.pop()) + + ':CONTENT' + // special case: '' as key... + : e === '' ? + "''" + : e + res.push(e) + return res }, []) + .join('/') } + + +// str2path(..) +// +var unquote = function(str){ + return str + .replace(/^(['"])(.*)\1$/, '$2') } +var deserializePathElem = function(p){ + return p == ':CONTENT'? + [module.CONTENT] + : /[^\\]:CONTENT$/.test(p) ? + [unquote(p.replace(/(? ??? +// XXX add support for options.lispPaths +var str2path = +module.str2path = +function(str){ + return str instanceof Array ? + str + : str == '' || str == '/' ? + [] + : (str + // remove leading '/' + .replace(/^\//, '') + .split(/\//g) + .map(deserializePathElem) + .flat()) } + + +var serializePaths = +module.serializePaths = +types.generator.iter + .map(function([p, v, ...rest]){ + return rest.length > 0 && v == 'LINK' ? + // link... + [path2str(p), v, path2str(rest[0])] + : [path2str(p), v] }) + + +// Remove attributes from object metadata... +// +// stripAttr(attr, ...) +// -> +// +// () +// -> +// +// .chain() +// +var stripAttr = +module.stripAttr = +function(...attrs){ + return types.generator.iter + .map(function([p, v, ...rest]){ + if(v && typeof(v) == 'object'){ + // keep things non-destructive... + v = Object.assign({}, v) + attrs + .forEach(function(attr){ + attr in v + && (delete v[attr]) }) } + return [p, v, ...rest] }) } + + + +//--------------------------------------------------------------------- +// XXX construct... + + +// +// atPath(root, path) +// -> value +// +// atPath(root, path, value) +// -> value +// +// NOTE: to set this needs the full basepath to exist... +// +// XXX need to write a map key to an item that does not exist... +// XXX need a way to insert values after a specific index/key, this +// would require: +// - indicated this in spec: +// a) AFTER/BEFORE symbol(s) in path (???) +// here there are two ways to think about this: +// - AFTER means *replace* the item after +// - AFTER means *between* this and the item after +// b) context (a-la diff) +// - insert to array: +// .splice(i, 0, ) +// - insert into set: +// .splice(..) +// - insert into map +// ??? +// - insert into object +// ??? +// ...revise... +// XXX str2path and passing in a list path produce different results... +// XXX OFFSET: add offset support... +// ...get prev in context and add offset... +var atPath = +module.atPath = +function(root, path, value){ + path = str2path(path) + //console.log(' ', path, value) + // special case: get/set root... + if(path.length == 0 || path[0] == '/'){ + return arguments.length > 2 ? + value + : root } + // get value at path... + var mode = 'normal' + var base + var target = path + .reduce(function(cur, p){ + // CONTENT... + if(p === module.CONTENT){ + // NOTE: indicate to the next iteration that we'll need + // to use map/set API to get/set the value... + mode = 'content' + base = cur + return cur } + // value in content... + if(mode == 'content'){ + mode = 'content-item' + return ( + // set item... + cur instanceof Set ? + [...cur][p] + // map key... + : typeof(p) != 'number' + && p.slice(-4) == '@key' ? + // NOTE: we can write to a non-existant item... + ([...cur][p.slice(0, -4)] || [])[0] + // map value... + : [...cur][p][1] )} + // attr... + mode = 'normal' + base = cur + return cur[p] }, root) + + // get... + if(arguments.length <= 2){ + return target } + + // write attr... + if(mode != 'content-item'){ + value === module.EMPTY ? + (delete base[path.last()]) + : (base[path.last()] = value) + // XXX should we return value or base??? + // ...should we return target when writing EMPTY + return value } + + var index = path.last() + // write set item... + if(base instanceof Set){ + value === module.EMPTY ? + base.delete(target) + :index == base.size ? + base.add(value) + : base.replaceAt(index, value) + // XXX should we return value or base??? + // ...should we return target when writing EMPTY + return value } + + // write map item/key... + var isKey = typeof(index) != 'number' + && index.slice(-4) == '@key' + index = isKey ? + index.slice(0, -4) + : index + var key = atPath(root, + path + .slice(0,-1) + .concat([index +'@key'])) + value === module.EMPTY ? + base.delete(key) + : isKey ? + (base.has(key) ? + base.replaceKey(key, value) + : base.set(value, undefined)) + : base.set(key, value) + + // XXX should we return value or base??? + // ...should we return target when writing EMPTY + return value } + + +// XXX rename to patch(..) +var write = +module.write = +function(root, spec){ + return types.generator.iter(spec) + .reduce(function(root, [path, value, ...rest]){ + //console.log('>>>>', path2str(path), value) + // generate/normalize value... + value = + // XXX STUB... + typeof(value) == 'function' ? + value.source + : value == null || typeof(value) != 'object' ? + value + // XXX move this out / use HANDLERS... + : value.type == 'Object' ? + {} + : value.type == 'Array' ? + [] + : value.type == 'Set' ? + new Set() + : value.type == 'Map'? + new Map() + : undefined + // root value... + if(path.length == 0 + || (path.length == 1 + && (path[0] == '/' || path[0] == ''))){ + return value } + // link... + if(rest.length > 0 && value == 'LINK'){ + atPath(root, path, atPath(root, rest[0])) + return root } + // set value... + atPath(root, path, value) + return root }, root) } + + + +//--------------------------------------------------------------------- + +// Get common chunks (LCS)... +// +// Format: +// [ +// , +// +// // XXX should this be an obj??? +// [ +// , +// , +// , +// ], +// ... +// ] +// +var commonSections = +module.commonSections = +function(A, B, cmp){ + cmp = cmp + || function(a, b){ + return a === b || a == b } + + var LCS = function(a, b){ + // calculate length of chunk... + var l = 0 + while(a+l < A.length && b+l < B.length + && a+l in A && b+l in B + && cmp(A[a+l], B[b+l])){ + l++ } + // get next chunks... + var L = A.length > a+l ? + LCS(a+l+1, b+l) + : [0] + var R = B.length > b+l ? + LCS(a+l, b+l+1) + : [0] + // select the best chunk-set... + // NOTE: we maximize the number of elements in a chunk set then + // minimize the number of chunks per set... + var next = L[0] == R[0] ? + (L.length < R.length ? L : R) + : L[0] > R[0] ? + L + : R + return ( + // non-empty chunk and next... + next[0] > 0 && l > 0 ? + [l + next[0], [a, b, l]].concat(next.slice(1)) + // non-empty chunk and empty next... + : l > 0 ? + [l, [a, b, l]] + // empty chunk... + : next ) } + + return LCS(0, 0) } + + +// +// Format: +// [ +// // change... +// [ +// // XXX do we need the indexes??? +// , , +// , , +// ], +// +// ... +// ] +// +// NOTE: this is generic -- does not care what it compares... +// +// XXX alternativly try to implement the tree cmp approach from diff.js... +// XXX would be nice to be able to mixin stuff cmp -- to wrap container +// type and the like... +var diffSections = +module.diffSections = +function(A, B, cmp){ + var pa = 0 + var pb = 0 + return commonSections(A, B, cmp) + // remove length... + .slice(1) + // add rear block... + .concat([[A.length, B.length, 0]]) + .reduce(function(gaps, [a, b, l]){ + // only push changes of >0 length... + ;(pa == a && pb == b) + // NOTE: we are collecting changes before the current chunk... + || gaps.push([ + pa, + A.slice(pa, a), + pb, + B.slice(pb, b), + ]) + // next change start... + pa = a + l + pb = b + l + return gaps }, []) } + + + +// +// XXX if two sub-trees are different, do we treat them as a big set of +// atomic changes or replace X with Y??? +// XXX need: +// - skip/clear unchanged sub-trees when removing root node (optional) +// - link comparison -- should support linking from both sides... +// - item relative indexing (DONE) +// ...should calculate actual indexes for patching from context... +// - ... (anything else???) +var keyValueDiff = +function(A, B, options={}){ + return diffSections( + [...objectWalkerWithText(A)], + [...objectWalkerWithText(B)], + // XXX add link support... + function([ap, av], [bp, bv]){ + return ap.cmp(bp) + && (av == bv + || (typeof(av) == 'object' + && typeof(bv) == 'object' + && av.type + && av.type == bv.type )) }) } + + +// XXX this completely ignores the path/key... +// XXX this works great for arrays but is questionable on other stuff... +var valueDiff = +function(A, B, options={}){ + return diffSections( + [...objectWalkerWithText(A)], + [...objectWalkerWithText(B)], + // XXX add link support... + function([ap, av], [bp, bv]){ + return av == bv + || (typeof(av) == 'object' + && typeof(bv) == 'object' + && av.type + && av.type == bv.type ) }) } + + +var diff = keyValueDiff + + +// convert diff sections into diff... +// +// XXX so this is a bit more complicated than simply converting from one +// format to another... +// we need to: +// - convert offset changes to one of: +// - insertions of empty slots +// - ??? +// - ... +// XXX changes to a path should not affect the subtree... +// /a/b/.. -> /a/x/.. +// not sure how and where (here or in diff(..)) to handle this yet... +// XXX add context lines... +var toDiff = +module.toDiff = +function*(A, B, options={}){ + yield* diff(A, B, options) + .iter() + .map(function*([ia, ca, ib, cb]){ + // XXX add pre-context... + // ...if changes are adjacent, then skip context... + // ...use atPath(..) + yield ['='] + // XXX OFFSET: here we have to be a bit more cleaver, if an + // offset of an element changed then we need to insert + // a set of empty slots shifting the rest of the array... + // XXX convert offset to index... + yield* ca + .map(function(change){ + return ['-', ...change] }) + yield* cb + .map(function(change){ + return ['+', ...change] }) + // XXX add post-context... + // ...if changes are adjacent, then skip context... + yield ['='] }) } + + + + +//--------------------------------------------------------------------- +// XXX move to test... + +var o = module.o = { + number: 123, + string: 'abc', + + // XXX add a mode to unify these... + 'null': null, + 'undefined': undefined, + 'NaN': NaN, + + + empty_array: [], + array: [1, 2, 3,,,,'N'], + + // XXX set key is the object itself, is this what we want??? + set: new Set([ + 1, + [], + {a:1}, + ]), + map: new Map([ + [[9,8,7], 123], + [321, {x: 123}], + ]), + + object: { + x: {}, + }, + object_gen2: Object.assign( + Object.create({ + x: 'parent', + z: 'shadowed', + }), + { + y: 'local', + z: 'shadowing', + }), + + /*/ XXX + func: function(){}, + func_with_attrs: Object.assign( + function(){}, + { + x: 333, + }), + //*/ + + array_with_attrs: Object.assign( + [1, 2, 3], + { + a: 'some value', + b: 'some other value', + // will overwrite 2... + 1: 333, + }), + + 'special/character\\in:key': [], + + /* XXX EXPERIMENTAL + text: `this + is + a + multi-line + block of text...`, + //*/ +} + +// loop... +// NOTE: we are creating the loop before we pass it to JSON because JSON +// does not support loops in objects... +o.object.y = o.object + + +// generate spec... +console.log([ + ...objectWalker(o) + .chain( + serializePaths, + stripAttr('source'), + )]) + + +/*/ use spec to create a new object... +console.log('\n\n---\n', + [...objectWalker(write(null, objectWalker(o))) + .chain( + serializePaths, + // make the output a bit more compact... + stripAttr('source'), + )]) +//*/ + + +/* XXX +var A = ['x',1,2,3,1,2,3,4,5,,,4] +var B = ['x',1,2,3,4,5,6] + +console.log(A) +console.log(B) + +console.log( + commonSections(A, B)) +console.log( + diffSections(A, B)) + +console.log([...objectWalker(A).chain(serializePaths)]) +console.log([...objectWalker(B).chain(serializePaths)]) + +// XXX +console.log(diff(B, A)) +//*/ + +/* +console.log(JSON.stringify(diff( + [1,4,2,3], + [1,2,3], +), null, ' ')) +//*/ + + +console.log('---') + +console.log([ + ...objectWalker( + `This + is + a + multiline + text`) + .chain( + serializePaths, + stripAttr('source'), + ) ]) + +console.log('---') + +console.log([ + ...objectWalkerWithText( + `This + is + a + multiline + text`) + .chain( + serializePaths, + stripAttr('source'), + ) ]) + +/* XXX +console.log('---') + +console.log(valueDiff( + [1,2,3], + [1,2,4,3], +)) + +console.log('---') + +console.log(valueDiff( + new Set([1,2,3]), + [1,2,4,3], +)) + +//*/ +console.log('---') + +console.log([ + ...objectWalkerWithText( + [,,,1]) + .chain( + serializePaths, + stripAttr('source'), + ) ]) + + +console.log('---') + +console.dir( + [...toDiff( + keyValueDiff( + [['a', 'b'], 2, 3], + [1, 2, {x:1,y:2}], + ))], {depth: null}) + +console.log('---') + +console.dir( + keyValueDiff( + [,,,1,2,3], + [1,2,{x:1,y:2}], + ), {depth: null}) +console.log('') +// XXX BUG: the first change is wrong, it should be "add 3 empty"... +console.dir( + [...toDiff( + [,,,1,2,3], + [1,2,{x:1,y:2}], + )], {depth: null}) + +console.log('---') + + + + + +/********************************************************************** +* vim:set ts=4 sw=4 : */ return module })