# Provenance (git format-patch header this module was extracted from):
#   From 2d6521646af09a1ebb4580a336e88ee927436b9d Mon Sep 17 00:00:00 2001
#   From: "Alex A. Naanou"
#   Date: Wed, 29 Feb 2012 17:04:55 +0400
#   Subject: [PATCH] started a different, simpler implementation of
#            index.py (merge when done)
#   index2.py: new file, mode 100755
#=======================================================================

__version__ = '''0.0.01'''
__sub_version__ = '''20120229165907'''
__copyright__ = '''(c) Alex A. Naanou 2011'''


#-----------------------------------------------------------------------
# The first index.py might be a little too complicated. try and resolve
# this as so:
#   - list all relevant files (RAW, XMP, JPG, PSD, ...)
#   - group by path (closeness)
#       - deepest common path to contain all files with common name.
#         this will fail if we have different files with same names.
#
#-----------------------------------------------------------------------

import os
import json
import zipfile
import uuid
import time

from itertools import izip, izip_longest

from pli.logictypes import ANY, OR

from pprint import pprint


#-----------------------------------------------------------------------

##CONFIG_NAME = 'test_config.json'
CONFIG_NAME = 'tmp_config.json'

# NOTE: the config is loaded at import time; the file handle is closed
#       explicitly instead of being left to the garbage collector.
with open(CONFIG_NAME) as _config_file:
    config = json.load(_config_file)

# Extension patterns (no leading dot). Each is a pli OR object that is
# compared against an extension string with == / != below.
RAW = OR(
    'NEF', 'nef',
    'CRW', 'crw',
    'CR2', 'cr2',
    'X3F', 'x3f'
)

JPEG = OR(
    'JPG', 'jpg',
    'JPEG', 'jpeg'
)

PSD = OR(
    'PSD', 'psd'
)

TIFF = OR(
    'TIFF', 'tiff',
    'TIF', 'tif'
)

XMP = OR(
    'XMP', 'xmp'
)

ITEM = OR(RAW, JPEG, PSD, TIFF, XMP)

TYPES = {
    'raw': RAW,
    'jpeg': JPEG,
    'psd': PSD,
    'tiff': TIFF,
    'xmp': XMP,
}


SUBTREE_CLASSES = {
    'preview': 'preview',
    'preview (RAW)': 'RAW preview',
}


#-----------------------------------------------------------------------

##!!! we will need to normalize the paths to one single scheme (either relative or absolute)...
def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=False):
    '''
    Walk root and yield every file whose extension matches type.

    root                - directory to walk (passed to os.walk).
    sub_trees           - accepted for interface compatibility; not used
                          by this function.
    type                - pattern the extension (without the dot) is
                          compared to via ==.
    include_root_path   - if true the yielded path is the walked
                          directory split on os.path.sep (root included),
                          otherwise it is relative to root.

    yields:
        (path, name, ext),

    where path is a list of path components, name is the file name
    without extension and ext is the extension without the leading dot.
    '''
    for dirpath, dirs, files in os.walk(root):
        if include_root_path:
            # XXX is this correct...
            path = dirpath.split(os.path.sep)
        else:
            # os.path.relpath is used instead of slicing off
            # len(root.split(os.path.sep)) components: the slice dropped
            # a real directory level when root had a trailing separator
            # or was the filesystem root.
            rel = os.path.relpath(dirpath, root)
            if rel == os.path.curdir:
                path = []
            else:
                rel_parts = rel.split(os.path.sep)
                path = rel_parts
        # process files...
        for f in files:
            name, ext = os.path.splitext(f)
            # we need the extension without the dot...
            ext = ext[1:]
            # filter by ext...
            if ext == type:
                # yield a copy so consumers never share one list object...
                yield list(path), name, ext




#-----------------------------------------------------------------------
if __name__ == '__main__':
##  lst = list(list_files(config['ARCHIVE_ROOT']))
##
##  print len(lst)
##  pprint(lst[0])
##
##  json.dump(lst, file(os.path.join('test', 'flatfilelist.json'), 'w'))

    # load the pre-generated flat file list (see commented code above)...
    with open(os.path.join('test', 'flatfilelist.json')) as _list_file:
        lst = json.load(_list_file)
    # NOTE: single-argument print(...) gives the same output as the
    #       print statement under Python 2.
    print(len(lst))

##  lst.sort()
    # sort via name, ext, path
    lst.sort(key=lambda e: (e[1], e[-1], e[0]))

    ##!!! duplicate a raw file...
    # NOTE: this deliberately injects one duplicate RAW entry so that the
    #       duplicate detection below has something to report.
    for p, n, t in lst:
        if t == RAW:
            lst.append((p, n, t))
            break

    # index by name (indexing preparation)...
    #   {
    #       name: [
    #           (path, name, type),
    #           ...
    #       ],
    #       ...
    #   }
    index = {}
    for p, n, t in lst:
        index.setdefault(n, []).append((p, n, t))

    # index via a proper GID...
    # split similarly named but different files...
    GID_index = {}
    # keep GID defined for the summary print even if the index is empty...
    GID = None
    # pattern of all known types, built once instead of per element...
    known = OR(TIFF, PSD, JPEG, XMP, RAW)
    for name, l in index.items():

        ##!!! gid construction should be a customizable function in itself...
        # main gid criteria:
        #   - unique
        #   - calculable from the item (preferably any sub-item)
##      GID = '%s-%s' % (uuid.uuid4().hex, name)
        ##!!! get RAW file creation date from EXIF...
        # millisecond timestamp in lowercase hex (same text as the former
        # hex(long(...))[2:-1], without depending on the trailing 'L')...
        GID = '%x-%s' % (int(time.time() * 1000), name)

        l.sort()

        raws = [e for e in l if e[-1] == RAW]
        if len(raws) > 1:
            # split the group into c separate groups...
            # strategies:
            #   - path proximity (distance)
            #   - metadata
            ##!!!
            print('duplicates: %s (%sx)' % (name, len(raws)))
##          raise TypeError, 'found %s RAW files with identical names (%s).' % (len(raws), name)
        else:
            GID_index[GID] = {
                'gid': GID,
                'name': name,
                'RAW': raws,
                'XMP': [e for e in l if e[-1] == XMP],
                'JPG': [e for e in l if e[-1] == JPEG],
                'PSD': [e for e in l if e[-1] == PSD],
                'TIFF': [e for e in l if e[-1] == TIFF],
                'other': [e for e in l if e[-1] != known],
            }

    # number of indexed groups and the last GID generated...
    print('%s %s' % (len(GID_index), GID))









#=======================================================================
#                                            vim:set ts=4 sw=4 nowrap :