diff --git a/gid.py b/gid.py
index 6d642298..446b460b 100755
--- a/gid.py
+++ b/gid.py
@@ -1,13 +1,15 @@
 #=======================================================================
 
 __version__ = '''0.0.01'''
-__sub_version__ = '''20120310183438'''
+__sub_version__ = '''20120313182702'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
 
 
 #-----------------------------------------------------------------------
 
 import os
+import sha
+import md5
 
 import pyexiv2 as metadata
 
@@ -20,7 +22,7 @@ import pyexiv2 as metadata
 # XXX not yet sure if this is unique enough to avoid conflicts if one
 # photographer has enough cameras...
 # XXX also might be wise to add a photographer ID into here...
-def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%H%M%S'):
+def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%H%M%S', hash_func=sha.sha):
 	'''
 	Calgulate image GID.
@@ -35,8 +37,12 @@ def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%
 	Example:
 		Alex_A.Naanou-20110627-195706-DSC_1234
 
+	If hash_func is not None, then it will be used to generate
+	a hex hash from the above string.
+
 	Supported fields:
-	%(artist)s	- Exif.Image.Artist field, stripped and spaces replaced with underscores.
+	%(artist)s	- Exif.Image.Artist field, stripped and spaces replaced
+				  with underscores.
 	%(date)s	- Exif.Image.DateTime formated to date_format argument.
 	%(name)s	- file name.
@@ -57,9 +63,18 @@ def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%
 	if '%(artist)s' in format:
 		data['artist'] = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
 
+	if hash_func is not None:
+		return hash_func(format % data).hexdigest()
 	return format % data
 
 
+#-----------------------------------------------------------------------
+if __name__ == '__main__':
+	pass
+
+
+
+
 #=======================================================================
 # vim:set ts=4 sw=4 nowrap :
diff --git a/index.py b/index.py
index 631fedae..01ee346f 100755
--- a/index.py
+++ b/index.py
@@ -1,7 +1,7 @@
 #=======================================================================
 
 __version__ = '''0.0.01'''
-__sub_version__ = '''20120202193619'''
+__sub_version__ = '''20120313183420'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
 
@@ -11,12 +11,14 @@ import os
 import json
 import zipfile
 import uuid
+from pprint import pprint
 
 from itertools import izip, izip_longest
 
 from pli.logictypes import ANY, OR
 
-from pprint import pprint
+
+import store
 
 
 #-----------------------------------------------------------------------
@@ -244,240 +246,6 @@ def split_images(index):
 
 
 
-#-----------------------------------------------------------------------
-# XXX is this a good way to serialize the actual data in the fs???
-
-# NOTE: these will work with any topoloy and create a flat index...
-def save_file_index(index, path, index_depth=1, ext='.json'):
-	'''
-
-	NOTE: index_depth with value greater than 2 is an overkill.
-	'''
-	root_index = {}
-	for k, v in index.items():
-		if index_depth > 0:
-			d = []
-			rest = k
-			# build index path...
-			for i in xrange(index_depth):
-				d += [rest[:2]]
-				rest = rest[2:]
-			# recursive directory construction...
- if not os.path.exists(os.path.join(path, *d)): - os.mkdir(os.path.join(path, *d)) - p = os.path.join(path, *d + [k + ext]) - else: - p = os.path.join(path, k + ext) - json.dump(v, file(p, 'w'), indent=4, separators=(', ', ': ')) - root_index[k] = p -## print '.', - return root_index - - -def load_file_index(path, ext='.json', pack_ext='.pack'): - ''' - ''' - d = {} - for p, _, files in os.walk(path): - for f in files: - # handle single files... - if f.endswith(ext): - d[os.path.splitext(f)[0]] = json.load(file(os.path.join(p, f))) - # handle packs... - elif f.endswith(pack_ext): - pack = zipfile.ZipFile(os.path.join(p, f)) - # load elements form the pack... - for name in pack.namelist(): - if name.endswith(ext): - d[os.path.splitext(name)[0]] = json.loads(pack.read(name)) - return d - - -# XXX should we remove empty dirs here??? -def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False): - ''' - - NOTE: if keep_files is True, keep_dirs option will be ignored. - ''' - z = zipfile.ZipFile(os.path.join(path, 'index' + pack_ext), 'a', compression=zipfile.ZIP_DEFLATED) - for p, _, files in os.walk(path): - for f in files: - if f.endswith(ext): - z.write(os.path.join(p, f), os.path.split(f)[-1]) - if not keep_files: - os.remove(os.path.join(p, f)) - # XXX this will not remove empty dirs (push one - # level up for that...) - if not keep_dirs and p != path: - ##!!! check if dir is empty.... - try: - # NOTE: this will fail for non-empty dirs... - os.rmdir(os.path.join(p)) - except: - pass - z.close() - -##!!! get path by name helper... -##!!! - - -#----------------------------------------------------------------------- -# lazy dict-like objects that read and write (optional) the fs... - -import pli.pattern.mixin.mapping as mapping -import pli.objutils as objutils - -# XXX might be good to do a path index... -##!!! make this archive/file structure-agnostic... -class Index(mapping.Mapping): - __json_ext__ = '.json' - __pack_ext__ = '.pack' - - def __init__(self, path): - ''' - ''' - self._path = path - - # specific interface... - ##!!! make this support different depths... - def __locations__(self, name): - ''' - ''' - ext = self.__json_ext__ - name += ext - # build probable locations... - return ( - name, - # look in a directory... - os.path.join(name[:2], name), - ##!!! HACK: make this dynamic... - os.path.join(name[:2], name[2:4], name), - ) - - # mapping interface... - def __getitem__(self, name): - ''' - ''' -## ext = self.__json_ext__ - pack_ext = self.__pack_ext__ -## file_name = name + ext - locations = self.__locations__(name) - # look of the file directly... - for n in locations: - if os.path.exists(os.path.join(self._path, n)): - return json.load(file(os.path.join(self._path, n))) - # try and locate a file in a pack... - for p, _, files in os.walk(self._path): - # files are searched sorted by their name... - files.sort() - for f in files: -## ##!!! do we need to look in odd named directories... 
-## if f == file_name: -## return json.load(file(os.path.join(p, file_name))) - if f.endswith(pack_ext): - z = zipfile.ZipFile(os.path.join(p, f)) - for n in locations: - if n in z.namelist(): - return json.loads(z.read(n)) - raise KeyError, name - def __setitem__(self, name, value): - ''' - ''' - raise NotImplementedError - def __delitem__(self, name): - ''' - ''' - raise NotImplementedError - def __iter__(self): - ''' - ''' - visited = [] - packs = [] - ext = self.__json_ext__ - pack_ext = self.__pack_ext__ - for p, _, files in os.walk(self._path): - for f in files: - if f.endswith(ext) and f not in visited: - visited += [f] - yield os.path.splitext(f)[0] - elif f.endswith(pack_ext): - packs += [os.path.join(p, f)] - for pack in packs: - z = zipfile.ZipFile(pack) - for name in z.namelist(): - if name not in visited: - visited += [name] - yield os.path.splitext(name)[0] - - -REMOVED = object() - -class IndexWithCache(Index): - ''' - ''' - objutils.createonaccess('_cache', dict) - - __sync__ = False - - def __getitem__(self, name): - ''' - ''' - if name in self._cache: - res = self._cache[name] - if res is REMOVED: - raise KeyError, name - return res - res = self._cache[name] = super(IndexWithCache, self).__getitem__(name) - return res - def __setitem__(self, name, value): - ''' - ''' - self._cache[name] = value - if self.__sync__: - self.cache_flush(name) - ##!!! - def __delitem__(self, name): - ''' - ''' - self._cache[name] = REMOVED - if self.__sync__: - self.cache_flush(name) - def __iter__(self): - ''' - ''' - cache = self._cache - for e in cache: - yield e - for e in super(IndexWithCache, self).__iter__(): - if e not in cache: - yield e - - # cache management... - ##!!! removed items will not get flushed yet... - # XXX to make removing elements history compatible, one way to go - # is to write a specifc value to the file, thus making it - # shadow the original value... - def cache_flush(self, *keys): - ''' - ''' - if keys == (): - return save_file_index(self._cache, self._path) - flush = {} - for k in keys: - if k is REMOVED: - # remove file... -## raise NotImplementedError - ##!!! - continue - flush[k] = self[k] - return save_file_index(flush, self._path) - def cache_drop(self): - ''' - ''' - del self._cache - - - #----------------------------------------------------------------------- ##!!! test implementation: rewrite... import pyexiv2 as metadata @@ -566,7 +334,7 @@ def build_image_cache(ic, min_rating, dest, tmp_path, preview_size=900): continue ic.cache_flush() - pack_file_index(ic._path, keep_files=False) + store.pack_file_index(ic._path, keep_files=False) return res @@ -594,27 +362,27 @@ if __name__ == '__main__': - root_index = save_file_index(index, os.path.join('test', 'index'), index_depth=1) + root_index = store.save_file_index(index, os.path.join('test', 'index'), index_depth=1) ## ##!!! this is not used in anything yet... 
##	json.dump(root_index, file(os.path.join('test', 'index', 'file_index.json'), 'w'))
 
-	pack_file_index(os.path.join('test', 'index'), keep_files=False)
+	store.pack_file_index(os.path.join('test', 'index'), keep_files=False)
 
 
-	d = load_file_index(os.path.join('test', 'index'))
+	d = store.load_file_index(os.path.join('test', 'index'))
 
 	print len(d)
 
 	k = d.keys()[0]
 
-	i = Index(os.path.join('test', 'index'))
+	i = store.Index(os.path.join('test', 'index'))
 
 	print len(i)
 
##	print i[k]
 
-	ic = IndexWithCache(os.path.join('test', 'index'))
+	ic = store.IndexWithCache(os.path.join('test', 'index'))
 
 	print ic[k]
 
@@ -622,13 +390,13 @@ if __name__ == '__main__':
 
 	ic.cache_flush()
 
-	pack_file_index(ic._path, keep_files=False)
+	store.pack_file_index(ic._path, keep_files=False)
 
 
 	ic.__sync__ = True
 
 	ic['111111111111111111111111111111111'] = {}
 
-	pack_file_index(ic._path, keep_files=False)
+	store.pack_file_index(ic._path, keep_files=False)
 
 
 	##!!! revise...
@@ -647,12 +415,12 @@ if __name__ == '__main__':
 	full = dict(json.load(file(os.path.join('test', 'filelist of 20k files.json'))))
 
 	print 'writing files...'
-	root_index = save_file_index(full, os.path.join('test', 'index'), index_depth=1)
+	root_index = store.save_file_index(full, os.path.join('test', 'index'), index_depth=1)
 	print 'packing files...'
 	# NOTE: the initial archiving seems REALLY SLOW, but working with
 	# small numbers of files from the archive seems adequate...
-	pack_file_index(os.path.join('test', 'index'), keep_files=True)
+	store.pack_file_index(os.path.join('test', 'index'), keep_files=True)
diff --git a/store.py b/store.py
new file mode 100755
index 00000000..b9eae6cc
--- /dev/null
+++ b/store.py
@@ -0,0 +1,250 @@
+#=======================================================================
+
+__version__ = '''0.0.01'''
+__sub_version__ = '''20120313183119'''
+__copyright__ = '''(c) Alex A. Naanou 2011'''
+
+
+#-----------------------------------------------------------------------
+
+import os
+import json
+import zipfile
+
+
+#-----------------------------------------------------------------------
+# XXX is this a good way to serialize the actual data in the fs???
+
+# NOTE: these will work with any topology and create a flat index...
+def save_file_index(index, path, index_depth=1, ext='.json'):
+	'''
+
+	NOTE: index_depth with value greater than 2 is overkill.
+	'''
+	root_index = {}
+	for k, v in index.items():
+		if index_depth > 0:
+			d = []
+			rest = k
+			# build index path...
+			for i in xrange(index_depth):
+				d += [rest[:2]]
+				rest = rest[2:]
+			# recursive directory construction...
+			if not os.path.exists(os.path.join(path, *d)):
+				os.makedirs(os.path.join(path, *d))
+			p = os.path.join(path, *d + [k + ext])
+		else:
+			p = os.path.join(path, k + ext)
+		json.dump(v, file(p, 'w'), indent=4, separators=(', ', ': '))
+		root_index[k] = p
+##		print '.',
+	return root_index
+
+
+def load_file_index(path, ext='.json', pack_ext='.pack'):
+	'''
+	'''
+	d = {}
+	for p, _, files in os.walk(path):
+		for f in files:
+			# handle single files...
+			if f.endswith(ext):
+				d[os.path.splitext(f)[0]] = json.load(file(os.path.join(p, f)))
+			# handle packs...
+			elif f.endswith(pack_ext):
+				pack = zipfile.ZipFile(os.path.join(p, f))
+				# load elements from the pack...
+				for name in pack.namelist():
+					if name.endswith(ext):
+						d[os.path.splitext(name)[0]] = json.loads(pack.read(name))
+	return d
+
+
+# XXX should we remove empty dirs here???
+def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False):
+	'''
+
+	NOTE: if keep_files is True, keep_dirs option will be ignored.
+	'''
+	z = zipfile.ZipFile(os.path.join(path, 'index' + pack_ext), 'a', compression=zipfile.ZIP_DEFLATED)
+	for p, _, files in os.walk(path):
+		for f in files:
+			if f.endswith(ext):
+				z.write(os.path.join(p, f), os.path.split(f)[-1])
+				if not keep_files:
+					os.remove(os.path.join(p, f))
+		# XXX this will not remove empty dirs (push one
+		# level up for that...)
+		if not keep_dirs and p != path:
+			##!!! check if dir is empty....
+			try:
+				# NOTE: this will fail for non-empty dirs...
+				os.rmdir(os.path.join(p))
+			except:
+				pass
+	z.close()
+
+##!!! get path by name helper...
+##!!!
+
+
+#-----------------------------------------------------------------------
+# lazy dict-like objects that read and (optionally) write the fs...
+
+import pli.pattern.mixin.mapping as mapping
+import pli.objutils as objutils
+
+# XXX might be good to do a path index...
+##!!! make this archive/file structure-agnostic...
+class Index(mapping.Mapping):
+	__json_ext__ = '.json'
+	__pack_ext__ = '.pack'
+
+	def __init__(self, path):
+		'''
+		'''
+		self._path = path
+
+	# specific interface...
+	##!!! make this support different depths...
+	def __locations__(self, name):
+		'''
+		'''
+		ext = self.__json_ext__
+		name += ext
+		# build probable locations...
+		return (
+			name,
+			# look in a directory...
+			os.path.join(name[:2], name),
+			##!!! HACK: make this dynamic...
+			os.path.join(name[:2], name[2:4], name),
+			)
+
+	# mapping interface...
+	def __getitem__(self, name):
+		'''
+		'''
+##		ext = self.__json_ext__
+		pack_ext = self.__pack_ext__
+##		file_name = name + ext
+		locations = self.__locations__(name)
+		# look for the file directly...
+		for n in locations:
+			if os.path.exists(os.path.join(self._path, n)):
+				return json.load(file(os.path.join(self._path, n)))
+		# try and locate a file in a pack...
+		for p, _, files in os.walk(self._path):
+			# files are searched sorted by their name...
+			files.sort()
+			for f in files:
+##				##!!! do we need to look in odd named directories...
+##				if f == file_name:
+##					return json.load(file(os.path.join(p, file_name)))
+				if f.endswith(pack_ext):
+					z = zipfile.ZipFile(os.path.join(p, f))
+					for n in locations:
+						if n in z.namelist():
+							return json.loads(z.read(n))
+		raise KeyError, name
+	def __setitem__(self, name, value):
+		'''
+		'''
+		raise NotImplementedError
+	def __delitem__(self, name):
+		'''
+		'''
+		raise NotImplementedError
+	def __iter__(self):
+		'''
+		'''
+		visited = []
+		packs = []
+		ext = self.__json_ext__
+		pack_ext = self.__pack_ext__
+		for p, _, files in os.walk(self._path):
+			for f in files:
+				if f.endswith(ext) and f not in visited:
+					visited += [f]
+					yield os.path.splitext(f)[0]
+				elif f.endswith(pack_ext):
+					packs += [os.path.join(p, f)]
+		for pack in packs:
+			z = zipfile.ZipFile(pack)
+			for name in z.namelist():
+				if name not in visited:
+					visited += [name]
+					yield os.path.splitext(name)[0]
+
+
+REMOVED = object()
+
+class IndexWithCache(Index):
+	'''
+	'''
+	objutils.createonaccess('_cache', dict)
+
+	__sync__ = False
+
+	def __getitem__(self, name):
+		'''
+		'''
+		if name in self._cache:
+			res = self._cache[name]
+			if res is REMOVED:
+				raise KeyError, name
+			return res
+		res = self._cache[name] = super(IndexWithCache, self).__getitem__(name)
+		return res
+	def __setitem__(self, name, value):
+		'''
+		'''
+		self._cache[name] = value
+		if self.__sync__:
+			self.cache_flush(name)
+	##!!!
+	def __delitem__(self, name):
+		'''
+		'''
+		self._cache[name] = REMOVED
+		if self.__sync__:
+			self.cache_flush(name)
+	def __iter__(self):
+		'''
+		'''
+		cache = self._cache
+		for e in cache:
+			yield e
+		for e in super(IndexWithCache, self).__iter__():
+			if e not in cache:
+				yield e
+
+	# cache management...
+	##!!! removed items will not get flushed yet...
+	# XXX to make removing elements history compatible, one way to go
+	# is to write a specific value to the file, thus making it
+	# shadow the original value...
+	def cache_flush(self, *keys):
+		'''
+		'''
+		if keys == ():
+			return save_file_index(self._cache, self._path)
+		flush = {}
+		for k in keys:
+			if k is REMOVED:
+				# remove file...
+##				raise NotImplementedError
+				##!!!
+				continue
+			flush[k] = self[k]
+		return save_file_index(flush, self._path)
+	def cache_drop(self):
+		'''
+		'''
+		del self._cache
+
+
+
+#=======================================================================
+# vim:set ts=4 sw=4 nowrap :
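
#=======================================================================
# Usage sketch (not part of the patch above): the new image_gid() hashing
# behaviour. 'DSC_1234.jpg' is a hypothetical image carrying the
# Exif.Image.Artist and Exif.Image.DateTime tags. With the default
# hash_func=sha.sha the function now returns a hex digest of the
# formatted GID string; hash_func=None keeps the raw string.

import md5

import gid

# default: SHA-1 hex digest of the '%(artist)s-%(date)s-%(name)s' string...
print gid.image_gid('DSC_1234.jpg')

# raw, human-readable GID, e.g. Alex_A.Naanou-20110627-195706-DSC_1234...
print gid.image_gid('DSC_1234.jpg', hash_func=None)

# any hash constructor whose result has .hexdigest() works, e.g. md5...
print gid.image_gid('DSC_1234.jpg', hash_func=md5.md5)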
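
#=======================================================================
# Usage sketch (not part of the patch above): a round trip through the
# new store module. The os.path.join('test', 'index') directory is
# assumed to already exist and the GID keys below are made up.

import os

import store

data = {'0123456789abcdef': {'name': 'DSC_1234.jpg', 'rating': 5}}

# write one .json file per key, fanned out into 2-char subdirectories...
store.save_file_index(data, os.path.join('test', 'index'), index_depth=1)

# fold the loose .json files into test/index/index.pack...
store.pack_file_index(os.path.join('test', 'index'), keep_files=False)

# lazy read-only view: loose files are checked first, then packs...
i = store.Index(os.path.join('test', 'index'))
print i['0123456789abcdef']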
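
#=======================================================================
# Usage sketch (not part of the patch above): IndexWithCache caching
# behaviour. Reads are cached; writes land in the cache and, when
# __sync__ is True, are flushed to disk immediately; deletes are
# shadowed in the cache with the REMOVED marker until cache_flush()
# learns to handle them (flagged ##!!! above). Keys here are made up.

import os

import store

ic = store.IndexWithCache(os.path.join('test', 'index'))

# deferred mode (default): mutations stay in memory...
ic['0123456789abcdef'] = {'name': 'DSC_1234.jpg'}
ic.cache_flush()                     # write everything cached back out
ic.cache_flush('0123456789abcdef')   # or flush selected keys only

# write-through mode: every __setitem__ flushes at once...
ic.__sync__ = True
ic['fedcba9876543210'] = {'name': 'DSC_0001.jpg'}

# drop the in-memory cache, forcing later reads to hit the fs again...
ic.cache_drop()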