diff --git a/gid.py b/gid.py index 446b460b..a767adda 100755 --- a/gid.py +++ b/gid.py @@ -1,7 +1,7 @@ #======================================================================= __version__ = '''0.0.01''' -__sub_version__ = '''20120313182702''' +__sub_version__ = '''20120313223928''' __copyright__ = '''(c) Alex A. Naanou 2011''' @@ -22,7 +22,10 @@ import pyexiv2 as metadata # XXX not yet sure if this is unique enough to avoid conflicts if one # photographer has enough cameras... # XXX also might be wise to add a photographer ID into here... -def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%H%M%S', hash_func=sha.sha): +def image_gid(path, format='%(artist)s-%(date)s-%(name)s', + date_format='%Y%m%d-%H%M%S', + default_artist='Unknown', + hash_func=sha.sha): ''' Calgulate image GID. @@ -53,15 +56,19 @@ def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-% data = { 'name': os.path.splitext(os.path.split(path)[-1])[0], } + ##!!! this might fail... + i = metadata.ImageMetadata('%s' % path) + i.read() # check if we need a date in the id... if '%(date)s' in format: - i = metadata.ImageMetadata('%s' % path) - i.read() d = i['Exif.Image.DateTime'].value data['date'] = d.strftime(date_format) # check if we need an artist... if '%(artist)s' in format: - data['artist'] = i['Exif.Image.Artist'].value.strip().replace(' ', '_') + try: + data['artist'] = i['Exif.Image.Artist'].value.strip().replace(' ', '_') + except KeyError: + data['artist'] = default_artist if hash_func is not None: return hash_func(format % data).hexdigest() diff --git a/index2.py b/index2.py index c9fa8940..76d7f4c9 100755 --- a/index2.py +++ b/index2.py @@ -1,7 +1,7 @@ #======================================================================= __version__ = '''0.0.01''' -__sub_version__ = '''20120310191654''' +__sub_version__ = '''20120313224544''' __copyright__ = '''(c) Alex A. 
Naanou 2011''' @@ -27,13 +27,15 @@ import pyexiv2 as metadata from pli.logictypes import ANY, OR +import store from gid import image_gid #----------------------------------------------------------------------- -CONFIG_NAME = 'test_config.json' +##CONFIG_NAME = 'test_config.json' ##CONFIG_NAME = 'tmp_config.json' +CONFIG_NAME = 'tmp_config.json.bak' config = json.load(open(CONFIG_NAME)) @@ -42,12 +44,13 @@ RAW = OR( 'NEF', 'nef', 'CRW', 'crw', 'CR2', 'cr2', - 'X3F', 'x3f' + 'X3F', 'x3f', + 'DNG', 'dng', ) JPEG = OR( 'JPG', 'jpg', - 'JPEG', 'jpeg' + 'JPEG', 'jpeg', ) PSD = OR( @@ -196,12 +199,15 @@ def split_by_raws(raws, lst, failed): return sets -def gid_index(index): +def gid_index(index, existing=None): ''' ''' # index via a propper GID... # split similarly named but different files... - res = {} + if existing is None: + res = {} + else: + res = existing failed = [] for name, l in index.iteritems(): l.sort() @@ -235,7 +241,7 @@ def gid_index(index): # mostly intended for importing... 'ctime': raw[3], ##!!! make these more general... - 'RAW': raws, + 'RAW': [e for e in l if e[2] == RAW], 'XMP': [e for e in l if e[2] == XMP], 'JPG': [e for e in l if e[2] == JPEG], 'PSD': [e for e in l if e[2] == PSD], @@ -249,6 +255,8 @@ def gid_index(index): #----------------------------------------------------------------------- if __name__ == '__main__': + INDEX_PATH = os.path.join('test', 'index2') + FILE_LIST = os.path.join('test', 'flatfilelist.json') BUILD_FILE_LIST = False if os.path.exists(FILE_LIST) else True @@ -256,19 +264,25 @@ if __name__ == '__main__': if BUILD_FILE_LIST: lst = list(list_files(config['ARCHIVE_ROOT'])) - print len(lst) + print 'found files:', len(lst) pprint(lst[0]) json.dump(lst, file(FILE_LIST, 'w')) + print 'saved...' 
lst = json.load(file(FILE_LIST)) - print len(lst) + print 'loaded:', len(lst) index = index_by_name(lst) - GID_index, failed = gid_index(index) +## GID_index = store.IndexWithCache(INDEX_PATH) + GID_index = store.Index(INDEX_PATH) + + GID_index, failed = gid_index(index, GID_index) + + json.dump(failed, file(os.path.join('test', 'failed-to-categorise.json'), 'w')) @@ -292,6 +306,12 @@ if __name__ == '__main__': pprint(GID_index.values()[0]) + store.save_file_index(GID_index, INDEX_PATH) + +## store.pack_file_index(INDEX_PATH) + + + diff --git a/store.py b/store.py index b9eae6cc..d819692d 100755 --- a/store.py +++ b/store.py @@ -1,7 +1,7 @@ #======================================================================= __version__ = '''0.0.01''' -__sub_version__ = '''20120313183119''' +__sub_version__ = '''20120313211552''' __copyright__ = '''(c) Alex A. Naanou 2011''' @@ -11,10 +11,14 @@ import os import json import zipfile +import pli.pattern.mixin.mapping as mapping +import pli.objutils as objutils + #----------------------------------------------------------------------- # XXX is this a good way to serialize the actual data in the fs??? +#-----------------------------------------------------save_file_index--- # NOTE: these will work with any topoloy and create a flat index... def save_file_index(index, path, index_depth=1, ext='.json'): ''' @@ -42,6 +46,7 @@ def save_file_index(index, path, index_depth=1, ext='.json'): return root_index +#-----------------------------------------------------load_file_index--- def load_file_index(path, ext='.json', pack_ext='.pack'): ''' ''' @@ -61,12 +66,15 @@ def load_file_index(path, ext='.json', pack_ext='.pack'): return d +#-----------------------------------------------------pack_file_index--- # XXX should we remove empty dirs here??? +##!!! this may create duplicate files within the pack... 
def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False): ''' NOTE: if keep_files is True, keep_dirs option will be ignored. ''' + ##!!! this will not remove original entries if they exist... z = zipfile.ZipFile(os.path.join(path, 'index' + pack_ext), 'a', compression=zipfile.ZIP_DEFLATED) for p, _, files in os.walk(path): for f in files: @@ -92,14 +100,15 @@ def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_ #----------------------------------------------------------------------- # lazy dict-like objects that read and write (optional) the fs... -import pli.pattern.mixin.mapping as mapping -import pli.objutils as objutils - +#---------------------------------------------------------------Index--- # XXX might be good to do a path index... ##!!! make this archive/file structure-agnostic... class Index(mapping.Mapping): + ''' + ''' __json_ext__ = '.json' __pack_ext__ = '.pack' + __index_depth__ = 2 def __init__(self, path): ''' @@ -151,7 +160,8 @@ class Index(mapping.Mapping): def __setitem__(self, name, value): ''' ''' - raise NotImplementedError + save_file_index({name: value}, self._path, index_depth=self.__index_depth__) +## raise NotImplementedError def __delitem__(self, name): ''' ''' @@ -178,8 +188,11 @@ class Index(mapping.Mapping): yield os.path.splitext(name)[0] + +#----------------------------------------------------------------------- REMOVED = object() +#------------------------------------------------------IndexWithCache--- class IndexWithCache(Index): ''' ''' @@ -229,7 +242,7 @@ class IndexWithCache(Index): ''' ''' if keys == (): - return save_file_index(self._cache, self._path) + return save_file_index(self._cache, self._path, index_depth=self.__index_depth__) flush = {} for k in keys: if k is REMOVED: @@ -238,7 +251,7 @@ class IndexWithCache(Index): ##!!! 
continue flush[k] = self[k] - return save_file_index(flush, self._path) + return save_file_index(flush, self._path, index_depth=self.__index_depth__) def cache_drop(self): ''' '''