From 7a03bdfdc6d6aa01446bcb69c54e5519014473d0 Mon Sep 17 00:00:00 2001 From: "Alex A. Naanou" Date: Thu, 15 Mar 2012 15:25:03 +0400 Subject: [PATCH] lots of tweaks and fixes, mostly minor... Signed-off-by: Alex A. Naanou --- gid.py | 17 +++++++++++++---- index.py | 18 +++++++++--------- index2.py | 19 ++++++++++++------- store.py | 53 +++++++++++++++++++++++++++++++++++++++-------------- 4 files changed, 73 insertions(+), 34 deletions(-) diff --git a/gid.py b/gid.py index a767adda..e5565074 100755 --- a/gid.py +++ b/gid.py @@ -1,7 +1,7 @@ #======================================================================= __version__ = '''0.0.01''' -__sub_version__ = '''20120313223928''' +__sub_version__ = '''20120315140451''' __copyright__ = '''(c) Alex A. Naanou 2011''' @@ -10,6 +10,7 @@ __copyright__ = '''(c) Alex A. Naanou 2011''' import os import sha import md5 +import time import pyexiv2 as metadata @@ -22,9 +23,11 @@ import pyexiv2 as metadata # XXX not yet sure if this is unique enough to avoid conflicts if one # photographer has enough cameras... # XXX also might be wise to add a photographer ID into here... -def image_gid(path, format='%(artist)s-%(date)s-%(name)s', +def image_gid(path, date=None, + format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%H%M%S', default_artist='Unknown', + use_ctime=False, hash_func=sha.sha): ''' Calgulate image GID. @@ -61,8 +64,14 @@ def image_gid(path, format='%(artist)s-%(date)s-%(name)s', i.read() # check if we need a date in the id... if '%(date)s' in format: - d = i['Exif.Image.DateTime'].value - data['date'] = d.strftime(date_format) + if date is not None: + data['date'] = time.strftime(date_format, time.gmtime(date)) + elif use_ctime: + date = os.path.getctime(path) + data['date'] = time.strftime(date_format, time.gmtime(date)) + else: + date = i['Exif.Image.DateTime'].value + data['date'] = date.strftime(date_format) # check if we need an artist... if '%(artist)s' in format: try: diff --git a/index.py b/index.py index 01ee346f..8769e4ff 100755 --- a/index.py +++ b/index.py @@ -1,7 +1,7 @@ #======================================================================= __version__ = '''0.0.01''' -__sub_version__ = '''20120313183420''' +__sub_version__ = '''20120315151711''' __copyright__ = '''(c) Alex A. Naanou 2011''' @@ -334,7 +334,7 @@ def build_image_cache(ic, min_rating, dest, tmp_path, preview_size=900): continue ic.cache_flush() - store.pack_file_index(ic._path, keep_files=False) + store.pack(ic._path, keep_files=False) return res @@ -362,14 +362,14 @@ if __name__ == '__main__': - root_index = store.save_file_index(index, os.path.join('test', 'index'), index_depth=1) + root_index = store.dump(index, os.path.join('test', 'index'), index_depth=1) ## ##!!! this is not used in anything yet... ## json.dump(root_index, file(os.path.join('test', 'index', 'file_index.json'), 'w')) - store.pack_file_index(os.path.join('test', 'index'), keep_files=False) + store.pack(os.path.join('test', 'index'), keep_files=False) - d = store.load_file_index(os.path.join('test', 'index')) + d = store.load(os.path.join('test', 'index')) print len(d) @@ -390,13 +390,13 @@ if __name__ == '__main__': ic.cache_flush() - store.pack_file_index(ic._path, keep_files=False) + store.pack(ic._path, keep_files=False) ic.__sync__ = True ic['111111111111111111111111111111111'] = {} - store.pack_file_index(ic._path, keep_files=False) + store.pack(ic._path, keep_files=False) ##!!! revise... @@ -415,12 +415,12 @@ if __name__ == '__main__': full = dict(json.load(file(os.path.join('test', 'filelist of 20k files.json')))) print 'writing files...' - root_index = store.save_file_index(full, os.path.join('test', 'index'), index_depth=1) + root_index = store.dump(full, os.path.join('test', 'index'), index_depth=1) print 'packing files...' # NOTE: the initial archiving seems REALLY SLOW, but working with # small numbers of files from the archive seems adequate... - store.pack_file_index(os.path.join('test', 'index'), keep_files=True) + store.pack(os.path.join('test', 'index'), keep_files=True) diff --git a/index2.py b/index2.py index 76d7f4c9..3ae31c10 100755 --- a/index2.py +++ b/index2.py @@ -1,7 +1,7 @@ #======================================================================= __version__ = '''0.0.01''' -__sub_version__ = '''20120313224544''' +__sub_version__ = '''20120315151510''' __copyright__ = '''(c) Alex A. Naanou 2011''' @@ -33,9 +33,9 @@ from gid import image_gid #----------------------------------------------------------------------- -##CONFIG_NAME = 'test_config.json' +CONFIG_NAME = 'test_config.json' ##CONFIG_NAME = 'tmp_config.json' -CONFIG_NAME = 'tmp_config.json.bak' +##CONFIG_NAME = 'tmp_config.json.bak' config = json.load(open(CONFIG_NAME)) @@ -87,6 +87,7 @@ SUBTREE_CLASSES = { +#----------------------------------------------------------list_files--- ##!!! we will need to normalize the paths to one single scheme (either relative or absolute)... # XXX might need to fetch file data too... def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=False, include_ctime=True): @@ -114,6 +115,7 @@ def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=Fal yield path, name, ext +#----------------------------------------------------------common_len--- def common_len(a, *b): ''' ''' @@ -123,6 +125,7 @@ def common_len(a, *b): return len(min(*(a,) + b)) +#-------------------------------------------------------path_distance--- ##!!! is this meaningless? def path_distance(a, b): ''' @@ -130,6 +133,7 @@ def path_distance(a, b): return len(a) + len(b) - common_len(a, b)*2 +#-------------------------------------------------------index_by_name--- def index_by_name(lst): ''' index by file name (indexing preparation)... @@ -158,6 +162,7 @@ def index_by_name(lst): +#-------------------------------------------------------split_by_raws--- def split_by_raws(raws, lst, failed): ''' ''' @@ -199,6 +204,7 @@ def split_by_raws(raws, lst, failed): return sets +#-----------------------------------------------------------gid_index--- def gid_index(index, existing=None): ''' ''' @@ -252,6 +258,7 @@ def gid_index(index, existing=None): return res, failed + #----------------------------------------------------------------------- if __name__ == '__main__': @@ -306,11 +313,9 @@ if __name__ == '__main__': pprint(GID_index.values()[0]) - store.save_file_index(GID_index, INDEX_PATH) - -## store.pack_file_index(INDEX_PATH) - +## store.dump(GID_index, INDEX_PATH) + store.pack(INDEX_PATH) diff --git a/store.py b/store.py index d819692d..3c6ee0f5 100755 --- a/store.py +++ b/store.py @@ -1,7 +1,7 @@ #======================================================================= __version__ = '''0.0.01''' -__sub_version__ = '''20120313211552''' +__sub_version__ = '''20120315152600''' __copyright__ = '''(c) Alex A. Naanou 2011''' @@ -18,12 +18,30 @@ import pli.objutils as objutils #----------------------------------------------------------------------- # XXX is this a good way to serialize the actual data in the fs??? -#-----------------------------------------------------save_file_index--- +#----------------------------------------------------------------dump--- # NOTE: these will work with any topoloy and create a flat index... -def save_file_index(index, path, index_depth=1, ext='.json'): +# XXX should this know anything about data versions??? +def dump(index, path, index_depth=1, ext='.json'): ''' + store an index in fs store. - NOTE: index_depth with value greater than 2 is an overkill. + by default the structure is as follows: + + key: abcdefg + path: ab/abcdefg (index_depth=1) + + + index_depth sets the directory structure, if 0 a flat store is + created. here is an example path for index_depth=2 + + path: ab/cd/abcdefg + + the dict value is stored in the file in JSON format. + + NOTE: this can be used with parts of a dict. + NOTE: existing data will be overwritten. + NOTE: store balancing depends on key structure. + NOTE: index_depth with value greater than 2 is likely an overkill. ''' root_index = {} for k, v in index.items(): @@ -42,13 +60,19 @@ def save_file_index(index, path, index_depth=1, ext='.json'): p = os.path.join(path, k + ext) json.dump(v, file(p, 'w'), indent=4, separators=(', ', ': ')) root_index[k] = p -## print '.', return root_index #-----------------------------------------------------load_file_index--- -def load_file_index(path, ext='.json', pack_ext='.pack'): +def load(path, ext='.json', pack_ext='.pack'): ''' + load data from fs store. + + for data format see dump(...). + + NOTE: this will load the whole data set. + NOTE: unpacked data shadows packed data. + NOTE: this does not care about topology. ''' d = {} for p, _, files in os.walk(path): @@ -68,9 +92,13 @@ def load_file_index(path, ext='.json', pack_ext='.pack'): #-----------------------------------------------------pack_file_index--- # XXX should we remove empty dirs here??? -##!!! this may creae duplicate files within the pack... -def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False): +# XXX this will create duplicate files within the pack +# only the last is accesible but this might cause trouble elsewhere... +# NOTE: this should be done in the background (possible race-condition +# with removing a file while it is being read) +def pack(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False): ''' + pack an fs data store. NOTE: if keep_files is True, keep_dirs option will be ignored. ''' @@ -93,8 +121,6 @@ def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_ pass z.close() -##!!! get path by name helper... -##!!! #----------------------------------------------------------------------- @@ -160,8 +186,7 @@ class Index(mapping.Mapping): def __setitem__(self, name, value): ''' ''' - save_file_index({name: value}, self._path, index_depth=self.__index_depth__) -## raise NotImplementedError + dump({name: value}, self._path, index_depth=self.__index_depth__) def __delitem__(self, name): ''' ''' @@ -242,7 +267,7 @@ class IndexWithCache(Index): ''' ''' if keys == (): - return save_file_index(self._cache, self._path, index_depth=self.__index_depth__) + return dump(self._cache, self._path, index_depth=self.__index_depth__) flush = {} for k in keys: if k is REMOVED: @@ -251,7 +276,7 @@ class IndexWithCache(Index): ##!!! continue flush[k] = self[k] - return save_file_index(flush, self._path, index_depth=self.__index_depth__) + return dump(flush, self._path, index_depth=self.__index_depth__) def cache_drop(self): ''' '''