lots of tweaks and fixes, mostly minor...

Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
This commit is contained in:
Alex A. Naanou 2012-03-15 15:25:03 +04:00
parent bbe1377e32
commit 7a03bdfdc6
4 changed files with 73 additions and 34 deletions

17
gid.py
View File

@ -1,7 +1,7 @@
#======================================================================= #=======================================================================
__version__ = '''0.0.01''' __version__ = '''0.0.01'''
__sub_version__ = '''20120313223928''' __sub_version__ = '''20120315140451'''
__copyright__ = '''(c) Alex A. Naanou 2011''' __copyright__ = '''(c) Alex A. Naanou 2011'''
@ -10,6 +10,7 @@ __copyright__ = '''(c) Alex A. Naanou 2011'''
import os import os
import sha import sha
import md5 import md5
import time
import pyexiv2 as metadata import pyexiv2 as metadata
@ -22,9 +23,11 @@ import pyexiv2 as metadata
# XXX not yet sure if this is unique enough to avoid conflicts if one # XXX not yet sure if this is unique enough to avoid conflicts if one
# photographer has enough cameras... # photographer has enough cameras...
# XXX also might be wise to add a photographer ID into here... # XXX also might be wise to add a photographer ID into here...
def image_gid(path, format='%(artist)s-%(date)s-%(name)s', def image_gid(path, date=None,
format='%(artist)s-%(date)s-%(name)s',
date_format='%Y%m%d-%H%M%S', date_format='%Y%m%d-%H%M%S',
default_artist='Unknown', default_artist='Unknown',
use_ctime=False,
hash_func=sha.sha): hash_func=sha.sha):
''' '''
Calculate image GID. Calculate image GID.
@ -61,8 +64,14 @@ def image_gid(path, format='%(artist)s-%(date)s-%(name)s',
i.read() i.read()
# check if we need a date in the id... # check if we need a date in the id...
if '%(date)s' in format: if '%(date)s' in format:
d = i['Exif.Image.DateTime'].value if date is not None:
data['date'] = d.strftime(date_format) data['date'] = time.strftime(date_format, time.gmtime(date))
elif use_ctime:
date = os.path.getctime(path)
data['date'] = time.strftime(date_format, time.gmtime(date))
else:
date = i['Exif.Image.DateTime'].value
data['date'] = date.strftime(date_format)
# check if we need an artist... # check if we need an artist...
if '%(artist)s' in format: if '%(artist)s' in format:
try: try:

View File

@ -1,7 +1,7 @@
#======================================================================= #=======================================================================
__version__ = '''0.0.01''' __version__ = '''0.0.01'''
__sub_version__ = '''20120313183420''' __sub_version__ = '''20120315151711'''
__copyright__ = '''(c) Alex A. Naanou 2011''' __copyright__ = '''(c) Alex A. Naanou 2011'''
@ -334,7 +334,7 @@ def build_image_cache(ic, min_rating, dest, tmp_path, preview_size=900):
continue continue
ic.cache_flush() ic.cache_flush()
store.pack_file_index(ic._path, keep_files=False) store.pack(ic._path, keep_files=False)
return res return res
@ -362,14 +362,14 @@ if __name__ == '__main__':
root_index = store.save_file_index(index, os.path.join('test', 'index'), index_depth=1) root_index = store.dump(index, os.path.join('test', 'index'), index_depth=1)
## ##!!! this is not used in anything yet... ## ##!!! this is not used in anything yet...
## json.dump(root_index, file(os.path.join('test', 'index', 'file_index.json'), 'w')) ## json.dump(root_index, file(os.path.join('test', 'index', 'file_index.json'), 'w'))
store.pack_file_index(os.path.join('test', 'index'), keep_files=False) store.pack(os.path.join('test', 'index'), keep_files=False)
d = store.load_file_index(os.path.join('test', 'index')) d = store.load(os.path.join('test', 'index'))
print len(d) print len(d)
@ -390,13 +390,13 @@ if __name__ == '__main__':
ic.cache_flush() ic.cache_flush()
store.pack_file_index(ic._path, keep_files=False) store.pack(ic._path, keep_files=False)
ic.__sync__ = True ic.__sync__ = True
ic['111111111111111111111111111111111'] = {} ic['111111111111111111111111111111111'] = {}
store.pack_file_index(ic._path, keep_files=False) store.pack(ic._path, keep_files=False)
##!!! revise... ##!!! revise...
@ -415,12 +415,12 @@ if __name__ == '__main__':
full = dict(json.load(file(os.path.join('test', 'filelist of 20k files.json')))) full = dict(json.load(file(os.path.join('test', 'filelist of 20k files.json'))))
print 'writing files...' print 'writing files...'
root_index = store.save_file_index(full, os.path.join('test', 'index'), index_depth=1) root_index = store.dump(full, os.path.join('test', 'index'), index_depth=1)
print 'packing files...' print 'packing files...'
# NOTE: the initial archiving seems REALLY SLOW, but working with # NOTE: the initial archiving seems REALLY SLOW, but working with
# small numbers of files from the archive seems adequate... # small numbers of files from the archive seems adequate...
store.pack_file_index(os.path.join('test', 'index'), keep_files=True) store.pack(os.path.join('test', 'index'), keep_files=True)

View File

@ -1,7 +1,7 @@
#======================================================================= #=======================================================================
__version__ = '''0.0.01''' __version__ = '''0.0.01'''
__sub_version__ = '''20120313224544''' __sub_version__ = '''20120315151510'''
__copyright__ = '''(c) Alex A. Naanou 2011''' __copyright__ = '''(c) Alex A. Naanou 2011'''
@ -33,9 +33,9 @@ from gid import image_gid
#----------------------------------------------------------------------- #-----------------------------------------------------------------------
##CONFIG_NAME = 'test_config.json' CONFIG_NAME = 'test_config.json'
##CONFIG_NAME = 'tmp_config.json' ##CONFIG_NAME = 'tmp_config.json'
CONFIG_NAME = 'tmp_config.json.bak' ##CONFIG_NAME = 'tmp_config.json.bak'
config = json.load(open(CONFIG_NAME)) config = json.load(open(CONFIG_NAME))
@ -87,6 +87,7 @@ SUBTREE_CLASSES = {
#----------------------------------------------------------list_files---
##!!! we will need to normalize the paths to one single scheme (either relative or absolute)... ##!!! we will need to normalize the paths to one single scheme (either relative or absolute)...
# XXX might need to fetch file data too... # XXX might need to fetch file data too...
def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=False, include_ctime=True): def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=False, include_ctime=True):
@ -114,6 +115,7 @@ def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=Fal
yield path, name, ext yield path, name, ext
#----------------------------------------------------------common_len---
def common_len(a, *b): def common_len(a, *b):
''' '''
''' '''
@ -123,6 +125,7 @@ def common_len(a, *b):
return len(min(*(a,) + b)) return len(min(*(a,) + b))
#-------------------------------------------------------path_distance---
##!!! is this meaningless? ##!!! is this meaningless?
def path_distance(a, b): def path_distance(a, b):
''' '''
@ -130,6 +133,7 @@ def path_distance(a, b):
return len(a) + len(b) - common_len(a, b)*2 return len(a) + len(b) - common_len(a, b)*2
#-------------------------------------------------------index_by_name---
def index_by_name(lst): def index_by_name(lst):
''' '''
index by file name (indexing preparation)... index by file name (indexing preparation)...
@ -158,6 +162,7 @@ def index_by_name(lst):
#-------------------------------------------------------split_by_raws---
def split_by_raws(raws, lst, failed): def split_by_raws(raws, lst, failed):
''' '''
''' '''
@ -199,6 +204,7 @@ def split_by_raws(raws, lst, failed):
return sets return sets
#-----------------------------------------------------------gid_index---
def gid_index(index, existing=None): def gid_index(index, existing=None):
''' '''
''' '''
@ -252,6 +258,7 @@ def gid_index(index, existing=None):
return res, failed return res, failed
#----------------------------------------------------------------------- #-----------------------------------------------------------------------
if __name__ == '__main__': if __name__ == '__main__':
@ -306,11 +313,9 @@ if __name__ == '__main__':
pprint(GID_index.values()[0]) pprint(GID_index.values()[0])
store.save_file_index(GID_index, INDEX_PATH) ## store.dump(GID_index, INDEX_PATH)
## store.pack_file_index(INDEX_PATH)
store.pack(INDEX_PATH)

View File

@ -1,7 +1,7 @@
#======================================================================= #=======================================================================
__version__ = '''0.0.01''' __version__ = '''0.0.01'''
__sub_version__ = '''20120313211552''' __sub_version__ = '''20120315152600'''
__copyright__ = '''(c) Alex A. Naanou 2011''' __copyright__ = '''(c) Alex A. Naanou 2011'''
@ -18,12 +18,30 @@ import pli.objutils as objutils
#----------------------------------------------------------------------- #-----------------------------------------------------------------------
# XXX is this a good way to serialize the actual data in the fs??? # XXX is this a good way to serialize the actual data in the fs???
#-----------------------------------------------------save_file_index--- #----------------------------------------------------------------dump---
# NOTE: these will work with any topology and create a flat index... # NOTE: these will work with any topology and create a flat index...
def save_file_index(index, path, index_depth=1, ext='.json'): # XXX should this know anything about data versions???
def dump(index, path, index_depth=1, ext='.json'):
''' '''
store an index in fs store.
NOTE: index_depth with value greater than 2 is an overkill. by default the structure is as follows:
key: abcdefg
path: ab/abcdefg (index_depth=1)
index_depth sets the directory structure, if 0 a flat store is
created. here is an example path for index_depth=2
path: ab/cd/abcdefg
the dict value is stored in the file in JSON format.
NOTE: this can be used with parts of a dict.
NOTE: existing data will be overwritten.
NOTE: store balancing depends on key structure.
NOTE: index_depth with value greater than 2 is likely an overkill.
''' '''
root_index = {} root_index = {}
for k, v in index.items(): for k, v in index.items():
@ -42,13 +60,19 @@ def save_file_index(index, path, index_depth=1, ext='.json'):
p = os.path.join(path, k + ext) p = os.path.join(path, k + ext)
json.dump(v, file(p, 'w'), indent=4, separators=(', ', ': ')) json.dump(v, file(p, 'w'), indent=4, separators=(', ', ': '))
root_index[k] = p root_index[k] = p
## print '.',
return root_index return root_index
#-----------------------------------------------------load_file_index--- #-----------------------------------------------------load_file_index---
def load_file_index(path, ext='.json', pack_ext='.pack'): def load(path, ext='.json', pack_ext='.pack'):
''' '''
load data from fs store.
for data format see dump(...).
NOTE: this will load the whole data set.
NOTE: unpacked data shadows packed data.
NOTE: this does not care about topology.
''' '''
d = {} d = {}
for p, _, files in os.walk(path): for p, _, files in os.walk(path):
@ -68,9 +92,13 @@ def load_file_index(path, ext='.json', pack_ext='.pack'):
#-----------------------------------------------------pack_file_index--- #-----------------------------------------------------pack_file_index---
# XXX should we remove empty dirs here??? # XXX should we remove empty dirs here???
##!!! this may create duplicate files within the pack... # XXX this will create duplicate files within the pack
def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False): # only the last is accessible but this might cause trouble elsewhere...
# NOTE: this should be done in the background (possible race-condition
# with removing a file while it is being read)
def pack(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False):
''' '''
pack an fs data store.
NOTE: if keep_files is True, keep_dirs option will be ignored. NOTE: if keep_files is True, keep_dirs option will be ignored.
''' '''
@ -93,8 +121,6 @@ def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_
pass pass
z.close() z.close()
##!!! get path by name helper...
##!!!
#----------------------------------------------------------------------- #-----------------------------------------------------------------------
@ -160,8 +186,7 @@ class Index(mapping.Mapping):
def __setitem__(self, name, value): def __setitem__(self, name, value):
''' '''
''' '''
save_file_index({name: value}, self._path, index_depth=self.__index_depth__) dump({name: value}, self._path, index_depth=self.__index_depth__)
## raise NotImplementedError
def __delitem__(self, name): def __delitem__(self, name):
''' '''
''' '''
@ -242,7 +267,7 @@ class IndexWithCache(Index):
''' '''
''' '''
if keys == (): if keys == ():
return save_file_index(self._cache, self._path, index_depth=self.__index_depth__) return dump(self._cache, self._path, index_depth=self.__index_depth__)
flush = {} flush = {}
for k in keys: for k in keys:
if k is REMOVED: if k is REMOVED:
@ -251,7 +276,7 @@ class IndexWithCache(Index):
##!!! ##!!!
continue continue
flush[k] = self[k] flush[k] = self[k]
return save_file_index(flush, self._path, index_depth=self.__index_depth__) return dump(flush, self._path, index_depth=self.__index_depth__)
def cache_drop(self): def cache_drop(self):
''' '''
''' '''