lots of fixes, and some code reorganization...

Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
Alex A. Naanou 2012-03-13 22:48:46 +04:00
parent 388215cdf3
commit bbe1377e32
3 changed files with 62 additions and 22 deletions

gid.py

@ -1,7 +1,7 @@
 #=======================================================================
 __version__ = '''0.0.01'''
-__sub_version__ = '''20120313182702'''
+__sub_version__ = '''20120313223928'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
@ -22,7 +22,10 @@ import pyexiv2 as metadata
 # XXX not yet sure if this is unique enough to avoid conflicts if one
 # photographer has enough cameras...
 # XXX also might be wise to add a photographer ID into here...
-def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%H%M%S', hash_func=sha.sha):
+def image_gid(path, format='%(artist)s-%(date)s-%(name)s',
+        date_format='%Y%m%d-%H%M%S',
+        default_artist='Unknown',
+        hash_func=sha.sha):
     '''
     Calgulate image GID.
@ -53,15 +56,19 @@ def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%
     data = {
         'name': os.path.splitext(os.path.split(path)[-1])[0],
     }
-    # check if we need a date in the id...
-    if '%(date)s' in format:
-        i = metadata.ImageMetadata('%s' % path)
-        i.read()
+    ##!!! this might fail...
+    i = metadata.ImageMetadata('%s' % path)
+    i.read()
+    # check if we need a date in the id...
+    if '%(date)s' in format:
         d = i['Exif.Image.DateTime'].value
         data['date'] = d.strftime(date_format)
     # check if we need an artist...
     if '%(artist)s' in format:
-        data['artist'] = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
+        try:
+            data['artist'] = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
+        except KeyError:
+            data['artist'] = default_artist
     if hash_func is not None:
         return hash_func(format % data).hexdigest()
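
A rough usage sketch of the updated image_gid() (hypothetical file name; it assumes the signature shown in the hunk above and that pyexiv2 can read the file's EXIF block):

    from gid import image_gid

    # default format: artist + date + name, hashed with sha.sha...
    gid = image_gid('test/img/DSC_1234.NEF')

    # a file with no Exif.Image.Artist tag no longer raises KeyError;
    # the artist field falls back to default_artist instead...
    gid = image_gid('test/img/DSC_1234.NEF', default_artist='Unknown_photographer')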


@ -1,7 +1,7 @@
 #=======================================================================
 __version__ = '''0.0.01'''
-__sub_version__ = '''20120310191654'''
+__sub_version__ = '''20120313224544'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
@ -27,13 +27,15 @@ import pyexiv2 as metadata
 from pli.logictypes import ANY, OR
+import store
 from gid import image_gid
 #-----------------------------------------------------------------------
-CONFIG_NAME = 'test_config.json'
+##CONFIG_NAME = 'test_config.json'
 ##CONFIG_NAME = 'tmp_config.json'
+CONFIG_NAME = 'tmp_config.json.bak'
 config = json.load(open(CONFIG_NAME))
@ -42,12 +44,13 @@ RAW = OR(
     'NEF', 'nef',
     'CRW', 'crw',
     'CR2', 'cr2',
-    'X3F', 'x3f'
+    'X3F', 'x3f',
+    'DNG', 'dng',
 )
 JPEG = OR(
     'JPG', 'jpg',
-    'JPEG', 'jpeg'
+    'JPEG', 'jpeg',
 )
 PSD = OR(
@ -196,12 +199,15 @@ def split_by_raws(raws, lst, failed):
     return sets
-def gid_index(index):
+def gid_index(index, existing=None):
     '''
     '''
     # index via a propper GID...
     # split similarly named but different files...
-    res = {}
+    if existing is None:
+        res = {}
+    else:
+        res = existing
     failed = []
     for name, l in index.iteritems():
         l.sort()
@ -235,7 +241,7 @@ def gid_index(index):
         # mostly intended for importing...
         'ctime': raw[3],
         ##!!! make these more general...
-        'RAW': raws,
+        'RAW': [e for e in l if e[2] == RAW],
         'XMP': [e for e in l if e[2] == XMP],
         'JPG': [e for e in l if e[2] == JPEG],
         'PSD': [e for e in l if e[2] == PSD],
@ -249,6 +255,8 @@ def gid_index(index):
 #-----------------------------------------------------------------------
 if __name__ == '__main__':
+    INDEX_PATH = os.path.join('test', 'index2')
     FILE_LIST = os.path.join('test', 'flatfilelist.json')
     BUILD_FILE_LIST = False if os.path.exists(FILE_LIST) else True
@ -256,19 +264,25 @@ if __name__ == '__main__':
     if BUILD_FILE_LIST:
         lst = list(list_files(config['ARCHIVE_ROOT']))
-        print len(lst)
+        print 'found files:', len(lst)
         pprint(lst[0])
         json.dump(lst, file(FILE_LIST, 'w'))
+        print 'saved...'
     lst = json.load(file(FILE_LIST))
-    print len(lst)
+    print 'loaded:', len(lst)
     index = index_by_name(lst)
-    GID_index, failed = gid_index(index)
+## GID_index = store.IndexWithCache(INDEX_PATH)
+    GID_index = store.Index(INDEX_PATH)
+    GID_index, failed = gid_index(index, GID_index)
+    json.dump(failed, file(os.path.join('test', 'failed-to-categorise.json'), 'w'))
@ -292,6 +306,12 @@ if __name__ == '__main__':
     pprint(GID_index.values()[0])
+    store.save_file_index(GID_index, INDEX_PATH)
+## store.pack_file_index(INDEX_PATH)
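
In effect the __main__ block now re-keys the name index by GID into a disk-backed store.Index instead of a plain dict. A condensed sketch of that flow (names and paths are the ones from the hunks above; it assumes store.Index writes each assignment through to disk, as its new __setitem__ suggests):

    import os, json
    import store

    INDEX_PATH = os.path.join('test', 'index2')

    index = index_by_name(lst)                        # group the flat file list by base name
    GID_index = store.Index(INDEX_PATH)               # disk-backed, dict-like index
    GID_index, failed = gid_index(index, GID_index)   # populate it in place via existing=

    # entries that could not be categorised are dumped for later inspection...
    json.dump(failed, file(os.path.join('test', 'failed-to-categorise.json'), 'w'))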


@ -1,7 +1,7 @@
 #=======================================================================
 __version__ = '''0.0.01'''
-__sub_version__ = '''20120313183119'''
+__sub_version__ = '''20120313211552'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
@ -11,10 +11,14 @@ import os
 import json
 import zipfile
+import pli.pattern.mixin.mapping as mapping
+import pli.objutils as objutils
 #-----------------------------------------------------------------------
 # XXX is this a good way to serialize the actual data in the fs???
+#-----------------------------------------------------save_file_index---
 # NOTE: these will work with any topoloy and create a flat index...
 def save_file_index(index, path, index_depth=1, ext='.json'):
     '''
@ -42,6 +46,7 @@ def save_file_index(index, path, index_depth=1, ext='.json'):
     return root_index
+#-----------------------------------------------------load_file_index---
 def load_file_index(path, ext='.json', pack_ext='.pack'):
     '''
     '''
@ -61,12 +66,15 @@ def load_file_index(path, ext='.json', pack_ext='.pack'):
     return d
+#-----------------------------------------------------pack_file_index---
 # XXX should we remove empty dirs here???
+##!!! this may creae duplicate files within the pack...
 def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False):
     '''
     NOTE: if keep_files is True, keep_dirs option will be ignored.
     '''
+    ##!!! this will not remove original entries if they exist...
     z = zipfile.ZipFile(os.path.join(path, 'index' + pack_ext), 'a', compression=zipfile.ZIP_DEFLATED)
     for p, _, files in os.walk(path):
         for f in files:
@ -92,14 +100,15 @@ def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_
 #-----------------------------------------------------------------------
 # lazy dict-like objects that read and write (optional) the fs...
-import pli.pattern.mixin.mapping as mapping
-import pli.objutils as objutils
+#---------------------------------------------------------------Index---
 # XXX might be good to do a path index...
 ##!!! make this archive/file structure-agnostic...
 class Index(mapping.Mapping):
+    '''
+    '''
     __json_ext__ = '.json'
     __pack_ext__ = '.pack'
+    __index_depth__ = 2
     def __init__(self, path):
         '''
@ -151,7 +160,8 @@ class Index(mapping.Mapping):
     def __setitem__(self, name, value):
         '''
         '''
-        raise NotImplementedError
+        save_file_index({name: value}, self._path, index_depth=self.__index_depth__)
+## raise NotImplementedError
     def __delitem__(self, name):
         '''
         '''
@ -178,8 +188,11 @@ class Index(mapping.Mapping):
             yield os.path.splitext(name)[0]
+#-----------------------------------------------------------------------
 REMOVED = object()
+#------------------------------------------------------IndexWithCache---
 class IndexWithCache(Index):
     '''
     '''
@ -229,7 +242,7 @@ class IndexWithCache(Index):
         '''
         '''
         if keys == ():
-            return save_file_index(self._cache, self._path)
+            return save_file_index(self._cache, self._path, index_depth=self.__index_depth__)
         flush = {}
         for k in keys:
             if k is REMOVED:
@ -238,7 +251,7 @@ class IndexWithCache(Index):
                 ##!!!
                 continue
             flush[k] = self[k]
-        return save_file_index(flush, self._path)
+        return save_file_index(flush, self._path, index_depth=self.__index_depth__)
     def cache_drop(self):
         '''
         '''
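
A small usage sketch of the reorganized store module (it assumes the behaviour visible in the hunks above: save_file_index() fans keys out into per-entry .json files under the given path, and Index is a dict-like view over that directory; the GID key and value shown are purely illustrative):

    import os
    import store

    INDEX_PATH = os.path.join('test', 'index2')

    idx = store.Index(INDEX_PATH)

    # __setitem__ now writes through to disk via save_file_index()
    # instead of raising NotImplementedError, using the new
    # __index_depth__ = 2 fan-out...
    idx['example-gid'] = {'ctime': 0, 'RAW': [], 'XMP': [], 'JPG': [], 'PSD': []}

    # the whole index can still be dumped and (optionally) packed in one go...
    store.save_file_index(idx, INDEX_PATH)
    ##store.pack_file_index(INDEX_PATH)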