lots of minor tweaks and changes...

Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
Alex A. Naanou 2013-03-27 18:13:52 +04:00
parent 4734dfd4aa
commit 3d41a07c7a
4 changed files with 143 additions and 27 deletions

gid.py

@@ -1,7 +1,7 @@
 #=======================================================================
 __version__ = '''0.0.01'''
-__sub_version__ = '''20130322142905'''
+__sub_version__ = '''20130325203750'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
@@ -52,7 +52,8 @@ def image_gid(path, date=None,
     Supported fields:
         %(artist)s - Exif.Image.Artist field, stripped and spaces replaced
                 with underscores.
+                If no artist info is set this will be set to default_artist.
-        %(date)s - Exif.Image.DateTime formated to date_format argument.
+        %(date)s - Exif.Photo.DateTimeOriginal formated to date_format argument.
         %(name)s - file name.

     NOTE: date and time are the date and time the image was made ('Exif.Image.DateTime')
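
The GID itself is built with ordinary %-style string formatting over these fields. A minimal sketch of the expansion (the format string and field values below are illustrative, not taken from the commit):

    # illustrative GID expansion with %-style fields (hypothetical values)...
    data = {
        'artist': 'Alex_A._Naanou',    # Exif.Image.Artist, spaces -> underscores
        'date': '20130325-203750',     # DateTimeOriginal rendered via date_format
        'name': 'DSC0042',             # file name
    }
    gid_format = '%(date)s-%(artist)s-%(name)s'
    print gid_format % data    # -> 20130325-203750-Alex_A._Naanou-DSC0042
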
@@ -77,7 +78,6 @@ def image_gid(path, date=None,
             date = os.path.getctime(path)
             data['date'] = time.strftime(date_format, time.gmtime(date))
         else:
-##            date = i['Exif.Image.DateTime'].value
             date = i['Exif.Photo.DateTimeOriginal'].value
             data['date'] = date.strftime(date_format)
         # check if we need an artist...
@@ -85,7 +85,10 @@ def image_gid(path, date=None,
         data['artist'] = default_artist
         if i is not None:
             try:
-                data['artist'] = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
+                # set the artist if in EXIF...
+                a = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
+                if a != '':
+                    data['artist'] = a
             except KeyError:
                 pass
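
The new artist handling is a small fallback pattern: start from default_artist and override it only when the EXIF tag is present and non-empty (previously an empty Exif.Image.Artist would clobber the default). A standalone sketch of the same logic, assuming only a dict-like metadata object with pyexiv2-style .value attributes as used above:

    def artist_from_exif(i, default_artist='unknown'):
        # start from the default and override only on a non-empty EXIF value...
        artist = default_artist
        if i is not None:
            try:
                a = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
                if a != '':
                    artist = a
            except KeyError:
                pass
        return artist
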

(file name not shown)

@@ -1,7 +1,7 @@
 #=======================================================================
 __version__ = '''0.0.01'''
-__sub_version__ = '''20130319151025'''
+__sub_version__ = '''20130326030151'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
@@ -83,6 +83,9 @@ TYPES = {
     'xmp': XMP,
 }

+SKIP_DIRS = '.sys2'
+SKIP_MARKER = '.skipindexing'
+
 SUBTREE_CLASSES = {
     'preview': 'preview',
@@ -97,12 +100,22 @@ SUBTREE_CLASSES = {
 #----------------------------------------------------------list_files---
 ##!!! we will need to normalize the paths to one single scheme (either relative or absolute)...
 # XXX might need to fetch file data too...
-def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=False, include_ctime=True):
+def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM,
+        include_root_path=False, include_ctime=True,
+        skip_marker=SKIP_MARKER, skip_dirs=SKIP_DIRS):
     '''
     yields:
         (<path>, <name>, <ext>[, <ctime>]),
     '''
     for orig_path, dirs, files in os.walk(root):
+        # skip dir trees containing a skip_marker file...
+        if skip_marker in files:
+            del dirs[:]
+            continue
+        # skip dirs...
+        while skip_dirs in dirs:
+            dirs.remove(skip_dirs)
         # XXX is this correct...
         path = orig_path.split(os.path.sep)
         # remove root from path...
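
Both skip mechanisms work because os.walk() lets the caller prune its dirs list in place: emptying it with del dirs[:] stops descent into the current subtree, while dirs.remove() prunes individual directories. A minimal standalone sketch of the same pattern (function name introduced here for illustration):

    import os

    def walk_skipping(root, skip_marker='.skipindexing', skip_dirs='.sys2'):
        for path, dirs, files in os.walk(root):
            # a marker file prunes the whole subtree below it...
            if skip_marker in files:
                del dirs[:]    # in-place: os.walk() will not descend further
                continue
            # named directories are dropped from the walk...
            while skip_dirs in dirs:
                dirs.remove(skip_dirs)
            yield path, files
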
@@ -125,6 +138,7 @@ def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM,
 #----------------------------------------------------------common_len---
 def common_len(a, *b):
     '''
+    calculate the common path length.
     '''
     for i, l in enumerate(izip(*(a,) + b)):
         if len(set(l)) != 1:
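
common_len() zips the sequences together and stops at the first position where they disagree, so the result is the length of the shared prefix; with paths pre-split into components (as list_files() yields them) that is the length of the common parent path. A sketch of the complete function, assuming it returns the full zipped length when no mismatch is found:

    from itertools import izip

    def common_len(a, *b):
        # compare element-wise across all sequences; the first position
        # holding more than one distinct value ends the common prefix...
        i = -1
        for i, l in enumerate(izip(*(a,) + b)):
            if len(set(l)) != 1:
                return i
        return i + 1

    print common_len(['2013', '03', 'raw'], ['2013', '03', 'jpeg'])    # -> 2
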
@@ -174,13 +188,12 @@ def split_by_raws(raws, lst, failed):
     '''
     '''
 ##    raws = [e for e in lst if e[2] == RAW]
+    # top level common path...
     common = common_len(*[ e[0] for e in raws ])
     # NOTE: do not change the order of raws after this point
     #       and till the end of the loop...
     # XXX revise if there is a simpler way...
-    ##!!! this kills code like sets[0][1] += [...]
-##    sets = [ (r, [r]) for r in raws ]
     sets = [ [r, [r]] for r in raws ]

     for e in lst:
@@ -199,7 +212,6 @@ def split_by_raws(raws, lst, failed):
             failed += [e]
         # found a location...
         elif c > common:
-            ##!!! for some odd reason this does not work....
             sets[i][1] += [e]
         # file in an odd location ##!!! list these locations...
         else:
@@ -207,14 +219,15 @@ def split_by_raws(raws, lst, failed):
             ##!!! try different strategies here...
             ##!!!
             failed += [e]
-##    return sets, failed
     return sets


 #-----------------------------------------------------------gid_index---
-##!!! this will rewrite existing data -- should only update...
 def gid_index(index, existing=None):
     '''
     '''
+    skipped = []
     # index via a propper GID...
     # split similarly named but different files...
     if existing is None:
@@ -222,34 +235,41 @@ def gid_index(index, existing=None):
     else:
         res = existing
     failed = []
+    im_n = 0
+    up_n = 0
+    new_n = 0
     for name, l in index.iteritems():
         l.sort()
         raws = [e for e in l if e[2] == RAW]
         # multiple raw files...
         if len(raws) > 1:
+            # split this into a separate func...
             sets = split_by_raws(raws, l, failed)
         # single raw...
         elif len(raws) == 1:
             sets = [(raws[0], l)]
         # no raw files...
         else:
-            print 'no raw file found for "%s"...' % os.path.join(name)
+            print (' '*78), '\rno raw file found for "%s"...' % os.path.join(name)
             sets = []
             ##!!! need to report this in a usable way...
             failed += l
         # add actual elements to index...
         for raw, l in sets:
+            im_n += 1
+            print 'Processing image:', im_n, 'new:', new_n, 'updated:', up_n, '\r',
             # get file GID...
             GID = image_gid('%s.%s' % (os.path.join(*[config['ARCHIVE_ROOT']] + raw[0] + [raw[1]]), raw[2]))
             ##!!! normalize the image format...
-            res[GID] = {
+            img = {
                 'gid': GID,
                 'name': name,
                 'imported': time.time(),
+                'updated': time.time(),
                 # NOTE: this might get distorted on archiving or
                 #       copying...
                 # mostly intended for importing...
@@ -262,8 +282,30 @@ def gid_index(index, existing=None):
                 'TIFF': [e for e in l if e[2] == TIFF],
                 'other': [e for e in l if e[2] != OR(TIFF, PSD, JPEG, XMP, RAW)],
             }
+            # add new data...
+            if GID not in res:
+                res[GID] = img
+                new_n += 1
+            # update existing...
+            else:
+                cur = res[GID]
+                updating = False
+                for k, v in img.iteritems():
+                    # skip bookkeeping fields...
+                    if k in ('imported', 'name', 'gid', 'ctime', 'updated'):
+                        continue
+                    if v != cur[k]:
+                        cur[k] = v
+                        updating = True
+                # do the actual update...
+                if updating:
+                    cur['updated'] = time.time()
+                    res[GID] = cur
+                    up_n += 1
+                else:
+                    skipped += [GID]

-    return res, failed
+    return res, failed, skipped
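
The new/update branch above is essentially a dirty-check merge: a record is stored as new, updated in place when a non-bookkeeping field differs, or counted as skipped, and the 'updated' timestamp is bumped only on a real change. A condensed sketch of that pattern (field names follow the diff; merge_record and BOOKKEEPING are names introduced here for illustration):

    import time

    # fields the dirty-check ignores...
    BOOKKEEPING = ('imported', 'name', 'gid', 'ctime', 'updated')

    def merge_record(res, GID, img, skipped):
        if GID not in res:
            res[GID] = img                     # new record
            return 'new'
        cur = res[GID]
        changed = [k for k, v in img.iteritems()
                if k not in BOOKKEEPING and cur.get(k) != v]
        if not changed:
            skipped.append(GID)                # nothing to do
            return 'skipped'
        for k in changed:
            cur[k] = img[k]
        cur['updated'] = time.time()           # bump only on a real change
        res[GID] = cur
        return 'updated'
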
@@ -282,7 +324,7 @@ if __name__ == '__main__':
     lst = list(list_files(config['ARCHIVE_ROOT']))
     print 'found files:', len(lst)
-    pprint(lst[0])
+##    pprint(lst[0])
     json.dump(lst, file(FILE_LIST, 'w'))
     print 'saved...'
@@ -315,9 +357,26 @@ if __name__ == '__main__':
 ##    GID_index = store.IndexWithCache(INDEX_PATH)
     GID_index = store.Index(INDEX_PATH)

-    ##!!! only check for updates...
-    GID_index, failed = gid_index(index, GID_index)
+    # a cheating way to say if we are empty...
+    index_empty = True
+    for k in GID_index.iterkeys():
+        index_empty = False
+        break
+
+    t0 = time.time()
+    if not index_empty:
+        print 'updating...'
+        ##!!! this takes a substantially longer time initially... (about 30x longer)
+        GID_index, failed, skipped = gid_index(index, GID_index)
+    else:
+        print 'indexing...'
+        GID_index, failed, skipped = gid_index(index)
+
+    store.dump(GID_index, INDEX_PATH, index_depth=2)
+    t1 = time.time()
+    print 'done in:', t1-t0, 'seconds.'

     json.dump(failed, file(os.path.join('test', 'failed-to-categorise.json'), 'w'))
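
The for/break loop is a constant-time emptiness test for the lazy Index: iterkeys() reads key names straight off the filesystem, so pulling at most one key avoids loading any values (len() would have to scan the whole store). The same check can be written with next() and a default:

    # pull at most one key off the lazy index; None means it is empty...
    index_empty = next(GID_index.iterkeys(), None) is None
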
@@ -336,13 +395,15 @@ if __name__ == '__main__':
     indexed: %s
     raws: %s
     failed: %s
+    skipped: %s
     ''' % (
             len(GID_index),
             len([ e for e in lst if e[2] == RAW]),
-            len(failed))
+            len(failed),
+            len(skipped))

-##!!! this is really slow because it pulls ALL the data... wonder who wrote this? :)
-    pprint(GID_index.itervalues().next())
+## ##!!! this is really slow because it pulls ALL the data... wonder who wrote this? :)
+##    pprint(GID_index.itervalues().next())

 ##    store.dump(GID_index, INDEX_PATH)

(file name not shown)

@@ -1,7 +1,7 @@
 #=======================================================================
 __version__ = '''0.0.01'''
-__sub_version__ = '''20130322155314'''
+__sub_version__ = '''20130325114759'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
@@ -20,9 +20,6 @@ import store
 CONFIG_NAME = 'P7000_config.json'

-
-#-----------------------------------------------------------------------
-
 #-----------------------------------------------------------------------
 if __name__ == '__main__':
     from optparse import OptionParser

store.py

@@ -1,7 +1,7 @@
 #=======================================================================
 __version__ = '''0.0.01'''
-__sub_version__ = '''20130319150549'''
+__sub_version__ = '''20130325170937'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
@@ -10,6 +10,7 @@ __copyright__ = '''(c) Alex A. Naanou 2011'''
 import os
 import json
 import zipfile
+import time

 import pli.pattern.mixin.mapping as mapping
 import pli.objutils as objutils
@@ -67,6 +68,7 @@ def dump(index, path, index_depth=1, ext='.json'):
 #----------------------------------------------------------------load---
+##!!! make an iterator version...
 def load(path, ext='.json', pack_ext='.pack'):
     '''
     load data from fs store.
@@ -99,14 +101,23 @@ def load(path, ext='.json', pack_ext='.pack'):
 # only the last is accesible but this might cause trouble elsewhere...
 # NOTE: this should be done in the background (possible race-condition
 #       with removing a file while it is being read)
-def pack(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False):
+def pack(path, pack_name='%(timestamp)s', ext='.json', pack_ext='.pack',
+        keep_files=False, keep_dirs=False, date_format='%Y%m%d-%H%M%S'):
     '''
     pack an fs data store.
+
+    Supported fields in pack_name:
+        %(timestamp)s - time stamp in the date_format format

     NOTE: if keep_files is True, keep_dirs option will be ignored.
+    NOTE: if pack_name is static and a pack file with that name exists
+          then the files will be added to that pack.
     '''
+    data = {
+        'timestamp': time.strftime(date_format),
+    }
     ##!!! this will not remove original entries if they exist...
-    z = zipfile.ZipFile(os.path.join(path, 'index' + pack_ext), 'a', compression=zipfile.ZIP_DEFLATED)
+    z = zipfile.ZipFile(os.path.join(path, (pack_name % data) + pack_ext), 'a', compression=zipfile.ZIP_DEFLATED)
     for p, _, files in os.walk(path):
         for f in files:
             if f.endswith(ext):
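
Since the zip is opened in append mode, a static pack_name keeps adding members to the same archive across runs (hence the note about files being added to an existing pack), while the default %(timestamp)s name yields a fresh archive per run. A small sketch of the naming scheme and the append behaviour, with make_pack_name introduced here for illustration:

    import os
    import time
    import zipfile

    def make_pack_name(path, pack_name='%(timestamp)s', pack_ext='.pack',
            date_format='%Y%m%d-%H%M%S'):
        # expand the supported %(timestamp)s field into the pack file name...
        data = {'timestamp': time.strftime(date_format)}
        return os.path.join(path, (pack_name % data) + pack_ext)

    # 'a' mode appends to an existing archive instead of rewriting it,
    # so repeated packs with a static name accumulate duplicate members...
    z = zipfile.ZipFile(make_pack_name('.'), 'a', compression=zipfile.ZIP_DEFLATED)
    z.writestr('example.json', '{}')
    z.close()
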
@@ -125,6 +136,24 @@ def pack(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False):
     z.close()

+
+#-----------------------------------------------------------cleanpack---
+def cleanpack(path, pack_name='%(timestamp)s', ext='.json', pack_ext='.pack',
+        keep_files=False, keep_dirs=False, date_format='%Y%m%d-%H%M%S'):
+    '''
+    make a clean pack, removing duplicate entries.
+    '''
+    data = {
+        'timestamp': time.strftime(date_format),
+    }
+    name = os.path.join(path, (pack_name % data) + pack_ext)
+    ##!!! this will load the whole monster to memory, need something better...
+    index = load(path)
+    z = zipfile.ZipFile(name, 'w', compression=zipfile.ZIP_DEFLATED)
+    for k, v in index.iteritems():
+        z.writestr(k + ext, json.dumps(v, indent=4, separators=(', ', ': ')))
+    z.close()
+

 #-----------------------------------------------------------------------
 # lazy dict-like objects that read and write (optional) the fs...
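
cleanpack() removes the duplicates by rebuilding: load() resolves each key to its latest value (as the note above says, only the last entry is accessible), and writing the result into a zip opened in 'w' mode drops the shadowed copies, at the cost of holding the whole index in memory. When only reading, duplicates can also be skipped without a rebuild, since ZipFile resolves a duplicated name to the last member written; a sketch (function name introduced here for illustration):

    import zipfile

    def effective_members(pack_path):
        # a pack appended to repeatedly may hold several members with the
        # same name; read(name) resolves to the last one written, so
        # iterating over unique names skips the shadowed duplicates...
        z = zipfile.ZipFile(pack_path)
        seen = set()
        for name in z.namelist():
            if name in seen:
                continue
            seen.add(name)
            yield name, z.read(name)
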
@@ -216,6 +245,16 @@ class Index(mapping.Mapping):
             yield os.path.splitext(name)[0]

+
+#-------------------------------------------------------IndexWithPack---
+class IndexWithPack(object):
+    '''
+    '''
+    def pack(self):
+        '''
+        pack the index.
+        '''
+        pack(self._path)

 #-----------------------------------------------------------------------
 REMOVED = object()
@@ -286,6 +325,22 @@ class IndexWithCache(Index):
         del self._cache

+
+#---------------------------------------------------IndexWithSubIndex---
+##class IndexWithSubIndex(Index):
+##    '''
+##    '''
+##    def indexby(self, attr):
+##        '''
+##        '''
+##        self._sub_indexs
+##        for e in self:
+##            pass
+##    def getby(self, attr, value):
+##        '''
+##        '''
+##        pass

 #=======================================================================
 # vim:set ts=4 sw=4 nowrap :