lots of minor tweaks and changes...

Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
Alex A. Naanou 2013-03-27 18:13:52 +04:00
parent 4734dfd4aa
commit 3d41a07c7a
4 changed files with 143 additions and 27 deletions

gid.py

@@ -1,7 +1,7 @@
 #=======================================================================
 __version__ = '''0.0.01'''
-__sub_version__ = '''20130322142905'''
+__sub_version__ = '''20130325203750'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
@@ -52,7 +52,8 @@ def image_gid(path, date=None,
     Supported fields:
         %(artist)s - Exif.Image.Artist field, stripped and spaces replaced
                 with underscores.
+                If no artist info is set this will be set to default_artist.
-        %(date)s - Exif.Image.DateTime formated to date_format argument.
+        %(date)s - Exif.Photo.DateTimeOriginal formated to date_format argument.
         %(name)s - file name.

     NOTE: date and time are the date and time the image was made ('Exif.Image.DateTime')
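
The GID itself is built with ordinary %-style string formatting over these fields. A minimal sketch of the expansion (the format string and field values below are illustrative, not taken from the commit):

    # illustrative GID expansion with %-style fields (hypothetical values)...
    data = {
        'artist': 'Alex_A._Naanou',    # Exif.Image.Artist, spaces -> underscores
        'date': '20130325-203750',     # DateTimeOriginal rendered via date_format
        'name': 'DSC0042',             # file name
    }
    gid_format = '%(date)s-%(artist)s-%(name)s'
    print gid_format % data    # -> 20130325-203750-Alex_A._Naanou-DSC0042
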
@@ -77,7 +78,6 @@ def image_gid(path, date=None,
             date = os.path.getctime(path)
             data['date'] = time.strftime(date_format, time.gmtime(date))
         else:
-##            date = i['Exif.Image.DateTime'].value
             date = i['Exif.Photo.DateTimeOriginal'].value
             data['date'] = date.strftime(date_format)
         # check if we need an artist...
@@ -85,7 +85,10 @@ def image_gid(path, date=None,
         data['artist'] = default_artist
         if i is not None:
             try:
-                data['artist'] = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
+                # set the artist if in EXIF...
+                a = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
+                if a != '':
+                    data['artist'] = a
             except KeyError:
                 pass
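
The new artist handling is a small fallback pattern: start from default_artist and override it only when the EXIF tag is present and non-empty (previously an empty Exif.Image.Artist would clobber the default). A standalone sketch of the same logic, assuming only a dict-like metadata object with pyexiv2-style .value attributes as used above:

    def artist_from_exif(i, default_artist='unknown'):
        # start from the default and override only on a non-empty EXIF value...
        artist = default_artist
        if i is not None:
            try:
                a = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
                if a != '':
                    artist = a
            except KeyError:
                pass
        return artist
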

(file name not shown)

@@ -1,7 +1,7 @@
 #=======================================================================
 __version__ = '''0.0.01'''
-__sub_version__ = '''20130319151025'''
+__sub_version__ = '''20130326030151'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
@@ -83,6 +83,9 @@ TYPES = {
     'xmp': XMP,
 }

+SKIP_DIRS = '.sys2'
+SKIP_MARKER = '.skipindexing'
+
 SUBTREE_CLASSES = {
     'preview': 'preview',
@@ -97,12 +100,22 @@ SUBTREE_CLASSES = {
 #----------------------------------------------------------list_files---
 ##!!! we will need to normalize the paths to one single scheme (either relative or absolute)...
 # XXX might need to fetch file data too...
-def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=False, include_ctime=True):
+def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM,
+        include_root_path=False, include_ctime=True,
+        skip_marker=SKIP_MARKER, skip_dirs=SKIP_DIRS):
     '''
     yields:
         (<path>, <name>, <ext>[, <ctime>]),
     '''
     for orig_path, dirs, files in os.walk(root):
+        # skip dir trees containing a skip_marker file...
+        if skip_marker in files:
+            del dirs[:]
+            continue
+        # skip dirs...
+        while skip_dirs in dirs:
+            dirs.remove(skip_dirs)
         # XXX is this correct...
         path = orig_path.split(os.path.sep)
         # remove root from path...
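
Both skip mechanisms work because os.walk() lets the caller prune its dirs list in place: emptying it with del dirs[:] stops descent into the current subtree, while dirs.remove() prunes individual directories. A minimal standalone sketch of the same pattern (function name introduced here for illustration):

    import os

    def walk_skipping(root, skip_marker='.skipindexing', skip_dirs='.sys2'):
        for path, dirs, files in os.walk(root):
            # a marker file prunes the whole subtree below it...
            if skip_marker in files:
                del dirs[:]    # in-place: os.walk() will not descend further
                continue
            # named directories are dropped from the walk...
            while skip_dirs in dirs:
                dirs.remove(skip_dirs)
            yield path, files
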
@@ -125,6 +138,7 @@ def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM,
 #----------------------------------------------------------common_len---
 def common_len(a, *b):
     '''
+    calculate the common path length.
     '''
     for i, l in enumerate(izip(*(a,) + b)):
         if len(set(l)) != 1:
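
common_len() zips the sequences together and stops at the first position where they disagree, so the result is the length of the shared prefix; with paths pre-split into components (as list_files() yields them) that is the length of the common parent path. A sketch of the complete function, assuming it returns the full zipped length when no mismatch is found:

    from itertools import izip

    def common_len(a, *b):
        # compare element-wise across all sequences; the first position
        # holding more than one distinct value ends the common prefix...
        i = -1
        for i, l in enumerate(izip(*(a,) + b)):
            if len(set(l)) != 1:
                return i
        return i + 1

    print common_len(['2013', '03', 'raw'], ['2013', '03', 'jpeg'])    # -> 2
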
@@ -174,13 +188,12 @@ def split_by_raws(raws, lst, failed):
     '''
     '''
 ##    raws = [e for e in lst if e[2] == RAW]
+    # top level common path...
     common = common_len(*[ e[0] for e in raws ])
     # NOTE: do not change the order of raws after this point
     #       and till the end of the loop...
     # XXX revise if there is a simpler way...
-    ##!!! this kills code like sets[0][1] += [...]
-##    sets = [ (r, [r]) for r in raws ]
     sets = [ [r, [r]] for r in raws ]

     for e in lst:
@@ -199,7 +212,6 @@ def split_by_raws(raws, lst, failed):
             failed += [e]
         # found a location...
         elif c > common:
-            ##!!! for some odd reason this does not work....
             sets[i][1] += [e]
         # file in an odd location ##!!! list these locations...
         else:
@@ -207,14 +219,15 @@ def split_by_raws(raws, lst, failed):
             ##!!! try different strategies here...
             ##!!!
             failed += [e]
-##    return sets, failed
     return sets


 #-----------------------------------------------------------gid_index---
-##!!! this will rewrite existing data -- should only update...
 def gid_index(index, existing=None):
     '''
     '''
+    skipped = []
     # index via a propper GID...
     # split similarly named but different files...
     if existing is None:
@@ -222,34 +235,41 @@ def gid_index(index, existing=None):
     else:
         res = existing
     failed = []
+    im_n = 0
+    up_n = 0
+    new_n = 0
     for name, l in index.iteritems():
         l.sort()
         raws = [e for e in l if e[2] == RAW]
         # multiple raw files...
         if len(raws) > 1:
+            # split this into a separate func...
             sets = split_by_raws(raws, l, failed)
         # single raw...
         elif len(raws) == 1:
             sets = [(raws[0], l)]
         # no raw files...
         else:
-            print 'no raw file found for "%s"...' % os.path.join(name)
+            print (' '*78), '\rno raw file found for "%s"...' % os.path.join(name)
             sets = []
             ##!!! need to report this in a usable way...
             failed += l
         # add actual elements to index...
         for raw, l in sets:
+            im_n += 1
+            print 'Processing image:', im_n, 'new:', new_n, 'updated:', up_n, '\r',
             # get file GID...
             GID = image_gid('%s.%s' % (os.path.join(*[config['ARCHIVE_ROOT']] + raw[0] + [raw[1]]), raw[2]))
             ##!!! normalize the image format...
-            res[GID] = {
+            img = {
                 'gid': GID,
                 'name': name,
                 'imported': time.time(),
+                'updated': time.time(),
                 # NOTE: this might get distorted on archiving or
                 #       copying...
                 # mostly intended for importing...
@@ -262,8 +282,30 @@ def gid_index(index, existing=None):
                 'TIFF': [e for e in l if e[2] == TIFF],
                 'other': [e for e in l if e[2] != OR(TIFF, PSD, JPEG, XMP, RAW)],
             }
+            # add new data...
+            if GID not in res:
+                res[GID] = img
+                new_n += 1
+            # update existing...
+            else:
+                cur = res[GID]
+                updating = False
+                for k, v in img.iteritems():
+                    # skip bookkeeping fields...
+                    if k in ('imported', 'name', 'gid', 'ctime', 'updated'):
+                        continue
+                    if v != cur[k]:
+                        cur[k] = v
+                        updating = True
+                # do the actual update...
+                if updating:
+                    cur['updated'] = time.time()
+                    res[GID] = cur
+                    up_n += 1
+                else:
+                    skipped += [GID]

-    return res, failed
+    return res, failed, skipped
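
The new/update branch above is essentially a dirty-check merge: a record is stored as new, updated in place when a non-bookkeeping field differs, or counted as skipped, and the 'updated' timestamp is bumped only on a real change. A condensed sketch of that pattern (field names follow the diff; merge_record and BOOKKEEPING are names introduced here for illustration):

    import time

    # fields the dirty-check ignores...
    BOOKKEEPING = ('imported', 'name', 'gid', 'ctime', 'updated')

    def merge_record(res, GID, img, skipped):
        if GID not in res:
            res[GID] = img                     # new record
            return 'new'
        cur = res[GID]
        changed = [k for k, v in img.iteritems()
                if k not in BOOKKEEPING and cur.get(k) != v]
        if not changed:
            skipped.append(GID)                # nothing to do
            return 'skipped'
        for k in changed:
            cur[k] = img[k]
        cur['updated'] = time.time()           # bump only on a real change
        res[GID] = cur
        return 'updated'
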
@@ -282,7 +324,7 @@ if __name__ == '__main__':
     lst = list(list_files(config['ARCHIVE_ROOT']))
     print 'found files:', len(lst)
-    pprint(lst[0])
+##    pprint(lst[0])
     json.dump(lst, file(FILE_LIST, 'w'))
     print 'saved...'
@@ -315,9 +357,26 @@ if __name__ == '__main__':
 ##    GID_index = store.IndexWithCache(INDEX_PATH)
     GID_index = store.Index(INDEX_PATH)

-    ##!!! only check for updates...
-    GID_index, failed = gid_index(index, GID_index)
+    # a cheating way to say if we are empty...
+    index_empty = True
+    for k in GID_index.iterkeys():
+        index_empty = False
+        break
+
+    t0 = time.time()
+    if not index_empty:
+        print 'updating...'
+        ##!!! this takes a substantially longer time initially... (about 30x longer)
+        GID_index, failed, skipped = gid_index(index, GID_index)
+    else:
+        print 'indexing...'
+        GID_index, failed, skipped = gid_index(index)
+
+    store.dump(GID_index, INDEX_PATH, index_depth=2)
+    t1 = time.time()
+    print 'done in:', t1-t0, 'seconds.'

     json.dump(failed, file(os.path.join('test', 'failed-to-categorise.json'), 'w'))
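
The for/break loop is a constant-time emptiness test for the lazy Index: iterkeys() reads key names straight off the filesystem, so pulling at most one key avoids loading any values (len() would have to scan the whole store). The same check can be written with next() and a default:

    # pull at most one key off the lazy index; None means it is empty...
    index_empty = next(GID_index.iterkeys(), None) is None
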
@@ -336,13 +395,15 @@ if __name__ == '__main__':
     indexed: %s
     raws: %s
     failed: %s
+    skipped: %s
     ''' % (
             len(GID_index),
             len([ e for e in lst if e[2] == RAW]),
-            len(failed))
+            len(failed),
+            len(skipped))

-##!!! this is really slow because it pulls ALL the data... wonder who wrote this? :)
-    pprint(GID_index.itervalues().next())
+## ##!!! this is really slow because it pulls ALL the data... wonder who wrote this? :)
+##    pprint(GID_index.itervalues().next())

 ##    store.dump(GID_index, INDEX_PATH)

(file name not shown)

@@ -1,7 +1,7 @@
 #=======================================================================
 __version__ = '''0.0.01'''
-__sub_version__ = '''20130322155314'''
+__sub_version__ = '''20130325114759'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
@@ -20,9 +20,6 @@ import store
 CONFIG_NAME = 'P7000_config.json'

-
-#-----------------------------------------------------------------------
-
 #-----------------------------------------------------------------------
 if __name__ == '__main__':
     from optparse import OptionParser

store.py

@@ -1,7 +1,7 @@
 #=======================================================================
 __version__ = '''0.0.01'''
-__sub_version__ = '''20130319150549'''
+__sub_version__ = '''20130325170937'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
@@ -10,6 +10,7 @@ __copyright__ = '''(c) Alex A. Naanou 2011'''
 import os
 import json
 import zipfile
+import time

 import pli.pattern.mixin.mapping as mapping
 import pli.objutils as objutils
@@ -67,6 +68,7 @@ def dump(index, path, index_depth=1, ext='.json'):
 #----------------------------------------------------------------load---
+##!!! make an iterator version...
 def load(path, ext='.json', pack_ext='.pack'):
     '''
     load data from fs store.
@@ -99,14 +101,23 @@ def load(path, ext='.json', pack_ext='.pack'):
 # only the last is accesible but this might cause trouble elsewhere...
 # NOTE: this should be done in the background (possible race-condition
 #       with removing a file while it is being read)
-def pack(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False):
+def pack(path, pack_name='%(timestamp)s', ext='.json', pack_ext='.pack',
+        keep_files=False, keep_dirs=False, date_format='%Y%m%d-%H%M%S'):
     '''
     pack an fs data store.
+
+    Supported fields in pack_name:
+        %(timestamp)s - time stamp in the date_format format

     NOTE: if keep_files is True, keep_dirs option will be ignored.
+    NOTE: if pack_name is static and a pack file with that name exists
+          then the files will be added to that pack.
     '''
+    data = {
+        'timestamp': time.strftime(date_format),
+    }
     ##!!! this will not remove original entries if they exist...
-    z = zipfile.ZipFile(os.path.join(path, 'index' + pack_ext), 'a', compression=zipfile.ZIP_DEFLATED)
+    z = zipfile.ZipFile(os.path.join(path, (pack_name % data) + pack_ext), 'a', compression=zipfile.ZIP_DEFLATED)
     for p, _, files in os.walk(path):
         for f in files:
             if f.endswith(ext):
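
Since the zip is opened in append mode, a static pack_name keeps adding members to the same archive across runs (hence the note about files being added to an existing pack), while the default %(timestamp)s name yields a fresh archive per run. A small sketch of the naming scheme and the append behaviour, with make_pack_name introduced here for illustration:

    import os
    import time
    import zipfile

    def make_pack_name(path, pack_name='%(timestamp)s', pack_ext='.pack',
            date_format='%Y%m%d-%H%M%S'):
        # expand the supported %(timestamp)s field into the pack file name...
        data = {'timestamp': time.strftime(date_format)}
        return os.path.join(path, (pack_name % data) + pack_ext)

    # 'a' mode appends to an existing archive instead of rewriting it,
    # so repeated packs with a static name accumulate duplicate members...
    z = zipfile.ZipFile(make_pack_name('.'), 'a', compression=zipfile.ZIP_DEFLATED)
    z.writestr('example.json', '{}')
    z.close()
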
@@ -125,6 +136,24 @@ def pack(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False):
     z.close()

+
+#-----------------------------------------------------------cleanpack---
+def cleanpack(path, pack_name='%(timestamp)s', ext='.json', pack_ext='.pack',
+        keep_files=False, keep_dirs=False, date_format='%Y%m%d-%H%M%S'):
+    '''
+    make a clean pack, removing duplicate entries.
+    '''
+    data = {
+        'timestamp': time.strftime(date_format),
+    }
+    name = os.path.join(path, (pack_name % data) + pack_ext)
+    ##!!! this will load the whole monster to memory, need something better...
+    index = load(path)
+    z = zipfile.ZipFile(name, 'w', compression=zipfile.ZIP_DEFLATED)
+    for k, v in index.iteritems():
+        z.writestr(k + ext, json.dumps(v, indent=4, separators=(', ', ': ')))
+    z.close()
+

 #-----------------------------------------------------------------------
 # lazy dict-like objects that read and write (optional) the fs...
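
cleanpack() removes the duplicates by rebuilding: load() resolves each key to its latest value (as the note above says, only the last entry is accessible), and writing the result into a zip opened in 'w' mode drops the shadowed copies, at the cost of holding the whole index in memory. When only reading, duplicates can also be skipped without a rebuild, since ZipFile resolves a duplicated name to the last member written; a sketch (function name introduced here for illustration):

    import zipfile

    def effective_members(pack_path):
        # a pack appended to repeatedly may hold several members with the
        # same name; read(name) resolves to the last one written, so
        # iterating over unique names skips the shadowed duplicates...
        z = zipfile.ZipFile(pack_path)
        seen = set()
        for name in z.namelist():
            if name in seen:
                continue
            seen.add(name)
            yield name, z.read(name)
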
@@ -216,6 +245,16 @@ class Index(mapping.Mapping):
             yield os.path.splitext(name)[0]

+
+#-------------------------------------------------------IndexWithPack---
+class IndexWithPack(object):
+    '''
+    '''
+    def pack(self):
+        '''
+        pack the index.
+        '''
+        pack(self._path)

 #-----------------------------------------------------------------------
 REMOVED = object()
@@ -286,6 +325,22 @@ class IndexWithCache(Index):
         del self._cache

+
+#---------------------------------------------------IndexWithSubIndex---
+##class IndexWithSubIndex(Index):
+##    '''
+##    '''
+##    def indexby(self, attr):
+##        '''
+##        '''
+##        self._sub_indexs
+##        for e in self:
+##            pass
+##    def getby(self, attr, value):
+##        '''
+##        '''
+##        pass

 #=======================================================================
 # vim:set ts=4 sw=4 nowrap :