lots of fixes, and some code reorganization...

Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
This commit is contained in:
Alex A. Naanou 2012-03-13 22:48:46 +04:00
parent 388215cdf3
commit bbe1377e32
3 changed files with 62 additions and 22 deletions

17
gid.py
View File

@ -1,7 +1,7 @@
#=======================================================================
__version__ = '''0.0.01'''
__sub_version__ = '''20120313182702'''
__sub_version__ = '''20120313223928'''
__copyright__ = '''(c) Alex A. Naanou 2011'''
@ -22,7 +22,10 @@ import pyexiv2 as metadata
# XXX not yet sure if this is unique enough to avoid conflicts if one
# photographer has enough cameras...
# XXX also might be wise to add a photographer ID into here...
def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%H%M%S', hash_func=sha.sha):
def image_gid(path, format='%(artist)s-%(date)s-%(name)s',
date_format='%Y%m%d-%H%M%S',
default_artist='Unknown',
hash_func=sha.sha):
'''
	Calculate image GID.
@ -53,15 +56,19 @@ def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%
data = {
'name': os.path.splitext(os.path.split(path)[-1])[0],
}
##!!! this might fail...
i = metadata.ImageMetadata('%s' % path)
i.read()
# check if we need a date in the id...
if '%(date)s' in format:
i = metadata.ImageMetadata('%s' % path)
i.read()
d = i['Exif.Image.DateTime'].value
data['date'] = d.strftime(date_format)
# check if we need an artist...
if '%(artist)s' in format:
data['artist'] = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
try:
data['artist'] = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
except KeyError:
data['artist'] = default_artist
if hash_func is not None:
return hash_func(format % data).hexdigest()

View File

@ -1,7 +1,7 @@
#=======================================================================
__version__ = '''0.0.01'''
__sub_version__ = '''20120310191654'''
__sub_version__ = '''20120313224544'''
__copyright__ = '''(c) Alex A. Naanou 2011'''
@ -27,13 +27,15 @@ import pyexiv2 as metadata
from pli.logictypes import ANY, OR
import store
from gid import image_gid
#-----------------------------------------------------------------------
CONFIG_NAME = 'test_config.json'
##CONFIG_NAME = 'test_config.json'
##CONFIG_NAME = 'tmp_config.json'
CONFIG_NAME = 'tmp_config.json.bak'
config = json.load(open(CONFIG_NAME))
@ -42,12 +44,13 @@ RAW = OR(
'NEF', 'nef',
'CRW', 'crw',
'CR2', 'cr2',
'X3F', 'x3f'
'X3F', 'x3f',
'DNG', 'dng',
)
JPEG = OR(
'JPG', 'jpg',
'JPEG', 'jpeg'
'JPEG', 'jpeg',
)
PSD = OR(
@ -196,12 +199,15 @@ def split_by_raws(raws, lst, failed):
return sets
def gid_index(index):
def gid_index(index, existing=None):
'''
'''
	# index via a proper GID...
# split similarly named but different files...
res = {}
if existing is None:
res = {}
else:
res = existing
failed = []
for name, l in index.iteritems():
l.sort()
@ -235,7 +241,7 @@ def gid_index(index):
# mostly intended for importing...
'ctime': raw[3],
##!!! make these more general...
'RAW': raws,
'RAW': [e for e in l if e[2] == RAW],
'XMP': [e for e in l if e[2] == XMP],
'JPG': [e for e in l if e[2] == JPEG],
'PSD': [e for e in l if e[2] == PSD],
@ -249,6 +255,8 @@ def gid_index(index):
#-----------------------------------------------------------------------
if __name__ == '__main__':
INDEX_PATH = os.path.join('test', 'index2')
FILE_LIST = os.path.join('test', 'flatfilelist.json')
BUILD_FILE_LIST = False if os.path.exists(FILE_LIST) else True
@ -256,19 +264,25 @@ if __name__ == '__main__':
if BUILD_FILE_LIST:
lst = list(list_files(config['ARCHIVE_ROOT']))
print len(lst)
print 'found files:', len(lst)
pprint(lst[0])
json.dump(lst, file(FILE_LIST, 'w'))
print 'saved...'
lst = json.load(file(FILE_LIST))
print len(lst)
print 'loaded:', len(lst)
index = index_by_name(lst)
GID_index, failed = gid_index(index)
## GID_index = store.IndexWithCache(INDEX_PATH)
GID_index = store.Index(INDEX_PATH)
GID_index, failed = gid_index(index, GID_index)
json.dump(failed, file(os.path.join('test', 'failed-to-categorise.json'), 'w'))
@ -292,6 +306,12 @@ if __name__ == '__main__':
pprint(GID_index.values()[0])
store.save_file_index(GID_index, INDEX_PATH)
## store.pack_file_index(INDEX_PATH)

View File

@ -1,7 +1,7 @@
#=======================================================================
__version__ = '''0.0.01'''
__sub_version__ = '''20120313183119'''
__sub_version__ = '''20120313211552'''
__copyright__ = '''(c) Alex A. Naanou 2011'''
@ -11,10 +11,14 @@ import os
import json
import zipfile
import pli.pattern.mixin.mapping as mapping
import pli.objutils as objutils
#-----------------------------------------------------------------------
# XXX is this a good way to serialize the actual data in the fs???
#-----------------------------------------------------save_file_index---
# NOTE: these will work with any topology and create a flat index...
def save_file_index(index, path, index_depth=1, ext='.json'):
'''
@ -42,6 +46,7 @@ def save_file_index(index, path, index_depth=1, ext='.json'):
return root_index
#-----------------------------------------------------load_file_index---
def load_file_index(path, ext='.json', pack_ext='.pack'):
'''
'''
@ -61,12 +66,15 @@ def load_file_index(path, ext='.json', pack_ext='.pack'):
return d
#-----------------------------------------------------pack_file_index---
# XXX should we remove empty dirs here???
##!!! this may create duplicate files within the pack...
def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False):
'''
NOTE: if keep_files is True, keep_dirs option will be ignored.
'''
##!!! this will not remove original entries if they exist...
z = zipfile.ZipFile(os.path.join(path, 'index' + pack_ext), 'a', compression=zipfile.ZIP_DEFLATED)
for p, _, files in os.walk(path):
for f in files:
@ -92,14 +100,15 @@ def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_
#-----------------------------------------------------------------------
# lazy dict-like objects that read and write (optional) the fs...
import pli.pattern.mixin.mapping as mapping
import pli.objutils as objutils
#---------------------------------------------------------------Index---
# XXX might be good to do a path index...
##!!! make this archive/file structure-agnostic...
class Index(mapping.Mapping):
'''
'''
__json_ext__ = '.json'
__pack_ext__ = '.pack'
__index_depth__ = 2
def __init__(self, path):
'''
@ -151,7 +160,8 @@ class Index(mapping.Mapping):
def __setitem__(self, name, value):
'''
'''
raise NotImplementedError
save_file_index({name: value}, self._path, index_depth=self.__index_depth__)
## raise NotImplementedError
def __delitem__(self, name):
'''
'''
@ -178,8 +188,11 @@ class Index(mapping.Mapping):
yield os.path.splitext(name)[0]
#-----------------------------------------------------------------------
REMOVED = object()
#------------------------------------------------------IndexWithCache---
class IndexWithCache(Index):
'''
'''
@ -229,7 +242,7 @@ class IndexWithCache(Index):
'''
'''
if keys == ():
return save_file_index(self._cache, self._path)
return save_file_index(self._cache, self._path, index_depth=self.__index_depth__)
flush = {}
for k in keys:
if k is REMOVED:
@ -238,7 +251,7 @@ class IndexWithCache(Index):
##!!!
continue
flush[k] = self[k]
return save_file_index(flush, self._path)
return save_file_index(flush, self._path, index_depth=self.__index_depth__)
def cache_drop(self):
'''
'''