split store functionality... still needs work.

Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
2025-10-29 02:10:08 +00:00 · 2012-03-13 18:36:53 +04:00 · 2012-03-13 18:36:53 +04:00 · 388215cdf3
commit 388215cdf3
parent d8fd5bbb10
3 changed files with 282 additions and 249 deletions
--- a/gid.py
+++ b/gid.py
@ -1,13 +1,15 @@
 #=======================================================================

 __version__ = '''0.0.01'''
-__sub_version__ = '''20120310183438'''
+__sub_version__ = '''20120313182702'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''


 #-----------------------------------------------------------------------

 import os
+import sha
+import md5

 import pyexiv2 as metadata

@ -20,7 +22,7 @@ import pyexiv2 as metadata
 # XXX not yet sure if this is unique enough to avoid conflicts if one
 # 	  photographer has enough cameras...
 # XXX also might be wise to add a photographer ID into here...
-def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%H%M%S'):
+def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%H%M%S', hash_func=sha.sha):
 	'''
 	Calgulate image GID.

@ -35,8 +37,12 @@ def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%
 	Example:
 		Alex_A.Naanou-20110627-195706-DSC_1234	

+	If hash_func is not None, then the function will be used to generate
+	a hex hash from the above string.
+
 	Supported fields:
-		%(artist)s	- Exif.Image.Artist field, stripped and spaces replaced with underscores.
+		%(artist)s	- Exif.Image.Artist field, stripped and spaces replaced
+					  with underscores.
 		%(date)s	- Exif.Image.DateTime formated to date_format argument.
 		%(name)s	- file name.

@ -57,9 +63,18 @@ def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%
 	if '%(artist)s' in format:
 		data['artist'] = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
 	
+	if hash_func is not None:
+		return hash_func(format % data).hexdigest()
 	return format % data



+#-----------------------------------------------------------------------
+if __name__ == '__main__':
+	pass
+
+
+
+
 #=======================================================================
 #                                            vim:set ts=4 sw=4 nowrap :
--- a/index.py
+++ b/index.py
@ -1,7 +1,7 @@
 #=======================================================================

 __version__ = '''0.0.01'''
-__sub_version__ = '''20120202193619'''
+__sub_version__ = '''20120313183420'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''


@ -11,12 +11,14 @@ import os
 import json
 import zipfile
 import uuid
+from pprint import pprint

 from itertools import izip, izip_longest

 from pli.logictypes import ANY, OR

-from pprint import pprint
+
+import store


 #-----------------------------------------------------------------------
@ -244,240 +246,6 @@ def split_images(index):



-#-----------------------------------------------------------------------
-# XXX is this a good way to serialize the actual data in the fs???
-
-# NOTE: these will work with any topoloy and create a flat index...
-def save_file_index(index, path, index_depth=1, ext='.json'):
-	'''
-
-	NOTE: index_depth with value greater than 2 is an overkill.
-	'''
-	root_index = {}
-	for k, v in index.items():
-		if index_depth > 0:
-			d = []
-			rest = k
-			# build index path...
-			for i in xrange(index_depth):
-				d += [rest[:2]]
-				rest = rest[2:]
-				# recursive directory construction...
-				if not os.path.exists(os.path.join(path, *d)):
-					os.mkdir(os.path.join(path, *d))
-			p = os.path.join(path, *d + [k + ext])
-		else:
-			p = os.path.join(path, k + ext)
-		json.dump(v, file(p, 'w'), indent=4, separators=(', ', ': '))
-		root_index[k] = p
-##		print '.',
-	return root_index
-
-
-def load_file_index(path, ext='.json', pack_ext='.pack'):
-	'''
-	'''
-	d = {}
-	for p, _, files in os.walk(path):
-		for f in files:
-			# handle single files...
-			if f.endswith(ext):
-				d[os.path.splitext(f)[0]] = json.load(file(os.path.join(p, f)))
-			# handle packs...
-			elif f.endswith(pack_ext):
-				pack = zipfile.ZipFile(os.path.join(p, f))
-				# load elements form the pack...
-				for name in pack.namelist():
-					if name.endswith(ext):
-						d[os.path.splitext(name)[0]] = json.loads(pack.read(name))
-	return d
-
-
-# XXX should we remove empty dirs here???
-def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False):
-	'''
-
-	NOTE: if keep_files is True, keep_dirs option will be ignored.
-	'''
-	z = zipfile.ZipFile(os.path.join(path, 'index' + pack_ext), 'a', compression=zipfile.ZIP_DEFLATED)
-	for p, _, files in os.walk(path):
-		for f in files: 
-			if f.endswith(ext):
-				z.write(os.path.join(p, f), os.path.split(f)[-1])
-				if not keep_files:
-					os.remove(os.path.join(p, f))
-					# XXX this will not remove empty dirs (push one
-					#     level up for that...)
-					if not keep_dirs and p != path:
-						##!!! check if dir is empty....
-						try:
-							# NOTE: this will fail for non-empty dirs...
-							os.rmdir(os.path.join(p))
-						except:
-							pass
-	z.close()
-	
-##!!! get path by name helper...
-##!!!
-
-
-#-----------------------------------------------------------------------
-# lazy dict-like objects that read and write (optional) the fs...
-
-import pli.pattern.mixin.mapping as mapping
-import pli.objutils as objutils
-
-# XXX might be good to do a path index...
-##!!! make this archive/file structure-agnostic...
-class Index(mapping.Mapping):
-	__json_ext__ = '.json'
-	__pack_ext__ = '.pack'
-
-	def __init__(self, path):
-		'''
-		'''
-		self._path = path
-	
-	# specific interface...
-	##!!! make this support different depths...
-	def __locations__(self, name):
-		'''
-		'''
-		ext = self.__json_ext__
-		name += ext
-		# build probable locations...
-		return (
-				name,
-				# look in a directory...
-				os.path.join(name[:2], name),
-				##!!! HACK: make this dynamic...
-				os.path.join(name[:2], name[2:4], name),
-		)
-	
-	# mapping interface...
-	def __getitem__(self, name):
-		'''
-		'''
-##		ext = self.__json_ext__
-		pack_ext = self.__pack_ext__
-##		file_name = name + ext
-		locations = self.__locations__(name)
-		# look of the file directly...
-		for n in locations:
-			if os.path.exists(os.path.join(self._path, n)):
-				return json.load(file(os.path.join(self._path, n)))
-		# try and locate a file in a pack...
-		for p, _, files in os.walk(self._path):
-			# files are searched sorted by their name...
-			files.sort()
-			for f in files:
-##				##!!! do we need to look in odd named directories...
-##				if f == file_name:
-##					return json.load(file(os.path.join(p, file_name)))
-				if f.endswith(pack_ext):
-					z = zipfile.ZipFile(os.path.join(p, f))
-					for n in locations:
-						if n in z.namelist():
-							return json.loads(z.read(n))
-		raise KeyError, name
-	def __setitem__(self, name, value):
-		'''
-		'''
-		raise NotImplementedError
-	def __delitem__(self, name):
-		'''
-		'''
-		raise NotImplementedError
-	def __iter__(self):
-		'''
-		'''
-		visited = []
-		packs = []
-		ext = self.__json_ext__
-		pack_ext = self.__pack_ext__
-		for p, _, files in os.walk(self._path):
-			for f in files:
-				if f.endswith(ext) and f not in visited:
-					visited += [f]
-					yield os.path.splitext(f)[0]
-				elif f.endswith(pack_ext):
-					packs += [os.path.join(p, f)]
-		for pack in packs:
-			z = zipfile.ZipFile(pack)
-			for name in z.namelist():
-				if name not in visited:
-					visited += [name]
-					yield os.path.splitext(name)[0]
-
-
-REMOVED = object()
-
-class IndexWithCache(Index):
-	'''
-	'''
-	objutils.createonaccess('_cache', dict)
-
-	__sync__ = False
-
-	def __getitem__(self, name):
-		'''
-		'''
-		if name in self._cache:
-			res = self._cache[name]
-			if res is REMOVED:
-				raise KeyError, name
-			return res
-		res = self._cache[name] = super(IndexWithCache, self).__getitem__(name)
-		return res
-	def __setitem__(self, name, value):
-		'''
-		'''
-		self._cache[name] = value
-		if self.__sync__:
-			self.cache_flush(name)
-	##!!!
-	def __delitem__(self, name):
-		'''
-		'''
-		self._cache[name] = REMOVED
-		if self.__sync__:
-			self.cache_flush(name)
-	def __iter__(self):
-		'''
-		'''
-		cache = self._cache
-		for e in cache:
-			yield e
-		for e in super(IndexWithCache, self).__iter__():
-			if e not in cache:
-				yield e
-	
-	# cache management...
-	##!!! removed items will not get flushed yet...
-	# XXX to make removing elements history compatible, one way to go
-	#     is to write a specifc value to the file, thus making it
-	#     shadow the original value...
-	def cache_flush(self, *keys):
-		'''
-		'''
-		if keys == ():
-			return save_file_index(self._cache, self._path)
-		flush = {}
-		for k in keys:
-			if k is REMOVED:
-				# remove file...
-##				raise NotImplementedError
-				##!!!
-				continue
-			flush[k] = self[k]
-		return save_file_index(flush, self._path)
-	def cache_drop(self):
-		'''
-		'''
-		del self._cache
-
-
-
 #-----------------------------------------------------------------------
 ##!!! test implementation: rewrite...
 import pyexiv2 as metadata
@ -566,7 +334,7 @@ def build_image_cache(ic, min_rating, dest, tmp_path, preview_size=900):
 			continue

 	ic.cache_flush()
-	pack_file_index(ic._path, keep_files=False)
+	store.pack_file_index(ic._path, keep_files=False)

 	return res

@ -594,27 +362,27 @@ if __name__ == '__main__':



-	root_index = save_file_index(index, os.path.join('test', 'index'), index_depth=1)
+	root_index = store.save_file_index(index, os.path.join('test', 'index'), index_depth=1)

 ##	##!!! this is not used in anything yet...
 ##	json.dump(root_index, file(os.path.join('test', 'index', 'file_index.json'), 'w'))

-	pack_file_index(os.path.join('test', 'index'), keep_files=False)
+	store.pack_file_index(os.path.join('test', 'index'), keep_files=False)

-	d = load_file_index(os.path.join('test', 'index'))
+	d = store.load_file_index(os.path.join('test', 'index'))


 	print len(d)

 	k = d.keys()[0]

-	i = Index(os.path.join('test', 'index'))
+	i = store.Index(os.path.join('test', 'index'))

 	print len(i)

 ##	print i[k]

-	ic = IndexWithCache(os.path.join('test', 'index'))
+	ic = store.IndexWithCache(os.path.join('test', 'index'))

 	print ic[k]

@ -622,13 +390,13 @@ if __name__ == '__main__':

 	ic.cache_flush()

-	pack_file_index(ic._path, keep_files=False)
+	store.pack_file_index(ic._path, keep_files=False)

 	ic.__sync__ = True

 	ic['111111111111111111111111111111111'] = {}

-	pack_file_index(ic._path, keep_files=False)
+	store.pack_file_index(ic._path, keep_files=False)


 	##!!! revise...
@ -647,12 +415,12 @@ if __name__ == '__main__':
 		full = dict(json.load(file(os.path.join('test', 'filelist of 20k files.json'))))

 		print 'writing files...'
-		root_index = save_file_index(full, os.path.join('test', 'index'), index_depth=1)
+		root_index = store.save_file_index(full, os.path.join('test', 'index'), index_depth=1)

 		print 'packing files...'
 		# NOTE: the initial archiving seems REALLY SLOW, but working with
 		# 		small numbers of files from the archive seems adequate...
-		pack_file_index(os.path.join('test', 'index'), keep_files=True)
+		store.pack_file_index(os.path.join('test', 'index'), keep_files=True)



--- a/store.py
+++ b/store.py
@ -0,0 +1,250 @@
+#=======================================================================
+
+__version__ = '''0.0.01'''
+__sub_version__ = '''20120313183119'''
+__copyright__ = '''(c) Alex A. Naanou 2011'''
+
+
+#-----------------------------------------------------------------------
+
+import os
+import json
+import zipfile
+
+
+#-----------------------------------------------------------------------
+# XXX is this a good way to serialize the actual data in the fs???
+
+# NOTE: these will work with any topology and create a flat index...
+def save_file_index(index, path, index_depth=1, ext='.json'):
+	'''
+
+	NOTE: an index_depth value greater than 2 is overkill.
+	'''
+	root_index = {}
+	for k, v in index.items():
+		if index_depth > 0:
+			d = []
+			rest = k
+			# build index path...
+			for i in xrange(index_depth):
+				d += [rest[:2]]
+				rest = rest[2:]
+				# recursive directory construction...
+				if not os.path.exists(os.path.join(path, *d)):
+					os.mkdir(os.path.join(path, *d))
+			p = os.path.join(path, *d + [k + ext])
+		else:
+			p = os.path.join(path, k + ext)
+		json.dump(v, file(p, 'w'), indent=4, separators=(', ', ': '))
+		root_index[k] = p
+##		print '.',
+	return root_index
+
+
+def load_file_index(path, ext='.json', pack_ext='.pack'):
+	'''
+	'''
+	d = {}
+	for p, _, files in os.walk(path):
+		for f in files:
+			# handle single files...
+			if f.endswith(ext):
+				d[os.path.splitext(f)[0]] = json.load(file(os.path.join(p, f)))
+			# handle packs...
+			elif f.endswith(pack_ext):
+				pack = zipfile.ZipFile(os.path.join(p, f))
+				# load elements from the pack...
+				for name in pack.namelist():
+					if name.endswith(ext):
+						d[os.path.splitext(name)[0]] = json.loads(pack.read(name))
+	return d
+
+
+# XXX should we remove empty dirs here???
+def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False):
+	'''
+
+	NOTE: if keep_files is True, keep_dirs option will be ignored.
+	'''
+	z = zipfile.ZipFile(os.path.join(path, 'index' + pack_ext), 'a', compression=zipfile.ZIP_DEFLATED)
+	for p, _, files in os.walk(path):
+		for f in files: 
+			if f.endswith(ext):
+				z.write(os.path.join(p, f), os.path.split(f)[-1])
+				if not keep_files:
+					os.remove(os.path.join(p, f))
+					# XXX this will not remove empty dirs (push one
+					#     level up for that...)
+					if not keep_dirs and p != path:
+						##!!! check if dir is empty....
+						try:
+							# NOTE: this will fail for non-empty dirs...
+							os.rmdir(os.path.join(p))
+						except:
+							pass
+	z.close()
+	
+##!!! get path by name helper...
+##!!!
+
+
+#-----------------------------------------------------------------------
+# lazy dict-like objects that read and write (optional) the fs...
+
+import pli.pattern.mixin.mapping as mapping
+import pli.objutils as objutils
+
+# XXX might be good to do a path index...
+##!!! make this archive/file structure-agnostic...
+class Index(mapping.Mapping):
+	__json_ext__ = '.json'
+	__pack_ext__ = '.pack'
+
+	def __init__(self, path):
+		'''
+		'''
+		self._path = path
+	
+	# specific interface...
+	##!!! make this support different depths...
+	def __locations__(self, name):
+		'''
+		'''
+		ext = self.__json_ext__
+		name += ext
+		# build probable locations...
+		return (
+				name,
+				# look in a directory...
+				os.path.join(name[:2], name),
+				##!!! HACK: make this dynamic...
+				os.path.join(name[:2], name[2:4], name),
+		)
+	
+	# mapping interface...
+	def __getitem__(self, name):
+		'''
+		'''
+##		ext = self.__json_ext__
+		pack_ext = self.__pack_ext__
+##		file_name = name + ext
+		locations = self.__locations__(name)
+		# look for the file directly...
+		for n in locations:
+			if os.path.exists(os.path.join(self._path, n)):
+				return json.load(file(os.path.join(self._path, n)))
+		# try and locate a file in a pack...
+		for p, _, files in os.walk(self._path):
+			# files are searched sorted by their name...
+			files.sort()
+			for f in files:
+##				##!!! do we need to look in odd named directories...
+##				if f == file_name:
+##					return json.load(file(os.path.join(p, file_name)))
+				if f.endswith(pack_ext):
+					z = zipfile.ZipFile(os.path.join(p, f))
+					for n in locations:
+						if n in z.namelist():
+							return json.loads(z.read(n))
+		raise KeyError, name
+	def __setitem__(self, name, value):
+		'''
+		'''
+		raise NotImplementedError
+	def __delitem__(self, name):
+		'''
+		'''
+		raise NotImplementedError
+	def __iter__(self):
+		'''
+		'''
+		visited = []
+		packs = []
+		ext = self.__json_ext__
+		pack_ext = self.__pack_ext__
+		for p, _, files in os.walk(self._path):
+			for f in files:
+				if f.endswith(ext) and f not in visited:
+					visited += [f]
+					yield os.path.splitext(f)[0]
+				elif f.endswith(pack_ext):
+					packs += [os.path.join(p, f)]
+		for pack in packs:
+			z = zipfile.ZipFile(pack)
+			for name in z.namelist():
+				if name not in visited:
+					visited += [name]
+					yield os.path.splitext(name)[0]
+
+
+REMOVED = object()
+
+class IndexWithCache(Index):
+	'''
+	'''
+	objutils.createonaccess('_cache', dict)
+
+	__sync__ = False
+
+	def __getitem__(self, name):
+		'''
+		'''
+		if name in self._cache:
+			res = self._cache[name]
+			if res is REMOVED:
+				raise KeyError, name
+			return res
+		res = self._cache[name] = super(IndexWithCache, self).__getitem__(name)
+		return res
+	def __setitem__(self, name, value):
+		'''
+		'''
+		self._cache[name] = value
+		if self.__sync__:
+			self.cache_flush(name)
+	##!!!
+	def __delitem__(self, name):
+		'''
+		'''
+		self._cache[name] = REMOVED
+		if self.__sync__:
+			self.cache_flush(name)
+	def __iter__(self):
+		'''
+		'''
+		cache = self._cache
+		for e in cache:
+			yield e
+		for e in super(IndexWithCache, self).__iter__():
+			if e not in cache:
+				yield e
+	
+	# cache management...
+	##!!! removed items will not get flushed yet...
+	# XXX to make removing elements history compatible, one way to go
+	#     is to write a specific value to the file, thus making it
+	#     shadow the original value...
+	def cache_flush(self, *keys):
+		'''
+		'''
+		if keys == ():
+			return save_file_index(self._cache, self._path)
+		flush = {}
+		for k in keys:
+			if k is REMOVED:
+				# remove file...
+##				raise NotImplementedError
+				##!!!
+				continue
+			flush[k] = self[k]
+		return save_file_index(flush, self._path)
+	def cache_drop(self):
+		'''
+		'''
+		del self._cache
+
+
+
+#=======================================================================
+#                                            vim:set ts=4 sw=4 nowrap :