From 7a03bdfdc6d6aa01446bcb69c54e5519014473d0 Mon Sep 17 00:00:00 2001
From: "Alex A. Naanou" <alex.nanou@gmail.com>
Date: Thu, 15 Mar 2012 15:25:03 +0400
Subject: [PATCH] lots of tweaks and fixes, mostly minor...

Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
---
 gid.py    | 17 +++++++++++++----
 index.py  | 18 +++++++++---------
 index2.py | 19 ++++++++++++-------
 store.py  | 53 +++++++++++++++++++++++++++++++++++++++--------------
 4 files changed, 73 insertions(+), 34 deletions(-)

diff --git a/gid.py b/gid.py
index a767adda..e5565074 100755
--- a/gid.py
+++ b/gid.py
@@ -1,7 +1,7 @@
 #=======================================================================
 
 __version__ = '''0.0.01'''
-__sub_version__ = '''20120313223928'''
+__sub_version__ = '''20120315140451'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
 
 
@@ -10,6 +10,7 @@ __copyright__ = '''(c) Alex A. Naanou 2011'''
 import os
 import sha
 import md5
+import time
 
 import pyexiv2 as metadata
 
@@ -22,9 +23,11 @@ import pyexiv2 as metadata
 # XXX not yet sure if this is unique enough to avoid conflicts if one
 # 	  photographer has enough cameras...
 # XXX also might be wise to add a photographer ID into here...
-def image_gid(path, format='%(artist)s-%(date)s-%(name)s', 
+def image_gid(path, date=None, 
+		format='%(artist)s-%(date)s-%(name)s', 
 		date_format='%Y%m%d-%H%M%S', 
 		default_artist='Unknown',
+		use_ctime=False,
 		hash_func=sha.sha):
 	'''
 	Calgulate image GID.
@@ -61,8 +64,14 @@ def image_gid(path, format='%(artist)s-%(date)s-%(name)s',
 	i.read()
 	# check if we need a date in the id...
 	if '%(date)s' in format:
-		d = i['Exif.Image.DateTime'].value
-		data['date'] = d.strftime(date_format)
+		if date is not None:
+			data['date'] = time.strftime(date_format, time.gmtime(date))
+		elif use_ctime:
+			date = os.path.getctime(path)
+			data['date'] = time.strftime(date_format, time.gmtime(date))
+		else:
+			date = i['Exif.Image.DateTime'].value
+			data['date'] = date.strftime(date_format)
 	# check if we need an artist...
 	if '%(artist)s' in format:
 		try:
diff --git a/index.py b/index.py
index 01ee346f..8769e4ff 100755
--- a/index.py
+++ b/index.py
@@ -1,7 +1,7 @@
 #=======================================================================
 
 __version__ = '''0.0.01'''
-__sub_version__ = '''20120313183420'''
+__sub_version__ = '''20120315151711'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
 
 
@@ -334,7 +334,7 @@ def build_image_cache(ic, min_rating, dest, tmp_path, preview_size=900):
 			continue
 
 	ic.cache_flush()
-	store.pack_file_index(ic._path, keep_files=False)
+	store.pack(ic._path, keep_files=False)
 
 	return res
 
@@ -362,14 +362,14 @@ if __name__ == '__main__':
 
 
 
-	root_index = store.save_file_index(index, os.path.join('test', 'index'), index_depth=1)
+	root_index = store.dump(index, os.path.join('test', 'index'), index_depth=1)
 
 ##	##!!! this is not used in anything yet...
 ##	json.dump(root_index, file(os.path.join('test', 'index', 'file_index.json'), 'w'))
 
-	store.pack_file_index(os.path.join('test', 'index'), keep_files=False)
+	store.pack(os.path.join('test', 'index'), keep_files=False)
 
-	d = store.load_file_index(os.path.join('test', 'index'))
+	d = store.load(os.path.join('test', 'index'))
 
 
 	print len(d)
@@ -390,13 +390,13 @@ if __name__ == '__main__':
 
 	ic.cache_flush()
 
-	store.pack_file_index(ic._path, keep_files=False)
+	store.pack(ic._path, keep_files=False)
 
 	ic.__sync__ = True
 
 	ic['111111111111111111111111111111111'] = {}
 
-	store.pack_file_index(ic._path, keep_files=False)
+	store.pack(ic._path, keep_files=False)
 
 
 	##!!! revise...
@@ -415,12 +415,12 @@ if __name__ == '__main__':
 		full = dict(json.load(file(os.path.join('test', 'filelist of 20k files.json'))))
 
 		print 'writing files...'
-		root_index = store.save_file_index(full, os.path.join('test', 'index'), index_depth=1)
+		root_index = store.dump(full, os.path.join('test', 'index'), index_depth=1)
 
 		print 'packing files...'
 		# NOTE: the initial archiving seems REALLY SLOW, but working with
 		# 		small numbers of files from the archive seems adequate...
-		store.pack_file_index(os.path.join('test', 'index'), keep_files=True)
+		store.pack(os.path.join('test', 'index'), keep_files=True)
 
 
 
diff --git a/index2.py b/index2.py
index 76d7f4c9..3ae31c10 100755
--- a/index2.py
+++ b/index2.py
@@ -1,7 +1,7 @@
 #=======================================================================
 
 __version__ = '''0.0.01'''
-__sub_version__ = '''20120313224544'''
+__sub_version__ = '''20120315151510'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
 
 
@@ -33,9 +33,9 @@ from gid import image_gid
 
 #-----------------------------------------------------------------------
 
-##CONFIG_NAME = 'test_config.json'
+CONFIG_NAME = 'test_config.json'
 ##CONFIG_NAME = 'tmp_config.json'
-CONFIG_NAME = 'tmp_config.json.bak'
+##CONFIG_NAME = 'tmp_config.json.bak'
 
 config = json.load(open(CONFIG_NAME))
 
@@ -87,6 +87,7 @@ SUBTREE_CLASSES = {
 
 
 
+#----------------------------------------------------------list_files---
 ##!!! we will need to normalize the paths to one single scheme (either relative or absolute)...
 # XXX might need to fetch file data too...
 def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=False, include_ctime=True):
@@ -114,6 +115,7 @@ def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=Fal
 					yield path, name, ext
 
 
+#----------------------------------------------------------common_len---
 def common_len(a, *b):
 	'''
 	'''
@@ -123,6 +125,7 @@ def common_len(a, *b):
 	return len(min(*(a,) + b))
 
 
+#-------------------------------------------------------path_distance---
 ##!!! is this meaningless?
 def path_distance(a, b):
 	'''
@@ -130,6 +133,7 @@ def path_distance(a, b):
 	return len(a) + len(b) - common_len(a, b)*2
 
 
+#-------------------------------------------------------index_by_name---
 def index_by_name(lst):
 	'''
 	index by file name (indexing preparation)...
@@ -158,6 +162,7 @@ def index_by_name(lst):
 
 
 
+#-------------------------------------------------------split_by_raws---
 def split_by_raws(raws, lst, failed):
 	'''
 	'''
@@ -199,6 +204,7 @@ def split_by_raws(raws, lst, failed):
 	return sets
 
 
+#-----------------------------------------------------------gid_index---
 def gid_index(index, existing=None):
 	'''
 	'''
@@ -252,6 +258,7 @@ def gid_index(index, existing=None):
 	return res, failed
 
 
+
 #-----------------------------------------------------------------------
 if __name__ == '__main__':
 
@@ -306,11 +313,9 @@ if __name__ == '__main__':
 
 	pprint(GID_index.values()[0])
 
-	store.save_file_index(GID_index, INDEX_PATH)
-
-##	store.pack_file_index(INDEX_PATH)
-
+##	store.dump(GID_index, INDEX_PATH)
 
+	store.pack(INDEX_PATH)
 
 
 
diff --git a/store.py b/store.py
index d819692d..3c6ee0f5 100755
--- a/store.py
+++ b/store.py
@@ -1,7 +1,7 @@
 #=======================================================================
 
 __version__ = '''0.0.01'''
-__sub_version__ = '''20120313211552'''
+__sub_version__ = '''20120315152600'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
 
 
@@ -18,12 +18,30 @@ import pli.objutils as objutils
 #-----------------------------------------------------------------------
 # XXX is this a good way to serialize the actual data in the fs???
 
-#-----------------------------------------------------save_file_index---
+#----------------------------------------------------------------dump---
 # NOTE: these will work with any topoloy and create a flat index...
-def save_file_index(index, path, index_depth=1, ext='.json'):
+# XXX should this know anything about data versions???
+def dump(index, path, index_depth=1, ext='.json'):
 	'''
+	store an index in fs store.
 
-	NOTE: index_depth with value greater than 2 is an overkill.
+	by default the structure is as follows:
+
+		key: abcdefg
+		path: ab/abcdefg	(index_depth=1)
+
+
+	index_depth sets the directory nesting depth; if 0, a flat store is
+	created. here is an example path for index_depth=2:
+
+		path: ab/cd/abcdefg
+
+	the dict value is stored in the file in JSON format.
+
+	NOTE: this can be used with parts of a dict.
+	NOTE: existing data will be overwritten.
+	NOTE: store balancing depends on key structure.
+	NOTE: index_depth with value greater than 2 is likely an overkill.
 	'''
 	root_index = {}
 	for k, v in index.items():
@@ -42,13 +60,19 @@ def save_file_index(index, path, index_depth=1, ext='.json'):
 			p = os.path.join(path, k + ext)
 		json.dump(v, file(p, 'w'), indent=4, separators=(', ', ': '))
 		root_index[k] = p
-##		print '.',
 	return root_index
 
 
 #-----------------------------------------------------load_file_index---
-def load_file_index(path, ext='.json', pack_ext='.pack'):
+def load(path, ext='.json', pack_ext='.pack'):
 	'''
+	load data from fs store.
+
+	for data format see dump(...).
+
+	NOTE: this will load the whole data set.
+	NOTE: unpacked data shadows packed data.
+	NOTE: this does not care about topology.
 	'''
 	d = {}
 	for p, _, files in os.walk(path):
@@ -68,9 +92,13 @@ def load_file_index(path, ext='.json', pack_ext='.pack'):
 
 #-----------------------------------------------------pack_file_index---
 # XXX should we remove empty dirs here???
-##!!! this may creae duplicate files within the pack...
-def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False):
+# XXX this will create duplicate files within the pack
+# 	  only the last is accessible but this might cause trouble elsewhere...
+# NOTE: this should be done in the background (possible race-condition
+# 		with removing a file while it is being read)
+def pack(path, ext='.json', pack_ext='.pack', keep_files=False, keep_dirs=False):
 	'''
+	pack an fs data store.
 
 	NOTE: if keep_files is True, keep_dirs option will be ignored.
 	'''
@@ -93,8 +121,6 @@ def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_
 							pass
 	z.close()
 	
-##!!! get path by name helper...
-##!!!
 
 
 #-----------------------------------------------------------------------
@@ -160,8 +186,7 @@ class Index(mapping.Mapping):
 	def __setitem__(self, name, value):
 		'''
 		'''
-		save_file_index({name: value}, self._path, index_depth=self.__index_depth__)
-##		raise NotImplementedError
+		dump({name: value}, self._path, index_depth=self.__index_depth__)
 	def __delitem__(self, name):
 		'''
 		'''
@@ -242,7 +267,7 @@ class IndexWithCache(Index):
 		'''
 		'''
 		if keys == ():
-			return save_file_index(self._cache, self._path, index_depth=self.__index_depth__)
+			return dump(self._cache, self._path, index_depth=self.__index_depth__)
 		flush = {}
 		for k in keys:
 			if k is REMOVED:
@@ -251,7 +276,7 @@ class IndexWithCache(Index):
 				##!!!
 				continue
 			flush[k] = self[k]
-		return save_file_index(flush, self._path, index_depth=self.__index_depth__)
+		return dump(flush, self._path, index_depth=self.__index_depth__)
 	def cache_drop(self):
 		'''
 		'''