rewritten image_gid(...) to be more flexible and stable (also squashed a fiew bugs), and some minor changes...

Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
2025-10-31 03:10:07 +00:00 · 2012-03-02 16:17:12 +04:00 · 2012-03-02 16:17:12 +04:00 · 75b5629a11
commit 75b5629a11
parent 23ded78fbd
1 changed files with 57 additions and 44 deletions
--- a/index2.py
+++ b/index2.py
@ -1,7 +1,7 @@
 #=======================================================================
 __version__ = '''0.0.01'''
-__sub_version__ = '''20120302022717'''
+__sub_version__ = '''20120302161602'''
 __copyright__ = '''(c) Alex A. Naanou 2011'''
@ -84,35 +84,61 @@ SUBTREE_CLASSES = {
 # XXX need a strategy to check if two files that have the same GID are
 # 	  identical, and if so, need to destinguish them in the GID...
 # 	  might be a good idea to add a file hash
-def image_gid(path):
+# XXX not yet sure if this is unique enough to avoid conflicts if one
 # 	  photographer has enough cameras...
 # XXX also might be wise to add a photographer ID into here...
 def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%H%M%S'):
 	'''
 	Calgulate image GID.
-	Format:
+	Main gid criteria:
-		<date>-<time>-<filename>
+	 	- unique
 	 	- calculable from the item (preferably any sub-item)
 	 	- human-readable
 	Default format:
 		<artist>-<datetime>-<filename>
 	Example:
 		20110627-195706-DSC_1234	
 	Supported fields:
 		%(artist)s	- Exif.Image.Artist field, stripped and spaces replaced with underscores.
 		%(date)s	- Exif.Image.DateTime formated to date_format argument.
 		%(name)s	- file name.
 	NOTE: date and time are the date and time the image was made ('Exif.Image.DateTime')
 	NOTE: need EXIF data to generate a GID
 	'''
-	i = metadata.ImageMetadata('%s' % path)
+	data = {
-	i.read()
+		'name': os.path.splitext(os.path.split(path)[-1])[0]
-	d = i['Exif.Image.DateTime'].value
+	}
-	return '%s-%s' % (d.strftime('%Y%m%d-%H%M%S'), name)
+	# check if we need a date in the id...
 	if '%(date)s' in format:
 		i = metadata.ImageMetadata('%s' % path)
 		i.read()
 		d = i['Exif.Image.DateTime'].value
 		data['date'] = d.strftime(date_format)
 	# check if we need an artist...
 	if '%(artist)s' in format:
 		data['artist'] = i['Exif.Image.Artist'].value.strip().replace(' ', '_')
 	return format % data
 ##!!! we will need to normalize the paths to one single scheme (either relative or absolute)...
 # XXX might need to fetch file data too...
-def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=False):
+def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=False, include_ctime=True):
 	'''
 	yields:
-		(<path>, <name>, <ext>),
+		(<path>, <name>, <ext>[, <ctime>]),
 	'''
-	for path, dirs, files in os.walk(root):
+	for orig_path, dirs, files in os.walk(root):
 		# XXX is this correct...
-		path = path.split(os.path.sep)
+		path = orig_path.split(os.path.sep)
 		# remove root from path...
 		if not include_root_path:
 			path = path[len(root.split(os.path.sep)):]
 		# process files...
 		for f in files:
 			name, ext = os.path.splitext(f)
@ -120,8 +146,9 @@ def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=Fal
 			ext = ext[1:]
 			# filter by ext...
 			if ext == type:
-				if not include_root_path:
+				if include_ctime:
-					yield path[len(root.split(os.path.sep)):], name, ext
+					t = os.path.getctime(os.path.join(orig_path, f))
 					yield path, name, ext, t
 				else:
 					yield path, name, ext
@ -141,20 +168,13 @@ if __name__ == '__main__':
 		print len(lst)
 		pprint(lst[0])
-		json.dump(lst, file(FILE_LIST), 'w')
+		json.dump(lst, file(FILE_LIST, 'w'))
 	lst = json.load(file(FILE_LIST))
 	print len(lst)
 ##	lst.sort()
 	# sort via name, ext, path
-	lst.sort(key=lambda e: (e[1], e[-1], e[0]))
+	lst.sort(key=lambda e: (e[1], e[2], e[0]))
 	##!!! duplicate a raw file...
 	for p, n, t in lst:
 		if t == RAW:
 			lst += [(p, n, t)]
 			break
 	# index by name (indexing preparation)...
 	# {
@ -165,11 +185,11 @@ if __name__ == '__main__':
 	# 	...
 	# }
 	index = {}
-	for p, n, t in lst:
+	for p, n, t, c in lst:
 		if n in index:
-			index[n] += [(p, n, t)]
+			index[n] += [(p, n, t, c)]
 		else:
-			index[n] = [(p, n, t)]
+			index[n] = [(p, n, t, c)]
 	# index via a propper GID...
 	# split similarly named but different files...
@ -178,7 +198,7 @@ if __name__ == '__main__':
 		l.sort()
-		raws = [e for e in l if e[-1] == RAW] 
+		raws = [e for e in l if e[2] == RAW] 
 		for raw in raws:
 			if len(raws) > 1:
@ -190,28 +210,21 @@ if __name__ == '__main__':
 				##!!!
 				print 'skipping.'
 				break
-			##!!! gid construction should be a customizable function in itself...
+			# get file GID...
-			# main gid criteria:
+			GID = image_gid('%s.%s' % (os.path.join(*[config['ARCHIVE_ROOT']] + raw[0] + [raw[1]]), raw[2]))
 			# 	- unique
 			# 	- calculable from the item (preferably any sub-item)
 			# 	- human-readable
 ##			GID = '%s-%s' % (uuid.uuid4().hex, name)
 			##!!! get RAW file creation date from EXIF...
 			# XXX to avoid further ambiguity need to encode the camera
 			# into file name, e.g. S01_1234 for SLR 01 and RO1_4321 for
 			# rangefinder 01 and finally C01 for compact 01, etc.
 			GID = image_gid('%s.%s' % (os.path.join(*[config['ARCHIVE_ROOT']] + raw[0] + [raw[1]]), raw[-1]))
 			GID_index[GID] = {
 				'gid': GID,
 				'name': name,
 				'imported': time.time(),
 				# NOTE: this might get distorted on archiving...
 				'ctime': raw[3], 
 				'RAW': raws,
-				'XMP': [e for e in l if e[-1] == XMP],
+				'XMP': [e for e in l if e[2] == XMP],
-				'JPG': [e for e in l if e[-1] == JPEG],
+				'JPG': [e for e in l if e[2] == JPEG],
-				'PSD': [e for e in l if e[-1] == PSD],
+				'PSD': [e for e in l if e[2] == PSD],
-				'TIFF': [e for e in l if e[-1] == TIFF],
+				'TIFF': [e for e in l if e[2] == TIFF],
-				'other': [e for e in l if e[-1] != OR(TIFF, PSD, JPEG, XMP, RAW)],
+				'other': [e for e in l if e[2] != OR(TIFF, PSD, JPEG, XMP, RAW)],
 			}
@ -224,7 +237,7 @@ if __name__ == '__main__':
 	print GID
-	print len(GID_index), len([ e for e in lst if e[-1] == RAW])
+	print len(GID_index), len([ e for e in lst if e[2] == RAW])
 	pprint(GID_index.values()[0])