mirror of
				https://github.com/flynx/ImageGrid.git
				synced 2025-10-30 19:00:09 +00:00 
			
		
		
		
	- implemented a basic path-based grouping strategy for files with identical names; not all corner cases are covered yet
- need to fix the tests: RAWs from different locations are now grouped via GID (this is correct, but the test is now wrong)
- need to reorganize the code (index2.py)
Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
This commit is contained in:
		
							parent
							
								
									75b5629a11
								
							
						
					
					
						commit
						c3db4c5724
					
				
							
								
								
									
										88
									
								
								index2.py
									
									
									
									
									
								
							
							
						
						
									
										88
									
								
								index2.py
									
									
									
									
									
								
							| @ -1,7 +1,7 @@ | |||||||
| #======================================================================= | #======================================================================= | ||||||
| 
 | 
 | ||||||
| __version__ = '''0.0.01''' | __version__ = '''0.0.01''' | ||||||
| __sub_version__ = '''20120302161602''' | __sub_version__ = '''20120303020603''' | ||||||
| __copyright__ = '''(c) Alex A. Naanou 2011''' | __copyright__ = '''(c) Alex A. Naanou 2011''' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -37,6 +37,7 @@ CONFIG_NAME = 'test_config.json' | |||||||
| 
 | 
 | ||||||
| config = json.load(open(CONFIG_NAME)) | config = json.load(open(CONFIG_NAME)) | ||||||
| 
 | 
 | ||||||
|  | # XXX move this to a context-dependent module... | ||||||
| RAW = OR( | RAW = OR( | ||||||
| 	'NEF', 'nef',  | 	'NEF', 'nef',  | ||||||
| 	'CRW', 'crw', | 	'CRW', 'crw', | ||||||
| @ -111,7 +112,7 @@ def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-% | |||||||
| 	NOTE: need EXIF data to generate a GID | 	NOTE: need EXIF data to generate a GID | ||||||
| 	''' | 	''' | ||||||
| 	data = { | 	data = { | ||||||
| 		'name': os.path.splitext(os.path.split(path)[-1])[0] | 		'name': os.path.splitext(os.path.split(path)[-1])[0], | ||||||
| 	} | 	} | ||||||
| 	# check if we need a date in the id... | 	# check if we need a date in the id... | ||||||
| 	if '%(date)s' in format: | 	if '%(date)s' in format: | ||||||
| @ -153,6 +154,23 @@ def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=Fal | |||||||
| 					yield path, name, ext | 					yield path, name, ext | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def common_len(a, *b): | ||||||
|  | 	''' | ||||||
|  | 	''' | ||||||
|  | 	for i, l in enumerate(izip(*(a,) + b)): | ||||||
|  | 		if len(set(l)) != 1: | ||||||
|  | 			return i | ||||||
|  | 	return len(min(*(a,) + b)) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | ##!!! is this meaningless? | ||||||
|  | def path_distance(a, b): | ||||||
|  | 	''' | ||||||
|  | 	''' | ||||||
|  | 	return len(a) + len(b) - common_len(a, b)*2 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| #----------------------------------------------------------------------- | #----------------------------------------------------------------------- | ||||||
| @ -194,22 +212,58 @@ if __name__ == '__main__': | |||||||
| 	# index via a proper GID... | 	# index via a proper GID... | ||||||
| 	# split similarly named but different files... | 	# split similarly named but different files... | ||||||
| 	GID_index = {} | 	GID_index = {} | ||||||
|  | 	failed = [] | ||||||
| 	for name, l in index.items(): | 	for name, l in index.items(): | ||||||
| 
 | 
 | ||||||
| 		l.sort() | 		l.sort() | ||||||
| 
 | 
 | ||||||
| 		raws = [e for e in l if e[2] == RAW]  | 		raws = [e for e in l if e[2] == RAW]  | ||||||
| 
 | 
 | ||||||
| 		for raw in raws: | 		# handle multiple raw files... | ||||||
| 		if len(raws) > 1: | 		if len(raws) > 1: | ||||||
| 				print 'duplicates: %s (%sx)...' % (name, len(raws)), | 			common = common_len(*[ e[0] for e in raws ]) | ||||||
| 				# split the group into c separate groups... | 
 | ||||||
| 				# strategies: | 			# NOTE: do not change the order of raws after this point | ||||||
| 				# 	- path proximity (distance) | 			# 		and till the end of the loop... | ||||||
| 				# 	- metadata | 			# 		XXX revise if there is a simpler way... | ||||||
|  | 			sets = [ (r, [r]) for r in raws ] | ||||||
|  | 
 | ||||||
|  | 			for e in l: | ||||||
|  | 				if e[2] == RAW: | ||||||
|  | 					continue | ||||||
|  | 				# check if we are closer to other raws... | ||||||
|  | 				# NOTE: this depends on stability of order in raws | ||||||
|  | 				c_index = [(common_len(r[0], e[0]), r, i) for i, r in enumerate(raws)] | ||||||
|  | 				c, raw, i = max(*c_index) | ||||||
|  | 				if c_index.count([c, ANY, ANY]) > 1: | ||||||
|  | 					# a file is at a path junction exactly... | ||||||
|  | 					print '    !!! can\'t decide where to put %s.%s...' % (e[1], e[2]) | ||||||
|  | 					##!!! try different strategies here... | ||||||
| 					##!!! | 					##!!! | ||||||
| 				print 'skipping.' | 					failed += [e] | ||||||
| 				break | 				elif c > common: | ||||||
|  | 					# found a proper location... | ||||||
|  | 					s = sets[i][1] | ||||||
|  | 					s += [e] | ||||||
|  | 					##!!! for some reason this does not work.... | ||||||
|  | ##					sets[i][1] += [e] | ||||||
|  | 				else: | ||||||
|  | 					print '    !!! can\'t decide where to put %s.%s...' % (e[1], e[2]) | ||||||
|  | 					##!!! try different strategies here... | ||||||
|  | 					##!!! | ||||||
|  | 					failed += [e] | ||||||
|  | 		# single raw... | ||||||
|  | 		elif len(raws) == 1: | ||||||
|  | 			sets = [(raws[0], l)] | ||||||
|  | 		# no raw files... | ||||||
|  | 		else: | ||||||
|  | 			print 'no raw file found for "%s"...' % os.path.join(name) | ||||||
|  | 			sets = [] | ||||||
|  | 			##!!! need to report this in a usable way... | ||||||
|  | 			failed += l | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 		for raw, l in sets: | ||||||
| 			# get file GID... | 			# get file GID... | ||||||
| 			GID = image_gid('%s.%s' % (os.path.join(*[config['ARCHIVE_ROOT']] + raw[0] + [raw[1]]), raw[2])) | 			GID = image_gid('%s.%s' % (os.path.join(*[config['ARCHIVE_ROOT']] + raw[0] + [raw[1]]), raw[2])) | ||||||
| 
 | 
 | ||||||
| @ -217,7 +271,9 @@ if __name__ == '__main__': | |||||||
| 				'gid': GID, | 				'gid': GID, | ||||||
| 				'name': name, | 				'name': name, | ||||||
| 				'imported': time.time(), | 				'imported': time.time(), | ||||||
| 				# NOTE: this might get distorted on archiving... | 				# NOTE: this might get distorted on archiving or | ||||||
|  | 				# 		copying... | ||||||
|  | 				# 		mostly intended for importing... | ||||||
| 				'ctime': raw[3],  | 				'ctime': raw[3],  | ||||||
| 				'RAW': raws, | 				'RAW': raws, | ||||||
| 				'XMP': [e for e in l if e[2] == XMP], | 				'XMP': [e for e in l if e[2] == XMP], | ||||||
| @ -235,9 +291,13 @@ if __name__ == '__main__': | |||||||
| 	# 			- find new subtrees | 	# 			- find new subtrees | ||||||
| 	# 			- find modified items (file date diff) | 	# 			- find modified items (file date diff) | ||||||
| 	 | 	 | ||||||
| 
 | 	# NOTE: raws number here may be more than indexed because some raws  | ||||||
| 	print GID | 	# 		may get grouped by GID | ||||||
| 	print len(GID_index), len([ e for e in lst if e[2] == RAW]) | 	print '''results: | ||||||
|  | 	indexed: %s | ||||||
|  | 	raws: %s | ||||||
|  | 	failed: %s | ||||||
|  | 	''' % (len(GID_index), len([ e for e in lst if e[2] == RAW]), len(failed)) | ||||||
| 
 | 
 | ||||||
| 	pprint(GID_index.values()[0]) | 	pprint(GID_index.values()[0]) | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user