# ImageGrid/index2.py

#=======================================================================

# Module version metadata.
__version__ = '''0.0.01'''
# NOTE(review): looks like a YYYYMMDDHHMMSS timestamp, presumably
# 		maintained by tooling -- confirm before editing by hand.
__sub_version__ = '''20120302014841'''
__copyright__ = '''(c) Alex A. Naanou 2011'''


#-----------------------------------------------------------------------
# The first index.py might be a little too complicated. Try to resolve
# this as follows:
# 	- list all relevant files (RAW, XMP, JPG, PSD, ...)
# 	- group by path (closeness)
# 		- deepest common path to contain all files with common name.
# 		  this will fail if we have different files with same names.
#
#-----------------------------------------------------------------------

import os
import json
import zipfile
import uuid
import time

from itertools import izip, izip_longest

from pli.logictypes import ANY, OR

from pprint import pprint


#-----------------------------------------------------------------------

# Name of the JSON configuration file, looked up relative to the current
# working directory...
CONFIG_NAME = 'test_config.json'
##CONFIG_NAME = 'tmp_config.json'

# Load the configuration once at import time.
# NOTE: the file is opened via a context manager so the handle is closed
# 		deterministically instead of being left to the garbage collector.
with open(CONFIG_NAME) as _config_file:
	config = json.load(_config_file)

# File extension patterns, one per supported file class.
# NOTE: both the upper and the lower case spellings are listed explicitly,
# 		mixed case extensions (e.g. 'Jpg') are not covered.
# NOTE(review): OR(..) is presumably a pattern object that compares equal
# 		to any one of its arguments -- confirm against pli.logictypes.
RAW = OR('NEF', 'nef', 'CRW', 'crw', 'CR2', 'cr2', 'X3F', 'x3f')
JPEG = OR('JPG', 'jpg', 'JPEG', 'jpeg')
PSD = OR('PSD', 'psd')
TIFF = OR('TIFF', 'tiff', 'TIF', 'tif')
XMP = OR('XMP', 'xmp')

# any of the supported extensions...
ITEM = OR(RAW, JPEG, PSD, TIFF, XMP)

# type name -> extension pattern...
TYPES = dict(
	raw=RAW,
	jpeg=JPEG,
	psd=PSD,
	tiff=TIFF,
	xmp=XMP,
)


# sub-tree directory name -> sub-tree class...
SUBTREE_CLASSES = dict([
	('preview', 'preview'),
	('preview (RAW)', 'RAW preview'),
])


#-----------------------------------------------------------------------

##!!! we will need to normalize the paths to one single scheme (either relative or absolute)...
# XXX might need to fetch file data too...
def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=False):
	'''
	Recursively walk root and yield an entry for each file whose
	extension matches type.

	Arguments:
		root				- directory to walk.
		sub_trees			- currently unused.
		type				- object the extension (without the leading
							  dot) is compared to via ==.
		include_root_path	- if true, yield the path exactly as reported
							  by os.walk(..) (i.e. starting with root),
							  otherwise yield the path relative to root.

	yields:
		(<path>, <name>, <ext>),

	where <path> is the list of directory names leading to the file
	(empty for files directly in root when include_root_path is false),
	<name> is the file name without the extension and <ext> is the
	extension without the dot.
	'''
	for path, dirs, files in os.walk(root):
		if include_root_path:
			# XXX is this correct...
			parts = path.split(os.path.sep)
		else:
			# NOTE: the root prefix is stripped via relpath(..) rather than
			# 		by counting root components, as counting is off by one
			# 		when root ends with a path separator...
			rel = os.path.relpath(path, root)
			parts = [] if rel == os.path.curdir else rel.split(os.path.sep)
		# process files...
		for f in files:
			name, ext = os.path.splitext(f)
			# we need the extension without the dot...
			ext = ext[1:]
			# filter by ext...
			if ext == type:
				# NOTE: yield a copy so that consumers mutating one entry
				# 		do not affect the others from the same directory...
				yield list(parts), name, ext


#-----------------------------------------------------------------------
if __name__ == '__main__':

	FILE_LIST = os.path.join('test', 'flatfilelist.json')
	BUILD_FILE_LIST = False if os.path.exists(FILE_LIST) else True


	if BUILD_FILE_LIST:
		lst = list(list_files(config['ARCHIVE_ROOT']))

		print len(lst)
		pprint(lst[0])

		json.dump(lst, file(FILE_LIST), 'w')

	lst = json.load(file(FILE_LIST))
	print len(lst)

##	lst.sort()
	# sort via name, ext, path
	lst.sort(key=lambda e: (e[1], e[-1], e[0]))

	##!!! duplicate a raw file...
	for p, n, t in lst:
		if t == RAW:
			lst += [(p, n, t)]
			break

	# index by name (indexing preparation)...
	# {
	# 	<name> : [
	# 		(<path>, <name>, <type>),
	# 		...
	# 	],
	# 	...
	# }
	index = {}
	for p, n, t in lst:
		if n in index:
			index[n] += [(p, n, t)]
		else:
			index[n] = [(p, n, t)]

	# index via a propper GID...
	# split similarly named but different files...
	GID_index = {}
	for name, l in index.items():

		l.sort()

		raws = [e for e in l if e[-1] == RAW]

		for raw in raws:
			if len(raws) > 1:
				print 'duplicates: %s (%sx)...' % (name, len(raws)),
				# split the group into c seporate groups...
				# strategies:
				# 	- path proximity (distance)
				# 	- metadata
				##!!!
				print 'skipping.'
				break
			##!!! gid construction should be a customizable function in itself...
			# main gid criteria:
			# 	- unique
			# 	- calculable from the item (preferably any sub-item)
##			GID = '%s-%s' % (uuid.uuid4().hex, name)
			##!!! get RAW file creation date from EXIF...
##			GID = '%s-%s' % (hex(long(time.time()*1000))[2:-1].upper(), name)
			# GID should be human-readable...
			# XXX to avoid further ambiguity need to encode the camera
			# into file name, e.g. S01_1234 for SLR 01 and RO1_4321 for
			# rangefinder 01 and finally C01 for compact 01, etc.
			GID = '%s-%s' % (time.strftime('%Y%m%d-%H%M%S'), name)

			GID_index[GID] = {
				'gid': GID,
				'name': name,
				'RAW': raws,
				'XMP': [e for e in l if e[-1] == XMP],
				'JPG': [e for e in l if e[-1] == JPEG],
				'PSD': [e for e in l if e[-1] == PSD],
				'TIFF': [e for e in l if e[-1] == TIFF],
				'other': [e for e in l if e[-1] != OR(TIFF, PSD, JPEG, XMP, RAW)],
			}


	##!!! TODO: archive descriptions to help index/tag items...

	# NOTE: each import from an existing archive will be as follows:
	# 			- full listing
	# 			- find new subtrees
	# 			- find modified items (file date diff)


	print GID
	print len(GID_index), len([ e for e in lst if e[-1] == RAW])

	pprint(GID_index.values()[0])


#=======================================================================
#                                            vim:set ts=4 sw=4 nowrap :