mirror of
https://github.com/flynx/ImageGrid.git
synced 2025-10-28 18:00:09 +00:00
got the grouping mostly working. corner cases still fail (run index.py to see the ungrouped files)
Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
This commit is contained in:
parent
ebd2865fed
commit
0816077356
221
index.py
221
index.py
@ -1,7 +1,7 @@
|
||||
#=======================================================================
|
||||
|
||||
__version__ = '''0.0.01'''
|
||||
__sub_version__ = '''20111103010916'''
|
||||
__sub_version__ = '''20111110184147'''
|
||||
__copyright__ = '''(c) Alex A. Naanou 2011'''
|
||||
|
||||
|
||||
@ -9,28 +9,55 @@ __copyright__ = '''(c) Alex A. Naanou 2011'''
|
||||
|
||||
import os
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from pli.logictypes import OR
|
||||
from itertools import izip, izip_longest
|
||||
|
||||
from pli.logictypes import ANY, OR
|
||||
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
CONFIG_NAME = 'config.json'
|
||||
CONFIG_NAME = 'test_config.json'

# load the configuration...
# NOTE: the file is opened via a with block so that the handle is closed
#       as soon as the config is parsed and is not left for the garbage
#       collector to clean up.
with open(CONFIG_NAME) as _config_file:
    config = json.load(_config_file)
|
||||
|
||||
ITEM_EXTENSIONS = (
|
||||
# RAW formats...
|
||||
'NEF', 'nef',
|
||||
# JPEGs...
|
||||
'JPG', 'JPEG', 'jpg', 'jpeg',
|
||||
# Edited images...
|
||||
'PSD', 'psd',
|
||||
'TIFF', 'tiff', 'TIF', 'tif',
|
||||
# metadata sidecar files...
|
||||
'XMP', 'xmp',
|
||||
# file extensions grouped by image type...
# NOTE: both the upper and the lower case spelling of each extension is
#       listed explicitly.
# NOTE(review): OR comes from pli.logictypes; presumably the resulting
#       object compares equal to any one of its arguments -- confirm.
RAW = OR('NEF', 'nef', 'CRW', 'crw', 'CR2', 'cr2', 'X3F', 'x3f')

JPEG = OR('JPG', 'jpg', 'JPEG', 'jpeg')

PSD = OR('PSD', 'psd')

TIFF = OR('TIFF', 'tiff', 'TIF', 'tif')

# metadata sidecar files...
XMP = OR('XMP', 'xmp')

# any of the file types listed above...
ITEM = OR(RAW, JPEG, PSD, TIFF, XMP)

# type name -> extension pattern...
TYPES = {
    'raw': RAW,
    'jpeg': JPEG,
    'psd': PSD,
    'tiff': TIFF,
    'xmp': XMP,
}
|
||||
|
||||
|
||||
SUBTREE_CLASSES = {
|
||||
'preview': 'preview',
|
||||
'preview (RAW)': 'RAW preview',
|
||||
@ -39,22 +66,161 @@ SUBTREE_CLASSES = {
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
def list_files(root, sub_trees=SUBTREE_CLASSES, ext=OR(*ITEM_EXTENSIONS)):
|
||||
##!!! we will need to normalize the paths to one single scheme (either relative or absolute)...
|
||||
def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM):
    '''
    Walk the tree under root and yield (path, name, ext) for each file
    whose extension is equal to type.

    path is the containing directory split on os.path.sep into a list,
    name is the file name without the extension and ext is the
    extension without the leading dot.

    NOTE: all files of one directory share the same path list object.
    NOTE: sub_trees is not used by this function.
    '''
    for dirpath, _dirs, filenames in os.walk(root):
        location = dirpath.split(os.path.sep)
        # process files...
        for filename in filenames:
            name, suffix = os.path.splitext(filename)
            # we need the extension without the dot...
            ext = suffix[1:]
            # filter by ext...
            # NOTE: the comparison is kept as "ext == type" so that the
            #       pattern object in type decides the match.
            if not (ext == type):
                continue
            yield location, name, ext
|
||||
|
||||
|
||||
# XXX need to split duplicate named raw files and corresponding
|
||||
# previews...
|
||||
def index_by_name(file_list, types=TYPES.items()):
    '''
    Group the (path, name, ext) items of file_list by name and by type.

    The extension of each item is normalized to the key of the first
    (key, pattern) pair in types that compares equal to (ANY, ext).
    NOTE(review): this presumes ANY (pli.logictypes) compares equal to
    any key -- confirm.

    format:
        {
            <name>: {
                <type>: [
                    <path>,
                    ...
                ],
                ...
            },
            ...
        }
    '''
    index = {}
    for path, name, ext in file_list:
        # normalize extension...
        position = types.index((ANY, ext))
        kind = types[position][0]
        # create the name and type levels on first use and add the path...
        index.setdefault(name, {}).setdefault(kind, []).append(path)
    return index
|
||||
|
||||
|
||||
# for this to work correctly it must:
|
||||
# - return unique paths
|
||||
# - none of the returned paths can be a strict subset of any other...
|
||||
##!!!
|
||||
def split_common(paths):
    '''
    Truncate each path right after its first element that is not shared.

    paths is a list of paths, each being a list of path elements. The
    paths are compared level by level: an element counts as shared if
    at least one other path has an equal element at the same level.

    Returns a new list with one list per input path, in the same order.
    Each result is the shared leading section of the path capped with
    the first non-shared element of that path. A path without any
    non-shared elements is returned whole.

    NOTE: if a path ends before it differs from the others (i.e. it is
          a prefix of another path) there is no element to cap it with,
          such paths are returned padded with None up to the length of
          the longest path.
    NOTE: the input paths are not modified.
    '''
    # NOTE: we iterate over the paths more than once, so make sure we
    #       are not holding a one-shot iterator...
    paths = list(paths)
    depth = max(len(p) for p in paths) if paths else 0

    # pass 1: build list of common paths (None for all differences)
    # NOTE: we may have stray common path elements but we do
    #       not care about anything after a None...
    common = [[] for _ in paths]
    for level in range(depth):
        # elements of all the paths at this level (None for paths that
        # have already ended)...
        column = [p[level] if level < len(p) else None for p in paths]
        for row, element in zip(common, column):
            row.append(element if column.count(element) > 1 else None)

    # pass 2: cap each common section with a unique element...
    for row, path in zip(common, paths):
        if None not in row:
            continue
        i = row.index(None)
        if len(path) <= i:
            # NOTE: this is the case when we have a None because a
            #       path just ended... i.e. there was no different
            #       element to split at...
            # XXX we leave the None(s) at the end of such paths for
            #     now...
            continue
        # in-place update and truncate the common path...
        row[i] = path[i]
        del row[i+1:]
    return common
|
||||
|
||||
# in essence this needs to replace the image name with a GID and split up
|
||||
# images that are identically named into separate GIDs...
|
||||
def split_images(index):
    '''
    Assign a GID to each image in index, splitting up images that share
    a name.

    index is a dict of the form:
        {<name>: {<type>: [<path>, ...], ...}, ...}

    Yields (gid, data) pairs, gid being a new uuid.uuid4() value:
     - a name with at most one raw file is yielded as a single image,
       data is the original dict for that name.
     - a name with several raw files is split into one record per raw
       file, each of the form:
        {'gid': <gid>, 'name': <name>, <type>: [<path>, ...], ...}
       a file is added to a record if its path starts with the
       corresponding path returned by split_common(..) for the raw
       files.

    Files that match none of the raw files are reported via print and
    are not included in any record.

    Raises Exception if a file matches more than one raw file.

    NOTE: the name is stored in each dict of index in-place (under
          'name').
    '''
    for name, data in index.items():
        # this will not let us lose the name of the image...
        data['name'] = name
        # NOTE: an image may have no raw files at all (e.g. jpeg only),
        #       this is not a reason to fail...
        raw = data.get('raw', [])
        if len(raw) <= 1:
            yield uuid.uuid4(), data
            continue

        # split the images...
        # split images via closeness to one of the raw files...
        # XXX the simple way to split files is to remove the
        #     common part of the path between two raw files and
        #     then split the other files by root of the
        #     subtree.
        #     this will not work in one case:
        #       - at least two of the raw files are in a deeper
        #         subtree than the other accompanying files.
        #         in this case we can not use the topology to
        #         decide which is which and need either to use
        #         some other means or to go inside the image...
        #
        # way to do this:
        #   - build a subtree map -- list of paths until the
        #     first unique directory
        #   - split files by subtree path
        #   - use a different strategy for files that are above
        #     the subtrees...
        common = split_common(raw)

        # prepare the return structure (one record per raw file)...
        res = [{'gid': uuid.uuid4(), 'name': name} for _ in raw]

        # start splitting the data...
        for ext, paths in data.items():
            # skip anything that is not a file list (e.g. 'name')...
            if ext not in TYPES:
                continue
            for path in paths:
                matches = [i for i, c in enumerate(common)
                           if path[:len(c)] == c]
                if len(matches) == 1:
                    # we found a location...
                    res[matches[0]].setdefault(ext, []).append(path)
                elif len(matches) > 1:
                    raise Exception('got %s matches.' % len(matches))
                else:
                    # XXX ungrouped...
                    print('!!!! %s %s %s' % (path, name, ext))

        # yield the results...
        # NOTE: each record is yielded on its own, not the whole list...
        for e in res:
            yield e['gid'], e
|
||||
|
||||
|
||||
|
||||
@ -64,6 +230,15 @@ if __name__ == '__main__':
|
||||
|
||||
print len(lst)
|
||||
|
||||
index = index_by_name(list_files(config['ARCHIVE_ROOT']))
|
||||
|
||||
print len(index)
|
||||
|
||||
json.dump(index, file(os.path.join('test', 'filelist.json'), 'w'))
|
||||
|
||||
index = list(split_images(index_by_name(list_files(config['ARCHIVE_ROOT']))))
|
||||
|
||||
print len(index)
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user