mirror of
https://github.com/flynx/ImageGrid.git
synced 2025-10-30 19:00:09 +00:00
- implemented basic grouping strategy for files with identical names based on path, still not all corner cases covered
- need to fix tests because RAWs are grouped from different locations via GID (correct but the test is now wrong) - need to reorganize the code (index2.py) Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
This commit is contained in:
parent
75b5629a11
commit
c3db4c5724
92
index2.py
92
index2.py
@ -1,7 +1,7 @@
|
|||||||
#=======================================================================
|
#=======================================================================
|
||||||
|
|
||||||
__version__ = '''0.0.01'''
|
__version__ = '''0.0.01'''
|
||||||
__sub_version__ = '''20120302161602'''
|
__sub_version__ = '''20120303020603'''
|
||||||
__copyright__ = '''(c) Alex A. Naanou 2011'''
|
__copyright__ = '''(c) Alex A. Naanou 2011'''
|
||||||
|
|
||||||
|
|
||||||
@ -37,6 +37,7 @@ CONFIG_NAME = 'test_config.json'
|
|||||||
|
|
||||||
config = json.load(open(CONFIG_NAME))
|
config = json.load(open(CONFIG_NAME))
|
||||||
|
|
||||||
|
# XXX move this to a context-dependent module...
|
||||||
RAW = OR(
|
RAW = OR(
|
||||||
'NEF', 'nef',
|
'NEF', 'nef',
|
||||||
'CRW', 'crw',
|
'CRW', 'crw',
|
||||||
@ -111,7 +112,7 @@ def image_gid(path, format='%(artist)s-%(date)s-%(name)s', date_format='%Y%m%d-%
|
|||||||
NOTE: need EXIF data to generate a GID
|
NOTE: need EXIF data to generate a GID
|
||||||
'''
|
'''
|
||||||
data = {
|
data = {
|
||||||
'name': os.path.splitext(os.path.split(path)[-1])[0]
|
'name': os.path.splitext(os.path.split(path)[-1])[0],
|
||||||
}
|
}
|
||||||
# check if we need a date in the id...
|
# check if we need a date in the id...
|
||||||
if '%(date)s' in format:
|
if '%(date)s' in format:
|
||||||
@ -153,6 +154,23 @@ def list_files(root, sub_trees=SUBTREE_CLASSES, type=ITEM, include_root_path=Fal
|
|||||||
yield path, name, ext
|
yield path, name, ext
|
||||||
|
|
||||||
|
|
||||||
|
def common_len(a, *b):
    '''
    Return the length of the longest common prefix of the given sequences.

    The sequences are walked in lockstep, item by item; the result is the
    index of the first position at which the items differ, or the length
    of the shortest sequence if no difference is found.

    NOTE: items must be hashable (they are compared via a set).
    NOTE: a single sequence is its own common prefix, thus its full
        length is returned.
    '''
    seqs = (a,) + b
    # NOTE: zip stops at the shortest sequence, so only positions present
    #       in every sequence are compared...
    for i, items in enumerate(zip(*seqs)):
        if len(set(items)) != 1:
            return i
    # no mismatch found -- the shortest sequence is the common prefix...
    # NOTE: compare lengths explicitly, min(..) over the sequences
    #       themselves breaks when only one sequence is given (it would
    #       pick the smallest item instead)...
    return min(len(s) for s in seqs)
|
||||||
|
|
||||||
|
|
||||||
|
##!!! is this meaningless?
|
||||||
|
def path_distance(a, b):
    '''
    Return the number of items in a and b that lie outside their common
    prefix (as measured by common_len).

    Identical sequences yield 0; each item past the shared prefix, in
    either sequence, adds 1.
    '''
    shared = common_len(a, b)
    # count the non-shared tail of each sequence...
    return (len(a) - shared) + (len(b) - shared)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#-----------------------------------------------------------------------
|
#-----------------------------------------------------------------------
|
||||||
@ -194,22 +212,58 @@ if __name__ == '__main__':
|
|||||||
# index via a propper GID...
|
# index via a propper GID...
|
||||||
# split similarly named but different files...
|
# split similarly named but different files...
|
||||||
GID_index = {}
|
GID_index = {}
|
||||||
|
failed = []
|
||||||
for name, l in index.items():
|
for name, l in index.items():
|
||||||
|
|
||||||
l.sort()
|
l.sort()
|
||||||
|
|
||||||
raws = [e for e in l if e[2] == RAW]
|
raws = [e for e in l if e[2] == RAW]
|
||||||
|
|
||||||
for raw in raws:
|
# handle multiple raw files...
|
||||||
if len(raws) > 1:
|
if len(raws) > 1:
|
||||||
print 'duplicates: %s (%sx)...' % (name, len(raws)),
|
common = common_len(*[ e[0] for e in raws ])
|
||||||
# split the group into c seporate groups...
|
|
||||||
# strategies:
|
# NOTE: do not change the order of raws after this point
|
||||||
# - path proximity (distance)
|
# and till the end of the loop...
|
||||||
# - metadata
|
# XXX revise if there is a simpler way...
|
||||||
##!!!
|
sets = [ (r, [r]) for r in raws ]
|
||||||
print 'skipping.'
|
|
||||||
break
|
for e in l:
|
||||||
|
if e[2] == RAW:
|
||||||
|
continue
|
||||||
|
# check if we are closer to other raws...
|
||||||
|
# NOTE: this depends on stability of order in raws
|
||||||
|
c_index = [(common_len(r[0], e[0]), r, i) for i, r in enumerate(raws)]
|
||||||
|
c, raw, i = max(*c_index)
|
||||||
|
if c_index.count([c, ANY, ANY]) > 1:
|
||||||
|
# a file is at a path junction exactly...
|
||||||
|
print ' !!! can\'t decide where to put %s.%s...' % (e[1], e[2])
|
||||||
|
##!!! try different strategies here...
|
||||||
|
##!!!
|
||||||
|
failed += [e]
|
||||||
|
elif c > common:
|
||||||
|
# found a proper location...
|
||||||
|
s = sets[i][1]
|
||||||
|
s += [e]
|
||||||
|
##!!! for some reason this does not work....
|
||||||
|
## sets[i][1] += [e]
|
||||||
|
else:
|
||||||
|
print ' !!! can\'t decide where to put %s.%s...' % (e[1], e[2])
|
||||||
|
##!!! try different strategies here...
|
||||||
|
##!!!
|
||||||
|
failed += [e]
|
||||||
|
# single raw...
|
||||||
|
elif len(raws) == 1:
|
||||||
|
sets = [(raws[0], l)]
|
||||||
|
# no raw files...
|
||||||
|
else:
|
||||||
|
print 'no raw file found for "%s"...' % os.path.join(name)
|
||||||
|
sets = []
|
||||||
|
##!!! need to report this in a usable way...
|
||||||
|
failed += l
|
||||||
|
|
||||||
|
|
||||||
|
for raw, l in sets:
|
||||||
# get file GID...
|
# get file GID...
|
||||||
GID = image_gid('%s.%s' % (os.path.join(*[config['ARCHIVE_ROOT']] + raw[0] + [raw[1]]), raw[2]))
|
GID = image_gid('%s.%s' % (os.path.join(*[config['ARCHIVE_ROOT']] + raw[0] + [raw[1]]), raw[2]))
|
||||||
|
|
||||||
@ -217,7 +271,9 @@ if __name__ == '__main__':
|
|||||||
'gid': GID,
|
'gid': GID,
|
||||||
'name': name,
|
'name': name,
|
||||||
'imported': time.time(),
|
'imported': time.time(),
|
||||||
# NOTE: this might get distorted on archiving...
|
# NOTE: this might get distorted on archiving or
|
||||||
|
# copying...
|
||||||
|
# mostly intended for importing...
|
||||||
'ctime': raw[3],
|
'ctime': raw[3],
|
||||||
'RAW': raws,
|
'RAW': raws,
|
||||||
'XMP': [e for e in l if e[2] == XMP],
|
'XMP': [e for e in l if e[2] == XMP],
|
||||||
@ -235,9 +291,13 @@ if __name__ == '__main__':
|
|||||||
# - find new subtrees
|
# - find new subtrees
|
||||||
# - find modified items (file date diff)
|
# - find modified items (file date diff)
|
||||||
|
|
||||||
|
# NOTE: raws number here may be more than indexed because some raws
|
||||||
print GID
|
# may get grouped by GID
|
||||||
print len(GID_index), len([ e for e in lst if e[2] == RAW])
|
print '''results:
|
||||||
|
indexed: %s
|
||||||
|
raws: %s
|
||||||
|
failed: %s
|
||||||
|
''' % (len(GID_index), len([ e for e in lst if e[2] == RAW]), len(failed))
|
||||||
|
|
||||||
pprint(GID_index.values()[0])
|
pprint(GID_index.values()[0])
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user