From ce698aa3498ea52b9bfe51189ef7b08e5c0a939c Mon Sep 17 00:00:00 2001 From: "Alex A. Naanou" Date: Tue, 13 Dec 2011 18:30:24 +0400 Subject: [PATCH] - added 20K item test code... - added variable depth index (index.Index is not done yet -- hard-coded depths) XXX index.Index needs to be topology-agnostic. Signed-off-by: Alex A. Naanou --- index.py | 74 +++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 57 insertions(+), 17 deletions(-) diff --git a/index.py b/index.py index 4fc31835..00b8ffb0 100755 --- a/index.py +++ b/index.py @@ -1,7 +1,7 @@ #======================================================================= __version__ = '''0.0.01''' -__sub_version__ = '''20111209012407''' +__sub_version__ = '''20111213182632''' __copyright__ = '''(c) Alex A. Naanou 2011''' @@ -248,16 +248,24 @@ def split_images(index): # XXX is this a good way to serialize the actual data in the fs??? # NOTE: these will work with any topoloy and create a flat index... -def save_file_index(index, path, flat_index=False, ext='.json'): +def save_file_index(index, path, index_depth=1, ext='.json'): ''' + + NOTE: index_depth with value greater than 2 is an overkill. ''' root_index = {} for k, v in index.items(): - if not flat_index: - d = k[:2] - if not os.path.exists(os.path.join(path, d)): - os.mkdir(os.path.join(path, d)) - p = os.path.join(path, d, k + ext) + if index_depth > 0: + d = [] + rest = k + # build index path... + for i in xrange(index_depth): + d += [rest[:2]] + rest = rest[2:] + # recursive directory construction... + if not os.path.exists(os.path.join(path, *d)): + os.mkdir(os.path.join(path, *d)) + p = os.path.join(path, *d + [k + ext]) else: p = os.path.join(path, k + ext) json.dump(v, file(p, 'w'), indent=4, separators=(', ', ': ')) @@ -310,15 +318,17 @@ def pack_file_index(path, ext='.json', pack_ext='.pack', keep_files=False, keep_ z.close() ##!!! get path by name helper... +##!!! #----------------------------------------------------------------------- -##!!! add a lazy dict-like object that reads and writes (optional) the fs... +# lazy dict-like objects that read and write (optional) the fs... import pli.pattern.mixin.mapping as mapping import pli.objutils as objutils # XXX might be good to do a path index... +##!!! make this archive/file structure-agnostic... class Index(mapping.Mapping): __json_ext__ = '.json' __pack_ext__ = '.pack' @@ -327,18 +337,31 @@ class Index(mapping.Mapping): ''' ''' self._path = path - def __getitem__(self, name): + + # specific interface... + ##!!! make this support different depths... + def __locations__(self, name): ''' ''' ext = self.__json_ext__ - pack_ext = self.__pack_ext__ - file_name = name + ext + name += ext # build probable locations... - locations = ( - file_name, + return ( + name, # look in a directory... - os.path.join(name[:2], file_name), + os.path.join(name[:2], name), + ##!!! HACK: make this dynamic... + os.path.join(name[:2], name[2:4], name), ) + + # mapping interface... + def __getitem__(self, name): + ''' + ''' +## ext = self.__json_ext__ + pack_ext = self.__pack_ext__ +## file_name = name + ext + locations = self.__locations__(name) # look of the file directly... for n in locations: if os.path.exists(os.path.join(self._path, n)): @@ -353,8 +376,9 @@ class Index(mapping.Mapping): ## return json.load(file(os.path.join(p, file_name))) if f.endswith(pack_ext): z = zipfile.ZipFile(os.path.join(p, f)) - if file_name in z.namelist(): - return json.loads(z.read(file_name)) + for n in locations: + if n in z.namelist(): + return json.loads(z.read(n)) raise KeyError, name def __setitem__(self, name, value): ''' @@ -476,7 +500,7 @@ if __name__ == '__main__': - root_index = save_file_index(index, os.path.join('test', 'index'), flat_index=False) + root_index = save_file_index(index, os.path.join('test', 'index'), index_depth=1) ## ##!!! this is not used in anything yet... ## json.dump(root_index, file(os.path.join('test', 'index', 'file_index.json'), 'w')) @@ -515,6 +539,22 @@ if __name__ == '__main__': os.remove(os.path.join('test', 'index', 'index.pack')) + TEST_20K_ITEMS = False + + if TEST_20K_ITEMS: + print 'doing a 20k test...' + + print 'loading...' + full = dict(json.load(file(os.path.join('test', 'filelist of 20k files.json')))) + + print 'writing files...' + root_index = save_file_index(full, os.path.join('test', 'index'), index_depth=1) + + print 'packing files...' + # NOTE: the initial archiving seems REALLY SLOW, but working with + # small numbers of files from the archive seems adequate... + pack_file_index(os.path.join('test', 'index'), keep_files=True) + #=======================================================================