#=======================================================================
__version__ = '''0.0.01'''
__sub_version__ = '''20130325170937'''

__copyright__ = '''(c) Alex A. Naanou 2011'''


#-----------------------------------------------------------------------

import os
import json
import zipfile
import time

import pli.pattern.mixin.mapping as mapping
import pli.objutils as objutils


#-----------------------------------------------------------------------
# XXX is this a good way to serialize the actual data in the fs???

#----------------------------------------------------------------dump---
# NOTE: these will work with any topology and create a flat index...
# XXX should this know anything about data versions???
def dump(index, path, index_depth=1, ext='.json'):
	'''
	store an index in fs store.

	by default the structure is as follows:
		key:  abcdefg
		path: ab/abcdefg		(index_depth=1)

	index_depth sets the directory structure, if 0 a flat store is created.

	here is an example path for index_depth=2:
		path: ab/cd/abcdefg

	the dict value is stored in the file in JSON format.

	NOTE: this can be used with parts of a dict.
	NOTE: existing data will be overwritten.
	NOTE: store balancing depends on key structure.
	NOTE: index_depth with value greater than 2 is likely an overkill.
	NOTE: at this point there is no support for recursive or linked data,
	      so everything will be unlinked/duplicated on dump, or in case
	      of a recursion, json will choke with a ValueError...
	'''
	root_index = {}
	for k, v in index.items():
		if index_depth > 0:
			d = []
			rest = k
			# build index path...
			for i in xrange(index_depth):
				d += [rest[:2]]
				rest = rest[2:]
			# recursive directory construction...
			if not os.path.exists(os.path.join(path, *d)):
				os.makedirs(os.path.join(path, *d))
			p = os.path.join(path, *d + [k + ext])
		else:
			p = os.path.join(path, k + ext)
		json.dump(v, file(p, 'w'), indent=4, separators=(', ', ': '))
		root_index[k] = p
	return root_index


#----------------------------------------------------------------load---
##!!! make an iterator version...
def load(path, ext='.json', pack_ext='.pack'):
	'''
	load data from fs store.

	for data format see dump(...).

	NOTE: this will load the whole data set.
	NOTE: unpacked data shadows packed data.
	NOTE: this does not care about topology.
	'''
	d = {}
	unpacked = {}
	for p, _, files in os.walk(path):
		for f in files:
			# handle single files...
			if f.endswith(ext):
				unpacked[os.path.splitext(f)[0]] = json.load(file(os.path.join(p, f)))
			# handle packs...
			elif f.endswith(pack_ext):
				pack = zipfile.ZipFile(os.path.join(p, f))
				# load elements from the pack...
				for name in pack.namelist():
					if name.endswith(ext):
						d[os.path.splitext(name)[0]] = json.loads(pack.read(name))
	# unpacked data shadows packed data...
	d.update(unpacked)
	return d
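#-------------------------------------------------------------example---
# a minimal dump/load round-trip sketch (illustrative only): the
# './tmp-store' path and the keys/values below are assumptions made up
# for the example, not anything the store itself requires...
def _example_dump_load(path='./tmp-store'):
	'''
	dump a small index into a fs store and load it back.
	'''
	index = {
		'abcdef01': {'title': 'first record'},
		'abcdef02': {'title': 'second record'},
	}
	# with the default index_depth=1 this writes <path>/ab/abcdef01.json, etc...
	dump(index, path)
	# read every *.json (and *.pack) under path back into one flat dict...
	return load(path)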
#----------------------------------------------------------------pack---
# XXX should we remove empty dirs here???
# XXX this will create duplicate files within the pack
#     only the last is accessible but this might cause trouble elsewhere...
# NOTE: this should be done in the background (possible race-condition
#       with removing a file while it is being read)
def pack(path, pack_name='%(timestamp)s', ext='.json', pack_ext='.pack',
		keep_files=False, keep_dirs=False, date_format='%Y%m%d-%H%M%S'):
	'''
	pack an fs data store.

	Supported fields in pack_name:
		%(timestamp)s	- time stamp in the date_format format

	NOTE: if keep_files is True, keep_dirs option will be ignored.
	NOTE: if pack_name is static and a pack file with that name exists
	      then the files will be added to that pack.
	'''
	data = {
		'timestamp': time.strftime(date_format),
	}
	##!!! this will not remove original entries if they exist...
	z = zipfile.ZipFile(os.path.join(path, (pack_name % data) + pack_ext), 'a',
			compression=zipfile.ZIP_DEFLATED)
	for p, _, files in os.walk(path):
		for f in files:
			if f.endswith(ext):
				z.write(os.path.join(p, f), os.path.split(f)[-1])
				if not keep_files:
					os.remove(os.path.join(p, f))
		# XXX this will not remove empty dirs (push one
		#     level up for that...)
		if not keep_dirs and p != path:
			##!!! check if dir is empty....
			try:
				# NOTE: this will fail for non-empty dirs...
				os.rmdir(os.path.join(p))
			except OSError:
				pass
	z.close()


#-----------------------------------------------------------cleanpack---
def cleanpack(path, pack_name='%(timestamp)s', ext='.json', pack_ext='.pack',
		keep_files=False, keep_dirs=False, date_format='%Y%m%d-%H%M%S'):
	'''
	make a clean pack, removing duplicate entries.
	'''
	data = {
		'timestamp': time.strftime(date_format),
	}
	name = os.path.join(path, (pack_name % data) + pack_ext)
	##!!! this will load the whole monster to memory, need something better...
	index = load(path)
	z = zipfile.ZipFile(name, 'w', compression=zipfile.ZIP_DEFLATED)
	for k, v in index.iteritems():
		z.writestr(k + ext, json.dumps(v, indent=4, separators=(', ', ': ')))
	z.close()


#-----------------------------------------------------------------------
# lazy dict-like objects that read and write (optional) the fs...

#---------------------------------------------------------------Index---
# XXX might be good to do a path index...
##!!! make this archive/file structure-agnostic...
class Index(mapping.Mapping):
	'''
	'''
	__json_ext__ = '.json'
	__pack_ext__ = '.pack'
	__index_depth__ = 2

	def __init__(self, path):
		'''
		'''
		self._path = path

	# specific interface...

	##!!! make this support different depths...
	def __locations__(self, name):
		'''
		'''
		ext = self.__json_ext__
		name += ext
		# build probable locations...
		return (
			name,
			# look in a directory...
			os.path.join(name[:2], name),
			##!!! HACK: make this dynamic...
			os.path.join(name[:2], name[2:4], name),
		)

	# mapping interface...

	def __getitem__(self, name):
		'''
		'''
##		ext = self.__json_ext__
		pack_ext = self.__pack_ext__
##		file_name = name + ext
		locations = self.__locations__(name)
		# look for the file directly...
		for n in locations:
			if os.path.exists(os.path.join(self._path, n)):
				return json.load(file(os.path.join(self._path, n)))
		# try and locate a file in a pack...
		for p, _, files in os.walk(self._path):
			# files are searched sorted by their name...
			files.sort()
			for f in files:
##				##!!! do we need to look in odd named directories...
##				if f == file_name:
##					return json.load(file(os.path.join(p, file_name)))
				if f.endswith(pack_ext):
					z = zipfile.ZipFile(os.path.join(p, f))
					for n in locations:
						if n in z.namelist():
							return json.loads(z.read(n))
		raise KeyError, name

	def __setitem__(self, name, value):
		'''
		'''
		dump({name: value}, self._path, index_depth=self.__index_depth__)

	def __delitem__(self, name):
		'''
		'''
		raise NotImplementedError

	def __iter__(self):
		'''
		'''
		visited = []
		packs = []
		ext = self.__json_ext__
		pack_ext = self.__pack_ext__
		for p, _, files in os.walk(self._path):
			for f in files:
				if f.endswith(ext) and f not in visited:
					visited += [f]
					yield os.path.splitext(f)[0]
				elif f.endswith(pack_ext):
					packs += [os.path.join(p, f)]
		for pack in packs:
			z = zipfile.ZipFile(pack)
			for name in z.namelist():
				if name not in visited:
					visited += [name]
					yield os.path.splitext(name)[0]


#-------------------------------------------------------IndexWtihPack---
class IndexWtihPack(object):
	'''
	'''
	def pack(self):
		'''
		pack the index.
		'''
		pack(self._path)
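#-------------------------------------------------------------example---
# a minimal sketch of the lazy mapping interface, reusing the made-up
# './tmp-store' path from the example above and assuming that
# mapping.Mapping imposes no extra construction requirements: Index
# writes single entries through dump(...) and keeps reading them after
# pack(...) rolls the loose *.json files into a *.pack archive...
def _example_index_and_pack(path='./tmp-store'):
	'''
	store one entry through Index, pack the store, read it back.
	'''
	idx = Index(path)
	# dumped straight to <path>/ab/cd/abcdef03.json (index_depth=2)...
	idx['abcdef03'] = {'title': 'third record'}
	# move the loose files into a timestamped .pack (originals are removed)...
	pack(path)
	# the entry is now read transparently from the pack...
	return idx['abcdef03']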
#-----------------------------------------------------------------------

REMOVED = object()


#------------------------------------------------------IndexWithCache---
class IndexWithCache(Index):
	'''
	'''
	objutils.createonaccess('_cache', dict)

	__sync__ = False

	def __getitem__(self, name):
		'''
		'''
		if name in self._cache:
			res = self._cache[name]
			if res is REMOVED:
				raise KeyError, name
			return res
		res = self._cache[name] = super(IndexWithCache, self).__getitem__(name)
		return res

	def __setitem__(self, name, value):
		'''
		'''
		self._cache[name] = value
		if self.__sync__:
			self.cache_flush(name)

	##!!!
	def __delitem__(self, name):
		'''
		'''
		self._cache[name] = REMOVED
		if self.__sync__:
			self.cache_flush(name)

	def __iter__(self):
		'''
		'''
		cache = self._cache
		for e in cache:
			yield e
		for e in super(IndexWithCache, self).__iter__():
			if e not in cache:
				yield e

	# cache management...

	##!!! removed items will not get flushed yet...
	# XXX to make removing elements history compatible, one way to go
	#     is to write a specific value to the file, thus making it
	#     shadow the original value...
	def cache_flush(self, *keys):
		'''
		'''
		if keys == ():
			return dump(self._cache, self._path, index_depth=self.__index_depth__)
		flush = {}
		for k in keys:
			if self._cache.get(k) is REMOVED:
				# remove file...
##				raise NotImplementedError
				##!!!
				continue
			flush[k] = self[k]
		return dump(flush, self._path, index_depth=self.__index_depth__)

	def cache_drop(self):
		'''
		'''
		del self._cache


#---------------------------------------------------IndexWithSubIndex---
##class IndexWithSubIndex(Index):
##	'''
##	'''
##	def indexby(self, attr):
##		'''
##		'''
##		self._sub_indexs
##		for e in self:
##			pass
##	def getby(self, attr, value):
##		'''
##		'''
##		pass


#=======================================================================
# vim:set ts=4 sw=4 nowrap :
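#-------------------------------------------------------------example---
# a minimal IndexWithCache sketch, again over the made-up './tmp-store'
# path; it assumes pli.objutils.createonaccess(...) really does wire up
# the lazy '_cache' dict the class body above asks for. with __sync__
# left False, writes stay in memory until cache_flush() dumps them...
def _example_cached_index(path='./tmp-store'):
	'''
	write through the cache, flush it explicitly, then re-read from disk.
	'''
	idx = IndexWithCache(path)
	# cached only, nothing is written to the fs yet...
	idx['abcdef04'] = {'title': 'fourth record'}
	# push the whole cache to the fs store via dump(...)...
	idx.cache_flush()
	# drop the cache and force a re-read from the store...
	idx.cache_drop()
	return idx['abcdef04']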