diff --git a/TODO.otl b/TODO.otl new file mode 100755 index 00000000..a68dc748 --- /dev/null +++ b/TODO.otl @@ -0,0 +1,2 @@ +[X] 100% general tasks + [X] need unique image id diff --git a/__init__.py b/__init__.py new file mode 100755 index 00000000..8f61259a --- /dev/null +++ b/__init__.py @@ -0,0 +1,11 @@ +#======================================================================= + +__version__ = '''0.0.01''' +__sub_version__ = '''20110905135439''' +__copyright__ = '''(c) Alex A. Naanou 2011''' + + +#----------------------------------------------------------------------- + +#======================================================================= +# vim:set ts=4 sw=4 nowrap : diff --git a/data.py b/data.py new file mode 100755 index 00000000..4712264c --- /dev/null +++ b/data.py @@ -0,0 +1,155 @@ +#======================================================================= + +__version__ = '''0.0.01''' +__sub_version__ = '''20110930190347''' +__copyright__ = '''(c) Alex A. Naanou 2011''' + + +#----------------------------------------------------------------------- + +import time +import uuid +import pickle +import pli.tags.tagset as tagset +from pli.logictypes import OR, ANY + + +#----------------------------------------------------------------------- +# +# basic data: +# Image: +# { +# 'name': NAME, +# 'title': TITLE, +# +# 'preview': LINK, +# 'original': LINK, +# +# +# 'tags': [ +# TAG, +# ... +# ], +# 'links': [ +# ... +# ] +# } +# +# +#----------------------------------------------------------------------- +# basic patterns... + +##!!! +IMAGE_ID = ANY +NAME = ANY +PATH = ANY + +LINK_TYPE = OR('preview', 'original', 'image') + +LINK = { + 'name': NAME, + 'path': PATH, + 'type': LINK_TYPE, +} + +IMAGE = { + 'id': IMAGE_ID, + 'name': NAME, + + 'preview': LINK, + 'original': LINK, + + 'links': None, + + 'metadata': None, +} + + + +#----------------------------------------------------------------------- + +index = { + 'tags': tagset.DictTagSet(), + 'paths': {}, +} + + +def import_image(path, index): + ''' + ''' + # create an ID... + uid = uuid.uuid1() + while uid in index['paths']: + uid = uuid.uuid1() + + # add file to index... + index['paths'][uid] = path + index['tags'].tag(uid, 'type:image', 'image') + + return uid + + +if __name__ == '__main__': + + t = index['tags'] + + t0 = time.time() + print 'generating data...', + + for i in xrange(500): + n = 'moo%05d' % i + uid = import_image(n, index) + if n[-1] == '0': + t.tag(uid, '0') + if n[-1] == '5': + t.tag(uid, '5') + if n.endswith('10'): + t.tag(uid, '10') + + t1 = time.time() + print 'done (%s).' % (t1 - t0) + + + + t0 = time.time() + print 'saving data...', + + pickle.dump(index, open('dummy.index', 'w')) + + t1 = time.time() + print 'done (%s).' % (t1 - t0) + + + + t0 = time.time() + print 'loading data...', + + index = pickle.load(open('dummy.index')) + + t1 = time.time() + print 'done (%s).' % (t1 - t0) + + + from profile import run + + + t0 = time.time() + print 'getting number of elements...', + +## n = len(index['tags'].all('0').objects()) +## n = len(index['tags'].all('10').objects()) +## n = len(index['tags'].any('0', '10').objects()) +## n = len(index['tags'].all('0').none('10').objects()) +## n = len(index['tags'].all('type:image')) + run('''n = len(index['tags'].all('type:image'))''') + + t1 = time.time() + print 'done (%s).' % (t1 - t0) + + print 'and the number is:', n + print 'tagset size is:', len(index['tags']) + + + +#======================================================================= +# vim:set ts=4 sw=4 nowrap : diff --git a/tags.py b/tags.py new file mode 100755 index 00000000..c3fc823b --- /dev/null +++ b/tags.py @@ -0,0 +1,289 @@ +#======================================================================= + +__version__ = '''0.0.01''' +__sub_version__ = '''20111001000000''' +__copyright__ = '''(c) Alex A. Naanou 2011''' + + +#----------------------------------------------------------------------- + +import uuid +import pli.objutils as objutils +import pli.pattern.proxy.utils as putils + + +#----------------------------------------------------------------------- +#------------------------------------------------------AbstractTagSet--- +class AbstractTagSet(object): + ''' + ''' + pass + + +#---------------------------------------------------------BasicTagSet--- +class BasicTagSet(AbstractTagSet): + ''' + ''' + objutils.createonaccess('_index', dict) + + def tag(self, obj, *tags): + ''' + ''' + index = self._index + for tag in tags: + if tag not in index: + index[tag] = set() + index[tag].add(obj) + return self + def untag(self, obj, *tags): + ''' + ''' + index = self._index + for tag in tags: + index[tag].remove(obj) + return self + + # selectors... + def all(self, *tags): + ''' + ''' + index = self._index + pool = [] + for tag in tags: + pool += [index[tag]] + pool.sort(key=len) + # if we have atleast one empty set then we have an empty + # result... + if len(pool[0]) == 0: + return set() + # initially get the largest pool element... + # NOTE: this is an optimization -- we first intersect the + # largest population with the smallest, giving the rest a + # far smaller population to work with... + res = set(pool.pop(-1)) + # now we get the total intersection of elements... + for s in pool: + res.intersection_update(s) + return res + def any(self, *tags): + ''' + ''' + index = self._index + res = set() + for tag in tags: + res.update(index[tag]) + return res + ##!!! slow !!!## + def none(self, *tags): + ''' + ''' + # XXX is this the best way yo do this? + index = self._index + bad = self.any(*tags) + other_tags = set(index.keys()).difference(tags) + return self.any(*other_tags).difference(bad) + + +#-----------------------------------------TagSetWithReverseIndexMixin--- +class TagSetWithReverseIndexMixin(AbstractTagSet): + ''' + ''' + objutils.createonaccess('_reverse_index', '_build_reverse_index', local_attr_tpl='%s_data') + + ##!!! slow !!!## + def _build_reverse_index(self): + ''' + ''' + res = {} + index = self._index + # XXX this is really ugly!! + for obj in self.objects(): + res[obj] = set(t for t in index if obj in index[t]) + return res + def _reset_reverse_index(self): + if hasattr(self, '_reverse_index_data'): + del self._reverse_index_data + self._reverse_index_data + + # these need to update the cache (_reverse_index) + def tag(self, obj, *tags): + ''' + ''' + super(TagSetWithReverseIndexMixin, self).tag(obj, *tags) + # update cache... + if obj in self._reverse_index: + self._reverse_index[obj] = set() + self._reverse_index[obj].update(tags) + return self + def untag(self, obj, *tags): + ''' + ''' + super(TagSetWithReverseIndexMixin, self).untag(obj, *tags) + # update cache... + if obj in self._reverse_index: + self._reverse_index[obj].difference_update(tags) + if len(self._reverse_index[obj]) == 0: + del self._reverse_index[obj] + return self + + # specific interface... + def tags(self, *objs): + ''' + return a list of all the tags that tag the given objects. + + if no objects are given return all the tags. + ''' + if objs == (): + return set(self._index.keys()) + res = set() + rev_index = self._reverse_index + for obj in objs: + res.update(rev_index[obj]) + return res + def objects(self): + return self._reverse_index.keys() + + +#--------------------------------------------------------------TagSet--- +class TagSet(TagSetWithReverseIndexMixin, BasicTagSet): + ''' + ''' + pass + + + +#----------------------------------------------------------------------- +#-----------------------------------------------TagSetWithObjectIndex--- +class TagSetWithObjectIndex(object): + ''' + ''' + objutils.createonaccess('_index', BasicTagSet) + objutils.createonaccess('_objects', dict) + objutils.createonaccess('_cache', '_build_cache', local_attr_tpl='%s_data') + + # internal interface... + def _build_cache(self): + return dict(((b, a) for a, b in self._objects.items())) + def _reset_cache(self): + if hasattr(self, '_cache_data'): + del self._cache_data + self._cache_data + + # these need to manupulate the cache... + def tag(self, obj, *tags): + ''' + ''' + uid = self._cache.get(obj, uuid.uuid1()) + self._index.tag(uid, *tags) + if uid not in self._objects: + self._objects[uid] = obj + self._cache[obj] = uid + return self + def untag(self, obj, *tags): + uid = self._cache[obj] + self._index.untag(uid, *tags) + # update cache... + if uid not in self._index.objects(): + del self._objects[uid] + return self + + def _proxy_op(name): + def _op(self, *tags): + ''' + ''' + return set(self._objects[uid] for uid in getattr(self._index, name)(*tags)) + return _op + all = _proxy_op('all') + any = _proxy_op('any') + none = _proxy_op('none') + del _proxy_op + + putils.proxymethods(( + 'tags', + ), '_index') + + def objects(self): + return self._objects.values() + def getuid(self, obj): + ''' + ''' + return self._cache.get(obj, None) + + + +#----------------------------------------------------------------------- +if __name__ == '__main__': + pass + + from time import time + import cPickle as pickle + + ts = TagSet() +## ts = TagSetWithObjectIndex() + + + N = 100000 + obj_tpl = 'image%010d' + + def create_tagset(): + for i in xrange(N): + n = obj_tpl % i + ts.tag(n, 'image') + if n.endswith('0'): + ts.tag(n, '0') + if n.endswith('5'): + ts.tag(n, '5') + if n.endswith('10'): + ts.tag(n, '10') + + def save_tagset(): + pickle.dump(ts, open('tags.db', 'w')) + + def load_tagset(): + print 'loading tagset...', + t0 = time() + ts = pickle.load(open('tags.db')) + t1 = time() + print 'done (%.3fs).' % (t1-t0) + return ts + + ts = load_tagset() + + print len(ts.tags()) + print len(ts.objects()) + print len(ts.all('10')) + print len(ts.all('10', '0')) + print len(ts.any('10', '5')) + print len(ts.none('10', '5')) + + print ts.tags(obj_tpl % 0) + print ts.tags(obj_tpl % 10) + + + print 'selecting (all)...', + t0 = time() + ts.all('10', '0') + t1 = time() + print 'done (%.3fs).' % (t1-t0) + print 'selecting (any)...', + t0 = time() + ts.any('10', '5') + t1 = time() + print 'done (%.3fs).' % (t1-t0) + print 'selecting (none)...', + t0 = time() + ts.none('10', '5') + t1 = time() + print 'done (%.3fs).' % (t1-t0) + + print 'getting object tags...', + t0 = time() + res = ts.tags(obj_tpl % 10) + t1 = time() + print 'done (%.3fs).' % (t1-t0) + + + + +#======================================================================= +# vim:set ts=4 sw=4 nowrap :