ImageGrid/tags.py
Alex A. Naanou bca3bef9c3 added some code, still in the "unstructured play" stage...
Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
2011-10-01 00:08:31 +04:00

290 lines
6.5 KiB
Python
Executable File

#=======================================================================
__version__ = '''0.0.01'''
__sub_version__ = '''20111001000000'''
__copyright__ = '''(c) Alex A. Naanou 2011'''
#-----------------------------------------------------------------------
import uuid
import pli.objutils as objutils
import pli.pattern.proxy.utils as putils
#-----------------------------------------------------------------------
#------------------------------------------------------AbstractTagSet---
class AbstractTagSet(object):
    '''
    Common base class of the tag set implementations in this module.

    It defines no behavior of its own; it only serves as a shared
    ancestor for the concrete tag sets and mixins.
    '''
#---------------------------------------------------------BasicTagSet---
class BasicTagSet(AbstractTagSet):
    '''
    Basic tag index mapping each tag to the set of objects it tags.

    Objects are stored in sets, so they must be hashable.

    The mutators (tag/untag) return self so calls can be chained, the
    selectors (all/any/none) always return a new set.
    '''
    # tag -> set of objects tagged with it.
    # NOTE(review): presumably objutils.createonaccess creates the dict
    #       per instance on first access -- confirm against pli.
    objutils.createonaccess('_index', dict)

    def tag(self, obj, *tags):
        '''
        Tag obj with each of the given tags.

        Returns self.
        '''
        index = self._index
        for tag in tags:
            if tag not in index:
                index[tag] = set()
            index[tag].add(obj)
        return self

    def untag(self, obj, *tags):
        '''
        Remove each of the given tags from obj.

        Tags that are unknown or that do not tag obj are ignored, this
        way a call can never stop half way through the tag list and
        leave the index partially updated.

        NOTE: a tag that no longer tags anything is kept in the index
            with an empty set.

        Returns self.
        '''
        index = self._index
        for tag in tags:
            objs = index.get(tag)
            if objs is not None:
                objs.discard(obj)
        return self

    # selectors...
    def all(self, *tags):
        '''
        Return the set of objects tagged with every one of the given tags.

        An unknown tag, a tag that tags nothing or an empty tag list
        give an empty result.
        '''
        index = self._index
        pool = []
        for tag in tags:
            objs = index.get(tag)
            # a single empty population makes the whole intersection
            # empty, no need to look any further...
            if not objs:
                return set()
            pool.append(objs)
        if not pool:
            return set()
        # start with a copy of the smallest population -- the result
        # can not be larger than it, so this is both the cheapest copy
        # and the smallest set to narrow down...
        pool.sort(key=len)
        res = set(pool[0])
        for objs in pool[1:]:
            res.intersection_update(objs)
        return res

    def any(self, *tags):
        '''
        Return the set of objects tagged with at least one of the given tags.

        Unknown tags contribute nothing.
        '''
        index = self._index
        res = set()
        for tag in tags:
            res.update(index.get(tag, ()))
        return res

    ##!!! slow !!!##
    def none(self, *tags):
        '''
        Return the set of objects tagged with none of the given tags.

        NOTE: only objects present in the index are considered, i.e.
            objects that were tagged at some point.
        '''
        # every object in the index minus the ones carrying any of the
        # given tags...
        res = set()
        for objs in self._index.values():
            res.update(objs)
        res.difference_update(self.any(*tags))
        return res
#-----------------------------------------TagSetWithReverseIndexMixin---
class TagSetWithReverseIndexMixin(AbstractTagSet):
    '''
    Mixin maintaining a reverse index (object -> set of tags).

    This is meant to be mixed in before a class that provides the
    forward index in ._index (tag -> set of objects) and the .tag(..)
    and .untag(..) methods, both are accessed here but not defined.
    '''
    # object -> set of tags, the data is kept in ._reverse_index_data
    # NOTE(review): presumably objutils.createonaccess calls
    #       ._build_reverse_index() on first access and stores the
    #       result in ._reverse_index_data -- confirm against pli.
    objutils.createonaccess('_reverse_index', '_build_reverse_index', local_attr_tpl='%s_data')

    def _build_reverse_index(self):
        '''
        Build the reverse index from scratch out of the forward index.

        This walks ._index directly in a single pass and does not go
        through .objects(), as that reads the reverse index, i.e. the
        very thing being built here.
        '''
        res = {}
        for tag, objs in self._index.items():
            for obj in objs:
                if obj not in res:
                    res[obj] = set()
                res[obj].add(tag)
        return res

    def _reset_reverse_index(self):
        '''
        Drop the stored reverse index and rebuild it.
        '''
        if hasattr(self, '_reverse_index_data'):
            del self._reverse_index_data
        # touch the attribute to trigger the rebuild...
        # NOTE: the stored data (._reverse_index_data) must not be
        #       accessed here, it was just removed.
        self._reverse_index

    # these need to update the cache (_reverse_index)
    def tag(self, obj, *tags):
        '''
        Tag obj with each of the given tags and update the reverse index.

        Returns self.
        '''
        super(TagSetWithReverseIndexMixin, self).tag(obj, *tags)
        # update cache...
        # NOTE: no entry is created when no tags are given, this is
        #       consistent with .untag(..) removing empty entries.
        if tags:
            rev_index = self._reverse_index
            if obj not in rev_index:
                rev_index[obj] = set()
            rev_index[obj].update(tags)
        return self

    def untag(self, obj, *tags):
        '''
        Remove the given tags from obj and update the reverse index.

        An object left with no tags is removed from the reverse index.

        Returns self.
        '''
        super(TagSetWithReverseIndexMixin, self).untag(obj, *tags)
        # update cache...
        rev_index = self._reverse_index
        obj_tags = rev_index.get(obj)
        if obj_tags is not None:
            obj_tags.difference_update(tags)
            if not obj_tags:
                del rev_index[obj]
        return self

    # specific interface...
    def tags(self, *objs):
        '''
        Return the set of all the tags that tag any of the given objects.

        If no objects are given return all the tags in the index.

        Objects not present in the reverse index contribute no tags,
        this includes objects that had all of their tags removed.
        '''
        if not objs:
            return set(self._index.keys())
        res = set()
        rev_index = self._reverse_index
        for obj in objs:
            res.update(rev_index.get(obj, ()))
        return res

    def objects(self):
        '''
        Return the objects present in the reverse index.
        '''
        return self._reverse_index.keys()
#--------------------------------------------------------------TagSet---
class TagSet(TagSetWithReverseIndexMixin, BasicTagSet):
    '''
    Tag set with both a forward and a reverse index.

    This combines the tag -> objects index and selectors of BasicTagSet
    with the object -> tags index of TagSetWithReverseIndexMixin and
    adds nothing of its own.
    '''
#-----------------------------------------------------------------------
#-----------------------------------------------TagSetWithObjectIndex---
class TagSetWithObjectIndex(object):
    '''
    Tag set that indexes uids instead of the objects themselves.

    Each object is assigned a uid (uuid.uuid1()) when first tagged, the
    inner tag set only sees the uids while this class translates
    between uids and objects.

    NOTE: objects are used as keys in ._cache so they must be hashable.
    '''
    # NOTE(review): presumably objutils.createonaccess creates each of
    #       these per instance on first access, building ._cache via
    #       ._build_cache() and storing it in ._cache_data -- confirm
    #       against pli.
    # inner tag set keyed by uid...
    # NOTE: this needs to be a TagSet as both .objects() and .tags(..)
    #       are used below and BasicTagSet defines neither.
    objutils.createonaccess('_index', TagSet)
    # uid -> object
    objutils.createonaccess('_objects', dict)
    # object -> uid
    objutils.createonaccess('_cache', '_build_cache', local_attr_tpl='%s_data')

    # internal interface...
    def _build_cache(self):
        '''
        Build the object -> uid mapping by inverting ._objects.
        '''
        return dict((obj, uid) for uid, obj in self._objects.items())

    def _reset_cache(self):
        '''
        Drop the stored object -> uid mapping and rebuild it.
        '''
        if hasattr(self, '_cache_data'):
            del self._cache_data
        # touch the attribute to trigger the rebuild...
        # NOTE: the stored data (._cache_data) must not be accessed
        #       here, it was just removed.
        self._cache

    # these need to manipulate the cache...
    def tag(self, obj, *tags):
        '''
        Tag obj with each of the given tags.

        An object seen for the first time is assigned a new uid.

        Returns self.
        '''
        cache = self._cache
        uid = cache.get(obj)
        # only generate a uid for objects we do not know yet...
        if uid is None:
            uid = uuid.uuid1()
        self._index.tag(uid, *tags)
        if uid not in self._objects:
            self._objects[uid] = obj
        cache[obj] = uid
        return self

    def untag(self, obj, *tags):
        '''
        Remove the given tags from obj.

        An object left with no tags is forgotten, together with its uid.

        Raises KeyError if obj is not known to this tag set.

        Returns self.
        '''
        uid = self._cache[obj]
        self._index.untag(uid, *tags)
        # update cache...
        if uid not in self._index.objects():
            self._objects.pop(uid, None)
            # drop the reverse mapping too, otherwise .getuid(..) would
            # keep returning the uid of a forgotten object...
            self._cache.pop(obj, None)
        return self

    def _proxy_op(name):
        # build a selector that calls the named selector of the inner
        # tag set and maps the resulting uids back to objects...
        def _op(self, *tags):
            '''
            Call the same selector on the inner tag set and return the
            matching objects (not uids) as a set.
            '''
            return set(self._objects[uid] for uid in getattr(self._index, name)(*tags))
        return _op
    all = _proxy_op('all')
    any = _proxy_op('any')
    none = _proxy_op('none')
    del _proxy_op

    def tags(self, *objs):
        '''
        Return the set of all the tags that tag any of the given objects.

        If no objects are given return all the tags in the index.

        Objects not known to this tag set contribute no tags.
        '''
        if not objs:
            return self._index.tags()
        # the inner tag set is keyed by uid, so translate first...
        cache = self._cache
        uids = [cache[obj] for obj in objs if obj in cache]
        # NOTE: an empty uid list must not reach the inner .tags(..) as
        #       there it means "all the tags".
        if not uids:
            return set()
        return self._index.tags(*uids)

    def objects(self):
        '''
        Return the objects (not uids) known to this tag set.
        '''
        return self._objects.values()

    def getuid(self, obj):
        '''
        Return the uid assigned to obj or None if obj is not known.
        '''
        return self._cache.get(obj, None)
#-----------------------------------------------------------------------
if __name__ == '__main__':
pass
from time import time
import cPickle as pickle
ts = TagSet()
## ts = TagSetWithObjectIndex()
N = 100000
obj_tpl = 'image%010d'
def create_tagset():
for i in xrange(N):
n = obj_tpl % i
ts.tag(n, 'image')
if n.endswith('0'):
ts.tag(n, '0')
if n.endswith('5'):
ts.tag(n, '5')
if n.endswith('10'):
ts.tag(n, '10')
def save_tagset():
pickle.dump(ts, open('tags.db', 'w'))
def load_tagset():
print 'loading tagset...',
t0 = time()
ts = pickle.load(open('tags.db'))
t1 = time()
print 'done (%.3fs).' % (t1-t0)
return ts
ts = load_tagset()
print len(ts.tags())
print len(ts.objects())
print len(ts.all('10'))
print len(ts.all('10', '0'))
print len(ts.any('10', '5'))
print len(ts.none('10', '5'))
print ts.tags(obj_tpl % 0)
print ts.tags(obj_tpl % 10)
print 'selecting (all)...',
t0 = time()
ts.all('10', '0')
t1 = time()
print 'done (%.3fs).' % (t1-t0)
print 'selecting (any)...',
t0 = time()
ts.any('10', '5')
t1 = time()
print 'done (%.3fs).' % (t1-t0)
print 'selecting (none)...',
t0 = time()
ts.none('10', '5')
t1 = time()
print 'done (%.3fs).' % (t1-t0)
print 'getting object tags...',
t0 = time()
res = ts.tags(obj_tpl % 10)
t1 = time()
print 'done (%.3fs).' % (t1-t0)
#=======================================================================
# vim:set ts=4 sw=4 nowrap :