added some code, still in the "unstructured play" stage...

Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
This commit is contained in:
Alex A. Naanou 2011-10-01 00:08:31 +04:00
parent 3a75276f0a
commit bca3bef9c3
4 changed files with 457 additions and 0 deletions

2
TODO.otl Executable file
View File

@ -0,0 +1,2 @@
[X] 100% general tasks
[X] need unique image id

11
__init__.py Executable file
View File

@ -0,0 +1,11 @@
#=======================================================================
__version__ = '''0.0.01'''
__sub_version__ = '''20110905135439'''
__copyright__ = '''(c) Alex A. Naanou 2011'''
#-----------------------------------------------------------------------
#=======================================================================
# vim:set ts=4 sw=4 nowrap :

155
data.py Executable file
View File

@ -0,0 +1,155 @@
#=======================================================================
__version__ = '''0.0.01'''
__sub_version__ = '''20110930190347'''
__copyright__ = '''(c) Alex A. Naanou 2011'''
#-----------------------------------------------------------------------
import time
import uuid
import pickle
import pli.tags.tagset as tagset
from pli.logictypes import OR, ANY
#-----------------------------------------------------------------------
#
# basic data:
#   Image:
#       {
#           'name': NAME,
#           'title': TITLE,
#
#           'preview': LINK,
#           'original': LINK,
#
#
#           'tags': [
#               TAG,
#               ...
#           ],
#           'links': [
#               ...
#           ]
#       }
#
#
#-----------------------------------------------------------------------
# basic patterns...
##!!!
# NOTE: ANY and OR are imported from pli.logictypes; presumably ANY
#       matches any value and OR(..) matches any one of its arguments
#       -- TODO confirm against pli.
IMAGE_ID = ANY
NAME = ANY
PATH = ANY
LINK_TYPE = OR('preview', 'original', 'image')

# layout of a single link record...
LINK = dict(
    name=NAME,
    path=PATH,
    type=LINK_TYPE,
)

# layout of a single image record...
IMAGE = dict(
    id=IMAGE_ID,
    name=NAME,
    preview=LINK,
    original=LINK,
    links=None,
    metadata=None,
)
#-----------------------------------------------------------------------
# the module-level index: a tag store ('tags') plus a mapping that
# import_image(..) fills with uid -> path entries ('paths')...
index = dict(
    tags=tagset.DictTagSet(),
    paths={},
)
def import_image(path, index):
    '''
    Register the image at path in index and return its new id.

    A uuid.uuid1() value that is not yet a key of index['paths'] is
    picked, path is stored under it and the id is tagged with
    'type:image' and 'image' through index['tags'].tag(..).
    '''
    paths = index['paths']
    # create an ID (retry in case the value is already taken)...
    while True:
        uid = uuid.uuid1()
        if uid not in paths:
            break
    # add file to index...
    paths[uid] = path
    index['tags'].tag(uid, 'type:image', 'image')
    return uid
if __name__ == '__main__':
t = index['tags']
t0 = time.time()
print 'generating data...',
for i in xrange(500):
n = 'moo%05d' % i
uid = import_image(n, index)
if n[-1] == '0':
t.tag(uid, '0')
if n[-1] == '5':
t.tag(uid, '5')
if n.endswith('10'):
t.tag(uid, '10')
t1 = time.time()
print 'done (%s).' % (t1 - t0)
t0 = time.time()
print 'saving data...',
pickle.dump(index, open('dummy.index', 'w'))
t1 = time.time()
print 'done (%s).' % (t1 - t0)
t0 = time.time()
print 'loading data...',
index = pickle.load(open('dummy.index'))
t1 = time.time()
print 'done (%s).' % (t1 - t0)
from profile import run
t0 = time.time()
print 'getting number of elements...',
## n = len(index['tags'].all('0').objects())
## n = len(index['tags'].all('10').objects())
## n = len(index['tags'].any('0', '10').objects())
## n = len(index['tags'].all('0').none('10').objects())
## n = len(index['tags'].all('type:image'))
run('''n = len(index['tags'].all('type:image'))''')
t1 = time.time()
print 'done (%s).' % (t1 - t0)
print 'and the number is:', n
print 'tagset size is:', len(index['tags'])
#=======================================================================
# vim:set ts=4 sw=4 nowrap :

289
tags.py Executable file
View File

@ -0,0 +1,289 @@
#=======================================================================
__version__ = '''0.0.01'''
__sub_version__ = '''20111001000000'''
__copyright__ = '''(c) Alex A. Naanou 2011'''
#-----------------------------------------------------------------------
import uuid
import pli.objutils as objutils
import pli.pattern.proxy.utils as putils
#-----------------------------------------------------------------------
#------------------------------------------------------AbstractTagSet---
class AbstractTagSet(object):
    '''
    Base class shared by BasicTagSet and TagSetWithReverseIndexMixin.

    Defines no attributes or methods of its own.
    '''
#---------------------------------------------------------BasicTagSet---
class BasicTagSet(AbstractTagSet):
    '''
    Basic tag store mapping each tag to the set of objects it tags.

    The mapping is kept in self._index ({tag: set(objects)}), declared
    via objutils.createonaccess('_index', dict) (presumably created as
    an empty dict on first access -- TODO confirm against pli).

    NOTE: objects are stored in sets, thus they must be hashable.
    NOTE: the selectors (all, any, none) treat a tag that was never
          used the same way as a tag that has no objects left.
    '''
    objutils.createonaccess('_index', dict)

    def tag(self, obj, *tags):
        '''
        Tag obj with each of the given tags.

        Returns self.
        '''
        index = self._index
        for tag in tags:
            index.setdefault(tag, set()).add(obj)
        return self

    def untag(self, obj, *tags):
        '''
        Remove each of the given tags from obj.

        Raises KeyError if a tag is not in the index or does not tag
        obj.

        NOTE: a tag left without objects stays in the index, with an
              empty set.

        Returns self.
        '''
        index = self._index
        for tag in tags:
            index[tag].remove(obj)
        return self

    # selectors...
    def all(self, *tags):
        '''
        Return the set of objects tagged with every one of the given tags.

        An empty set is returned if no tags are given or if at least
        one of the tags has no objects.
        '''
        index = self._index
        pool = [index.get(tag, ()) for tag in tags]
        if not pool:
            return set()
        pool.sort(key=len)
        # NOTE: this is an optimization -- we start from a copy of the
        #       smallest population, the result can never be larger
        #       than it so both the copy and every intersection that
        #       follows stay as cheap as possible...
        res = set(pool[0])
        # now we get the total intersection of elements...
        for s in pool[1:]:
            # if we have at least one empty set then we have an empty
            # result...
            if not res:
                break
            res.intersection_update(s)
        return res

    def any(self, *tags):
        '''
        Return the set of objects tagged with at least one of the given tags.

        An empty set is returned if no tags are given.
        '''
        index = self._index
        res = set()
        for tag in tags:
            res.update(index.get(tag, ()))
        return res

    ##!!! slow !!!##
    def none(self, *tags):
        '''
        Return the set of objects tagged with none of the given tags.

        NOTE: only objects that have at least one other tag are
              returned, as these are the only objects in the index.
        '''
        # XXX is this the best way to do this?
        index = self._index
        bad = self.any(*tags)
        other_tags = set(index).difference(tags)
        return self.any(*other_tags).difference(bad)
#-----------------------------------------TagSetWithReverseIndexMixin---
class TagSetWithReverseIndexMixin(AbstractTagSet):
    '''
    Mixin adding a cached reverse index ({object: set(tags)}) to a tag set.

    The class this is mixed into must provide self._index
    ({tag: set(objects)}) and the .tag(..) / .untag(..) methods that
    are extended here.

    NOTE: this assumes that objutils.createonaccess(..) stores the
          value of ._reverse_index in ._reverse_index_data (as set by
          local_attr_tpl) and calls ._build_reverse_index() when that
          is missing -- TODO confirm against pli.
    '''
    objutils.createonaccess('_reverse_index', '_build_reverse_index', local_attr_tpl='%s_data')

    def _build_reverse_index(self):
        '''
        Build the reverse index from scratch out of self._index.

        Objects that have no tags do not get an entry.
        '''
        # NOTE: this must not use self.objects() as that reads
        #       self._reverse_index, i.e. the very thing we are
        #       building here...
        res = {}
        index = self._index
        for tag in index:
            for obj in index[tag]:
                res.setdefault(obj, set()).add(tag)
        return res

    def _reset_reverse_index(self):
        '''
        Drop the cached reverse index and rebuild it.
        '''
        if hasattr(self, '_reverse_index_data'):
            del self._reverse_index_data
        # NOTE: reading the property is what triggers the rebuild,
        #       reading ._reverse_index_data directly would fail with
        #       an AttributeError right after the del above...
        self._reverse_index

    # these need to update the cache (_reverse_index)
    def tag(self, obj, *tags):
        '''
        Tag obj with each of the given tags, keeping the reverse index in sync.

        Returns self.
        '''
        super(TagSetWithReverseIndexMixin, self).tag(obj, *tags)
        # update cache...
        # NOTE: no entry is created if there is nothing to add, this
        #       mirrors .untag(..) that removes empty entries...
        if tags:
            self._reverse_index.setdefault(obj, set()).update(tags)
        return self

    def untag(self, obj, *tags):
        '''
        Remove the given tags from obj, keeping the reverse index in sync.

        The reverse index entry of an object left with no tags is
        removed.

        Returns self.
        '''
        super(TagSetWithReverseIndexMixin, self).untag(obj, *tags)
        # update cache...
        rev_index = self._reverse_index
        if obj in rev_index:
            rev_index[obj].difference_update(tags)
            if len(rev_index[obj]) == 0:
                del rev_index[obj]
        return self

    # specific interface...
    def tags(self, *objs):
        '''
        return a set of all the tags that tag the given objects.
        if no objects are given return all the tags.

        Raises KeyError if one of the objects is not in the reverse
        index.
        '''
        if not objs:
            return set(self._index.keys())
        res = set()
        rev_index = self._reverse_index
        for obj in objs:
            res.update(rev_index[obj])
        return res

    def objects(self):
        '''
        Return the objects that have an entry in the reverse index.
        '''
        return self._reverse_index.keys()
#--------------------------------------------------------------TagSet---
class TagSet(TagSetWithReverseIndexMixin, BasicTagSet):
    '''
    BasicTagSet combined with TagSetWithReverseIndexMixin.

    Adds nothing of its own.
    '''
#-----------------------------------------------------------------------
#-----------------------------------------------TagSetWithObjectIndex---
class TagSetWithObjectIndex(object):
    '''
    Tag set that tags objects indirectly, through generated uids.

    Data layout:
        _index      - tag set over the uids.
        _objects    - {uid: object}
        _cache      - {object: uid}, the reverse of _objects.

    NOTE: _index must be a TagSet and not a BasicTagSet as both
          .objects() and .tags(..) of the index are used below.
    NOTE: this assumes that objutils.createonaccess(..) stores the
          value of ._cache in ._cache_data (as set by local_attr_tpl)
          and calls ._build_cache() when that is missing -- TODO
          confirm against pli.
    '''
    objutils.createonaccess('_index', TagSet)
    objutils.createonaccess('_objects', dict)
    objutils.createonaccess('_cache', '_build_cache', local_attr_tpl='%s_data')

    # internal interface...
    def _build_cache(self):
        '''
        Build the {object: uid} mapping out of self._objects.
        '''
        return dict((obj, uid) for uid, obj in self._objects.items())

    def _reset_cache(self):
        '''
        Drop the cached {object: uid} mapping and rebuild it.
        '''
        if hasattr(self, '_cache_data'):
            del self._cache_data
        # NOTE: reading the property is what triggers the rebuild,
        #       reading ._cache_data directly would fail with an
        #       AttributeError right after the del above...
        self._cache

    # these need to manipulate the cache...
    def tag(self, obj, *tags):
        '''
        Tag obj with each of the given tags.

        An object seen for the first time gets a new uuid.uuid1() uid.

        Returns self.
        '''
        cache = self._cache
        uid = cache.get(obj)
        if uid is None:
            uid = uuid.uuid1()
        self._index.tag(uid, *tags)
        if uid not in self._objects:
            self._objects[uid] = obj
        cache[obj] = uid
        return self

    def untag(self, obj, *tags):
        '''
        Remove the given tags from obj.

        An object left with no tags is forgotten, i.e. removed from
        both _objects and _cache.

        Raises KeyError if obj is not known.

        Returns self.
        '''
        uid = self._cache[obj]
        self._index.untag(uid, *tags)
        # update cache...
        if uid not in self._index.objects():
            self._objects.pop(uid, None)
            # NOTE: without this .getuid(..) would keep returning the
            #       uid of an object that is no longer stored...
            self._cache.pop(obj, None)
        return self

    def _proxy_op(name):
        # build a selector that calls the index method of the same
        # name and maps the resulting uids back to objects...
        def _op(self, *tags):
            '''
            '''
            return set(self._objects[uid] for uid in getattr(self._index, name)(*tags))
        return _op
    all = _proxy_op('all')
    any = _proxy_op('any')
    none = _proxy_op('none')
    del _proxy_op

    def tags(self, *objs):
        '''
        return a set of all the tags that tag the given objects.
        if no objects are given return all the tags.

        Raises KeyError if one of the objects is not known.
        '''
        # NOTE: the index is keyed by uid, thus the objects need to be
        #       translated before they are passed on (a plain proxy to
        #       self._index.tags would look the objects up as uids)...
        cache = self._cache
        return self._index.tags(*[cache[obj] for obj in objs])

    def objects(self):
        '''
        Return the stored objects.
        '''
        return self._objects.values()

    def getuid(self, obj):
        '''
        Return the uid of obj or None if obj is not known.
        '''
        return self._cache.get(obj, None)
#-----------------------------------------------------------------------
if __name__ == '__main__':
pass
from time import time
import cPickle as pickle
ts = TagSet()
## ts = TagSetWithObjectIndex()
N = 100000
obj_tpl = 'image%010d'
def create_tagset():
for i in xrange(N):
n = obj_tpl % i
ts.tag(n, 'image')
if n.endswith('0'):
ts.tag(n, '0')
if n.endswith('5'):
ts.tag(n, '5')
if n.endswith('10'):
ts.tag(n, '10')
def save_tagset():
pickle.dump(ts, open('tags.db', 'w'))
def load_tagset():
print 'loading tagset...',
t0 = time()
ts = pickle.load(open('tags.db'))
t1 = time()
print 'done (%.3fs).' % (t1-t0)
return ts
ts = load_tagset()
print len(ts.tags())
print len(ts.objects())
print len(ts.all('10'))
print len(ts.all('10', '0'))
print len(ts.any('10', '5'))
print len(ts.none('10', '5'))
print ts.tags(obj_tpl % 0)
print ts.tags(obj_tpl % 10)
print 'selecting (all)...',
t0 = time()
ts.all('10', '0')
t1 = time()
print 'done (%.3fs).' % (t1-t0)
print 'selecting (any)...',
t0 = time()
ts.any('10', '5')
t1 = time()
print 'done (%.3fs).' % (t1-t0)
print 'selecting (none)...',
t0 = time()
ts.none('10', '5')
t1 = time()
print 'done (%.3fs).' % (t1-t0)
print 'getting object tags...',
t0 = time()
res = ts.tags(obj_tpl % 10)
t1 = time()
print 'done (%.3fs).' % (t1-t0)
#=======================================================================
# vim:set ts=4 sw=4 nowrap :