added some code, still in the "unstructured play" stage...

Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
2025-10-28 18:00:09 +00:00 · 2011-10-01 00:08:31 +04:00 · 2011-10-01 00:08:31 +04:00 · bca3bef9c3
commit bca3bef9c3
parent 3a75276f0a
4 changed files with 457 additions and 0 deletions
--- a/TODO.otl
+++ b/TODO.otl
@ -0,0 +1,2 @@
+[X] 100% general tasks
+	[X] need unique image id
--- a/init.py
+++ b/init.py
@ -0,0 +1,11 @@
+#=======================================================================
+
+__version__ = '''0.0.01'''
+__sub_version__ = '''20110905135439'''
+__copyright__ = '''(c) Alex A. Naanou 2011'''
+
+
+#-----------------------------------------------------------------------
+
+#=======================================================================
+#                                            vim:set ts=4 sw=4 nowrap :
--- a/data.py
+++ b/data.py
@ -0,0 +1,155 @@
+#=======================================================================
+
+__version__ = '''0.0.01'''
+__sub_version__ = '''20110930190347'''
+__copyright__ = '''(c) Alex A. Naanou 2011'''
+
+
+#-----------------------------------------------------------------------
+
+import time
+import uuid
+import pickle
+import pli.tags.tagset as tagset
+from pli.logictypes import OR, ANY
+
+
+#-----------------------------------------------------------------------
+#
+# basic data:
+# 	Image:
+# 	{
+# 		'name': NAME,
+# 		'title': TITLE,
+#
+# 		'preview': LINK,
+# 		'original': LINK,
+#
+#
+# 		'tags': [
+# 			TAG, 
+# 			...
+# 		],
+# 		'links': [
+# 			...
+# 		]
+# 	}
+#
+#
+#-----------------------------------------------------------------------
+# basic patterns...
+
+##!!!
+IMAGE_ID = ANY  # NOTE(review): presumably ANY matches any value -- confirm against pli.logictypes
+NAME = ANY
+PATH = ANY
+
+LINK_TYPE = OR('preview', 'original', 'image')  # NOTE(review): presumably matches any one of the listed strings -- confirm
+
+LINK = {  # pattern of a single link record
+	'name': NAME,
+	'path': PATH,
+	'type': LINK_TYPE,
+}
+
+IMAGE = {  # pattern of an image record
+	'id': IMAGE_ID,
+	'name': NAME,
+
+	'preview': LINK,
+	'original': LINK,
+
+	'links': None,  # NOTE(review): the header comment above describes this as a list -- confirm what None means here
+
+	'metadata': None,
+}
+
+
+
+#-----------------------------------------------------------------------
+
+index = {
+	'tags': tagset.DictTagSet(),  # tag store; import_image(..) calls its .tag(uid, *tags)
+	'paths': {},  # image uid -> path, filled in by import_image(..)
+}
+
+
+def import_image(path, index):
+	'''
+	'''
+	# create an ID...
+	uid = uuid.uuid1()
+	while uid in index['paths']:
+		uid = uuid.uuid1()
+
+	# add file to index...
+	index['paths'][uid] = path
+	index['tags'].tag(uid, 'type:image', 'image')
+
+	return uid
+
+
+if __name__ == '__main__':
+
+	t = index['tags']
+
+	t0 = time.time()
+	print 'generating data...',
+
+	for i in xrange(500):
+		n = 'moo%05d' % i
+		uid = import_image(n, index)
+		if n[-1] == '0':
+			t.tag(uid, '0')
+		if n[-1] == '5':
+			t.tag(uid, '5')
+		if n.endswith('10'):
+			t.tag(uid, '10')
+
+	t1 = time.time()
+	print 'done (%s).' % (t1 - t0)
+
+
+
+	t0 = time.time()
+	print 'saving data...',
+
+	pickle.dump(index, open('dummy.index', 'w'))
+
+	t1 = time.time()
+	print 'done (%s).' % (t1 - t0)
+
+
+
+	t0 = time.time()
+	print 'loading data...',
+
+	index = pickle.load(open('dummy.index'))
+
+	t1 = time.time()
+	print 'done (%s).' % (t1 - t0)
+
+
+	from profile import run
+
+
+	t0 = time.time()
+	print 'getting number of elements...',
+
+##	n = len(index['tags'].all('0').objects())
+##	n = len(index['tags'].all('10').objects())
+##	n = len(index['tags'].any('0', '10').objects())
+##	n = len(index['tags'].all('0').none('10').objects())
+##	n = len(index['tags'].all('type:image'))
+	run('''n = len(index['tags'].all('type:image'))''')
+
+	t1 = time.time()
+	print 'done (%s).' % (t1 - t0)
+
+	print 'and the number is:', n
+	print 'tagset size is:', len(index['tags'])
+
+
+
+#=======================================================================
+#                                            vim:set ts=4 sw=4 nowrap :
--- a/tags.py
+++ b/tags.py
@ -0,0 +1,289 @@
+#=======================================================================
+
+__version__ = '''0.0.01'''
+__sub_version__ = '''20111001000000'''
+__copyright__ = '''(c) Alex A. Naanou 2011'''
+
+
+#-----------------------------------------------------------------------
+
+import uuid
+import pli.objutils as objutils
+import pli.pattern.proxy.utils as putils
+
+
+#-----------------------------------------------------------------------
+#------------------------------------------------------AbstractTagSet---
+class AbstractTagSet(object):
+	'''
+	Common base class of the tag set classes defined in this module.
+
+	Defines no behaviour of its own.
+	'''
+	pass
+
+
+#---------------------------------------------------------BasicTagSet---
+class BasicTagSet(AbstractTagSet):
+	'''
+	'''
+	objutils.createonaccess('_index', dict)
+
+	def tag(self, obj, *tags):
+		'''
+		'''
+		index = self._index
+		for tag in tags:
+			if tag not in index:
+				index[tag] = set()
+			index[tag].add(obj)
+		return self
+	def untag(self, obj, *tags):
+		'''
+		'''
+		index = self._index
+		for tag in tags:
+			index[tag].remove(obj)
+		return self
+	
+	# selectors...
+	def all(self, *tags):
+		'''
+		'''
+		index = self._index
+		pool = []
+		for tag in tags:
+			pool += [index[tag]]
+		pool.sort(key=len)
+		# if we have atleast one empty set then we have an empty
+		# result...
+		if len(pool[0]) == 0:
+			return set()
+		# initially get the largest pool element... 
+		# NOTE: this is an optimization -- we first intersect the
+		# 		largest population with the smallest, giving the rest a
+		# 		far smaller population to work with...
+		res = set(pool.pop(-1))
+		# now we get the total intersection of elements...
+		for s in pool:
+			res.intersection_update(s)
+		return res
+	def any(self, *tags):
+		'''
+		'''
+		index = self._index
+		res = set()
+		for tag in tags:
+			res.update(index[tag])
+		return res
+	##!!! slow !!!##
+	def none(self, *tags):
+		'''
+		'''
+		# XXX is this the best way yo do this?
+		index = self._index
+		bad = self.any(*tags)
+		other_tags = set(index.keys()).difference(tags)
+		return self.any(*other_tags).difference(bad)
+
+
+#-----------------------------------------TagSetWithReverseIndexMixin---
+class TagSetWithReverseIndexMixin(AbstractTagSet):
+	'''
+	'''
+	objutils.createonaccess('_reverse_index', '_build_reverse_index', local_attr_tpl='%s_data')
+
+	##!!! slow !!!##
+	def _build_reverse_index(self):
+		'''
+		'''
+		res = {}
+		index = self._index
+		# XXX this is really ugly!!
+		for obj in self.objects():
+			res[obj] = set(t for t in index if obj in index[t])
+		return res
+	def _reset_reverse_index(self):
+		if hasattr(self, '_reverse_index_data'):
+			del self._reverse_index_data
+		self._reverse_index_data
+	
+	# these need to update the cache (_reverse_index)
+	def tag(self, obj, *tags):
+		'''
+		'''
+		super(TagSetWithReverseIndexMixin, self).tag(obj, *tags)
+		# update cache...
+		if obj in self._reverse_index:
+			self._reverse_index[obj] = set()
+		self._reverse_index[obj].update(tags)
+		return self
+	def untag(self, obj, *tags):
+		'''
+		'''
+		super(TagSetWithReverseIndexMixin, self).untag(obj, *tags)
+		# update cache...
+		if obj in self._reverse_index:
+			self._reverse_index[obj].difference_update(tags)
+			if len(self._reverse_index[obj]) == 0:
+				del self._reverse_index[obj]
+		return self
+	
+	# specific interface...
+	def tags(self, *objs):
+		'''
+		return a list of all the tags that tag the given objects.
+
+		if no objects are given return all the tags.
+		'''
+		if objs == ():
+			return set(self._index.keys())
+		res = set()
+		rev_index = self._reverse_index
+		for obj in objs:
+			res.update(rev_index[obj])
+		return res
+	def objects(self):
+		return self._reverse_index.keys()
+
+
+#--------------------------------------------------------------TagSet---
+class TagSet(TagSetWithReverseIndexMixin, BasicTagSet):
+	'''
+	BasicTagSet combined with the TagSetWithReverseIndexMixin reverse
+	index (.tags(..) and .objects()).
+	'''
+	pass
+		
+
+
+#-----------------------------------------------------------------------
+#-----------------------------------------------TagSetWithObjectIndex---
+class TagSetWithObjectIndex(object):
+	'''
+	'''
+	objutils.createonaccess('_index', BasicTagSet)
+	objutils.createonaccess('_objects', dict)
+	objutils.createonaccess('_cache', '_build_cache', local_attr_tpl='%s_data')
+
+	# internal interface...
+	def _build_cache(self):
+		return dict(((b, a) for a, b in self._objects.items()))
+	def _reset_cache(self):
+		if hasattr(self, '_cache_data'):
+			del self._cache_data
+		self._cache_data
+	
+	# these need to manupulate the cache...
+	def tag(self, obj, *tags):
+		'''
+		'''
+		uid = self._cache.get(obj, uuid.uuid1())
+		self._index.tag(uid, *tags)
+		if uid not in self._objects:
+			self._objects[uid] = obj
+			self._cache[obj] = uid
+		return self
+	def untag(self, obj, *tags):
+		uid = self._cache[obj]
+		self._index.untag(uid, *tags)
+		# update cache...
+		if uid not in self._index.objects():
+			del self._objects[uid]
+		return self
+
+	def _proxy_op(name):
+		def _op(self, *tags):
+			'''
+			'''
+			return set(self._objects[uid] for uid in getattr(self._index, name)(*tags))
+		return _op
+	all = _proxy_op('all')
+	any = _proxy_op('any')
+	none = _proxy_op('none')
+	del _proxy_op
+
+	putils.proxymethods((
+		'tags',
+		), '_index')
+
+	def objects(self):
+		return self._objects.values()
+	def getuid(self, obj):
+		'''
+		'''
+		return self._cache.get(obj, None)
+	
+
+
+#-----------------------------------------------------------------------
+if __name__ == '__main__':
+	pass
+
+	from time import time
+	import cPickle as pickle
+
+	ts = TagSet()
+##	ts = TagSetWithObjectIndex()
+
+
+	N = 100000
+	obj_tpl = 'image%010d'
+
+	def create_tagset():
+		for i in xrange(N):
+			n = obj_tpl % i
+			ts.tag(n, 'image')
+			if n.endswith('0'):
+				ts.tag(n, '0')
+			if n.endswith('5'):
+				ts.tag(n, '5')
+			if n.endswith('10'):
+				ts.tag(n, '10')
+
+	def save_tagset():
+		pickle.dump(ts, open('tags.db', 'w'))
+
+	def load_tagset():
+		print 'loading tagset...',
+		t0 = time()
+		ts = pickle.load(open('tags.db'))
+		t1 = time()
+		print 'done (%.3fs).' % (t1-t0)
+		return ts
+
+	ts = load_tagset()
+
+	print len(ts.tags())
+	print len(ts.objects())
+	print len(ts.all('10'))
+	print len(ts.all('10', '0'))
+	print len(ts.any('10', '5'))
+	print len(ts.none('10', '5'))
+
+	print ts.tags(obj_tpl % 0)
+	print ts.tags(obj_tpl % 10)
+
+
+	print 'selecting (all)...',
+	t0 = time()
+	ts.all('10', '0')
+	t1 = time()
+	print 'done (%.3fs).' % (t1-t0)
+	print 'selecting (any)...',
+	t0 = time()
+	ts.any('10', '5')
+	t1 = time()
+	print 'done (%.3fs).' % (t1-t0)
+	print 'selecting (none)...',
+	t0 = time()
+	ts.none('10', '5')
+	t1 = time()
+	print 'done (%.3fs).' % (t1-t0)
+
+	print 'getting object tags...',
+	t0 = time()
+	res = ts.tags(obj_tpl % 10)
+	t1 = time()
+	print 'done (%.3fs).' % (t1-t0)
+
+
+
+
+#=======================================================================
+#                                            vim:set ts=4 sw=4 nowrap :