utf-8 support for buildcache now working -- I HATE PYTHON FOR HOW "CONSISTENT" ITS APIS ARE, ESPECIALLY FOR STRING ENCODINGS...

Signed-off-by: Alex A. Naanou <alex.nanou@gmail.com>
This commit is contained in:
Alex A. Naanou 2013-10-23 03:07:32 +04:00
parent 45e4c31dc1
commit ded5ed0aec

View File

@ -1,7 +1,8 @@
# -*- coding:utf-8 -*-
#=======================================================================
__version__ = '''0.0.01'''
__sub_version__ = '''20131021154045'''
__sub_version__ = '''20131023030800'''
__copyright__ = '''(c) Alex A. Naanou 2011'''
@ -16,6 +17,9 @@ import time
import tempfile
from optparse import OptionParser, OptionGroup
# XXX hack...
from io import open
try:
import pyexiv2 as metadata
except:
@ -28,6 +32,10 @@ import gid
#-----------------------------------------------------------------------
##!!! I Hate Python for this!
##!!! ...there seems no good way to get this...
DEFAULT_ENCODING = 'cp1251'
CONFIG = {
'absolute-path': False,
'ignore-orientation': False,
@ -117,6 +125,26 @@ IMAGE = OR(
#-----------------------------------------------------------------------
# Helpers...
##!!! I hate python in everything that concerns encodings....
# Characters that are percent-escaped in generated paths/URLs.
# NOTE: '%' MUST stay first -- quote() and unquote() rely on this order
#       to avoid double-escaping / double-unescaping.
RESERVED_URL_CHARS = '%;/?:@&=+$, '
# Map each reserved character to its percent-escape ('%XX', upper-case hex).
# NOTE: this is computed from the character codes directly instead of
#       splitting the output of urllib2.quote(..) on '%': that approach
#       shifted the table by one (the split starts with an empty chunk)
#       and left '/' unescaped (it is "safe" by default), which mapped
#       '%' -> '%', ';' -> '%25' and '/' -> '%3B/'.
RESERVED_URL_TRANSLATION = dict(
    (c, '%%%02X' % ord(c)) for c in RESERVED_URL_CHARS)


def quote(s, safe=''):
    '''
    Percent-escape the reserved URL characters in s.

    Only the characters listed in RESERVED_URL_CHARS are touched, any
    other character (including non-ASCII ones) is passed through as-is,
    so this works for both str and unicode input.

    s       - string to escape.
    safe    - characters that should NOT be escaped.

    Returns the escaped string.
    '''
    # '%' is handled first so the '%' of the escapes added later is not
    # escaped a second time...
    for c in RESERVED_URL_CHARS:
        if c in safe:
            continue
        s = s.replace(c, RESERVED_URL_TRANSLATION[c])
    return s


def unquote(s):
    '''
    Reverse quote(..): replace the percent-escapes of the reserved URL
    characters in s with the characters themselves.

    Only upper-case escapes of the characters in RESERVED_URL_CHARS are
    recognized.

    Returns the unescaped string.
    '''
    # '%25' is handled last so a literal '%' that was escaped does not
    # combine with the text after it into a bogus escape...
    for c in reversed(RESERVED_URL_CHARS):
        s = s.replace(RESERVED_URL_TRANSLATION[c], c)
    return s
#------------------------------------------------------------pathjoin---
def pathjoin(*p):
'''
@ -133,9 +161,13 @@ def getpath(root, path, absolute=False):
if path[0] in ('\\', '/'):
path = path[1:]
if absolute == True:
return 'file:///' + urllib2.quote(pathjoin(root, path), safe='/:')
## ##!!! urllib2/urllib quote breaks on unicode...
## return 'file:///' + urllib2.quote(pathjoin(root, path), safe='/:')
return 'file:///' + quote(pathjoin(root, path), safe='/:')
else:
return urllib2.quote(pathjoin(path), safe='/:')
## ##!!! urllib2/urllib quote breaks on unicode...
## return urllib2.quote(pathjoin(path), safe='/:')
return quote(pathjoin(path), safe='/:')
#-------------------------------------------------------------log_err---
@ -144,9 +176,9 @@ def log_err(path, e, source_file, target_file):
'''
err_file = pathjoin(path, CONFIG['error'])
if not os.path.exists(err_file):
err = file(err_file, 'w')
err = open(err_file, 'w')
else:
err = file(err_file, 'a')
err = open(err_file, 'a')
with err:
err.write(ERR_LOG % {
'source-file': source_file,
@ -300,7 +332,7 @@ def getimages(path, config=CONFIG, verbosity=0):
config['images'],
# XXX avoid hardcoded suffixes...
lambda n: n.endswith('-images-diff.json'),
lambda data, path: (data.update(json.load(file(path))), data)[-1],
lambda data, path: (data.update(json.load(open(path))), data)[-1],
{},
verbosity=verbosity)
@ -313,7 +345,7 @@ def getdata(path, config=CONFIG, verbosity=0):
pathjoin(path, config['cache-dir']),
lambda n: n.endswith(config['data']),
lambda n: n == config['data'],
lambda path: json.load(file(path)),
lambda path: json.load(open(path)),
{},
verbosity=verbosity)
@ -326,7 +358,7 @@ def getmarked(path, config=CONFIG, verbosity=0):
pathjoin(path, config['cache-dir']),
lambda n: n.endswith(config['marked']),
lambda n: n == config['marked'],
lambda path: json.load(file(path)),
lambda path: json.load(open(path)),
[],
verbosity=verbosity)
@ -377,7 +409,7 @@ def build_images(path, config=CONFIG, gid_generator=hash_gid, dry_run=False, ver
if not full_scan and os.path.exists(filelist):
if verbosity >= 1:
print 'Loading: %s' % filelist
with file(filelist) as f:
with open(filelist) as f:
old_files = json.load(f)
cur_files = files[:]
# strip the processed files...
@ -387,15 +419,27 @@ def build_images(path, config=CONFIG, gid_generator=hash_gid, dry_run=False, ver
if verbosity >= 1:
print 'Writing: %s' % filelist
if not dry_run:
with file(filelist, 'w') as f:
json.dump(cur_files, f, indent=4, ensure_ascii=config['force-ascii'])
with open(filelist, 'w', encoding='utf-8') as f:
    # io.open(..) in text mode accepts only unicode, while on Python 2
    # json.dumps(..) may return either str or unicode, so serialize to a
    # string first and normalize it before writing...
    # NOTE: the file object must NOT be passed to json.dumps(..), its
    #       second positional parameter is skipkeys, so passing f there
    #       silently enabled key skipping.
    s = json.dumps(cur_files, indent=4, ensure_ascii=config['force-ascii'])
    if not isinstance(s, unicode):
        s = s.decode(DEFAULT_ENCODING)
    f.write(s)
# just write the list...
else:
if verbosity >= 1:
print 'Writing: %s' % filelist
if not dry_run:
with file(filelist, 'w') as f:
json.dump(files, f, indent=4, ensure_ascii=config['force-ascii'])
with open(filelist, 'w', encoding='utf-8') as f:
    # io.open(..) in text mode accepts only unicode, while on Python 2
    # json.dumps(..) may return either str or unicode, so serialize to a
    # string first and normalize it before writing...
    # NOTE: the file object must NOT be passed to json.dumps(..), its
    #       second positional parameter is skipkeys, so passing f there
    #       silently enabled key skipping.
    s = json.dumps(files, indent=4, ensure_ascii=config['force-ascii'])
    if not isinstance(s, unicode):
        s = s.decode(DEFAULT_ENCODING)
    f.write(s)
for name in files:
fname, ext = os.path.splitext(name)
@ -417,7 +461,7 @@ def build_images(path, config=CONFIG, gid_generator=hash_gid, dry_run=False, ver
source_path = pathjoin(path, cache_dir, CONFIG['cache-structure']['preview'], fname + '.jpg')
with file(source_path, 'w+b') as p:
with open(source_path, 'w+b') as p:
p.write(preview.data)
# copy metadata...
@ -707,8 +751,14 @@ def build_cache(path, config=CONFIG, gid_generator=hash_gid,
print 'Writing: %s' % n
if not dry_run:
##!!! DO NOT OVERWRITE EXISTING DATA...
with file(n, 'w') as f:
json.dump(d, f, indent=4, ensure_ascii=config['force-ascii'])
with open(n, 'w', encoding='utf-8') as f:
    # io.open(..) in text mode accepts only unicode, while on Python 2
    # json.dumps(..) may return either str or unicode, so serialize to a
    # string first and normalize it before writing...
    # NOTE: the file object must NOT be passed to json.dumps(..), its
    #       second positional parameter is skipkeys, so passing f there
    #       silently enabled key skipping.
    s = json.dumps(d, indent=4, ensure_ascii=config['force-ascii'])
    if not isinstance(s, unicode):
        s = s.decode(DEFAULT_ENCODING)
    f.write(s)
return data
@ -820,10 +870,13 @@ def handle_commandline():
# prepare the path...
if len(args) < 1:
IN_PATH = '.'
IN_PATH = u'.'
else:
IN_PATH = args[0]
IN_PATH = IN_PATH.replace('\\', '/')
##!!! need to convert this to utf-8...
if type(IN_PATH) != unicode:
IN_PATH = IN_PATH.decode(DEFAULT_ENCODING)
config = {}
config.update(CONFIG)
@ -832,11 +885,11 @@ def handle_commandline():
config_name = options.config_file
# local to script...
if os.path.exists(config_name):
with file(config_name) as f:
with open(config_name) as f:
config.update(json.load(f))
# local to target...
if os.path.exists(os.path.join(IN_PATH, config_name)):
with file(os.path.join(IN_PATH, config_name)) as f:
with open(os.path.join(IN_PATH, config_name)) as f:
config.update(json.load(f))
# update config according to set args...
@ -857,8 +910,14 @@ def handle_commandline():
# configuration stuff...
# write a local configuration...
if options.config_save_local:
with file(os.path.join(IN_PATH, config_name), 'w') as f:
f.write(json.dumps(config, sort_keys=True, indent=4, ensure_ascii=config['force-ascii']))
with open(os.path.join(IN_PATH, config_name), 'w', encoding='utf-8') as f:
## ##!!! json.dump writes some "strings" as unicode and some as str
## ##!!! this breaks fp.write(...)...
## f.write(json.dumps(config, sort_keys=True, indent=4, ensure_ascii=config['force-ascii']))
s = json.dumps(config, sort_keys=True, indent=4, ensure_ascii=config['force-ascii'])
if type(s) != unicode:
s = s.decode(DEFAULT_ENCODING)
f.write(s)
# print configuration data...
if True in (options.config_defaults_print, options.config_print):
@ -917,4 +976,4 @@ if __name__ == '__main__':
#=======================================================================
# vim:set ts=4 sw=4 nowrap :
# vim:set ts=4 sw=4 nowrap encoding=utf-8 :