I modified Denis Otkidach's answer a bit, so I will add my changes as a community wiki in case anyone else is interested:
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import types
from pyuca import Collator
class MyUnicode(types.TypeDecorator):
    """Unicode column type whose column spec names the ``mycollation`` collation.

    Wraps ``types.Unicode`` (see ``impl``) and overrides ``get_col_spec`` so
    the returned spec string carries a ``COLLATE mycollation`` clause.
    """

    # Underlying SQLAlchemy type wrapped by this decorator.
    impl = types.Unicode

    def get_col_spec(self):
        """Return the column spec string, including the COLLATE clause."""
        # NOTE(review): whether SQLAlchemy consults get_col_spec() on a
        # TypeDecorator appears to depend on the library version -- confirm
        # against the installed release that the COLLATE clause is emitted.
        spec = 'Unicode COLLATE mycollation'
        return spec
# pyuca collator built from 'allkeys.txt'; mycollation() refers to it in its
# pyuca-based comparison branch.
# NOTE(review): the path is relative -- presumably resolved against the
# current working directory; confirm where the file is expected to live.
collator = Collator('allkeys.txt')
def _to_text(value):
    """Return *value* as a text (unicode) string, decoding bytes as UTF-8."""
    # Python 2's sqlite3 hands collation callbacks UTF-8 encoded bytestrings;
    # decode them explicitly instead of relying on the ASCII default codec,
    # which fails on any non-ASCII character.
    if isinstance(value, bytes):
        return value.decode('utf-8')
    text_type = type(u'')
    if isinstance(value, text_type):
        return value
    return text_type(value)


def mycollation(value1, value2):
    """Three-way comparison registered with SQLite as ``mycollation``.

    Both values are converted to text, then compared either by pyuca sort
    keys or (the default) by their NFD-normalized, lower-cased form.

    Args:
        value1: First string (text, or UTF-8 encoded bytes).
        value2: Second string (text, or UTF-8 encoded bytes).

    Returns:
        -1, 0 or 1 when value1 sorts before, equal to, or after value2.
    """
    import unicodedata

    # Flip to True to order by the module-level pyuca ``collator`` instead of
    # the NFD/lower-case fallback below.
    use_pyuca = False

    text1 = _to_text(value1)
    text2 = _to_text(value2)

    if use_pyuca:
        key1 = collator.sort_key(text1)
        key2 = collator.sort_key(text2)
    else:
        key1 = unicodedata.normalize('NFD', text1).lower()
        key2 = unicodedata.normalize('NFD', text2).lower()

    # Portable replacement for cmp(), which Python 3 removed.
    return (key1 > key2) - (key1 < key2)
# Shared MetaData object; the script later calls metadata.create_all(engine).
metadata = MetaData()
# Declarative base class tied to that metadata; Item derives from it.
Base = declarative_base(metadata=metadata)
class Item(Base):
    """Declarative model mapped to the ``CollatedTable`` table."""

    __tablename__ = 'CollatedTable'

    # Integer primary key.
    id = Column(Integer, primary_key=True)

    # Required, unique text value stored with the MyUnicode column type.
    value = Column(
        MyUnicode(),
        unique=True,
        nullable=False,
    )
# Demo driver: create an SQLite engine, register the collation, insert sample
# words and print them back ordered by the collated column.
# NOTE(review): the non-ASCII literals below need a UTF-8 source encoding
# declaration at the top of the file when run on Python 2 -- confirm.
engine = create_engine('sqlite://')
engine.echo = True

# Register the Python comparison function under the name used in the column
# spec of MyUnicode.
# NOTE(review): this registers the collation on a single raw DBAPI
# connection; presumably the engine keeps reusing that connection for the
# session below -- confirm for the connection pool in use.
engine.raw_connection().create_collation('mycollation', mycollation)
metadata.create_all(engine)
session = sessionmaker(engine)()

words = [
    u"ĉambr", u"ĉar", u"car'", u"carin'", u"ĉe", u"ĉef'",
    u"centjar'", u"centr'", u"cerb'", u"cert'", u"ĉes'", u"ceter'",
    u"zimble", u'bumble',
    u'apple', u'ápple', u'ãpple',
    u'đjango', u'django',
]
session.add_all(Item(value=word) for word in words)
session.commit()

for item in session.query(Item).order_by(Item.value):
    # Parenthesized single-argument form prints the same on Python 2 and 3.
    print(item.value)
source
share