import os
import os
import shelve
class Keycachedata:
    """Key/value store persisted in a ``shelve`` file.

    The shelf is called ``qacrossworddata`` and lives in the directory that
    was current when the instance was created (``self.path``).  Every public
    method opens the shelf, does its work and closes it again -- also when
    an error occurs -- so no handle is kept open between calls.

    Attributes:
        path: Directory that holds the shelf file.
        key: Name of the key most recently passed to a keyed method.
        keydata: Value produced by the last ``retrievekey`` call.
        pathfile: Full path of the shelf; set each time the shelf is opened.
        keydatafile: Shelf object of the most recent call (already closed
            once that call has returned).
    """

    def __init__(self):
        self.path = os.getcwd()
        self.key = ''

    def __checkreadFile(self):
        """Open the shelf for reading and writing, creating it if missing.

        Flag ``'c'`` lets the dbm layer decide whether the database exists.
        Testing ``self.pathfile`` with ``os.access`` is unreliable because
        some dbm backends append their own extension to the file name; the
        test then fails every time and a fresh, empty shelf would replace
        the stored data on each call.
        """
        self.pathfile = os.path.join(self.path, 'qacrossworddata')
        self.keydatafile = shelve.open(self.pathfile, 'c')

    def __verifykey(self):
        """Return True if ``self.key`` is present in the open shelf."""
        return self.key in self.keydatafile

    def deleteKey(self, key):
        """Remove *key* and its data from the shelf; no-op if it is absent."""
        self.key = key
        self.__checkreadFile()
        try:
            if self.__verifykey():
                del self.keydatafile[self.key]
        finally:
            self.keydatafile.close()

    def savekey(self, key, keydata):
        """Store *keydata* under *key*, but only if *key* already exists.

        Use ``addkey`` first to create a key.
        """
        self.key = key
        self.__checkreadFile()
        try:
            if self.__verifykey():
                print('Key verified.')
                self.keydatafile[self.key] = keydata
                print('key data saved.')
        finally:
            self.keydatafile.close()

    def savekeydata(self, keydata):
        """Store *keydata* under the current ``self.key`` if that key exists."""
        self.__checkreadFile()
        try:
            if self.__verifykey():
                self.keydatafile[self.key] = keydata
                print('key data saved.')
        finally:
            self.keydatafile.close()

    def addkey(self, key):
        """Create *key* with an empty dict as its data unless it exists.

        Returns:
            True if the key already existed (nothing is changed),
            False if it was newly added.
        """
        self.key = key
        self.__checkreadFile()
        try:
            keycheck = self.__verifykey()
            if not keycheck:
                self.keydatafile[self.key] = {}
                print('key added.')
            else:
                print('key already exists.')
        finally:
            # Close before returning; otherwise the shelf is left open.
            self.keydatafile.close()
        return keycheck

    def retrievekey(self, key):
        """Return the data stored under *key*, or ``{}`` if it is absent."""
        self.key = key
        self.__checkreadFile()
        try:
            self.keydata = {}
            if self.__verifykey():
                self.keydata = self.keydatafile[self.key]
        finally:
            self.keydatafile.close()
        return self.keydata

    def retrievekeyname(self):
        """Return the key name most recently set on this instance."""
        return self.key

    def retrievekeylist(self):
        """Return a list of every key in the shelf."""
        self.__checkreadFile()
        try:
            return list(self.keydatafile.keys())
        finally:
            self.keydatafile.close()

    def iskeydatempty(self):
        """Return True if the shelf holds no keys at all."""
        self.__checkreadFile()
        try:
            return len(self.keydatafile) == 0
        finally:
            self.keydatafile.close()

    def returnFirstkey(self):
        """Return ``[key, data]`` for the first key the shelf reports.

        Raises:
            IndexError: If the shelf is empty.
        """
        self.__checkreadFile()
        try:
            firstkeykey = list(self.keydatafile.keys())[0]
            # The value has to be read while the shelf is still open.
            return [firstkeykey, self.keydatafile[firstkeykey]]
        finally:
            self.keydatafile.close()

    def verifykey(self, keyname):
        """Return True if *keyname* is in the shelf (``self.key`` is untouched)."""
        self.__checkreadFile()
        try:
            return keyname in self.keydatafile
        finally:
            self.keydatafile.close()
# Field counts that preappendunicodedata accepts for a line of the Unicode
# data file after splitting it on ';'.  Lines with any other number of
# fields do not match either branch of its parser.
KEYSPOP = 15
KEYSPOPALT = 14 # alternate accepted field count
# The following site provides Unicode tables that can be used with this
# importer:
# http://www.utf8-chartable.de
# This program has no try/except handling, so every input file must be
# consistently valid and of the same format for this to work; i.e. any
# table written with more than 4 tab-separated keys or fewer than
# 3 tab-separated keys
# will not work with this file reader. In that case you will need to
# restructure the tables or change the code, as the reader is structured
# around the website tables mentioned above.
def startendwhitespaceremove(stringline):
    """Return *stringline* without leading/trailing whitespace.

    The text is split on whitespace and the pieces are re-joined with one
    space each, so runs of whitespace inside the text shrink to a single
    space as well.  A blank or empty input gives ``''``.
    """
    words = stringline.split()
    return ' '.join(words)
def preappendunicodedata(filename='', quickfinddict=None):
    """Index a ';'-separated Unicode data file by code point and by block.

    Args:
        filename: Path of the Unicode data file.  Only lines that split
            into KEYSPOP or KEYSPOPALT fields on ';' are used; fields 0, 1
            and 2 are taken as the hex code point, the name and the
            category string.
        quickfinddict: Maps a tuple of the code points of a block (in
            ascending order) to ``(blockident, blockname)``, as built by
            ``blockimporter``.  Ranges are assumed not to overlap.
            ``None`` is treated as an empty mapping.

    Returns:
        ``(dicta, blocksdict)`` where ``dicta[uc]`` is
        ``(name, count, case, blockident)`` -- ``count`` being the
        zero-based line index in the file -- and
        ``blocksdict[tuple(blockident)]`` maps ``uc`` to ``name`` for
        every recorded character of that block.

    Raises:
        ValueError: If the code point field of a used line is not hex.
    """
    # Imported locally because only this function needs it.
    from bisect import bisect_right

    if quickfinddict is None:
        quickfinddict = {}

    # Sort the range keys once instead of once per input line.  Empty
    # ranges cannot contain any code point, so they are dropped here.
    rangekeys = sorted(key for key in quickfinddict if len(key) > 0)
    rangestarts = [key[0] for key in rangekeys]

    def bisectionsearch(intval):
        """Return ``[rangekey]`` for the range holding *intval*, else ``[]``."""
        # Right-most range that starts at or before intval.
        pos = bisect_right(rangestarts, intval) - 1
        if pos >= 0 and rangekeys[pos][0] <= intval <= rangekeys[pos][-1]:
            return [rangekeys[pos]]
        return []

    dicta = {}
    blocksdict = {}
    with open(filename) as datafile:
        for count, line in enumerate(datafile):
            unilist = line.split(';')
            if len(unilist) not in (KEYSPOP, KEYSPOPALT):
                # Not a data line: skip it rather than reprocess the
                # values left over from the previous line.
                continue
            uc, name, case = unilist[0], unilist[1], unilist[2]
            if not name:
                continue
            rangekey = bisectionsearch(int(uc, 16))
            if not rangekey:
                continue
            blockident, _blockname = quickfinddict[rangekey[0]]
            dicta[uc] = (name, count, case, blockident)
            # blockident may be a list; dictionary keys must be hashable.
            blocksdict.setdefault(tuple(blockident), {})[uc] = name
    return (dicta, blocksdict)
def readunicodefile(lname='LATIN', unicodedat=None, blocksdict=None,
                    block=(0, 0)):
    """Collect the letters of one block, grouped by letter category.

    Args:
        lname: Prefix used for the keys of the returned dictionary.
        unicodedat: Maps a hex code point string to a sequence whose item 0
            is the character name and item 2 the category string.
        blocksdict: Maps ``tuple(block)`` to a mapping whose keys are the
            hex code point strings of that block.
        block: Identifier of the block to read; it is converted with
            ``tuple()`` before the lookup in *blocksdict*.

    Returns:
        A dict with the keys ``lname + '_LOWER'``, ``lname + '_UPPER'`` and
        ``lname + '_OTHER'``.  Each value maps a 1-based running index to
        the UTF-8 encoded character whose category string contains 'Ll',
        'Lu' or 'Lo' respectively.  Characters of other categories or
        without a name are left out.  A block that has no entry in
        *blocksdict* yields three empty dictionaries.

    Raises:
        KeyError: If a code point of the block is missing from *unicodedat*.
    """
    if unicodedat is None:
        unicodedat = {}
    if blocksdict is None:
        blocksdict = {}

    try:
        tochar = unichr  # Python 2 spelling
    except NameError:
        tochar = chr  # Python 3: chr() covers all of Unicode

    # Tested in this order; the first category that matches wins.
    categories = (('Lu', '_UPPER'), ('Ll', '_LOWER'), ('Lo', '_OTHER'))

    dicta = {}
    dicta[lname + '_LOWER'] = {}
    dicta[lname + '_UPPER'] = {}
    dicta[lname + '_OTHER'] = {}

    for uc in blocksdict.get(tuple(block), {}):
        name = unicodedat[uc][0]
        case = unicodedat[uc][2]
        if not name:
            continue
        for category, suffix in categories:
            if category in case:
                chardict = dicta[lname + suffix]
                # int(uc, 16) parses the hex digits directly, so no text
                # from the data file is ever passed to eval().
                chardict[len(chardict) + 1] = (
                    tochar(int(uc, 16)).encode('utf-8'))
                break
    return dicta
def createkeyboardrows(dicta, lname):
    """Arrange the characters of *dicta* into numbered keyboard rows.

    Args:
        dicta: Dictionary holding the keys ``lname + '_LOWER'``,
            ``lname + '_UPPER'`` and ``lname + '_OTHER'``; each value is a
            mapping whose values are the characters to lay out.
        lname: Prefix of the keys in *dicta*.

    Returns:
        ``{'LOWER': rows, 'UPPER': rows}``.  ``rows`` maps a 1-based row
        number to a row dictionary, which in turn maps a 1-based column
        number to a character.  A row is closed once it holds 11 entries.
        'LOWER' holds the lower-case rows followed by the rows of the
        'other' characters; 'UPPER' holds the upper-case rows followed by
        the same 'other' characters.  The last row of each group is always
        stored, even when it is empty.
    """
    kblangpack = {'LOWER': {}, 'UPPER': {}}
    rowcapacity = 11

    def appendrows(target, source):
        # Append the characters of dicta[lname + '_' + source] to the rows
        # already stored under kblangpack[target].
        rows = kblangpack[target]
        currentrow = {}
        for char in dicta[lname + '_' + source].values():
            if len(currentrow) >= rowcapacity:
                rows[len(rows) + 1] = currentrow
                currentrow = {}
            currentrow[len(currentrow) + 1] = char
        rows[len(rows) + 1] = currentrow

    for target, source in (('LOWER', 'LOWER'), ('UPPER', 'UPPER'),
                           ('LOWER', 'OTHER'), ('UPPER', 'OTHER')):
        appendrows(target, source)
    return kblangpack
def blockimporter(blockfilepath=''):
    """Read a Unicode blocks table of ``START..END; Name`` lines.

    Lines that start with '#', lines that do not split into exactly two
    fields on ';', and lines whose range field is blank are skipped.

    Args:
        blockfilepath: Path of the blocks file.

    Returns:
        ``(returndict, quickfinddict)`` where ``returndict`` maps the
        upper-cased block name to ``[START, END]`` (hex strings) and
        ``quickfinddict`` maps the tuple of all code points from START to
        END inclusive to ``([START, END], NAME)``.

    Raises:
        ValueError: If START or END is not a hexadecimal number.
        IndexError: If the range field has no ``..`` separator.
    """
    returndict = {}
    quickfinddict = {}
    with open(blockfilepath) as blockfile:
        for line in blockfile:
            if line.lstrip().startswith('#'):
                # A comment may itself contain ';', so it must be skipped
                # before the field split.
                continue
            linesplit = line.split(';')
            if len(linesplit) != 2:
                continue
            block = startendwhitespaceremove(linesplit[0])
            if not block:
                # No range on this line; there is nothing to register.
                continue
            name = startendwhitespaceremove(linesplit[1]).upper()
            blocklist = block.split('..')
            returndict[name] = blocklist
            block1 = int(blocklist[0], 16)
            block2 = int(blocklist[1], 16)
            quickfinddict[tuple(range(block1, block2 + 1))] = (blocklist,
                                                                name)
    return (returndict, quickfinddict)
def importer(blockfilepath, unicodedatfilepath):
    """Import every block of the blocks file into the 'Language' cache entry.

    For each block name returned by ``blockimporter`` the block's letters
    are read with ``readunicodefile``, arranged with
    ``createkeyboardrows`` and stored under the block name inside the
    dictionary kept under the 'Language' key of ``Keycachedata``.  The
    dictionary is read and written back once per block.

    Args:
        blockfilepath: Path of the Unicode blocks file.
        unicodedatfilepath: Path of the Unicode data file.
    """
    blockranges, rangelookup = blockimporter(blockfilepath)
    chardata, charsbyblock = preappendunicodedata(unicodedatfilepath,
                                                  rangelookup)
    for blockname, blockrange in blockranges.items():
        print('processing: ', blockname)
        casedicts = readunicodefile(blockname, chardata, charsbyblock,
                                    blockrange)
        keyboardrows = createkeyboardrows(casedicts, blockname)
        cache = Keycachedata()
        languages = cache.retrievekey('Language')
        languages[blockname] = keyboardrows
        cache.savekey('Language', languages)
# Run the import only when this file is executed as a script, so that
# importing the module (e.g. to use Keycachedata) has no side effects.
# Usage: python <thisfile> BLOCKS_FILE UNICODE_DATA_FILE
if __name__ == '__main__':
    import sys

    if len(sys.argv) != 3:
        sys.exit('usage: %s BLOCKS_FILE UNICODE_DATA_FILE' % sys.argv[0])
    importer(sys.argv[1], sys.argv[2])
This program builds a character-set library from existing Unicode table data and stores the characters UTF-8 encoded. I'm not sure whether such a library already exists in Ubuntu, so I may have reinvented the wheel here, but it neatly stores character data for the case type forms 'Lu', 'Ll', and 'Lo' (upper, lower, and other). It does not include punctuation, control characters, or a range of other types; you could manually tweak the filters in the source above to add them if you like.
Also, I haven't written the parser to skip comment lines in the Unicode tables; simply remove these lines, and you may need to remove the last line of the blocks table as well.
Also, the shelve file is stored in the directory the program is run from. You will need to create the key 'Language' before running the script in order to save table data to this shelve file. You can do this with the usual Python console commands:
>>import shelve
>>a = shelve.open(stringpathtoshelvefilenamehere, 'n')
>>a['Language'] = {}
>>a.close()
or use the Keycachedata class above to create the key. Using the script's file name as the module name, this can be done as follows:
>>from filename import Keycachedata
>>a = Keycachedata()
>>a.addkey('Language')
Then run script above.
To access this data, use the Keycachedata class as follows:
>>a = Keycachedata()
>>Languagedict = a.retrievekey('Language')
Then Languagedict will contain all of the imported language table information. You simply need to provide the language key, which should be all caps; you can list the available keys at the console with:
>>list(Languagedict.keys())
No comments:
Post a Comment