import os
import shelve


class Keycachedata:
    """Tiny key/value cache persisted in a ``shelve`` file.

    The shelf lives in ``<self.path>/qacrossworddata``; ``self.path`` defaults
    to the working directory at construction time.  Every public method opens
    the shelf, performs a single operation and closes it again, so no handle
    is kept open between calls.

    ``self.key`` remembers the key named by the most recent call that takes a
    ``key`` argument; ``savekeydata`` and ``retrievekeyname`` rely on it.
    """

    # Base name of the shelve file inside ``self.path``.
    SHELFNAME = 'qacrossworddata'

    def __init__(self):
        self.path = os.getcwd()
        self.key = ''

    def deleteKey(self, key):
        """Remove ``key`` and its data from the store; do nothing if absent."""
        self.key = key
        self.__checkreadFile()
        try:
            if self.__verifykey():
                del self.keydatafile[self.key]
        finally:
            self.keydatafile.close()

    def savekey(self, key, keydata):
        """Store ``keydata`` under ``key``, but only if ``key`` already exists.

        Unknown keys are left untouched; create them with ``addkey`` first.
        """
        self.key = key
        self.__checkreadFile()
        try:
            if self.__verifykey():
                print('Key verified.')
                self.keydatafile[self.key] = keydata
                print('key data saved.')
        finally:
            self.keydatafile.close()

    def savekeydata(self, keydata):
        """Store ``keydata`` under the current ``self.key`` if that key exists."""
        self.__checkreadFile()
        try:
            if self.__verifykey():
                self.keydatafile[self.key] = keydata
                print('key data saved.')
        finally:
            self.keydatafile.close()

    def addkey(self, key):
        """Create ``key`` with an empty dict as its data unless it exists.

        Returns True when the key already existed, False when it was added.
        """
        self.key = key
        self.__checkreadFile()
        try:
            keycheck = self.__verifykey()
            if not keycheck:
                self.keydatafile[self.key] = {}
                print('key added.')
            else:
                print('key already exists.')
        finally:
            # Close on every path; the write is only guaranteed to reach
            # disk once the shelf has been closed.
            self.keydatafile.close()
        return keycheck

    def retrievekey(self, key):
        """Return the data stored under ``key``, or ``{}`` if it is missing."""
        self.key = key
        self.__checkreadFile()
        self.keydata = {}
        try:
            if self.__verifykey():
                self.keydata = self.keydatafile[self.key]
        finally:
            self.keydatafile.close()
        return self.keydata

    def retrievekeyname(self):
        """Return the key named by the most recent keyed operation."""
        return self.key

    def retrievekeylist(self):
        """Return a list of every key currently in the store."""
        self.__checkreadFile()
        try:
            return list(self.keydatafile.keys())
        finally:
            self.keydatafile.close()

    def iskeydatempty(self):
        """Return True when the store holds no keys at all."""
        self.__checkreadFile()
        try:
            return len(self.keydatafile) == 0
        finally:
            self.keydatafile.close()

    def returnFirstkey(self):
        """Return ``[key, data]`` for the first key the shelf reports.

        Raises IndexError when the store is empty.
        """
        self.__checkreadFile()
        try:
            firstkeykey = list(self.keydatafile.keys())[0]
            # The value has to be read while the shelf is still open.
            return [firstkeykey, self.keydatafile[firstkeykey]]
        finally:
            self.keydatafile.close()

    def __verifykey(self):
        """Return True if ``self.key`` is present in the open shelf."""
        return self.key in self.keydatafile

    def verifykey(self, keyname):
        """Return True if ``keyname`` is in the store; ``self.key`` is untouched."""
        self.__checkreadFile()
        try:
            return keyname in self.keydatafile
        finally:
            self.keydatafile.close()

    def __checkreadFile(self):
        """Open the shelf read/write, creating it when it does not exist yet.

        Flag 'c' is used rather than probing the path with ``os.access``:
        some dbm backends append their own suffix to the file name, so a test
        on the bare path can fail for an existing store, and re-creating the
        store with flag 'n' would then wipe it.
        """
        self.pathfile = os.path.join(self.path, self.SHELFNAME)
        self.keydatafile = shelve.open(self.pathfile, 'c')


# Number of ';'-separated fields expected on a line of the Unicode data table.
KEYSPOP = 15
# Alternate field count that is also accepted.
KEYSPOPALT = 14

# The following site provides unicode tables that can be used with this
# importer:
# http://www.utf8-chartable.de
#
# No try/except handling is included in this program, so every input file
# has to be consistently valid and of the same format for it to work; a table
# whose lines do not carry the expected number of separated fields will not be
# read.  If yours differs you will need to restructure the tables or the code,
# since the reader is structured around the tables of the website above.
def startendwhitespaceremove(stringline):
    """Return ``stringline`` without leading/trailing whitespace.

    Every inner run of whitespace is collapsed to a single space as well.
    """
    return ' '.join(stringline.split())


def bisectionsearch(intval, keyset):
    """Binary-search ``keyset`` for the range that contains ``intval``.

    ``keyset`` is a sorted list of non-empty, non-overlapping sequences of
    increasing integers (only the first and last element of each are read).
    Returns a one-element list holding the matching sequence, or an empty
    list when no sequence covers ``intval``.
    """
    low = 0
    high = len(keyset) - 1
    while low <= high:
        middle = (low + high) // 2
        rangekey = keyset[middle]
        if intval < rangekey[0]:
            high = middle - 1
        elif intval > rangekey[-1]:
            low = middle + 1
        else:
            return [rangekey]
    return []


def _codepointtoutf8(uc):
    """Return the UTF-8 encoding of the character with hex code point ``uc``."""
    try:
        tochar = unichr  # Python 2
    except NameError:
        tochar = chr  # Python 3
    return tochar(int(uc, 16)).encode('utf-8')


def preappendunicodedata(filename='', quickfinddict=None):
    """Index a ';'-separated Unicode data table in a single pass.

    ``quickfinddict`` maps a tuple of consecutive code points (one tuple per
    block) to ``(blockident, blockname)``, as built by ``blockimporter``.

    Returns ``(dicta, blocksdict)``:

    * ``dicta`` maps the hex code point string to
      ``(name, line index, case field, blockident)``;
    * ``blocksdict`` maps ``tuple(blockident)`` to ``{hex code point: name}``.

    Only lines with ``KEYSPOP`` or ``KEYSPOPALT`` fields are read; lines with
    an empty name or a code point outside every known block are skipped.
    The import is structured for the present unicode data format; consult the
    tables to verify correct importation.
    """
    if quickfinddict is None:
        quickfinddict = {}
    # Sort once instead of once per input line.  Empty ranges can never
    # contain a code point, so they are left out of the search.
    sortedranges = sorted(rangekey for rangekey in quickfinddict if rangekey)
    dicta = {}
    blocksdict = {}
    with open(filename) as datafile:
        for count, line in enumerate(datafile):
            unilist = line.split(';')
            if len(unilist) not in (KEYSPOP, KEYSPOPALT):
                # Unexpected shape: skip rather than reuse stale values.
                continue
            uc, name, case = unilist[0], unilist[1], unilist[2]
            if not name:
                continue
            rangekey = bisectionsearch(int(uc, 16), sortedranges)
            if len(rangekey) != 1:
                continue
            blockident, blockname = quickfinddict[rangekey[0]]
            dicta[uc] = (name, count, case, blockident)
            blocksdict.setdefault(tuple(blockident), {})[uc] = name
    return (dicta, blocksdict)


def readunicodefile(lname='LATIN', unicodedat=None, blocksdict=None,
                    block=(0, 0)):
    """Collect the letters of one block, grouped by case.

    ``unicodedat`` and ``blocksdict`` are the two values returned by
    ``preappendunicodedata``; ``block`` identifies the block to read.

    Returns a dict with the keys ``lname + '_LOWER'``, ``lname + '_UPPER'``
    and ``lname + '_OTHER'``.  Each maps a 1-based counter to the UTF-8
    encoded character, chosen by whether the case field contains 'Ll', 'Lu'
    or 'Lo'.  Characters of any other category are ignored.  A block with no
    recorded characters yields three empty dicts.
    """
    if unicodedat is None:
        unicodedat = {}
    if blocksdict is None:
        blocksdict = {}
    casesuffixes = (('Lu', '_UPPER'), ('Ll', '_LOWER'), ('Lo', '_OTHER'))
    dicta = {
        lname + '_LOWER': {},
        lname + '_UPPER': {},
        lname + '_OTHER': {},
    }
    # The blocks table does not guarantee that any table element exists for
    # a given range, so a missing block is treated as empty.
    for uc in blocksdict.get(tuple(block), {}):
        entry = unicodedat[uc]
        name, case = entry[0], entry[2]
        if not name:
            continue
        for marker, suffix in casesuffixes:
            if marker in case:
                chardict = dicta[lname + suffix]
                chardict[len(chardict) + 1] = _codepointtoutf8(uc)
                break
    return dicta


def createkeyboardrows(dicta, lname, rowlength=11):
    """Restructure the per-case dicts of ``readunicodefile`` into rows.

    Returns ``{'LOWER': rows, 'UPPER': rows}`` where ``rows`` maps a 1-based
    row number to a dict that maps a 1-based column number to a character,
    which is easy to pass around as a 2-dimensional array.

    The case-specific characters fill the first rows of each layout and the
    'OTHER' characters are appended to both layouts on rows of their own.
    Each of the four passes closes with a final row even when it is empty.

    ``rowlength`` is the maximum number of characters per row.  The default
    of 11 is the width this function has always produced (NOTE(review): an
    earlier comment spoke of 10 columns; confirm which one is intended).
    Raises ValueError when ``rowlength`` is smaller than 1.
    """
    if rowlength < 1:
        raise ValueError('rowlength must be at least 1')
    kblangpack = {'LOWER': {}, 'UPPER': {}}

    def appendrows(sourcekey, keyswitch):
        """Append the characters of ``dicta[sourcekey]`` as rows of a layout."""
        rows = kblangpack[keyswitch]
        chardict = {}
        for char in dicta[sourcekey].values():
            if len(chardict) >= rowlength:
                rows[len(rows) + 1] = chardict
                chardict = {}
            chardict[len(chardict) + 1] = char
        rows[len(rows) + 1] = chardict

    appendrows(lname + '_LOWER', 'LOWER')
    appendrows(lname + '_UPPER', 'UPPER')
    appendrows(lname + '_OTHER', 'LOWER')
    appendrows(lname + '_OTHER', 'UPPER')
    return kblangpack


def blockimporter(blockfilepath=''):
    """Read a blocks table with lines of the form ``START..END; Name``.

    Returns ``(returndict, quickfinddict)``:

    * ``returndict`` maps the upper-cased block name to ``[START, END]``
      (hex strings);
    * ``quickfinddict`` maps the tuple of every code point from START to END
      inclusive to ``([START, END], NAME)``.

    Blank lines, lines starting with '#', lines without exactly one ';' and
    lines without a ``START..END`` range or a name are skipped.  A range that
    is not valid hexadecimal raises ValueError.
    """
    returndict = {}
    quickfinddict = {}
    with open(blockfilepath) as blockfile:
        for line in blockfile:
            if line.lstrip().startswith('#'):
                continue
            linesplit = line.split(';')
            if len(linesplit) != 2:
                continue
            block = startendwhitespaceremove(linesplit[0])
            name = startendwhitespaceremove(linesplit[1]).upper()
            blocklist = [part.strip() for part in block.split('..')]
            if not name or len(blocklist) != 2:
                continue
            block1 = int(blocklist[0], 16)
            block2 = int(blocklist[1], 16)
            returndict[name] = blocklist
            quickfinddict[tuple(range(block1, block2 + 1))] = (blocklist, name)
    return (returndict, quickfinddict)


def importer(blockfilepath, unicodedatfilepath):
    """Import every block of the given tables into the 'Language' shelf key.

    For each block named in the blocks table the letters are read from the
    unicode data table, arranged into keyboard rows and stored in the dict
    kept under the key 'Language', with the block name as the dict key.

    The 'Language' key has to exist beforehand, because ``savekey`` only
    writes to existing keys.  When it is missing a message is printed and
    nothing is imported.
    """
    keydatamanager = Keycachedata()
    if not keydatamanager.verifykey('Language'):
        print("key 'Language' not found; create it with "
              "Keycachedata().addkey('Language') before importing.")
        return
    blockdict, quickfinddict = blockimporter(blockfilepath)
    unicodedat, blocksdict = preappendunicodedata(unicodedatfilepath,
                                                  quickfinddict)
    # Read the stored dict once and write it back once, instead of
    # re-pickling the whole, growing dict for every block.
    langpackdict = keydatamanager.retrievekey('Language')
    for blockname in blockdict:
        block = blockdict[blockname]
        print('processing: ', blockname)
        dicta = readunicodefile(blockname, unicodedat, blocksdict, block)
        langpackdict[blockname] = createkeyboardrows(dicta, blockname)
    keydatamanager.savekey('Language', langpackdict)


if __name__ == '__main__':
    import sys

    if len(sys.argv) != 3:
        sys.exit('usage: %s BLOCKS_FILE UNICODE_DATA_FILE' % sys.argv[0])
    importer(sys.argv[1], sys.argv[2])
This program creates a character set library from existing unicode table data in utf-8 format. I am not sure whether such a library already exists in Ubuntu, so I may have reinvented the wheel here, but it nicely stores character set data for the case type forms 'Lu', 'Ll', and 'Lo' (upper, lower, and other). It does not include punctuation, control characters, and a range of other types; you could manually tweak the code filters in the source above to include them if you like.
Also, I haven't written the parser to skip comment lines in the unicode tables, so simply remove those lines; you may also need to remove the last line of the blocks table.
Also the shelve object file is stored in the same directory that the program is run on. You will need to create the key 'Language' prior to running in order to save table data to this shelve file. You can do this using the usual python console commands:
>>import shelve
>>a = shelve.open(stringpathtoshelvefilenamehere, 'n')
>>a['Language'] = {}
or use the Keycachedata class above for key creation...using filename for module importation this can be done as follows:
>>from filename import Keycachedata
>>a = Keycachedata()
>>a.addkey('Language')
Then run script above.
To access this data, you can do so using Keycachedata module as follows:
>>a = Keycachedata()
>>Languagedict = a.retrievekey('Language')
Then Languagedict will contain all of the imported language table information. You simply need to provide the language key, which should be all caps; you can list the available keys at the console with:
>>list(Languagedict.keys())
No comments:
Post a Comment