Improve character table generation
This commit is contained in:
		
							parent
							
								
									61ba0db659
								
							
						
					
					
						commit
						af0421e8df
					
				
					 2 changed files with 49 additions and 36 deletions
				
			
		
							
								
								
									
										49
									
								
								generate_character_list.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								generate_character_list.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,49 @@
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					Before running, download the latest Unicode tables (Blocks.txt and Index.txt) from https://www.unicode.org; this script then creates a .txt file
 | 
				
			||||||
 | 
					containing all the names, blocks and character codes
 | 
				
			||||||
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _load_blocks(path):
					    """Parse a Unicode ``Blocks.txt``-style file.

					    Each data line is expected to look like ``START..STOP; Block Name``
					    with hexadecimal code points. Lines starting with ``#`` and blank
					    lines are skipped.

					    Returns:
					        A list of ``(start, stop, name)`` tuples in file order, where
					        ``start`` and ``stop`` are the inclusive integer bounds.
					    """
					    blocks = []
					    with open(path, "r") as block_file:
					        for line in block_file:
					            # A blank line has no ";" and would break the unpacking below.
					            if line.startswith("#") or not line.strip():
					                continue
					            code_range, name = line.split(";")
					            start, stop = code_range.split("..")
					            blocks.append((int(start, 16), int(stop, 16), name.strip()))
					    return blocks


					def _find_block(blocks, code):
					    """Return the name of the first block containing *code*, else ``None``."""
					    for start, stop, name in blocks:
					        if start <= code <= stop:
					            return name
					    return None


					def main():
					    """Write ``unicode_list.txt`` from ``Blocks.txt`` and ``Index.txt``.

					    All three files are resolved relative to the current working
					    directory. Every ``name<TAB>code`` entry of ``Index.txt`` becomes one
					    output line ``name<TAB>code<TAB>block``; the block column is left empty
					    (and a message is printed) when the code point lies in no known block.
					    Entries whose code is not valid hexadecimal are reported on stdout and
					    left out of the output. Comment lines (``#``) and blank lines are
					    ignored.
					    """
					    # ``unichr`` only exists on Python 2; on Python 3 the builtin ``chr``
					    # handles the full code point range, so pick whichever is available.
					    try:
					        to_char = unichr  # noqa: F821
					    except NameError:
					        to_char = chr

					    blocks = _load_blocks("Blocks.txt")

					    with open("unicode_list.txt", "w") as target, open("Index.txt", "r") as names:
					        for line in names:
					            if line.startswith("#") or not line.strip():
					                continue
					            name, code = line.strip().split("\t")
					            try:
					                num = int(code, 16)
					            except ValueError:
					                print("could not convert " + code)
					                continue
					            block = _find_block(blocks, num)
					            if block is None:
					                print(
					                    "Code " + str(num) + " not found in block, char: " + to_char(num)
					                )
					                # Keep the entry, just with an empty block column.
					                block = ""
					            target.write(name + "\t" + code + "\t" + block + "\n")


					if __name__ == "__main__":
					    main()
 | 
				
			||||||
| 
						 | 
					@ -1,36 +0,0 @@
 | 
				
			||||||
# Build the tab-separated character table ``unicode_list.txt`` from the
# ``Blocks.txt`` and ``Index.txt`` files in the current working directory.

# ``unichr`` only exists on Python 2; on Python 3 the builtin ``chr`` handles
# the full code point range, so pick whichever is available.
try:
    _to_char = unichr  # noqa: F821
except NameError:
    _to_char = chr

indices = []  # inclusive (start, stop) code point bounds, one pair per block
blocks = []  # block names, parallel to ``indices``
with open('Blocks.txt', 'r') as block_file:
    for line in block_file:
        # A blank line has no ';' and would break the unpacking below.
        if line.startswith('#') or not line.strip():
            continue
        code_range, name = line.split(';')
        start, stop = code_range.split('..')
        indices.append((int(start, 16), int(stop, 16)))
        blocks.append(name.strip())


def locate_block(code):
    """Return the position in ``indices`` of the first range containing *code*.

    Returns ``None`` when no range contains the code point.
    """
    for index, (start, stop) in enumerate(indices):
        if start <= code <= stop:
            return index
    return None


with open('unicode_list.txt', 'w') as target:
    with open('Index.txt', 'r') as names:
        for line in names:
            if line.startswith('#') or not line.strip():
                continue
            name, code = line.strip().split('\t')
            try:
                num = int(code, 16)
            except ValueError:
                # Entries without a hexadecimal code are reported and skipped.
                print('could not convert ' + code)
                continue
            index = locate_block(num)
            if index is not None:
                target.write(name + '\t' + code + '\t' + blocks[index] + '\n')
            else:
                print('Code ' + str(num) + ' not found in block, char: ' + _to_char(num))
                # Keep the entry, just with an empty block column.
                target.write(name + '\t' + code + '\t' + '\n')
 | 
					 | 
				
			||||||
		Reference in a new issue