Improve character table generation

This commit is contained in:
Vangelis Kostalas 2019-05-08 14:12:30 +03:00
parent 61ba0db659
commit af0421e8df
2 changed files with 49 additions and 36 deletions

View file

@ -0,0 +1,49 @@
"""
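Build unicode_list.txt from locally stored Unicode tables.

Expects Blocks.txt and Index.txt (both published at https://www.unicode.org) to have
been downloaded into the current working directory. Writes unicode_list.txt with one
tab-separated line per Index.txt entry: name, character code (hex) and block name.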
"""
try:
    _to_char = unichr  # Python 2: chr() only covers 0..255 there
except NameError:
    _to_char = chr  # Python 3: unichr was removed, chr() handles every code point


def _load_blocks(blocks_path):
    """Parse a Blocks.txt-style file into parallel range and name lists.

    Each data line has the form ``START..STOP; Block Name`` with START and
    STOP in hexadecimal. Lines starting with ``#`` and blank lines are
    ignored.

    Returns:
        A pair ``(indices, blocks)`` where ``indices[i]`` is the inclusive
        ``(start, stop)`` integer range of the block named ``blocks[i]``.
    """
    indices = []
    blocks = []
    with open(blocks_path, "r") as block_file:
        for line in block_file:
            entry = line.strip()
            # A blank line would break the two-field unpacking below.
            if not entry or entry.startswith("#"):
                continue
            span, name = entry.split(";", 1)
            start, stop = span.split("..")
            indices.append((int(start, 16), int(stop, 16)))
            blocks.append(name.strip())
    return indices, blocks


def _locate_block(indices, code):
    """Return the position of the first range in *indices* containing *code*, or None."""
    for index, (start, stop) in enumerate(indices):
        if start <= code <= stop:
            return index
    return None


def main(blocks_path="Blocks.txt", index_path="Index.txt", output_path="unicode_list.txt"):
    """Write a tab-separated ``name, code, block`` table for every index entry.

    Args:
        blocks_path: File with ``START..STOP; Block Name`` lines.
        index_path: File with ``NAME<TAB>HEXCODE`` lines.
        output_path: Destination file; overwritten if it exists.

    Entries whose code is not valid hexadecimal, or that do not consist of
    exactly two tab-separated fields, are reported on stdout and skipped.
    Entries whose code falls in no known block are reported on stdout and
    written with an empty block column.
    """
    indices, blocks = _load_blocks(blocks_path)
    # Open the input first so a missing index file does not truncate the output.
    with open(index_path, "r") as names, open(output_path, "w") as target:
        for line in names:
            entry = line.strip()
            if not entry or line.startswith("#"):
                continue
            fields = entry.split("\t")
            if len(fields) != 2:
                print("could not parse " + entry)
                continue
            name, code = fields
            try:
                num = int(code, 16)
            except ValueError:
                print("could not convert " + code)
                continue
            index = _locate_block(indices, num)
            if index is not None:
                target.write(name + "\t" + code + "\t" + blocks[index] + "\n")
            else:
                print(
                    "Code " + str(num) + " not found in block, char: " + _to_char(num)
                )
                target.write(name + "\t" + code + "\t" + "\n")


if __name__ == "__main__":
    main()

View file

@ -1,36 +0,0 @@
# Parallel lists built from Blocks.txt: indices[i] is the inclusive
# (start, stop) code range of the block whose name is blocks[i].
indices = []
blocks = []
with open('Blocks.txt', 'r') as block_file:
    for line in block_file:
        entry = line.strip()
        # Skip comments and blank lines; a blank line would break the
        # two-field unpacking below.
        if not entry or entry.startswith('#'):
            continue
        # Data lines look like "START..STOP; Block Name" (hex bounds).
        span, name = entry.split(';', 1)
        start, stop = span.split('..')
        indices.append((int(start, 16), int(stop, 16)))
        blocks.append(name.strip())
def locate_block(code):
    """Find which entry of the module-level ``indices`` list contains ``code``.

    ``indices`` holds inclusive ``(start, stop)`` pairs. The position of the
    first pair with ``start <= code <= stop`` is returned; ``None`` is
    returned when no pair matches.
    """
    for position, (low, high) in enumerate(indices):
        if low <= code <= high:
            return position
    return None
# unichr only exists on Python 2; on Python 3 chr() covers every code point.
try:
    unichr
except NameError:
    unichr = chr

# Write one "name<TAB>code<TAB>block" line per Index.txt entry.
with open('unicode_list.txt', 'w') as target:
    with open('Index.txt', 'r') as names:
        for line in names:
            entry = line.strip()
            if not entry or line.startswith('#'):
                continue
            fields = entry.split('\t')
            # Anything other than exactly "NAME<TAB>CODE" cannot be unpacked.
            if len(fields) != 2:
                print('could not parse ' + entry)
                continue
            name, code = fields
            try:
                num = int(code, 16)
            except ValueError:
                print('could not convert ' + code)
                continue
            index = locate_block(num)
            if index is not None:
                target.write(name + '\t' + code + '\t' + blocks[index] + '\n')
            else:
                # Keep the entry, but leave the block column empty.
                print('Code ' + str(num) + ' not found in block, char: ' + unichr(num))
                target.write(name + '\t' + code + '\t' + '\n')