51 lines
1.7 KiB
Python
51 lines
1.7 KiB
Python
"""
Create a .txt file containing all the names, blocks and character codes from
local copies of the Unicode tables Blocks.txt and UnicodeData.txt
(both available from https://www.unicode.org)
"""
|
|
|
|
|
|
def main():
    """Build ``unicode_list.txt`` from ``Blocks.txt`` and ``UnicodeData.txt``.

    Both input files are read from the current working directory.  For every
    record of ``UnicodeData.txt`` one tab-separated line is written to
    ``unicode_list.txt``::

        <field 1 (name)> TAB <field 10> TAB <field 0 (hex code)> TAB <block name>

    The block name is left empty (and a notice is printed) when the code
    point is not covered by any range listed in ``Blocks.txt``.  Records
    whose code field is not valid hexadecimal are reported and skipped.
    """
    indices = []  # (start, stop) inclusive code point ranges, one per block
    blocks = []   # block names, parallel to ``indices``

    with open("Blocks.txt", "r", encoding="utf-8") as block_file:
        for line in block_file:
            stripped = line.strip()
            # Blocks.txt contains blank lines besides "#" comments; neither
            # holds a "start..stop; name" record.
            if not stripped or stripped.startswith("#"):
                continue
            code_range, name = stripped.split(";")
            start, stop = code_range.split("..")
            indices.append((int(start, 16), int(stop, 16)))
            blocks.append(name.strip())

    def locate_block(code):
        """Return the index of the block containing ``code``, or None."""
        for index, (start, stop) in enumerate(indices):
            if start <= code <= stop:
                return index
        return None

    with open("unicode_list.txt", "w", encoding="utf-8") as target:
        with open("UnicodeData.txt", "r", encoding="utf-8") as names:
            for line in names:
                record = line.strip()
                if not record:
                    # A blank line carries no fields to index into.
                    continue
                attributes = record.split(";")
                code = attributes[0]
                name = attributes[1]
                comment = attributes[10]
                try:
                    num = int(code, 16)
                except ValueError:
                    print("could not convert " + code)
                    continue
                index = locate_block(num)
                if index is not None:
                    block_name = blocks[index]
                else:
                    # ``chr`` is the Python 3 spelling of ``unichr``.
                    print(
                        "Code " + str(num) + " not found in block, char: " + chr(num)
                    )
                    block_name = ""
                target.write("\t".join((name, comment, code, block_name)) + "\n")
|
|
|
|
|
|
if __name__ == "__main__":
    # Generate the list only when this file is executed as a script,
    # not when it is imported.
    main()
|