diff --git a/generate_character_list.py b/generate_character_list.py new file mode 100644 index 0000000..b4a1296 --- /dev/null +++ b/generate_character_list.py @@ -0,0 +1,49 @@ +""" +Download the latest unicode tables from https://www.unicode.org and create a .txt file +containing all the names, blocks and character codes +""" + + +def main(): + indices = [] + blocks = [] + with open("Blocks.txt", "r") as block_file: + for line in block_file.readlines(): + if line.startswith("#"): + continue + l, name = line.split(";") + start, stop = l.split("..") + indices.append((int(start, 16), int(stop, 16))) + blocks.append(name.strip()) + + def locate_block(code): + for index, [start, stop] in enumerate(indices): + if code > stop: + continue + else: + if code >= start: + return index + + with open("unicode_list.txt", "w") as target: + with open("Index.txt", "r") as names: + for line in names.readlines(): + if line.startswith("#"): + continue + name, code = line.strip().split("\t") + try: + num = int(code, 16) + except ValueError: + print("could not convert " + code) + continue + index = locate_block(num) + if index is not None: + target.write(name + "\t" + code + "\t" + blocks[index] + "\n") + else: + print( + "Code " + str(num) + " not found in block, char: " + unichr(num) + ) + target.write(name + "\t" + code + "\t" + "\n") + + +if __name__ == "__main__": + main() diff --git a/generate_master_file.py b/generate_master_file.py deleted file mode 100644 index 9583382..0000000 --- a/generate_master_file.py +++ /dev/null @@ -1,36 +0,0 @@ -indices = [] -blocks = [] -with open('Blocks.txt', 'r') as block_file: - for line in block_file.readlines(): - if line.startswith('#'): - continue - l, name = line.split(';') - start, stop = l.split('..') - indices.append((int(start, 16), int(stop, 16))) - blocks.append(name.strip()) - -def locate_block(code): - for index, [start, stop] in enumerate(indices): - if code > stop: - continue - else: - if code >= start: - return index - -with open('unicode_list.txt', 'w') as target: - with open('Index.txt', 'r') as names: - for line in names.readlines(): - if line.startswith('#'): - continue - name, code = line.strip().split('\t') - try: - num = int(code, 16) - except ValueError: - print('could not convert ' + code) - continue - index = locate_block(num) - if index is not None: - target.write(name + '\t' + code + '\t' + blocks[index] + '\n') - else: - print('Code ' + str(num) + ' not found in block, char: ' + unichr(num)) - target.write(name + '\t' + code + '\t' + '\n')