ulauncher-symbol/generate_character_list.py

"""
Download the latest unicode tables from  https://www.unicode.org and create a .txt file
containing all the names, blocks and character codes
"""


def _load_blocks(path):
    """Parse a Unicode ``Blocks.txt`` file.

    Args:
        path: Location of the blocks file. Data lines look like
            ``0000..007F; Basic Latin``.

    Returns:
        A list of ``(start, stop, name)`` tuples in file order, where
        ``start`` and ``stop`` are the inclusive integer code point bounds
        of the block and ``name`` is the stripped block name.
    """
    blocks = []
    with open(path, "r", encoding="utf-8") as block_file:
        for raw_line in block_file:
            line = raw_line.strip()
            # The official file separates its comment header and its
            # "# EOF" trailer from the data with blank lines; those carry
            # no ";" and must be skipped rather than unpacked.
            if not line or line.startswith("#"):
                continue
            code_range, name = line.split(";", 1)
            start, stop = code_range.split("..")
            blocks.append((int(start, 16), int(stop, 16), name.strip()))
    return blocks


def _find_block_name(code, blocks):
    """Return the name of the block containing ``code``, or ``None``.

    Args:
        code: Integer code point to look up.
        blocks: ``(start, stop, name)`` tuples as returned by
            ``_load_blocks``.
    """
    # A plain scan (instead of a bisect) keeps this correct even if the
    # ranges are not sorted.
    for start, stop, name in blocks:
        if start <= code <= stop:
            return name
    return None


def main():
    """Write ``unicode_list.txt`` from ``Blocks.txt`` and ``UnicodeData.txt``.

    All three files are resolved relative to the current working directory.
    Every line of ``UnicodeData.txt`` whose first field is a valid
    hexadecimal code produces one output line of the form
    ``name<TAB>comment<TAB>code<TAB>block``, where ``comment`` is field 10 of
    the input line (the Unicode 1.0 name in the UnicodeData.txt format) and
    ``block`` is empty when the code lies in no known block.

    Lines whose code cannot be parsed are reported on stdout and skipped;
    codes outside every block are reported on stdout but still written.
    """
    blocks = _load_blocks("Blocks.txt")

    with open("unicode_list.txt", "w", encoding="utf-8") as target:
        with open("UnicodeData.txt", "r", encoding="utf-8") as names:
            for raw_line in names:
                line = raw_line.strip()
                if not line:
                    # Tolerate stray blank lines instead of failing on the
                    # field lookups below.
                    continue
                attributes = line.split(";")
                code = attributes[0]
                name = attributes[1]
                comment = attributes[10]
                try:
                    num = int(code, 16)
                except ValueError:
                    print("could not convert " + code)
                    continue
                block_name = _find_block_name(num, blocks)
                if block_name is None:
                    # chr() is the Python 3 spelling of the former unichr().
                    print(
                        "Code " + str(num) + " not found in block, char: " + chr(num)
                    )
                    block_name = ""
                target.write("\t".join((name, comment, code, block_name)) + "\n")


# Generate the list only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()