Improve character table generation

2019-05-08 14:12:30 +03:00 · 2019-05-08 14:12:30 +03:00 · af0421e8df
commit af0421e8df
parent 61ba0db659
2 changed files with 49 additions and 36 deletions
--- a/generate_character_list.py
+++ b/generate_character_list.py
@ -0,0 +1,49 @@
+"""
+Create unicode_list.txt from the latest Unicode tables (Blocks.txt and Index.txt, which must
+be downloaded from https://www.unicode.org beforehand), containing all the names, character codes and blocks
+"""
+
+
+def main():
+    indices = []
+    blocks = []
+    with open("Blocks.txt", "r") as block_file:
+        for line in block_file.readlines():
+            if line.startswith("#"):
+                continue
+            l, name = line.split(";")
+            start, stop = l.split("..")
+            indices.append((int(start, 16), int(stop, 16)))
+            blocks.append(name.strip())
+
+    def locate_block(code):
+        for index, [start, stop] in enumerate(indices):
+            if code > stop:
+                continue
+            else:
+                if code >= start:
+                    return index
+
+    with open("unicode_list.txt", "w") as target:
+        with open("Index.txt", "r") as names:
+            for line in names.readlines():
+                if line.startswith("#"):
+                    continue
+                name, code = line.strip().split("\t")
+                try:
+                    num = int(code, 16)
+                except ValueError:
+                    print("could not convert " + code)
+                    continue
+                index = locate_block(num)
+                if index is not None:
+                    target.write(name + "\t" + code + "\t" + blocks[index] + "\n")
+                else:
+                    print(
+                        "Code " + str(num) + " not found in block, char: " + unichr(num)
+                    )
+                    target.write(name + "\t" + code + "\t" + "\n")
+
+
+# Only generate the list when run as a script, not when the module is imported.
+if __name__ == "__main__":
+    main()
--- a/generate_master_file.py
+++ b/generate_master_file.py
@ -1,36 +0,0 @@
-indices = []
-blocks = []
-with open('Blocks.txt', 'r') as block_file:
-    for line in block_file.readlines():
-        if line.startswith('#'):
-            continue
-        l, name = line.split(';')
-        start, stop = l.split('..')
-        indices.append((int(start, 16), int(stop, 16)))
-        blocks.append(name.strip())
-
-def locate_block(code):
-    for index, [start, stop] in enumerate(indices):
-        if code > stop:
-            continue
-        else:
-            if code >= start:
-                return index
-
-with open('unicode_list.txt', 'w') as target:
-    with open('Index.txt', 'r') as names:
-        for line in names.readlines():
-            if line.startswith('#'):
-                continue
-            name, code = line.strip().split('\t')
-            try:
-                num = int(code, 16)
-            except ValueError:
-                print('could not convert ' + code)
-                continue
-            index = locate_block(num)
-            if index is not None:
-                target.write(name + '\t' + code + '\t' + blocks[index] + '\n')
-            else:
-                print('Code ' + str(num) + ' not found in block, char: ' + unichr(num))
-                target.write(name + '\t' + code + '\t' + '\n')