From 64f644162b001ab2c491ce816abf437459720ef3 Mon Sep 17 00:00:00 2001
From: Vangelis Kostalas <kostalas.v@gmail.com>
Date: Fri, 17 May 2019 14:02:46 +0300
Subject: [PATCH] Changed the character block search algo to binary search

---
 generate_character_list.py | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/generate_character_list.py b/generate_character_list.py
index b256d7f..efbe13f 100644
--- a/generate_character_list.py
+++ b/generate_character_list.py
@@ -23,11 +23,11 @@ def get_blocks():
 def get_data():
     """ Download the info file for Unicode blocks.
     """
+    logging.info("Downloading character data...")
     req = request.urlopen(
         "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
     )
     content = req.read().decode()
-    logging.info("Downloading character data...")
     logging.info("Done")
     return content
 
@@ -53,26 +53,34 @@ def load_blocks():
         indices.append((int(start, 16), int(stop, 16)))
         blocks.append(name.strip())
 
-    def locate_block(code):
-        for index, [start, stop] in enumerate(indices):
-            if code > stop:
-                continue
-            else:
-                if code >= start:
-                    return blocks[index]
+    def locate_block(code, left=0, right=len(indices)):
+        """Binary-search the ordered block ranges; None if no block holds code."""
+        while left < right:  # search window is the half-open range [left, right)
+            half = left + (right - left) // 2
+            start, end = indices[half]
+            if start > code:
+                right = half
+            elif end < code:
+                left = half + 1  # skip half itself, otherwise a miss never ends
+            else:
+                return blocks[half]
+        return None
 
     return locate_block
 
 
 def main():
+    """ Read the character and block data and unite them to a text file containing the following fields:
+    `<character name>   <character comment> <code>  <block name>`
+    separated by tab characters.
+    """
     get_block = load_blocks()
     characters = clean(get_data())
 
     logging.info("Parsing character data...")
-
     output = []
     for line in characters.split("\n"):
-        # Parse the needed data
+        # Parse the needed data from the character's line
         attributes = line.strip().split(";")
         code = attributes[0]
         name = attributes[1]