Changed the character block search algo to binary search
This commit is contained in:
		
							parent
							
								
									ae19a664f6
								
							
						
					
					
						commit
						64f644162b
					
				
					 1 changed files with 18 additions and 10 deletions
				
			
		| 
						 | 
					@ -23,11 +23,11 @@ def get_blocks():
 | 
				
			||||||
def get_data():
 | 
					def get_data():
 | 
				
			||||||
    """ Download the info file for Unicode blocks.
 | 
					    """ Download the info file for Unicode blocks.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					    logging.info("Downloading character data...")
 | 
				
			||||||
    req = request.urlopen(
 | 
					    req = request.urlopen(
 | 
				
			||||||
        "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
 | 
					        "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    content = req.read().decode()
 | 
					    content = req.read().decode()
 | 
				
			||||||
    logging.info("Downloading character data...")
 | 
					 | 
				
			||||||
    logging.info("Done")
 | 
					    logging.info("Done")
 | 
				
			||||||
    return content
 | 
					    return content
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -53,26 +53,34 @@ def load_blocks():
 | 
				
			||||||
        indices.append((int(start, 16), int(stop, 16)))
 | 
					        indices.append((int(start, 16), int(stop, 16)))
 | 
				
			||||||
        blocks.append(name.strip())
 | 
					        blocks.append(name.strip())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def locate_block(code):
 | 
					    def locate_block(code, left=0, right=len(indices)):
 | 
				
			||||||
        for index, [start, stop] in enumerate(indices):
 | 
					        """
 | 
				
			||||||
            if code > stop:
 | 
					        Binary search on an ordered list of intervals.
 | 
				
			||||||
                continue
 | 
					        """
 | 
				
			||||||
            else:
 | 
					        half = left + (right - left) // 2
 | 
				
			||||||
                if code >= start:
 | 
					        [start, end] = indices[half]
 | 
				
			||||||
                    return blocks[index]
 | 
					        if start > code:
 | 
				
			||||||
 | 
					            return locate_block(code, left, right=half)
 | 
				
			||||||
 | 
					        elif end < code:
 | 
				
			||||||
 | 
					            return locate_block(code, half, right=right)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            return blocks[half]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return locate_block
 | 
					    return locate_block
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def main():
 | 
					def main():
 | 
				
			||||||
 | 
					    """ Read the character and block data and unite them to a text file containing the following fields:
 | 
				
			||||||
 | 
					    `<character name>   <character comment> <code>  <block name>`
 | 
				
			||||||
 | 
					    separated by tab characters.
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
    get_block = load_blocks()
 | 
					    get_block = load_blocks()
 | 
				
			||||||
    characters = clean(get_data())
 | 
					    characters = clean(get_data())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    logging.info("Parsing character data...")
 | 
					    logging.info("Parsing character data...")
 | 
				
			||||||
 | 
					 | 
				
			||||||
    output = []
 | 
					    output = []
 | 
				
			||||||
    for line in characters.split("\n"):
 | 
					    for line in characters.split("\n"):
 | 
				
			||||||
        # Parse the needed data
 | 
					        # Parse the needed data from the character's line
 | 
				
			||||||
        attributes = line.strip().split(";")
 | 
					        attributes = line.strip().split(";")
 | 
				
			||||||
        code = attributes[0]
 | 
					        code = attributes[0]
 | 
				
			||||||
        name = attributes[1]
 | 
					        name = attributes[1]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Reference in a new issue