getzze 2023-09-27 23:58:57 +01:00 committed by GitHub
commit 9a199945c7
4 changed files with 177 additions and 68 deletions

generate_character_list.py

@@ -2,6 +2,7 @@
 Download the latest unicode tables from https://www.unicode.org and create a .txt file
 containing all the names, blocks and character codes
 """
+import sys
 import os
 import logging
 from urllib import request
@@ -9,13 +10,18 @@ from urllib import request
 curr_path = os.path.dirname(__file__)
 logging.basicConfig(level=logging.DEBUG)

+# Be compatible with both python 2 and 3
+if sys.version_info[0] >= 3:
+    unichr = chr
+
+BASE_URL = "https://www.unicode.org/Public/UCD/latest/ucd"
+

 def get_blocks():
     """ Download the info file for Unicode blocks.
     """
     logging.info("Downloading block data...")
-    req = request.urlopen("https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt")
-    content = req.read().decode()
+    with request.urlopen(f"{BASE_URL}/Blocks.txt") as req:
+        content = req.read().decode()
     logging.info("Done")
     return content
@@ -24,10 +30,8 @@ def get_data():
     """ Download the info file for Unicode blocks.
     """
     logging.info("Downloading character data...")
-    req = request.urlopen(
-        "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
-    )
-    content = req.read().decode()
+    with request.urlopen(f"{BASE_URL}/UnicodeData.txt") as req:
+        content = req.read().decode()
     logging.info("Done")
     return content
@@ -61,18 +65,19 @@ def load_blocks():
         [start, end] = indices[half]
         if start > code:
             return locate_block(code, left, right=half)
-        elif end < code:
+        if end < code:
             return locate_block(code, half, right=right)
-        else:
-            return blocks[half]
+        return blocks[half]

     return locate_block


-def main():
-    """ Read the character and block data and unite them to a text file containing the following fields:
-    `<character name> <character comment> <code> <block name>`
-    seperated by tab characters.
+def main(out: str = "unicode_list.txt"):
+    """Create the file with Unicode characters.
+
+    Read the character and block data and unite them to a text file
+    containing the following fields, separated by tab characters:
+    `<character name> <character comment> <code> <block name>`
     """
     get_block = load_blocks()
     characters = clean(get_data())
@@ -90,7 +95,7 @@ def main():
         try:
             num = int(code, 16)
         except ValueError:
-            logging.warn("Could not convert " + code)
+            logging.warning("Could not convert %s", code)
             continue

         # Find the character's block
@@ -98,12 +103,23 @@ def main():
         if blk is not None:
             output.append("\t".join((name, comment, code, blk)))
         else:
-            logging.warn("Code %s not found in any block, char: %s", num, unichr(num))
+            logging.warning("Code %s not found in any block, char: %s", num, unichr(num))
             output.append(name + "\t" + comment + "\t" + code + "\t")

-    with open("unicode_list.txt", "w") as target:
+    with open(out, "w", encoding="utf-8") as target:
         target.write("\n".join(output))


 if __name__ == "__main__":
-    main()
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "path",
+        type=str,
+        help="the output path where to save the Unicode list.",
+        default="unicode_list.txt",
+    )
+    args = parser.parse_args()
+    main(args.path)
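
Editor's note on the new command-line entry point: argparse only honours `default` for a positional argument when the argument is made optional with nargs="?"; as committed, running the script with no argument exits with "the following arguments are required: path" instead of falling back to unicode_list.txt. A minimal sketch of the intended behaviour (same main() as above; this snippet is not part of the commit):

    import argparse

    # Hypothetical corrected entry point: "path" may be omitted, in which case
    # the default file name is used.
    parser = argparse.ArgumentParser(description="Generate the Unicode list file.")
    parser.add_argument(
        "path",
        nargs="?",
        default="unicode_list.txt",
        help="the output path where to save the Unicode list.",
    )

    if __name__ == "__main__":
        args = parser.parse_args()
        main(args.path)  # main() as defined in this file

With that change, `python generate_character_list.py` writes unicode_list.txt in the current directory, while `python generate_character_list.py /tmp/unicode_list.txt` writes to the given path.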

main.py

@@ -1,18 +1,24 @@
 import os
 import sys
 import codecs
-from os.path import join
+import time
+import math
+import shutil
+import html.entities
+import asyncio
+import logging
+from typing import Dict, Optional
+from operator import itemgetter

-import subprocess # for pip autoinstallation
-from ulauncher.search.SortedList import SortedList
+from ulauncher.utils.fuzzy_search import get_score
 from ulauncher.api.client.Extension import Extension
 from ulauncher.api.client.EventListener import EventListener
-from ulauncher.api.shared.event import KeywordQueryEvent, ItemEnterEvent
+from ulauncher.api.shared.event import KeywordQueryEvent
 from ulauncher.api.shared.item.ExtensionResultItem import ExtensionResultItem
 from ulauncher.api.shared.action.RenderResultListAction import RenderResultListAction
 from ulauncher.api.shared.action.CopyToClipboardAction import CopyToClipboardAction
-from ulauncher.api.shared.action.HideWindowAction import HideWindowAction
+
+logger = logging.getLogger(__name__)

 # Be compatible with both python 2 and 3
 if sys.version_info[0] >= 3:
@@ -27,108 +33,173 @@ ICON_TEMPLATE = """
 </svg>
 """

-# For pip autoinstallation
-def ensure_import(package):
-    try:
-        return __import__(package)
-    except ImportError:
-        subprocess.call([sys.executable, "-m", "pip", "install", "--user", package])
-        return __import__(package)
-
-
-# For HTML entity conversion
-htmlentities = ensure_import("htmlentities")
+ExtensionPreferences = Dict[str, str]
+UnicodeCharPreferences = Dict[str, int]


 class UnicodeChar:
-    """ Container class for unicode characters
-    """
+    """Container class for unicode characters."""

     def __init__(self, name, comment, block, code):
-        self.name = name if name != '<control>' else comment
+        self.name = name if name != "<control>" else comment
         self.comment = comment
         self.block = block
         self.code = code
         self.character = unichr(int(code, 16))

     def get_search_name(self):
-        """ Called by `ulauncher.search.SortedList` to get the string
-        that should be used in searches
-        """
-        return ' '.join([self.character, self.code, self.name, self.comment])
+        """Called to get the string that should be used in searches."""
+        return " ".join([self.character, self.code, self.name, self.comment])


 class UnicodeCharExtension(Extension):
+    unicode_path: str = "unicode_list.txt"
+
     def __init__(self):
-        super(UnicodeCharExtension, self).__init__()
+        super().__init__()
         check_cache_dir()
         self._load_character_table()
         self.subscribe(KeywordQueryEvent, KeywordQueryEventListener())

+    def get_filename(self) -> str:
+        """Default filename of the Unicode list."""
+        return os.path.join(FILE_PATH, self.unicode_path)
+
     def _load_character_table(self):
-        """ Read the data file and load to memory
-        """
+        """Read the data file and load to memory."""
+        filename = self.get_filename()
         self.character_list = []
-        with open(join(FILE_PATH, "unicode_list.txt"), "r") as f:
+        with open(filename, "r", encoding="utf-8") as f:
             for line in f.readlines():
                 name, comment, code, block = line.strip().split("\t")
                 character = UnicodeChar(name, comment, block, code)
                 self.character_list.append(character)
+    @staticmethod
+    async def refresh_unicode_list(path: str, preferences: UnicodeCharPreferences):
+        """Check if the Unicode list file needs refresh."""
+        # Get timestamp of the last time the file was modified
+        timestamp = os.path.getmtime(path)
+        # Number of days since the file was modified
+        age = math.floor((time.time() - timestamp) / 3600)
+        update_interval = preferences["update_interval"]
+        if 0 < update_interval < age:
+            await UnicodeCharExtension.update_unicode_list(path)
+
+    @staticmethod
+    async def update_unicode_list(path: str):
+        """Re-generate an old Unicode list file."""
+        # Save the file to a backup file if there is no backup.
+        backup = path + ".bkp"
+        if not os.path.isfile(backup):
+            logger.info("backup the file with Unicode list to: %s", backup)
+            shutil.copyfile(path, backup)
+
+        import generate_character_list
+
+        # Regenerate file with unicode list
+        logger.info("regenerate the file with Unicode list: %s", path)
+        generate_character_list.main(path)
+
+    @staticmethod
+    def get_preferences(
+        input_preferences: ExtensionPreferences,
+    ) -> UnicodeCharPreferences:
+        """Parse preferences to the correct types."""
+        preferences: UnicodeCharPreferences = {
+            "result_limit": int(input_preferences["result_limit"]),
+            "min_score": int(input_preferences["min_score"]),
+            "update_interval": int(input_preferences["update_interval"]),
+        }
+        return preferences
+
+    def search(self, query: str, preferences: UnicodeCharPreferences):
+        """Return a list of result sorted by relevance to the query."""
+        limit = preferences["result_limit"]
+        min_score = preferences["min_score"]
+        results = []
+        for c in self.character_list:
+            score = get_score(query, c.get_search_name())
+            if score >= min_score:
+                results.append((score, c))
+        results = sorted(results, reverse=True, key=itemgetter(0))
+        if len(results) > limit:
+            results = results[:limit]
+        return [c for (s, c) in results]
 class KeywordQueryEventListener(EventListener):
     def on_event(self, event, extension):
+        preferences = extension.get_preferences(extension.preferences)
+
+        # Re-generate unicode list if it is too old.
+        coro = extension.refresh_unicode_list(extension.get_filename(), preferences)
+        # start the event loop and execute the coroutine
+        asyncio.run(coro)
+
         items = []
-        arg = event.get_argument()
-        if arg:
-            result_list = SortedList(arg, min_score=99, limit=10)
-            result_list.extend(extension.character_list)
-            for char in result_list:
+        query = event.get_argument().strip()
+        if query:
+            # Return best characters matching the query, ordered by score.
+            results = extension.search(query, preferences)
+            for char in results:
                 image_path = get_character_icon(char)
-                encoded = htmlentities.encode(char.character)
-                if "&" in encoded:
-                    sep = " - "
-                    html = encoded
-                else:
-                    sep = ""
-                    html = ""
+                html_val = html_encode(char.character)
+                html_str = ""
+                if html_val:
+                    html_str = f" - HTML: {html_val}"
                 items.append(
                     ExtensionResultItem(
                         icon=image_path,
-                        name=char.name.capitalize() + " - " + char.character,
-                        description=char.block + " - Alt+Enter: " + html + sep + "Code: U+" + char.code,
+                        name=f"{char.name.capitalize()} - {char.character}",
+                        description=f"{char.block}{html_str} - Alt+Enter: U+{char.code}",
                         on_enter=CopyToClipboardAction(char.character),
-                        on_alt_enter=CopyToClipboardAction(html),
+                        on_alt_enter=CopyToClipboardAction(char.code),
                     )
                 )
         return RenderResultListAction(items)
+
+
+def html_encode(char: str) -> Optional[str]:
+    """Get the html encoded str corresponding to the unicode char, if it exist."""
+    if ord(char) in html.entities.codepoint2name:
+        html_var = html.entities.codepoint2name[ord(char)]
+        return f"&{html_var};"
+    return None
 def get_character_icon(char):
-    """ Check if there is an existing icon for this character and return its path
+    """Check if there is an existing icon for this character and return its path
     or create a new one and return its path.
     """
-    path = FILE_PATH + "images/cache/icon_%s.svg" % char.code
+    path = os.path.join(FILE_PATH, f"images/cache/icon_{char.code}.svg")
     if os.path.isfile(path):
         return path
     return create_character_icon(char)


 def create_character_icon(char, font="sans-serif"):
-    """ Create an SVG file containing the unicode glyph for char to be used
+    """Create an SVG file containing the unicode glyph for char to be used
     as a result icon.
     Note: this could be avoided by providing a gtk.PixBuf without creating a file,
     but ulauncher pickles the returned results, so it doesn't work currently.
     """
-    icon = ICON_TEMPLATE.replace("{symbol}", char.character).replace("{font}", font)
-    with codecs.open(
-        os.path.join(FILE_PATH, "images/cache/icon_%s.svg" % char.code), "w", "utf-8"
-    ) as target:
+    icon = ICON_TEMPLATE.format(symbol=char.character, font=font)
+    path = os.path.join(FILE_PATH, f"images/cache/icon_{char.code}.svg")
+    with codecs.open(path, "w", "utf-8") as target:
         target.write(icon)
-    return os.path.join(FILE_PATH, "images/cache/icon_%s.svg" % char.code)
+    return path


 def check_cache_dir(path="images/cache"):
-    """ Check if the cache directory exists and if not create it.
-    """
+    """Check if the cache directory exists and if not create it."""
     path = os.path.join(FILE_PATH, path)
     if not os.path.isdir(path):
         os.mkdir(path)
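
Editor's note on the refresh logic added above: `refresh_unicode_list` divides the file age in seconds by 3600, which gives hours, while the update_interval preference (see the manifest below) is described in days, so the list would be regenerated far more often than configured. A small sketch of the same check using days (assumption: the preference really is meant as days; this is not part of the commit):

    import math
    import os
    import time

    def age_in_days(path: str) -> int:
        """Whole days since `path` was last modified (86400 seconds per day)."""
        return math.floor((time.time() - os.path.getmtime(path)) / 86400)

    def needs_refresh(path: str, update_interval_days: int) -> bool:
        # A non-positive interval disables the automatic refresh, mirroring the diff's check.
        return 0 < update_interval_days < age_in_days(path)

Note also that `asyncio.run(coro)` blocks until the coroutine completes, so the download and regeneration still run synchronously inside the keyword-query handler.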

manifest.json

@@ -8,12 +8,33 @@
         "query_debounce": 0.5
     },
     "preferences": [
+        {
+            "id": "result_limit",
+            "type": "input",
+            "name": "Result limit",
+            "description": "Number of results that should be returned.",
+            "default_value": "10"
+        },
+        {
+            "id": "min_score",
+            "type": "input",
+            "name": "Minimum score",
+            "description": "Only display results with a matching score higher than this. If too high, e.g. higher than 200, no results will be shown.",
+            "default_value": "0"
+        },
         {
             "id": "symbol",
             "type": "keyword",
             "name": "Symbol",
             "description": "Search symbols in ASCII and Unicode. Enter to copy the symbol, alt+enter to copy the HTML entity. Dark mode friendly.",
             "default_value": "sym"
+        },
+        {
+            "id": "update_interval",
+            "type": "input",
+            "name": "Update interval",
+            "description": "Time interval (in days) after which the cached list of unicode symbols is updated by downloading the newest list. Set to -1 to never update the list.",
+            "default_value": "90"
         }
     ]
 }
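
For context on how these entries reach the code above: ulauncher hands preferences to the extension as strings keyed by preference id (hence the `ExtensionPreferences = Dict[str, str]` alias in main.py), and `get_preferences()` casts the numeric ones. Roughly, with the defaults shown here (illustrative values only, not part of the commit):

    # What extension.preferences would contain with the defaults above,
    # and how get_preferences() in main.py parses them.
    raw = {"result_limit": "10", "min_score": "0", "update_interval": "90", "symbol": "sym"}
    parsed = {
        "result_limit": int(raw["result_limit"]),
        "min_score": int(raw["min_score"]),
        "update_interval": int(raw["update_interval"]),
    }
    assert parsed == {"result_limit": 10, "min_score": 0, "update_interval": 90}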

versions.json

@@ -1,4 +1,5 @@
 [
     { "required_api_version": "^1.0.0", "commit": "API_v1" },
     { "required_api_version": "^2.0.0", "commit": "master" }
+    { "required_api_version": "^3.0.0", "commit": "api-v3" }
 ]
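
As captured here, the `^2.0.0` entry keeps no trailing comma before the newly added line, which would leave versions.json as invalid JSON (JSON requires commas between array elements); the comma may simply have been lost when this page was extracted. A valid form of the updated file would be:

    [
        { "required_api_version": "^1.0.0", "commit": "API_v1" },
        { "required_api_version": "^2.0.0", "commit": "master" },
        { "required_api_version": "^3.0.0", "commit": "api-v3" }
    ]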