improve extension

2023-09-27 23:45:52 +01:00 · 2023-09-27 23:45:52 +01:00 · a362ff244e
commit a362ff244e
parent c5d7ab5c94
4 changed files with 177 additions and 68 deletions
--- a/generate_character_list.py
+++ b/generate_character_list.py
@ -2,6 +2,7 @@
 Download the latest unicode tables from  https://www.unicode.org and create a .txt file
 containing all the names, blocks and character codes
 """
+import sys
 import os
 import logging
 from urllib import request
@ -9,13 +10,18 @@ from urllib import request
 curr_path = os.path.dirname(__file__)
 logging.basicConfig(level=logging.DEBUG)

+# Be compatible with both python 2 and 3
+if sys.version_info[0] >= 3:
+    unichr = chr
+
+BASE_URL = "https://www.unicode.org/Public/UCD/latest/ucd"

 def get_blocks():
    """ Download the info file for Unicode blocks.
    """
    logging.info("Downloading block data...")
-    req = request.urlopen("https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt")
-    content = req.read().decode()
+    # The response is used as a context manager so it is closed as soon as the
+    # body has been read. Errors raised by urlopen are not caught here and
+    # propagate to the caller.
+    with request.urlopen(f"{BASE_URL}/Blocks.txt") as req:
+        # decode() without arguments uses UTF-8.
+        content = req.read().decode()
    logging.info("Done")
    return content

@ -24,10 +30,8 @@ def get_data():
-    """ Download the info file for Unicode blocks.
+    """ Download the info file for Unicode characters.
    """
    logging.info("Downloading character data...")
-    req = request.urlopen(
-        "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
-    )
-    content = req.read().decode()
+    with request.urlopen(f"{BASE_URL}/UnicodeData.txt") as req:
+        content = req.read().decode()
    logging.info("Done")
    return content

@ -61,18 +65,19 @@ def load_blocks():
        [start, end] = indices[half]
        if start > code:
            return locate_block(code, left, right=half)
-        elif end < code:
+        if end < code:
            return locate_block(code, half, right=right)
-        else:
-            return blocks[half]
+        return blocks[half]

    return locate_block


-def main():
-    """ Read the character and block data and unite them to a text file containing the following fields:
-    `<character name>   <character comment> <code>  <block name>`
-    seperated by tab characters.
+def main(out: str = "unicode_list.txt"):
+    """Create the file with Unicode characters.
+
+    Read the character and block data and unite them to a text file
+    containing the following fields, separated by tab characters:
+    `<character name> <character comment> <code> <block name>`
    """
    get_block = load_blocks()
    characters = clean(get_data())
@ -90,7 +95,7 @@ def main():
        try:
            num = int(code, 16)
        except ValueError:
-            logging.warn("Could not convert " + code)
+            logging.warning("Could not convert %s", code)
            continue

        # Find the character's block
@ -98,12 +103,23 @@ def main():
        if blk is not None:
            output.append("\t".join((name, comment, code, blk)))
        else:
-            logging.warn("Code %s not found in any block, char: %s", num, unichr(num))
+            logging.warning("Code %s not found in any block, char: %s", num, unichr(num))
            output.append(name + "\t" + comment + "\t" + code + "\t")

-    with open("unicode_list.txt", "w") as target:
+    with open(out, "w", encoding="utf-8") as target:
        target.write("\n".join(output))


 if __name__ == "__main__":
-    main()
+    # Command line entry point: the optional positional argument selects the
+    # output file; without it the list is written to "unicode_list.txt".
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "path",
+        type=str,
+        # A positional argument is mandatory unless nargs="?" is given; only
+        # then does argparse fall back to `default` when it is omitted.
+        nargs="?",
+        help="the output path where to save the Unicode list.",
+        default="unicode_list.txt",
+    )
+
+    args = parser.parse_args()
+
+    main(args.path)
--- a/main.py
+++ b/main.py
@ -1,18 +1,24 @@
 import os
 import sys
 import codecs
-from os.path import join
+import time
+import math
+import shutil
+import html.entities
+import asyncio
+import logging
+from typing import Dict, Optional
+from operator import itemgetter

-import subprocess # for pip autoinstallation
-
-from ulauncher.search.SortedList import SortedList
+from ulauncher.utils.fuzzy_search import get_score
 from ulauncher.api.client.Extension import Extension
 from ulauncher.api.client.EventListener import EventListener
-from ulauncher.api.shared.event import KeywordQueryEvent, ItemEnterEvent
+from ulauncher.api.shared.event import KeywordQueryEvent
 from ulauncher.api.shared.item.ExtensionResultItem import ExtensionResultItem
 from ulauncher.api.shared.action.RenderResultListAction import RenderResultListAction
 from ulauncher.api.shared.action.CopyToClipboardAction import CopyToClipboardAction
-from ulauncher.api.shared.action.HideWindowAction import HideWindowAction
+
+logger = logging.getLogger(__name__)

 # Be compatible with both python 2 and 3
 if sys.version_info[0] >= 3:
@ -27,108 +33,173 @@ ICON_TEMPLATE = """
 </svg>
 """

-# For pip autoinstallation
-def ensure_import(package):
-    try:
-        return __import__(package)
-    except ImportError:
-        subprocess.call([sys.executable, "-m", "pip", "install", "--user", package])
-    return __import__(package)
+ExtensionPreferences = Dict[str, str]
+UnicodeCharPreferences = Dict[str, int]

-# For HTML entity conversion
-htmlentities = ensure_import("htmlentities")

 class UnicodeChar:
-    """ Container class for unicode characters
-    """
+    """Container class for unicode characters."""

    def __init__(self, name, comment, block, code):
-        self.name = name if name != '<control>' else comment
+        # When the name field is the placeholder "<control>", fall back to the
+        # comment field as the display name.
+        self.name = name if name != "<control>" else comment
        self.comment = comment
        self.block = block
        self.code = code
+        # `code` is parsed as a hexadecimal code point and turned into the
+        # corresponding character.
        self.character = unichr(int(code, 16))

    def get_search_name(self):
-        """ Called by `ulauncher.search.SortedList` to get the string
-        that should be used in searches
-        """
-        return ' '.join([self.character, self.code, self.name, self.comment])
+        """Called to get the string that should be used in searches."""
+        # The searchable text is the character itself, its hex code, its name
+        # and its comment, separated by single spaces.
+        return " ".join([self.character, self.code, self.name, self.comment])


 class UnicodeCharExtension(Extension):
+    """Extension that serves Unicode characters matching a keyword query.
+
+    The character table is read from ``unicode_path`` (resolved against
+    ``FILE_PATH``) at construction time and kept in ``character_list``.
+    """
+
+    unicode_path: str = "unicode_list.txt"
+
    def __init__(self):
-        super(UnicodeCharExtension, self).__init__()
+        super().__init__()
        check_cache_dir()
        self._load_character_table()
        self.subscribe(KeywordQueryEvent, KeywordQueryEventListener())

+    def get_filename(self) -> str:
+        """Default filename of the Unicode list."""
+        return os.path.join(FILE_PATH, self.unicode_path)
+
    def _load_character_table(self):
-        """ Read the data file and load to memory
-        """
+        """Read the data file and load to memory.
+
+        Each line holds four tab-separated fields:
+        name, comment, code and block.
+        """
+        filename = self.get_filename()
+
        self.character_list = []
-        with open(join(FILE_PATH, "unicode_list.txt"), "r") as f:
+        with open(filename, "r", encoding="utf-8") as f:
            for line in f.readlines():
-                name, comment, code, block = line.strip().split("\t")
+                # Remove only the line terminator. The generator writes an
+                # empty block as a trailing tab; strip() would drop that tab
+                # and the four-way unpacking below would fail.
+                name, comment, code, block = line.rstrip("\n").split("\t")
                character = UnicodeChar(name, comment, block, code)
                self.character_list.append(character)

+    @staticmethod
+    async def refresh_unicode_list(path: str, preferences: UnicodeCharPreferences):
+        """Regenerate the Unicode list at ``path`` if it is too old.
+
+        The age of the file is measured in whole days since its last
+        modification and compared with ``preferences["update_interval"]``
+        (in days). An interval of zero or less disables the refresh.
+
+        Note: this only rewrites the file; ``character_list`` of a running
+        extension instance is not reloaded here.
+        """
+        # Get timestamp of the last time the file was modified
+        timestamp = os.path.getmtime(path)
+        # Number of days since the file was modified (86400 seconds per day)
+        age = math.floor((time.time() - timestamp) / 86400)
+
+        update_interval = preferences["update_interval"]
+
+        if 0 < update_interval < age:
+            await UnicodeCharExtension.update_unicode_list(path)
+
+    @staticmethod
+    async def update_unicode_list(path: str):
+        """Re-generate an old Unicode list file.
+
+        The current file is copied to ``path + ".bkp"`` first, unless such a
+        backup already exists.
+        """
+        # Save the file to a backup file if there is no backup.
+        backup = path + ".bkp"
+        if not os.path.isfile(backup):
+            logger.info("backup the file with Unicode list to: %s", backup)
+            shutil.copyfile(path, backup)
+
+        # Imported here rather than at module level, so the generator module
+        # is only loaded when a refresh actually happens.
+        import generate_character_list
+
+        # Regenerate file with unicode list
+        logger.info("regenerate the file with Unicode list: %s", path)
+        generate_character_list.main(path)
+
+    @staticmethod
+    def get_preferences(
+        input_preferences: ExtensionPreferences,
+    ) -> UnicodeCharPreferences:
+        """Parse preferences to the correct types.
+
+        Raises ``KeyError`` if a preference is missing and ``ValueError`` if
+        its value is not an integer literal.
+        """
+        preferences: UnicodeCharPreferences = {
+            "result_limit": int(input_preferences["result_limit"]),
+            "min_score": int(input_preferences["min_score"]),
+            "update_interval": int(input_preferences["update_interval"]),
+        }
+
+        return preferences
+
+    def search(self, query: str, preferences: UnicodeCharPreferences):
+        """Return a list of result sorted by relevance to the query.
+
+        Only characters scoring at least ``preferences["min_score"]`` are
+        kept, and at most ``preferences["result_limit"]`` of them are
+        returned, best score first.
+        """
+        limit = preferences["result_limit"]
+        min_score = preferences["min_score"]
+
+        results = []
+        for c in self.character_list:
+            score = get_score(query, c.get_search_name())
+            if score >= min_score:
+                results.append((score, c))
+
+        # Sort once after scoring instead of re-sorting on every match. The
+        # sort is stable, so characters with equal scores keep table order.
+        results.sort(reverse=True, key=itemgetter(0))
+
+        # A limit of zero or less yields no results.
+        return [c for (_, c) in results[: max(limit, 0)]]
+

 class KeywordQueryEventListener(EventListener):
+    """Answer keyword queries with the characters matching the typed text."""
+
    def on_event(self, event, extension):
+        """Build the result list for a keyword query.
+
+        Enter copies the character itself, Alt+Enter copies its code.
+        An empty or missing argument produces an empty result list.
+        """
+        preferences = extension.get_preferences(extension.preferences)
+        # Re-generate unicode list if it is too old.
+        coro = extension.refresh_unicode_list(extension.get_filename(), preferences)
+        # start the event loop and execute the coroutine
+        asyncio.run(coro)
+
        items = []
-        arg = event.get_argument()
-        if arg:
-            result_list = SortedList(arg, min_score=99, limit=10)
-            result_list.extend(extension.character_list)
-            for char in result_list:
+        # get_argument() returns None when only the keyword was typed, so
+        # fall back to an empty string before stripping.
+        query = (event.get_argument() or "").strip()
+        if query:
+            # Return best characters matching the query, ordered by score.
+            results = extension.search(query, preferences)
+            for char in results:
                image_path = get_character_icon(char)
-                encoded = htmlentities.encode(char.character)
-                if "&" in encoded:
-                    sep = " - "
-                    html = encoded
-                else:
-                    sep = ""
-                    html = ""
+                html_val = html_encode(char.character)
+                html_str = ""
+                if html_val:
+                    html_str = f" - HTML: {html_val}"
+
                items.append(
                    ExtensionResultItem(
                        icon=image_path,
-                        name=char.name.capitalize() + " - " + char.character,
-                        description=char.block + " - Alt+Enter: " + html + sep + "Code: U+" + char.code,
+                        name=f"{char.name.capitalize()} - {char.character}",
+                        description=f"{char.block}{html_str} - Alt+Enter: U+{char.code}",
                        on_enter=CopyToClipboardAction(char.character),
-                        on_alt_enter=CopyToClipboardAction(html),
+                        on_alt_enter=CopyToClipboardAction(char.code),
                    )
                )
        return RenderResultListAction(items)

+
+def html_encode(char: str) -> Optional[str]:
+    """Return the named HTML entity (``&name;``) for the char, or None if it has none."""
+    entity_name = html.entities.codepoint2name.get(ord(char))
+    if entity_name is None:
+        return None
+    return f"&{entity_name};"
+
+
 def get_character_icon(char):
-    """ Check if there is an existing icon for this character and return its path
+    """Check if there is an existing icon for this character and return its path
    or create a new one and return its path.
    """
-    path = FILE_PATH + "images/cache/icon_%s.svg" % char.code
+    # Icons are cached on disk, one SVG per character code, under
+    # FILE_PATH/images/cache; an existing file is reused as-is.
+    path = os.path.join(FILE_PATH, f"images/cache/icon_{char.code}.svg")
    if os.path.isfile(path):
        return path
    return create_character_icon(char)


 def create_character_icon(char, font="sans-serif"):
-    """ Create an SVG file containing the unicode glyph for char to be used
+    """Create an SVG file containing the unicode glyph for char to be used
    as a result icon.

    Note: this could be avoided by providing a gtk.PixBuf without creating a file,
    but ulauncher pickles the returned results, so it doesn't work currently.
+
+    The file is written to FILE_PATH/images/cache/icon_<code>.svg and its
+    path is returned.
    """
-    icon = ICON_TEMPLATE.replace("{symbol}", char.character).replace("{font}", font)
-    with codecs.open(
-        os.path.join(FILE_PATH, "images/cache/icon_%s.svg" % char.code), "w", "utf-8"
-    ) as target:
+    # Escape "&", "<" and ">" so that these characters do not end up as raw
+    # markup inside the SVG document and make it malformed.
+    symbol = html.escape(char.character, quote=False)
+    # NOTE(review): str.format treats every brace in ICON_TEMPLATE as a
+    # placeholder; the full template is not visible here - confirm it
+    # contains no literal braces besides {symbol} and {font}.
+    icon = ICON_TEMPLATE.format(symbol=symbol, font=font)
+    path = os.path.join(FILE_PATH, f"images/cache/icon_{char.code}.svg")
+    with codecs.open(path, "w", "utf-8") as target:
        target.write(icon)
-    return os.path.join(FILE_PATH, "images/cache/icon_%s.svg" % char.code)
+    return path


 def check_cache_dir(path="images/cache"):
-    """ Check if the cache directory exists and if not create it.
-    """
+    """Check if the cache directory exists and if not create it."""
    path = os.path.join(FILE_PATH, path)
    if not os.path.isdir(path):
        os.mkdir(path)
--- a/manifest.json
+++ b/manifest.json
@ -8,12 +8,33 @@
    "query_debounce": 0.5
  },
  "preferences": [
+    {
+      "id": "result_limit",
+      "type": "input",
+      "name": "Result limit",
+      "description": "Number of results that should be returned.",
+      "default_value": "10"
+    },
+    {
+      "id": "min_score",
+      "type": "input",
+      "name": "Minimum score",
+      "description": "Only display results with a matching score higher than this. If too high, e.g. higher than 200, no results will be shown.",
+      "default_value": "0"
+    },
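    {
      "id": "symbol",
      "type": "keyword",
      "name": "Symbol",
-      "description": "Search symbols in ASCII and Unicode. Enter to copy the symbol, alt+enter to copy the HTML entity. Dark mode friendly.",
+      "description": "Search symbols in ASCII and Unicode. Enter to copy the symbol, alt+enter to copy its hexadecimal code point. Dark mode friendly.",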
      "default_value": "sym"
+    },
+    {
+      "id": "update_interval",
+      "type": "input",
+      "name": "Update interval",
+      "description": "Time interval (in days) after which the cached list of unicode symbols is updated by downloading the newest list. Set to -1 to never update the list.",
+      "default_value": "90"
    }
  ]
 }
--- a/versions.json
+++ b/versions.json
@ -1,4 +1,5 @@
 [
  { "required_api_version": "^1.0.0", "commit": "API_v1" },
-  { "required_api_version": "^2.0.0", "commit": "master" }
+  { "required_api_version": "^2.0.0", "commit": "master" },
+  { "required_api_version": "^3.0.0", "commit": "api-v3" }
 ]