getzze 2023-09-27 23:58:57 +01:00 committed by GitHub
commit 9a199945c7
4 changed files with 177 additions and 68 deletions

generate_character_list.py

@@ -2,6 +2,7 @@
 Download the latest unicode tables from https://www.unicode.org and create a .txt file
 containing all the names, blocks and character codes
 """
+import sys
 import os
 import logging
 from urllib import request
@@ -9,13 +10,18 @@ from urllib import request
 curr_path = os.path.dirname(__file__)
 logging.basicConfig(level=logging.DEBUG)

+# Be compatible with both python 2 and 3
+if sys.version_info[0] >= 3:
+    unichr = chr
+
+BASE_URL = "https://www.unicode.org/Public/UCD/latest/ucd"
+

 def get_blocks():
     """ Download the info file for Unicode blocks.
     """
     logging.info("Downloading block data...")
-    req = request.urlopen("https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt")
-    content = req.read().decode()
+    with request.urlopen(f"{BASE_URL}/Blocks.txt") as req:
+        content = req.read().decode()
     logging.info("Done")
     return content
@@ -24,10 +30,8 @@ def get_data():
     """ Download the info file for Unicode blocks.
     """
     logging.info("Downloading character data...")
-    req = request.urlopen(
-        "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
-    )
-    content = req.read().decode()
+    with request.urlopen(f"{BASE_URL}/UnicodeData.txt") as req:
+        content = req.read().decode()
     logging.info("Done")
     return content
@@ -61,18 +65,19 @@ def load_blocks():
         [start, end] = indices[half]
         if start > code:
             return locate_block(code, left, right=half)
-        elif end < code:
+        if end < code:
             return locate_block(code, half, right=right)
-        else:
-            return blocks[half]
+        return blocks[half]

     return locate_block


-def main():
-    """ Read the character and block data and unite them to a text file containing the following fields:
-    `<character name> <character comment> <code> <block name>`
-    seperated by tab characters.
+def main(out: str = "unicode_list.txt"):
+    """Create the file with Unicode characters.
+
+    Read the character and block data and unite them to a text file
+    containing the following fields, separated by tab characters:
+    `<character name> <character comment> <code> <block name>`
     """
     get_block = load_blocks()
     characters = clean(get_data())
@@ -90,7 +95,7 @@ def main():
         try:
             num = int(code, 16)
         except ValueError:
-            logging.warn("Could not convert " + code)
+            logging.warning("Could not convert %s", code)
             continue

         # Find the character's block
@@ -98,12 +103,23 @@ def main():
         if blk is not None:
             output.append("\t".join((name, comment, code, blk)))
         else:
-            logging.warn("Code %s not found in any block, char: %s", num, unichr(num))
+            logging.warning("Code %s not found in any block, char: %s", num, unichr(num))
             output.append(name + "\t" + comment + "\t" + code + "\t")

-    with open("unicode_list.txt", "w") as target:
+    with open(out, "w", encoding="utf-8") as target:
         target.write("\n".join(output))


 if __name__ == "__main__":
-    main()
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "path",
+        type=str,
+        help="the output path where to save the Unicode list.",
+        default="unicode_list.txt",
+    )
+    args = parser.parse_args()
+    main(args.path)
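
Editor's note on the new command-line entry point: argparse only honours `default` for a positional argument when the argument is made optional with nargs="?"; as committed, running the script with no argument exits with "the following arguments are required: path" instead of falling back to unicode_list.txt. A minimal sketch of the intended behaviour (same main() as above; this snippet is not part of the commit):

    import argparse

    # Hypothetical corrected entry point: "path" may be omitted, in which case
    # the default file name is used.
    parser = argparse.ArgumentParser(description="Generate the Unicode list file.")
    parser.add_argument(
        "path",
        nargs="?",
        default="unicode_list.txt",
        help="the output path where to save the Unicode list.",
    )

    if __name__ == "__main__":
        args = parser.parse_args()
        main(args.path)  # main() as defined in this file

With that change, `python generate_character_list.py` writes unicode_list.txt in the current directory, while `python generate_character_list.py /tmp/unicode_list.txt` writes to the given path.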

main.py

@@ -1,18 +1,24 @@
 import os
 import sys
 import codecs
-from os.path import join
+import time
+import math
+import shutil
+import html.entities
+import asyncio
+import logging
+from typing import Dict, Optional
+from operator import itemgetter

-import subprocess # for pip autoinstallation
-from ulauncher.search.SortedList import SortedList
+from ulauncher.utils.fuzzy_search import get_score
 from ulauncher.api.client.Extension import Extension
 from ulauncher.api.client.EventListener import EventListener
-from ulauncher.api.shared.event import KeywordQueryEvent, ItemEnterEvent
+from ulauncher.api.shared.event import KeywordQueryEvent
 from ulauncher.api.shared.item.ExtensionResultItem import ExtensionResultItem
 from ulauncher.api.shared.action.RenderResultListAction import RenderResultListAction
 from ulauncher.api.shared.action.CopyToClipboardAction import CopyToClipboardAction
-from ulauncher.api.shared.action.HideWindowAction import HideWindowAction
+
+logger = logging.getLogger(__name__)

 # Be compatible with both python 2 and 3
 if sys.version_info[0] >= 3:
@@ -27,108 +33,173 @@ ICON_TEMPLATE = """
 </svg>
 """

-# For pip autoinstallation
-def ensure_import(package):
-    try:
-        return __import__(package)
-    except ImportError:
-        subprocess.call([sys.executable, "-m", "pip", "install", "--user", package])
-        return __import__(package)
-
-
-# For HTML entity conversion
-htmlentities = ensure_import("htmlentities")
+ExtensionPreferences = Dict[str, str]
+UnicodeCharPreferences = Dict[str, int]


 class UnicodeChar:
-    """ Container class for unicode characters
-    """
+    """Container class for unicode characters."""

     def __init__(self, name, comment, block, code):
-        self.name = name if name != '<control>' else comment
+        self.name = name if name != "<control>" else comment
         self.comment = comment
         self.block = block
         self.code = code
         self.character = unichr(int(code, 16))

     def get_search_name(self):
-        """ Called by `ulauncher.search.SortedList` to get the string
-        that should be used in searches
-        """
-        return ' '.join([self.character, self.code, self.name, self.comment])
+        """Called to get the string that should be used in searches."""
+        return " ".join([self.character, self.code, self.name, self.comment])


 class UnicodeCharExtension(Extension):
+    unicode_path: str = "unicode_list.txt"
+
     def __init__(self):
-        super(UnicodeCharExtension, self).__init__()
+        super().__init__()
         check_cache_dir()
         self._load_character_table()
         self.subscribe(KeywordQueryEvent, KeywordQueryEventListener())

+    def get_filename(self) -> str:
+        """Default filename of the Unicode list."""
+        return os.path.join(FILE_PATH, self.unicode_path)
+
     def _load_character_table(self):
-        """ Read the data file and load to memory
-        """
+        """Read the data file and load to memory."""
+        filename = self.get_filename()
         self.character_list = []
-        with open(join(FILE_PATH, "unicode_list.txt"), "r") as f:
+        with open(filename, "r", encoding="utf-8") as f:
             for line in f.readlines():
                 name, comment, code, block = line.strip().split("\t")
                 character = UnicodeChar(name, comment, block, code)
                 self.character_list.append(character)
+    @staticmethod
+    async def refresh_unicode_list(path: str, preferences: UnicodeCharPreferences):
+        """Check if the Unicode list file needs refresh."""
+        # Get timestamp of the last time the file was modified
+        timestamp = os.path.getmtime(path)
+        # Number of days since the file was modified
+        age = math.floor((time.time() - timestamp) / 3600)
+        update_interval = preferences["update_interval"]
+        if 0 < update_interval < age:
+            await UnicodeCharExtension.update_unicode_list(path)
+
+    @staticmethod
+    async def update_unicode_list(path: str):
+        """Re-generate an old Unicode list file."""
+        # Save the file to a backup file if there is no backup.
+        backup = path + ".bkp"
+        if not os.path.isfile(backup):
+            logger.info("backup the file with Unicode list to: %s", backup)
+            shutil.copyfile(path, backup)
+
+        import generate_character_list
+
+        # Regenerate file with unicode list
+        logger.info("regenerate the file with Unicode list: %s", path)
+        generate_character_list.main(path)
+
+    @staticmethod
+    def get_preferences(
+        input_preferences: ExtensionPreferences,
+    ) -> UnicodeCharPreferences:
+        """Parse preferences to the correct types."""
+        preferences: UnicodeCharPreferences = {
+            "result_limit": int(input_preferences["result_limit"]),
+            "min_score": int(input_preferences["min_score"]),
+            "update_interval": int(input_preferences["update_interval"]),
+        }
+        return preferences
+
+    def search(self, query: str, preferences: UnicodeCharPreferences):
+        """Return a list of result sorted by relevance to the query."""
+        limit = preferences["result_limit"]
+        min_score = preferences["min_score"]
+        results = []
+        for c in self.character_list:
+            score = get_score(query, c.get_search_name())
+            if score >= min_score:
+                results.append((score, c))
+        results = sorted(results, reverse=True, key=itemgetter(0))
+        if len(results) > limit:
+            results = results[:limit]
+        return [c for (s, c) in results]
 class KeywordQueryEventListener(EventListener):
     def on_event(self, event, extension):
+        preferences = extension.get_preferences(extension.preferences)
+
+        # Re-generate unicode list if it is too old.
+        coro = extension.refresh_unicode_list(extension.get_filename(), preferences)
+        # start the event loop and execute the coroutine
+        asyncio.run(coro)
+
         items = []
-        arg = event.get_argument()
-        if arg:
-            result_list = SortedList(arg, min_score=99, limit=10)
-            result_list.extend(extension.character_list)
-            for char in result_list:
+        query = event.get_argument().strip()
+        if query:
+            # Return best characters matching the query, ordered by score.
+            results = extension.search(query, preferences)
+            for char in results:
                 image_path = get_character_icon(char)
-                encoded = htmlentities.encode(char.character)
-                if "&" in encoded:
-                    sep = " - "
-                    html = encoded
-                else:
-                    sep = ""
-                    html = ""
+                html_val = html_encode(char.character)
+                html_str = ""
+                if html_val:
+                    html_str = f" - HTML: {html_val}"
                 items.append(
                     ExtensionResultItem(
                         icon=image_path,
-                        name=char.name.capitalize() + " - " + char.character,
-                        description=char.block + " - Alt+Enter: " + html + sep + "Code: U+" + char.code,
+                        name=f"{char.name.capitalize()} - {char.character}",
+                        description=f"{char.block}{html_str} - Alt+Enter: U+{char.code}",
                         on_enter=CopyToClipboardAction(char.character),
-                        on_alt_enter=CopyToClipboardAction(html),
+                        on_alt_enter=CopyToClipboardAction(char.code),
                     )
                 )
         return RenderResultListAction(items)
+
+
+def html_encode(char: str) -> Optional[str]:
+    """Get the html encoded str corresponding to the unicode char, if it exist."""
+    if ord(char) in html.entities.codepoint2name:
+        html_var = html.entities.codepoint2name[ord(char)]
+        return f"&{html_var};"
+    return None
 def get_character_icon(char):
-    """ Check if there is an existing icon for this character and return its path
+    """Check if there is an existing icon for this character and return its path
     or create a new one and return its path.
     """
-    path = FILE_PATH + "images/cache/icon_%s.svg" % char.code
+    path = os.path.join(FILE_PATH, f"images/cache/icon_{char.code}.svg")
     if os.path.isfile(path):
         return path
     return create_character_icon(char)


 def create_character_icon(char, font="sans-serif"):
-    """ Create an SVG file containing the unicode glyph for char to be used
+    """Create an SVG file containing the unicode glyph for char to be used
     as a result icon.
     Note: this could be avoided by providing a gtk.PixBuf without creating a file,
     but ulauncher pickles the returned results, so it doesn't work currently.
     """
-    icon = ICON_TEMPLATE.replace("{symbol}", char.character).replace("{font}", font)
-    with codecs.open(
-        os.path.join(FILE_PATH, "images/cache/icon_%s.svg" % char.code), "w", "utf-8"
-    ) as target:
+    icon = ICON_TEMPLATE.format(symbol=char.character, font=font)
+    path = os.path.join(FILE_PATH, f"images/cache/icon_{char.code}.svg")
+    with codecs.open(path, "w", "utf-8") as target:
         target.write(icon)
-    return os.path.join(FILE_PATH, "images/cache/icon_%s.svg" % char.code)
+    return path


 def check_cache_dir(path="images/cache"):
-    """ Check if the cache directory exists and if not create it.
-    """
+    """Check if the cache directory exists and if not create it."""
     path = os.path.join(FILE_PATH, path)
     if not os.path.isdir(path):
         os.mkdir(path)
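
Editor's note on the refresh logic added above: `refresh_unicode_list` divides the file age in seconds by 3600, which gives hours, while the update_interval preference (see the manifest below) is described in days, so the list would be regenerated far more often than configured. A small sketch of the same check using days (assumption: the preference really is meant as days; this is not part of the commit):

    import math
    import os
    import time

    def age_in_days(path: str) -> int:
        """Whole days since `path` was last modified (86400 seconds per day)."""
        return math.floor((time.time() - os.path.getmtime(path)) / 86400)

    def needs_refresh(path: str, update_interval_days: int) -> bool:
        # A non-positive interval disables the automatic refresh, mirroring the diff's check.
        return 0 < update_interval_days < age_in_days(path)

Note also that `asyncio.run(coro)` blocks until the coroutine completes, so the download and regeneration still run synchronously inside the keyword-query handler.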

manifest.json

@@ -8,12 +8,33 @@
         "query_debounce": 0.5
     },
     "preferences": [
+        {
+            "id": "result_limit",
+            "type": "input",
+            "name": "Result limit",
+            "description": "Number of results that should be returned.",
+            "default_value": "10"
+        },
+        {
+            "id": "min_score",
+            "type": "input",
+            "name": "Minimum score",
+            "description": "Only display results with a matching score higher than this. If too high, e.g. higher than 200, no results will be shown.",
+            "default_value": "0"
+        },
         {
             "id": "symbol",
             "type": "keyword",
             "name": "Symbol",
             "description": "Search symbols in ASCII and Unicode. Enter to copy the symbol, alt+enter to copy the HTML entity. Dark mode friendly.",
             "default_value": "sym"
+        },
+        {
+            "id": "update_interval",
+            "type": "input",
+            "name": "Update interval",
+            "description": "Time interval (in days) after which the cached list of unicode symbols is updated by downloading the newest list. Set to -1 to never update the list.",
+            "default_value": "90"
         }
     ]
 }
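
For context on how these entries reach the code above: ulauncher hands preferences to the extension as strings keyed by preference id (hence the `ExtensionPreferences = Dict[str, str]` alias in main.py), and `get_preferences()` casts the numeric ones. Roughly, with the defaults shown here (illustrative values only, not part of the commit):

    # What extension.preferences would contain with the defaults above,
    # and how get_preferences() in main.py parses them.
    raw = {"result_limit": "10", "min_score": "0", "update_interval": "90", "symbol": "sym"}
    parsed = {
        "result_limit": int(raw["result_limit"]),
        "min_score": int(raw["min_score"]),
        "update_interval": int(raw["update_interval"]),
    }
    assert parsed == {"result_limit": 10, "min_score": 0, "update_interval": 90}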

versions.json

@@ -1,4 +1,5 @@
 [
     { "required_api_version": "^1.0.0", "commit": "API_v1" },
     { "required_api_version": "^2.0.0", "commit": "master" }
+    { "required_api_version": "^3.0.0", "commit": "api-v3" }
 ]
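
As captured here, the `^2.0.0` entry keeps no trailing comma before the newly added line, which would leave versions.json as invalid JSON (JSON requires commas between array elements); the comma may simply have been lost when this page was extracted. A valid form of the updated file would be:

    [
        { "required_api_version": "^1.0.0", "commit": "API_v1" },
        { "required_api_version": "^2.0.0", "commit": "master" },
        { "required_api_version": "^3.0.0", "commit": "api-v3" }
    ]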