Skip to content
user202729 edited this page Aug 16, 2021 · 2 revisions

Generate entries from Unicode name

import unicodedata
import json
import re

# Characters to generate entries for; the script emits one entry per
# character of this string, in the order given.
data="abdefhijkmnopstuvwzæðŋɑɔəɛɝɡɪɫɹʃʊʒˈˌθ"  # modify this line

def name(ch: str) -> str:
    """Return a shortened, search-friendly description of *ch*.

    The official Unicode name of the character is lowercased, hyphens are
    turned into spaces, and a fixed list of whole-word rewrites is applied
    to drop filler words and fix up a few spellings.

    Any error raised by ``unicodedata.name`` (e.g. for a character that has
    no Unicode name) propagates to the caller unchanged.
    """
    # (word, substitute) pairs, matched as whole words and applied in order.
    # Order matters: "small letter" has to be removed before the bare
    # "letter" rule runs.  The list can be modified to taste.
    rewrites = (
        ("small letter", ""),
        ("lamda", "lambda"),
        ("letter", ""),
        ("sign", ""),
        ("mathematical", ""),
        ("latin", "Latin"),
        ("greek", "Greek"),
    )

    text = unicodedata.name(ch).lower().replace("-", " ")

    for word, substitute in rewrites:
        whole_word = r"\b" + word + r"\b"
        text = re.sub(whole_word, substitute, text)
        # Deleting a word can leave a run of spaces behind; squeeze it
        # back down to a single space after every rule.
        text = re.sub("  +", " ", text)

    return text.strip()

# Write one JSON array [character, description, []] per character of `data`,
# separating consecutive entries with a comma followed by a newline.
# ensure_ascii=False keeps the characters themselves readable in the output.
print(
    *(json.dumps([ch, name(ch), []], ensure_ascii=False) for ch in data),
    sep=",\n",
)

Clone this wiki locally