diff --git a/helper/str_helper.go b/helper/str_helper.go index 01ed921..3527399 100644 --- a/helper/str_helper.go +++ b/helper/str_helper.go @@ -187,110 +187,8 @@ func GetCjkRange(code int) string { result = "EXT-I" } else if code == 0x3007 { result = "零" - } else if code >= 0x31C0 && code <= 0x31EF { - result = "笔画" - } else if code >= 0x2E80 && code <= 0x2EFF { - result = "部首补充" } else if code >= 0xE000 && code <= 0xF8FF { result = "PUA" - } else if code >= 0x2F00 && code <= 0x2FDF { - result = "康熙部首" - } else if code >= 0xF900 && code <= 0xFAD9 { - result = "兼容区" - } else if code >= 0xFE30 && code <= 0xFE4F { - result = "兼容形式" - } else if code >= 0x2F800 && code <= 0x2FA1D { - result = "兼容表意文字增补" - } else if code >= 0x3100 && code <= 0x312F { - result = "注音" - } else if code >= 0x17000 && code <= 0x187F7 { - result = "西夏文" - } else if code >= 0x18800 && code <= 0x18AFF { - result = "西夏文部首" - } else if code >= 0x18D00 && code <= 0x18D08 { - result = "西夏文增补" - } else if code >= 0x18B00 && code <= 0x18CFF { - result = "契丹文" - } else if code >= 0x0900 && code <= 0x097F { - result = "梵文" - } else if code >= 0x1B170 && code <= 0x1B2FF { - result = "女书" - } else if code >= 0x0F00 && code <= 0x0FFF { - result = "藏文" - } else if code >= 0x1800 && code <= 0x18AF { - result = "蒙古语" - } else if code >= 0x30A0 && code <= 0x30FF { - result = "日文片假名" - } else if code >= 0x31F0 && code <= 0x31FF { - result = "片假名扩展" - } else if code >= 0x3040 && code <= 0x309F { - result = "日文平假名" - } else if code >= 0x3000 && code <= 0x303F { - result = "符号和标点符号" - } else if code >= 0x2E00 && code <= 0x2E7F { - result = "补充标点符号" - } else if code >= 0x2200 && code <= 0x22FF { - result = "数学运算符号" - } else if code >= 0x2A00 && code <= 0x2AFF { - result = "补充数学运算符" - } else if code >= 0x2150 && code <= 0x218F { - result = "数字形式符号" - } else if code >= 0x2300 && code <= 0x23FF { - result = "杂项技术符号" - } else if code >= 0x27C0 && code <= 0x27EF { - result = "杂项数学符号-A" - } else if code >= 0x2980 && code <= 0x29FF { - result = "杂项数学符号-B" - } else if code >= 0x2190 && code <= 0x21FF { - result = "箭头符号" - } else if code >= 0x3190 && code <= 0x319F { - result = "汉文训读" - } else if code >= 0x3200 && code <= 0x32FF { - result = "带圈字符及月份" - } else if code >= 0x1F100 && code <= 0x1F1FF { - result = "带圈字母数字补充" - } else if code >= 0x1F200 && code <= 0x1F2FF { - result = "带圈表意文字补充" - } else if code >= 0x1D100 && code <= 0x1D1FF { - result = "音乐符号" - } else if code >= 0x1D360 && code <= 0x1D37F { - result = "算筹" - } else if code >= 0x1D300 && code <= 0x1D35F { - result = "太玄经符号" - } else if code >= 0x1F600 && code <= 0x1F64F { - result = "表情符号" - } else if code >= 0x2070 && code <= 0x209F { - result = "上下标" - } else if code >= 0x1D400 && code <= 0x1D7FF { - result = "字母和数字符号" - } else if code >= 0x25A0 && code <= 0x25FF { - result = "几何形状" - } else if code >= 0x1F780 && code <= 0x1F7FF { - result = "几何图形扩展" - } else if code >= 0x2630 && code <= 0x2637 { - result = "八卦" - } else if code >= 0x19904 && code <= 0x19967 { - result = "易经六十四卦符号" - } else if code >= 0x1FA00 && code <= 0x1FA6F { - result = "象棋符号" - } else if code >= 0x0370 && code <= 0x03FF { - result = "希腊语和科普特语" - } else if code >= 0x1F000 && code <= 0x1F02F { - result = "麻将牌" - } else if code >= 0x1F0A0 && code <= 0x1F0FF { - result = "扑克牌" - } else if code >= 0x20A0 && code <= 0x20CF { - result = "货币符号" - } else if code >= 0x1F680 && code <= 0x1F6FF { - result = "交通和地图符号" - } else if code >= 0x1F030 && code <= 0x1F09F { - result = "多米诺骨牌" - } else if code >= 0x1F300 && code <= 0x1F5FF { - result = "杂项符号和象形文字" - } else if code >= 0x2500 && code <= 0x257F { - result = "方框绘制字符" - } else if code >= 0x1F700 && code <= 0x1F77F { - result = "炼金术符号" } return result diff --git a/helper/unicode_helper.go b/helper/unicode_helper.go new file mode 100644 index 0000000..778714c --- /dev/null +++ b/helper/unicode_helper.go @@ -0,0 +1,2451 @@ +// +// unicode_helper.go +// Copyright (C) 2023 tiglog +// +// Distributed under terms of the MIT license. +// + +package helper + +import "encoding/json" + +var json_data = `[ + { + "zh": "基础拉丁语", + "en": "Basic Latin", + "start_str": "0x0000", + "end_str": "0x007F", + "start": 0, + "end": 127 + }, + { + "zh": "拉丁语增补", + "en": "Latin-1 Supplement", + "start_str": "0x0080", + "end_str": "0x00FF", + "start": 128, + "end": 255 + }, + { + "zh": "拉丁语扩展-A", + "en": "Latin Extended-A", + "start_str": "0x0100", + "end_str": "0x017F", + "start": 256, + "end": 383 + }, + { + "zh": "拉丁语扩展-B", + "en": "Latin Extended-B", + "start_str": "0x0180", + "end_str": "0x024F", + "start": 384, + "end": 591 + }, + { + "zh": "国际音标扩展", + "en": "IPA Extensions", + "start_str": "0x0250", + "end_str": "0x02AF", + "start": 592, + "end": 687 + }, + { + "zh": "间距修饰字符", + "en": "Spacing Modifier Letters", + "start_str": "0x02B0", + "end_str": "0x02FF", + "start": 688, + "end": 767 + }, + { + "zh": "组合变音标记", + "en": "Combining Diacritical Marks", + "start_str": "0x0300", + "end_str": "0x036F", + "start": 768, + "end": 879 + }, + { + "zh": "希腊语和科普特语", + "en": "Greek and Coptic", + "start_str": "0x0370", + "end_str": "0x03FF", + "start": 880, + "end": 1023 + }, + { + "zh": "西里尔文", + "en": "Cyrillic", + "start_str": "0x0400", + "end_str": "0x04FF", + "start": 1024, + "end": 1279 + }, + { + "zh": "西里尔文增补", + "en": "Cyrillic Supplement", + "start_str": "0x0500", + "end_str": "0x052F", + "start": 1280, + "end": 1327 + }, + { + "zh": "亚美尼亚语", + "en": "Armenian", + "start_str": "0x0530", + "end_str": "0x058F", + "start": 1328, + "end": 1423 + }, + { + "zh": "希伯来语", + "en": "Hebrew", + "start_str": "0x0590", + "end_str": "0x05FF", + "start": 1424, + "end": 1535 + }, + { + "zh": "阿拉伯语", + "en": "Arabic", + "start_str": "0x0600", + "end_str": "0x06FF", + "start": 1536, + "end": 1791 + }, + { + "zh": "叙利亚文", + "en": "Syriac", + "start_str": "0x0700", + "end_str": "0x074F", + "start": 1792, + "end": 1871 + }, + { + "zh": "阿拉伯语增补", + "en": "Arabic Supplement", + "start_str": "0x0750", + "end_str": "0x077F", + "start": 1872, + "end": 1919 + }, + { + "zh": "它拿字母", + "en": "Thaana", + "start_str": "0x0780", + "end_str": "0x07BF", + "start": 1920, + "end": 1983 + }, + { + "zh": "西非书面文字", + "en": "NKo", + "start_str": "0x07C0", + "end_str": "0x07FF", + "start": 1984, + "end": 2047 + }, + { + "zh": "撒玛利亚字母", + "en": "Samaritan", + "start_str": "0x0800", + "end_str": "0x083F", + "start": 2048, + "end": 2111 + }, + { + "zh": "曼达文", + "en": "Mandaic", + "start_str": "0x0840", + "end_str": "0x085F", + "start": 2112, + "end": 2143 + }, + { + "zh": "叙利亚文增补", + "en": "Syriac Supplement", + "start_str": "0x0860", + "end_str": "0x086F", + "start": 2144, + "end": 2159 + }, + { + "zh": "阿拉伯语扩展-A", + "en": "Arabic Extended-A", + "start_str": "0x08A0", + "end_str": "0x08FF", + "start": 2208, + "end": 2303 + }, + { + "zh": "梵文", + "en": "Devanagari", + "start_str": "0x0900", + "end_str": "0x097F", + "start": 2304, + "end": 2431 + }, + { + "zh": "孟加拉语", + "en": "Bengali", + "start_str": "0x0980", + "end_str": "0x09FF", + "start": 2432, + "end": 2559 + }, + { + "zh": "古木基文", + "en": "Gurmukhi", + "start_str": "0x0A00", + "end_str": "0x0A7F", + "start": 2560, + "end": 2687 + }, + { + "zh": "古吉拉特語", + "en": "Gujarati", + "start_str": "0x0A80", + "end_str": "0x0AFF", + "start": 2688, + "end": 2815 + }, + { + "zh": "奥里亚语", + "en": "Oriya", + "start_str": "0x0B00", + "end_str": "0x0B7F", + "start": 2816, + "end": 2943 + }, + { + "zh": "泰米尔语", + "en": "Tamil", + "start_str": "0x0B80", + "end_str": "0x0BFF", + "start": 2944, + "end": 3071 + }, + { + "zh": "泰卢固语", + "en": "Telugu", + "start_str": "0x0C00", + "end_str": "0x0C7F", + "start": 3072, + "end": 3199 + }, + { + "zh": "卡纳达语", + "en": "Kannada", + "start_str": "0x0C80", + "end_str": "0x0CFF", + "start": 3200, + "end": 3327 + }, + { + "zh": "马拉雅拉姆语", + "en": "Malayalam", + "start_str": "0x0D00", + "end_str": "0x0D7F", + "start": 3328, + "end": 3455 + }, + { + "zh": "僧伽罗语", + "en": "Sinhala", + "start_str": "0x0D80", + "end_str": "0x0DFF", + "start": 3456, + "end": 3583 + }, + { + "zh": "泰语", + "en": "Thai", + "start_str": "0x0E00", + "end_str": "0x0E7F", + "start": 3584, + "end": 3711 + }, + { + "zh": "老挝语", + "en": "Lao", + "start_str": "0x0E80", + "end_str": "0x0EFF", + "start": 3712, + "end": 3839 + }, + { + "zh": "藏文", + "en": "Tibetan", + "start_str": "0x0F00", + "end_str": "0x0FFF", + "start": 3840, + "end": 4095 + }, + { + "zh": "缅甸语", + "en": "Myanmar", + "start_str": "0x1000", + "end_str": "0x109F", + "start": 4096, + "end": 4255 + }, + { + "zh": "格鲁吉亚语", + "en": "Georgian", + "start_str": "0x10A0", + "end_str": "0x10FF", + "start": 4256, + "end": 4351 + }, + { + "zh": "韩文字母", + "en": "Hangul Jamo", + "start_str": "0x1100", + "end_str": "0x11FF", + "start": 4352, + "end": 4607 + }, + { + "zh": "阿姆哈拉语", + "en": "Ethiopic", + "start_str": "0x1200", + "end_str": "0x137F", + "start": 4608, + "end": 4991 + }, + { + "zh": "阿姆哈拉语增补", + "en": "Ethiopic Supplement", + "start_str": "0x1380", + "end_str": "0x139F", + "start": 4992, + "end": 5023 + }, + { + "zh": "切罗基语", + "en": "Cherokee", + "start_str": "0x13A0", + "end_str": "0x13FF", + "start": 5024, + "end": 5119 + }, + { + "zh": "统一加拿大原住民音节", + "en": "Unified Canadian Aboriginal Syllabics", + "start_str": "0x1400", + "end_str": "0x167F", + "start": 5120, + "end": 5759 + }, + { + "zh": "欧甘字母", + "en": "Ogham", + "start_str": "0x1680", + "end_str": "0x169F", + "start": 5760, + "end": 5791 + }, + { + "zh": "卢恩字母", + "en": "Runic", + "start_str": "0x16A0", + "end_str": "0x16FF", + "start": 5792, + "end": 5887 + }, + { + "zh": "他加禄语", + "en": "Tagalog", + "start_str": "0x1700", + "end_str": "0x171F", + "start": 5888, + "end": 5919 + }, + { + "zh": "哈努诺文", + "en": "Hanunoo", + "start_str": "0x1720", + "end_str": "0x173F", + "start": 5920, + "end": 5951 + }, + { + "zh": "布希德文", + "en": "Buhid", + "start_str": "0x1740", + "end_str": "0x175F", + "start": 5952, + "end": 5983 + }, + { + "zh": "塔格巴努亚文", + "en": "Tagbanwa", + "start_str": "0x1760", + "end_str": "0x177F", + "start": 5984, + "end": 6015 + }, + { + "zh": "高棉语", + "en": "Khmer", + "start_str": "0x1780", + "end_str": "0x17FF", + "start": 6016, + "end": 6143 + }, + { + "zh": "蒙古语", + "en": "Mongolian", + "start_str": "0x1800", + "end_str": "0x18AF", + "start": 6144, + "end": 6319 + }, + { + "zh": "统一加拿大原住民音节扩展", + "en": "Unified Canadian Aboriginal Syllabics Extended", + "start_str": "0x18B0", + "end_str": "0x18FF", + "start": 6320, + "end": 6399 + }, + { + "zh": "林布语", + "en": "Limbu", + "start_str": "0x1900", + "end_str": "0x194F", + "start": 6400, + "end": 6479 + }, + { + "zh": "德宏傣文", + "en": "Tai Le", + "start_str": "0x1950", + "end_str": "0x197F", + "start": 6480, + "end": 6527 + }, + { + "zh": "傣仂语", + "en": "New Tai Lue", + "start_str": "0x1980", + "end_str": "0x19DF", + "start": 6528, + "end": 6623 + }, + { + "zh": "高棉符号", + "en": "Khmer Symbols", + "start_str": "0x19E0", + "end_str": "0x19FF", + "start": 6624, + "end": 6655 + }, + { + "zh": "布吉语", + "en": "Buginese", + "start_str": "0x1A00", + "end_str": "0x1A1F", + "start": 6656, + "end": 6687 + }, + { + "zh": "老傣文", + "en": "Tai Tham", + "start_str": "0x1A20", + "end_str": "0x1AAF", + "start": 6688, + "end": 6831 + }, + { + "zh": "结合变音符号扩展", + "en": "Combining Diacritical Marks Extended", + "start_str": "0x1AB0", + "end_str": "0x1AFF", + "start": 6832, + "end": 6911 + }, + { + "zh": "巴厘语", + "en": "Balinese", + "start_str": "0x1B00", + "end_str": "0x1B7F", + "start": 6912, + "end": 7039 + }, + { + "zh": "巽他语", + "en": "Sundanese", + "start_str": "0x1B80", + "end_str": "0x1BBF", + "start": 7040, + "end": 7103 + }, + { + "zh": "巴塔克语", + "en": "Batak", + "start_str": "0x1BC0", + "end_str": "0x1BFF", + "start": 7104, + "end": 7167 + }, + { + "zh": "绒巴文", + "en": "Lepcha", + "start_str": "0x1C00", + "end_str": "0x1C4F", + "start": 7168, + "end": 7247 + }, + { + "zh": "桑塔利语字母", + "en": "Ol Chiki", + "start_str": "0x1C50", + "end_str": "0x1C7F", + "start": 7248, + "end": 7295 + }, + { + "zh": "西里尔文扩展-C", + "en": "Cyrillic Extended-C", + "start_str": "0x1C80", + "end_str": "0x1C8F", + "start": 7296, + "end": 7311 + }, + { + "zh": "格鲁吉亚文扩展", + "en": "Georgian Extended", + "start_str": "0x1C90", + "end_str": "0x1CBF", + "start": 7312, + "end": 7359 + }, + { + "zh": "巽他文增补", + "en": "Sundanese Supplement", + "start_str": "0x1CC0", + "end_str": "0x1CCF", + "start": 7360, + "end": 7375 + }, + { + "zh": "吠陀扩展", + "en": "Vedic Extensions", + "start_str": "0x1CD0", + "end_str": "0x1CFF", + "start": 7376, + "end": 7423 + }, + { + "zh": "音标扩展", + "en": "Phonetic Extensions", + "start_str": "0x1D00", + "end_str": "0x1D7F", + "start": 7424, + "end": 7551 + }, + { + "zh": "音标扩展增补", + "en": "Phonetic Extensions Supplement", + "start_str": "0x1D80", + "end_str": "0x1DBF", + "start": 7552, + "end": 7615 + }, + { + "zh": "结合变音标记增补", + "en": "Combining Diacritical Marks Supplement", + "start_str": "0x1DC0", + "end_str": "0x1DFF", + "start": 7616, + "end": 7679 + }, + { + "zh": "拉丁语扩展附加", + "en": "Latin Extended Additional", + "start_str": "0x1E00", + "end_str": "0x1EFF", + "start": 7680, + "end": 7935 + }, + { + "zh": "希腊语扩展", + "en": "Greek Extended", + "start_str": "0x1F00", + "end_str": "0x1FFF", + "start": 7936, + "end": 8191 + }, + { + "zh": "一般标点符号", + "en": "General Punctuation", + "start_str": "0x2000", + "end_str": "0x206F", + "start": 8192, + "end": 8303 + }, + { + "zh": "上标和下标", + "en": "Superscripts and Subscripts", + "start_str": "0x2070", + "end_str": "0x209F", + "start": 8304, + "end": 8351 + }, + { + "zh": "货币符号", + "en": "Currency Symbols", + "start_str": "0x20A0", + "end_str": "0x20CF", + "start": 8352, + "end": 8399 + }, + { + "zh": "结合符号的变音符号", + "en": "Combining Diacritical Marks for Symbols", + "start_str": "0x20D0", + "end_str": "0x20FF", + "start": 8400, + "end": 8447 + }, + { + "zh": "类字母符号", + "en": "Letterlike Symbols", + "start_str": "0x2100", + "end_str": "0x214F", + "start": 8448, + "end": 8527 + }, + { + "zh": "数字形式符号", + "en": "Number Forms", + "start_str": "0x2150", + "end_str": "0x218F", + "start": 8528, + "end": 8591 + }, + { + "zh": "箭头符号", + "en": "Arrows", + "start_str": "0x2190", + "end_str": "0x21FF", + "start": 8592, + "end": 8703 + }, + { + "zh": "数学运算符", + "en": "Mathematical Operators", + "start_str": "0x2200", + "end_str": "0x22FF", + "start": 8704, + "end": 8959 + }, + { + "zh": "杂项技术符号", + "en": "Miscellaneous Technical", + "start_str": "0x2300", + "end_str": "0x23FF", + "start": 8960, + "end": 9215 + }, + { + "zh": "控制图片", + "en": "Control Pictures", + "start_str": "0x2400", + "end_str": "0x243F", + "start": 9216, + "end": 9279 + }, + { + "zh": "光学字符识别", + "en": "Optical Character Recognition", + "start_str": "0x2440", + "end_str": "0x245F", + "start": 9280, + "end": 9311 + }, + { + "zh": "封闭式字母数字", + "en": "Enclosed Alphanumerics", + "start_str": "0x2460", + "end_str": "0x24FF", + "start": 9312, + "end": 9471 + }, + { + "zh": "方框绘制字符", + "en": "Box Drawing", + "start_str": "0x2500", + "end_str": "0x257F", + "start": 9472, + "end": 9599 + }, + { + "zh": "块状元素", + "en": "Block Elements", + "start_str": "0x2580", + "end_str": "0x259F", + "start": 9600, + "end": 9631 + }, + { + "zh": "几何形状", + "en": "Geometric Shapes", + "start_str": "0x25A0", + "end_str": "0x25FF", + "start": 9632, + "end": 9727 + }, + { + "zh": "杂项符号", + "en": "Miscellaneous Symbols", + "start_str": "0x2600", + "end_str": "0x26FF", + "start": 9728, + "end": 9983 + }, + { + "zh": "装饰符号", + "en": "Dingbats", + "start_str": "0x2700", + "end_str": "0x27BF", + "start": 9984, + "end": 10175 + }, + { + "zh": "杂项数学符号-A", + "en": "Miscellaneous Mathematical Symbols-A", + "start_str": "0x27C0", + "end_str": "0x27EF", + "start": 10176, + "end": 10223 + }, + { + "zh": "补充箭头-A", + "en": "Supplemental Arrows-A", + "start_str": "0x27F0", + "end_str": "0x27FF", + "start": 10224, + "end": 10239 + }, + { + "zh": "盲文", + "en": "Braille Patterns", + "start_str": "0x2800", + "end_str": "0x28FF", + "start": 10240, + "end": 10495 + }, + { + "zh": "补充箭头-B", + "en": "Supplemental Arrows-B", + "start_str": "0x2900", + "end_str": "0x297F", + "start": 10496, + "end": 10623 + }, + { + "zh": "杂项数学符号-B", + "en": "Miscellaneous Mathematical Symbols-B", + "start_str": "0x2980", + "end_str": "0x29FF", + "start": 10624, + "end": 10751 + }, + { + "zh": "补充数学运算符", + "en": "Supplemental Mathematical Operators", + "start_str": "0x2A00", + "end_str": "0x2AFF", + "start": 10752, + "end": 11007 + }, + { + "zh": "其他符号和箭头", + "en": "Miscellaneous Symbols and Arrows", + "start_str": "0x2B00", + "end_str": "0x2BFF", + "start": 11008, + "end": 11263 + }, + { + "zh": "格拉哥里字母", + "en": "Glagolitic", + "start_str": "0x2C00", + "end_str": "0x2C5F", + "start": 11264, + "end": 11359 + }, + { + "zh": "拉丁语扩展-C", + "en": "Latin Extended-C", + "start_str": "0x2C60", + "end_str": "0x2C7F", + "start": 11360, + "end": 11391 + }, + { + "zh": "科普特文", + "en": "Coptic", + "start_str": "0x2C80", + "end_str": "0x2CFF", + "start": 11392, + "end": 11519 + }, + { + "zh": "格鲁吉亚文增补", + "en": "Georgian Supplement", + "start_str": "0x2D00", + "end_str": "0x2D2F", + "start": 11520, + "end": 11567 + }, + { + "zh": "提非纳字母", + "en": "Tifinagh", + "start_str": "0x2D30", + "end_str": "0x2D7F", + "start": 11568, + "end": 11647 + }, + { + "zh": "阿姆哈拉语扩展", + "en": "Ethiopic Extended", + "start_str": "0x2D80", + "end_str": "0x2DDF", + "start": 11648, + "end": 11743 + }, + { + "zh": "西里尔文扩展-A", + "en": "Cyrillic Extended-A", + "start_str": "0x2DE0", + "end_str": "0x2DFF", + "start": 11744, + "end": 11775 + }, + { + "zh": "补充标点符号", + "en": "Supplemental Punctuation", + "start_str": "0x2E00", + "end_str": "0x2E7F", + "start": 11776, + "end": 11903 + }, + { + "zh": "中日韩汉字部首补充", + "en": "CJK Radicals Supplement", + "start_str": "0x2E80", + "end_str": "0x2EFF", + "start": 11904, + "end": 12031 + }, + { + "zh": "康熙部首", + "en": "Kangxi Radicals", + "start_str": "0x2F00", + "end_str": "0x2FDF", + "start": 12032, + "end": 12255 + }, + { + "zh": "表意文字描述字符", + "en": "Ideographic Description Characters", + "start_str": "0x2FF0", + "end_str": "0x2FFF", + "start": 12272, + "end": 12287 + }, + { + "zh": "中日韩符号和标点符号", + "en": "CJK Symbols and Punctuation", + "start_str": "0x3000", + "end_str": "0x303F", + "start": 12288, + "end": 12351 + }, + { + "zh": "日文平假名", + "en": "Hiragana", + "start_str": "0x3040", + "end_str": "0x309F", + "start": 12352, + "end": 12447 + }, + { + "zh": "日文片假名", + "en": "Katakana", + "start_str": "0x30A0", + "end_str": "0x30FF", + "start": 12448, + "end": 12543 + }, + { + "zh": "注音", + "en": "Bopomofo", + "start_str": "0x3100", + "end_str": "0x312F", + "start": 12544, + "end": 12591 + }, + { + "zh": "韩文兼容字母", + "en": "Hangul Compatibility Jamo", + "start_str": "0x3130", + "end_str": "0x318F", + "start": 12592, + "end": 12687 + }, + { + "zh": "汉文训读", + "en": "Kanbun", + "start_str": "0x3190", + "end_str": "0x319F", + "start": 12688, + "end": 12703 + }, + { + "zh": "注音符号扩展", + "en": "Bopomofo Extended", + "start_str": "0x31A0", + "end_str": "0x31BF", + "start": 12704, + "end": 12735 + }, + { + "zh": "中日韩汉语笔画", + "en": "CJK Strokes", + "start_str": "0x31C0", + "end_str": "0x31EF", + "start": 12736, + "end": 12783 + }, + { + "zh": "片假名扩展", + "en": "Katakana Phonetic Extensions", + "start_str": "0x31F0", + "end_str": "0x31FF", + "start": 12784, + "end": 12799 + }, + { + "zh": "中日韩带圈字符及月份", + "en": "Enclosed CJK Letters and Months", + "start_str": "0x3200", + "end_str": "0x32FF", + "start": 12800, + "end": 13055 + }, + { + "zh": "中日韩兼容", + "en": "CJK Compatibility", + "start_str": "0x3300", + "end_str": "0x33FF", + "start": 13056, + "end": 13311 + }, + { + "zh": "中日韩兼容表意文字扩展 A", + "en": "CJK Unified Ideographs Extension A", + "start_str": "0x3400", + "end_str": "0x4DBF", + "start": 13312, + "end": 19903 + }, + { + "zh": "易经六十四卦符号", + "en": "Yijing Hexagram Symbols", + "start_str": "0x4DC0", + "end_str": "0x4DFF", + "start": 19904, + "end": 19967 + }, + { + "zh": "中日韩统一表意文字", + "en": "CJK Unified Ideographs", + "start_str": "0x4E00", + "end_str": "0x9FFF", + "start": 19968, + "end": 40959 + }, + { + "zh": "彝族音节", + "en": "Yi Syllables", + "start_str": "0xA000", + "end_str": "0xA48F", + "start": 40960, + "end": 42127 + }, + { + "zh": "彝族部首", + "en": "Yi Radicals", + "start_str": "0xA490", + "end_str": "0xA4CF", + "start": 42128, + "end": 42191 + }, + { + "zh": "傈僳语", + "en": "Lisu", + "start_str": "0xA4D0", + "end_str": "0xA4FF", + "start": 42192, + "end": 42239 + }, + { + "zh": "瓦伊语", + "en": "Vai", + "start_str": "0xA500", + "end_str": "0xA63F", + "start": 42240, + "end": 42559 + }, + { + "zh": "西里尔文扩展-B", + "en": "Cyrillic Extended-B", + "start_str": "0xA640", + "end_str": "0xA69F", + "start": 42560, + "end": 42655 + }, + { + "zh": "巴姆穆语", + "en": "Bamum", + "start_str": "0xA6A0", + "end_str": "0xA6FF", + "start": 42656, + "end": 42751 + }, + { + "zh": "声调修饰符", + "en": "Modifier Tone Letters", + "start_str": "0xA700", + "end_str": "0xA71F", + "start": 42752, + "end": 42783 + }, + { + "zh": "拉丁语扩展-D", + "en": "Latin Extended-D", + "start_str": "0xA720", + "end_str": "0xA7FF", + "start": 42784, + "end": 43007 + }, + { + "zh": "锡尔赫特文", + "en": "Syloti Nagri", + "start_str": "0xA800", + "end_str": "0xA82F", + "start": 43008, + "end": 43055 + }, + { + "zh": "常用印度数字形式", + "en": "Common Indic Number Forms", + "start_str": "0xA830", + "end_str": "0xA83F", + "start": 43056, + "end": 43071 + }, + { + "zh": "八思巴字", + "en": "Phags-pa", + "start_str": "0xA840", + "end_str": "0xA87F", + "start": 43072, + "end": 43135 + }, + { + "zh": "索拉什特拉语", + "en": "Saurashtra", + "start_str": "0xA880", + "end_str": "0xA8DF", + "start": 43136, + "end": 43231 + }, + { + "zh": "天城文扩展", + "en": "Devanagari Extended", + "start_str": "0xA8E0", + "end_str": "0xA8FF", + "start": 43232, + "end": 43263 + }, + { + "zh": "克耶字母", + "en": "Kayah Li", + "start_str": "0xA900", + "end_str": "0xA92F", + "start": 43264, + "end": 43311 + }, + { + "zh": "拉让语", + "en": "Rejang", + "start_str": "0xA930", + "end_str": "0xA95F", + "start": 43312, + "end": 43359 + }, + { + "zh": "韩文字母扩展", + "en": "Hangul Jamo Extended-A", + "start_str": "0xA960", + "end_str": "0xA97F", + "start": 43360, + "end": 43391 + }, + { + "zh": "爪哇语", + "en": "Javanese", + "start_str": "0xA980", + "end_str": "0xA9DF", + "start": 43392, + "end": 43487 + }, + { + "zh": "缅甸语扩展-B", + "en": "Myanmar Extended-B", + "start_str": "0xA9E0", + "end_str": "0xA9FF", + "start": 43488, + "end": 43519 + }, + { + "zh": "占语", + "en": "Cham", + "start_str": "0xAA00", + "end_str": "0xAA5F", + "start": 43520, + "end": 43615 + }, + { + "zh": "缅甸语扩展-A", + "en": "Myanmar Extended-A", + "start_str": "0xAA60", + "end_str": "0xAA7F", + "start": 43616, + "end": 43647 + }, + { + "zh": "傣文", + "en": "Tai Viet", + "start_str": "0xAA80", + "end_str": "0xAADF", + "start": 43648, + "end": 43743 + }, + { + "zh": "曼尼普尔语扩展", + "en": "Meetei Mayek Extensions", + "start_str": "0xAAE0", + "end_str": "0xAAFF", + "start": 43744, + "end": 43775 + }, + { + "zh": "阿姆哈拉语扩展-A", + "en": "Ethiopic Extended-A", + "start_str": "0xAB00", + "end_str": "0xAB2F", + "start": 43776, + "end": 43823 + }, + { + "zh": "拉丁文扩展-E", + "en": "Latin Extended-E", + "start_str": "0xAB30", + "end_str": "0xAB6F", + "start": 43824, + "end": 43887 + }, + { + "zh": "切罗基语增补", + "en": "Cherokee Supplement", + "start_str": "0xAB70", + "end_str": "0xABBF", + "start": 43888, + "end": 43967 + }, + { + "zh": "曼尼普尔语", + "en": "Meetei Mayek", + "start_str": "0xABC0", + "end_str": "0xABFF", + "start": 43968, + "end": 44031 + }, + { + "zh": "韩文音节", + "en": "Hangul Syllables", + "start_str": "0xAC00", + "end_str": "0xD7AF", + "start": 44032, + "end": 55215 + }, + { + "zh": "韩文字母扩展-B", + "en": "Hangul Jamo Extended-B", + "start_str": "0xD7B0", + "end_str": "0xD7FF", + "start": 55216, + "end": 55295 + }, + { + "zh": "高位替代", + "en": "High Surrogates", + "start_str": "0xD800", + "end_str": "0xDB7F", + "start": 55296, + "end": 56191 + }, + { + "zh": "高位专用替代", + "en": "High Private Use Surrogates", + "start_str": "0xDB80", + "end_str": "0xDBFF", + "start": 56192, + "end": 56319 + }, + { + "zh": "低位专用", + "en": "Low Surrogates", + "start_str": "0xDC00", + "end_str": "0xDFFF", + "start": 56320, + "end": 57343 + }, + { + "zh": "私用区", + "en": "Private Use Area", + "start_str": "0xE000", + "end_str": "0xF8FF", + "start": 57344, + "end": 63743 + }, + { + "zh": "中日韩兼容表意文字", + "en": "CJK Compatibility Ideographs", + "start_str": "0xF900", + "end_str": "0xFAFF", + "start": 63744, + "end": 64255 + }, + { + "zh": "字母连写形式", + "en": "Alphabetic Presentation Forms", + "start_str": "0xFB00", + "end_str": "0xFB4F", + "start": 64256, + "end": 64335 + }, + { + "zh": "阿拉伯语表现形式-A", + "en": "Arabic Presentation Forms-A", + "start_str": "0xFB50", + "end_str": "0xFDFF", + "start": 64336, + "end": 65023 + }, + { + "zh": "变体选择器", + "en": "Variation Selectors", + "start_str": "0xFE00", + "end_str": "0xFE0F", + "start": 65024, + "end": 65039 + }, + { + "zh": "竖排形式", + "en": "Vertical Forms", + "start_str": "0xFE10", + "end_str": "0xFE1F", + "start": 65040, + "end": 65055 + }, + { + "zh": "组合用半符号", + "en": "Combining Half Marks", + "start_str": "0xFE20", + "end_str": "0xFE2F", + "start": 65056, + "end": 65071 + }, + { + "zh": "中日韩兼容形式", + "en": "CJK Compatibility Forms", + "start_str": "0xFE30", + "end_str": "0xFE4F", + "start": 65072, + "end": 65103 + }, + { + "zh": "小型变体形式", + "en": "Small Form Variants", + "start_str": "0xFE50", + "end_str": "0xFE6F", + "start": 65104, + "end": 65135 + }, + { + "zh": "阿拉伯语表现形式-B", + "en": "Arabic Presentation Forms-B", + "start_str": "0xFE70", + "end_str": "0xFEFF", + "start": 65136, + "end": 65279 + }, + { + "zh": "全角和半角字符", + "en": "Halfwidth and Fullwidth Forms", + "start_str": "0xFF00", + "end_str": "0xFFEF", + "start": 65280, + "end": 65519 + }, + { + "zh": "特殊字符", + "en": "Specials", + "start_str": "0xFFF0", + "end_str": "0xFFFF", + "start": 65520, + "end": 65535 + }, + { + "zh": "线形文字B音节", + "en": "Linear B Syllabary", + "start_str": "0x10000", + "end_str": "0x1007F", + "start": 65536, + "end": 65663 + }, + { + "zh": "线形文字B表意文字", + "en": "Linear B Ideograms", + "start_str": "0x10080", + "end_str": "0x100FF", + "start": 65664, + "end": 65791 + }, + { + "zh": "爱琴海数字", + "en": "Aegean Numbers", + "start_str": "0x10100", + "end_str": "0x1013F", + "start": 65792, + "end": 65855 + }, + { + "zh": "古希腊数字", + "en": "Ancient Greek Numbers", + "start_str": "0x10140", + "end_str": "0x1018F", + "start": 65856, + "end": 65935 + }, + { + "zh": "古罗马符号", + "en": "Ancient Symbols", + "start_str": "0x10190", + "end_str": "0x101CF", + "start": 65936, + "end": 65999 + }, + { + "zh": "斐斯托斯圆盘古文字", + "en": "Phaistos Disc", + "start_str": "0x101D0", + "end_str": "0x101FF", + "start": 66000, + "end": 66047 + }, + { + "zh": "吕基亚语", + "en": "Lycian", + "start_str": "0x10280", + "end_str": "0x1029F", + "start": 66176, + "end": 66207 + }, + { + "zh": "卡里亚字母", + "en": "Carian", + "start_str": "0x102A0", + "end_str": "0x102DF", + "start": 66208, + "end": 66271 + }, + { + "zh": "科普特闰余数字", + "en": "Coptic Epact Numbers", + "start_str": "0x102E0", + "end_str": "0x102FF", + "start": 66272, + "end": 66303 + }, + { + "zh": "古意大利字母", + "en": "Old Italic", + "start_str": "0x10300", + "end_str": "0x1032F", + "start": 66304, + "end": 66351 + }, + { + "zh": "哥特字母", + "en": "Gothic", + "start_str": "0x10330", + "end_str": "0x1034F", + "start": 66352, + "end": 66383 + }, + { + "zh": "古彼尔姆文", + "en": "Old Permic", + "start_str": "0x10350", + "end_str": "0x1037F", + "start": 66384, + "end": 66431 + }, + { + "zh": "乌加里特语", + "en": "Ugaritic", + "start_str": "0x10380", + "end_str": "0x1039F", + "start": 66432, + "end": 66463 + }, + { + "zh": "古波斯语", + "en": "Old Persian", + "start_str": "0x103A0", + "end_str": "0x103DF", + "start": 66464, + "end": 66527 + }, + { + "zh": "德瑟雷特字母", + "en": "Deseret", + "start_str": "0x10400", + "end_str": "0x1044F", + "start": 66560, + "end": 66639 + }, + { + "zh": "萧伯纳字母", + "en": "Shavian", + "start_str": "0x10450", + "end_str": "0x1047F", + "start": 66640, + "end": 66687 + }, + { + "zh": "奥斯曼亚字母", + "en": "Osmanya", + "start_str": "0x10480", + "end_str": "0x104AF", + "start": 66688, + "end": 66735 + }, + { + "zh": "欧塞奇字母", + "en": "Osage", + "start_str": "0x104B0", + "end_str": "0x104FF", + "start": 66736, + "end": 66815 + }, + { + "zh": "爱尔巴桑字母", + "en": "Elbasan", + "start_str": "0x10500", + "end_str": "0x1052F", + "start": 66816, + "end": 66863 + }, + { + "zh": "高加索阿尔巴尼亚语言", + "en": "Caucasian Albanian", + "start_str": "0x10530", + "end_str": "0x1056F", + "start": 66864, + "end": 66927 + }, + { + "zh": "线性文字A", + "en": "Linear A", + "start_str": "0x10600", + "end_str": "0x1077F", + "start": 67072, + "end": 67455 + }, + { + "zh": "塞浦路斯语音节", + "en": "Cypriot Syllabary", + "start_str": "0x10800", + "end_str": "0x1083F", + "start": 67584, + "end": 67647 + }, + { + "zh": "帝国阿拉姆語", + "en": "Imperial Aramaic", + "start_str": "0x10840", + "end_str": "0x1085F", + "start": 67648, + "end": 67679 + }, + { + "zh": "巴尔米拉字母", + "en": "Palmyrene", + "start_str": "0x10860", + "end_str": "0x1087F", + "start": 67680, + "end": 67711 + }, + { + "zh": "纳巴泰字母", + "en": "Nabataean", + "start_str": "0x10880", + "end_str": "0x108AF", + "start": 67712, + "end": 67759 + }, + { + "zh": "哈特兰字母", + "en": "Hatran", + "start_str": "0x108E0", + "end_str": "0x108FF", + "start": 67808, + "end": 67839 + }, + { + "zh": "腓尼基字母", + "en": "Phoenician", + "start_str": "0x10900", + "end_str": "0x1091F", + "start": 67840, + "end": 67871 + }, + { + "zh": "吕底亚语", + "en": "Lydian", + "start_str": "0x10920", + "end_str": "0x1093F", + "start": 67872, + "end": 67903 + }, + { + "zh": "麦罗埃象形文字", + "en": "Meroitic Hieroglyphs", + "start_str": "0x10980", + "end_str": "0x1099F", + "start": 67968, + "end": 67999 + }, + { + "zh": "麦罗埃文草体字", + "en": "Meroitic Cursive", + "start_str": "0x109A0", + "end_str": "0x109FF", + "start": 68000, + "end": 68095 + }, + { + "zh": "佉卢文", + "en": "Kharoshthi", + "start_str": "0x10A00", + "end_str": "0x10A5F", + "start": 68096, + "end": 68191 + }, + { + "zh": "古南部阿拉伯语", + "en": "Old South Arabian", + "start_str": "0x10A60", + "end_str": "0x10A7F", + "start": 68192, + "end": 68223 + }, + { + "zh": "古北部阿拉伯语", + "en": "Old North Arabian", + "start_str": "0x10A80", + "end_str": "0x10A9F", + "start": 68224, + "end": 68255 + }, + { + "zh": "摩尼字母", + "en": "Manichaean", + "start_str": "0x10AC0", + "end_str": "0x10AFF", + "start": 68288, + "end": 68351 + }, + { + "zh": "阿维斯陀字母", + "en": "Avestan", + "start_str": "0x10B00", + "end_str": "0x10B3F", + "start": 68352, + "end": 68415 + }, + { + "zh": "碑刻帕提亚文", + "en": "Inscriptional Parthian", + "start_str": "0x10B40", + "end_str": "0x10B5F", + "start": 68416, + "end": 68447 + }, + { + "zh": "碑刻巴列维文", + "en": "Inscriptional Pahlavi", + "start_str": "0x10B60", + "end_str": "0x10B7F", + "start": 68448, + "end": 68479 + }, + { + "zh": "诗篇巴列维文", + "en": "Psalter Pahlavi", + "start_str": "0x10B80", + "end_str": "0x10BAF", + "start": 68480, + "end": 68527 + }, + { + "zh": "古代突厥語", + "en": "Old Turkic", + "start_str": "0x10C00", + "end_str": "0x10C4F", + "start": 68608, + "end": 68687 + }, + { + "zh": "古匈牙利字母", + "en": "Old Hungarian", + "start_str": "0x10C80", + "end_str": "0x10CFF", + "start": 68736, + "end": 68863 + }, + { + "zh": "哈乃斐罗兴亚文字", + "en": "Hanifi Rohingya", + "start_str": "0x10D00", + "end_str": "0x10D3F", + "start": 68864, + "end": 68927 + }, + { + "zh": "鲁米数字符号", + "en": "Rumi Numeral Symbols", + "start_str": "0x10E60", + "end_str": "0x10E7F", + "start": 69216, + "end": 69247 + }, + { + "zh": "古粟特字母", + "en": "Old Sogdian", + "start_str": "0x10F00", + "end_str": "0x10F2F", + "start": 69376, + "end": 69423 + }, + { + "zh": "粟特字母", + "en": "Sogdian", + "start_str": "0x10F30", + "end_str": "0x10F6F", + "start": 69424, + "end": 69487 + }, + { + "zh": "以利买字母", + "en": "Elymaic", + "start_str": "0x10FE0", + "end_str": "0x10FFF", + "start": 69600, + "end": 69631 + }, + { + "zh": "婆罗米文", + "en": "Brahmi", + "start_str": "0x11000", + "end_str": "0x1107F", + "start": 69632, + "end": 69759 + }, + { + "zh": "凯提文", + "en": "Kaithi", + "start_str": "0x11080", + "end_str": "0x110CF", + "start": 69760, + "end": 69839 + }, + { + "zh": "索拉僧平文字", + "en": "Sora Sompeng", + "start_str": "0x110D0", + "end_str": "0x110FF", + "start": 69840, + "end": 69887 + }, + { + "zh": "查克马语", + "en": "Chakma", + "start_str": "0x11100", + "end_str": "0x1114F", + "start": 69888, + "end": 69967 + }, + { + "zh": "马哈雅尼文", + "en": "Mahajani", + "start_str": "0x11150", + "end_str": "0x1117F", + "start": 69968, + "end": 70015 + }, + { + "zh": "夏拉达文", + "en": "Sharada", + "start_str": "0x11180", + "end_str": "0x111DF", + "start": 70016, + "end": 70111 + }, + { + "zh": "古僧伽罗文数字", + "en": "Sinhala Archaic Numbers", + "start_str": "0x111E0", + "end_str": "0x111FF", + "start": 70112, + "end": 70143 + }, + { + "zh": "和卓文", + "en": "Khojki", + "start_str": "0x11200", + "end_str": "0x1124F", + "start": 70144, + "end": 70223 + }, + { + "zh": "木尔坦文", + "en": "Multani", + "start_str": "0x11280", + "end_str": "0x112AF", + "start": 70272, + "end": 70319 + }, + { + "zh": "库达瓦迪文", + "en": "Khudawadi", + "start_str": "0x112B0", + "end_str": "0x112FF", + "start": 70320, + "end": 70399 + }, + { + "zh": "古兰塔文", + "en": "Grantha", + "start_str": "0x11300", + "end_str": "0x1137F", + "start": 70400, + "end": 70527 + }, + { + "zh": "尼瓦尔语", + "en": "Newa", + "start_str": "0x11400", + "end_str": "0x1147F", + "start": 70656, + "end": 70783 + }, + { + "zh": "提尔胡塔文", + "en": "Tirhuta", + "start_str": "0x11480", + "end_str": "0x114DF", + "start": 70784, + "end": 70879 + }, + { + "zh": "悉昙文字", + "en": "Siddham", + "start_str": "0x11580", + "end_str": "0x115FF", + "start": 71040, + "end": 71167 + }, + { + "zh": "莫迪文", + "en": "Modi", + "start_str": "0x11600", + "end_str": "0x1165F", + "start": 71168, + "end": 71263 + }, + { + "zh": "蒙古语增补", + "en": "Mongolian Supplement", + "start_str": "0x11660", + "end_str": "0x1167F", + "start": 71264, + "end": 71295 + }, + { + "zh": "塔克里文", + "en": "Takri", + "start_str": "0x11680", + "end_str": "0x116CF", + "start": 71296, + "end": 71375 + }, + { + "zh": "阿洪姆语", + "en": "Ahom", + "start_str": "0x11700", + "end_str": "0x1173F", + "start": 71424, + "end": 71487 + }, + { + "zh": "多格拉语", + "en": "Dogra", + "start_str": "0x11800", + "end_str": "0x1184F", + "start": 71680, + "end": 71759 + }, + { + "zh": "瓦兰齐地文", + "en": "Warang Citi", + "start_str": "0x118A0", + "end_str": "0x118FF", + "start": 71840, + "end": 71935 + }, + { + "zh": "南迪城文", + "en": "Nandinagari", + "start_str": "0x119A0", + "end_str": "0x119FF", + "start": 72096, + "end": 72191 + }, + { + "zh": "札那巴札尔方形字母", + "en": "Zanabazar Square", + "start_str": "0x11A00", + "end_str": "0x11A4F", + "start": 72192, + "end": 72271 + }, + { + "zh": "索永布字母", + "en": "Soyombo", + "start_str": "0x11A50", + "end_str": "0x11AAF", + "start": 72272, + "end": 72367 + }, + { + "zh": "包钦豪文", + "en": "Pau Cin Hau", + "start_str": "0x11AC0", + "end_str": "0x11AFF", + "start": 72384, + "end": 72447 + }, + { + "zh": "拜克舒基文", + "en": "Bhaiksuki", + "start_str": "0x11C00", + "end_str": "0x11C6F", + "start": 72704, + "end": 72815 + }, + { + "zh": "玛钦文", + "en": "Marchen", + "start_str": "0x11C70", + "end_str": "0x11CBF", + "start": 72816, + "end": 72895 + }, + { + "zh": "马萨拉姆贡德文字", + "en": "Masaram Gondi", + "start_str": "0x11D00", + "end_str": "0x11D5F", + "start": 72960, + "end": 73055 + }, + { + "zh": "贡贾拉贡德文", + "en": "Gunjala Gondi", + "start_str": "0x11D60", + "end_str": "0x11DAF", + "start": 73056, + "end": 73135 + }, + { + "zh": "望加锡文", + "en": "Makasar", + "start_str": "0x11EE0", + "end_str": "0x11EFF", + "start": 73440, + "end": 73471 + }, + { + "zh": "泰米尔文增补", + "en": "Tamil Supplement", + "start_str": "0x11FC0", + "end_str": "0x11FFF", + "start": 73664, + "end": 73727 + }, + { + "zh": "楔形文字", + "en": "Cuneiform", + "start_str": "0x12000", + "end_str": "0x123FF", + "start": 73728, + "end": 74751 + }, + { + "zh": "楔形文字数字和标点符号", + "en": "Cuneiform Numbers and Punctuation", + "start_str": "0x12400", + "end_str": "0x1247F", + "start": 74752, + "end": 74879 + }, + { + "zh": "古代楔形文字", + "en": "Early Dynastic Cuneiform", + "start_str": "0x12480", + "end_str": "0x1254F", + "start": 74880, + "end": 75087 + }, + { + "zh": "埃及圣书体", + "en": "Egyptian Hieroglyphs", + "start_str": "0x13000", + "end_str": "0x1342F", + "start": 77824, + "end": 78895 + }, + { + "zh": "埃及圣书体格式控制", + "en": "Egyptian Hieroglyph Format Controls", + "start_str": "0x13430", + "end_str": "0x1343F", + "start": 78896, + "end": 78911 + }, + { + "zh": "安纳托利亚象形文字", + "en": "Anatolian Hieroglyphs", + "start_str": "0x14400", + "end_str": "0x1467F", + "start": 82944, + "end": 83583 + }, + { + "zh": "巴姆穆文字增补", + "en": "Bamum Supplement", + "start_str": "0x16800", + "end_str": "0x16A3F", + "start": 92160, + "end": 92735 + }, + { + "zh": "默禄文", + "en": "Mro", + "start_str": "0x16A40", + "end_str": "0x16A6F", + "start": 92736, + "end": 92783 + }, + { + "zh": "巴萨哇文字", + "en": "Bassa Vah", + "start_str": "0x16AD0", + "end_str": "0x16AFF", + "start": 92880, + "end": 92927 + }, + { + "zh": "帕哈苗文", + "en": "Pahawh Hmong", + "start_str": "0x16B00", + "end_str": "0x16B8F", + "start": 92928, + "end": 93071 + }, + { + "zh": "梅德法伊德林文", + "en": "Medefaidrin", + "start_str": "0x16E40", + "end_str": "0x16E9F", + "start": 93760, + "end": 93855 + }, + { + "zh": "柏格理苗文", + "en": "Miao", + "start_str": "0x16F00", + "end_str": "0x16F9F", + "start": 93952, + "end": 94111 + }, + { + "zh": "表意符号和标点符号", + "en": "Ideographic Symbols and Punctuation", + "start_str": "0x16FE0", + "end_str": "0x16FFF", + "start": 94176, + "end": 94207 + }, + { + "zh": "西夏文", + "en": "Tangut", + "start_str": "0x17000", + "end_str": "0x187FF", + "start": 94208, + "end": 100351 + }, + { + "zh": "西夏文部首", + "en": "Tangut Components", + "start_str": "0x18800", + "end_str": "0x18AFF", + "start": 100352, + "end": 101119 + }, + { + "zh": "日文假名补充", + "en": "Kana Supplement", + "start_str": "0x1B000", + "end_str": "0x1B0FF", + "start": 110592, + "end": 110847 + }, + { + "zh": "日文假名扩展-A", + "en": "Kana Extended-A", + "start_str": "0x1B100", + "end_str": "0x1B12F", + "start": 110848, + "end": 110895 + }, + { + "zh": "小型日文假名扩展", + "en": "Small Kana Extension", + "start_str": "0x1B130", + "end_str": "0x1B16F", + "start": 110896, + "end": 110959 + }, + { + "zh": "女书", + "en": "Nushu", + "start_str": "0x1B170", + "end_str": "0x1B2FF", + "start": 110960, + "end": 111359 + }, + { + "zh": "杜普雷速记", + "en": "Duployan", + "start_str": "0x1BC00", + "end_str": "0x1BC9F", + "start": 113664, + "end": 113823 + }, + { + "zh": "速记格式控制符", + "en": "Shorthand Format Controls", + "start_str": "0x1BCA0", + "end_str": "0x1BCAF", + "start": 113824, + "end": 113839 + }, + { + "zh": "拜占庭音乐符号", + "en": "Byzantine Musical Symbols", + "start_str": "0x1D000", + "end_str": "0x1D0FF", + "start": 118784, + "end": 119039 + }, + { + "zh": "音乐符号", + "en": "Musical Symbols", + "start_str": "0x1D100", + "end_str": "0x1D1FF", + "start": 119040, + "end": 119295 + }, + { + "zh": "古希腊音乐记号", + "en": "Ancient Greek Musical Notation", + "start_str": "0x1D200", + "end_str": "0x1D24F", + "start": 119296, + "end": 119375 + }, + { + "zh": "玛雅数字", + "en": "Mayan Numerals", + "start_str": "0x1D2E0", + "end_str": "0x1D2FF", + "start": 119520, + "end": 119551 + }, + { + "zh": "太玄经符号", + "en": "Tai Xuan Jing Symbols", + "start_str": "0x1D300", + "end_str": "0x1D35F", + "start": 119552, + "end": 119647 + }, + { + "zh": "算筹", + "en": "Counting Rod Numerals", + "start_str": "0x1D360", + "end_str": "0x1D37F", + "start": 119648, + "end": 119679 + }, + { + "zh": "字母和数字符号", + "en": "Mathematical Alphanumeric Symbols", + "start_str": "0x1D400", + "end_str": "0x1D7FF", + "start": 119808, + "end": 120831 + }, + { + "zh": "萨顿书写符号", + "en": "Sutton SignWriting", + "start_str": "0x1D800", + "end_str": "0x1DAAF", + "start": 120832, + "end": 121519 + }, + { + "zh": "格拉哥里字母增补", + "en": "Glagolitic Supplement", + "start_str": "0x1E000", + "end_str": "0x1E02F", + "start": 122880, + "end": 122927 + }, + { + "zh": "尼亚坑普阿绰苗文", + "en": "Nyiakeng Puachue Hmong", + "start_str": "0x1E100", + "end_str": "0x1E14F", + "start": 123136, + "end": 123215 + }, + { + "zh": "文乔字母", + "en": "Wancho", + "start_str": "0x1E2C0", + "end_str": "0x1E2FF", + "start": 123584, + "end": 123647 + }, + { + "zh": "门德基卡库文", + "en": "Mende Kikakui", + "start_str": "0x1E800", + "end_str": "0x1E8DF", + "start": 124928, + "end": 125151 + }, + { + "zh": "阿德拉姆字母", + "en": "Adlam", + "start_str": "0x1E900", + "end_str": "0x1E95F", + "start": 125184, + "end": 125279 + }, + { + "zh": "印度西亚格数字", + "en": "Indic Siyaq Numbers", + "start_str": "0x1EC70", + "end_str": "0x1ECBF", + "start": 126064, + "end": 126143 + }, + { + "zh": "奥斯曼西亚克数字", + "en": "Ottoman Siyaq Numbers", + "start_str": "0x1ED00", + "end_str": "0x1ED4F", + "start": 126208, + "end": 126287 + }, + { + "zh": "阿拉伯字母数字符号", + "en": "Arabic Mathematical Alphabetic Symbols", + "start_str": "0x1EE00", + "end_str": "0x1EEFF", + "start": 126464, + "end": 126719 + }, + { + "zh": "麻将牌", + "en": "Mahjong Tiles", + "start_str": "0x1F000", + "end_str": "0x1F02F", + "start": 126976, + "end": 127023 + }, + { + "zh": "多米诺骨牌", + "en": "Domino Tiles", + "start_str": "0x1F030", + "end_str": "0x1F09F", + "start": 127024, + "end": 127135 + }, + { + "zh": "扑克牌", + "en": "Playing Cards", + "start_str": "0x1F0A0", + "end_str": "0x1F0FF", + "start": 127136, + "end": 127231 + }, + { + "zh": "带圈字母数字补充", + "en": "Enclosed Alphanumeric Supplement", + "start_str": "0x1F100", + "end_str": "0x1F1FF", + "start": 127232, + "end": 127487 + }, + { + "zh": "带圈表意文字补充", + "en": "Enclosed Ideographic Supplement", + "start_str": "0x1F200", + "end_str": "0x1F2FF", + "start": 127488, + "end": 127743 + }, + { + "zh": "杂项符号和象形文字", + "en": "Miscellaneous Symbols and Pictographs", + "start_str": "0x1F300", + "end_str": "0x1F5FF", + "start": 127744, + "end": 128511 + }, + { + "zh": "表情符号", + "en": "Emoticons", + "start_str": "0x1F600", + "end_str": "0x1F64F", + "start": 128512, + "end": 128591 + }, + { + "zh": "装饰符号", + "en": "Ornamental Dingbats", + "start_str": "0x1F650", + "end_str": "0x1F67F", + "start": 128592, + "end": 128639 + }, + { + "zh": "交通和地图符号", + "en": "Transport and Map Symbols", + "start_str": "0x1F680", + "end_str": "0x1F6FF", + "start": 128640, + "end": 128767 + }, + { + "zh": "炼金术符号", + "en": "Alchemical Symbols", + "start_str": "0x1F700", + "end_str": "0x1F77F", + "start": 128768, + "end": 128895 + }, + { + "zh": "几何图形扩展", + "en": "Geometric Shapes Extended", + "start_str": "0x1F780", + "end_str": "0x1F7FF", + "start": 128896, + "end": 129023 + }, + { + "zh": "追加箭头-C", + "en": "Supplemental Arrows-C", + "start_str": "0x1F800", + "end_str": "0x1F8FF", + "start": 129024, + "end": 129279 + }, + { + "zh": "补充符号和象形文字", + "en": "Supplemental Symbols and Pictographs", + "start_str": "0x1F900", + "end_str": "0x1F9FF", + "start": 129280, + "end": 129535 + }, + { + "zh": "西洋棋符号", + "en": "Chess Symbols", + "start_str": "0x1FA00", + "end_str": "0x1FA6F", + "start": 129536, + "end": 129647 + }, + { + "zh": "符号和象形文字扩展-A", + "en": "Symbols and Pictographs Extended-A", + "start_str": "0x1FA70", + "end_str": "0x1FAFF", + "start": 129648, + "end": 129791 + }, + { + "zh": "中日韩统一表意文字扩展B区", + "en": "CJK Unified Ideographs Extension B", + "start_str": "0x20000", + "end_str": "0x2A6DF", + "start": 131072, + "end": 173791 + }, + { + "zh": "中日韩统一表意文字扩展C区", + "en": "CJK Unified Ideographs Extension C", + "start_str": "0x2A700", + "end_str": "0x2B73F", + "start": 173824, + "end": 177983 + }, + { + "zh": "中日韩统一表意文字扩展D区", + "en": "CJK Unified Ideographs Extension D", + "start_str": "0x2B740", + "end_str": "0x2B81F", + "start": 177984, + "end": 178207 + }, + { + "zh": "中日韩统一表意文字扩展E区", + "en": "CJK Unified Ideographs Extension E", + "start_str": "0x2B820", + "end_str": "0x2CEAF", + "start": 178208, + "end": 183983 + }, + { + "zh": "中日韩统一表意文字扩展F区", + "en": "CJK Unified Ideographs Extension F", + "start_str": "0x2CEB0", + "end_str": "0x2EBEF", + "start": 183984, + "end": 191471 + }, + { + "zh": "中日韩兼容表意文字增补", + "en": "CJK Compatibility Ideographs Supplement", + "start_str": "0x2F800", + "end_str": "0x2FA1F", + "start": 194560, + "end": 195103 + }, + { + "zh": "标签", + "en": "Tags", + "start_str": "0xE0000", + "end_str": "0xE007F", + "start": 917504, + "end": 917631 + }, + { + "zh": "变化选择器补充", + "en": "Variation Selectors Supplement", + "start_str": "0xE0100", + "end_str": "0xE01EF", + "start": 917760, + "end": 917999 + }, + { + "zh": "补充私人使用区-A", + "en": "Supplementary Private Use Area-A", + "start_str": "0xF0000", + "end_str": "0xFFFFF", + "start": 983040, + "end": 1048575 + }, + { + "zh": "补充私人使用区-B", + "en": "Supplementary Private Use Area-B", + "start_str": "0x100000", + "end_str": "0x10FFFF", + "start": 1048576, + "end": 1114111 + } +]` + +type UnicodeBlock struct { + Zh string + En string + StartStr string `json:"start_str"` + EndStr string `json:"end_str"` + Start int + End int +} + +type UnicodeHelper struct { + blocks []UnicodeBlock +} + +func (uh *UnicodeHelper) GetRange(code int) string { + for _, bl := range uh.blocks { + if code >= bl.Start && code <= bl.End { + return bl.Zh + } + } + return "" +} + +func (uh *UnicodeHelper) Init() error { + return json.Unmarshal([]byte(json_data), &uh.blocks) +} + +var unicode_helper *UnicodeHelper + +func NewUnicodeHelper() *UnicodeHelper { + if unicode_helper == nil { + unicode_helper = &UnicodeHelper{ + blocks: make([]UnicodeBlock, 0), + } + unicode_helper.Init() + } + return unicode_helper +} diff --git a/helper/unicode_helper_test.go b/helper/unicode_helper_test.go new file mode 100644 index 0000000..e22d49e --- /dev/null +++ b/helper/unicode_helper_test.go @@ -0,0 +1,21 @@ +// +// unicode_helper_test.go +// Copyright (C) 2023 tiglog +// +// Distributed under terms of the MIT license. +// + +package helper_test + +import ( + "fmt" + "testing" + + "git.hexq.cn/tiglog/golib/helper" +) + +func TestGetRange(t *testing.T) { + hp := helper.NewUnicodeHelper() + r1 := hp.GetRange(0xe0) + fmt.Println(r1) +}