+ "零点零零零零零零一": 0.0000001,
+ "零点零零零零零一": 0.000001,
+ "零点零零零零一": 0.00001,
+ "零点零零零一": 0.0001,
+ "零点零零一": 0.001,
+ "零点零一": 0.01,
+ "零点一": 0.1,
+ "负一": -1,
+ "负二": -2,
+ "负十": -10,
+ "负十一": -11,
+ "负一十一": -11,
+ # 古语
+ "廿二": 22,
+ }
+
+ self.normal_data_dict = {
+ "一一": 11,
+ "一一一": 111,
+ "壹壹": 11,
+ "壹壹壹": 111,
+ "零点零": 0,
+ "零点零零": 0,
+ "一七二零": 1720,
+ "一七二零点一": 1720.1,
+ "一七二零点一三四": 1720.134,
+ "一二三": 123,
+ "负零点一零": -0.1,
+ "负一七二零": -1720,
+ "负一七二零点一": -1720.1,
+ # 口语
+ "三万五": 35000,
+ "十三万五": 135000,
+ "两千六": 2600,
+ "一百二": 120,
+ "一百二十万三": 1203000,
+ # 繁体
+ "兩千六": 2600,
+ # 大写
+ "壹拾壹元": 11,
+ "壹佰壹拾壹圆": 111,
+ "壹拾壹圆": 11,
+ # 特殊
+ "〇": 0,
+ }
+
+ self.smart_data_dict = {
+ "100万": 1000000,
+ "100万三千": 1003000,
+ "200亿零四千230": 20000004230,
+ "一百点123": 100.123,
+ "10.1万": 101000,
+ "-10.1万": -101000,
+ "35.1亿": 3510000000,
+ "10.1": 10.1,
+ "-10.1": -10.1,
+ }
+
+ self.error_smart_datas = [
+ "10.1万零100",
+ "10..1万",
+ ]
+
+ self.error_normal_datas = [
+ "零点",
+ "点零",
+ "零点点",
+ "零点零大",
+ ]
+ self.error_normal_datas.extend(self.error_smart_datas)
+ self.error_normal_datas.extend(list(self.smart_data_dict.keys()))
+
+ self.error_strict_datas = [
+ "一一",
+ "壹壹",
+ "零点",
+ "点零",
+ "点一",
+ "百十一",
+ "十一十二",
+ "负十一十二",
+ "十七十八",
+ ]
+ self.error_strict_datas.extend(self.error_normal_datas)
+ self.error_strict_datas.extend(list(self.normal_data_dict.keys()))
+
+ # 不可修改位置
+ self.normal_data_dict.update(self.strict_data_dict)
+ self.smart_data_dict.update(self.normal_data_dict)
+
+ self.ca = Cn2An()
+
def test_cn2an(self) -> None:
    """Check ``cn2an`` against the expected-value and expected-error fixtures.

    Every entry of the strict/normal/smart dictionaries must convert to its
    mapped number under the matching mode, and every entry of the three
    error lists must raise ``ValueError`` under the mode the list is named
    after.
    """
    expected_by_mode = (
        ("strict", self.strict_data_dict),
        ("normal", self.normal_data_dict),
        ("smart", self.smart_data_dict),
    )
    for mode, cases in expected_by_mode:
        for text, expected in cases.items():
            # subTest keeps the loop running and names the failing input.
            with self.subTest(mode=mode, text=text):
                self.assertEqual(self.ca.cn2an(text, mode), expected)

    # The original called cn2an without a mode for all three error lists,
    # so the "normal" and "smart" lists were only ever checked under the
    # default mode. Each list is now checked under its own mode.
    errors_by_mode = (
        ("strict", self.error_strict_datas),
        ("normal", self.error_normal_datas),
        ("smart", self.error_smart_datas),
    )
    for mode, bad_inputs in errors_by_mode:
        for text in bad_inputs:
            with self.subTest(mode=mode, text=text):
                with self.assertRaises(ValueError):
                    self.ca.cn2an(text, mode)
+
+
if __name__ == '__main__':
    # Run this module's test cases when the file is executed as a script.
    unittest.main()
@@ -0,0 +1,135 @@ |
||
1 |
# Chinese digit character -> integer value.
# Each digit has a lower-case form (一) and an upper-case/financial form (壹);
# some also have colloquial forms (〇, 幺, 两, 仨).
NUMBER_CN2AN = {
    "零": 0,
    "〇": 0,
    "一": 1,
    "壹": 1,
    "幺": 1,
    "二": 2,
    "贰": 2,
    "两": 2,
    "三": 3,
    "叁": 3,
    # NORMAL_CN_NUMBER lists 仨 as a variant of 三, so it needs a value here
    # as well; without it the character was accepted but had no mapping.
    "仨": 3,
    "四": 4,
    "肆": 4,
    "五": 5,
    "伍": 5,
    "六": 6,
    "陆": 6,
    "七": 7,
    "柒": 7,
    "八": 8,
    "捌": 8,
    "九": 9,
    "玖": 9,
}
|
25 |
# Chinese unit character -> multiplier.
# 十/百/千 each have a lower-case and an upper-case (拾/佰/仟) spelling.
UNIT_CN2AN = {
    "十": 10,
    "拾": 10,
    "百": 100,
    "佰": 100,
    "千": 1000,
    "仟": 1000,
    "万": 10000,
    "亿": 100000000,
}
|
35 |
# Multiplier -> lower-case Chinese unit character.
UNIT_LOW_AN2CN = {
    10: "十",
    100: "百",
    1000: "千",
    10000: "万",
    100000000: "亿",
}
|
42 |
# Digit 0-9 -> lower-case Chinese digit character.
NUMBER_LOW_AN2CN = {
    0: "零",
    1: "一",
    2: "二",
    3: "三",
    4: "四",
    5: "五",
    6: "六",
    7: "七",
    8: "八",
    9: "九",
}
|
54 |
# Digit 0-9 -> upper-case (financial) Chinese digit character.
NUMBER_UP_AN2CN = {
    0: "零",
    1: "壹",
    2: "贰",
    3: "叁",
    4: "肆",
    5: "伍",
    6: "陆",
    7: "柒",
    8: "捌",
    9: "玖",
}
|
66 |
# Lower-case unit characters in positional order: each row starts with the
# group unit ("", 万, 亿, 万) followed by 十, 百, 千.
# NOTE(review): presumably index i is the unit of the 10**i digit position
# when An2Cn builds its output; confirm against the consumer.
UNIT_LOW_ORDER_AN2CN = [
    "", "十", "百", "千",
    "万", "十", "百", "千",
    "亿", "十", "百", "千",
    "万", "十", "百", "千",
]
|
84 |
# Upper-case (financial) unit characters in positional order: each row starts
# with the group unit ("", 万, 亿, 万) followed by 拾, 佰, 仟.
# NOTE(review): presumably index i is the unit of the 10**i digit position
# when An2Cn builds its output; confirm against the consumer.
UNIT_UP_ORDER_AN2CN = [
    "", "拾", "佰", "仟",
    "万", "拾", "佰", "仟",
    "亿", "拾", "佰", "仟",
    "万", "拾", "佰", "仟",
]
|
102 |
# Canonical character -> string of spellings listed for it in the "strict"
# table: the lower-case form plus, where one exists, the upper-case form.
# NOTE(review): presumably used by Cn2An to validate/normalise strict-mode
# input; the consumer is not visible here.
STRICT_CN_NUMBER = {
    "零": "零",
    "一": "一壹",
    "二": "二贰",
    "三": "三叁",
    "四": "四肆",
    "五": "五伍",
    "六": "六陆",
    "七": "七柒",
    "八": "八捌",
    "九": "九玖",
    "十": "十拾",
    "百": "百佰",
    "千": "千仟",
    "万": "万",
    "亿": "亿",
}
|
119 |
# Canonical character -> string of spellings listed for it in the "normal"
# table. Compared with STRICT_CN_NUMBER this additionally lists the
# colloquial forms 〇, 幺, 两 and 仨.
# NOTE(review): presumably used by Cn2An to validate/normalise normal- and
# smart-mode input; the consumer is not visible here.
NORMAL_CN_NUMBER = {
    "零": "零〇",
    "一": "一壹幺",
    "二": "二贰两",
    "三": "三叁仨",
    "四": "四肆",
    "五": "五伍",
    "六": "六陆",
    "七": "七柒",
    "八": "八捌",
    "九": "九玖",
    "十": "十拾",
    "百": "百佰",
    "千": "千仟",
    "万": "万",
    "亿": "亿",
}
@@ -0,0 +1,29 @@ |
||
1 |
+import torbjorn as tbn |
|
2 |
+ |
|
3 |
+from .an2cn import An2Cn |
|
4 |
+from .cn2an import Cn2An |
|
5 |
+ |
|
6 |
# Converter instances shared by both benchmark functions, created once so
# construction cost is not part of the timed loops.
ac = An2Cn()
ca = Cn2An()

# A 16-digit number and its Chinese spelling; each benchmark converts one
# into the other and checks the result against its counterpart.
an = 9876543298765432
cn = "九千八百七十六万五千四百三十二亿九千八百七十六万五千四百三十二"
|
11 |
+ |
|
12 |
+ |
|
13 |
@tbn.run_time
def run_cn2an_ten_thousand_times() -> None:
    """Convert ``cn`` to a number 10000 times, checking every result.

    Raises:
        AssertionError: if any conversion does not equal ``an``.
    """
    for _ in range(10000):
        result = ca.cn2an(cn)
        # Explicit raise instead of ``assert`` so the check is not stripped
        # when Python runs with -O.
        if result != an:
            raise AssertionError(f"cn2an returned {result!r}, expected {an!r}")
|
18 |
+ |
|
19 |
+ |
|
20 |
@tbn.run_time
def run_an2cn_ten_thousand_times() -> None:
    """Convert ``an`` to Chinese 10000 times, checking every result.

    Raises:
        AssertionError: if any conversion does not equal ``cn``.
    """
    for _ in range(10000):
        result = ac.an2cn(an)
        # Explicit raise instead of ``assert`` so the check is not stripped
        # when Python runs with -O.
        if result != cn:
            raise AssertionError(f"an2cn returned {result!r}, expected {cn!r}")
|
25 |
+ |
|
26 |
+ |
|
27 |
if __name__ == '__main__':
    # Run both benchmarks when the file is executed as a script.
    run_cn2an_ten_thousand_times()
    run_an2cn_ten_thousand_times()
@@ -0,0 +1,104 @@ |
||
1 |
+import re |
|
2 |
+from warnings import warn |
|
3 |
+ |
|
4 |
+from .cn2an import Cn2An |
|
5 |
+from .an2cn import An2Cn |
|
6 |
+from .conf import UNIT_CN2AN |
|
7 |
+ |
|
8 |
+ |
|
9 |
class Transform(object):
    """Rewrite the numbers embedded in a sentence between Chinese and Arabic form.

    ``transform`` runs a fixed sequence of regex substitutions (date,
    fraction, percent, celsius, plain number) over the input and delegates
    each matched span to ``__sub_util``, which converts it with the bound
    ``cn2an`` / ``an2cn`` callables.
    """

    def __init__(self) -> None:
        # Digits accepted inside a Chinese number by the sentence-level patterns.
        self.all_num = "零一二三四五六七八九"
        # Every unit character known to UNIT_CN2AN, concatenated for use in a
        # regex character class.
        self.all_unit = "".join(list(UNIT_CN2AN.keys()))
        self.cn2an = Cn2An().cn2an
        self.an2cn = An2Cn().an2cn
        # Optional 负, optional "<integer part>点", then digits/units.
        self.cn_pattern = f"负?([{self.all_num}{self.all_unit}]+点)?[{self.all_num}{self.all_unit}]+"
        # Arabic digits followed by Chinese units, e.g. "2.5亿". The dot is
        # escaped so only a literal decimal point is accepted; unescaped it
        # matched any character between two digit runs.
        self.smart_cn_pattern = fr"-?([0-9]+\.)?[0-9]+[{self.all_unit}]+"

    def transform(self, inputs: str, method: str = "cn2an") -> str:
        """Convert every number found in ``inputs``.

        Args:
            inputs: sentence to rewrite.
            method: ``"cn2an"`` (Chinese to Arabic) or ``"an2cn"``
                (Arabic to Chinese).

        Returns:
            The sentence with each matched number span replaced.

        Raises:
            ValueError: if ``method`` is neither ``"cn2an"`` nor ``"an2cn"``.
        """
        if method == "cn2an":
            # Normalise special characters before any pattern matching.
            inputs = inputs.replace("廿", "二十").replace("半", "0.5").replace("两", "2")
            # date
            inputs = re.sub(
                fr"((({self.smart_cn_pattern})|({self.cn_pattern}))年)?([{self.all_num}十]+月)?([{self.all_num}十]+日)?",
                lambda x: self.__sub_util(x.group(), "cn2an", "date"), inputs)
            # fraction
            inputs = re.sub(fr"{self.cn_pattern}分之{self.cn_pattern}",
                            lambda x: self.__sub_util(x.group(), "cn2an", "fraction"), inputs)
            # percent
            inputs = re.sub(fr"百分之{self.cn_pattern}",
                            lambda x: self.__sub_util(x.group(), "cn2an", "percent"), inputs)
            # celsius
            inputs = re.sub(fr"{self.cn_pattern}摄氏度",
                            lambda x: self.__sub_util(x.group(), "cn2an", "celsius"), inputs)
            # number
            output = re.sub(self.cn_pattern,
                            lambda x: self.__sub_util(x.group(), "cn2an", "number"), inputs)

        elif method == "an2cn":
            # date
            inputs = re.sub(r"(\d{2,4}年)?(\d{1,2}月)?(\d{1,2}日)?",
                            lambda x: self.__sub_util(x.group(), "an2cn", "date"), inputs)
            # fraction
            inputs = re.sub(r"\d+/\d+",
                            lambda x: self.__sub_util(x.group(), "an2cn", "fraction"), inputs)
            # percent
            inputs = re.sub(r"-?(\d+\.)?\d+%",
                            lambda x: self.__sub_util(x.group(), "an2cn", "percent"), inputs)
            # celsius
            inputs = re.sub(r"\d+℃",
                            lambda x: self.__sub_util(x.group(), "an2cn", "celsius"), inputs)
            # number
            output = re.sub(r"-?(\d+\.)?\d+",
                            lambda x: self.__sub_util(x.group(), "an2cn", "number"), inputs)
        else:
            raise ValueError(f"error method: {method}, only support 'cn2an' and 'an2cn'!")

        return output

    def __sub_util(self, inputs, method: str = "cn2an", sub_mode: str = "number") -> str:
        """Convert one matched span according to ``method`` and ``sub_mode``.

        Args:
            inputs: the text matched by one of the patterns in ``transform``.
            method: ``"cn2an"``; any other value takes the Arabic-to-Chinese
                branch.
            sub_mode: one of ``"date"``, ``"fraction"``, ``"percent"``,
                ``"celsius"``, ``"number"``.

        Returns:
            The converted span. If the conversion raises, a warning is issued
            and ``inputs`` is returned unchanged. An empty ``inputs`` is
            returned as-is.
        """
        # The date patterns consist only of optional groups, so they also
        # produce empty matches. Return those explicitly instead of falling
        # off the end of the function and handing None back to re.sub.
        if not inputs:
            return inputs

        try:
            if method == "cn2an":
                if sub_mode == "date":
                    return re.sub(fr"(({self.smart_cn_pattern})|({self.cn_pattern}))",
                                  lambda x: str(self.cn2an(x.group(), "smart")), inputs)
                elif sub_mode == "fraction":
                    # A span starting with 百 is left untouched here so that
                    # "百分之…" is handled by the later percent pass.
                    if inputs[0] != "百":
                        frac_result = re.sub(self.cn_pattern,
                                             lambda x: str(self.cn2an(x.group(), "smart")), inputs)
                        # "X分之Y" reads "Y out of X": the part before 分之
                        # is the denominator.
                        denominator, numerator = frac_result.split("分之")
                        return f"{numerator}/{denominator}"
                    else:
                        return inputs
                elif sub_mode == "percent":
                    return re.sub(f"(?<=百分之){self.cn_pattern}",
                                  lambda x: str(self.cn2an(x.group(), "smart")), inputs).replace("百分之", "") + "%"
                elif sub_mode == "celsius":
                    return re.sub(f"{self.cn_pattern}(?=摄氏度)",
                                  lambda x: str(self.cn2an(x.group(), "smart")), inputs).replace("摄氏度", "℃")
                elif sub_mode == "number":
                    return str(self.cn2an(inputs, "smart"))
                else:
                    raise Exception(f"error sub_mode: {sub_mode} !")
            else:
                if sub_mode == "date":
                    # The year is converted with "direct" mode, the remaining
                    # numbers (month, day) with "low" mode.
                    inputs = re.sub(r"\d+(?=年)",
                                    lambda x: self.an2cn(x.group(), "direct"), inputs)
                    return re.sub(r"\d+",
                                  lambda x: self.an2cn(x.group(), "low"), inputs)
                elif sub_mode == "fraction":
                    frac_result = re.sub(r"\d+", lambda x: self.an2cn(x.group(), "low"), inputs)
                    numerator, denominator = frac_result.split("/")
                    return f"{denominator}分之{numerator}"
                elif sub_mode == "celsius":
                    # Drop the trailing "℃" before converting.
                    return self.an2cn(inputs[:-1], "low") + "摄氏度"
                elif sub_mode == "percent":
                    # Drop the trailing "%" before converting.
                    return "百分之" + self.an2cn(inputs[:-1], "low")
                elif sub_mode == "number":
                    return self.an2cn(inputs, "low")
                else:
                    raise Exception(f"error sub_mode: {sub_mode} !")
        except Exception as e:
            # Best effort: a span that cannot be converted is reported and
            # left as it was, rather than aborting the whole sentence.
            warn(str(e))
            return inputs
@@ -0,0 +1,40 @@ |
||
1 |
+import unittest |
|
2 |
+ |
|
3 |
+from .transform import Transform |
|
4 |
+ |
|
5 |
+ |
|
6 |
class TransformTest(unittest.TestCase):
    """Sentence-level conversion tests for ``Transform``."""

    def setUp(self) -> None:
        # Arabic sentence -> Chinese sentence. test_transform checks these
        # pairs in both directions.
        self.strict_data_dict = {
            "小王捡了100块钱": "小王捡了一百块钱",
            "用户增长最快的3个城市": "用户增长最快的三个城市",
            "小王的生日是2001年3月4日": "小王的生日是二零零一年三月四日",
            "小王的生日是2012年12月12日": "小王的生日是二零一二年十二月十二日",
            "今天股价上涨了8%": "今天股价上涨了百分之八",
            "第2天股价下降了-3.8%": "第二天股价下降了百分之负三点八",
            "抛出去的硬币为正面的概率是1/2": "抛出去的硬币为正面的概率是二分之一",
            "现在室内温度为39℃,很热啊!": "现在室内温度为三十九摄氏度,很热啊!",
            "创业板指9月9日早盘低开1.57%": "创业板指九月九日早盘低开百分之一点五七"
        }

        # Input sentence -> expected "cn2an" output. test_transform checks
        # these in that direction only.
        self.smart_data_dict = {
            "约2.5亿年~6500万年": "约250000000年~65000000年",
            "廿二日,日出东方": "22日,日出东方",
            "大陆": "大陆",
            "半斤": "0.5斤",
            "两个": "2个",
        }

        self.t = Transform()

    def test_transform(self) -> None:
        """Check both conversion directions against the fixtures."""
        for an_text, cn_text in self.strict_data_dict.items():
            # subTest reports the failing sentence and lets the loop continue.
            with self.subTest(text=an_text):
                self.assertEqual(self.t.transform(an_text, "an2cn"), cn_text)
                self.assertEqual(self.t.transform(cn_text, "cn2an"), an_text)

        for cn_text, expected in self.smart_data_dict.items():
            with self.subTest(text=cn_text):
                self.assertEqual(self.t.transform(cn_text, "cn2an"), expected)
|
37 |
+ |
|
38 |
+ |
|
39 |
if __name__ == '__main__':
    # Run this module's test cases when the file is executed as a script.
    unittest.main()