获取GB2312编码所有汉字

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/** Charset used to decode and encode GB2312 byte sequences in this class. */
private static final Charset CHARSET = Charset.forName("GB2312");

/**
 * Collects every Chinese character (hanzi) defined by GB2312.
 *
 * <p>Hanzi occupy lead bytes 0xB0-0xF7 and trail bytes 0xA1-0xFE. Every
 * two-byte combination in that rectangle is decoded with {@code CHARSET}.
 * GB2312 leaves a few positions in that area unassigned (0xD7FA-0xD7FE);
 * such positions decode to the replacement character U+FFFD and are skipped
 * so the result contains only real characters.
 *
 * @return the decoded characters in code-point order of the GB2312 table,
 *         one single-character string per entry
 */
private static List<String> getGB2312() {
    final int leadFirst = 0xB0;
    final int leadLast = 0xF7;
    final int trailFirst = 0xA1;
    final int trailLast = 0xFE;

    // Upper bound on the result size; avoids repeated growth of the backing array.
    int capacity = (leadLast - leadFirst + 1) * (trailLast - trailFirst + 1);
    List<String> words = new ArrayList<>(capacity);

    byte[] pair = new byte[2];
    for (int lead = leadFirst; lead <= leadLast; lead++) {
        pair[0] = (byte) lead;
        for (int trail = trailFirst; trail <= trailLast; trail++) {
            pair[1] = (byte) trail;
            String word = new String(pair, CHARSET);
            // Unassigned code positions come back as U+FFFD; they are not hanzi.
            if (word.indexOf('\uFFFD') < 0) {
                words.add(word);
            }
        }
    }
    return words;
}

/**
 * Reports whether {@code str} contains at least one character that
 * {@code CHARSET} (GB2312) encodes as a two-byte sequence.
 *
 * <p>Each UTF-16 char is encoded on its own and the scan stops at the first
 * match, so the result is {@code true} as soon as any single char qualifies;
 * it does not require every char to qualify.
 *
 * @param str the text to inspect; may be {@code null}
 * @return {@code true} if some char encodes to two bytes inside the checked
 *         byte range; {@code false} otherwise, including for {@code null}
 *         or empty input
 */
public static boolean isGB2312(String str) {
    if (str == null) {
        return false;
    }
    for (int i = 0; i < str.length(); i++) {
        byte[] encoded = String.valueOf(str.charAt(i)).getBytes(CHARSET);
        if (encoded.length != 2) {
            // Not a double-byte encoding (e.g. ASCII), so it cannot match.
            continue;
        }
        int lead = encoded[0] & 0xFF;
        int trail = encoded[1] & 0xFF;
        // NOTE(review): 0x81-0xFE / 0x40-0xFE is the wider GBK double-byte range;
        // GB2312 proper uses 0xA1-0xFE for both bytes. The bounds are kept as they
        // were so existing results do not change.
        if (lead >= 0x81 && lead <= 0xFE && trail >= 0x40 && trail <= 0xFE) {
            return true;
        }
    }
    return false;
}
作者

雾非雾的情思

发布于

2022-08-22

更新于

2022-08-23

许可协议

评论