从一些编码为 GBK 或 Big5 的网站爬取到的内容,直接当作 UTF-8 处理会出现乱码,需要先转换编码。Go 的 golang.org/x/text/encoding 库可以方便地完成转换。
package main
import (
"bytes"
"fmt"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/transform"
"io/ioutil"
)
//convert GBK to UTF-8
func DecodeGBK(s []byte) ([]byte, error) {
I := bytes.NewReader(s)
O := transform.NewReader(I, simplifiedchinese.GBK.NewDecoder())
d, e := ioutil.ReadAll(O)
if e != nil {
return nil, e
}
return d, nil
}
//convert UTF-8 to GBK
func EncodeGBK(s []byte) ([]byte, error) {
I := bytes.NewReader(s)
O := transform.NewReader(I, simplifiedchinese.GBK.NewEncoder())
d, e := ioutil.ReadAll(O)
if e != nil {
return nil, e
}
return d, nil
}
//convert BIG5 to UTF-8
func DecodeBig5(s []byte) ([]byte, error) {
I := bytes.NewReader(s)
O := transform.NewReader(I, traditionalchinese.Big5.NewDecoder())
d, e := ioutil.ReadAll(O)
if e != nil {
return nil, e
}
return d, nil
}
//convert UTF-8 to BIG5
func EncodeBig5(s []byte) ([]byte, error) {
I := bytes.NewReader(s)
O := transform.NewReader(I, traditionalchinese.Big5.NewEncoder())
d, e := ioutil.ReadAll(O)
if e != nil {
return nil, e
}
return d, nil
}
// main demonstrates round-tripping a UTF-8 string through Big5 and GBK.
//
// It prints the original text, then the bytes produced by each conversion
// step in turn: UTF-8 to Big5, Big5 back to UTF-8, UTF-8 to GBK, and GBK
// back to UTF-8. The Big5 and GBK bytes are printed raw, so they are not
// expected to be readable when the output is interpreted as UTF-8.
//
// Each conversion error is checked. On the first failure the error is
// printed and the demo stops, rather than feeding a nil slice into the
// following steps.
func main() {
	utf8Byte := []byte("UTF-8字符包子")
	fmt.Println(string(utf8Byte))

	// UTF-8 to Big5.
	s, err := EncodeBig5(utf8Byte)
	if err != nil {
		fmt.Println("encoding to Big5:", err)
		return
	}
	fmt.Println(string(s))

	// Big5 back to UTF-8.
	s, err = DecodeBig5(s)
	if err != nil {
		fmt.Println("decoding from Big5:", err)
		return
	}
	fmt.Println(string(s))

	// UTF-8 to GBK.
	s, err = EncodeGBK(s)
	if err != nil {
		fmt.Println("encoding to GBK:", err)
		return
	}
	fmt.Println(string(s))

	// GBK back to UTF-8.
	s, err = DecodeGBK(s)
	if err != nil {
		fmt.Println("decoding from GBK:", err)
		return
	}
	fmt.Println(string(s))
}
输出:
UTF-8字符包子
UTF-8?r?????l
UTF-8字符包子
UTF-8?ַ?????
UTF-8字符包子
本文网址: https://pylist.com/topic/156.html 转摘请注明来源