将html编码转为utf8

gopm get -g -v golang.org/x/text  # 虽然也是官方库,但是没有包括在标准库里

将gbk转为utf8

注意:如果原网页本身就是 UTF-8 编码,再按 GBK 解码,输出反而会变成乱码

package  main

import (
    "net/http"
    "io/ioutil"
    "fmt"
    "golang.org/x/text/transform"
    "golang.org/x/text/encoding/simplifiedchinese"
)

func main(){
    resp, err :=http.Get("https://www.zhenai.com/zhenghun")
    if err!=nil{
        panic(err)
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        fmt.Printf("Error Status Code: %d", resp.StatusCode)
        return
    }
    utf8Reader :=transform.NewReader(resp.Body,simplifiedchinese.GBK.NewDecoder())

    all, err :=ioutil.ReadAll(utf8Reader)
    if err!=nil {
        panic(err)
    }
    fmt.Printf("%s\n",all)
}

改进

可以看到,上面的代码通用性不强:我们并不能肯定网页的编码一定是 GBK

还有一个库,可以自动帮助检测网页的编码

gopm get -g -v golang.org/x/net/html
package  main

import (
    "net/http"
    "io/ioutil"
    "fmt"
    "golang.org/x/text/transform"
    "io"
    "golang.org/x/text/encoding"
    "bufio"
    "golang.org/x/net/html/charset"
)

// main downloads the zhenai.com listing page, detects its character encoding
// from the first bytes of the body, and prints the page converted to UTF-8.
func main() {
	resp, err := http.Get("https://www.zhenai.com/zhenghun")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		fmt.Printf("Error Status Code: %d", resp.StatusCode)
		return
	}

	// Buffer the body exactly once and share that reader between detection
	// and decoding. Handing resp.Body itself to determinEncoding makes it
	// peek through a throwaway bufio.Reader, which pulls the start of the
	// page out of resp.Body; decoding resp.Body afterwards would then miss
	// those bytes. bufio.NewReader returns an existing *bufio.Reader of
	// sufficient size unchanged, so peeking through bodyReader consumes
	// nothing.
	bodyReader := bufio.NewReader(resp.Body)
	e := determinEncoding(bodyReader)

	utf8Reader := transform.NewReader(bodyReader, e.NewDecoder())
	all, err := ioutil.ReadAll(utf8Reader)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s\n", all)
}

// determinEncoding guesses the character encoding of the content served by r
// by peeking at up to its first 1024 bytes.
//
// Pass a *bufio.Reader and keep reading from that same reader afterwards:
// bufio.NewReader hands back an existing *bufio.Reader of sufficient size as
// is, so the peeked bytes remain available to the caller. With any other
// io.Reader, the bytes buffered for the peek are consumed from r.
//
// It panics if reading fails for any reason other than reaching the end of
// the input.
func determinEncoding(r io.Reader) encoding.Encoding {
	// Peek returns whatever bytes it could get together with io.EOF when the
	// input is shorter than 1024 bytes; a short page is not a failure, so
	// detection proceeds with the bytes that are available.
	head, err := bufio.NewReader(r).Peek(1024)
	if err != nil && err != io.EOF {
		panic(err)
	}

	// An empty content type means no Content-Type hint is supplied; only the
	// encoding itself is needed, so the name and certainty results are dropped.
	e, _, _ := charset.DetermineEncoding(head, "")
	return e
}

results matching ""

    No results matching ""