diff options
author | Andy Balholm <andy@balholm.com> | 2015-02-10 10:07:30 -0800 |
---|---|---|
committer | Nigel Tao <nigeltao@golang.org> | 2015-02-10 23:47:13 +0000 |
commit | ec18079348e79eb393866e87d402a1a8cc580d7f (patch) | |
tree | 1ee30204ef5d63e549e816ae6969714165d66c58 | |
parent | 0075794919e30a27109ffc44e805d2dfaf233db0 (diff) | |
download | net-ec18079348e79eb393866e87d402a1a8cc580d7f.tar.gz |
x/net/html/charset: add NewReaderByName
This provides a CharsetReader function for xml.Decoder.
Change-Id: Id00787bbdee90d267d38c84c98a06f9e10d93336
Reviewed-on: https://go-review.googlesource.com/4420
Reviewed-by: Nigel Tao <nigeltao@golang.org>
-rw-r--r-- | html/charset/charset.go | 13 | ||||
-rw-r--r-- | html/charset/charset_test.go | 21 |
2 files changed, 34 insertions, 0 deletions
diff --git a/html/charset/charset.go b/html/charset/charset.go
index 2e5f9ba..84e6062 100644
--- a/html/charset/charset.go
+++ b/html/charset/charset.go
@@ -10,6 +10,7 @@ package charset // import "golang.org/x/net/html/charset"
 
 import (
 	"bytes"
+	"fmt"
 	"io"
 	"mime"
 	"strings"
@@ -110,6 +111,18 @@ func NewReader(r io.Reader, contentType string) (io.Reader, error) {
 	return r, nil
 }
 
+// NewReaderByName returns a reader that converts from the specified charset to
+// UTF-8. It returns an error if the charset is not one of the standard
+// encodings for HTML. It is suitable for use as encoding/xml.Decoder's
+// CharsetReader function.
+func NewReaderByName(charset string, input io.Reader) (io.Reader, error) {
+	e, _ := Lookup(charset)
+	if e == nil {
+		return nil, fmt.Errorf("unsupported charset: %q", charset)
+	}
+	return transform.NewReader(input, e.NewDecoder()), nil
+}
+
 func prescan(content []byte) (e encoding.Encoding, name string) {
 	z := html.NewTokenizer(bytes.NewReader(content))
 	for {
diff --git a/html/charset/charset_test.go b/html/charset/charset_test.go
index d309f75..44a1867 100644
--- a/html/charset/charset_test.go
+++ b/html/charset/charset_test.go
@@ -6,6 +6,7 @@ package charset
 
 import (
 	"bytes"
+	"encoding/xml"
 	"io/ioutil"
 	"runtime"
 	"strings"
@@ -213,3 +214,23 @@ func TestFromMeta(t *testing.T) {
 		}
 	}
 }
+
+func TestXML(t *testing.T) {
+	const s = "<?xml version=\"1.0\" encoding=\"windows-1252\"?><a><Word>r\xe9sum\xe9</Word></a>"
+
+	d := xml.NewDecoder(strings.NewReader(s))
+	d.CharsetReader = NewReaderByName
+
+	var a struct {
+		Word string
+	}
+	err := d.Decode(&a)
+	if err != nil {
+		t.Fatalf("Decode: %v", err)
+	}
+
+	want := "résumé"
+	if a.Word != want {
+		t.Errorf("got %q, want %q", a.Word, want)
+	}
+}