aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Andy Balholm <andy@balholm.com> 2015-02-10 10:07:30 -0800
committer: Nigel Tao <nigeltao@golang.org> 2015-02-10 23:47:13 +0000
commit: ec18079348e79eb393866e87d402a1a8cc580d7f (patch)
tree: 1ee30204ef5d63e549e816ae6969714165d66c58
parent: 0075794919e30a27109ffc44e805d2dfaf233db0 (diff)
download: net-ec18079348e79eb393866e87d402a1a8cc580d7f.tar.gz
x/net/html/charset: add NewReaderByName
This provides a CharsetReader function for xml.Decoder.

Change-Id: Id00787bbdee90d267d38c84c98a06f9e10d93336
Reviewed-on: https://go-review.googlesource.com/4420
Reviewed-by: Nigel Tao <nigeltao@golang.org>
-rw-r--r-- html/charset/charset.go 13
-rw-r--r-- html/charset/charset_test.go 21
2 files changed, 34 insertions, 0 deletions
diff --git a/html/charset/charset.go b/html/charset/charset.go
index 2e5f9ba..84e6062 100644
--- a/html/charset/charset.go
+++ b/html/charset/charset.go
@@ -10,6 +10,7 @@ package charset // import "golang.org/x/net/html/charset"
import (
"bytes"
+ "fmt"
"io"
"mime"
"strings"
@@ -110,6 +111,18 @@ func NewReader(r io.Reader, contentType string) (io.Reader, error) {
return r, nil
}
+// NewReaderByName returns a reader that converts from the specified charset to
+// UTF-8. It returns an error if the charset is not one of the standard
+// encodings for HTML. It is suitable for use as encoding/xml.Decoder's
+// CharsetReader function.
+func NewReaderByName(charset string, input io.Reader) (io.Reader, error) {
+ e, _ := Lookup(charset)
+ if e == nil {
+ return nil, fmt.Errorf("unsupported charset: %q", charset)
+ }
+ return transform.NewReader(input, e.NewDecoder()), nil
+}
+
func prescan(content []byte) (e encoding.Encoding, name string) {
z := html.NewTokenizer(bytes.NewReader(content))
for {
diff --git a/html/charset/charset_test.go b/html/charset/charset_test.go
index d309f75..44a1867 100644
--- a/html/charset/charset_test.go
+++ b/html/charset/charset_test.go
@@ -6,6 +6,7 @@ package charset
import (
"bytes"
+ "encoding/xml"
"io/ioutil"
"runtime"
"strings"
@@ -213,3 +214,23 @@ func TestFromMeta(t *testing.T) {
}
}
}
+
+func TestXML(t *testing.T) {
+ const s = "<?xml version=\"1.0\" encoding=\"windows-1252\"?><a><Word>r\xe9sum\xe9</Word></a>"
+
+ d := xml.NewDecoder(strings.NewReader(s))
+ d.CharsetReader = NewReaderByName
+
+ var a struct {
+ Word string
+ }
+ err := d.Decode(&a)
+ if err != nil {
+ t.Fatalf("Decode: %v", err)
+ }
+
+ want := "résumé"
+ if a.Word != want {
+ t.Errorf("got %q, want %q", a.Word, want)
+ }
+}