// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package xml import ( "bytes" "errors" "reflect" "strconv" "strings" "time" ) // BUG(rsc): Mapping between XML elements and data structures is inherently flawed: // an XML element is an order-dependent collection of anonymous // values, while a data structure is an order-independent collection // of named values. // See package json for a textual representation more suitable // to data structures. // Unmarshal parses the XML-encoded data and stores the result in // the value pointed to by v, which must be an arbitrary struct, // slice, or string. Well-formed data that does not fit into v is // discarded. // // Because Unmarshal uses the reflect package, it can only assign // to exported (upper case) fields. Unmarshal uses a case-sensitive // comparison to match XML element names to tag values and struct // field names. // // Unmarshal maps an XML element to a struct using the following rules. // In the rules, the tag of a field refers to the value associated with the // key 'xml' in the struct field's tag (see the example above). // // * If the struct has a field of type []byte or string with tag // ",innerxml", Unmarshal accumulates the raw XML nested inside the // element in that field. The rest of the rules still apply. // // * If the struct has a field named XMLName of type xml.Name, // Unmarshal records the element name in that field. // // * If the XMLName field has an associated tag of the form // "name" or "namespace-URL name", the XML element must have // the given name (and, optionally, name space) or else Unmarshal // returns an error. // // * If the XML element has an attribute whose name matches a // struct field name with an associated tag containing ",attr" or // the explicit name in a struct field tag of the form "name,attr", // Unmarshal records the attribute value in that field. // // * If the XML element contains character data, that data is // accumulated in the first struct field that has tag "chardata". // The struct field may have type []byte or string. // If there is no such field, the character data is discarded. // // * If the XML element contains comments, they are accumulated in // the first struct field that has tag ",comments". The struct // field may have type []byte or string. If there is no such // field, the comments are discarded. // // * If the XML element contains a sub-element whose name matches // the prefix of a tag formatted as "a" or "a>b>c", unmarshal // will descend into the XML structure looking for elements with the // given names, and will map the innermost elements to that struct // field. A tag starting with ">" is equivalent to one starting // with the field name followed by ">". // // * If the XML element contains a sub-element whose name matches // a struct field's XMLName tag and the struct field has no // explicit name tag as per the previous rule, unmarshal maps // the sub-element to that struct field. // // * If the XML element contains a sub-element whose name matches a // field without any mode flags (",attr", ",chardata", etc), Unmarshal // maps the sub-element to that struct field. // // * If the XML element contains a sub-element that hasn't matched any // of the above rules and the struct has a field with tag ",any", // unmarshal maps the sub-element to that struct field. // // * A non-pointer anonymous struct field is handled as if the // fields of its value were part of the outer struct. // // * A struct field with tag "-" is never unmarshalled into. // // Unmarshal maps an XML element to a string or []byte by saving the // concatenation of that element's character data in the string or // []byte. The saved []byte is never nil. // // Unmarshal maps an attribute value to a string or []byte by saving // the value in the string or slice. // // Unmarshal maps an XML element to a slice by extending the length of // the slice and mapping the element to the newly created value. // // Unmarshal maps an XML element or attribute value to a bool by // setting it to the boolean value represented by the string. // // Unmarshal maps an XML element or attribute value to an integer or // floating-point field by setting the field to the result of // interpreting the string value in decimal. There is no check for // overflow. // // Unmarshal maps an XML element to an xml.Name by recording the // element name. // // Unmarshal maps an XML element to a pointer by setting the pointer // to a freshly allocated value and then mapping the element to that value. // func Unmarshal(data []byte, v interface{}) error { return NewDecoder(bytes.NewBuffer(data)).Decode(v) } // Decode works like xml.Unmarshal, except it reads the decoder // stream to find the start element. func (d *Decoder) Decode(v interface{}) error { return d.DecodeElement(v, nil) } // DecodeElement works like xml.Unmarshal except that it takes // a pointer to the start XML element to decode into v. // It is useful when a client reads some raw XML tokens itself // but also wants to defer to Unmarshal for some elements. func (d *Decoder) DecodeElement(v interface{}, start *StartElement) error { val := reflect.ValueOf(v) if val.Kind() != reflect.Ptr { return errors.New("non-pointer passed to Unmarshal") } return d.unmarshal(val.Elem(), start) } // An UnmarshalError represents an error in the unmarshalling process. type UnmarshalError string func (e UnmarshalError) Error() string { return string(e) } // Unmarshal a single XML element into val. func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error { // Find start element if we need it. if start == nil { for { tok, err := p.Token() if err != nil { return err } if t, ok := tok.(StartElement); ok { start = &t break } } } if pv := val; pv.Kind() == reflect.Ptr { if pv.IsNil() { pv.Set(reflect.New(pv.Type().Elem())) } val = pv.Elem() } var ( data []byte saveData reflect.Value comment []byte saveComment reflect.Value saveXML reflect.Value saveXMLIndex int saveXMLData []byte saveAny reflect.Value sv reflect.Value tinfo *typeInfo err error ) switch v := val; v.Kind() { default: return errors.New("unknown type " + v.Type().String()) case reflect.Interface: // TODO: For now, simply ignore the field. In the near // future we may choose to unmarshal the start // element on it, if not nil. return p.Skip() case reflect.Slice: typ := v.Type() if typ.Elem().Kind() == reflect.Uint8 { // []byte saveData = v break } // Slice of element values. // Grow slice. n := v.Len() if n >= v.Cap() { ncap := 2 * n if ncap < 4 { ncap = 4 } new := reflect.MakeSlice(typ, n, ncap) reflect.Copy(new, v) v.Set(new) } v.SetLen(n + 1) // Recur to read element into slice. if err := p.unmarshal(v.Index(n), start); err != nil { v.SetLen(n) return err } return nil case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: saveData = v case reflect.Struct: typ := v.Type() if typ == nameType { v.Set(reflect.ValueOf(start.Name)) break } if typ == timeType { saveData = v break } sv = v tinfo, err = getTypeInfo(typ) if err != nil { return err } // Validate and assign element name. if tinfo.xmlname != nil { finfo := tinfo.xmlname if finfo.name != "" && finfo.name != start.Name.Local { return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name.Local + ">") } if finfo.xmlns != "" && finfo.xmlns != start.Name.Space { e := "expected element <" + finfo.name + "> in name space " + finfo.xmlns + " but have " if start.Name.Space == "" { e += "no name space" } else { e += start.Name.Space } return UnmarshalError(e) } fv := sv.FieldByIndex(finfo.idx) if _, ok := fv.Interface().(Name); ok { fv.Set(reflect.ValueOf(start.Name)) } } // Assign attributes. // Also, determine whether we need to save character data or comments. for i := range tinfo.fields { finfo := &tinfo.fields[i] switch finfo.flags & fMode { case fAttr: strv := sv.FieldByIndex(finfo.idx) // Look for attribute. for _, a := range start.Attr { if a.Name.Local == finfo.name { copyValue(strv, []byte(a.Value)) break } } case fCharData: if !saveData.IsValid() { saveData = sv.FieldByIndex(finfo.idx) } case fComment: if !saveComment.IsValid() { saveComment = sv.FieldByIndex(finfo.idx) } case fAny: if !saveAny.IsValid() { saveAny = sv.FieldByIndex(finfo.idx) } case fInnerXml: if !saveXML.IsValid() { saveXML = sv.FieldByIndex(finfo.idx) if p.saved == nil { saveXMLIndex = 0 p.saved = new(bytes.Buffer) } else { saveXMLIndex = p.savedOffset() } } } } } // Find end element. // Process sub-elements along the way. Loop: for { var savedOffset int if saveXML.IsValid() { savedOffset = p.savedOffset() } tok, err := p.Token() if err != nil { return err } switch t := tok.(type) { case StartElement: consumed := false if sv.IsValid() { consumed, err = p.unmarshalPath(tinfo, sv, nil, &t) if err != nil { return err } if !consumed && saveAny.IsValid() { consumed = true if err := p.unmarshal(saveAny, &t); err != nil { return err } } } if !consumed { if err := p.Skip(); err != nil { return err } } case EndElement: if saveXML.IsValid() { saveXMLData = p.saved.Bytes()[saveXMLIndex:savedOffset] if saveXMLIndex == 0 { p.saved = nil } } break Loop case CharData: if saveData.IsValid() { data = append(data, t...) } case Comment: if saveComment.IsValid() { comment = append(comment, t...) } } } if err := copyValue(saveData, data); err != nil { return err } switch t := saveComment; t.Kind() { case reflect.String: t.SetString(string(comment)) case reflect.Slice: t.Set(reflect.ValueOf(comment)) } switch t := saveXML; t.Kind() { case reflect.String: t.SetString(string(saveXMLData)) case reflect.Slice: t.Set(reflect.ValueOf(saveXMLData)) } return nil } func copyValue(dst reflect.Value, src []byte) (err error) { // Helper functions for integer and unsigned integer conversions var itmp int64 getInt64 := func() bool { itmp, err = strconv.ParseInt(string(src), 10, 64) // TODO: should check sizes return err == nil } var utmp uint64 getUint64 := func() bool { utmp, err = strconv.ParseUint(string(src), 10, 64) // TODO: check for overflow? return err == nil } var ftmp float64 getFloat64 := func() bool { ftmp, err = strconv.ParseFloat(string(src), 64) // TODO: check for overflow? return err == nil } // Save accumulated data. switch t := dst; t.Kind() { case reflect.Invalid: // Probably a comment. default: return errors.New("cannot happen: unknown type " + t.Type().String()) case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: if !getInt64() { return err } t.SetInt(itmp) case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: if !getUint64() { return err } t.SetUint(utmp) case reflect.Float32, reflect.Float64: if !getFloat64() { return err } t.SetFloat(ftmp) case reflect.Bool: value, err := strconv.ParseBool(strings.TrimSpace(string(src))) if err != nil { return err } t.SetBool(value) case reflect.String: t.SetString(string(src)) case reflect.Slice: if len(src) == 0 { // non-nil to flag presence src = []byte{} } t.SetBytes(src) case reflect.Struct: if t.Type() == timeType { tv, err := time.Parse(time.RFC3339, string(src)) if err != nil { return err } t.Set(reflect.ValueOf(tv)) } } return nil } // unmarshalPath walks down an XML structure looking for wanted // paths, and calls unmarshal on them. // The consumed result tells whether XML elements have been consumed // from the Decoder until start's matching end element, or if it's // still untouched because start is uninteresting for sv's fields. func (p *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement) (consumed bool, err error) { recurse := false Loop: for i := range tinfo.fields { finfo := &tinfo.fields[i] if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) { continue } for j := range parents { if parents[j] != finfo.parents[j] { continue Loop } } if len(finfo.parents) == len(parents) && finfo.name == start.Name.Local { // It's a perfect match, unmarshal the field. return true, p.unmarshal(sv.FieldByIndex(finfo.idx), start) } if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == start.Name.Local { // It's a prefix for the field. Break and recurse // since it's not ok for one field path to be itself // the prefix for another field path. recurse = true // We can reuse the same slice as long as we // don't try to append to it. parents = finfo.parents[:len(parents)+1] break } } if !recurse { // We have no business with this element. return false, nil } // The element is not a perfect match for any field, but one // or more fields have the path to this element as a parent // prefix. Recurse and attempt to match these. for { var tok Token tok, err = p.Token() if err != nil { return true, err } switch t := tok.(type) { case StartElement: consumed2, err := p.unmarshalPath(tinfo, sv, parents, &t) if err != nil { return true, err } if !consumed2 { if err := p.Skip(); err != nil { return true, err } } case EndElement: return true, nil } } panic("unreachable") } // Skip reads tokens until it has consumed the end element // matching the most recent start element already consumed. // It recurs if it encounters a start element, so it can be used to // skip nested structures. // It returns nil if it finds an end element matching the start // element; otherwise it returns an error describing the problem. func (d *Decoder) Skip() error { for { tok, err := d.Token() if err != nil { return err } switch tok.(type) { case StartElement: if err := d.Skip(); err != nil { return err } case EndElement: return nil } } panic("unreachable") }