aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSasha Smundak <asmundak@google.com>2020-06-02 14:56:13 -0700
committerSasha Smundak <asmundak@google.com>2020-06-02 15:19:39 -0700
commit911f836dc405291b8d655328e785c00d3093528b (patch)
treed9c7512dfe416e2c5cd2e30fb081b0a70ac6ecad
parent5144115e7cda2641b0a3db1dd53ac557bbb58851 (diff)
parent23e6ba8cc2de65cb79fedf9827731040ddd4bdf5 (diff)
downloadgo-etree-911f836dc405291b8d655328e785c00d3093528b.tar.gz
Merge sso://github/beevik/etree, add mandatory files
Bug: 158031244 Test: N/A Change-Id: Idda5b583f92c0d952e1af23dcf90eed94db2f843
-rw-r--r--.travis.yml18
-rw-r--r--CONTRIBUTORS10
-rw-r--r--LICENSE24
-rw-r--r--METADATA20
-rw-r--r--MODULE_LICENSE_BSD0
-rw-r--r--README.md205
-rw-r--r--RELEASE_NOTES.md109
-rw-r--r--etree.go1505
-rw-r--r--etree_test.go1115
-rw-r--r--example_test.go69
-rw-r--r--go.mod3
-rw-r--r--helpers.go276
-rw-r--r--path.go580
-rw-r--r--path_test.go222
14 files changed, 4156 insertions, 0 deletions
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..e12bb98
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,18 @@
+language: go
+sudo: false
+
+env:
+ - GO111MODULE=on
+
+go:
+ - 1.11.x
+ - 1.12.x
+ - tip
+
+matrix:
+ allow_failures:
+ - go: tip
+
+script:
+ - go vet ./...
+ - go test -v ./...
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
new file mode 100644
index 0000000..03211a8
--- /dev/null
+++ b/CONTRIBUTORS
@@ -0,0 +1,10 @@
+Brett Vickers (beevik)
+Felix Geisendörfer (felixge)
+Kamil Kisiel (kisielk)
+Graham King (grahamking)
+Matt Smith (ma314smith)
+Michal Jemala (michaljemala)
+Nicolas Piganeau (npiganeau)
+Chris Brown (ccbrown)
+Earncef Sequeira (earncef)
+Gabriel de Labachelerie (wuzuf)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..26f1f77
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,24 @@
+Copyright 2015-2019 Brett Vickers. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..bcfee03
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,20 @@
+name: "go-etree"
+description:
+ "The etree package is a lightweight, pure go package that expresses XML in "
+ "the form of an element tree. Its design was inspired by the Python "
+ "ElementTree module."
+
+third_party {
+ url {
+ type: HOMEPAGE
+ value: "https://github.com/beevik/etree"
+ }
+ url {
+ type: GIT
+ value: "https://github.com/beevik/etree.git"
+ }
+ version: "v1.1.0"
+ last_upgrade_date { year: 2020 month: 2 day: 22 }
+ license_type: NOTICE
+}
+
diff --git a/MODULE_LICENSE_BSD b/MODULE_LICENSE_BSD
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_BSD
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..08ec26b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,205 @@
+[![Build Status](https://travis-ci.org/beevik/etree.svg?branch=master)](https://travis-ci.org/beevik/etree)
+[![GoDoc](https://godoc.org/github.com/beevik/etree?status.svg)](https://godoc.org/github.com/beevik/etree)
+
+etree
+=====
+
+The etree package is a lightweight, pure go package that expresses XML in
+the form of an element tree. Its design was inspired by the Python
+[ElementTree](http://docs.python.org/2/library/xml.etree.elementtree.html)
+module.
+
+Some of the package's capabilities and features:
+
+* Represents XML documents as trees of elements for easy traversal.
+* Imports, serializes, modifies or creates XML documents from scratch.
+* Writes and reads XML to/from files, byte slices, strings and io interfaces.
+* Performs simple or complex searches with lightweight XPath-like query APIs.
+* Auto-indents XML using spaces or tabs for better readability.
+* Implemented in pure go; depends only on standard go libraries.
+* Built on top of the go [encoding/xml](http://golang.org/pkg/encoding/xml)
+ package.
+
+### Creating an XML document
+
+The following example creates an XML document from scratch using the etree
+package and outputs its indented contents to stdout.
+```go
+doc := etree.NewDocument()
+doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
+doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
+
+people := doc.CreateElement("People")
+people.CreateComment("These are all known people")
+
+jon := people.CreateElement("Person")
+jon.CreateAttr("name", "Jon")
+
+sally := people.CreateElement("Person")
+sally.CreateAttr("name", "Sally")
+
+doc.Indent(2)
+doc.WriteTo(os.Stdout)
+```
+
+Output:
+```xml
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="style.xsl"?>
+<People>
+ <!--These are all known people-->
+ <Person name="Jon"/>
+ <Person name="Sally"/>
+</People>
+```
+
+### Reading an XML file
+
+Suppose you have a file on disk called `bookstore.xml` containing the
+following data:
+
+```xml
+<bookstore xmlns:p="urn:schemas-books-com:prices">
+
+ <book category="COOKING">
+ <title lang="en">Everyday Italian</title>
+ <author>Giada De Laurentiis</author>
+ <year>2005</year>
+ <p:price>30.00</p:price>
+ </book>
+
+ <book category="CHILDREN">
+ <title lang="en">Harry Potter</title>
+ <author>J K. Rowling</author>
+ <year>2005</year>
+ <p:price>29.99</p:price>
+ </book>
+
+ <book category="WEB">
+ <title lang="en">XQuery Kick Start</title>
+ <author>James McGovern</author>
+ <author>Per Bothner</author>
+ <author>Kurt Cagle</author>
+ <author>James Linn</author>
+ <author>Vaidyanathan Nagarajan</author>
+ <year>2003</year>
+ <p:price>49.99</p:price>
+ </book>
+
+ <book category="WEB">
+ <title lang="en">Learning XML</title>
+ <author>Erik T. Ray</author>
+ <year>2003</year>
+ <p:price>39.95</p:price>
+ </book>
+
+</bookstore>
+```
+
+This code reads the file's contents into an etree document.
+```go
+doc := etree.NewDocument()
+if err := doc.ReadFromFile("bookstore.xml"); err != nil {
+ panic(err)
+}
+```
+
+You can also read XML from a string, a byte slice, or an `io.Reader`.
+
+### Processing elements and attributes
+
+This example illustrates several ways to access elements and attributes using
+etree selection queries.
+```go
+root := doc.SelectElement("bookstore")
+fmt.Println("ROOT element:", root.Tag)
+
+for _, book := range root.SelectElements("book") {
+ fmt.Println("CHILD element:", book.Tag)
+ if title := book.SelectElement("title"); title != nil {
+ lang := title.SelectAttrValue("lang", "unknown")
+ fmt.Printf(" TITLE: %s (%s)\n", title.Text(), lang)
+ }
+ for _, attr := range book.Attr {
+ fmt.Printf(" ATTR: %s=%s\n", attr.Key, attr.Value)
+ }
+}
+```
+Output:
+```
+ROOT element: bookstore
+CHILD element: book
+ TITLE: Everyday Italian (en)
+ ATTR: category=COOKING
+CHILD element: book
+ TITLE: Harry Potter (en)
+ ATTR: category=CHILDREN
+CHILD element: book
+ TITLE: XQuery Kick Start (en)
+ ATTR: category=WEB
+CHILD element: book
+ TITLE: Learning XML (en)
+ ATTR: category=WEB
+```
+
+### Path queries
+
+This example uses etree's path functions to select all book titles that fall
+into the category of 'WEB'. The double-slash prefix in the path causes the
+search for book elements to occur recursively; book elements may appear at any
+level of the XML hierarchy.
+```go
+for _, t := range doc.FindElements("//book[@category='WEB']/title") {
+ fmt.Println("Title:", t.Text())
+}
+```
+
+Output:
+```
+Title: XQuery Kick Start
+Title: Learning XML
+```
+
+This example finds the first book element under the root bookstore element and
+outputs the tag and text of each of its child elements.
+```go
+for _, e := range doc.FindElements("./bookstore/book[1]/*") {
+ fmt.Printf("%s: %s\n", e.Tag, e.Text())
+}
+```
+
+Output:
+```
+title: Everyday Italian
+author: Giada De Laurentiis
+year: 2005
+price: 30.00
+```
+
+This example finds all books with a price of 49.99 and outputs their titles.
+```go
+path := etree.MustCompilePath("./bookstore/book[p:price='49.99']/title")
+for _, e := range doc.FindElementsPath(path) {
+ fmt.Println(e.Text())
+}
+```
+
+Output:
+```
+XQuery Kick Start
+```
+
+Note that this example uses the FindElementsPath function, which takes as an
+argument a pre-compiled path object. Use precompiled paths when you plan to
+search with the same path more than once.
+
+### Other features
+
+These are just a few examples of the things the etree package can do. See the
+[documentation](http://godoc.org/github.com/beevik/etree) for a complete
+description of its capabilities.
+
+### Contributing
+
+This project accepts contributions. Just fork the repo and submit a pull
+request!
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
new file mode 100644
index 0000000..ee59d7a
--- /dev/null
+++ b/RELEASE_NOTES.md
@@ -0,0 +1,109 @@
+Release v1.1.0
+==============
+
+**New Features**
+
+* New attribute helpers.
+ * Added the `Element.SortAttrs` method, which lexicographically sorts an
+ element's attributes by key.
+* New `ReadSettings` properties.
+ * Added `Entity` for the support of custom entity maps.
+* New `WriteSettings` properties.
+ * Added `UseCRLF` to allow the output of CR-LF newlines instead of the
+ default LF newlines. This is useful on Windows systems.
+* Additional support for text and CDATA sections.
+ * The `Element.Text` method now returns the concatenation of all consecutive
+ character data tokens immediately following an element's opening tag.
+ * Added `Element.SetCData` to replace the character data immediately
+ following an element's opening tag with a CDATA section.
+ * Added `Element.CreateCData` to create and add a CDATA section child
+ `CharData` token to an element.
+ * Added `Element.CreateText` to create and add a child text `CharData` token
+ to an element.
+ * Added `NewCData` to create a parentless CDATA section `CharData` token.
+ * Added `NewText` to create a parentless text `CharData`
+ token.
+ * Added `CharData.IsCData` to detect if the token contains a CDATA section.
+ * Added `CharData.IsWhitespace` to detect if the token contains whitespace
+ inserted by one of the document Indent functions.
+ * Modified `Element.SetText` so that it replaces a run of consecutive
+ character data tokens following the element's opening tag (instead of just
+ the first one).
+* New "tail text" support.
+ * Added the `Element.Tail` method, which returns the text immediately
+ following an element's closing tag.
+ * Added the `Element.SetTail` method, which modifies the text immediately
+ following an element's closing tag.
+* New element child insertion and removal methods.
+ * Added the `Element.InsertChildAt` method, which inserts a new child token
+ before the specified child token index.
+ * Added the `Element.RemoveChildAt` method, which removes the child token at
+ the specified child token index.
+* New element and attribute queries.
+ * Added the `Element.Index` method, which returns the element's index within
+ its parent element's child token list.
+ * Added the `Element.NamespaceURI` method to return the namespace URI
+ associated with an element.
+ * Added the `Attr.NamespaceURI` method to return the namespace URI
+ associated with an attribute.
+ * Added the `Attr.Element` method to return the element that an attribute
+ belongs to.
+* New Path filter functions.
+ * Added `[local-name()='val']` to keep elements whose unprefixed tag matches
+ the desired value.
+ * Added `[name()='val']` to keep elements whose full tag matches the desired
+ value.
+ * Added `[namespace-prefix()='val']` to keep elements whose namespace prefix
+ matches the desired value.
+ * Added `[namespace-uri()='val']` to keep elements whose namespace URI
+ matches the desired value.
+
+**Bug Fixes**
+
+* A default XML `CharSetReader` is now used to prevent failed parsing of XML
+ documents using certain encodings.
+ ([Issue](https://github.com/beevik/etree/issues/53)).
+* All characters are now properly escaped according to XML parsing rules.
+ ([Issue](https://github.com/beevik/etree/issues/55)).
+* The `Document.Indent` and `Document.IndentTabs` functions no longer insert
+ empty string `CharData` tokens.
+
+**Deprecated**
+
+* `Element`
+ * The `InsertChild` method is deprecated. Use `InsertChildAt` instead.
+ * The `CreateCharData` method is deprecated. Use `CreateText` instead.
+* `CharData`
+ * The `NewCharData` method is deprecated. Use `NewText` instead.
+
+
+Release v1.0.1
+==============
+
+**Changes**
+
+* Added support for absolute etree Path queries. An absolute path begins with
+ `/` or `//` and begins its search from the element's document root.
+* Added [`GetPath`](https://godoc.org/github.com/beevik/etree#Element.GetPath)
+ and [`GetRelativePath`](https://godoc.org/github.com/beevik/etree#Element.GetRelativePath)
+ functions to the [`Element`](https://godoc.org/github.com/beevik/etree#Element)
+ type.
+
+**Breaking changes**
+
+* A path starting with `//` is now interpreted as an absolute path.
+ Previously, it was interpreted as a relative path starting from the element
+ whose
+ [`FindElement`](https://godoc.org/github.com/beevik/etree#Element.FindElement)
+ method was called. To remain compatible with this release, all paths
+ prefixed with `//` should be prefixed with `.//` when called from any
+ element other than the document's root.
+* [**edit 2/1/2019**]: Minor releases should not contain breaking changes.
+ Even though this breaking change was very minor, it was a mistake to include
+ it in this minor release. In the future, all breaking changes will be
+ limited to major releases (e.g., version 2.0.0).
+
+Release v1.0.0
+==============
+
+Initial release.
diff --git a/etree.go b/etree.go
new file mode 100644
index 0000000..8a8c9bb
--- /dev/null
+++ b/etree.go
@@ -0,0 +1,1505 @@
+// Copyright 2015-2019 Brett Vickers.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package etree provides XML services through an Element Tree
+// abstraction.
+package etree
+
+import (
+ "bufio"
+ "bytes"
+ "encoding/xml"
+ "errors"
+ "io"
+ "os"
+ "sort"
+ "strings"
+)
+
+const (
+ // NoIndent is the 'spaces' value to pass to Document.Indent to disable
+ // indentation. Indent treats any negative value this way: every depth
+ // level then maps to an empty indentation string.
+ NoIndent = -1
+)
+
+// ErrXML is the sentinel error returned when XML parsing fails because the
+// input is not correctly formatted.
+var ErrXML = errors.New("etree: invalid XML format")
+
+// ReadSettings determine the default behavior of the Document's ReadFrom*
+// methods.
+type ReadSettings struct {
+ // CharsetReader to be passed to standard xml.Decoder. Documents created
+ // with NewDocument start with a pass-through function that returns the
+ // input reader unchanged (see newReadSettings); the zero value of this
+ // struct leaves it nil.
+ CharsetReader func(charset string, input io.Reader) (io.Reader, error)
+
+ // Permissive allows input containing common mistakes such as missing tags
+ // or attribute values. Default: false.
+ Permissive bool
+
+ // Entity is a custom entity map to be passed to standard xml.Decoder.
+ // Default: nil.
+ Entity map[string]string
+}
+
+// newReadSettings creates a default ReadSettings record.
+func newReadSettings() ReadSettings {
+ return ReadSettings{
+ CharsetReader: func(label string, input io.Reader) (io.Reader, error) {
+ return input, nil
+ },
+ Permissive: false,
+ }
+}
+
+// dup creates a duplicate of the ReadSettings object.
+func (s *ReadSettings) dup() ReadSettings {
+ var entityCopy map[string]string
+ if s.Entity != nil {
+ entityCopy = make(map[string]string)
+ for k, v := range s.Entity {
+ entityCopy[k] = v
+ }
+ }
+ return ReadSettings{
+ CharsetReader: s.CharsetReader,
+ Permissive: s.Permissive,
+ Entity: entityCopy,
+ }
+}
+
+// WriteSettings determine the behavior of the Document's WriteTo* and
+// Indent* methods. The zero value selects the defaults listed on each field.
+type WriteSettings struct {
+ // CanonicalEndTags forces the production of XML end tags, even for
+ // elements that have no child elements. Default: false.
+ CanonicalEndTags bool
+
+ // CanonicalText limits the production of XML character references in
+ // text data to the characters &, <, and >. If false, XML character
+ // references are also produced for " and '. Default: false.
+ CanonicalText bool
+
+ // CanonicalAttrVal limits the production of XML character references in
+ // attribute values to the characters &, < and ". If false, XML character
+ // references are also produced for > and '. Default: false.
+ CanonicalAttrVal bool
+
+ // UseCRLF causes the document's indentation methods to use a carriage
+ // return followed by a linefeed ("\r\n") when outputting a newline. If
+ // false, only a linefeed is used ("\n"). Default: false.
+ UseCRLF bool
+}
+
+// newWriteSettings creates a default WriteSettings record.
+func newWriteSettings() WriteSettings {
+ return WriteSettings{
+ CanonicalEndTags: false,
+ CanonicalText: false,
+ CanonicalAttrVal: false,
+ UseCRLF: false,
+ }
+}
+
+// dup returns a duplicate of the WriteSettings object. The struct holds only
+// boolean fields, so a plain value copy is a complete duplicate.
+func (s *WriteSettings) dup() WriteSettings {
+	clone := *s
+	return clone
+}
+
+// A Token is an interface type used to represent XML elements, character
+// data, CDATA sections, XML comments, XML directives, and XML processing
+// instructions. The unexported methods keep the set of implementations
+// closed to this package.
+type Token interface {
+ // Parent returns the element the token is a child of.
+ Parent() *Element
+ // Index returns the token's position within its parent's child list.
+ Index() int
+ // dup duplicates the token; 'parent' is the parent for the duplicate
+ // (nil when copying a detached tree, as Element.Copy does).
+ dup(parent *Element) Token
+ // setParent records the token's parent element.
+ setParent(parent *Element)
+ // setIndex records the token's position in its parent's child list; the
+ // package stores -1 for tokens that have been removed from a parent.
+ setIndex(index int)
+ // writeTo serializes the token to 'w' according to the settings 's'.
+ writeTo(w *bufio.Writer, s *WriteSettings)
+}
+
+// A Document is a container holding a complete XML tree.
+//
+// A document has a single embedded element, which contains zero or more child
+// tokens, one of which is usually the root element. The embedded element may
+// include other children such as processing instruction tokens or character
+// data tokens. The document's embedded element is never directly serialized;
+// only its children are.
+//
+// A document also contains read and write settings, which influence the way
+// the document is deserialized, serialized, and indented.
+type Document struct {
+ // Element is the embedded container whose children make up the document.
+ Element
+ // ReadSettings are handed to the reader by the ReadFrom* methods.
+ ReadSettings ReadSettings
+ // WriteSettings are used by the WriteTo* and Indent* methods.
+ WriteSettings WriteSettings
+}
+
+// An Element represents an XML element, its attributes, and its child tokens.
+type Element struct {
+ Space, Tag string // namespace prefix (may be empty) and unprefixed tag
+ Attr []Attr // key-value attribute pairs
+ Child []Token // child tokens (elements, comments, etc.)
+ parent *Element // parent element; nil when the element is unparented
+ index int // token index in parent's children; -1 for new or removed elements
+}
+
+// An Attr represents a key-value attribute within an XML element.
+type Attr struct {
+ Space, Key string // The attribute's namespace prefix (may be empty) and key
+ Value string // The attribute value string
+ element *Element // element containing the attribute
+}
+
+// charDataFlags are used with CharData tokens to store additional settings.
+// The values are bit flags and may be combined.
+type charDataFlags uint8
+
+const (
+ // The CharData contains only whitespace. (bit 0, value 1)
+ whitespaceFlag charDataFlags = 1 << iota
+
+ // The CharData contains a CDATA section. (bit 1, value 2)
+ cdataFlag
+)
+
+// CharData may be used to represent simple text data or a CDATA section
+// within an XML document. The Data property should never be modified
+// directly; use the SetData method instead.
+type CharData struct {
+ Data string // the simple text or CDATA section content
+ parent *Element // element holding this token
+ index int // position in the parent's child list
+ flags charDataFlags // whitespaceFlag / cdataFlag bits
+}
+
+// A Comment represents an XML comment.
+type Comment struct {
+ Data string // the comment's text
+ parent *Element // element holding this token
+ index int // position in the parent's child list
+}
+
+// A Directive represents an XML directive.
+type Directive struct {
+ Data string // the directive string
+ parent *Element // element holding this token
+ index int // position in the parent's child list
+}
+
+// A ProcInst represents an XML processing instruction.
+type ProcInst struct {
+ Target string // the processing instruction target
+ Inst string // the processing instruction value
+ parent *Element // element holding this token
+ index int // position in the parent's child list
+}
+
+// NewDocument returns an empty XML document: it has no root element, an
+// empty (non-nil) child list, and the default read and write settings.
+func NewDocument() *Document {
+	doc := new(Document)
+	doc.Child = make([]Token, 0)
+	doc.ReadSettings = newReadSettings()
+	doc.WriteSettings = newWriteSettings()
+	return doc
+}
+
+// NewDocumentWithRoot returns a new XML document whose root element is 'e'.
+// If 'e' currently has a parent, SetRoot detaches it from that parent before
+// it is attached to the new document.
+func NewDocumentWithRoot(e *Element) *Document {
+	doc := NewDocument()
+	doc.SetRoot(e)
+	return doc
+}
+
+// Copy returns a recursive, deep copy of the document. The embedded element
+// is duplicated without a parent, and the read and write settings are
+// duplicated alongside it.
+func (d *Document) Copy() *Document {
+	tree := d.Element.dup(nil).(*Element)
+	return &Document{
+		Element:       *tree,
+		ReadSettings:  d.ReadSettings.dup(),
+		WriteSettings: d.WriteSettings.dup(),
+	}
+}
+
+// Root returns the document's root element, which is the first child token
+// that is an element. It returns nil if the document has no element child.
+func (d *Document) Root() *Element {
+	for i := range d.Child {
+		root, isElem := d.Child[i].(*Element)
+		if !isElem {
+			continue
+		}
+		return root
+	}
+	return nil
+}
+
+// SetRoot makes the element 'e' the document's root element. If 'e' already
+// has a parent, it is first removed from that parent. If the document already
+// has a root element, the old root is unbound from the document and 'e' takes
+// over its slot in the child list; otherwise 'e' is appended as a new child.
+func (d *Document) SetRoot(e *Element) {
+	if owner := e.parent; owner != nil {
+		owner.RemoveChild(e)
+	}
+
+	doc := &d.Element
+	for slot := range doc.Child {
+		old, isElem := doc.Child[slot].(*Element)
+		if !isElem {
+			continue
+		}
+
+		// Found the current root: detach it and put 'e' in its place.
+		old.setParent(nil)
+		old.setIndex(-1)
+		doc.Child[slot] = e
+		e.setParent(doc)
+		e.setIndex(slot)
+		return
+	}
+
+	// The document had no root element yet.
+	doc.addChild(e)
+}
+
+// ReadFrom reads XML from the reader 'r' into this document, applying the
+// document's ReadSettings. It returns the number of bytes read and any error
+// encountered.
+func (d *Document) ReadFrom(r io.Reader) (n int64, err error) {
+	n, err = d.Element.readFrom(r, d.ReadSettings)
+	return n, err
+}
+
+// ReadFromFile opens the local file at path 'filepath' and reads its XML
+// content into this document. It returns the error from opening the file or,
+// failing that, the error from reading it. The file is closed before the
+// function returns.
+func (d *Document) ReadFromFile(filepath string) error {
+	file, openErr := os.Open(filepath)
+	if openErr != nil {
+		return openErr
+	}
+	defer file.Close()
+
+	if _, readErr := d.ReadFrom(file); readErr != nil {
+		return readErr
+	}
+	return nil
+}
+
+// ReadFromBytes reads XML from the byte slice 'b' into this document.
+func (d *Document) ReadFromBytes(b []byte) error {
+	src := bytes.NewReader(b)
+	_, err := d.ReadFrom(src)
+	return err
+}
+
+// ReadFromString reads XML from the string 's' into this document.
+func (d *Document) ReadFromString(s string) error {
+	src := strings.NewReader(s)
+	_, err := d.ReadFrom(src)
+	return err
+}
+
+// WriteTo serializes the document out to the writer 'w'. Only the children
+// of the document's embedded element are written. The function returns the
+// number of bytes written to 'w' and any error reported when the buffered
+// output is flushed.
+func (d *Document) WriteTo(w io.Writer) (n int64, err error) {
+	cw := newCountWriter(w)
+	b := bufio.NewWriter(cw)
+	for _, c := range d.Child {
+		c.writeTo(b, &d.WriteSettings)
+	}
+
+	// Flush in its own statement before reading the byte counter. In a tuple
+	// assignment such as "err, n = b.Flush(), cw.bytes" the Go spec does not
+	// order the field read relative to the call, so the counter could be
+	// sampled before the buffered bytes reach the counting writer.
+	err = b.Flush()
+	return cw.bytes, err
+}
+
+// WriteToFile serializes the document out to the file at path 'filepath',
+// creating or truncating the file. It returns the first error encountered
+// while creating, writing, or closing the file.
+func (d *Document) WriteToFile(filepath string) (err error) {
+	f, err := os.Create(filepath)
+	if err != nil {
+		return err
+	}
+	// A failed Close on a written file can mean the data never reached the
+	// disk, so report it unless an earlier write error takes precedence.
+	defer func() {
+		if cerr := f.Close(); err == nil {
+			err = cerr
+		}
+	}()
+
+	_, err = d.WriteTo(f)
+	return err
+}
+
+// WriteToBytes serializes this document into a slice of bytes. On failure it
+// returns a nil slice together with the error.
+func (d *Document) WriteToBytes() (b []byte, err error) {
+	var out bytes.Buffer
+	_, err = d.WriteTo(&out)
+	if err != nil {
+		return nil, err
+	}
+	return out.Bytes(), nil
+}
+
+// WriteToString serializes this document into a string.
+func (d *Document) WriteToString() (s string, err error) {
+ var b []byte
+ if b, err = d.WriteToBytes(); err != nil {
+ return
+ }
+ return string(b), nil
+}
+
+// indentFunc maps an element nesting depth to the string that Indent and
+// IndentTabs hand to Element.indent for that depth.
+type indentFunc func(depth int) string
+
+// Indent modifies the document's element tree by inserting character data
+// tokens containing newlines and indentation. Each depth level is indented by
+// 'spaces' spaces. A negative value such as etree.NoIndent disables
+// indentation. Newlines follow the WriteSettings.UseCRLF setting.
+func (d *Document) Indent(spaces int) {
+	var fn indentFunc
+	if spaces < 0 {
+		fn = func(depth int) string { return "" }
+	} else if d.WriteSettings.UseCRLF {
+		fn = func(depth int) string { return indentCRLF(depth*spaces, indentSpaces) }
+	} else {
+		fn = func(depth int) string { return indentLF(depth*spaces, indentSpaces) }
+	}
+	d.Element.indent(0, fn)
+}
+
+// IndentTabs modifies the document's element tree by inserting CharData
+// tokens containing newlines and tabs for indentation. One tab is used per
+// indentation level. Newlines follow the WriteSettings.UseCRLF setting.
+func (d *Document) IndentTabs() {
+	var fn indentFunc
+	if d.WriteSettings.UseCRLF {
+		fn = func(depth int) string { return indentCRLF(depth, indentTabs) }
+	} else {
+		fn = func(depth int) string { return indentLF(depth, indentTabs) }
+	}
+	d.Element.indent(0, fn)
+}
+
+// NewElement returns a new element with no parent and the specified tag
+// (i.e., name). The tag may include a namespace prefix followed by a colon;
+// spaceDecompose splits it into the element's Space and Tag.
+func NewElement(tag string) *Element {
+	prefix, local := spaceDecompose(tag)
+	return newElement(prefix, local, nil)
+}
+
+// newElement is a helper that builds an element with empty (non-nil)
+// attribute and child lists. When 'parent' is non-nil, the new element is
+// also added as a child of that parent.
+func newElement(space, tag string, parent *Element) *Element {
+	e := new(Element)
+	e.Space, e.Tag = space, tag
+	e.Attr = make([]Attr, 0)
+	e.Child = make([]Token, 0)
+	e.parent = parent
+	e.index = -1 // not in any child list until addChild places it
+
+	if parent == nil {
+		return e
+	}
+	parent.addChild(e)
+	return e
+}
+
+// Copy creates a recursive, deep copy of the element and all its attributes
+// and children. The returned element has no parent but can be parented to
+// another element using AddChild, or added to a document with SetRoot or
+// NewDocumentWithRoot.
+func (e *Element) Copy() *Element {
+	clone := e.dup(nil)
+	return clone.(*Element)
+}
+
+// FullTag returns the element's complete tag. When the element has a
+// namespace prefix the result is "prefix:tag"; otherwise it is just the tag.
+func (e *Element) FullTag() string {
+	if e.Space != "" {
+		return e.Space + ":" + e.Tag
+	}
+	return e.Tag
+}
+
+// NamespaceURI returns the XML namespace URI associated with the element.
+// For a prefixed element this is the value of the nearest matching
+// xmlns:prefix attribute on the element or one of its ancestors. For an
+// unprefixed element it is the value of the nearest xmlns attribute. The
+// empty string is returned when no matching declaration is found.
+func (e *Element) NamespaceURI() string {
+	if prefix := e.Space; prefix != "" {
+		return e.findLocalNamespaceURI(prefix)
+	}
+	return e.findDefaultNamespaceURI()
+}
+
+// findLocalNamespaceURI looks for an xmlns:prefix attribute matching the
+// requested prefix, starting at this element and walking up through its
+// ancestors. It returns the attribute's value, or the empty string if no
+// declaration is found.
+func (e *Element) findLocalNamespaceURI(prefix string) string {
+	cur := e
+	for {
+		for i := range cur.Attr {
+			attr := &cur.Attr[i]
+			if attr.Space == "xmlns" && attr.Key == prefix {
+				return attr.Value
+			}
+		}
+		if cur.parent == nil {
+			return ""
+		}
+		cur = cur.parent
+	}
+}
+
+// findDefaultNamespaceURI looks for an unprefixed xmlns attribute, starting
+// at this element and walking up through its ancestors. It returns the
+// attribute's value, or the empty string if no declaration is found.
+func (e *Element) findDefaultNamespaceURI() string {
+	cur := e
+	for {
+		for i := range cur.Attr {
+			attr := &cur.Attr[i]
+			if attr.Space == "" && attr.Key == "xmlns" {
+				return attr.Value
+			}
+		}
+		if cur.parent == nil {
+			return ""
+		}
+		cur = cur.parent
+	}
+}
+
+// namespacePrefix returns the namespace prefix associated with the element,
+// i.e. its Space field; the result is empty for an unprefixed element.
+func (e *Element) namespacePrefix() string {
+ return e.Space
+}
+
+// name returns the tag associated with the element, i.e. its Tag field,
+// without any namespace prefix.
+func (e *Element) name() string {
+ return e.Tag
+}
+
+// Text returns all character data immediately following the element's opening
+// tag.
+func (e *Element) Text() string {
+ if len(e.Child) == 0 {
+ return ""
+ }
+
+ text := ""
+ for _, ch := range e.Child {
+ if cd, ok := ch.(*CharData); ok {
+ if text == "" {
+ text = cd.Data
+ } else {
+ text += cd.Data
+ }
+ } else {
+ break
+ }
+ }
+ return text
+}
+
+// SetText replaces all character data immediately following an element's
+// opening tag with the requested string.
+func (e *Element) SetText(text string) {
+ e.replaceText(0, text, 0)
+}
+
+// SetCData replaces all character data immediately following an element's
+// opening tag with a CDATA section.
+func (e *Element) SetCData(text string) {
+ e.replaceText(0, text, cdataFlag)
+}
+
+// Tail returns all character data immediately following the element's end
+// tag.
+func (e *Element) Tail() string {
+ if e.Parent() == nil {
+ return ""
+ }
+
+ p := e.Parent()
+ i := e.Index()
+
+ text := ""
+ for _, ch := range p.Child[i+1:] {
+ if cd, ok := ch.(*CharData); ok {
+ if text == "" {
+ text = cd.Data
+ } else {
+ text += cd.Data
+ }
+ } else {
+ break
+ }
+ }
+ return text
+}
+
+// SetTail replaces all character data immediately following the element's end
+// tag with the requested string, stored as plain text. It does nothing when
+// the element has no parent.
+func (e *Element) SetTail(text string) {
+	if e.Parent() == nil {
+		return
+	}
+
+	// The tail lives in the parent's child list, right after this element.
+	after := e.Index() + 1
+	e.Parent().replaceText(after, text, 0)
+}
+
+// replaceText is a helper function that replaces a series of chardata tokens
+// starting at index i with the requested text. The run of consecutive
+// CharData tokens at child indices [i, end) is collapsed into at most one
+// token carrying 'text' and 'flags'; an empty 'text' removes the run
+// entirely. Tokens after the run are shifted down and re-indexed.
+func (e *Element) replaceText(i int, text string, flags charDataFlags) {
+ // end is the index of the first non-CharData child at or after i.
+ end := e.findTermCharDataIndex(i)
+
+ switch {
+ // No chardata token at index i.
+ case end == i:
+ if text != "" {
+ // insert a new chardata token at index i
+ cd := newCharData(text, flags, nil)
+ e.InsertChildAt(i, cd)
+ }
+
+ // Exactly one chardata token at index i.
+ case end == i+1:
+ if text == "" {
+ // remove the chardata token at index i
+ e.RemoveChildAt(i)
+ } else {
+ // replace the first and only character token at index i
+ cd := e.Child[i].(*CharData)
+ cd.Data, cd.flags = text, flags
+ }
+
+ // Two or more consecutive chardata tokens starting at index i.
+ default:
+ if text == "" {
+ // remove all chardata tokens starting from index i
+ copy(e.Child[i:], e.Child[end:])
+ removed := end - i
+ e.Child = e.Child[:len(e.Child)-removed]
+ // re-index the tokens that were shifted down
+ for j := i; j < len(e.Child); j++ {
+ e.Child[j].setIndex(j)
+ }
+ } else {
+ // replace the first chardata token at index i and remove all
+ // subsequent chardata tokens
+ cd := e.Child[i].(*CharData)
+ cd.Data, cd.flags = text, flags
+ copy(e.Child[i+1:], e.Child[end:])
+ removed := end - (i + 1)
+ e.Child = e.Child[:len(e.Child)-removed]
+ // re-index the tokens that were shifted down
+ for j := i + 1; j < len(e.Child); j++ {
+ e.Child[j].setIndex(j)
+ }
+ }
+ }
+}
+
+// findTermCharDataIndex scans the child tokens from index 'start' and returns
+// the index of the first one that is not a CharData token. If every token
+// from 'start' onward is character data, or 'start' is at or past the end of
+// the child list, it returns the number of children.
+func (e *Element) findTermCharDataIndex(start int) int {
+	i, n := start, len(e.Child)
+	for i < n {
+		if _, isText := e.Child[i].(*CharData); !isText {
+			return i
+		}
+		i++
+	}
+	return n
+}
+
+// CreateElement creates a new element with the specified tag (i.e., name),
+// adds it as the last child token of this element, and returns it. The tag
+// may include a prefix followed by a colon.
+func (e *Element) CreateElement(tag string) *Element {
+	prefix, local := spaceDecompose(tag)
+	child := newElement(prefix, local, e)
+	return child
+}
+
+// AddChild adds the token 't' as the last child of the element. If token 't'
+// already has a parent, it is first removed from that parent's list of child
+// tokens.
+func (e *Element) AddChild(t Token) {
+	if owner := t.Parent(); owner != nil {
+		owner.RemoveChild(t)
+	}
+	e.addChild(t)
+}
+
+// InsertChild inserts the token 't' into this element's list of children just
+// before the element's existing child token 'ex'. If the existing element
+// 'ex' does not appear in this element's list of child tokens, then 't' is
+// added to the end of this element's list of child tokens. If token 't' is
+// already the child of another element, it is first removed from the other
+// element's list of child tokens. Inserting a child token before itself
+// leaves the list unchanged.
+//
+// Deprecated: InsertChild is deprecated. Use InsertChildAt instead.
+func (e *Element) InsertChild(ex Token, t Token) {
+	if ex == nil || ex.Parent() != e {
+		e.AddChild(t)
+		return
+	}
+
+	// 't' is already positioned "before itself". Without this guard the token
+	// would be removed below, ex.Index() would then report -1, and the slice
+	// expressions that follow would panic.
+	if ex == t {
+		return
+	}
+
+	if t.Parent() != nil {
+		t.Parent().RemoveChild(t)
+	}
+
+	t.setParent(e)
+
+	// Read the anchor's index only after 't' has been removed: removing a
+	// sibling that preceded 'ex' shifts 'ex' down by one.
+	i := ex.Index()
+	e.Child = append(e.Child, nil)
+	copy(e.Child[i+1:], e.Child[i:])
+	e.Child[i] = t
+
+	// Re-index the inserted token and everything that moved up behind it.
+	for j := i; j < len(e.Child); j++ {
+		e.Child[j].setIndex(j)
+	}
+}
+
+// InsertChildAt inserts the token 't' into this element's list of child
+// tokens just before the token currently located at the requested 'index'. If
+// the index is greater than or equal to the length of the list of child
+// tokens, then the token 't' is added to the end of the list of child tokens.
+// If 't' already has a parent, it is removed from that parent first.
+func (e *Element) InsertChildAt(index int, t Token) {
+	if index >= len(e.Child) {
+		e.AddChild(t)
+		return
+	}
+
+	if t.Parent() != nil {
+		// When 't' is already a child of this element and sits before the
+		// target slot, removing it shifts the target one position to the
+		// left. Tokens located at or after the target slot do not affect it.
+		if t.Parent() == e && t.Index() < index {
+			index--
+		}
+		t.Parent().RemoveChild(t)
+	}
+
+	t.setParent(e)
+
+	// Grow the slice by one and shift the tail right to open a slot.
+	e.Child = append(e.Child, nil)
+	copy(e.Child[index+1:], e.Child[index:])
+	e.Child[index] = t
+
+	// Every token from the insertion point onward has a new position.
+	for j := index; j < len(e.Child); j++ {
+		e.Child[j].setIndex(j)
+	}
+}
+
+// RemoveChild detaches the token 't' from this element when 't' is one of its
+// child tokens and returns the detached token. When 't' belongs to a
+// different parent (or to none), nothing is changed and nil is returned.
+func (e *Element) RemoveChild(t Token) Token {
+	if t.Parent() == e {
+		return e.RemoveChildAt(t.Index())
+	}
+	return nil
+}
+
+// RemoveChildAt removes the child token appearing in slot 'index' of this
+// element's list of child tokens. The removed child token is then returned
+// with its parent cleared and its index reset to -1. If the index is out of
+// bounds (negative, or not less than the number of child tokens), no child is
+// removed and nil is returned.
+func (e *Element) RemoveChildAt(index int) Token {
+	if index < 0 || index >= len(e.Child) {
+		return nil
+	}
+
+	t := e.Child[index]
+
+	// Tokens after the removed slot each move down by one position.
+	for j := index + 1; j < len(e.Child); j++ {
+		e.Child[j].setIndex(j - 1)
+	}
+
+	// Close the gap, then clear the vacated tail slot so the backing array
+	// does not keep a stale reference to a token.
+	last := len(e.Child) - 1
+	copy(e.Child[index:], e.Child[index+1:])
+	e.Child[last] = nil
+	e.Child = e.Child[:last]
+
+	t.setIndex(-1)
+	t.setParent(nil)
+	return t
+}
+
+// readFrom reads XML from the reader 'ri' and stores the parsed tokens as new
+// children of this element. The returned count n is taken from the counting
+// reader wrapped around 'ri'. Parsing ends with a nil error at io.EOF, with
+// the decoder's error if one occurs, or with ErrXML if a token arrives after
+// the stack of open elements has been emptied.
+//
+// NOTE(review): elements still open when io.EOF is reached are not reported
+// as an error here.
+func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err error) {
+ r := newCountReader(ri)
+ dec := xml.NewDecoder(r)
+ dec.CharsetReader = settings.CharsetReader
+ dec.Strict = !settings.Permissive
+ dec.Entity = settings.Entity
+ // The stack holds the chain of currently open elements; the receiver is
+ // its bottom entry.
+ var stack stack
+ stack.push(e)
+ for {
+ // RawToken (unlike Token) does not translate namespace prefixes and does
+ // not verify that start and end tags match.
+ t, err := dec.RawToken()
+ switch {
+ case err == io.EOF:
+ return r.bytes, nil
+ case err != nil:
+ return r.bytes, err
+ case stack.empty():
+ // An earlier end tag popped the receiver itself, so this token has no
+ // element to belong to.
+ return r.bytes, ErrXML
+ }
+
+ top := stack.peek().(*Element)
+
+ switch t := t.(type) {
+ case xml.StartElement:
+ // This 'e' shadows the receiver: it is the newly opened child of 'top'.
+ e := newElement(t.Name.Space, t.Name.Local, top)
+ for _, a := range t.Attr {
+ e.createAttr(a.Name.Space, a.Name.Local, a.Value, e)
+ }
+ stack.push(e)
+ case xml.EndElement:
+ stack.pop()
+ case xml.CharData:
+ data := string(t)
+ var flags charDataFlags
+ if isWhitespace(data) {
+ flags = whitespaceFlag
+ }
+ newCharData(data, flags, top)
+ case xml.Comment:
+ newComment(string(t), top)
+ case xml.Directive:
+ newDirective(string(t), top)
+ case xml.ProcInst:
+ newProcInst(t.Target, string(t.Inst), top)
+ }
+ }
+}
+
+// SelectAttr looks up the attribute of this element that matches 'key' and
+// returns a pointer to it, or nil when the element has no such attribute. The
+// key may include a namespace prefix followed by a colon.
+func (e *Element) SelectAttr(key string) *Attr {
+	space, skey := spaceDecompose(key)
+	for i := range e.Attr {
+		attr := &e.Attr[i]
+		if spaceMatch(space, attr.Space) && skey == attr.Key {
+			return attr
+		}
+	}
+	return nil
+}
+
+// SelectAttrValue returns the value of the attribute of this element that
+// matches 'key'. When no attribute matches, the 'dflt' value is returned
+// instead. The key may include a namespace prefix followed by a colon.
+func (e *Element) SelectAttrValue(key, dflt string) string {
+	if attr := e.SelectAttr(key); attr != nil {
+		return attr.Value
+	}
+	return dflt
+}
+
+// ChildElements collects, in document order, every child token of this
+// element that is itself an element. The result is nil when there are none.
+func (e *Element) ChildElements() []*Element {
+	var elements []*Element
+	for i := range e.Child {
+		switch c := e.Child[i].(type) {
+		case *Element:
+			elements = append(elements, c)
+		}
+	}
+	return elements
+}
+
+// SelectElement returns the first child element whose tag (i.e., name)
+// matches 'tag', or nil when no child element matches. The tag may include a
+// namespace prefix followed by a colon.
+func (e *Element) SelectElement(tag string) *Element {
+	space, stag := spaceDecompose(tag)
+	for _, t := range e.Child {
+		c, ok := t.(*Element)
+		if !ok {
+			continue
+		}
+		if spaceMatch(space, c.Space) && stag == c.Tag {
+			return c
+		}
+	}
+	return nil
+}
+
+// SelectElements returns, in document order, every child element whose tag
+// (i.e., name) matches 'tag'. The tag may include a namespace prefix followed
+// by a colon. The result is nil when nothing matches.
+func (e *Element) SelectElements(tag string) []*Element {
+	space, stag := spaceDecompose(tag)
+	var matches []*Element
+	for _, t := range e.Child {
+		c, ok := t.(*Element)
+		if !ok || !spaceMatch(space, c.Space) || stag != c.Tag {
+			continue
+		}
+		matches = append(matches, c)
+	}
+	return matches
+}
+
+// FindElement compiles the XPath-like 'path' string and returns the first
+// element it matches, or nil when nothing matches. It panics if an invalid
+// path string is supplied.
+func (e *Element) FindElement(path string) *Element {
+	compiled := MustCompilePath(path)
+	return e.FindElementPath(compiled)
+}
+
+// FindElementPath evaluates the compiled 'path' object starting from this
+// element and returns the first element matched, or nil when the path matches
+// nothing.
+func (e *Element) FindElementPath(path Path) *Element {
+	matches := newPather().traverse(e, path)
+	if len(matches) == 0 {
+		return nil
+	}
+	return matches[0]
+}
+
+// FindElements compiles the XPath-like 'path' string and returns the slice of
+// elements it matches. It panics if an invalid path string is supplied.
+func (e *Element) FindElements(path string) []*Element {
+	compiled := MustCompilePath(path)
+	return e.FindElementsPath(compiled)
+}
+
+// FindElementsPath evaluates the compiled 'path' object starting from this
+// element and returns the slice of elements matched.
+func (e *Element) FindElementsPath(path Path) []*Element {
+	return newPather().traverse(e, path)
+}
+
+// GetPath returns the absolute path of the element: the tags of the element
+// and of each of its ancestors, ordered from the outermost ancestor down to
+// the element itself, each preceded by "/". Ancestors with an empty tag
+// contribute no segment. If no segment is produced the result is "/".
+func (e *Element) GetPath() string {
+	path := ""
+	// Walk upward and prepend, so the outermost tag ends up first.
+	for seg := e; seg != nil; seg = seg.Parent() {
+		if seg.Tag == "" {
+			continue
+		}
+		path = "/" + seg.Tag + path
+	}
+	if path == "" {
+		return "/"
+	}
+	return path
+}
+
+// GetRelativePath returns the path of this element relative to the 'source'
+// element. If 'source' is this element, the result is "."; if 'source' is an
+// ancestor of this element, the result starts with "./"; otherwise the
+// result consists of one ".." segment per step needed to climb from 'source'
+// to the nearest common ancestor, followed by the tags leading down to this
+// element. If 'source' is nil or the two elements are not part of the same
+// element tree, then the function returns the empty string.
+func (e *Element) GetRelativePath(source *Element) string {
+ var path []*Element
+
+ if source == nil {
+ return ""
+ }
+
+ // Build a reverse path from the element toward the root. Stop if the
+ // source element is encountered.
+ var seg *Element
+ for seg = e; seg != nil && seg != source; seg = seg.Parent() {
+ path = append(path, seg)
+ }
+
+ // If we found the source element, reverse the path and compose the
+ // string.
+ if seg == source {
+ // An empty path means the loop stopped immediately: e is the source.
+ if len(path) == 0 {
+ return "."
+ }
+ parts := []string{}
+ for i := len(path) - 1; i >= 0; i-- {
+ parts = append(parts, path[i].Tag)
+ }
+ return "./" + strings.Join(parts, "/")
+ }
+
+ // The source wasn't encountered, so climb from the source element toward
+ // the root of the tree until an element in the reversed path is
+ // encountered.
+
+ // findPathIndex returns the position of e within path, or -1 if absent.
+ findPathIndex := func(e *Element, path []*Element) int {
+ for i, ee := range path {
+ if e == ee {
+ return i
+ }
+ }
+ return -1
+ }
+
+ // climb counts the steps taken upward from the source before reaching an
+ // element that also lies on this element's path to the root.
+ climb := 0
+ for seg = source; seg != nil; seg = seg.Parent() {
+ i := findPathIndex(seg, path)
+ if i >= 0 {
+ path = path[:i] // truncate at found segment
+ break
+ }
+ climb++
+ }
+
+ // No element in the reversed path was encountered, so the two elements
+ // must not be part of the same tree.
+ if seg == nil {
+ return ""
+ }
+
+ // Reverse the (possibly truncated) path and prepend ".." segments to
+ // climb.
+ parts := []string{}
+ for i := 0; i < climb; i++ {
+ parts = append(parts, "..")
+ }
+ for i := len(path) - 1; i >= 0; i-- {
+ parts = append(parts, path[i].Tag)
+ }
+ return strings.Join(parts, "/")
+}
+
+// indent recursively inserts proper indentation between an XML element's
+// child tokens. Existing whitespace-only CharData children are stripped
+// first. 'depth' is the nesting level passed to the 'indent' function, which
+// supplies the whitespace string to insert for a given level; an empty string
+// results in no token being inserted.
+func (e *Element) indent(depth int, indent indentFunc) {
+ e.stripIndent()
+ n := len(e.Child)
+ if n == 0 {
+ return
+ }
+
+ // Rebuild the child list from scratch, leaving room for one indent token
+ // before each child plus one trailing indent token.
+ oldChild := e.Child
+ e.Child = make([]Token, 0, n*2+1)
+ isCharData, firstNonCharData := false, true
+ for _, c := range oldChild {
+ // Insert NL+indent before child if it's not character data.
+ // Exceptions: when it's the first non-character-data child, or when
+ // the child is at root depth.
+ _, isCharData = c.(*CharData)
+ if !isCharData {
+ if !firstNonCharData || depth > 0 {
+ s := indent(depth)
+ if s != "" {
+ newCharData(s, whitespaceFlag, e)
+ }
+ }
+ firstNonCharData = false
+ }
+
+ e.addChild(c)
+
+ // Recursively process child elements.
+ if ce, ok := c.(*Element); ok {
+ ce.indent(depth+1, indent)
+ }
+ }
+
+ // Insert NL+indent after the last child when that child is not character
+ // data, using the indentation of the level one shallower than the
+ // children.
+ if !isCharData {
+ if !firstNonCharData || depth > 0 {
+ s := indent(depth - 1)
+ if s != "" {
+ newCharData(s, whitespaceFlag, e)
+ }
+ }
+ }
+}
+
+// stripIndent drops every child token that is a CharData token flagged as
+// whitespace and renumbers the remaining child tokens. The child list is left
+// untouched when it contains no such token.
+func (e *Element) stripIndent() {
+	isIndent := func(t Token) bool {
+		cd, ok := t.(*CharData)
+		return ok && cd.IsWhitespace()
+	}
+
+	// First pass: count the survivors so the new slice is sized exactly.
+	kept := 0
+	for _, c := range e.Child {
+		if !isIndent(c) {
+			kept++
+		}
+	}
+	if kept == len(e.Child) {
+		return
+	}
+
+	// Second pass: copy the survivors and assign their new positions.
+	newChild := make([]Token, 0, kept)
+	for _, c := range e.Child {
+		if isIndent(c) {
+			continue
+		}
+		c.setIndex(len(newChild))
+		newChild = append(newChild, c)
+	}
+	e.Child = newChild
+}
+
+// dup duplicates the element. The copy is bound to 'parent', keeps the
+// original's index, and receives recursively duplicated child tokens. The
+// copied attributes are re-bound to the new element, so that Attr.Element and
+// Attr.NamespaceURI on the copy resolve against the copy instead of the
+// original element.
+func (e *Element) dup(parent *Element) Token {
+	ne := &Element{
+		Space:  e.Space,
+		Tag:    e.Tag,
+		Attr:   make([]Attr, len(e.Attr)),
+		Child:  make([]Token, len(e.Child)),
+		parent: parent,
+		index:  e.index,
+	}
+	for i, t := range e.Child {
+		ne.Child[i] = t.dup(ne)
+	}
+	for i, a := range e.Attr {
+		// 'a' is a copy of the attribute, so re-pointing it does not touch
+		// the original element's attribute.
+		a.element = ne
+		ne.Attr[i] = a
+	}
+	return ne
+}
+
+// Parent reports the element that contains this element, or nil when this
+// element is not attached to a parent.
+func (e *Element) Parent() *Element {
+	return e.parent
+}
+
+// Index reports the position of this element within its parent element's
+// list of child tokens. An element without a parent reports -1.
+func (e *Element) Index() int {
+	return e.index
+}
+
+// setParent records 'parent' as the parent of this element token. It does
+// not modify either parent's list of child tokens.
+func (e *Element) setParent(parent *Element) {
+	e.parent = parent
+}
+
+// setIndex records 'index' as this element token's position within its
+// parent's Child slice.
+func (e *Element) setIndex(index int) {
+	e.index = index
+}
+
+// writeTo serializes the element, its attributes and its child tokens to the
+// writer w. An element without child tokens is written as a self-closing tag
+// unless s.CanonicalEndTags is set, in which case an explicit end tag is
+// written.
+func (e *Element) writeTo(w *bufio.Writer, s *WriteSettings) {
+	w.WriteByte('<')
+	w.WriteString(e.FullTag())
+	for i := range e.Attr {
+		w.WriteByte(' ')
+		e.Attr[i].writeTo(w, s)
+	}
+
+	switch {
+	case len(e.Child) > 0:
+		w.WriteByte('>')
+		for _, c := range e.Child {
+			c.writeTo(w, s)
+		}
+		w.WriteString("</")
+		w.WriteString(e.FullTag())
+		w.WriteByte('>')
+	case s.CanonicalEndTags:
+		w.WriteString("></")
+		w.WriteString(e.FullTag())
+		w.WriteByte('>')
+	default:
+		w.WriteString("/>")
+	}
+}
+
+// addChild appends the token t to the element's Child slice and records the
+// element and the new position on the token. It does not detach t from any
+// previous parent.
+func (e *Element) addChild(t Token) {
+	pos := len(e.Child)
+	e.Child = append(e.Child, t)
+	t.setParent(e)
+	t.setIndex(pos)
+}
+
+// CreateAttr adds an attribute with the specified 'key' and 'value' to this
+// element and returns a pointer to it. If the element already has an
+// attribute with the same key, that attribute's value is replaced instead.
+// The key may include a namespace prefix followed by a colon.
+func (e *Element) CreateAttr(key, value string) *Attr {
+	prefix, local := spaceDecompose(key)
+	attr := e.createAttr(prefix, local, value, e)
+	return attr
+}
+
+// createAttr is the helper behind attribute creation. If the element already
+// holds an attribute with exactly this space and key, its value is
+// overwritten and a pointer to it is returned. Otherwise a new attribute owned
+// by 'parent' is appended and a pointer to the appended entry is returned.
+func (e *Element) createAttr(space, key, value string, parent *Element) *Attr {
+	for i := range e.Attr {
+		existing := &e.Attr[i]
+		if existing.Space == space && existing.Key == key {
+			existing.Value = value
+			return existing
+		}
+	}
+	e.Attr = append(e.Attr, Attr{
+		Space:   space,
+		Key:     key,
+		Value:   value,
+		element: parent,
+	})
+	return &e.Attr[len(e.Attr)-1]
+}
+
+// RemoveAttr deletes the first attribute of this element whose space and key
+// equal those of 'key' and returns a detached copy of it (one that is not
+// bound to any element). It returns nil when no attribute matches. The key
+// may include a namespace prefix followed by a colon.
+func (e *Element) RemoveAttr(key string) *Attr {
+	space, skey := spaceDecompose(key)
+	for i := range e.Attr {
+		if e.Attr[i].Space != space || e.Attr[i].Key != skey {
+			continue
+		}
+		// Take the copy before the slice is compacted over this slot.
+		removed := &Attr{
+			Space:   e.Attr[i].Space,
+			Key:     e.Attr[i].Key,
+			Value:   e.Attr[i].Value,
+			element: nil,
+		}
+		e.Attr = append(e.Attr[:i], e.Attr[i+1:]...)
+		return removed
+	}
+	return nil
+}
+
+// SortAttrs sorts this element's attributes in place, ordering them
+// lexicographically by namespace prefix first and by key second.
+func (e *Element) SortAttrs() {
+	attrs := byAttr(e.Attr)
+	sort.Sort(attrs)
+}
+
+// byAttr adapts a slice of attributes to sort.Interface, ordering by
+// namespace prefix and then by key.
+type byAttr []Attr
+
+// Len reports the number of attributes.
+func (a byAttr) Len() int { return len(a) }
+
+// Swap exchanges the attributes at positions i and j.
+func (a byAttr) Swap(i, j int) {
+	a[j], a[i] = a[i], a[j]
+}
+
+// Less reports whether attribute i sorts before attribute j: a smaller
+// namespace prefix wins, and equal prefixes fall back to comparing keys.
+func (a byAttr) Less(i, j int) bool {
+	if a[i].Space != a[j].Space {
+		return a[i].Space < a[j].Space
+	}
+	return a[i].Key < a[j].Key
+}
+
+// FullKey returns this attribute's complete key: the namespace prefix and the
+// key joined by a colon, or just the key when the attribute has no prefix.
+func (a *Attr) FullKey() string {
+	if a.Space != "" {
+		return a.Space + ":" + a.Key
+	}
+	return a.Key
+}
+
+// Element reports the element this attribute is bound to. The result is nil
+// for an attribute that is not bound to any element.
+func (a *Attr) Element() *Element {
+	return a.element
+}
+
+// NamespaceURI returns the XML namespace URI associated with this attribute.
+// The function returns the empty string if the attribute is unprefixed or
+// if the attribute is part of the XML default namespace. It also returns the
+// empty string if the attribute is not bound to an element (as is the case
+// for the copy returned by Element.RemoveAttr), because there is then no
+// element to resolve the prefix against.
+func (a *Attr) NamespaceURI() string {
+	if a.Space == "" || a.element == nil {
+		return ""
+	}
+	return a.element.findLocalNamespaceURI(a.Space)
+}
+
+// writeTo serializes the attribute to the writer as key="value". The value is
+// escaped in canonical-attribute mode when s.CanonicalAttrVal is set and in
+// normal mode otherwise.
+func (a *Attr) writeTo(w *bufio.Writer, s *WriteSettings) {
+	var mode escapeMode = escapeNormal
+	if s.CanonicalAttrVal {
+		mode = escapeCanonicalAttr
+	}
+
+	w.WriteString(a.FullKey())
+	w.WriteString(`="`)
+	escapeString(w, a.Value, mode)
+	w.WriteByte('"')
+}
+
+// NewText returns a new CharData token holding 'text' as simple text data.
+// The token is not attached to any parent element.
+func NewText(text string) *CharData {
+	return newCharData(text, 0, nil)
+}
+
+// NewCData returns a new CharData token representing an XML CDATA section
+// with 'data' as its content. The token is not attached to any parent
+// element.
+func NewCData(data string) *CharData {
+	return newCharData(data, cdataFlag, nil)
+}
+
+// NewCharData returns a new CharData token holding 'data' as simple text
+// data. The token is not attached to any parent element.
+//
+// Deprecated: NewCharData is deprecated. Instead, use NewText, which does the
+// same thing.
+func NewCharData(data string) *CharData {
+	return newCharData(data, 0, nil)
+}
+
+// newCharData builds a character data token with the given content and flags.
+// When 'parent' is non-nil the token is appended to the parent's child
+// tokens; otherwise it stays unbound with an index of -1.
+func newCharData(data string, flags charDataFlags, parent *Element) *CharData {
+	c := &CharData{Data: data, flags: flags, index: -1}
+	if parent == nil {
+		return c
+	}
+	parent.addChild(c)
+	return c
+}
+
+// CreateText creates a CharData token containing simple text data and
+// appends it to this element's list of child tokens.
+func (e *Element) CreateText(text string) *CharData {
+	return newCharData(text, 0, e)
+}
+
+// CreateCData creates a CharData token representing a CDATA section with
+// 'data' as its content and appends it to this element's list of child
+// tokens.
+func (e *Element) CreateCData(data string) *CharData {
+	return newCharData(data, cdataFlag, e)
+}
+
+// CreateCharData creates a CharData token containing simple text data and
+// appends it to this element's list of child tokens.
+//
+// Deprecated: CreateCharData is deprecated. Instead, use CreateText, which
+// does the same thing.
+func (e *Element) CreateCharData(data string) *CharData {
+	return newCharData(data, 0, e)
+}
+
+// SetData replaces the content of the CharData token with 'text'. This
+// applies equally to simple text and to the content of a CDATA section. The
+// token's whitespace flag is updated to reflect whether the new content
+// consists only of whitespace.
+func (c *CharData) SetData(text string) {
+	c.Data = text
+	if !isWhitespace(text) {
+		c.flags &^= whitespaceFlag
+		return
+	}
+	c.flags |= whitespaceFlag
+}
+
+// IsCData reports whether this CharData token contains a CDATA section. It
+// returns false if the CharData token contains simple text.
+func (c *CharData) IsCData() bool {
+	return c.flags&cdataFlag != 0
+}
+
+// IsWhitespace reports whether this CharData token is flagged as containing
+// only whitespace.
+func (c *CharData) IsWhitespace() bool {
+	return c.flags&whitespaceFlag != 0
+}
+
+// Parent reports the element that contains this CharData token, or nil when
+// the token is not attached to a parent.
+func (c *CharData) Parent() *Element {
+	return c.parent
+}
+
+// Index reports the position of this CharData token within its parent
+// element's list of child tokens. A CharData token without a parent reports
+// -1.
+func (c *CharData) Index() int {
+	return c.index
+}
+
+// dup returns a copy of the character data token bound to 'parent'. The copy
+// keeps the original's content, flags and index.
+func (c *CharData) dup(parent *Element) Token {
+	clone := &CharData{
+		Data:   c.Data,
+		parent: parent,
+		index:  c.index,
+		flags:  c.flags,
+	}
+	return clone
+}
+
+// setParent records 'parent' as the parent of the character data token.
+func (c *CharData) setParent(parent *Element) {
+	c.parent = parent
+}
+
+// setIndex records 'index' as the CharData token's position within its
+// parent element's Child slice.
+func (c *CharData) setIndex(index int) {
+	c.index = index
+}
+
+// writeTo serializes character data to the writer. A CDATA section is
+// written verbatim between its CDATA delimiters. Simple text is escaped in
+// canonical-text mode when s.CanonicalText is set and in normal mode
+// otherwise.
+func (c *CharData) writeTo(w *bufio.Writer, s *WriteSettings) {
+	if !c.IsCData() {
+		var mode escapeMode = escapeNormal
+		if s.CanonicalText {
+			mode = escapeCanonicalText
+		}
+		escapeString(w, c.Data, mode)
+		return
+	}
+
+	w.WriteString(`<![CDATA[`)
+	w.WriteString(c.Data)
+	w.WriteString(`]]>`)
+}
+
+// NewComment returns a new comment token with the given content. The token is
+// not attached to any parent element.
+func NewComment(comment string) *Comment {
+	return newComment(comment, nil)
+}
+
+// newComment builds a comment token with the given content. When 'parent' is
+// non-nil the token is appended to the parent's child tokens; otherwise it
+// stays unbound with an index of -1.
+func newComment(comment string, parent *Element) *Comment {
+	c := &Comment{Data: comment, index: -1}
+	if parent == nil {
+		return c
+	}
+	parent.addChild(c)
+	return c
+}
+
+// CreateComment creates a comment token from the specified 'comment' string
+// and appends it to this element's list of child tokens.
+func (e *Element) CreateComment(comment string) *Comment {
+	return newComment(comment, e)
+}
+
+// dup returns a copy of the comment bound to 'parent'. The copy keeps the
+// original's content and index.
+func (c *Comment) dup(parent *Element) Token {
+	clone := &Comment{
+		Data:   c.Data,
+		index:  c.index,
+		parent: parent,
+	}
+	return clone
+}
+
+// Parent reports the element that contains this comment token, or nil when
+// the token is not attached to a parent.
+func (c *Comment) Parent() *Element {
+	return c.parent
+}
+
+// Index reports the position of this Comment token within its parent
+// element's list of child tokens. A Comment token without a parent reports
+// -1.
+func (c *Comment) Index() int {
+	return c.index
+}
+
+// setParent records 'parent' as the parent of the comment token.
+func (c *Comment) setParent(parent *Element) {
+	c.parent = parent
+}
+
+// setIndex records 'index' as the Comment token's position within its parent
+// element's Child slice.
+func (c *Comment) setIndex(index int) {
+	c.index = index
+}
+
+// writeTo serializes the comment to the writer, wrapping its content in the
+// XML comment delimiters. The content is written verbatim; the write settings
+// are not consulted.
+func (c *Comment) writeTo(w *bufio.Writer, s *WriteSettings) {
+	for _, part := range [...]string{"<!--", c.Data, "-->"} {
+		w.WriteString(part)
+	}
+}
+
+// NewDirective returns a new XML directive token with the given content. The
+// token is not attached to any parent element.
+func NewDirective(data string) *Directive {
+	return newDirective(data, nil)
+}
+
+// newDirective builds an XML directive token with the given content. When
+// 'parent' is non-nil the token is appended to the parent's child tokens;
+// otherwise it stays unbound with an index of -1.
+func newDirective(data string, parent *Element) *Directive {
+	d := &Directive{Data: data, index: -1}
+	if parent == nil {
+		return d
+	}
+	parent.addChild(d)
+	return d
+}
+
+// CreateDirective creates an XML directive token from the specified 'data'
+// value and appends it to this element's list of child tokens.
+func (e *Element) CreateDirective(data string) *Directive {
+	return newDirective(data, e)
+}
+
+// dup returns a copy of the directive bound to 'parent'. The copy keeps the
+// original's content and index.
+func (d *Directive) dup(parent *Element) Token {
+	clone := &Directive{
+		Data:   d.Data,
+		index:  d.index,
+		parent: parent,
+	}
+	return clone
+}
+
+// Parent reports the element that contains this directive token, or nil when
+// the token is not attached to a parent.
+func (d *Directive) Parent() *Element {
+	return d.parent
+}
+
+// Index reports the position of this Directive token within its parent
+// element's list of child tokens. A Directive token without a parent reports
+// -1.
+func (d *Directive) Index() int {
+	return d.index
+}
+
+// setParent records 'parent' as the parent of the directive token.
+func (d *Directive) setParent(parent *Element) {
+	d.parent = parent
+}
+
+// setIndex records 'index' as the Directive token's position within its
+// parent element's Child slice.
+func (d *Directive) setIndex(index int) {
+	d.index = index
+}
+
+// writeTo serializes the XML directive to the writer, wrapping its content in
+// the directive delimiters. The content is written verbatim; the write
+// settings are not consulted.
+func (d *Directive) writeTo(w *bufio.Writer, s *WriteSettings) {
+	for _, part := range [...]string{"<!", d.Data, ">"} {
+		w.WriteString(part)
+	}
+}
+
+// NewProcInst returns a new XML processing instruction token with the given
+// target and instruction. The token is not attached to any parent element.
+func NewProcInst(target, inst string) *ProcInst {
+	return newProcInst(target, inst, nil)
+}
+
+// newProcInst builds an XML processing instruction token with the given
+// target and instruction. When 'parent' is non-nil the token is appended to
+// the parent's child tokens; otherwise it stays unbound with an index of -1.
+func newProcInst(target, inst string, parent *Element) *ProcInst {
+	p := &ProcInst{Target: target, Inst: inst, index: -1}
+	if parent == nil {
+		return p
+	}
+	parent.addChild(p)
+	return p
+}
+
+// CreateProcInst creates an XML processing instruction token with the
+// specified 'target' and instruction 'inst' and appends it to this element's
+// list of child tokens.
+func (e *Element) CreateProcInst(target, inst string) *ProcInst {
+	return newProcInst(target, inst, e)
+}
+
+// dup returns a copy of the processing instruction bound to 'parent'. The
+// copy keeps the original's target, instruction and index.
+func (p *ProcInst) dup(parent *Element) Token {
+	clone := &ProcInst{
+		Target: p.Target,
+		Inst:   p.Inst,
+		index:  p.index,
+		parent: parent,
+	}
+	return clone
+}
+
+// Parent reports the element that contains this processing instruction
+// token, or nil when the token is not attached to a parent.
+func (p *ProcInst) Parent() *Element {
+	return p.parent
+}
+
+// Index reports the position of this ProcInst token within its parent
+// element's list of child tokens. A ProcInst token without a parent reports
+// -1.
+func (p *ProcInst) Index() int {
+	return p.index
+}
+
+// setParent records 'parent' as the parent of the processing instruction
+// token.
+func (p *ProcInst) setParent(parent *Element) {
+	p.parent = parent
+}
+
+// setIndex records 'index' as the processing instruction token's position
+// within its parent element's Child slice.
+func (p *ProcInst) setIndex(index int) {
+	p.index = index
+}
+
+// writeTo serializes the processing instruction to the writer. The target is
+// always written; the instruction, preceded by a single space, is written
+// only when it is non-empty.
+func (p *ProcInst) writeTo(w *bufio.Writer, s *WriteSettings) {
+	w.WriteString("<?")
+	w.WriteString(p.Target)
+	if len(p.Inst) > 0 {
+		w.WriteByte(' ')
+		w.WriteString(p.Inst)
+	}
+	w.WriteString("?>")
+}
diff --git a/etree_test.go b/etree_test.go
new file mode 100644
index 0000000..501f4ad
--- /dev/null
+++ b/etree_test.go
@@ -0,0 +1,1115 @@
+// Copyright 2015-2019 Brett Vickers.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package etree
+
+import (
+ "encoding/xml"
+ "io"
+ "strings"
+ "testing"
+)
+
+// newDocumentFromString parses 's' into a fresh document. A parse failure is
+// reported on 't' without stopping the test; the document is returned either
+// way.
+func newDocumentFromString(t *testing.T, s string) *Document {
+	t.Helper()
+	doc := NewDocument()
+	if err := doc.ReadFromString(s); err != nil {
+		t.Error("etree: failed to parse document")
+	}
+	return doc
+}
+
+// checkStrEq reports a test error showing both strings when 'got' differs
+// from 'want'.
+func checkStrEq(t *testing.T, got, want string) {
+	t.Helper()
+	if got == want {
+		return
+	}
+	t.Errorf("etree: unexpected result.\nGot:\n%s\nWanted:\n%s\n", got, want)
+}
+
+// checkStrBinaryEq reports a test error when 'got' differs from 'want',
+// printing both strings as byte slices so that non-printable differences are
+// visible.
+func checkStrBinaryEq(t *testing.T, got, want string) {
+	t.Helper()
+	if got == want {
+		return
+	}
+	t.Errorf("etree: unexpected result.\nGot:\n%v\nWanted:\n%v\n", []byte(got), []byte(want))
+}
+
+// checkIntEq reports a test error showing both integers when 'got' differs
+// from 'want'.
+func checkIntEq(t *testing.T, got, want int) {
+	t.Helper()
+	if got == want {
+		return
+	}
+	t.Errorf("etree: unexpected integer. Got: %d. Wanted: %d\n", got, want)
+}
+
+// checkBoolEq reports a test error showing both booleans when 'got' differs
+// from 'want'.
+func checkBoolEq(t *testing.T, got, want bool) {
+	t.Helper()
+	if got == want {
+		return
+	}
+	t.Errorf("etree: unexpected boolean. Got: %v. Wanted: %v\n", got, want)
+}
+
+// checkElementEq reports a test error when 'got' and 'want' are not the same
+// element pointer.
+func checkElementEq(t *testing.T, got, want *Element) {
+	t.Helper()
+	if got == want {
+		return
+	}
+	t.Errorf("etree: unexpected element. Got: %v. Wanted: %v.\n", got, want)
+}
+
+// checkDocEq re-indents 'doc' with NoIndent, serializes it, and reports a
+// test error when serialization fails or the output differs from 'expected'.
+// Note that the document passed in is modified by the re-indentation.
+func checkDocEq(t *testing.T, doc *Document, expected string) {
+	t.Helper()
+	doc.Indent(NoIndent)
+	got, err := doc.WriteToString()
+	if err != nil {
+		t.Error("etree: failed to serialize document")
+	}
+	if got == expected {
+		return
+	}
+	t.Errorf("etree: unexpected document.\nGot:\n%s\nWanted:\n%s\n", got, expected)
+}
+
+// checkIndexes verifies, recursively, that every child token of 'e' reports
+// an Index equal to its actual position in the Child slice.
+func checkIndexes(t *testing.T, e *Element) {
+	t.Helper()
+	for i, c := range e.Child {
+		if c.Index() != i {
+			t.Errorf("Child index mismatch. Got %d, expected %d.", c.Index(), i)
+		}
+		ce, ok := c.(*Element)
+		if !ok {
+			continue
+		}
+		checkIndexes(t, ce)
+	}
+}
+
+// TestDocument builds a small document through the Create* API, checks its
+// tab-indented serialization and its parent/child structure, and then
+// exercises the Select*, RemoveAttr and RemoveChildAt queries against it.
+func TestDocument(t *testing.T) {
+ // Create a document
+ doc := NewDocument()
+ doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
+ doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
+ store := doc.CreateElement("store")
+ store.CreateAttr("xmlns:t", "urn:books-com:titles")
+ store.CreateDirective("Directive")
+ store.CreateComment("This is a comment")
+ book := store.CreateElement("book")
+ // The second CreateAttr with the same key must replace the first value.
+ book.CreateAttr("lang", "fr")
+ book.CreateAttr("lang", "en")
+ title := book.CreateElement("t:title")
+ // The second SetText must replace the first text.
+ title.SetText("Nicholas Nickleby")
+ title.SetText("Great Expectations")
+ author := book.CreateElement("author")
+ author.CreateCharData("Charles Dickens")
+ review := book.CreateElement("review")
+ // SetCData must replace the previously created CDATA content.
+ review.CreateCData("<<< Will be replaced")
+ review.SetCData(">>> Excellent book")
+ doc.IndentTabs()
+
+ checkIndexes(t, &doc.Element)
+
+ // Serialize the document to a string
+ s, err := doc.WriteToString()
+ if err != nil {
+ t.Error("etree: failed to serialize document")
+ }
+
+ // Make sure the serialized XML matches expectation.
+ expected := `<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="style.xsl"?>
+<store xmlns:t="urn:books-com:titles">
+ <!Directive>
+ <!--This is a comment-->
+ <book lang="en">
+ <t:title>Great Expectations</t:title>
+ <author>Charles Dickens</author>
+ <review><![CDATA[>>> Excellent book]]></review>
+ </book>
+</store>
+`
+ checkStrEq(t, s, expected)
+
+ // Test the structure of the XML
+ if doc.Root() != store {
+ t.Error("etree: root mismatch")
+ }
+ // Child counts include the whitespace tokens inserted by IndentTabs.
+ if len(store.ChildElements()) != 1 || len(store.Child) != 7 {
+ t.Error("etree: incorrect tree structure")
+ }
+ if len(book.ChildElements()) != 3 || len(book.Attr) != 1 || len(book.Child) != 7 {
+ t.Error("etree: incorrect tree structure")
+ }
+ if len(title.ChildElements()) != 0 || len(title.Child) != 1 || len(title.Attr) != 0 {
+ t.Error("etree: incorrect tree structure")
+ }
+ if len(author.ChildElements()) != 0 || len(author.Child) != 1 || len(author.Attr) != 0 {
+ t.Error("etree: incorrect tree structure")
+ }
+ if len(review.ChildElements()) != 0 || len(review.Child) != 1 || len(review.Attr) != 0 {
+ t.Error("etree: incorrect tree structure")
+ }
+ if book.parent != store || store.parent != &doc.Element || doc.parent != nil {
+ t.Error("etree: incorrect tree structure")
+ }
+ if title.parent != book || author.parent != book {
+ t.Error("etree: incorrect tree structure")
+ }
+
+ // Perform some basic queries on the document
+ elements := doc.SelectElements("store")
+ if len(elements) != 1 || elements[0] != store {
+ t.Error("etree: incorrect SelectElements result")
+ }
+ element := doc.SelectElement("store")
+ if element != store {
+ t.Error("etree: incorrect SelectElement result")
+ }
+ elements = store.SelectElements("book")
+ if len(elements) != 1 || elements[0] != book {
+ t.Error("etree: incorrect SelectElements result")
+ }
+ element = store.SelectElement("book")
+ if element != book {
+ t.Error("etree: incorrect SelectElement result")
+ }
+ attr := book.SelectAttr("lang")
+ if attr == nil || attr.Key != "lang" || attr.Value != "en" {
+ t.Error("etree: incorrect SelectAttr result")
+ }
+ if book.SelectAttrValue("lang", "unknown") != "en" {
+ t.Error("etree: incorrect SelectAttrValue result")
+ }
+ if book.SelectAttrValue("t:missing", "unknown") != "unknown" {
+ t.Error("etree: incorrect SelectAttrValue result")
+ }
+ attr = book.RemoveAttr("lang")
+ if attr.Value != "en" {
+ t.Error("etree: incorrect RemoveAttr result")
+ }
+ book.CreateAttr("lang", "de")
+ attr = book.RemoveAttr("lang")
+ if attr.Value != "de" {
+ t.Error("etree: incorrect RemoveAttr result")
+ }
+ element = book.SelectElement("t:title")
+ if element != title || element.Text() != "Great Expectations" || len(element.Attr) != 0 {
+ t.Error("etree: incorrect SelectElement result")
+ }
+ // An unprefixed tag is expected to match the prefixed element as well.
+ element = book.SelectElement("title")
+ if element != title {
+ t.Error("etree: incorrect SelectElement result")
+ }
+ // A different prefix must not match.
+ element = book.SelectElement("p:title")
+ if element != nil {
+ t.Error("etree: incorrect SelectElement result")
+ }
+ element = book.RemoveChildAt(title.Index()).(*Element)
+ if element != title {
+ t.Error("etree: incorrect RemoveElement result")
+ }
+ element = book.SelectElement("title")
+ if element != nil {
+ t.Error("etree: incorrect SelectElement result")
+ }
+ element = book.SelectElement("review")
+ if element != review || element.Text() != ">>> Excellent book" || len(element.Attr) != 0 {
+ t.Error("etree: incorrect SelectElement result")
+ }
+}
+
+// TestDocumentReadNonUTF8Encodings checks that a document declaring a
+// non-UTF-8 encoding can be read once a CharsetReader is installed in the
+// read settings.
+func TestDocumentReadNonUTF8Encodings(t *testing.T) {
+ s := `<?xml version="1.0" encoding="ISO-8859-1"?>
+ <store>
+ <book lang="en">
+ <title>Great Expectations</title>
+ <author>Charles Dickens</author>
+ </book>
+</store>`
+
+ doc := NewDocument()
+ // The reader passes the input through unchanged, regardless of the label.
+ doc.ReadSettings.CharsetReader = func(label string, input io.Reader) (io.Reader, error) {
+ return input, nil
+ }
+ err := doc.ReadFromString(s)
+ if err != nil {
+ t.Fatal("etree: incorrect ReadFromString result")
+ }
+}
+
+func TestDocumentReadPermissive(t *testing.T) {
+ s := "<select disabled></select>"
+
+ doc := NewDocument()
+ err := doc.ReadFromString(s)
+ if err == nil {
+ t.Fatal("etree: incorrect ReadFromString result")
+ }
+
+ doc.ReadSettings.Permissive = true
+ err = doc.ReadFromString(s)
+ if err != nil {
+ t.Fatal("etree: incorrect ReadFromString result")
+ }
+}
+
+// TestDocumentReadHTMLEntities checks that HTML entity references are
+// rejected with the default read settings and accepted once the Entity read
+// setting is set to xml.HTMLEntity.
+func TestDocumentReadHTMLEntities(t *testing.T) {
+ s := `<store>
+ <book lang="en">
+ <title>&rarr;&nbsp;Great Expectations</title>
+ <author>Charles Dickens</author>
+ </book>
+</store>`
+
+ doc := NewDocument()
+ // Without an entity map, the HTML entities are expected to fail parsing.
+ err := doc.ReadFromString(s)
+ if err == nil {
+ t.Fatal("etree: incorrect ReadFromString result")
+ }
+
+ doc.ReadSettings.Entity = xml.HTMLEntity
+ err = doc.ReadFromString(s)
+ if err != nil {
+ t.Fatal("etree: incorrect ReadFromString result")
+ }
+}
+
+// TestEscapeCodes serializes the same input as both element text and
+// attribute value under three write-setting combinations (no canonical
+// escaping, canonical attribute values only, canonical text only) and
+// compares each result with the expected string for that mode.
+func TestEscapeCodes(t *testing.T) {
+ cases := []struct {
+ input string
+ normal string
+ attrCanonical string
+ textCanonical string
+ }{
+ {
+ "&<>'\"\t\n\r",
+ "<e a=\"&amp;&lt;&gt;&apos;&quot;\t\n\r\">&amp;&lt;&gt;&apos;&quot;\t\n\r</e>",
+ "<e a=\"&amp;&lt;>'&quot;&#x9;&#xA;&#xD;\">&amp;&lt;&gt;&apos;&quot;\t\n\r</e>",
+ "<e a=\"&amp;&lt;&gt;&apos;&quot;\t\n\r\">&amp;&lt;&gt;'\"\t\n&#xD;</e>",
+ },
+ {
+ "\x00\x1f\x08\x09\x0a\x0d",
+ "<e a=\"���\t\n\r\">���\t\n\r</e>",
+ "<e a=\"���&#x9;&#xA;&#xD;\">���\t\n\r</e>",
+ "<e a=\"���\t\n\r\">���\t\n&#xD;</e>",
+ },
+ }
+ for _, c := range cases {
+ doc := NewDocument()
+
+ e := doc.CreateElement("e")
+ e.SetText(c.input)
+ e.CreateAttr("a", c.input)
+
+ // Mode 1: no canonical escaping.
+ doc.WriteSettings.CanonicalText = false
+ doc.WriteSettings.CanonicalAttrVal = false
+ s, err := doc.WriteToString()
+ if err != nil {
+ t.Error("etree: Escape test produced inocrrect result.")
+ }
+ checkStrEq(t, s, c.normal)
+
+ // Mode 2: canonical escaping of attribute values only.
+ doc.WriteSettings.CanonicalText = false
+ doc.WriteSettings.CanonicalAttrVal = true
+ s, err = doc.WriteToString()
+ if err != nil {
+ t.Error("etree: Escape test produced inocrrect result.")
+ }
+ checkStrEq(t, s, c.attrCanonical)
+
+ // Mode 3: canonical escaping of text only.
+ doc.WriteSettings.CanonicalText = true
+ doc.WriteSettings.CanonicalAttrVal = false
+ s, err = doc.WriteToString()
+ if err != nil {
+ t.Error("etree: Escape test produced inocrrect result.")
+ }
+ checkStrEq(t, s, c.textCanonical)
+ }
+}
+
+func TestCanonical(t *testing.T) {
+ BOM := "\xef\xbb\xbf"
+
+ doc := NewDocument()
+ doc.WriteSettings.CanonicalEndTags = true
+ doc.WriteSettings.CanonicalText = true
+ doc.WriteSettings.CanonicalAttrVal = true
+ doc.CreateCharData(BOM)
+ doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
+
+ people := doc.CreateElement("People")
+ people.CreateComment("These are all known people")
+
+ jon := people.CreateElement("Person")
+ jon.CreateAttr("name", "Jon O'Reilly")
+ jon.SetText("\r<'\">&\u0004\u0005\u001f�")
+
+ sally := people.CreateElement("Person")
+ sally.CreateAttr("name", "Sally")
+ sally.CreateAttr("escape", "\r\n\t<'\">&")
+
+ doc.Indent(2)
+ s, err := doc.WriteToString()
+ if err != nil {
+ t.Error("etree: WriteSettings WriteTo produced incorrect result.")
+ }
+
+ expected := BOM + `<?xml-stylesheet type="text/xsl" href="style.xsl"?>
+<People>
+ <!--These are all known people-->
+ <Person name="Jon O'Reilly">&#xD;&lt;'"&gt;&amp;����</Person>
+ <Person name="Sally" escape="&#xD;&#xA;&#x9;&lt;'&quot;>&amp;"></Person>
+</People>
+`
+ checkStrEq(t, s, expected)
+}
+
+func TestCopy(t *testing.T) {
+ s := `<store>
+ <book lang="en">
+ <title>Great Expectations</title>
+ <author>Charles Dickens</author>
+ </book>
+</store>`
+
+ doc := newDocumentFromString(t, s)
+
+ s1, err := doc.WriteToString()
+ if err != nil {
+ t.Error("etree: incorrect WriteToString result")
+ }
+
+ doc2 := doc.Copy()
+ checkIndexes(t, &doc2.Element)
+ s2, err := doc2.WriteToString()
+ if err != nil {
+ t.Error("etree: incorrect Copy result")
+ }
+
+ if s1 != s2 {
+ t.Error("etree: mismatched Copy result")
+ t.Error("wanted:\n" + s1)
+ t.Error("got:\n" + s2)
+ }
+
+ e1 := doc.FindElement("./store/book/title")
+ e2 := doc2.FindElement("./store/book/title")
+ if e1 == nil || e2 == nil {
+ t.Error("etree: incorrect FindElement result")
+ }
+ if e1 == e2 {
+ t.Error("etree: incorrect FindElement result")
+ }
+
+ e1.parent.RemoveChildAt(e1.Index())
+ s1, _ = doc.WriteToString()
+ s2, _ = doc2.WriteToString()
+ if s1 == s2 {
+ t.Error("etree: incorrect result after RemoveElement")
+ }
+}
+
+func TestGetPath(t *testing.T) {
+ s := `<a>
+ <b1>
+ <c1>
+ <d1/>
+ <d1a/>
+ </c1>
+ </b1>
+ <b2>
+ <c2>
+ <d2/>
+ </c2>
+ </b2>
+</a>`
+
+ doc := newDocumentFromString(t, s)
+
+ cases := []struct {
+ from string
+ to string
+ relpath string
+ topath string
+ }{
+ {"a", ".", "..", "/"},
+ {".", "a", "./a", "/a"},
+ {"a/b1/c1/d1", ".", "../../../..", "/"},
+ {".", "a/b1/c1/d1", "./a/b1/c1/d1", "/a/b1/c1/d1"},
+ {"a", "a", ".", "/a"},
+ {"a/b1", "a/b1/c1", "./c1", "/a/b1/c1"},
+ {"a/b1/c1", "a/b1", "..", "/a/b1"},
+ {"a/b1/c1", "a/b1/c1", ".", "/a/b1/c1"},
+ {"a", "a/b1", "./b1", "/a/b1"},
+ {"a/b1", "a", "..", "/a"},
+ {"a", "a/b1/c1", "./b1/c1", "/a/b1/c1"},
+ {"a/b1/c1", "a", "../..", "/a"},
+ {"a/b1/c1/d1", "a", "../../..", "/a"},
+ {"a", "a/b1/c1/d1", "./b1/c1/d1", "/a/b1/c1/d1"},
+ {"a/b1", "a/b2", "../b2", "/a/b2"},
+ {"a/b2", "a/b1", "../b1", "/a/b1"},
+ {"a/b1/c1/d1", "a/b2/c2/d2", "../../../b2/c2/d2", "/a/b2/c2/d2"},
+ {"a/b2/c2/d2", "a/b1/c1/d1", "../../../b1/c1/d1", "/a/b1/c1/d1"},
+ {"a/b1/c1/d1", "a/b1/c1/d1a", "../d1a", "/a/b1/c1/d1a"},
+ }
+
+ for _, c := range cases {
+ fe := doc.FindElement(c.from)
+ te := doc.FindElement(c.to)
+
+ rp := te.GetRelativePath(fe)
+ if rp != c.relpath {
+ t.Errorf("GetRelativePath from '%s' to '%s'. Expected '%s', got '%s'.\n", c.from, c.to, c.relpath, rp)
+ }
+
+ p := te.GetPath()
+ if p != c.topath {
+ t.Errorf("GetPath for '%s'. Expected '%s', got '%s'.\n", c.to, c.topath, p)
+ }
+ }
+}
+
+func TestInsertChild(t *testing.T) {
+ s := `<book lang="en">
+ <t:title>Great Expectations</t:title>
+ <author>Charles Dickens</author>
+</book>
+`
+
+ doc := newDocumentFromString(t, s)
+
+ year := NewElement("year")
+ year.SetText("1861")
+
+ book := doc.FindElement("//book")
+ book.InsertChildAt(book.SelectElement("t:title").Index(), year)
+
+ expected1 := `<book lang="en">
+ <year>1861</year>
+ <t:title>Great Expectations</t:title>
+ <author>Charles Dickens</author>
+</book>
+`
+ doc.Indent(2)
+ s1, _ := doc.WriteToString()
+ checkStrEq(t, s1, expected1)
+
+ book.RemoveChildAt(year.Index())
+ book.InsertChildAt(book.SelectElement("author").Index(), year)
+
+ expected2 := `<book lang="en">
+ <t:title>Great Expectations</t:title>
+ <year>1861</year>
+ <author>Charles Dickens</author>
+</book>
+`
+ doc.Indent(2)
+ s2, _ := doc.WriteToString()
+ checkStrEq(t, s2, expected2)
+
+ book.RemoveChildAt(year.Index())
+ book.InsertChildAt(len(book.Child), year)
+
+ expected3 := `<book lang="en">
+ <t:title>Great Expectations</t:title>
+ <author>Charles Dickens</author>
+ <year>1861</year>
+</book>
+`
+ doc.Indent(2)
+ s3, _ := doc.WriteToString()
+ checkStrEq(t, s3, expected3)
+
+ book.RemoveChildAt(year.Index())
+ book.InsertChildAt(999, year)
+
+ expected4 := `<book lang="en">
+ <t:title>Great Expectations</t:title>
+ <author>Charles Dickens</author>
+ <year>1861</year>
+</book>
+`
+ doc.Indent(2)
+ s4, _ := doc.WriteToString()
+ checkStrEq(t, s4, expected4)
+}
+
+func TestCdata(t *testing.T) {
+ var tests = []struct {
+ in, out string
+ }{
+ {`<tag>1234567</tag>`, "1234567"},
+ {`<tag><![CDATA[1234567]]></tag>`, "1234567"},
+ {`<tag>1<![CDATA[2]]>3<![CDATA[4]]>5<![CDATA[6]]>7</tag>`, "1234567"},
+ {`<tag>1<![CDATA[2]]>3<inner>4</inner>5<![CDATA[6]]>7</tag>`, "123"},
+ {`<tag>1<inner>4</inner>5<![CDATA[6]]>7</tag>`, "1"},
+ {`<tag><![CDATA[1]]><inner>4</inner>5<![CDATA[6]]>7</tag>`, "1"},
+ }
+
+ for _, test := range tests {
+ doc := NewDocument()
+ err := doc.ReadFromString(test.in)
+ if err != nil {
+ t.Fatal("etree ReadFromString: " + err.Error())
+ }
+
+ tag := doc.FindElement("tag")
+ if tag.Text() != test.out {
+ t.Fatalf("etree invalid cdata. Expected: %v. Got: %v\n", test.out, tag.Text())
+ }
+ }
+}
+
+func TestAddChild(t *testing.T) {
+ s := `<book lang="en">
+ <t:title>Great Expectations</t:title>
+ <author>Charles Dickens</author>
+</book>
+`
+ doc1 := newDocumentFromString(t, s)
+
+ doc2 := NewDocument()
+ root := doc2.CreateElement("root")
+
+ for _, e := range doc1.FindElements("//book/*") {
+ root.AddChild(e)
+ }
+
+ expected1 := `<book lang="en"/>
+`
+ doc1.Indent(2)
+ s1, _ := doc1.WriteToString()
+ checkStrEq(t, s1, expected1)
+
+ expected2 := `<root>
+ <t:title>Great Expectations</t:title>
+ <author>Charles Dickens</author>
+</root>
+`
+ doc2.Indent(2)
+ s2, _ := doc2.WriteToString()
+ checkStrEq(t, s2, expected2)
+}
+
+func TestSetRoot(t *testing.T) {
+ s := `<?test a="wow"?>
+<book>
+ <title>Great Expectations</title>
+ <author>Charles Dickens</author>
+</book>
+`
+ doc := newDocumentFromString(t, s)
+
+ origroot := doc.Root()
+ if origroot.Parent() != &doc.Element {
+ t.Error("Root incorrect")
+ }
+
+ newroot := NewElement("root")
+ doc.SetRoot(newroot)
+
+ if doc.Root() != newroot {
+ t.Error("doc.Root() != newroot")
+ }
+ if origroot.Parent() != nil {
+ t.Error("origroot.Parent() != nil")
+ }
+
+ expected1 := `<?test a="wow"?>
+<root/>
+`
+ doc.Indent(2)
+ s1, _ := doc.WriteToString()
+ checkStrEq(t, s1, expected1)
+
+ doc.SetRoot(origroot)
+ doc.Indent(2)
+ expected2 := s
+ s2, _ := doc.WriteToString()
+ checkStrEq(t, s2, expected2)
+
+ doc2 := NewDocument()
+ doc2.CreateProcInst("test", `a="wow"`)
+ doc2.SetRoot(NewElement("root"))
+ doc2.Indent(2)
+ expected3 := expected1
+ s3, _ := doc2.WriteToString()
+ checkStrEq(t, s3, expected3)
+
+ doc2.SetRoot(doc.Root())
+ doc2.Indent(2)
+ expected4 := s
+ s4, _ := doc2.WriteToString()
+ checkStrEq(t, s4, expected4)
+
+ expected5 := `<?test a="wow"?>
+`
+ doc.Indent(2)
+ s5, _ := doc.WriteToString()
+ checkStrEq(t, s5, expected5)
+}
+
+func TestSortAttrs(t *testing.T) {
+ s := `<el foo='5' Foo='2' aaa='4' สวัสดี='7' AAA='1' a01='3' z='6' a:ZZZ='9' a:AAA='8'/>`
+ doc := newDocumentFromString(t, s)
+ doc.Root().SortAttrs()
+ doc.Indent(2)
+ out, _ := doc.WriteToString()
+ checkStrEq(t, out, `<el AAA="1" Foo="2" a01="3" aaa="4" foo="5" z="6" สวัสดี="7" a:AAA="8" a:ZZZ="9"/>`+"\n")
+}
+
+func TestCharsetReaderEncoding(t *testing.T) {
+ cases := []string{
+ `<?xml version="1.0" encoding="ISO-8859-1"?><foo></foo>`,
+ `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`,
+ `<?xml version="1.0" encoding="US-ASCII"?><foo></foo>`,
+ }
+
+ for _, c := range cases {
+ doc := NewDocument()
+ if err := doc.ReadFromBytes([]byte(c)); err != nil {
+ t.Error(err)
+ }
+ }
+}
+
+func TestCharData(t *testing.T) {
+ doc := NewDocument()
+ root := doc.CreateElement("root")
+ root.CreateCharData("This ")
+ root.CreateCData("is ")
+ e1 := NewText("a ")
+ e2 := NewCData("text ")
+ root.AddChild(e1)
+ root.AddChild(e2)
+ root.CreateCharData("Element!!")
+
+ s, err := doc.WriteToString()
+ if err != nil {
+ t.Error("etree: failed to serialize document")
+ }
+
+ checkStrEq(t, s, `<root>This <![CDATA[is ]]>a <![CDATA[text ]]>Element!!</root>`)
+
+ // Check we can parse the output
+ err = doc.ReadFromString(s)
+ if err != nil {
+ t.Fatal("etree: incorrect ReadFromString result")
+ }
+ if doc.Root().Text() != "This is a text Element!!" {
+ t.Error("etree: invalid text")
+ }
+}
+
+func TestIndentSettings(t *testing.T) {
+ doc := NewDocument()
+ root := doc.CreateElement("root")
+ ch1 := root.CreateElement("child1")
+ ch1.CreateElement("child2")
+
+ // First test with NoIndent.
+ doc.Indent(NoIndent)
+ s, err := doc.WriteToString()
+ if err != nil {
+ t.Error("etree: failed to serialize document")
+ }
+ expected := "<root><child1><child2/></child1></root>"
+ checkStrEq(t, s, expected)
+
+ // Run all indent test cases.
+ tests := []struct {
+ useTabs, useCRLF bool
+ ws, nl string
+ }{
+ {false, false, " ", "\n"},
+ {false, true, " ", "\r\n"},
+ {true, false, "\t", "\n"},
+ {true, true, "\t", "\r\n"},
+ }
+
+ for _, test := range tests {
+ doc.WriteSettings.UseCRLF = test.useCRLF
+ if test.useTabs {
+ doc.IndentTabs()
+ s, err := doc.WriteToString()
+ if err != nil {
+ t.Error("etree: failed to serialize document")
+ }
+ tab := test.ws
+ expected := "<root>" + test.nl + tab + "<child1>" + test.nl +
+ tab + tab + "<child2/>" + test.nl + tab +
+ "</child1>" + test.nl + "</root>" + test.nl
+ checkStrEq(t, s, expected)
+ } else {
+ for i := 0; i < 256; i++ {
+ doc.Indent(i)
+ s, err := doc.WriteToString()
+ if err != nil {
+ t.Error("etree: failed to serialize document")
+ }
+ tab := strings.Repeat(test.ws, i)
+ expected := "<root>" + test.nl + tab + "<child1>" + test.nl +
+ tab + tab + "<child2/>" + test.nl + tab +
+ "</child1>" + test.nl + "</root>" + test.nl
+ checkStrEq(t, s, expected)
+ }
+ }
+ }
+}
+
+func TestTokenIndexing(t *testing.T) {
+ s := `<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="style.xsl"?>
+<store xmlns:t="urn:books-com:titles">
+ <!Directive>
+ <!--This is a comment-->
+ <book lang="en">
+ <t:title>Great Expectations</t:title>
+ <author>Charles Dickens</author>
+ <review/>
+ </book>
+</store>`
+
+ doc := newDocumentFromString(t, s)
+ review := doc.FindElement("/store/book/review")
+ review.SetText("Excellent")
+
+ checkIndexes(t, &doc.Element)
+
+ doc.Indent(4)
+ checkIndexes(t, &doc.Element)
+
+ doc.Indent(NoIndent)
+ checkIndexes(t, &doc.Element)
+
+ e := NewElement("foo")
+ store := doc.SelectElement("store")
+ store.InsertChildAt(0, e)
+ checkIndexes(t, &doc.Element)
+
+ store.RemoveChildAt(0)
+ checkIndexes(t, &doc.Element)
+}
+
+func TestSetText(t *testing.T) {
+ doc := NewDocument()
+ root := doc.CreateElement("root")
+
+ checkDocEq(t, doc, `<root/>`)
+ checkStrEq(t, root.Text(), "")
+ checkIntEq(t, len(root.Child), 0)
+
+ root.SetText("foo")
+ checkDocEq(t, doc, `<root>foo</root>`)
+ checkStrEq(t, root.Text(), "foo")
+ checkIntEq(t, len(root.Child), 1)
+
+ root.SetText("bar")
+ checkDocEq(t, doc, `<root>bar</root>`)
+ checkStrEq(t, root.Text(), "bar")
+ checkIntEq(t, len(root.Child), 1)
+
+ root.CreateCData("cdata")
+ checkDocEq(t, doc, `<root>bar<![CDATA[cdata]]></root>`)
+ checkStrEq(t, root.Text(), "barcdata")
+ checkIntEq(t, len(root.Child), 2)
+
+ root.SetText("qux")
+ checkDocEq(t, doc, `<root>qux</root>`)
+ checkStrEq(t, root.Text(), "qux")
+ checkIntEq(t, len(root.Child), 1)
+
+ root.CreateCData("cdata")
+ checkDocEq(t, doc, `<root>qux<![CDATA[cdata]]></root>`)
+ checkStrEq(t, root.Text(), "quxcdata")
+ checkIntEq(t, len(root.Child), 2)
+
+ root.SetCData("baz")
+ checkDocEq(t, doc, `<root><![CDATA[baz]]></root>`)
+ checkStrEq(t, root.Text(), "baz")
+ checkIntEq(t, len(root.Child), 1)
+
+ root.CreateText("corge")
+ root.CreateCData("grault")
+ root.CreateText("waldo")
+ root.CreateCData("fred")
+ root.CreateElement("child")
+ checkDocEq(t, doc, `<root><![CDATA[baz]]>corge<![CDATA[grault]]>waldo<![CDATA[fred]]><child/></root>`)
+ checkStrEq(t, root.Text(), "bazcorgegraultwaldofred")
+ checkIntEq(t, len(root.Child), 6)
+
+ root.SetText("plugh")
+ checkDocEq(t, doc, `<root>plugh<child/></root>`)
+ checkStrEq(t, root.Text(), "plugh")
+ checkIntEq(t, len(root.Child), 2)
+
+ root.SetText("")
+ checkDocEq(t, doc, `<root><child/></root>`)
+ checkStrEq(t, root.Text(), "")
+ checkIntEq(t, len(root.Child), 1)
+
+ root.SetText("")
+ checkDocEq(t, doc, `<root><child/></root>`)
+ checkStrEq(t, root.Text(), "")
+ checkIntEq(t, len(root.Child), 1)
+
+ root.RemoveChildAt(0)
+ root.CreateText("corge")
+ root.CreateCData("grault")
+ root.CreateText("waldo")
+ root.CreateCData("fred")
+ root.CreateElement("child")
+ checkDocEq(t, doc, `<root>corge<![CDATA[grault]]>waldo<![CDATA[fred]]><child/></root>`)
+ checkStrEq(t, root.Text(), "corgegraultwaldofred")
+ checkIntEq(t, len(root.Child), 5)
+
+ root.SetText("")
+ checkDocEq(t, doc, `<root><child/></root>`)
+ checkStrEq(t, root.Text(), "")
+ checkIntEq(t, len(root.Child), 1)
+}
+
+func TestSetTail(t *testing.T) {
+ doc := NewDocument()
+ root := doc.CreateElement("root")
+ child := root.CreateElement("child")
+ root.CreateText("\n\t")
+ child.SetText("foo")
+ checkDocEq(t, doc, "<root><child>foo</child>\n\t</root>")
+ checkStrEq(t, child.Tail(), "\n\t")
+ checkIntEq(t, len(root.Child), 2)
+ checkIntEq(t, len(child.Child), 1)
+
+ root.CreateCData(" ")
+ checkDocEq(t, doc, "<root><child>foo</child>\n\t<![CDATA[ ]]></root>")
+ checkStrEq(t, child.Tail(), "\n\t ")
+ checkIntEq(t, len(root.Child), 3)
+ checkIntEq(t, len(child.Child), 1)
+
+ child.SetTail("")
+ checkDocEq(t, doc, "<root><child>foo</child></root>")
+ checkStrEq(t, child.Tail(), "")
+ checkIntEq(t, len(root.Child), 1)
+ checkIntEq(t, len(child.Child), 1)
+
+ child.SetTail("\t\t\t")
+ checkDocEq(t, doc, "<root><child>foo</child>\t\t\t</root>")
+ checkStrEq(t, child.Tail(), "\t\t\t")
+ checkIntEq(t, len(root.Child), 2)
+ checkIntEq(t, len(child.Child), 1)
+
+ child.SetTail("\t\n\n\t")
+ checkDocEq(t, doc, "<root><child>foo</child>\t\n\n\t</root>")
+ checkStrEq(t, child.Tail(), "\t\n\n\t")
+ checkIntEq(t, len(root.Child), 2)
+ checkIntEq(t, len(child.Child), 1)
+
+ child.SetTail("")
+ checkDocEq(t, doc, "<root><child>foo</child></root>")
+ checkStrEq(t, child.Tail(), "")
+ checkIntEq(t, len(root.Child), 1)
+ checkIntEq(t, len(child.Child), 1)
+}
+
+func TestAttrParent(t *testing.T) {
+ doc := NewDocument()
+ root := doc.CreateElement("root")
+ attr1 := root.CreateAttr("bar", "1")
+ attr2 := root.CreateAttr("qux", "2")
+
+ checkIntEq(t, len(root.Attr), 2)
+ checkElementEq(t, attr1.Element(), root)
+ checkElementEq(t, attr2.Element(), root)
+
+ attr1 = root.RemoveAttr("bar")
+ attr2 = root.RemoveAttr("qux")
+ checkElementEq(t, attr1.Element(), nil)
+ checkElementEq(t, attr2.Element(), nil)
+
+ s := `<root a="1" b="2" c="3" d="4"/>`
+ err := doc.ReadFromString(s)
+ if err != nil {
+ t.Error("etree: failed to parse document")
+ }
+
+ root = doc.SelectElement("root")
+ for i := range root.Attr {
+ checkElementEq(t, root.Attr[i].Element(), root)
+ }
+}
+
+func TestDefaultNamespaceURI(t *testing.T) {
+ s := `
+<root xmlns="https://root.example.com" xmlns:attrib="https://attrib.example.com" attrib:a="foo" b="bar">
+ <child1 xmlns="https://child.example.com" attrib:a="foo">
+ <grandchild1 xmlns="https://grandchild.example.com" a="foo">
+ </grandchild1>
+ <grandchild2 a="foo">
+ <greatgrandchild1 attrib:a="foo"/>
+ </grandchild2>
+ </child1>
+ <child2 a="foo"/>
+</root>`
+
+ doc := newDocumentFromString(t, s)
+ root := doc.SelectElement("root")
+ child1 := root.SelectElement("child1")
+ child2 := root.SelectElement("child2")
+ grandchild1 := child1.SelectElement("grandchild1")
+ grandchild2 := child1.SelectElement("grandchild2")
+ greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1")
+
+ checkStrEq(t, doc.NamespaceURI(), "")
+ checkStrEq(t, root.NamespaceURI(), "https://root.example.com")
+ checkStrEq(t, child1.NamespaceURI(), "https://child.example.com")
+ checkStrEq(t, child2.NamespaceURI(), "https://root.example.com")
+ checkStrEq(t, grandchild1.NamespaceURI(), "https://grandchild.example.com")
+ checkStrEq(t, grandchild2.NamespaceURI(), "https://child.example.com")
+ checkStrEq(t, greatgrandchild1.NamespaceURI(), "https://child.example.com")
+
+ checkStrEq(t, root.Attr[0].NamespaceURI(), "")
+ checkStrEq(t, root.Attr[1].NamespaceURI(), "")
+ checkStrEq(t, root.Attr[2].NamespaceURI(), "https://attrib.example.com")
+ checkStrEq(t, root.Attr[3].NamespaceURI(), "")
+ checkStrEq(t, child1.Attr[0].NamespaceURI(), "")
+ checkStrEq(t, child1.Attr[1].NamespaceURI(), "https://attrib.example.com")
+ checkStrEq(t, child2.Attr[0].NamespaceURI(), "")
+ checkStrEq(t, grandchild1.Attr[0].NamespaceURI(), "")
+ checkStrEq(t, grandchild1.Attr[1].NamespaceURI(), "")
+ checkStrEq(t, grandchild2.Attr[0].NamespaceURI(), "")
+ checkStrEq(t, greatgrandchild1.Attr[0].NamespaceURI(), "https://attrib.example.com")
+
+ f := doc.FindElements("//*[namespace-uri()='https://root.example.com']")
+ if len(f) != 2 || f[0] != root || f[1] != child2 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='https://child.example.com']")
+ if len(f) != 3 || f[0] != child1 || f[1] != grandchild2 || f[2] != greatgrandchild1 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='https://grandchild.example.com']")
+ if len(f) != 1 || f[0] != grandchild1 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='']")
+ if len(f) != 0 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='foo']")
+ if len(f) != 0 {
+ t.Error("etree: failed namespace-uri test")
+ }
+}
+
+func TestLocalNamespaceURI(t *testing.T) {
+ s := `
+<a:root xmlns:a="https://root.example.com">
+ <b:child1 xmlns:b="https://child.example.com">
+ <c:grandchild1 xmlns:c="https://grandchild.example.com"/>
+ <b:grandchild2>
+ <a:greatgrandchild1/>
+ </b:grandchild2>
+ <a:grandchild3/>
+ <grandchild4/>
+ </b:child1>
+ <a:child2>
+ </a:child2>
+ <child3>
+ </child3>
+</a:root>`
+
+ doc := newDocumentFromString(t, s)
+ root := doc.SelectElement("root")
+ child1 := root.SelectElement("child1")
+ child2 := root.SelectElement("child2")
+ child3 := root.SelectElement("child3")
+ grandchild1 := child1.SelectElement("grandchild1")
+ grandchild2 := child1.SelectElement("grandchild2")
+ grandchild3 := child1.SelectElement("grandchild3")
+ grandchild4 := child1.SelectElement("grandchild4")
+ greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1")
+
+ checkStrEq(t, doc.NamespaceURI(), "")
+ checkStrEq(t, root.NamespaceURI(), "https://root.example.com")
+ checkStrEq(t, child1.NamespaceURI(), "https://child.example.com")
+ checkStrEq(t, child2.NamespaceURI(), "https://root.example.com")
+ checkStrEq(t, child3.NamespaceURI(), "")
+ checkStrEq(t, grandchild1.NamespaceURI(), "https://grandchild.example.com")
+ checkStrEq(t, grandchild2.NamespaceURI(), "https://child.example.com")
+ checkStrEq(t, grandchild3.NamespaceURI(), "https://root.example.com")
+ checkStrEq(t, grandchild4.NamespaceURI(), "")
+ checkStrEq(t, greatgrandchild1.NamespaceURI(), "https://root.example.com")
+
+ f := doc.FindElements("//*[namespace-uri()='https://root.example.com']")
+ if len(f) != 4 || f[0] != root || f[1] != child2 || f[2] != grandchild3 || f[3] != greatgrandchild1 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='https://child.example.com']")
+ if len(f) != 2 || f[0] != child1 || f[1] != grandchild2 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='https://grandchild.example.com']")
+ if len(f) != 1 || f[0] != grandchild1 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='']")
+ if len(f) != 2 || f[0] != child3 || f[1] != grandchild4 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='foo']")
+ if len(f) != 0 {
+ t.Error("etree: failed namespace-uri test")
+ }
+}
+
+func TestWhitespace(t *testing.T) {
+ s := "<root>\n\t<child>\n\t\t<grandchild> x</grandchild>\n </child>\n</root>"
+
+ doc := newDocumentFromString(t, s)
+ root := doc.Root()
+ checkIntEq(t, len(root.Child), 3)
+
+ cd := root.Child[0].(*CharData)
+ checkBoolEq(t, cd.IsWhitespace(), true)
+ checkStrBinaryEq(t, cd.Data, "\n\t")
+
+ cd = root.Child[2].(*CharData)
+ checkBoolEq(t, cd.IsWhitespace(), true)
+ checkStrBinaryEq(t, cd.Data, "\n")
+
+ child := root.SelectElement("child")
+ checkIntEq(t, len(child.Child), 3)
+
+ cd = child.Child[0].(*CharData)
+ checkBoolEq(t, cd.IsWhitespace(), true)
+ checkStrBinaryEq(t, cd.Data, "\n\t\t")
+
+ cd = child.Child[2].(*CharData)
+ checkBoolEq(t, cd.IsWhitespace(), true)
+ checkStrBinaryEq(t, cd.Data, "\n ")
+
+ grandchild := child.SelectElement("grandchild")
+ checkIntEq(t, len(grandchild.Child), 1)
+
+ cd = grandchild.Child[0].(*CharData)
+ checkBoolEq(t, cd.IsWhitespace(), false)
+
+ cd.SetData(" ")
+ checkBoolEq(t, cd.IsWhitespace(), true)
+
+ cd.SetData(" x")
+ checkBoolEq(t, cd.IsWhitespace(), false)
+
+ cd.SetData("\t\n\r ")
+ checkBoolEq(t, cd.IsWhitespace(), true)
+
+ cd.SetData("\uFFFD")
+ checkBoolEq(t, cd.IsWhitespace(), false)
+
+ cd.SetData("")
+ checkBoolEq(t, cd.IsWhitespace(), true)
+}
diff --git a/example_test.go b/example_test.go
new file mode 100644
index 0000000..45fc4ca
--- /dev/null
+++ b/example_test.go
@@ -0,0 +1,69 @@
+// Copyright 2015-2019 Brett Vickers.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package etree
+
+import "os"
+
+// Create an etree Document, add XML entities to it, and serialize it
+// to stdout.
+func ExampleDocument_creating() {
+ doc := NewDocument()
+ doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
+ doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
+
+ people := doc.CreateElement("People")
+ people.CreateComment("These are all known people")
+
+ jon := people.CreateElement("Person")
+ jon.CreateAttr("name", "Jon O'Reilly")
+
+ sally := people.CreateElement("Person")
+ sally.CreateAttr("name", "Sally")
+
+ doc.Indent(2)
+ doc.WriteTo(os.Stdout)
+ // Output:
+ // <?xml version="1.0" encoding="UTF-8"?>
+ // <?xml-stylesheet type="text/xsl" href="style.xsl"?>
+ // <People>
+ // <!--These are all known people-->
+ // <Person name="Jon O&apos;Reilly"/>
+ // <Person name="Sally"/>
+ // </People>
+}
+
+func ExampleDocument_reading() {
+ doc := NewDocument()
+ if err := doc.ReadFromFile("document.xml"); err != nil {
+ panic(err)
+ }
+}
+
+func ExamplePath() {
+ xml := `
+<bookstore>
+ <book>
+ <title>Great Expectations</title>
+ <author>Charles Dickens</author>
+ </book>
+ <book>
+ <title>Ulysses</title>
+ <author>James Joyce</author>
+ </book>
+</bookstore>`
+
+ doc := NewDocument()
+ doc.ReadFromString(xml)
+ for _, e := range doc.FindElements(".//book[author='Charles Dickens']") {
+ doc := NewDocumentWithRoot(e.Copy())
+ doc.Indent(2)
+ doc.WriteTo(os.Stdout)
+ }
+ // Output:
+ // <book>
+ // <title>Great Expectations</title>
+ // <author>Charles Dickens</author>
+ // </book>
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..2a269fa
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module github.com/beevik/etree
+
+go 1.12
diff --git a/helpers.go b/helpers.go
new file mode 100644
index 0000000..825e14e
--- /dev/null
+++ b/helpers.go
@@ -0,0 +1,276 @@
+// Copyright 2015-2019 Brett Vickers.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package etree
+
+import (
+ "bufio"
+ "io"
+ "strings"
+ "unicode/utf8"
+)
+
// stack is a minimal last-in-first-out container of untyped values backed
// by a slice. The zero value is an empty stack.
type stack struct {
	data []interface{}
}

// empty reports whether the stack holds no values.
func (s *stack) empty() bool {
	return len(s.data) < 1
}

// push places value on top of the stack.
func (s *stack) push(value interface{}) {
	s.data = append(s.data, value)
}

// pop removes the top value and returns it. Calling pop on an empty stack
// panics with an index-out-of-range error.
func (s *stack) pop() interface{} {
	top := len(s.data) - 1
	value := s.data[top]
	s.data[top] = nil // clear the slot so the slice no longer references the value
	s.data = s.data[:top]
	return value
}

// peek returns the top value without removing it. Calling peek on an empty
// stack panics with an index-out-of-range error.
func (s *stack) peek() interface{} {
	top := len(s.data) - 1
	return s.data[top]
}
+
// A fifo is a simple first-in-first-out queue of untyped values stored in a
// circular buffer. head indexes the oldest value and tail the slot the next
// value will be written to; head == tail means the queue is empty. The zero
// value is an empty queue.
type fifo struct {
	data       []interface{}
	head, tail int
}

// add appends value to the back of the queue, enlarging the buffer first
// when adding would leave no free slot.
func (f *fifo) add(value interface{}) {
	// One slot always stays unused so that a full buffer can be told apart
	// from an empty one.
	if f.len()+1 >= len(f.data) {
		f.grow()
	}
	f.data[f.tail] = value
	f.tail++
	if f.tail == len(f.data) {
		f.tail = 0 // wrap around
	}
}

// remove takes the value at the front of the queue and returns it. It does
// not check whether the queue is empty.
func (f *fifo) remove() interface{} {
	front := f.data[f.head]
	f.data[f.head] = nil // clear the slot so the buffer no longer references the value
	f.head++
	if f.head == len(f.data) {
		f.head = 0 // wrap around
	}
	return front
}

// len returns the number of values currently queued.
func (f *fifo) len() int {
	if f.head <= f.tail {
		return f.tail - f.head
	}
	// The live region wraps past the end of the buffer.
	return len(f.data) - f.head + f.tail
}

// grow doubles the buffer (or allocates four slots when there is none) and
// moves the queued values to the start of the new buffer in queue order.
func (f *fifo) grow() {
	size := 4
	if cur := len(f.data); cur != 0 {
		size = cur * 2
	}
	count := f.len()
	buf := make([]interface{}, size)
	if f.head <= f.tail {
		copy(buf, f.data[f.head:f.tail])
	} else {
		// Wrapped: copy the part up to the end of the old buffer, then the
		// part that wrapped to its start.
		moved := copy(buf, f.data[f.head:])
		copy(buf[moved:], f.data[:f.tail])
	}
	f.data, f.head, f.tail = buf, 0, count
}
+
// countReader wraps an io.Reader and keeps a running total of the bytes
// that have been read through it.
type countReader struct {
	r     io.Reader
	bytes int64
}

// newCountReader returns a countReader that reads from r, with its byte
// total starting at zero.
func newCountReader(r io.Reader) *countReader {
	cr := new(countReader)
	cr.r = r
	return cr
}

// Read forwards to the wrapped reader and adds the number of bytes it
// reported to the running total, whether or not an error was also returned.
func (cr *countReader) Read(p []byte) (n int, err error) {
	n, err = cr.r.Read(p)
	cr.bytes += int64(n)
	return n, err
}
+
// countWriter wraps an io.Writer and keeps a running total of the bytes
// that the wrapped writer reported as written.
type countWriter struct {
	w     io.Writer
	bytes int64
}

// newCountWriter returns a countWriter that writes to w, with its byte
// total starting at zero.
func newCountWriter(w io.Writer) *countWriter {
	cw := new(countWriter)
	cw.w = w
	return cw
}

// Write forwards to the wrapped writer and adds the number of bytes it
// reported to the running total, whether or not an error was also returned.
func (cw *countWriter) Write(p []byte) (n int, err error) {
	n, err = cw.w.Write(p)
	cw.bytes += int64(n)
	return n, err
}
+
// isWhitespace reports whether every byte of the string s is a space, tab,
// line feed or carriage return. The empty string counts as whitespace.
func isWhitespace(s string) bool {
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case ' ', '\t', '\n', '\r':
			// still whitespace; keep scanning
		default:
			return false
		}
	}
	return true
}
+
// spaceMatch reports whether namespace a matches namespace b. An empty a
// matches every b; otherwise the two must be equal.
func spaceMatch(a, b string) bool {
	return a == "" || a == b
}
+
// spaceDecompose splits a namespace:tag identifier at its first ':' and
// returns the text before it as space and the text after it as key. When
// str has no ':', space is empty and key is str itself.
func spaceDecompose(str string) (space, key string) {
	if sep := strings.IndexByte(str, ':'); sep >= 0 {
		return str[:sep], str[sep+1:]
	}
	return "", str
}
+
// Source strings for indentCRLF and indentLF. Each starts with "\r\n" and
// continues with a run of the indent character, so that short indents can
// be produced by slicing rather than building a new string.
const (
	indentSpaces = "\r\n "
	indentTabs   = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
)

// indentCRLF returns the first two bytes of source (the CRLF) followed by n
// copies of its third byte. A negative n yields the CRLF alone. When source
// is long enough the result is a slice of it; otherwise the missing indent
// characters are appended.
func indentCRLF(n int, source string) string {
	if n < 0 {
		return source[:2]
	}
	end := n + 2 // length of "\r\n" plus n indent characters
	if end <= len(source) {
		return source[:end]
	}
	return source + strings.Repeat(source[2:3], end-len(source))
}

// indentLF returns the second byte of source (the LF) followed by n copies
// of its third byte. A negative n yields the LF alone. When source is long
// enough the result is a slice of it; otherwise the missing indent
// characters are appended.
func indentLF(n int, source string) string {
	if n < 0 {
		return source[1:2]
	}
	end := n + 2 // index just past n indent characters in source
	if end <= len(source) {
		return source[1:end]
	}
	return source[1:] + strings.Repeat(source[2:3], end-len(source))
}
+
// nextIndex returns the index within s of the first occurrence of sep at or
// after offset, or -1 when sep does not occur there. offset must not exceed
// len(s); a larger value makes the slice expression panic.
func nextIndex(s, sep string, offset int) int {
	rel := strings.Index(s[offset:], sep)
	if rel < 0 {
		return -1
	}
	return offset + rel
}
+
// isInteger reports whether s is a decimal integer: an optional leading '-'
// followed by one or more ASCII digits. The empty string and a lone "-"
// are not integers.
func isInteger(s string) bool {
	// Drop a single leading minus sign; anything after it must be digits.
	if len(s) > 0 && s[0] == '-' {
		s = s[1:]
	}
	// At least one digit is required, otherwise "" and "-" would pass.
	if len(s) == 0 {
		return false
	}
	for i := 0; i < len(s); i++ {
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}
+
// escapeMode selects which characters escapeString replaces.
type escapeMode byte

const (
	// escapeNormal replaces & < > ' and ".
	escapeNormal escapeMode = iota
	// escapeCanonicalText replaces & < > and carriage return.
	escapeCanonicalText
	// escapeCanonicalAttr replaces & < " tab, line feed and carriage return.
	escapeCanonicalAttr
)

// escapeString writes s to w with the characters selected by mode m replaced
// by XML entity or character references. In every mode, a rune that
// isInCharacterRange rejects, and any byte that does not decode as UTF-8,
// is written as U+FFFD. Errors from w are not inspected here.
func escapeString(w *bufio.Writer, s string, m escapeMode) {
	pending := 0 // start of the run of bytes not yet written to w
	for pos := 0; pos < len(s); {
		r, size := utf8.DecodeRuneInString(s[pos:])
		start := pos
		pos += size

		// repl stays empty when the rune is to be copied through unchanged.
		repl := ""
		switch r {
		case '&':
			repl = "&amp;"
		case '<':
			repl = "&lt;"
		case '>':
			if m != escapeCanonicalAttr {
				repl = "&gt;"
			}
		case '\'':
			if m == escapeNormal {
				repl = "&apos;"
			}
		case '"':
			if m != escapeCanonicalText {
				repl = "&quot;"
			}
		case '\t':
			if m == escapeCanonicalAttr {
				repl = "&#x9;"
			}
		case '\n':
			if m == escapeCanonicalAttr {
				repl = "&#xA;"
			}
		case '\r':
			if m != escapeNormal {
				repl = "&#xD;"
			}
		default:
			// A decoded U+FFFD of width 1 is the decoder's error signal
			// for an invalid byte, not a real U+FFFD in the input.
			if !isInCharacterRange(r) || (r == utf8.RuneError && size == 1) {
				repl = "\uFFFD"
			}
		}
		if repl == "" {
			continue
		}

		// Flush the untouched run before this rune, then the replacement.
		w.WriteString(s[pending:start])
		w.WriteString(repl)
		pending = pos
	}
	w.WriteString(s[pending:])
}

// isInCharacterRange reports whether r is tab, line feed, carriage return,
// or lies in one of the ranges 0x20-0xD7FF, 0xE000-0xFFFD or
// 0x10000-0x10FFFF.
func isInCharacterRange(r rune) bool {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		return true
	case 0x20 <= r && r <= 0xD7FF:
		return true
	case 0xE000 <= r && r <= 0xFFFD:
		return true
	case 0x10000 <= r && r <= 0x10FFFF:
		return true
	}
	return false
}
diff --git a/path.go b/path.go
new file mode 100644
index 0000000..d183c89
--- /dev/null
+++ b/path.go
@@ -0,0 +1,580 @@
+// Copyright 2015-2019 Brett Vickers.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package etree
+
+import (
+ "strconv"
+ "strings"
+)
+
+/*
+A Path is a string that represents a search path through an etree starting
+from the document root or an arbitrary element. Paths are used with the
+Element object's Find* methods to locate and return desired elements.
+
+A Path consists of a series of slash-separated "selectors", each of which may
+be modified by one or more bracket-enclosed "filters". Selectors are used to
+traverse the etree from element to element, while filters are used to narrow
+the list of candidate elements at each node.
+
+Although etree Path strings are structurally and behaviorally similar to XPath
+strings (https://www.w3.org/TR/1999/REC-xpath-19991116/), they have a more
+limited set of selectors and filtering options.
+
+The following selectors are supported by etree paths:
+
+ . Select the current element.
+ .. Select the parent of the current element.
+ * Select all child elements of the current element.
+ / Select the root element when used at the start of a path.
+ // Select all descendants of the current element.
+ tag Select all child elements with a name matching the tag.
+
+The following basic filters are supported:
+
+ [@attrib] Keep elements with an attribute named attrib.
+ [@attrib='val'] Keep elements with an attribute named attrib and value matching val.
+ [tag] Keep elements with a child element named tag.
+ [tag='val'] Keep elements with a child element named tag and text matching val.
+ [n] Keep the n-th element, where n is a numeric index starting from 1. A negative n counts backward from the last element (-1 is the last).
+
+The following function-based filters are supported:
+
+ [text()] Keep elements with non-empty text.
+ [text()='val'] Keep elements whose text matches val.
+ [local-name()='val'] Keep elements whose un-prefixed tag matches val.
+ [name()='val'] Keep elements whose full tag exactly matches val.
+ [namespace-prefix()] Keep elements with non-empty namespace prefixes.
+ [namespace-prefix()='val'] Keep elements whose namespace prefix matches val.
+ [namespace-uri()] Keep elements with non-empty namespace URIs.
+ [namespace-uri()='val'] Keep elements whose namespace URI matches val.
+
+Below are some examples of etree path strings.
+
+Select the bookstore child element of the root element:
+ /bookstore
+
+Beginning from the root element, select the title elements of all descendant
+book elements having a 'category' attribute of 'WEB':
+ //book[@category='WEB']/title
+
+Beginning from the current element, select the first descendant book element
+with a title child element containing the text 'Great Expectations':
+ .//book[title='Great Expectations'][1]
+
+Beginning from the current element, select all child elements of book elements
+with an attribute 'language' set to 'english':
+ ./book/*[@language='english']
+
+Beginning from the current element, select all child elements of book elements
+containing the text 'special':
+ ./book/*[text()='special']
+
+Beginning from the current element, select all descendant book elements whose
+title child element has a 'language' attribute of 'french':
+ .//book/title[@language='french']/..
+
+Beginning from the current element, select all descendant book elements
+belonging to the http://www.w3.org/TR/html4/ namespace:
+ .//book[namespace-uri()='http://www.w3.org/TR/html4/']
+
+*/
+type Path struct {
+ segments []segment // compiled segments, consumed front to back during traversal
+}
+
+// ErrPath is the error type produced by path functions when they are
+// given an invalid etree path. Its value is the description of the problem.
+type ErrPath string
+
+// Error returns the description of the path error, prefixed with "etree: ".
+func (err ErrPath) Error() string {
+	const prefix = "etree: "
+	return prefix + string(err)
+}
+
+// CompilePath creates an optimized version of an XPath-like string that
+// can be used to query elements in an element tree.
+func CompilePath(path string) (Path, error) {
+ var comp compiler
+ segments := comp.parsePath(path)
+ if comp.err != ErrPath("") {
+ return Path{nil}, comp.err
+ }
+ return Path{segments}, nil
+}
+
+// MustCompilePath is like CompilePath but panics with the compile error
+// instead of returning it. Use it for paths known to be valid, such as
+// hard-coded ones.
+func MustCompilePath(path string) Path {
+	compiled, err := CompilePath(path)
+	if err == nil {
+		return compiled
+	}
+	panic(err)
+}
+
+// A segment is the part of a path that lies between two "/" characters:
+// exactly one selector followed by zero or more [filters].
+type segment struct {
+	sel     selector
+	filters []filter
+}
+
+// apply runs the segment's selector against e to fill the pather's
+// candidate list, then narrows that list with each filter in order.
+func (seg *segment) apply(e *Element, p *pather) {
+	seg.sel.apply(e, p)
+	for i := range seg.filters {
+		seg.filters[i].apply(p)
+	}
+}
+
+// A selector selects XML elements for consideration by the
+// path traversal. Implementations append the elements they select,
+// relative to e, to p.candidates.
+type selector interface {
+ apply(e *Element, p *pather)
+}
+
+// A filter pares down a list of candidate XML elements based
+// on a path filter in [brackets]. Implementations replace p.candidates
+// with the subset they keep, using p.scratch as the working buffer.
+type filter interface {
+ apply(p *pather)
+}
+
+// A pather is a helper object that traverses an element tree using
+// a Path object. It collects and deduplicates all elements matching
+// the path query.
+type pather struct {
+ queue fifo // nodes (element plus remaining segments) waiting to be evaluated
+ results []*Element // matched elements, in the order they were first found
+ inResults map[*Element]bool // set of elements already present in results
+ candidates []*Element // elements produced by the segment currently being evaluated
+ scratch []*Element // used by filters: filled with the kept elements, then swapped with candidates
+}
+
+// A node represents an element and the remaining path segments that
+// should be applied against it by the pather.
+type node struct {
+ e *Element // element the next segment is applied to
+ segments []segment // segments still to be applied, starting with the next one
+}
+
+// newPather returns a pather whose result set, candidate list and scratch
+// buffer are empty but allocated, and whose queue is in its zero state.
+func newPather() *pather {
+	p := new(pather)
+	p.results = []*Element{}
+	p.inResults = map[*Element]bool{}
+	p.candidates = []*Element{}
+	p.scratch = []*Element{}
+	return p
+}
+
+// traverse walks the path starting at element e and returns every element
+// that satisfies the path's selectors and filters. Work is driven by the
+// pather's queue: e is queued with the full segment list, and eval queues
+// further nodes until none remain.
+func (p *pather) traverse(e *Element, path Path) []*Element {
+	p.queue.add(node{e: e, segments: path.segments})
+	for p.queue.len() > 0 {
+		next := p.queue.remove().(node)
+		p.eval(next)
+	}
+	return p.results
+}
+
+// eval evaluates the current path node by applying the node's first
+// remaining segment against the node's element. If that was the last
+// segment, the surviving candidates are added to the results (skipping
+// any already present); otherwise each candidate is queued together with
+// the segments that remain.
+func (p *pather) eval(n node) {
+	// A zero-value Path carries no segments. There is nothing to match,
+	// and indexing n.segments below would panic.
+	if len(n.segments) == 0 {
+		return
+	}
+
+	p.candidates = p.candidates[0:0]
+	seg, remain := n.segments[0], n.segments[1:]
+	seg.apply(n.e, p)
+
+	if len(remain) > 0 {
+		for _, c := range p.candidates {
+			p.queue.add(node{c, remain})
+		}
+		return
+	}
+
+	for _, c := range p.candidates {
+		if !p.inResults[c] {
+			p.inResults[c] = true
+			p.results = append(p.results, c)
+		}
+	}
+}
+
+// A compiler generates a compiled path from a path string.
+type compiler struct {
+ err ErrPath // error recorded while parsing; the empty string means no error
+}
+
+// parsePath converts an XPath-like path string into its list of segment
+// descriptors. Parsing stops at the first segment that records an error
+// in c.err; the segments produced so far are still returned.
+func (c *compiler) parsePath(path string) []segment {
+	// A trailing "//" would leave the descendant selector with nothing
+	// after it, so make it select all children explicitly.
+	if strings.HasSuffix(path, "//") {
+		path = path + "*"
+	}
+
+	var out []segment
+
+	// A leading "/" makes the path absolute: start from the root.
+	if len(path) > 0 && path[0] == '/' {
+		out = append(out, segment{sel: new(selectRoot), filters: []filter{}})
+		path = path[1:]
+	}
+
+	// Everything else is a "/"-separated list of segments.
+	for _, piece := range splitPath(path) {
+		out = append(out, c.parseSegment(piece))
+		if c.err != "" {
+			return out
+		}
+	}
+	return out
+}
+
+// splitPath cuts path at every '/' that is not enclosed in single quotes
+// and returns the pieces. The result always has at least one entry; empty
+// pieces (from a leading, trailing or doubled '/') are preserved.
+func splitPath(path string) []string {
+	var (
+		parts  []string
+		begin  int  // start of the piece currently being scanned
+		quoted bool // true while inside a '...' value
+	)
+	for i := 0; i < len(path); i++ {
+		switch c := path[i]; {
+		case c == '\'':
+			quoted = !quoted
+		case c == '/' && !quoted:
+			parts = append(parts, path[begin:i])
+			begin = i + 1
+		}
+	}
+	return append(parts, path[begin:])
+}
+
+// parseSegment parses a path segment between / characters: a selector
+// followed by zero or more [filters]. On a malformed filter it records
+// the problem in c.err and returns the segment parsed so far.
+func (c *compiler) parseSegment(path string) segment {
+	pieces := strings.Split(path, "[")
+	seg := segment{
+		sel:     c.parseSelector(pieces[0]),
+		filters: []filter{},
+	}
+	for _, fpath := range pieces[1:] {
+		// HasSuffix also rejects an empty piece, which arises from a
+		// trailing "[" or from "[["; indexing its last byte would panic.
+		if !strings.HasSuffix(fpath, "]") {
+			c.err = ErrPath("path has invalid filter [brackets].")
+			break
+		}
+		f := c.parseFilter(fpath[:len(fpath)-1])
+		if c.err != "" {
+			// Keep the first error and do not store the nil filter.
+			break
+		}
+		seg.filters = append(seg.filters, f)
+	}
+	return seg
+}
+
+// parseSelector maps the selector text at the start of a path segment to
+// its selector: "." is the element itself, ".." its parent, "*" all of
+// its children, the empty string (produced by "//") its descendants, and
+// anything else is treated as a child tag name.
+func (c *compiler) parseSelector(path string) selector {
+	if path == "." {
+		return &selectSelf{}
+	}
+	if path == ".." {
+		return &selectParent{}
+	}
+	if path == "*" {
+		return &selectChildren{}
+	}
+	if path == "" {
+		return &selectDescendants{}
+	}
+	return newSelectChildrenByTag(path)
+}
+
+// fnTable maps the function names accepted in filters such as [text()] or
+// [name()='val'] to the Element method whose string result the filter tests.
+var fnTable = map[string]func(e *Element) string{
+ "local-name": (*Element).name,
+ "name": (*Element).FullTag,
+ "namespace-prefix": (*Element).namespacePrefix,
+ "namespace-uri": (*Element).NamespaceURI,
+ "text": (*Element).Text,
+}
+
+// parseFilter parses a path filter contained within [brackets]; path is
+// the text between the brackets. On a malformed filter it records the
+// problem in c.err and returns nil.
+func (c *compiler) parseFilter(path string) filter {
+	if len(path) == 0 {
+		c.err = ErrPath("path contains an empty filter expression.")
+		return nil
+	}
+
+	// Filter contains [@attr='val'], [fn()='val'], or [tag='val']?
+	eqindex := strings.Index(path, "='")
+	if eqindex >= 0 {
+		// The closing quote must be the final character of the filter.
+		rindex := nextIndex(path, "'", eqindex+2)
+		if rindex != len(path)-1 {
+			c.err = ErrPath("path has mismatched filter quotes.")
+			return nil
+		}
+
+		key := path[:eqindex]
+		value := path[eqindex+2 : rindex]
+
+		switch {
+		case key == "":
+			// e.g. [='val']: there is nothing to compare the value
+			// against, and key[0] below would panic.
+			c.err = ErrPath("path has a filter with an empty key.")
+			return nil
+		case key[0] == '@':
+			return newFilterAttrVal(key[1:], value)
+		case strings.HasSuffix(key, "()"):
+			name := key[:len(key)-2]
+			if fn, ok := fnTable[name]; ok {
+				return newFilterFuncVal(fn, value)
+			}
+			c.err = ErrPath("path has unknown function " + name)
+			return nil
+		default:
+			return newFilterChildText(key, value)
+		}
+	}
+
+	// Filter contains [@attr], [N], [tag] or [fn()]
+	switch {
+	case path[0] == '@':
+		return newFilterAttr(path[1:])
+	case strings.HasSuffix(path, "()"):
+		name := path[:len(path)-2]
+		if fn, ok := fnTable[name]; ok {
+			return newFilterFunc(fn)
+		}
+		c.err = ErrPath("path has unknown function " + name)
+		return nil
+	case isInteger(path):
+		// The error is deliberately ignored: isInteger has already
+		// vetted the text, and whatever value Atoi yields is used as-is.
+		pos, _ := strconv.Atoi(path)
+		if pos > 0 {
+			// Path positions are 1-based; filterPos is 0-based.
+			return newFilterPos(pos - 1)
+		}
+		// Zero and negative positions are passed through unchanged;
+		// negative ones count back from the last candidate.
+		return newFilterPos(pos)
+	default:
+		return newFilterChild(path)
+	}
+}
+
+// selectSelf selects the current element into the candidate list.
+type selectSelf struct{}
+
+// apply appends e itself to the pather's candidates.
+func (s *selectSelf) apply(e *Element, p *pather) {
+ p.candidates = append(p.candidates, e)
+}
+
+// selectRoot selects the top-most ancestor of the element (the element
+// itself when it has no parent) into the candidate list.
+type selectRoot struct{}
+
+// apply follows parent links from e until an element without a parent is
+// reached, and appends that element to the candidates.
+func (s *selectRoot) apply(e *Element, p *pather) {
+	top := e
+	for parent := top.parent; parent != nil; parent = top.parent {
+		top = parent
+	}
+	p.candidates = append(p.candidates, top)
+}
+
+// selectParent selects the element's parent into the candidate list.
+// An element without a parent contributes no candidate.
+type selectParent struct{}
+
+func (s *selectParent) apply(e *Element, p *pather) {
+	parent := e.parent
+	if parent == nil {
+		return
+	}
+	p.candidates = append(p.candidates, parent)
+}
+
+// selectChildren selects every child of the element that is itself an
+// element into the candidate list; other kinds of child are skipped.
+type selectChildren struct{}
+
+func (s *selectChildren) apply(e *Element, p *pather) {
+	for i := range e.Child {
+		child, isElement := e.Child[i].(*Element)
+		if !isElement {
+			continue
+		}
+		p.candidates = append(p.candidates, child)
+	}
+}
+
+// selectDescendants selects the element itself, followed by all of its
+// descendant elements, into the candidate list.
+type selectDescendants struct{}
+
+// apply walks the subtree rooted at e with a local work queue: each
+// element taken from the queue becomes a candidate and has its element
+// children queued in turn.
+func (s *selectDescendants) apply(e *Element, p *pather) {
+	var pending fifo
+	pending.add(e)
+	for pending.len() > 0 {
+		cur := pending.remove().(*Element)
+		p.candidates = append(p.candidates, cur)
+		for _, tok := range cur.Child {
+			child, isElement := tok.(*Element)
+			if isElement {
+				pending.add(child)
+			}
+		}
+	}
+}
+
+// selectChildrenByTag selects into the candidate list every child element
+// whose tag equals the selector's tag and whose namespace prefix is
+// accepted by spaceMatch.
+type selectChildrenByTag struct {
+	space, tag string
+}
+
+// newSelectChildrenByTag builds the selector from a tag name as written
+// in the path, split into its two parts by spaceDecompose.
+func newSelectChildrenByTag(path string) *selectChildrenByTag {
+	space, tag := spaceDecompose(path)
+	return &selectChildrenByTag{space: space, tag: tag}
+}
+
+func (s *selectChildrenByTag) apply(e *Element, p *pather) {
+	for _, tok := range e.Child {
+		child, isElement := tok.(*Element)
+		if !isElement {
+			continue
+		}
+		if spaceMatch(s.space, child.Space) && s.tag == child.Tag {
+			p.candidates = append(p.candidates, child)
+		}
+	}
+}
+
+// filterPos filters the candidate list, keeping only the candidate at the
+// specified index. A non-negative index counts from the first candidate
+// (0 is the first); a negative index counts back from the last (-1 is the
+// last). An index that falls outside the list leaves no candidates.
+type filterPos struct {
+	index int
+}
+
+func newFilterPos(pos int) *filterPos {
+	return &filterPos{pos}
+}
+
+func (f *filterPos) apply(p *pather) {
+	idx := f.index
+	if idx < 0 {
+		// Add the length instead of negating the index: negating the
+		// most negative int overflows, which would let the bounds check
+		// pass and then index the slice with a negative value.
+		idx += len(p.candidates)
+	}
+	if idx >= 0 && idx < len(p.candidates) {
+		p.scratch = append(p.scratch, p.candidates[idx])
+	}
+	// Swap buffers: the kept elements become the candidates, and the old
+	// candidate storage is truncated for reuse as the next scratch space.
+	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
+}
+
+// filterAttr filters the candidate list, keeping the elements that carry
+// an attribute with the specified key.
+type filterAttr struct {
+	space, key string
+}
+
+func newFilterAttr(str string) *filterAttr {
+	space, key := spaceDecompose(str)
+	return &filterAttr{space: space, key: key}
+}
+
+// matches reports whether e has an attribute selected by the filter.
+func (f *filterAttr) matches(e *Element) bool {
+	for i := range e.Attr {
+		if spaceMatch(f.space, e.Attr[i].Space) && f.key == e.Attr[i].Key {
+			return true
+		}
+	}
+	return false
+}
+
+func (f *filterAttr) apply(p *pather) {
+	for _, cand := range p.candidates {
+		if f.matches(cand) {
+			p.scratch = append(p.scratch, cand)
+		}
+	}
+	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
+}
+
+// filterAttrVal filters the candidate list, keeping the elements that
+// carry an attribute with the specified key and exactly the specified
+// value.
+type filterAttrVal struct {
+	space, key, val string
+}
+
+func newFilterAttrVal(str, value string) *filterAttrVal {
+	space, key := spaceDecompose(str)
+	return &filterAttrVal{space: space, key: key, val: value}
+}
+
+// matches reports whether e has an attribute selected by the filter whose
+// value equals the filter's value.
+func (f *filterAttrVal) matches(e *Element) bool {
+	for i := range e.Attr {
+		a := &e.Attr[i]
+		if spaceMatch(f.space, a.Space) && f.key == a.Key && f.val == a.Value {
+			return true
+		}
+	}
+	return false
+}
+
+func (f *filterAttrVal) apply(p *pather) {
+	for _, cand := range p.candidates {
+		if f.matches(cand) {
+			p.scratch = append(p.scratch, cand)
+		}
+	}
+	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
+}
+
+// filterFunc filters the candidate list, keeping the elements for which
+// a custom function returns a non-empty string.
+type filterFunc struct {
+	fn func(e *Element) string
+}
+
+func newFilterFunc(fn func(e *Element) string) *filterFunc {
+	return &filterFunc{fn: fn}
+}
+
+func (f *filterFunc) apply(p *pather) {
+	for i := range p.candidates {
+		cand := p.candidates[i]
+		if f.fn(cand) == "" {
+			continue
+		}
+		p.scratch = append(p.scratch, cand)
+	}
+	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
+}
+
+// filterFuncVal filters the candidate list, keeping the elements for
+// which a custom function returns exactly the specified value.
+type filterFuncVal struct {
+	fn  func(e *Element) string
+	val string
+}
+
+func newFilterFuncVal(fn func(e *Element) string, value string) *filterFuncVal {
+	return &filterFuncVal{fn: fn, val: value}
+}
+
+func (f *filterFuncVal) apply(p *pather) {
+	for i := range p.candidates {
+		cand := p.candidates[i]
+		if f.fn(cand) != f.val {
+			continue
+		}
+		p.scratch = append(p.scratch, cand)
+	}
+	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
+}
+
+// filterChild filters the candidate list, keeping the elements that have
+// at least one child element with the specified tag. Each kept element
+// appears once, however many of its children match.
+type filterChild struct {
+	space, tag string
+}
+
+func newFilterChild(str string) *filterChild {
+	s, l := spaceDecompose(str)
+	return &filterChild{s, l}
+}
+
+func (f *filterChild) apply(p *pather) {
+	for _, c := range p.candidates {
+		for _, cc := range c.Child {
+			if cc, ok := cc.(*Element); ok &&
+				spaceMatch(f.space, cc.Space) &&
+				f.tag == cc.Tag {
+				p.scratch = append(p.scratch, c)
+				// One matching child is enough. Without this break the
+				// candidate is added once per matching child, which
+				// shifts the positions seen by a following [n] filter.
+				break
+			}
+		}
+	}
+	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
+}
+
+// filterChildText filters the candidate list, keeping the elements that
+// have at least one child element with the specified tag and text. Each
+// kept element appears once, however many of its children match.
+type filterChildText struct {
+	space, tag, text string
+}
+
+func newFilterChildText(str, text string) *filterChildText {
+	s, l := spaceDecompose(str)
+	return &filterChildText{s, l, text}
+}
+
+func (f *filterChildText) apply(p *pather) {
+	for _, c := range p.candidates {
+		for _, cc := range c.Child {
+			if cc, ok := cc.(*Element); ok &&
+				spaceMatch(f.space, cc.Space) &&
+				f.tag == cc.Tag &&
+				f.text == cc.Text() {
+				p.scratch = append(p.scratch, c)
+				// One matching child is enough. Without this break the
+				// candidate is added once per matching child, which
+				// shifts the positions seen by a following [n] filter.
+				break
+			}
+		}
+	}
+	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
+}
diff --git a/path_test.go b/path_test.go
new file mode 100644
index 0000000..ed0b570
--- /dev/null
+++ b/path_test.go
@@ -0,0 +1,222 @@
+// Copyright 2015-2019 Brett Vickers.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package etree
+
+import "testing"
+
+// testXML is the document the path tests query: a bookstore holding four
+// book elements, with some price elements written using the "p" prefix.
+// NOTE(review): the third book opens its price as <price> but closes it
+// as </p:price>; the expected results below treat it as an un-prefixed
+// element, so this is presumably tolerated by the parser. Confirm
+// whether the mismatch is intentional.
+var testXML = `
+<?xml version="1.0" encoding="UTF-8"?>
+<bookstore xmlns:p="urn:books-com:prices">
+
+ <!Directive>
+
+ <book category="COOKING">
+ <title lang="en">Everyday Italian</title>
+ <author>Giada De Laurentiis</author>
+ <year>2005</year>
+ <p:price>30.00</p:price>
+ <editor>Clarkson Potter</editor>
+ </book>
+
+ <book category="CHILDREN">
+ <title lang="en" sku="150">Harry Potter</title>
+ <author>J K. Rowling</author>
+ <year>2005</year>
+ <p:price p:tax="1.99">29.99</p:price>
+ <editor></editor>
+ <editor/>
+ </book>
+
+ <book category="WEB">
+ <title lang="en">XQuery Kick Start</title>
+ <author>James McGovern</author>
+ <author>Per Bothner</author>
+ <author>Kurt Cagle</author>
+ <author>James Linn</author>
+ <author>Vaidyanathan Nagarajan</author>
+ <year>2003</year>
+ <price>49.99</p:price>
+ <editor>
+ </editor>
+ </book>
+
+ <!-- Final book -->
+ <book category="WEB" path="/books/xml">
+ <title lang="en">Learning XML</title>
+ <author>Erik T. Ray</author>
+ <year>2003</year>
+ <p:price>39.95</p:price>
+ </book>
+
+</bookstore>
+`
+
+// A test pairs a path string with the outcome TestPath expects for it.
+type test struct {
+ path string // path to compile and run against testXML
+ result interface{} // nil, string, []string or errorResult; see TestPath for how each is checked
+}
+
+// errorResult is the expected result of a path that must fail to compile;
+// its value is the full error text CompilePath should report.
+type errorResult string
+
+// tests is the table driven by TestPath. The result field encodes the
+// expectation: nil means the path matches nothing, a string means exactly
+// one element with that text, a []string means those element texts in
+// order, and an errorResult means CompilePath must fail with that message.
+var tests = []test{
+ // basic queries
+ {"./bookstore/book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
+ {"./bookstore/book/author", []string{"Giada De Laurentiis", "J K. Rowling", "James McGovern", "Per Bothner", "Kurt Cagle", "James Linn", "Vaidyanathan Nagarajan", "Erik T. Ray"}},
+ {"./bookstore/book/year", []string{"2005", "2005", "2003", "2003"}},
+ {"./bookstore/book/p:price", []string{"30.00", "29.99", "39.95"}},
+ {"./bookstore/book/isbn", nil},
+
+ // descendant queries
+ {"//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
+ {"//book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
+ {".//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
+ {".//bookstore//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
+ {".//book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
+ {".//p:price/.", []string{"30.00", "29.99", "39.95"}},
+ {".//price", []string{"30.00", "29.99", "49.99", "39.95"}},
+
+ // positional queries
+ {"./bookstore/book[1]/title", "Everyday Italian"},
+ {"./bookstore/book[4]/title", "Learning XML"},
+ {"./bookstore/book[5]/title", nil},
+ {"./bookstore/book[3]/author[0]", "James McGovern"},
+ {"./bookstore/book[3]/author[1]", "James McGovern"},
+ {"./bookstore/book[3]/author[3]/./.", "Kurt Cagle"},
+ {"./bookstore/book[3]/author[6]", nil},
+ {"./bookstore/book[-1]/title", "Learning XML"},
+ {"./bookstore/book[-4]/title", "Everyday Italian"},
+ {"./bookstore/book[-5]/title", nil},
+
+ // text function queries
+ {"./bookstore/book[author='James McGovern']/title", "XQuery Kick Start"},
+ {"./bookstore/book[author='Per Bothner']/title", "XQuery Kick Start"},
+ {"./bookstore/book[author='Kurt Cagle']/title", "XQuery Kick Start"},
+ {"./bookstore/book[author='James Linn']/title", "XQuery Kick Start"},
+ {"./bookstore/book[author='Vaidyanathan Nagarajan']/title", "XQuery Kick Start"},
+ {"//book[p:price='29.99']/title", "Harry Potter"},
+ {"//book[price='29.99']/title", "Harry Potter"},
+ {"//book/price[text()='29.99']", "29.99"},
+ {"//book/author[text()='Kurt Cagle']", "Kurt Cagle"},
+ {"//book/editor[text()]", []string{"Clarkson Potter", "\n\t\t"}},
+
+ // namespace function queries
+ {"//*[namespace-uri()]", []string{"30.00", "29.99", "39.95"}},
+ {"//*[namespace-uri()='urn:books-com:prices']", []string{"30.00", "29.99", "39.95"}},
+ {"//*[namespace-uri()='foo']", nil},
+ {"//*[namespace-prefix()]", []string{"30.00", "29.99", "39.95"}},
+ {"//*[namespace-prefix()='p']", []string{"30.00", "29.99", "39.95"}},
+ {"//*[name()='p:price']", []string{"30.00", "29.99", "39.95"}},
+ {"//*[local-name()='price']", []string{"30.00", "29.99", "49.99", "39.95"}},
+ {"//price[namespace-uri()='']", []string{"49.99"}},
+ {"//price[namespace-prefix()='']", []string{"49.99"}},
+ {"//price[name()='price']", []string{"49.99"}},
+ {"//price[local-name()='price']", []string{"30.00", "29.99", "49.99", "39.95"}},
+
+ // attribute queries
+ {"./bookstore/book[@category='WEB']/title", []string{"XQuery Kick Start", "Learning XML"}},
+ {"./bookstore/book[@path='/books/xml']/title", []string{"Learning XML"}},
+ {"./bookstore/book[@category='COOKING']/title[@lang='en']", "Everyday Italian"},
+ {"./bookstore/book/title[@lang='en'][@sku='150']", "Harry Potter"},
+ {"./bookstore/book/title[@lang='fr']", nil},
+ {"//p:price[@p:tax='1.99']", []string{"29.99"}},
+ {"//p:price[@tax='1.99']", []string{"29.99"}},
+ {"//p:price[@p:tax]", []string{"29.99"}},
+ {"//p:price[@tax]", []string{"29.99"}},
+
+ // parent queries
+ {"./bookstore/book[@category='COOKING']/title/../../book[4]/title", "Learning XML"},
+
+ // root queries
+ {"/bookstore/book[1]/title", "Everyday Italian"},
+ {"/bookstore/book[4]/title", "Learning XML"},
+ {"/bookstore/book[5]/title", nil},
+ {"/bookstore/book[3]/author[0]", "James McGovern"},
+ {"/bookstore/book[3]/author[1]", "James McGovern"},
+ {"/bookstore/book[3]/author[3]/./.", "Kurt Cagle"},
+ {"/bookstore/book[3]/author[6]", nil},
+ {"/bookstore/book[-1]/title", "Learning XML"},
+ {"/bookstore/book[-4]/title", "Everyday Italian"},
+ {"/bookstore/book[-5]/title", nil},
+
+ // bad paths
+ {"./bookstore/book[]", errorResult("etree: path contains an empty filter expression.")},
+ {"./bookstore/book[@category='WEB'", errorResult("etree: path has invalid filter [brackets].")},
+ {"./bookstore/book[@category='WEB]", errorResult("etree: path has mismatched filter quotes.")},
+ {"./bookstore/book[author]a", errorResult("etree: path has invalid filter [brackets].")},
+}
+
+// TestPath compiles every path in the tests table and checks the result
+// of running it against testXML, through both FindElementPath (first
+// match) and FindElementsPath (all matches).
+func TestPath(t *testing.T) {
+	doc := NewDocument()
+	if err := doc.ReadFromString(testXML); err != nil {
+		// Every case below needs the parsed fixture; stop here rather
+		// than report a cascade of unrelated failures.
+		t.Fatal(err)
+	}
+
+	for _, tc := range tests {
+		path, err := CompilePath(tc.path)
+		if err != nil {
+			// A compile error is only acceptable when the table expects
+			// exactly this message.
+			if want, ok := tc.result.(errorResult); !ok || err.Error() != string(want) {
+				fail(t, tc)
+			}
+			continue
+		}
+
+		// Test both FindElementsPath and FindElementPath
+		element := doc.FindElementPath(path)
+		elements := doc.FindElementsPath(path)
+
+		switch want := tc.result.(type) {
+		case errorResult:
+			// The path compiled although an error was expected.
+			fail(t, tc)
+		case nil:
+			if element != nil || len(elements) != 0 {
+				fail(t, tc)
+			}
+		case string:
+			if element == nil || element.Text() != want ||
+				len(elements) != 1 || elements[0].Text() != want {
+				fail(t, tc)
+			}
+		case []string:
+			// The length check on want keeps want[0] in range should an
+			// empty slice ever be entered in the table.
+			if len(want) == 0 || element == nil || element.Text() != want[0] ||
+				len(elements) != len(want) {
+				fail(t, tc)
+				continue
+			}
+			for i := 0; i < len(elements); i++ {
+				if elements[i].Text() != want[i] {
+					fail(t, tc)
+					break
+				}
+			}
+		}
+	}
+}
+
+// fail reports a failure for the given table entry, naming its path.
+// It is marked as a helper so the failure is attributed to the caller's line.
+func fail(t *testing.T, test test) {
+ t.Helper()
+ t.Errorf("etree: failed test '%s'\n", test.path)
+}
+
+// TestAbsolutePath checks that paths beginning with "/" or "//" are
+// resolved from the root of the tree even when the search starts from an
+// element nested inside it.
+func TestAbsolutePath(t *testing.T) {
+	doc := NewDocument()
+	if err := doc.ReadFromString(testXML); err != nil {
+		t.Fatal(err)
+	}
+
+	elements := doc.FindElements("//book/author")
+	if len(elements) == 0 {
+		// With no starting elements the loop below would check nothing
+		// and the test would pass vacuously.
+		t.Fatal("etree: absolute path test found no author elements")
+	}
+
+	for _, e := range elements {
+		title := e.FindElement("/bookstore/book[1]/title")
+		if title == nil || title.Text() != "Everyday Italian" {
+			t.Errorf("etree: absolute path test failed")
+		}
+
+		title = e.FindElement("//book[p:price='29.99']/title")
+		if title == nil || title.Text() != "Harry Potter" {
+			t.Errorf("etree: absolute path test failed")
+		}
+	}
+}