From 30832cc4f3129bc40598373c3da571d9b256c1e7 Mon Sep 17 00:00:00 2001 From: Brett Vickers Date: Thu, 31 Jan 2019 16:17:18 -0800 Subject: Add namespace uri support * Attributes can be queried for their associated Element. * Add NamespaceURI method for Element and Attr. This method allows you to discover the namespace URI associated with any element or attribute in the document. * Add namespace-uri() function queries to path. --- etree.go | 117 ++++++++++++++++++++++++++++++++-------- etree_test.go | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ path.go | 110 ++++++++++++++++++++++++------------- 3 files changed, 339 insertions(+), 59 deletions(-) diff --git a/etree.go b/etree.go index 9f7fdd8..2968da1 100644 --- a/etree.go +++ b/etree.go @@ -105,7 +105,7 @@ type Document struct { // An Element represents an XML element, its attributes, and its child tokens. type Element struct { - Space, Tag string // namespace and tag + Space, Tag string // namespace prefix and tag Attr []Attr // key-value attribute pairs Child []Token // child tokens (elements, comments, etc.) parent *Element // parent element @@ -114,8 +114,9 @@ type Element struct { // An Attr represents a key-value attribute of an XML element. type Attr struct { - Space, Key string // The attribute's namespace and key - Value string // The attribute value string + Space, Key string // The attribute's namespace prefix and key + Value string // The attribute value string + element *Element // element containing the attribute } // charDataFlags are used with CharData tokens to store additional settings. @@ -319,7 +320,7 @@ func (d *Document) IndentTabs() { } // NewElement creates an unparented element with the specified tag. The tag -// may be prefixed by a namespace and a colon. +// may be prefixed by a namespace prefix and a colon. 
func NewElement(tag string) *Element { space, stag := spaceDecompose(tag) return newElement(space, stag, nil) @@ -349,6 +350,57 @@ func (e *Element) Copy() *Element { return e.dup(nil).(*Element) } +// NamespaceURI returns the XML namespace URI associated with the element. If +// the element has no namespace prefix, the default namespace URI in scope is +// returned; the empty string is returned when no matching declaration exists. +func (e *Element) NamespaceURI() string { + if e.Space == "" { + return e.findDefaultNamespaceURI() + } + return e.findLocalNamespaceURI(e.Space) +} + +// findLocalNamespaceURI finds the namespace URI corresponding to the +// requested prefix. +func (e *Element) findLocalNamespaceURI(prefix string) string { + for _, a := range e.Attr { + if a.Space == "xmlns" && a.Key == prefix { + return a.Value + } + } + + if e.parent == nil { + return "" + } + + return e.parent.findLocalNamespaceURI(prefix) +} + +// findDefaultNamespaceURI finds the default namespace URI of the element. +func (e *Element) findDefaultNamespaceURI() string { + for _, a := range e.Attr { + if a.Space == "" && a.Key == "xmlns" { + return a.Value + } + } + + if e.parent == nil { + return "" + } + + return e.parent.findDefaultNamespaceURI() +} + +// hasText returns true if the element has character data immediately +// following the element's opening tag. +func (e *Element) hasText() bool { + if len(e.Child) == 0 { + return false + } + _, ok := e.Child[0].(*CharData) + return ok +} + // Text returns all character data immediately following the element's opening // tag. func (e *Element) Text() string { @@ -479,7 +531,7 @@ func (e *Element) findTermCharDataIndex(start int) int { // CreateElement creates an element with the specified tag and adds it as the // last child element of the element e. The tag may be prefixed by a namespace -// and a colon. +// prefix and a colon.
func (e *Element) CreateElement(tag string) *Element { space, stag := spaceDecompose(tag) return newElement(space, stag, e) @@ -608,7 +660,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er case xml.StartElement: e := newElement(t.Name.Space, t.Name.Local, top) for _, a := range t.Attr { - e.createAttr(a.Name.Space, a.Name.Local, a.Value) + e.createAttr(a.Name.Space, a.Name.Local, a.Value, e) } stack.push(e) case xml.EndElement: @@ -632,7 +684,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er // SelectAttr finds an element attribute matching the requested key and // returns it if found. Returns nil if no matching attribute is found. The key -// may be prefixed by a namespace and a colon. +// may be prefixed by a namespace prefix and a colon. func (e *Element) SelectAttr(key string) *Attr { space, skey := spaceDecompose(key) for i, a := range e.Attr { @@ -644,8 +696,8 @@ func (e *Element) SelectAttr(key string) *Attr { } // SelectAttrValue finds an element attribute matching the requested key and -// returns its value if found. The key may be prefixed by a namespace and a -// colon. If the key is not found, the dflt value is returned instead. +// returns its value if found. The key may be prefixed by a namespace prefix +// and a colon. If the key is not found, the dflt value is returned instead. func (e *Element) SelectAttrValue(key, dflt string) string { space, skey := spaceDecompose(key) for _, a := range e.Attr { @@ -668,8 +720,8 @@ func (e *Element) ChildElements() []*Element { } // SelectElement returns the first child element with the given tag. The tag -// may be prefixed by a namespace and a colon. Returns nil if no element with -// a matching tag was found. +// may be prefixed by a namespace prefix and a colon. Returns nil if no +// element with a matching tag was found. 
func (e *Element) SelectElement(tag string) *Element { space, stag := spaceDecompose(tag) for _, t := range e.Child { @@ -681,7 +733,7 @@ func (e *Element) SelectElement(tag string) *Element { } // SelectElements returns a slice of all child elements with the given tag. -// The tag may be prefixed by a namespace and a colon. +// The tag may be prefixed by a namespace prefix and a colon. func (e *Element) SelectElements(tag string) []*Element { space, stag := spaceDecompose(tag) var elements []*Element @@ -974,35 +1026,46 @@ func (e *Element) addChild(t Token) { } // CreateAttr creates an attribute and adds it to element e. The key may be -// prefixed by a namespace and a colon. If an attribute with the key already -// exists, its value is replaced. +// prefixed by a namespace prefix and a colon. If an attribute with the key +// already exists, its value is replaced. func (e *Element) CreateAttr(key, value string) *Attr { space, skey := spaceDecompose(key) - return e.createAttr(space, skey, value) + return e.createAttr(space, skey, value, e) } // createAttr is a helper function that creates attributes. -func (e *Element) createAttr(space, key, value string) *Attr { +func (e *Element) createAttr(space, key, value string, parent *Element) *Attr { for i, a := range e.Attr { if space == a.Space && key == a.Key { e.Attr[i].Value = value return &e.Attr[i] } } - a := Attr{space, key, value} + a := Attr{ + Space: space, + Key: key, + Value: value, + element: parent, + } e.Attr = append(e.Attr, a) return &e.Attr[len(e.Attr)-1] } -// RemoveAttr removes and returns the first attribute of the element whose key -// matches the given key. The key may be prefixed by a namespace and a colon. -// If an equal attribute does not exist, nil is returned. +// RemoveAttr removes and returns a copy of the first attribute of the element +// whose key matches the given key. The key may be prefixed by a namespace +// prefix and a colon. 
If a matching attribute does not exist, nil is +// returned. func (e *Element) RemoveAttr(key string) *Attr { space, skey := spaceDecompose(key) for i, a := range e.Attr { if space == a.Space && skey == a.Key { e.Attr = append(e.Attr[0:i], e.Attr[i+1:]...) - return &a + return &Attr{ + Space: a.Space, + Key: a.Key, + Value: a.Value, + element: nil, + } } } return nil @@ -1031,6 +1094,18 @@ func (a byAttr) Less(i, j int) bool { return sp < 0 } +// Element returns the element containing the attribute. +func (a *Attr) Element() *Element { + return a.element +} + +// NamespaceURI returns the XML namespace URI of the element containing the +// attribute, as reported by Element.NamespaceURI. It panics if the attribute +// has no containing element, such as an attribute returned by RemoveAttr. +func (a *Attr) NamespaceURI() string { + return a.element.NamespaceURI() +} + // writeTo serializes the attribute to the writer. func (a *Attr) writeTo(w *bufio.Writer, s *WriteSettings) { if a.Space != "" { diff --git a/etree_test.go b/etree_test.go index bf3c5cd..92927d6 100644 --- a/etree_test.go +++ b/etree_test.go @@ -32,6 +32,13 @@ func checkIntEq(t *testing.T, got, want int) { } } +func checkElementEq(t *testing.T, got, want *Element) { + t.Helper() + if got != want { + t.Errorf("etree: unexpected element. Got: %v.
Wanted: %v.\n", got, want) + } +} + func checkDocEq(t *testing.T, doc *Document, expected string) { t.Helper() doc.Indent(NoIndent) @@ -912,3 +919,167 @@ func TestSetTail(t *testing.T) { checkIntEq(t, len(root.Child), 1) checkIntEq(t, len(child.Child), 1) } + +func TestAttrParent(t *testing.T) { + doc := NewDocument() + root := doc.CreateElement("root") + attr1 := root.CreateAttr("bar", "1") + attr2 := root.CreateAttr("qux", "2") + + checkIntEq(t, len(root.Attr), 2) + checkElementEq(t, attr1.Element(), root) + checkElementEq(t, attr2.Element(), root) + + attr1 = root.RemoveAttr("bar") + attr2 = root.RemoveAttr("qux") + checkElementEq(t, attr1.Element(), nil) + checkElementEq(t, attr2.Element(), nil) + + s := `` + err := doc.ReadFromString(s) + if err != nil { + t.Error("etree: failed to parse document") + } + + root = doc.SelectElement("root") + for i := range root.Attr { + checkElementEq(t, root.Attr[i].Element(), root) + } +} + +func TestDefaultNamespaceURI(t *testing.T) { + s := ` + + + + + + + + + +` + + doc := NewDocument() + err := doc.ReadFromString(s) + if err != nil { + t.Error("etree: failed to parse document") + } + + root := doc.SelectElement("root") + child1 := root.SelectElement("child1") + child2 := root.SelectElement("child2") + grandchild1 := child1.SelectElement("grandchild1") + grandchild2 := child1.SelectElement("grandchild2") + greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1") + + checkStrEq(t, doc.NamespaceURI(), "") + checkStrEq(t, root.NamespaceURI(), "http://root.example.com") + checkStrEq(t, child1.NamespaceURI(), "http://child.example.com") + checkStrEq(t, child2.NamespaceURI(), "http://root.example.com") + checkStrEq(t, grandchild1.NamespaceURI(), "http://grandchild.example.com") + checkStrEq(t, grandchild2.NamespaceURI(), "http://child.example.com") + checkStrEq(t, greatgrandchild1.NamespaceURI(), "http://child.example.com") + + checkStrEq(t, root.Attr[0].NamespaceURI(), "http://root.example.com") + checkStrEq(t, 
child1.Attr[0].NamespaceURI(), "http://child.example.com") + checkStrEq(t, child2.Attr[0].NamespaceURI(), "http://root.example.com") + checkStrEq(t, grandchild1.Attr[0].NamespaceURI(), "http://grandchild.example.com") + checkStrEq(t, grandchild2.Attr[0].NamespaceURI(), "http://child.example.com") + checkStrEq(t, greatgrandchild1.Attr[0].NamespaceURI(), "http://child.example.com") + + f := doc.FindElements("//*[namespace-uri()='http://root.example.com']") + if len(f) != 2 || f[0] != root || f[1] != child2 { + t.Error("etree: failed namespace-uri test") + } + + f = doc.FindElements("//*[namespace-uri()='http://child.example.com']") + if len(f) != 3 || f[0] != child1 || f[1] != grandchild2 || f[2] != greatgrandchild1 { + t.Error("etree: failed namespace-uri test") + } + + f = doc.FindElements("//*[namespace-uri()='http://grandchild.example.com']") + if len(f) != 1 || f[0] != grandchild1 { + t.Error("etree: failed namespace-uri test") + } + + f = doc.FindElements("//*[namespace-uri()='']") + if len(f) != 0 { + t.Error("etree: failed namespace-uri test") + } + + f = doc.FindElements("//*[namespace-uri()='foo']") + if len(f) != 0 { + t.Error("etree: failed namespace-uri test") + } +} + +func TestLocalNamespaceURI(t *testing.T) { + s := ` + + + + + + + + + + + + + +` + + doc := NewDocument() + err := doc.ReadFromString(s) + if err != nil { + t.Error("etree: failed to parse document") + } + + root := doc.SelectElement("root") + child1 := root.SelectElement("child1") + child2 := root.SelectElement("child2") + child3 := root.SelectElement("child3") + grandchild1 := child1.SelectElement("grandchild1") + grandchild2 := child1.SelectElement("grandchild2") + grandchild3 := child1.SelectElement("grandchild3") + grandchild4 := child1.SelectElement("grandchild4") + greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1") + + checkStrEq(t, doc.NamespaceURI(), "") + checkStrEq(t, root.NamespaceURI(), "http://root.example.com") + checkStrEq(t, child1.NamespaceURI(), 
"http://child.example.com") + checkStrEq(t, child2.NamespaceURI(), "http://root.example.com") + checkStrEq(t, child3.NamespaceURI(), "") + checkStrEq(t, grandchild1.NamespaceURI(), "http://grandchild.example.com") + checkStrEq(t, grandchild2.NamespaceURI(), "http://child.example.com") + checkStrEq(t, grandchild3.NamespaceURI(), "http://root.example.com") + checkStrEq(t, grandchild4.NamespaceURI(), "") + checkStrEq(t, greatgrandchild1.NamespaceURI(), "http://root.example.com") + + f := doc.FindElements("//*[namespace-uri()='http://root.example.com']") + if len(f) != 4 || f[0] != root || f[1] != child2 || f[2] != grandchild3 || f[3] != greatgrandchild1 { + t.Error("etree: failed namespace-uri test") + } + + f = doc.FindElements("//*[namespace-uri()='http://child.example.com']") + if len(f) != 2 || f[0] != child1 || f[1] != grandchild2 { + t.Error("etree: failed namespace-uri test") + } + + f = doc.FindElements("//*[namespace-uri()='http://grandchild.example.com']") + if len(f) != 1 || f[0] != grandchild1 { + t.Error("etree: failed namespace-uri test") + } + + f = doc.FindElements("//*[namespace-uri()='']") + if len(f) != 2 || f[0] != child3 || f[1] != grandchild4 { + t.Error("etree: failed namespace-uri test") + } + + f = doc.FindElements("//*[namespace-uri()='foo']") + if len(f) != 0 { + t.Error("etree: failed namespace-uri test") + } +} diff --git a/path.go b/path.go index a1a59bd..be3823f 100644 --- a/path.go +++ b/path.go @@ -17,22 +17,28 @@ similar to XPath strings, they have a more limited set of selectors and filtering options. The following selectors and filters are supported by etree paths: - . Select the current element. - .. Select the parent of the current element. - * Select all child elements of the current element. - / Select the root element when used at the start of a path. - // Select all descendants of the current element. If used at - the start of a path, select all descendants of the root. - tag Select all child elements with the given tag. 
- [#] Select the element of the given index (1-based, - negative starts from the end). - [@attrib] Select all elements with the given attribute. - [@attrib='val'] Select all elements with the given attribute set to val. - [tag] Select all elements with a child element named tag. - [tag='val'] Select all elements with a child element named tag - and text matching val. - [text()] Select all elements with non-empty text. - [text()='val'] Select all elements whose text matches val. + . Select the current element. + .. Select the parent of the current element. + * Select all child elements of the current element. + / Select the root element when used at the start of + a path. + // Select all descendants of the current element. If + used at the start of a path, select all + descendants of the root. + tag Select all child elements with the given tag. + [#] Select the element of the given index (1-based, + negative starts from the end). + [@attrib] Select all elements with the given attribute. + [@attrib='val'] Select all elements with the given attribute set + to val. + [tag] Select all elements with a child element named + tag. + [tag='val'] Select all elements with a child element named + tag and text matching val. + [text()] Select all elements with non-empty text. + [text()='val'] Select all elements whose text matches val. + [namespace-uri()='val'] Select all elements whose namespace URI matches + val. Examples: @@ -260,6 +266,14 @@ func (c *compiler) parseSelector(path string) selector { } } +var fnTable = map[string]struct { + hasFn func(e *Element) bool + getValFn func(e *Element) string +}{ + "text": {(*Element).hasText, (*Element).Text}, + "namespace-uri": {nil, (*Element).NamespaceURI}, +} + // parseFilter parses a path filter contained within [brackets]. 
func (c *compiler) parseFilter(path string) filter { if len(path) == 0 { @@ -267,7 +281,7 @@ func (c *compiler) parseFilter(path string) filter { return nil } - // Filter contains [@attr='val'], [text()='val'], or [tag='val']? + // Filter contains [@attr='val'], [fn()='val'], or [tag='val']? eqindex := strings.Index(path, "='") if eqindex >= 0 { rindex := nextIndex(path, "'", eqindex+2) @@ -275,22 +289,38 @@ func (c *compiler) parseFilter(path string) filter { c.err = ErrPath("path has mismatched filter quotes.") return nil } + + key := path[:eqindex] + value := path[eqindex+2 : rindex] + switch { - case path[0] == '@': - return newFilterAttrVal(path[1:eqindex], path[eqindex+2:rindex]) - case strings.HasPrefix(path, "text()"): - return newFilterTextVal(path[eqindex+2 : rindex]) + case key[0] == '@': + return newFilterAttrVal(key[1:], value) + case strings.HasSuffix(key, "()"): + fn := key[:len(key)-2] + if t, ok := fnTable[fn]; ok && t.getValFn != nil { + return newFilterFuncVal(t.getValFn, value) + } else { + c.err = ErrPath("path has unknown function " + fn) + return nil + } default: - return newFilterChildText(path[:eqindex], path[eqindex+2:rindex]) + return newFilterChildText(key, value) } } - // Filter contains [@attr], [N], [tag] or [text()] + // Filter contains [@attr], [N], [tag] or [fn()] switch { case path[0] == '@': return newFilterAttr(path[1:]) - case path == "text()": - return newFilterText() + case strings.HasSuffix(path, "()"): + fn := path[:len(path)-2] + if t, ok := fnTable[fn]; ok && t.hasFn != nil { + return newFilterFunc(t.hasFn) + } else { + c.err = ErrPath("path has unknown function " + fn) + return nil + } case isInteger(path): pos, _ := strconv.Atoi(path) switch { @@ -448,35 +478,39 @@ func (f *filterAttrVal) apply(p *pather) { p.candidates, p.scratch = p.scratch, p.candidates[0:0] } -// filterText filters the candidate list for elements having text. 
-type filterText struct{} +// filterFunc filters the candidate list for elements satisfying a custom +// boolean function. +type filterFunc struct { + fn func(e *Element) bool +} -func newFilterText() *filterText { - return &filterText{} +func newFilterFunc(fn func(e *Element) bool) *filterFunc { + return &filterFunc{fn} } -func (f *filterText) apply(p *pather) { +func (f *filterFunc) apply(p *pather) { for _, c := range p.candidates { - if c.Text() != "" { + if f.fn(c) { p.scratch = append(p.scratch, c) } } p.candidates, p.scratch = p.scratch, p.candidates[0:0] } -// filterTextVal filters the candidate list for elements having -// text equal to the specified value. -type filterTextVal struct { +// filterFuncVal filters the candidate list for elements for which a custom +// function returns a string equal to the specified value. +type filterFuncVal struct { + fn func(e *Element) string val string } -func newFilterTextVal(value string) *filterTextVal { - return &filterTextVal{value} +func newFilterFuncVal(fn func(e *Element) string, value string) *filterFuncVal { + return &filterFuncVal{fn, value} } -func (f *filterTextVal) apply(p *pather) { +func (f *filterFuncVal) apply(p *pather) { for _, c := range p.candidates { - if c.Text() == f.val { + if f.fn(c) == f.val { p.scratch = append(p.scratch, c) } } -- cgit v1.2.3