diff options
Diffstat (limited to 'src/tools/ak/res/resxml')
-rw-r--r-- | src/tools/ak/res/resxml/BUILD | 24 | ||||
-rw-r--r-- | src/tools/ak/res/resxml/xml_parser.go | 133 | ||||
-rw-r--r-- | src/tools/ak/res/resxml/xml_parser_test.go | 226 |
3 files changed, 383 insertions, 0 deletions
diff --git a/src/tools/ak/res/resxml/BUILD b/src/tools/ak/res/resxml/BUILD new file mode 100644 index 0000000..c74aa68 --- /dev/null +++ b/src/tools/ak/res/resxml/BUILD @@ -0,0 +1,24 @@ +load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") +load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test") + +licenses(["notice"]) + +go_library( + name = "resxml", + srcs = ["xml_parser.go"], + importpath = "src/tools/ak/res/resxml/resxml", + visibility = ["//src/tools/ak/liteparse:__subpackages__"], + deps = [ + "//src/tools/ak/res/respipe", + ], +) + +go_test( + name = "resxml_test", + size = "small", + srcs = ["xml_parser_test.go"], + embed = [":resxml"], + deps = [ + "//src/tools/ak/res/respipe", + ], +) diff --git a/src/tools/ak/res/resxml/xml_parser.go b/src/tools/ak/res/resxml/xml_parser.go new file mode 100644 index 0000000..ed765fd --- /dev/null +++ b/src/tools/ak/res/resxml/xml_parser.go @@ -0,0 +1,133 @@ +// Copyright 2022 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package resxml contains common functions to extract information from xml files and feed that information into the resource processing pipeline. +package resxml + +import ( + "context" + "encoding/xml" + "io" + + "src/tools/ak/res/respipe/respipe" +) + +// XMLEvent wraps an XMLToken and the Offset at which it was encountered. 
type XMLEvent struct {
	// Token is the XML token carried by this event.
	Token xml.Token
	// Offset is the input offset recorded together with Token.
	Offset int64
}

// ConsumeUntil receives XMLEvents from xmlC and discards them until it reaches an
// xml.StartElement whose name matches name according to SloppyMatches. That event is
// returned together with true. If xmlC is closed before a match is found, the zero
// XMLEvent and false are returned.
func ConsumeUntil(name xml.Name, xmlC <-chan XMLEvent) (XMLEvent, bool) {
	for xe := range xmlC {
		se, ok := xe.Token.(xml.StartElement)
		if ok && SloppyMatches(name, se.Name) {
			return xe, true
		}
	}
	return XMLEvent{}, false
}

// ForwardChildren forwards the events that follow start on xmlC onto the returned channel
// until the EndElement that closes start is encountered. Nested elements carrying the same
// name as start are tracked, so only the EndElement that balances start terminates the
// stream; that EndElement is consumed from xmlC but not forwarded.
//
// The returned channel is closed when the matching EndElement is seen, when xmlC is closed,
// or when ctx is done. If start does not hold an xml.StartElement, nothing is read from xmlC
// and an already-closed channel is returned.
func ForwardChildren(ctx context.Context, start XMLEvent, xmlC <-chan XMLEvent) <-chan XMLEvent {
	eventC := make(chan XMLEvent, 1)
	se, ok := start.Token.(xml.StartElement)
	if !ok {
		// There is no element whose children could be forwarded; report an empty stream
		// instead of panicking on the type assertion.
		close(eventC)
		return eventC
	}
	go func() {
		defer close(eventC)
		// depth counts the currently open elements named like start, start itself included.
		depth := 1
		for {
			var xe XMLEvent
			// Receive with a cancellation check so this goroutine cannot outlive ctx while
			// the upstream channel is idle.
			select {
			case <-ctx.Done():
				return
			case ev, open := <-xmlC:
				if !open {
					return
				}
				xe = ev
			}
			switch tok := xe.Token.(type) {
			case xml.StartElement:
				if StrictMatches(tok.Name, se.Name) {
					depth++
				}
			case xml.EndElement:
				if StrictMatches(tok.Name, se.Name) {
					depth--
				}
				if depth == 0 {
					return
				}
			}
			// Same select as SendXML: give up as soon as ctx is done.
			select {
			case <-ctx.Done():
				return
			case eventC <- xe:
			}
		}
	}()
	return eventC
}

// StrictMatches considers xml.Names equal if both their Space and Local parts match.
func StrictMatches(n1, n2 xml.Name) bool {
	return n1.Local == n2.Local && n1.Space == n2.Space
}

// SloppyMatches compares only the Local part of the names unless both names specify a
// Space, in which case the comparison is the same as StrictMatches.
func SloppyMatches(n1, n2 xml.Name) bool {
	if n1.Space == "" || n2.Space == "" {
		return n1.Local == n2.Local
	}
	return StrictMatches(n1, n2)
}

// StreamDoc parses the provided doc and forwards all xml tokens to the returned XMLEvent chan.
+func StreamDoc(ctx context.Context, doc io.Reader) (<-chan XMLEvent, <-chan error) { + eventC := make(chan XMLEvent) + errC := make(chan error) + go func() { + defer close(eventC) + defer close(errC) + decoder := xml.NewDecoder(doc) + // Turns off unknown entities check. Would otherwise fail on resources + // using non-standard XML entities. + decoder.Strict = false + for { + tok, err := decoder.Token() + if err == io.EOF { + return + } + if err != nil { + respipe.SendErr(ctx, errC, respipe.Errorf(ctx, "offset: %d xml error: %v", decoder.InputOffset(), err)) + return + } + tok = xml.CopyToken(tok) + if !SendXML(ctx, eventC, XMLEvent{tok, decoder.InputOffset()}) { + return + } + } + }() + return eventC, errC +} + +// SendXML sends an XMLEvent to the provided channel and returns true, otherwise if the context is done, it returns false. +func SendXML(ctx context.Context, xmlC chan<- XMLEvent, xml XMLEvent) bool { + select { + case <-ctx.Done(): + return false + case xmlC <- xml: + return true + } +} + +// Attrs returns all []xml.Attrs encounted on an XMLEvent. +func Attrs(xe XMLEvent) []xml.Attr { + if se, ok := xe.Token.(xml.StartElement); ok { + return se.Attr + } + return nil +} diff --git a/src/tools/ak/res/resxml/xml_parser_test.go b/src/tools/ak/res/resxml/xml_parser_test.go new file mode 100644 index 0000000..8c39e29 --- /dev/null +++ b/src/tools/ak/res/resxml/xml_parser_test.go @@ -0,0 +1,226 @@ +// Copyright 2022 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package resxml + +import ( + "bytes" + "context" + "encoding/xml" + "io" + "reflect" + "testing" + + "src/tools/ak/res/respipe/respipe" +) + +const ( + doc = ` + <Person> + <FullName>Grace R. Emlin</FullName> + <Company>Example Inc.</Company> + <Email where="home"> + <Addr>gre@example.com</Addr> + </Email> + <City>Hanga Rao<Street>1234 Main St.</Street>RandomText</City> + <Email where='work'> + <Addr>gre@work.com</Addr> + </Email> + <Group> + <Value>Friends</Value> + <Value>Squash</Value> + </Group> + <State>Easter Island</State> + </Person> + ` +) + +func TestForwardChildren(t *testing.T) { + ctx, cancel := context.WithCancel(respipe.PrefixErr(context.Background(), "test doc: ")) + defer cancel() + xmlC, errC := StreamDoc(ctx, bytes.NewBufferString(doc)) + xe, ok := ConsumeUntil(xml.Name{Local: "City"}, xmlC) + if !ok { + t.Fatalf("Expected to find: %s in %s", xml.Name{Local: "City"}, doc) + } + childC := ForwardChildren(ctx, xe, xmlC) + wantEvents := []XMLEvent{ + { + Token: xml.CharData("Hanga Rao"), + }, + { + Token: xml.StartElement{Name: xml.Name{Local: "Street"}, Attr: []xml.Attr{}}, + }, + { + Token: xml.CharData("1234 Main St."), + }, + { + Token: xml.EndElement{Name: xml.Name{Local: "Street"}}, + }, + { + Token: xml.CharData("RandomText"), + }, + } + var gotEvents []XMLEvent + for childC != nil || errC != nil { + select { + case xe, ok := <-childC: + if !ok { + childC = nil + cancel() + continue + } + xe.Offset = 0 + gotEvents = append(gotEvents, xe) + case e, ok := <-errC: + if !ok { + errC = nil + continue + } + t.Errorf("unexpected error: %v", e) + } + } + + if !reflect.DeepEqual(wantEvents, gotEvents) { + t.Errorf("Got children: %#v wanted: %#v", gotEvents, wantEvents) + } + +} + +func TestAttrs(t *testing.T) { + tests := []struct { + arg XMLEvent + want []xml.Attr + }{ + { + XMLEvent{ + Token: xml.StartElement{ + Attr: []xml.Attr{ + { + Name: 
xml.Name{Local: "dog"}, + Value: "shepard", + }, + { + Name: xml.Name{Local: "cat"}, + Value: "cheshire", + }, + }, + }, + }, + []xml.Attr{ + { + Name: xml.Name{Local: "dog"}, + Value: "shepard", + }, + { + Name: xml.Name{Local: "cat"}, + Value: "cheshire", + }, + }, + }, + { + XMLEvent{Token: xml.StartElement{}}, + []xml.Attr(nil), + }, + { + XMLEvent{Token: xml.CharData("foo")}, + []xml.Attr(nil), + }, + } + + for _, tc := range tests { + got := Attrs(tc.arg) + if !reflect.DeepEqual(got, tc.want) { + t.Errorf("Attrs(%#v): %#v wanted %#v", tc.arg, got, tc.want) + } + } +} + +func TestConsumeUntil(t *testing.T) { + ctx, cancel := context.WithCancel(respipe.PrefixErr(context.Background(), "test doc: ")) + defer cancel() + xmlC, errC := StreamDoc(ctx, bytes.NewBufferString(doc)) + + xe, ok := ConsumeUntil(xml.Name{Local: "Email"}, xmlC) + if !ok { + t.Fatalf("Expected to find: %s in %s", xml.Name{Local: "Email"}, doc) + } + if se, ok := xe.Token.(xml.StartElement); ok { + want := []xml.Attr{{xml.Name{Local: "where"}, "home"}} + if !reflect.DeepEqual(want, se.Attr) { + t.Errorf("Got attr: %v wanted: %v", se.Attr, want) + } + } else { + t.Fatalf("Got: %v Expected to stop on a start element", xe) + } + xe, ok = ConsumeUntil(xml.Name{Local: "Email"}, xmlC) + if !ok { + t.Fatalf("Expected to find: %s in %s", xml.Name{Local: "Email"}, doc) + } + if se, ok := xe.Token.(xml.StartElement); ok { + want := []xml.Attr{{xml.Name{Local: "where"}, "work"}} + if !reflect.DeepEqual(want, se.Attr) { + t.Errorf("Got attr: %v wanted: %v", se.Attr, want) + } + } else { + t.Fatalf("Got: %v Expected to stop on a start element", xe) + } + xe, ok = ConsumeUntil(xml.Name{Local: "Email"}, xmlC) + if ok { + t.Fatalf("Expected no more nodes with: %v got: %v in doc: %s", xml.Name{Local: "Email"}, xe, doc) + } + e, ok := <-errC + if ok { + t.Fatalf("Expected no errors during parse: %v", e) + } +} + +func TestStreamDoc(t *testing.T) { + dec := xml.NewDecoder(bytes.NewBufferString(doc)) + var events 
[]XMLEvent + for { + tok, err := dec.Token() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("Unexpected xml parse failure: %v", err) + } + events = append(events, XMLEvent{xml.CopyToken(tok), dec.InputOffset()}) + } + ctx, cancel := context.WithCancel(respipe.PrefixErr(context.Background(), "test doc: ")) + defer cancel() + xmlC, errC := StreamDoc(ctx, bytes.NewBufferString(doc)) + var got []XMLEvent + for xmlC != nil || errC != nil { + select { + case e, ok := <-errC: + if !ok { + errC = nil + continue + } + t.Errorf("Unexpected error: %v", e) + case xe, ok := <-xmlC: + if !ok { + xmlC = nil + continue + } + got = append(got, xe) + } + } + if !reflect.DeepEqual(events, got) { + t.Errorf("StreamDoc() got: %v wanted: %v", got, events) + } + +} |