Improve indent APIs

Introduce the 'Unindent' function to remove indentation from a document. This is a convenience function equivalent to calling 'Indent(etree.NoIndent)'. Add an 'IndentWithSettings' function to Document. This function gives you more control over the indentation algorithm, which is configured with the new 'IndentSettings' struct. UseCRLF and PreserveLeafWhitespace were moved from Document's WriteSettings to this new IndentSettings struct. (Document.WriteSettings.UseCRLF will continue to work, but it is deprecated rather than removed because it has existed in the API for a long time. Document.WriteSettings.PreserveLeafWhitespace has been totally removed since it was public for only a couple of hours.)
author: Brett Vickers <brett@beevik.com> 2023-05-07 12:22:06 -0700
committer: Brett Vickers <brett@beevik.com> 2023-05-07 12:22:06 -0700
commit: 83a7a1267d824af921661b9fdde0b826d9e36371 (patch)
tree: 8c48796f642b37d51d0e2fbc6ea10121fbe49ac3
parent: 67ff30fec486c65fa387680d9b0c9ed5de71194d (diff)
download: go-etree-83a7a1267d824af921661b9fdde0b826d9e36371.tar.gz
2 files changed, 214 insertions, 61 deletions
diff --git a/etree.go b/etree.go
index 9e5a914..9db3660 100644
--- a/etree.go
+++ b/etree.go
@@ -66,8 +66,7 @@ func (s *ReadSettings) dup() ReadSettings {
 	}
 }
 
-// WriteSettings determine the behavior of the Document's WriteTo* and
-// Indent* methods.
+// WriteSettings determine the behavior of the Document's WriteTo* methods.
 type WriteSettings struct {
 	// CanonicalEndTags forces the production of XML end tags, even for
 	// elements that have no child elements. Default: false.
@@ -91,23 +90,19 @@ type WriteSettings struct {
 	// UseCRLF causes the document's Indent* methods to use a carriage return
 	// followed by a linefeed ("\r\n") when outputting a newline. If false,
 	// only a linefeed is used ("\n"). Default: false.
+	//
+	// Deprecated: UseCRLF is deprecated. Use IndentSettings.UseCRLF instead.
 	UseCRLF bool
-
-	// PreserveLeafWhitespace causes the document's Indent* methods to
-	// preserve whitespace inside XML elements containing only non-CDATA
-	// character data. Default: false.
-	PreserveLeafWhitespace bool
 }
 
 // newWriteSettings creates a default WriteSettings record.
 func newWriteSettings() WriteSettings {
 	return WriteSettings{
-		CanonicalEndTags:       false,
-		CanonicalText:          false,
-		CanonicalAttrVal:       false,
-		AttrSingleQuote:        false,
-		UseCRLF:                false,
-		PreserveLeafWhitespace: false,
+		CanonicalEndTags: false,
+		CanonicalText:    false,
+		CanonicalAttrVal: false,
+		AttrSingleQuote:  false,
+		UseCRLF:          false,
 	}
 }
 
@@ -116,6 +111,39 @@ func (s *WriteSettings) dup() WriteSettings {
 	return *s
 }
 
+// IndentSettings determine the behavior of the Document's Indent and
+// IndentTabs methods.
+type IndentSettings struct {
+	// Spaces indicates the number of spaces to insert for each level of
+	// indentation. Set to etree.NoIndent to remove all indentation. Ignored
+	// when UseTabs is true. Default: 4.
+	Spaces int
+
+	// UseTabs causes tabs to be used instead of spaces when indenting.
+	// Default: false.
+	UseTabs bool
+
+	// UseCRLF causes newlines to be written as a carriage return followed by
+	// a linefeed ("\r\n"). If false, only a linefeed character is output
+	// for a newline ("\n"). Default: false.
+	UseCRLF bool
+
+	// PreserveLeafWhitespace causes indent methods to preserve whitespace
+	// within XML elements containing only non-CDATA character data. Default:
+	// false.
+	PreserveLeafWhitespace bool
+}
+
+// NewIndentSettings creates a default IndentSettings record.
+func NewIndentSettings() IndentSettings {
+	return IndentSettings{
+		Spaces:                 4,
+		UseTabs:                false,
+		UseCRLF:                false,
+		PreserveLeafWhitespace: false,
+	}
+}
+
 // A Token is an interface type used to represent XML elements, character
 // data, CDATA sections, XML comments, XML directives, and XML processing
 // instructions.
@@ -341,34 +369,58 @@ func (d *Document) WriteToString() (s string, err error) {
 type indentFunc func(depth int) string
 
 // Indent modifies the document's element tree by inserting character data
-// tokens containing newlines and indentation. The amount of indentation per
-// depth level is given by the 'spaces' parameter. Pass etree.NoIndent for
-// 'spaces' if you want no indentation at all.
+// tokens containing newlines and spaces for indentation. The amount of
+// indentation per depth level is given by the 'spaces' parameter. Pass
+// etree.NoIndent for 'spaces' if you want indentation to be removed.
 func (d *Document) Indent(spaces int) {
-	var indent indentFunc
-	switch {
-	case spaces < 0:
-		indent = func(depth int) string { return "" }
-	case d.WriteSettings.UseCRLF:
-		indent = func(depth int) string { return indentCRLF(depth*spaces, indentSpaces) }
-	default:
-		indent = func(depth int) string { return indentLF(depth*spaces, indentSpaces) }
-	}
-	d.Element.indent(0, indent, &d.WriteSettings)
+	s := NewIndentSettings()
+	s.Spaces = spaces
+	d.IndentWithSettings(s)
 }
 
 // IndentTabs modifies the document's element tree by inserting CharData
-// tokens containing newlines and tabs for indentation.  One tab is used per
+// tokens containing newlines and tabs for indentation. One tab is used per
 // indentation level.
 func (d *Document) IndentTabs() {
+	s := NewIndentSettings()
+	s.UseTabs = true
+	d.IndentWithSettings(s)
+}
+
+// IndentWithSettings modifies the document's element tree by inserting
+// character data tokens containing newlines and indentation. The behavior
+// of the indentation algorithm is configured by the indent settings.
+func (d *Document) IndentWithSettings(s IndentSettings) {
+	// WriteSettings.UseCRLF is deprecated. Until removed from the package, it
+	// overrides IndentSettings.UseCRLF when true.
+	if d.WriteSettings.UseCRLF {
+		s.UseCRLF = true
+	}
+
 	var indent indentFunc
-	switch d.WriteSettings.UseCRLF {
-	case true:
-		indent = func(depth int) string { return indentCRLF(depth, indentTabs) }
-	default:
-		indent = func(depth int) string { return indentLF(depth, indentTabs) }
+	if s.UseTabs {
+		if s.UseCRLF {
+			indent = func(depth int) string { return indentCRLF(depth, indentTabs) }
+		} else {
+			indent = func(depth int) string { return indentLF(depth, indentTabs) }
+		}
+	} else {
+		if s.Spaces < 0 {
+			indent = func(depth int) string { return "" }
+		} else if s.UseCRLF {
+			indent = func(depth int) string { return indentCRLF(depth*s.Spaces, indentSpaces) }
+		} else {
+			indent = func(depth int) string { return indentLF(depth*s.Spaces, indentSpaces) }
+		}
 	}
-	d.Element.indent(0, indent, &d.WriteSettings)
+
+	d.Element.indent(0, indent, &s)
+}
+
+// Unindent modifies the document's element tree by removing character data
+// tokens containing only whitespace.
+func (d *Document) Unindent() {
+	d.Indent(NoIndent)
 }
 
 // NewElement creates an unparented element with the specified tag (i.e.,
@@ -942,7 +994,7 @@ func (e *Element) GetRelativePath(source *Element) string {
 
 // indent recursively inserts proper indentation between an XML element's
 // child tokens.
-func (e *Element) indent(depth int, indent indentFunc, s *WriteSettings) {
+func (e *Element) indent(depth int, indent indentFunc, s *IndentSettings) {
 	e.stripIndent(s)
 	n := len(e.Child)
 	if n == 0 {
@@ -987,7 +1039,7 @@ func (e *Element) indent(depth int, indent indentFunc, s *WriteSettings) {
 }
 
 // stripIndent removes any previously inserted indentation.
-func (e *Element) stripIndent(s *WriteSettings) {
+func (e *Element) stripIndent(s *IndentSettings) {
 	// Count the number of non-indent child tokens
 	n := len(e.Child)
 	for _, c := range e.Child {
diff --git a/etree_test.go b/etree_test.go
index 83eafc6..0fc1fc0 100644
--- a/etree_test.go
+++ b/etree_test.go
@@ -730,44 +730,105 @@ func TestCharData(t *testing.T) {
 	}
 }
 
-func TestIndentPreserveWhitespace(t *testing.T) {
+func TestIndentSimple(t *testing.T) {
+	doc := NewDocument()
+	root := doc.CreateElement("root")
+	ch1 := root.CreateElement("child1")
+	ch1.CreateElement("child2")
+
+	// First test Unindent.
+	doc.Unindent()
+	s, err := doc.WriteToString()
+	if err != nil {
+		t.Error("etree: failed to serialize document")
+	}
+	expected := "<root><child1><child2/></child1></root>"
+	checkStrEq(t, s, expected)
+
+	// Now test Indent with NoIndent (which should produce the same result
+	// as Unindent).
+	doc.Indent(NoIndent)
+	s, err = doc.WriteToString()
+	if err != nil {
+		t.Error("etree: failed to serialize document")
+	}
+	checkStrEq(t, s, expected)
+
+	// Run all indent test cases.
 	tests := []struct {
-		input  string
-		output string
+		useTabs, useCRLF bool
+		ws, nl           string
 	}{
-		{"<test></test>", "<test/>\n"},
-		{"<test>  </test>", "<test>  </test>\n"},
-		{"<test>\t</test>", "<test>\t</test>\n"},
-		{"<test>\t\n \t</test>", "<test>\t\n \t</test>\n"},
-		{"<test><![CDATA[ ]]></test>", "<test><![CDATA[ ]]></test>\n"},
-		{"<test> <![CDATA[ ]]> </test>", "<test><![CDATA[ ]]></test>\n"},
-		{"<outer> <inner> </inner> </outer>", "<outer>\n  <inner> </inner>\n</outer>\n"},
+		{false, false, " ", "\n"},
+		{false, true, " ", "\r\n"},
+		{true, false, "\t", "\n"},
+		{true, true, "\t", "\r\n"},
 	}
 
 	for _, test := range tests {
-		doc := NewDocument()
-		doc.WriteSettings.PreserveLeafWhitespace = true
-		err := doc.ReadFromString(test.input)
-		if err != nil {
-			t.Error("etree: failed to read string")
-		}
-		doc.Indent(2)
-		output, err := doc.WriteToString()
-		if err != nil {
-			t.Error("etree: failed to read string")
+		doc.WriteSettings.UseCRLF = test.useCRLF
+		if test.useTabs {
+			doc.IndentTabs()
+			s, err := doc.WriteToString()
+			if err != nil {
+				t.Error("etree: failed to serialize document")
+			}
+			tab := test.ws
+			expected := "<root>" + test.nl + tab + "<child1>" + test.nl +
+				tab + tab + "<child2/>" + test.nl + tab +
+				"</child1>" + test.nl + "</root>" + test.nl
+			checkStrEq(t, s, expected)
+		} else {
+			for i := 0; i < 256; i++ {
+				doc.Indent(i)
+				s, err := doc.WriteToString()
+				if err != nil {
+					t.Error("etree: failed to serialize document")
+				}
+				tab := strings.Repeat(test.ws, i)
+				expected := "<root>" + test.nl + tab + "<child1>" + test.nl +
+					tab + tab + "<child2/>" + test.nl + tab +
+					"</child1>" + test.nl + "</root>" + test.nl
+				checkStrEq(t, s, expected)
+			}
 		}
-		checkStrEq(t, output, test.output)
 	}
 }
 
-func TestIndentSettings(t *testing.T) {
+func TestIndentWithDefaultSettings(t *testing.T) {
+	input := `<root>
+	<child1>
+		<child2>    </child2>
+	</child1>
+</root>`
+
+	doc := NewDocument()
+	err := doc.ReadFromString(input)
+	if err != nil {
+		t.Error("etree: failed to read string")
+	}
+
+	doc.IndentWithSettings(NewIndentSettings())
+	s, err := doc.WriteToString()
+	if err != nil {
+		t.Error("etree: failed to serialize document")
+	}
+	expected := "<root>\n    <child1>\n        <child2/>\n    </child1>\n</root>\n"
+	checkStrEq(t, s, expected)
+}
+
+func TestIndentWithSettings(t *testing.T) {
 	doc := NewDocument()
 	root := doc.CreateElement("root")
 	ch1 := root.CreateElement("child1")
 	ch1.CreateElement("child2")
 
 	// First test with NoIndent.
-	doc.Indent(NoIndent)
+	settings := NewIndentSettings()
+	settings.UseCRLF = false
+	settings.UseTabs = false
+	settings.Spaces = NoIndent
+	doc.IndentWithSettings(settings)
 	s, err := doc.WriteToString()
 	if err != nil {
 		t.Error("etree: failed to serialize document")
@@ -787,9 +848,11 @@ func TestIndentSettings(t *testing.T) {
 	}
 
 	for _, test := range tests {
-		doc.WriteSettings.UseCRLF = test.useCRLF
 		if test.useTabs {
-			doc.IndentTabs()
+			settings := NewIndentSettings()
+			settings.UseTabs = true
+			settings.UseCRLF = test.useCRLF
+			doc.IndentWithSettings(settings)
 			s, err := doc.WriteToString()
 			if err != nil {
 				t.Error("etree: failed to serialize document")
@@ -801,7 +864,11 @@ func TestIndentSettings(t *testing.T) {
 			checkStrEq(t, s, expected)
 		} else {
 			for i := 0; i < 256; i++ {
-				doc.Indent(i)
+				settings := NewIndentSettings()
+				settings.Spaces = i
+				settings.UseTabs = false
+				settings.UseCRLF = test.useCRLF
+				doc.IndentWithSettings(settings)
 				s, err := doc.WriteToString()
 				if err != nil {
 					t.Error("etree: failed to serialize document")
@@ -816,6 +883,40 @@ func TestIndentSettings(t *testing.T) {
 	}
 }
 
+func TestIndentPreserveWhitespace(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected string
+	}{
+		{"<test></test>", "<test/>\n"},
+		{"<test>  </test>", "<test>  </test>\n"},
+		{"<test>\t</test>", "<test>\t</test>\n"},
+		{"<test>\t\n \t</test>", "<test>\t\n \t</test>\n"},
+		{"<test><![CDATA[ ]]></test>", "<test><![CDATA[ ]]></test>\n"},
+		{"<test> <![CDATA[ ]]> </test>", "<test><![CDATA[ ]]></test>\n"},
+		{"<outer> <inner> </inner> </outer>", "<outer>\n  <inner> </inner>\n</outer>\n"},
+	}
+
+	for _, test := range tests {
+		doc := NewDocument()
+		err := doc.ReadFromString(test.input)
+		if err != nil {
+			t.Error("etree: failed to read string")
+		}
+
+		s := NewIndentSettings()
+		s.Spaces = 2
+		s.PreserveLeafWhitespace = true
+		doc.IndentWithSettings(s)
+
+		output, err := doc.WriteToString()
+		if err != nil {
+			t.Error("etree: failed to read string")
+		}
+		checkStrEq(t, output, test.expected)
+	}
+}
+
 func TestTokenIndexing(t *testing.T) {
 	s := `<?xml version="1.0" encoding="UTF-8"?>
 <?xml-stylesheet type="text/xsl" href="style.xsl"?>
author	Brett Vickers <brett@beevik.com>	2023-05-07 12:22:06 -0700
committer	Brett Vickers <brett@beevik.com>	2023-05-07 12:22:06 -0700
commit	83a7a1267d824af921661b9fdde0b826d9e36371 (patch)
tree	8c48796f642b37d51d0e2fbc6ea10121fbe49ac3
parent	67ff30fec486c65fa387680d9b0c9ed5de71194d (diff)
download	go-etree-83a7a1267d824af921661b9fdde0b826d9e36371.tar.gz