diff options
Diffstat (limited to 'tvloader/reader/reader.go')
-rw-r--r-- | tvloader/reader/reader.go | 161 |
1 files changed, 161 insertions, 0 deletions
// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later

// TagValuePair is a convenience struct for a (tag, value) string pair.
type TagValuePair struct {
	Tag   string
	Value string
}

// ReadTagValues reads content line by line and returns the (tag, value)
// pairs it contains, in the order in which they appear.
//
// Blank lines and lines whose first non-space character is '#' are skipped.
// Every other line must have the form "tag: value". A value may be wrapped
// in <text>...</text>, in which case it may span several lines; the text
// between the markers is kept verbatim, with lines joined by "\n".
//
// Lines may be of any length and may end in "\n" or "\r\n". An error is
// returned if reading fails, if a line has no colon, or if the input ends
// inside an unterminated <text> value. On error the returned slice is nil.
func ReadTagValues(content io.Reader) ([]TagValuePair, error) {
	r := &tvReader{}

	// bufio.Reader is used rather than bufio.Scanner because Scanner rejects
	// any line longer than bufio.MaxScanTokenSize (64 KiB).
	br := bufio.NewReader(content)
	for {
		line, readErr := br.ReadString('\n')
		if line != "" {
			// mirror bufio.ScanLines: drop the newline and one trailing CR
			line = strings.TrimSuffix(line, "\n")
			line = strings.TrimSuffix(line, "\r")
			if err := r.readNextLine(line); err != nil {
				return nil, err
			}
		}
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			return nil, readErr
		}
	}

	// finalize and make sure all is well
	tvList, err := r.finalize()
	if err != nil {
		return nil, err
	}
	if len(tvList) == 0 {
		// keep returning a nil slice when nothing was found
		return nil, nil
	}

	// convert internal format to the exported TagValuePair slice
	pairs := make([]TagValuePair, 0, len(tvList))
	for _, tv := range tvList {
		pairs = append(pairs, TagValuePair{Tag: tv.tag, Value: tv.value})
	}
	return pairs, nil
}

// tagvalue is the internal representation of a completed (tag, value) pair.
type tagvalue struct {
	tag   string
	value string
}

// tvReader is a small line-driven state machine. It is either "ready" for a
// new "tag: value" line, or "midtext", collecting the lines of a multi-line
// <text> value.
type tvReader struct {
	midtext       bool       // true while inside an unterminated <text> value
	tvList        []tagvalue // completed pairs, in input order
	currentLine   int        // 1-based number of the line most recently read
	currentTag    string     // tag of the pair currently being built
	currentValue  string     // value accumulated so far for currentTag
	textStartLine int        // line on which the pending <text> was opened
}

// finalize returns the collected pairs, or an error if the input ended while
// a <text> value was still open.
func (r *tvReader) finalize() ([]tagvalue, error) {
	if r.midtext {
		return nil, fmt.Errorf("finalize called while still midtext parsing a text tag (tag %q, <text> opened on line %d)", r.currentTag, r.textStartLine)
	}
	return r.tvList, nil
}

// readNextLine advances the line counter and hands the line to the handler
// for the current state.
func (r *tvReader) readNextLine(line string) error {
	r.currentLine++

	if r.midtext {
		return r.readNextLineFromMidtext(line)
	}
	return r.readNextLineFromReady(line)
}

// readNextLineFromReady parses a line that is expected to start a new pair.
// It returns an error if the line is neither blank, a comment, nor of the
// form "tag: value".
func (r *tvReader) readNextLineFromReady(line string) error {
	// strip whitespace from beginning of line
	trimmed := strings.TrimLeftFunc(line, unicode.IsSpace)

	// ignore empty lines and comment lines
	if trimmed == "" || strings.HasPrefix(trimmed, "#") {
		return nil
	}

	// split at the first colon; everything before it is the tag
	parts := strings.SplitN(trimmed, ":", 2)
	if len(parts) == 1 {
		return fmt.Errorf("no colon found in '%s' (line %d)", line, r.currentLine)
	}
	r.currentTag = strings.TrimSpace(parts[0])

	// determine whether the value contains (or starts) a <text> section
	parts = strings.SplitN(parts[1], "<text>", 2)
	if len(parts) == 1 {
		// no <text> marker: plain single-line value, whitespace-trimmed
		r.currentValue = strings.TrimSpace(parts[0])
		r.recordCurrent()
		return nil
	}

	// there was a <text> marker; now decide whether it closes on this line
	parts = strings.SplitN(parts[1], "</text>", 2)
	if len(parts) == 1 {
		// no </text> on this line: switch to midtext and keep collecting
		r.currentValue = parts[0] + "\n"
		r.midtext = true
		r.textStartLine = r.currentLine
		return nil
	}

	// <text>...</text> on one line: the value is the part in between,
	// kept verbatim (not whitespace-trimmed)
	r.currentValue = parts[0]
	r.recordCurrent()
	return nil
}

// readNextLineFromMidtext appends a line to the <text> value in progress and
// completes the pair once </text> is seen. Anything following </text> on
// that line is discarded.
func (r *tvReader) readNextLineFromMidtext(line string) error {
	parts := strings.SplitN(line, "</text>", 2)
	if len(parts) == 1 {
		// doesn't contain </text>, so keep building the current value
		r.currentValue += line + "\n"
		return nil
	}

	// contains </text>, so end and record this pair
	r.currentValue += parts[0]
	r.midtext = false
	r.recordCurrent()
	return nil
}

// recordCurrent appends the pair being built to tvList and clears the
// per-pair state so the reader is ready for the next tag.
func (r *tvReader) recordCurrent() {
	r.tvList = append(r.tvList, tagvalue{tag: r.currentTag, value: r.currentValue})
	r.currentTag = ""
	r.currentValue = ""
}