aboutsummaryrefslogtreecommitdiff
path: root/starlarkjson/json.go
diff options
context:
space:
mode:
Diffstat (limited to 'starlarkjson/json.go')
-rw-r--r--starlarkjson/json.go478
1 files changed, 478 insertions, 0 deletions
diff --git a/starlarkjson/json.go b/starlarkjson/json.go
new file mode 100644
index 0000000..fc5d53f
--- /dev/null
+++ b/starlarkjson/json.go
@@ -0,0 +1,478 @@
+// Copyright 2020 The Bazel Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package starlarkjson defines utilities for converting Starlark values
+// to/from JSON strings. The most recent IETF standard for JSON is
+// https://www.ietf.org/rfc/rfc7159.txt.
+package starlarkjson // import "go.starlark.net/starlarkjson"
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "log"
+ "math"
+ "math/big"
+ "sort"
+ "strconv"
+ "strings"
+ "unicode/utf8"
+
+ "go.starlark.net/starlark"
+ "go.starlark.net/starlarkstruct"
+)
+
+// Module json is a Starlark module of JSON-related functions.
+//
+// json = module(
+// encode,
+// decode,
+// indent,
+// )
+//
+// def encode(x):
+//
+// The encode function accepts one required positional argument,
+// which it converts to JSON by cases:
+// - A Starlark value that implements Go's standard json.Marshaler
+// interface defines its own JSON encoding.
+// - None, True, and False are converted to null, true, and false, respectively.
+// - Starlark int values, no matter how large, are encoded as decimal integers.
+// Some decoders may not be able to decode very large integers.
+// - Starlark float values are encoded using decimal point notation,
+// even if the value is an integer.
+// It is an error to encode a non-finite floating-point value.
+// - Starlark strings are encoded as JSON strings, using UTF-16 escapes.
+// - a Starlark IterableMapping (e.g. dict) is encoded as a JSON object.
+// It is an error if any key is not a string.
+// - any other Starlark Iterable (e.g. list, tuple) is encoded as a JSON array.
+// - a Starlark HasAttrs (e.g. struct) is encoded as a JSON object.
+// If an application-defined type matches more than one of the cases described above
+// (e.g. it implements both Iterable and HasAttrs), the first case takes precedence.
+// Encoding any other value yields an error.
+//
+// def decode(x):
+//
+// The decode function accepts one positional parameter, a JSON string.
+// It returns the Starlark value that the string denotes.
+// - Numbers are parsed as int or float, depending on whether they
+// contain a decimal point or an exponent.
+// - JSON objects are parsed as new unfrozen Starlark dicts.
+// - JSON arrays are parsed as new unfrozen Starlark lists.
+// Decoding fails if x is not a valid JSON string.
+//
+// def indent(str, *, prefix="", indent="\t"):
+//
+// The indent function pretty-prints a valid JSON encoding,
+// and returns a string containing the indented form.
+// It accepts one required positional parameter, the JSON string,
+// and two optional keyword-only string parameters, prefix and indent,
+// that specify a prefix of each new line, and the unit of indentation.
+//
+var Module = &starlarkstruct.Module{
+ Name: "json",
+ Members: starlark.StringDict{
+ "encode": starlark.NewBuiltin("json.encode", encode),
+ "decode": starlark.NewBuiltin("json.decode", decode),
+ "indent": starlark.NewBuiltin("json.indent", indent),
+ },
+}
+
+// encode implements the json.encode built-in. It accepts exactly one
+// positional argument and returns its JSON encoding as a Starlark string.
+//
+// The value is dispatched to the first matching case, in this order:
+// json.Marshaler, None, bool, int, float, string, IterableMapping,
+// Iterable, HasAttrs. Members of JSON objects (mapping items and
+// attributes) are written in sorted key order.
+//
+// On failure it returns an error prefixed with the built-in's name and
+// the path (key, index or field) leading to the offending value. Nested
+// errors are wrapped with %w so that callers can inspect the cause.
+func encode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	var x starlark.Value
+	if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &x); err != nil {
+		return nil, err
+	}
+
+	buf := new(bytes.Buffer)
+
+	// quote appends s to buf as a JSON string literal.
+	// quoteSpace is scratch storage handed to strconv.AppendQuote.
+	var quoteSpace [128]byte
+	quote := func(s string) {
+		// Non-trivial escaping is handled by Go's encoding/json.
+		if isPrintableASCII(s) {
+			buf.Write(strconv.AppendQuote(quoteSpace[:0], s))
+		} else {
+			// TODO(adonovan): opt: RFC 8259 mandates UTF-8 for JSON.
+			// Can we avoid this call?
+			data, _ := json.Marshal(s) // marshaling a plain string cannot fail
+			buf.Write(data)
+		}
+	}
+
+	// emit appends the JSON encoding of x to buf, recursing into
+	// containers. The order of the cases below is significant: a value
+	// that satisfies several interfaces is handled by the first match.
+	var emit func(x starlark.Value) error
+	emit = func(x starlark.Value) error {
+		switch x := x.(type) {
+		case json.Marshaler:
+			// Application-defined starlark.Value types
+			// may define their own JSON encoding.
+			data, err := x.MarshalJSON()
+			if err != nil {
+				return err
+			}
+			buf.Write(data)
+
+		case starlark.NoneType:
+			buf.WriteString("null")
+
+		case starlark.Bool:
+			if x {
+				buf.WriteString("true")
+			} else {
+				buf.WriteString("false")
+			}
+
+		case starlark.Int:
+			fmt.Fprint(buf, x)
+
+		case starlark.Float:
+			if !isFinite(float64(x)) {
+				return fmt.Errorf("cannot encode non-finite float %v", x)
+			}
+			// NOTE(review): presumably always contains a decimal point;
+			// that depends on how starlark.Float formats under %g — confirm.
+			fmt.Fprintf(buf, "%g", x)
+
+		case starlark.String:
+			quote(string(x))
+
+		case starlark.IterableMapping:
+			// e.g. dict (must have string keys)
+			buf.WriteByte('{')
+			items := x.Items()
+			// Validate every key up front so that the sort
+			// comparator's type assertions cannot panic.
+			for _, item := range items {
+				if _, ok := item[0].(starlark.String); !ok {
+					return fmt.Errorf("%s has %s key, want string", x.Type(), item[0].Type())
+				}
+			}
+			sort.Slice(items, func(i, j int) bool {
+				return items[i][0].(starlark.String) < items[j][0].(starlark.String)
+			})
+			for i, item := range items {
+				if i > 0 {
+					buf.WriteByte(',')
+				}
+				k, _ := starlark.AsString(item[0]) // keys were checked above
+				quote(k)
+				buf.WriteByte(':')
+				if err := emit(item[1]); err != nil {
+					return fmt.Errorf("in %s key %s: %w", x.Type(), item[0], err)
+				}
+			}
+			buf.WriteByte('}')
+
+		case starlark.Iterable:
+			// e.g. tuple, list
+			buf.WriteByte('[')
+			iter := x.Iterate()
+			defer iter.Done()
+			var elem starlark.Value
+			for i := 0; iter.Next(&elem); i++ {
+				if i > 0 {
+					buf.WriteByte(',')
+				}
+				if err := emit(elem); err != nil {
+					return fmt.Errorf("at %s index %d: %w", x.Type(), i, err)
+				}
+			}
+			buf.WriteByte(']')
+
+		case starlark.HasAttrs:
+			// e.g. struct
+			buf.WriteByte('{')
+			// Copy the names before sorting so that the slice
+			// returned by AttrNames is not reordered in place.
+			var names []string
+			names = append(names, x.AttrNames()...)
+			sort.Strings(names)
+			for i, name := range names {
+				v, err := x.Attr(name)
+				if err != nil {
+					// A failed attribute lookup is an ordinary
+					// run-time error: report it to the caller
+					// instead of terminating the process.
+					return fmt.Errorf("in field .%s: %w", name, err)
+				}
+				if v == nil {
+					// AttrNames listed a name that Attr says is
+					// absent: a bug in the HasAttrs implementation.
+					// Panic (recoverable by the host, and deferred
+					// calls still run) rather than exit the process.
+					log.Panicf("internal error: dir(%s) includes %q but value has no .%s field", x.Type(), name, name)
+				}
+				if i > 0 {
+					buf.WriteByte(',')
+				}
+				quote(name)
+				buf.WriteByte(':')
+				if err := emit(v); err != nil {
+					return fmt.Errorf("in field .%s: %w", name, err)
+				}
+			}
+			buf.WriteByte('}')
+
+		default:
+			return fmt.Errorf("cannot encode %s as JSON", x.Type())
+		}
+		return nil
+	}
+
+	if err := emit(x); err != nil {
+		return nil, fmt.Errorf("%s: %w", b.Name(), err)
+	}
+	return starlark.String(buf.String()), nil
+}
+
+// isPrintableASCII reports whether s contains only printable ASCII,
+// that is, bytes in the range 0x20 (space) through 0x7e ('~').
+// The empty string is considered printable.
+//
+// DEL (0x7f) is deliberately excluded: encode passes strings that
+// satisfy this predicate to strconv.AppendQuote, which would write DEL
+// as the escape \x7f, and that is not a legal escape in JSON.
+func isPrintableASCII(s string) bool {
+	for i := 0; i < len(s); i++ {
+		if b := s[i]; b < 0x20 || b >= 0x7f {
+			return false
+		}
+	}
+	return true
+}
+
+// isFinite reports whether f is an ordinary finite number,
+// i.e. neither a NaN nor an infinity of either sign.
+func isFinite(f float64) bool {
+	return !math.IsNaN(f) && !math.IsInf(f, 0)
+}
+
+// indent implements the json.indent built-in. It takes one positional
+// argument, the JSON text, plus the optional keyword arguments prefix
+// (default "") and indent (default "\t"), and returns the text
+// re-formatted by json.Indent as a Starlark string.
+func indent(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	// Keyword-only options, pre-loaded with their defaults.
+	var (
+		linePrefix = ""
+		unit       = "\t"
+	)
+	if err := starlark.UnpackArgs(b.Name(), nil, kwargs, "prefix?", &linePrefix, "indent?", &unit); err != nil {
+		return nil, err
+	}
+
+	// The single positional-only argument is the JSON text itself.
+	var src string
+	if err := starlark.UnpackPositionalArgs(b.Name(), args, nil, 1, &src); err != nil {
+		return nil, err
+	}
+
+	var out bytes.Buffer
+	if err := json.Indent(&out, []byte(src), linePrefix, unit); err != nil {
+		return nil, fmt.Errorf("%s: %v", b.Name(), err)
+	}
+	return starlark.String(out.String()), nil
+}
+
+// decode implements the json.decode built-in. It takes one positional
+// argument, a string of JSON text, and returns the Starlark value it
+// denotes: strings become String, null becomes None, true/false become
+// Bool, arrays become new lists, objects become new dicts, and numbers
+// become Int, or Float if the literal has a fraction or exponent.
+//
+// Any syntax error is reported as an error of the form
+// "json.decode: at offset N, ...", where N is the byte offset reached.
+// String and number literals are checked against the JSON grammar:
+// unescaped control characters in strings, and numbers such as "1.",
+// "-.5" or "01", are rejected.
+func decode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (_ starlark.Value, err error) {
+	var s string
+	if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &s); err != nil {
+		return nil, err
+	}
+
+	// The decoder necessarily makes certain representation choices
+	// such as list vs tuple, struct vs dict, int vs float.
+	// In principle, we could parameterize it to allow the caller to
+	// control the returned types, but there's no compelling need yet.
+
+	// Use panic/recover with a distinguished type (failure) for error handling.
+	type failure string
+	fail := func(format string, args ...interface{}) {
+		panic(failure(fmt.Sprintf(format, args...)))
+	}
+
+	i := 0 // offset in s of the next unread byte
+
+	// skipSpace consumes leading spaces, and reports whether there is more input.
+	skipSpace := func() bool {
+		for ; i < len(s); i++ {
+			b := s[i]
+			if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
+				return true
+			}
+		}
+		return false
+	}
+
+	// next consumes leading spaces and returns the first non-space.
+	// It panics if at EOF.
+	next := func() byte {
+		if skipSpace() {
+			return s[i]
+		}
+		fail("unexpected end of file")
+		panic("unreachable")
+	}
+
+	// parse returns the next JSON value from the input.
+	// It consumes leading but not trailing whitespace.
+	// It panics on error.
+	var parse func() starlark.Value
+	parse = func() starlark.Value {
+		b := next()
+		switch b {
+		case '"':
+			// string
+
+			// Find end of quotation.
+			// Also, record whether trivial unquoting is safe.
+			// Non-trivial unquoting is handled by Go's encoding/json.
+			safe := true
+			closed := false
+			j := i + 1
+			for ; j < len(s); j++ {
+				b := s[j]
+				if b == '\\' {
+					safe = false
+					j++ // skip x in \x
+				} else if b == '"' {
+					closed = true
+					j++ // skip '"'
+					break
+				} else if b < 0x20 || b >= utf8.RuneSelf {
+					// Non-ASCII bytes need UTF-8 validation, and raw
+					// control characters are illegal inside a JSON
+					// string; leave both to json.Unmarshal.
+					safe = false
+				}
+			}
+			if !closed {
+				fail("unclosed string literal")
+			}
+
+			r := s[i:j]
+			i = j
+
+			// unquote
+			if safe {
+				r = r[1 : len(r)-1]
+			} else if err := json.Unmarshal([]byte(r), &r); err != nil {
+				fail("%s", err)
+			}
+			return starlark.String(r)
+
+		case 'n':
+			if strings.HasPrefix(s[i:], "null") {
+				i += len("null")
+				return starlark.None
+			}
+
+		case 't':
+			if strings.HasPrefix(s[i:], "true") {
+				i += len("true")
+				return starlark.True
+			}
+
+		case 'f':
+			if strings.HasPrefix(s[i:], "false") {
+				i += len("false")
+				return starlark.False
+			}
+
+		case '[':
+			// array
+			var elems []starlark.Value
+
+			i++ // '['
+			b = next()
+			if b != ']' {
+				for {
+					elem := parse()
+					elems = append(elems, elem)
+					b = next()
+					if b != ',' {
+						if b != ']' {
+							fail("got %q, want ',' or ']'", b)
+						}
+						break
+					}
+					i++ // ','
+				}
+			}
+			i++ // ']'
+			return starlark.NewList(elems)
+
+		case '{':
+			// object
+			dict := new(starlark.Dict)
+
+			i++ // '{'
+			b = next()
+			if b != '}' {
+				for {
+					key := parse()
+					if _, ok := key.(starlark.String); !ok {
+						fail("got %s for object key, want string", key.Type())
+					}
+					b = next()
+					if b != ':' {
+						fail("after object key, got %q, want ':' ", b)
+					}
+					i++ // ':'
+					value := parse()
+					dict.SetKey(key, value) // can't fail
+					b = next()
+					if b != ',' {
+						if b != '}' {
+							fail("in object, got %q, want ',' or '}'", b)
+						}
+						break
+					}
+					i++ // ','
+				}
+			}
+			i++ // '}'
+			return dict
+
+		default:
+			// number?
+			if isdigit(b) || b == '-' {
+				// Scan the longest run of [0-9+-eE.]; anything other
+				// than a digit marks the literal as floating-point.
+				float := false
+				var j int
+				for j = i + 1; j < len(s); j++ {
+					b = s[j]
+					if isdigit(b) {
+						// ok
+					} else if b == '.' ||
+						b == 'e' ||
+						b == 'E' ||
+						b == '+' ||
+						b == '-' {
+						float = true
+					} else {
+						break
+					}
+				}
+				num := s[i:j]
+				i = j
+
+				// The scan above is deliberately permissive, and
+				// strconv.ParseFloat accepts forms that JSON does not
+				// (e.g. "1." and "-.5"), so check the literal against
+				// the JSON number grammar. Unlike most C-like
+				// languages, JSON also disallows a leading zero
+				// before a digit; json.Valid enforces that too.
+				if !json.Valid([]byte(num)) {
+					fail("invalid number: %s", num)
+				}
+
+				// parse literal
+				if float {
+					x, err := strconv.ParseFloat(num, 64)
+					if err != nil {
+						fail("invalid number: %s", num)
+					}
+					return starlark.Float(x)
+				}
+				x, ok := new(big.Int).SetString(num, 10)
+				if !ok {
+					fail("invalid number: %s", num)
+				}
+				return starlark.MakeBigInt(x)
+			}
+		}
+		fail("unexpected character %q", b)
+		panic("unreachable")
+	}
+	// Convert a failure panic raised by fail into the returned error;
+	// any other panic is unexpected and is propagated unchanged.
+	defer func() {
+		x := recover()
+		switch x := x.(type) {
+		case failure:
+			err = fmt.Errorf("json.decode: at offset %d, %s", i, x)
+		case nil:
+			// nop
+		default:
+			panic(x) // unexpected panic
+		}
+	}()
+	x := parse()
+	if skipSpace() {
+		fail("unexpected character %q after value", s[i])
+	}
+	return x, nil
+}
+
+// isdigit reports whether b is an ASCII decimal digit, '0' through '9'.
+func isdigit(b byte) bool {
+	// Unsigned subtraction wraps for bytes below '0', so a single
+	// comparison covers both ends of the range.
+	return b-'0' < 10
+}