20 files changed, 739 insertions, 284 deletions
diff --git a/go.mod b/go.mod
index 50bc000..d14060e 100644
--- a/go.mod
+++ b/go.mod
@@ -6,6 +6,8 @@ require (
 	github.com/chzyer/logex v1.1.10 // indirect
 	github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e
 	github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 // indirect
-	golang.org/x/sys v0.0.0-20200803210538-64077c9b5642
+	github.com/google/go-cmp v0.5.1 // indirect
+	golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f
+	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
 	google.golang.org/protobuf v1.25.0
 )
diff --git a/go.sum b/go.sum
index b40c868..90a8048 100644
--- a/go.sum
+++ b/go.sum
@@ -24,8 +24,9 @@ github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5a
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w=
 github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.1 h1:JFrFEBb2xKufg6XkJsJr+WbKb4FQlURi5RUcBveYu9k=
+github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
@@ -42,15 +43,16 @@ golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20200803210538-64077c9b5642 h1:B6caxRw+hozq68X2MY7jEpZh/cr4/aHLv9xU8Kkadrw=
-golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
 golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
 golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
-golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
 google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
diff --git a/internal/compile/compile.go b/internal/compile/compile.go
index ab67018..c314e6e 100644
--- a/internal/compile/compile.go
+++ b/internal/compile/compile.go
@@ -33,6 +33,7 @@ import (
 	"os"
 	"path/filepath"
 	"strconv"
+	"strings"
 	"sync"
 
 	"go.starlark.net/resolve"
@@ -46,7 +47,7 @@ var Disassemble = false
 const debug = false // make code generation verbose, for debugging the compiler
 
 // Increment this to force recompilation of saved bytecode files.
-const Version = 11
+const Version = 12
 
 type Opcode uint8
 
@@ -309,12 +310,15 @@ func (op Opcode) String() string {
 type Program struct {
 	Loads     []Binding     // name (really, string) and position of each load stmt
 	Names     []string      // names of attributes and predeclared variables
-	Constants []interface{} // = string | int64 | float64 | *big.Int
+	Constants []interface{} // = string | int64 | float64 | *big.Int | Bytes
 	Functions []*Funcode
 	Globals   []Binding // for error messages and tracing
 	Toplevel  *Funcode  // module initialization function
 }
 
+// Bytes is the type of a bytes literal value, to distinguish it from a text string.
+type Bytes string
+
 // A Funcode is the code of a compiled Starlark function.
 //
 // Funcodes are serialized by the encoder.function method,
@@ -863,6 +867,8 @@ func PrintOp(fn *Funcode, pc uint32, op Opcode, arg uint32) {
 		switch x := fn.Prog.Constants[arg].(type) {
 		case string:
 			comment = strconv.Quote(x)
+		case Bytes:
+			comment = "b" + strconv.Quote(string(x))
 		default:
 			comment = fmt.Sprint(x)
 		}
@@ -1283,8 +1289,12 @@ func (fcomp *fcomp) expr(e syntax.Expr) {
 		fcomp.lookup(e)
 
 	case *syntax.Literal:
-		// e.Value is int64, float64, *bigInt, or string.
-		fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(e.Value))
+		// e.Value is int64, float64, *big.Int, or string (bytes literals also carry a string).
+		v := e.Value
+		if e.Token == syntax.BYTES {
+			v = Bytes(v.(string))
+		}
+		fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(v))
 
 	case *syntax.ListExpr:
 		for _, x := range e.List {
@@ -1522,7 +1532,7 @@ func (fcomp *fcomp) plus(e *syntax.BinaryExpr) {
 }
 
 // addable reports whether e is a statically addable
-// expression: a [s]tring, [l]ist, or [t]uple.
+// expression: a [s]tring, [b]ytes, [l]ist, or [t]uple.
 func addable(e syntax.Expr) rune {
 	switch e := e.(type) {
 	case *syntax.Literal:
@@ -1530,6 +1540,8 @@ func addable(e syntax.Expr) rune {
 		switch e.Token {
 		case syntax.STRING:
 			return 's'
+		case syntax.BYTES:
+			return 'b'
 		}
 	case *syntax.ListExpr:
 		return 'l'
@@ -1544,12 +1556,16 @@ func addable(e syntax.Expr) rune {
 // The resulting syntax is degenerate, lacking position, etc.
 func add(code rune, args []summand) syntax.Expr {
 	switch code {
-	case 's':
-		var buf bytes.Buffer
+	case 's', 'b':
+		var buf strings.Builder
 		for _, arg := range args {
 			buf.WriteString(arg.x.(*syntax.Literal).Value.(string))
 		}
-		return &syntax.Literal{Token: syntax.STRING, Value: buf.String()}
+		tok := syntax.STRING
+		if code == 'b' {
+			tok = syntax.BYTES
+		}
+		return &syntax.Literal{Token: tok, Value: buf.String()}
 	case 'l':
 		var elems []syntax.Expr
 		for _, arg := range args {
diff --git a/internal/compile/serial.go b/internal/compile/serial.go
index 0107ef9..adadabf 100644
--- a/internal/compile/serial.go
+++ b/internal/compile/serial.go
@@ -51,9 +51,10 @@ package compile
 //
 // Constant:                            # type      data
 //      type            varint          # 0=string  string
-//      data            ...             # 1=int     varint
-//                                      # 2=float   varint (bits as uint64)
-//                                      # 3=bigint  string (decimal ASCII text)
+//      data            ...             # 1=bytes   string
+//                                      # 2=int     varint
+//                                      # 3=float   varint (bits as uint64)
+//                                      # 4=bigint  string (decimal ASCII text)
 //
 // The encoding starts with a four-byte magic number.
 // The next four bytes are a little-endian uint32
@@ -109,14 +110,17 @@ func (prog *Program) Encode() []byte {
 		case string:
 			e.int(0)
 			e.string(c)
-		case int64:
+		case Bytes:
 			e.int(1)
+			e.string(string(c))
+		case int64:
+			e.int(2)
 			e.int64(c)
 		case float64:
-			e.int(2)
+			e.int(3)
 			e.uint64(math.Float64bits(c))
 		case *big.Int:
-			e.int(3)
+			e.int(4)
 			e.string(c.Text(10))
 		}
 	}
@@ -249,10 +253,12 @@ func DecodeProgram(data []byte) (_ *Program, err error) {
 		case 0:
 			c = d.string()
 		case 1:
-			c = d.int64()
+			c = Bytes(d.string())
 		case 2:
-			c = math.Float64frombits(d.uint64())
+			c = d.int64()
 		case 3:
+			c = math.Float64frombits(d.uint64())
+		case 4:
 			c, _ = new(big.Int).SetString(d.string(), 10)
 		}
 		constants[i] = c
diff --git a/lib/proto/proto.go b/lib/proto/proto.go
index 84aa0d6..149162d 100644
--- a/lib/proto/proto.go
+++ b/lib/proto/proto.go
@@ -79,8 +79,6 @@
 package proto
 
 // TODO(adonovan): Go and Starlark API improvements:
-// - Contribute the 'bytes' data type to the core language.
-//   See https://github.com/bazelbuild/starlark/issues/112.
 // - Make Message and RepeatedField comparable.
 //   (NOTE: proto.Equal works only with generated message types.)
 // - Support maps, oneof, any. But not messageset if we can avoid it.
@@ -234,7 +232,7 @@ func marshal(_ *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwar
 		if err != nil {
 			return nil, fmt.Errorf("%s: %v", fn.Name(), err)
 		}
-		return Bytes(data), nil
+		return starlark.Bytes(data), nil
 	} else {
 		text, err := prototext.MarshalOptions{Indent: "  "}.Marshal(m.Message())
 		if err != nil {
@@ -247,7 +245,7 @@ func marshal(_ *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwar
 // unmarshal(msg) decodes a binary protocol message to a Message.
 func unmarshal(thread *starlark.Thread, fn *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
 	var desc MessageDescriptor
-	var data Bytes
+	var data starlark.Bytes
 	if err := starlark.UnpackPositionalArgs(fn.Name(), args, kwargs, 2, &desc, &data); err != nil {
 		return nil, err
 	}
@@ -486,7 +484,7 @@ func toProto(fdesc protoreflect.FieldDescriptor, v starlark.Value) (protoreflect
 	case protoreflect.StringKind:
 		if s, ok := starlark.AsString(v); ok {
 			return protoreflect.ValueOfString(s), nil
-		} else if b, ok := v.(Bytes); ok {
+		} else if b, ok := v.(starlark.Bytes); ok {
 			// TODO(adonovan): allow bytes for string? Not friendly to a Java port.
 			return protoreflect.ValueOfBytes([]byte(b)), nil
 		}
@@ -497,7 +495,7 @@ func toProto(fdesc protoreflect.FieldDescriptor, v starlark.Value) (protoreflect
 			// Instead provide b"..." literals in the core
 			// and a bytes(str) conversion.
 			return protoreflect.ValueOfBytes([]byte(s)), nil
-		} else if b, ok := v.(Bytes); ok {
+		} else if b, ok := v.(starlark.Bytes); ok {
 			return protoreflect.ValueOfBytes([]byte(b)), nil
 		}
 
@@ -588,7 +586,7 @@ func toStarlark1(typ protoreflect.FieldDescriptor, x protoreflect.Value, frozen
 		return starlark.String(x.String())
 
 	case protoreflect.BytesKind:
-		return Bytes(x.Bytes())
+		return starlark.Bytes(x.Bytes())
 
 	case protoreflect.DoubleKind, protoreflect.FloatKind:
 		return starlark.Float(x.Float())
@@ -1232,78 +1230,3 @@ func (x EnumValueDescriptor) CompareSameType(op syntax.Token, y_ starlark.Value,
 		return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y_.Type())
 	}
 }
-
-// A Bytes is an immutable sequence of bytes.
-// It is comparable, iterable, indexable, and sliceable.
-//
-// (In go.starlark.net, text Strings are also byte strings,
-// but we shouldn't rely on that.
-// See https://github.com/bazelbuild/starlark/issues/112.)
-type Bytes string
-
-var (
-	_ starlark.Comparable = Bytes("")
-	_ starlark.Iterable   = Bytes("")
-	_ starlark.Sliceable  = Bytes("")
-	_ starlark.Sequence   = Bytes("")
-)
-
-func (b Bytes) String() string             { return fmt.Sprintf("<%d bytes>", len(b)) }
-func (b Bytes) Type() string               { return "bytes" }
-func (b Bytes) Freeze()                    {} // immutable
-func (b Bytes) Truth() starlark.Bool       { return len(b) > 0 }
-func (b Bytes) Hash() (uint32, error)      { return starlark.String(b).Hash() }
-func (b Bytes) Len() int                   { return len(b) }
-func (b Bytes) Index(i int) starlark.Value { return starlark.MakeInt(int(b[i])) }
-
-func (b Bytes) Slice(start, end, step int) starlark.Value {
-	if step == 1 {
-		return b[start:end]
-	}
-
-	sign := signum(step)
-	var str []byte
-	for i := start; signum(end-i) == sign; i += step {
-		str = append(str, b[i])
-	}
-	return Bytes(str)
-}
-
-// From Hacker's Delight, section 2.8.
-func signum64(x int64) int { return int(uint64(x>>63) | uint64(-x)>>63) }
-func signum(x int) int     { return signum64(int64(x)) }
-
-func (b Bytes) Iterate() starlark.Iterator { return &bytesIterator{string(b)} }
-
-type bytesIterator struct{ string }
-
-func (it *bytesIterator) Next(p *starlark.Value) bool {
-	if it.string == "" {
-		return false
-	}
-	*p = starlark.MakeInt(int(it.string[0]))
-	it.string = it.string[1:]
-	return true
-}
-
-func (it *bytesIterator) Done() {}
-
-func (x Bytes) CompareSameType(op syntax.Token, y_ starlark.Value, depth int) (bool, error) {
-	y := y_.(Bytes)
-	cmp := strings.Compare(string(x), string(y))
-	switch op {
-	case syntax.EQL:
-		return cmp == 0, nil
-	case syntax.NEQ:
-		return cmp != 0, nil
-	case syntax.LE:
-		return cmp <= 0, nil
-	case syntax.LT:
-		return cmp < 0, nil
-	case syntax.GE:
-		return cmp >= 0, nil
-	case syntax.GT:
-		return cmp > 0, nil
-	}
-	panic(op)
-}
diff --git a/starlark/eval.go b/starlark/eval.go
index c9bbb67..d0ad91f 100644
--- a/starlark/eval.go
+++ b/starlark/eval.go
@@ -478,6 +478,8 @@ func makeToplevelFunction(prog *compile.Program, predeclared StringDict) *Functi
 			v = MakeBigInt(c)
 		case string:
 			v = String(c)
+		case compile.Bytes:
+			v = Bytes(c)
 		case float64:
 			v = Float(c)
 		default:
@@ -796,6 +798,8 @@ func Binary(op syntax.Token, x, y Value) (Value, error) {
 				return xf * y, nil
 			case String:
 				return stringRepeat(y, x)
+			case Bytes:
+				return bytesRepeat(y, x)
 			case *List:
 				elems, err := tupleRepeat(Tuple(y.elems), x)
 				if err != nil {
@@ -820,6 +824,10 @@ func Binary(op syntax.Token, x, y Value) (Value, error) {
 			if y, ok := y.(Int); ok {
 				return stringRepeat(x, y)
 			}
+		case Bytes:
+			if y, ok := y.(Int); ok {
+				return bytesRepeat(x, y)
+			}
 		case *List:
 			if y, ok := y.(Int); ok {
 				elems, err := tupleRepeat(Tuple(x.elems), y)
@@ -996,6 +1004,19 @@ func Binary(op syntax.Token, x, y Value) (Value, error) {
 				return nil, fmt.Errorf("'in <string>' requires string as left operand, not %s", x.Type())
 			}
 			return Bool(strings.Contains(string(y), string(needle))), nil
+		case Bytes:
+			switch needle := x.(type) {
+			case Bytes:
+				return Bool(strings.Contains(string(y), string(needle))), nil
+			case Int:
+				var b byte
+				if err := AsInt(needle, &b); err != nil {
+					return nil, fmt.Errorf("int in bytes: %s", err)
+				}
+				return Bool(strings.IndexByte(string(y), b) >= 0), nil
+			default:
+				return nil, fmt.Errorf("'in bytes' requires bytes or int as left operand, not %s", x.Type())
+			}
 		case rangeValue:
 			i, err := NumberToInt(x)
 			if err != nil {
@@ -1138,6 +1159,11 @@ func tupleRepeat(elems Tuple, n Int) (Tuple, error) {
 	return res, nil
 }
 
+func bytesRepeat(b Bytes, n Int) (Bytes, error) {
+	res, err := stringRepeat(String(b), n)
+	return Bytes(res), err
+}
+
 func stringRepeat(s String, n Int) (String, error) {
 	if s == "" {
 		return "", nil
diff --git a/starlark/eval_test.go b/starlark/eval_test.go
index 4ce08d3..9752fe8 100644
--- a/starlark/eval_test.go
+++ b/starlark/eval_test.go
@@ -115,6 +115,7 @@ func TestExecFile(t *testing.T) {
 		"testdata/assign.star",
 		"testdata/bool.star",
 		"testdata/builtins.star",
+		"testdata/bytes.star",
 		"testdata/control.star",
 		"testdata/dict.star",
 		"testdata/float.star",
diff --git a/starlark/hashtable.go b/starlark/hashtable.go
index d425019..27990b5 100644
--- a/starlark/hashtable.go
+++ b/starlark/hashtable.go
@@ -362,9 +362,9 @@ func hashString(s string) uint32 {
 //go:linkname goStringHash runtime.stringHash
 func goStringHash(s string, seed uintptr) uintptr
 
-// softHashString computes the FNV hash of s in software.
+// softHashString computes the 32-bit FNV-1a hash of s in software.
 func softHashString(s string) uint32 {
-	var h uint32
+	var h uint32 = 2166136261
 	for i := 0; i < len(s); i++ {
 		h ^= uint32(s[i])
 		h *= 16777619
diff --git a/starlark/library.go b/starlark/library.go
index 5645418..5620426 100644
--- a/starlark/library.go
+++ b/starlark/library.go
@@ -42,6 +42,7 @@ func init() {
 		"any":       NewBuiltin("any", any),
 		"all":       NewBuiltin("all", all),
 		"bool":      NewBuiltin("bool", bool_),
+		"bytes":     NewBuiltin("bytes", bytes_),
 		"chr":       NewBuiltin("chr", chr),
 		"dict":      NewBuiltin("dict", dict),
 		"dir":       NewBuiltin("dir", dir),
@@ -73,6 +74,10 @@ func init() {
 // methods of built-in types
 // https://github.com/google/starlark-go/blob/master/doc/spec.md#built-in-methods
 var (
+	bytesMethods = map[string]*Builtin{
+		"elems": NewBuiltin("elems", bytes_elems),
+	}
+
 	dictMethods = map[string]*Builtin{
 		"clear":      NewBuiltin("clear", dict_clear),
 		"get":        NewBuiltin("get", dict_get),
@@ -198,6 +203,45 @@ func bool_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error
 	return x.Truth(), nil
 }
 
+// https://github.com/google/starlark-go/blob/master/doc/spec.md#bytes
+func bytes_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if len(kwargs) > 0 {
+		return nil, fmt.Errorf("bytes does not accept keyword arguments")
+	}
+	if len(args) != 1 {
+		return nil, fmt.Errorf("bytes: got %d arguments, want exactly 1", len(args))
+	}
+	switch x := args[0].(type) {
+	case Bytes:
+		return x, nil
+	case String:
+		// Invalid encodings are replaced by that of U+FFFD.
+		return Bytes(utf8Transcode(string(x))), nil
+	case Iterable:
+		// iterable of numeric byte values
+		var buf strings.Builder
+		if n := Len(x); n >= 0 {
+			// common case: known length
+			buf.Grow(n)
+		}
+		iter := x.Iterate()
+		defer iter.Done()
+		var elem Value
+		var b byte
+		for i := 0; iter.Next(&elem); i++ {
+			if err := AsInt(elem, &b); err != nil {
+				return nil, fmt.Errorf("bytes: at index %d, %s", i, err)
+			}
+			buf.WriteByte(b)
+		}
+		return Bytes(buf.String()), nil
+
+	default:
+		// Unlike str(foo), which stringifies foo, bytes(foo) is an error.
+		return nil, fmt.Errorf("bytes: got %s, want string, bytes, or iterable of ints", x.Type())
+	}
+}
+
 // https://github.com/google/starlark-go/blob/master/doc/spec.md#chr
 func chr(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
 	if len(kwargs) > 0 {
@@ -261,9 +305,6 @@ func enumerate(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, e
 	}
 
 	iter := iterable.Iterate()
-	if iter == nil {
-		return nil, fmt.Errorf("enumerate: got %s, want iterable", iterable.Type())
-	}
 	defer iter.Done()
 
 	var pairs []Value
@@ -433,19 +474,27 @@ func hasattr(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, err
 
 // https://github.com/google/starlark-go/blob/master/doc/spec.md#hash
 func hash(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
-	var s string
-	if err := UnpackPositionalArgs("hash", args, kwargs, 1, &s); err != nil {
+	var x Value
+	if err := UnpackPositionalArgs("hash", args, kwargs, 1, &x); err != nil {
 		return nil, err
 	}
 
-	// The Starlark spec requires that the hash function be
-	// deterministic across all runs, motivated by the need
-	// for reproducibility of builds. Thus we cannot call
-	// String.Hash, which uses the fastest implementation
-	// available, because as varies across process restarts,
-	// and may evolve with the implementation.
-
-	return MakeInt(int(javaStringHash(s))), nil
+	var h int
+	switch x := x.(type) {
+	case String:
+		// The Starlark spec requires that the hash function be
+		// deterministic across all runs, motivated by the need
+		// for reproducibility of builds. Thus we cannot call
+		// String.Hash, which uses the fastest implementation
+		// available, because it varies across process restarts,
+		// and may evolve with the implementation.
+		h = int(javaStringHash(string(x)))
+	case Bytes:
+		h = int(softHashString(string(x))) // FNV32
+	default:
+		return nil, fmt.Errorf("hash: got %s, want string or bytes", x.Type())
+	}
+	return MakeInt(h), nil
 }
 
 // javaStringHash returns the same hash as would be produced by
@@ -691,16 +740,26 @@ func ord(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error)
 	if len(args) != 1 {
 		return nil, fmt.Errorf("ord: got %d arguments, want 1", len(args))
 	}
-	s, ok := AsString(args[0])
-	if !ok {
-		return nil, fmt.Errorf("ord: got %s, want string", args[0].Type())
-	}
-	r, sz := utf8.DecodeRuneInString(s)
-	if sz == 0 || sz != len(s) {
-		n := utf8.RuneCountInString(s)
-		return nil, fmt.Errorf("ord: string encodes %d Unicode code points, want 1", n)
+	switch x := args[0].(type) {
+	case String:
+		// ord(string) returns int value of sole rune.
+		s := string(x)
+		r, sz := utf8.DecodeRuneInString(s)
+		if sz == 0 || sz != len(s) {
+			n := utf8.RuneCountInString(s)
+			return nil, fmt.Errorf("ord: string encodes %d Unicode code points, want 1", n)
+		}
+		return MakeInt(int(r)), nil
+
+	case Bytes:
+		// ord(bytes) returns int value of sole byte.
+		if len(x) != 1 {
+			return nil, fmt.Errorf("ord: bytes has length %d, want 1", len(x))
+		}
+		return MakeInt(int(x[0])), nil
+	default:
+		return nil, fmt.Errorf("ord: got %s, want string or bytes", x.Type())
 	}
-	return MakeInt(int(r)), nil
 }
 
 // https://github.com/google/starlark-go/blob/master/doc/spec.md#print
@@ -716,6 +775,8 @@ func print(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error
 		}
 		if s, ok := AsString(v); ok {
 			buf.WriteString(s)
+		} else if b, ok := v.(Bytes); ok {
+			buf.WriteString(string(b))
 		} else {
 			writeValue(buf, v, nil)
 		}
@@ -993,11 +1054,29 @@ func str(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error)
 	if len(args) != 1 {
 		return nil, fmt.Errorf("str: got %d arguments, want exactly 1", len(args))
 	}
-	x := args[0]
-	if _, ok := AsString(x); !ok {
-		x = String(x.String())
+	switch x := args[0].(type) {
+	case String:
+		return x, nil
+	case Bytes:
+		// Invalid encodings are replaced by that of U+FFFD.
+		return String(utf8Transcode(string(x))), nil
+	default:
+		return String(x.String()), nil
 	}
-	return x, nil
+}
+
+// utf8Transcode returns the UTF-8-to-UTF-8 transcoding of s.
+// The effect is that each code unit that is part of an
+// invalid sequence is replaced by U+FFFD.
+func utf8Transcode(s string) string {
+	if utf8.ValidString(s) {
+		return s
+	}
+	var out strings.Builder
+	for _, r := range s {
+		out.WriteRune(r)
+	}
+	return out.String()
 }
 
 // https://github.com/google/starlark-go/blob/master/doc/spec.md#tuple
@@ -1374,13 +1453,51 @@ func string_iterable(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value,
 	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
 		return nil, err
 	}
-	return stringIterable{
-		s:          b.Receiver().(String),
-		ords:       b.Name()[len(b.Name())-2] == 'd',
-		codepoints: b.Name()[0] == 'c',
-	}, nil
+	s := b.Receiver().(String)
+	ords := b.Name()[len(b.Name())-2] == 'd'
+	codepoints := b.Name()[0] == 'c'
+	if codepoints {
+		return stringCodepoints{s, ords}, nil
+	} else {
+		return stringElems{s, ords}, nil
+	}
+}
+
+// bytes_elems returns an unspecified iterable value whose
+// iterator yields the int values of successive elements.
+func bytes_elems(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
+	if err := UnpackPositionalArgs(b.Name(), args, kwargs, 0); err != nil {
+		return nil, err
+	}
+	return bytesIterable{b.Receiver().(Bytes)}, nil
+}
+
+// A bytesIterable is an iterable returned by bytes.elems(),
+// whose iterator yields a sequence of numeric byte values.
+type bytesIterable struct{ bytes Bytes }
+
+var _ Iterable = (*bytesIterable)(nil)
+
+func (bi bytesIterable) String() string        { return bi.bytes.String() + ".elems()" }
+func (bi bytesIterable) Type() string          { return "bytes.elems" }
+func (bi bytesIterable) Freeze()               {} // immutable
+func (bi bytesIterable) Truth() Bool           { return True }
+func (bi bytesIterable) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", bi.Type()) }
+func (bi bytesIterable) Iterate() Iterator     { return &bytesIterator{bi.bytes} }
+
+type bytesIterator struct{ bytes Bytes }
+
+func (it *bytesIterator) Next(p *Value) bool {
+	if it.bytes == "" {
+		return false
+	}
+	*p = MakeInt(int(it.bytes[0]))
+	it.bytes = it.bytes[1:]
+	return true
 }
 
+func (*bytesIterator) Done() {}
+
 // https://github.com/google/starlark-go/blob/master/doc/spec.md#string·count
 func string_count(_ *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
 	var sub string
diff --git a/starlark/testdata/bytes.star b/starlark/testdata/bytes.star
new file mode 100644
index 0000000..d500403
--- /dev/null
+++ b/starlark/testdata/bytes.star
@@ -0,0 +1,159 @@
+# Tests of 'bytes' (immutable byte strings).
+
+load("assert.star", "assert")
+
+# bytes(string) -- UTF-k to UTF-8 transcoding with U+FFFD replacement
+hello = bytes("hello, 世界")
+goodbye = bytes("goodbye")
+empty = bytes("")
+nonprinting = bytes("\t\n\x7F\u200D")  # TAB, NEWLINE, DEL, ZERO_WIDTH_JOINER
+assert.eq(bytes("hello, 世界"[:-1]), b"hello, 世��")
+
+# bytes(iterable of int) -- construct from numeric byte values
+assert.eq(bytes([65, 66, 67]), b"ABC")
+assert.eq(bytes((65, 66, 67)), b"ABC")
+assert.eq(bytes([0xf0, 0x9f, 0x98, 0xbf]), b"😿")
+assert.fails(lambda: bytes([300]),
+             "at index 0, 300 out of range .want value in unsigned 8-bit range")
+assert.fails(lambda: bytes([b"a"]),
+             "at index 0, got bytes, want int")
+assert.fails(lambda: bytes(1), "want string, bytes, or iterable of ints")
+
+# literals
+assert.eq(b"hello, 世界", hello)
+assert.eq(b"goodbye", goodbye)
+assert.eq(b"", empty)
+assert.eq(b"\t\n\x7F\u200D", nonprinting)
+assert.ne("abc", b"abc")
+assert.eq(b"\012\xff\u0400\U0001F63F", b"\n\xffЀ😿") # see scanner tests for more
+assert.eq(rb"\r\n\t", b"\\r\\n\\t") # raw
+
+# type
+assert.eq(type(hello), "bytes")
+
+# len
+assert.eq(len(hello), 13)
+assert.eq(len(goodbye), 7)
+assert.eq(len(empty), 0)
+assert.eq(len(b"A"), 1)
+assert.eq(len(b"Ѐ"), 2)
+assert.eq(len(b"世"), 3)
+assert.eq(len(b"😿"), 4)
+
+# truth
+assert.true(hello)
+assert.true(goodbye)
+assert.true(not empty)
+
+# str(bytes) does UTF-8 to UTF-k transcoding.
+# TODO(adonovan): specify.
+assert.eq(str(hello), "hello, 世界")
+assert.eq(str(hello[:-1]), "hello, 世��")  # incomplete UTF-8 encoding => U+FFFD
+assert.eq(str(goodbye), "goodbye")
+assert.eq(str(empty), "")
+assert.eq(str(nonprinting), "\t\n\x7f\u200d")
+assert.eq(str(b"\xED\xB0\x80"), "���") # UTF-8 encoding of unpaired surrogate => U+FFFD x 3
+
+# repr
+assert.eq(repr(hello), r'b"hello, 世界"')
+assert.eq(repr(hello[:-1]), r'b"hello, 世\xe7\x95"')  # (incomplete UTF-8 encoding )
+assert.eq(repr(goodbye), 'b"goodbye"')
+assert.eq(repr(empty), 'b""')
+assert.eq(repr(nonprinting), 'b"\\t\\n\\x7f\\u200d"')
+
+# equality
+assert.eq(hello, hello)
+assert.ne(hello, goodbye)
+assert.eq(b"goodbye", goodbye)
+
+# ordered comparison
+assert.lt(b"abc", b"abd")
+assert.lt(b"abc", b"abcd")
+assert.lt(b"\x7f", b"\x80") # bytes compare as uint8, not int8
+
+# bytes are dict-hashable
+dict = {hello: 1, goodbye: 2}
+dict[b"goodbye"] = 3
+assert.eq(len(dict), 2)
+assert.eq(dict[goodbye], 3)
+
+# hash(bytes) is 32-bit FNV-1a.
+assert.eq(hash(b""), 0x811c9dc5)
+assert.eq(hash(b"a"), 0xe40c292c)
+assert.eq(hash(b"ab"), 0x4d2505ca)
+assert.eq(hash(b"abc"), 0x1a47e90b)
+
+# indexing
+assert.eq(goodbye[0], b"g")
+assert.eq(goodbye[-1], b"e")
+assert.fails(lambda: goodbye[100], "out of range")
+
+# slicing
+assert.eq(goodbye[:4], b"good")
+assert.eq(goodbye[4:], b"bye")
+assert.eq(goodbye[::2], b"gobe")
+assert.eq(goodbye[3:4], b"d")  # special case: len=1
+assert.eq(goodbye[4:4], b"")  # special case: len=0
+
+# bytes in bytes
+assert.eq(b"bc" in b"abcd", True)
+assert.eq(b"bc" in b"dcab", False)
+assert.fails(lambda: "bc" in b"dcab", "requires bytes or int as left operand, not string")
+
+# int in bytes
+assert.eq(97 in b"abc", True)  # 97='a'
+assert.eq(100 in b"abc", False) # 100='d'
+assert.fails(lambda: 256 in b"abc", "int in bytes: 256 out of range")
+assert.fails(lambda: -1 in b"abc", "int in bytes: -1 out of range")
+
+# ord   TODO(adonovan): specify
+assert.eq(ord(b"a"), 97)
+assert.fails(lambda: ord(b"ab"), "ord: bytes has length 2, want 1")
+assert.fails(lambda: ord(b""), "ord: bytes has length 0, want 1")
+
+# repeat (bytes * int)
+assert.eq(goodbye * 3, b"goodbyegoodbyegoodbye")
+assert.eq(3 * goodbye, b"goodbyegoodbyegoodbye")
+
+# elems() returns an iterable value over the numeric values of the bytes.
+assert.eq(type(hello.elems()), "bytes.elems")
+assert.eq(str(hello.elems()), "b\"hello, 世界\".elems()")
+assert.eq(list(hello.elems()), [104, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140])
+assert.eq(bytes([104, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140]), hello)
+assert.eq(list(goodbye.elems()), [103, 111, 111, 100, 98, 121, 101])
+assert.eq(list(empty.elems()), [])
+assert.eq(bytes(hello.elems()), hello) # bytes(iterable) is dual to bytes.elems()
+
+# x[i] = ...
+def f():
+    b"abc"[1] = b"B"
+
+assert.fails(f, "bytes.*does not support.*assignment")
+
+# TODO(adonovan): the specification is not finalized in many areas:
+# - chr, ord functions
+# - encoding/decoding bytes to string.
+# - methods: find, index, split, etc.
+#
+# Summary of string operations (put this in spec).
+#
+# string to number:
+# - bytes[i]  returns numeric value of ith byte.
+# - ord(string)  returns numeric value of sole code point in string.
+# - ord(string[i])  is not a useful operation: fails on non-ASCII; see below.
+#   Q. Perhaps ord should return the first (not sole) code point? Then it becomes a UTF-8 decoder.
+#      Perhaps ord(string, index=int) should apply the index and relax the len=1 check.
+# - string.codepoints()  iterates over 1-codepoint substrings.
+# - string.codepoint_ords()  iterates over numeric values of code points in string.
+# - string.elems()  iterates over 1-element (UTF-k code) substrings.
+# - string.elem_ords()  iterates over numeric UTF-k code values.
+# - string.elem_ords()[i]  returns numeric value of ith element (UTF-k code).
+# - string.elems()[i]  returns substring of a single element (UTF-k code).
+# - int(string)  parses string as decimal (or other) numeric literal.
+#
+# number to string:
+# - chr(int) returns string, UTF-k encoding of Unicode code point (like Python).
+#   Redundant with '%c' % int (which Python2 calls 'unichr'.)
+# - bytes(chr(int)) returns byte string containing UTF-8 encoding of one code point.
+# - bytes([int]) returns 1-byte string (with regrettable list allocation).
+# - str(int) - format number as decimal.
diff --git a/starlark/testdata/json.star b/starlark/testdata/json.star
index ef33d91..7c7b316 100644
--- a/starlark/testdata/json.star
+++ b/starlark/testdata/json.star
@@ -23,7 +23,7 @@ assert.eq(json.encode(range(3)), "[0,1,2]") # a built-in iterable
 assert.eq(json.encode(dict(x = 1, y = "two")), '{"x":1,"y":"two"}')
 assert.eq(json.encode(dict(y = "two", x = 1)), '{"x":1,"y":"two"}') # key, not insertion, order
 assert.eq(json.encode(struct(x = 1, y = "two")), '{"x":1,"y":"two"}')  # a user-defined HasAttrs
-assert.eq(json.encode("\x80"), '"\\ufffd"') # invalid UTF-8 -> replacement char
+assert.eq(json.encode("😹"[:1]), '"\\ufffd"') # invalid UTF-8 -> replacement char
 
 def encode_error(expr, error):
     assert.fails(lambda: json.encode(expr), error)
diff --git a/starlark/testdata/string.star b/starlark/testdata/string.star
index 84c6791..b317d1a 100644
--- a/starlark/testdata/string.star
+++ b/starlark/testdata/string.star
@@ -37,8 +37,9 @@ assert.eq(chr(1049), "Й")  # 2-byte UTF-8 encoding
 assert.eq(chr(0x1F63F), "😿")  # 4-byte UTF-8 encoding
 assert.fails(lambda: chr(-1), "Unicode code point -1 out of range \\(<0\\)")
 assert.fails(lambda: chr(0x110000), "Unicode code point U\\+110000 out of range \\(>0x10FFFF\\)")
-assert.eq(ord("A"), 65)
-assert.eq(ord("Й"), 1049)
+assert.eq(ord("A"), 0x41)
+assert.eq(ord("Й"), 0x419)
+assert.eq(ord("世"), 0x4e16)
 assert.eq(ord("😿"), 0x1F63F)
 assert.eq(ord("Й"[1:]), 0xFFFD)  # = Unicode replacement character
 assert.fails(lambda: ord("abc"), "string encodes 3 Unicode code points, want 1")
@@ -46,42 +47,50 @@ assert.fails(lambda: ord(""), "string encodes 0 Unicode code points, want 1")
 assert.fails(lambda: ord("😿"[1:]), "string encodes 3 Unicode code points, want 1")  # 3 x 0xFFFD
 
 # string.codepoint_ords
-assert.eq(type("abcЙ😿".codepoint_ords()), "codepoints")
+assert.eq(type("abcЙ😿".codepoint_ords()), "string.codepoints")
 assert.eq(str("abcЙ😿".codepoint_ords()), '"abcЙ😿".codepoint_ords()')
 assert.eq(list("abcЙ😿".codepoint_ords()), [97, 98, 99, 1049, 128575])
 assert.eq(list(("A" + "😿Z"[1:]).codepoint_ords()), [ord("A"), 0xFFFD, 0xFFFD, 0xFFFD, ord("Z")])
 assert.eq(list("".codepoint_ords()), [])
+assert.fails(lambda: "abcЙ😿".codepoint_ords()[2], "unhandled index")  # not indexable
+assert.fails(lambda: len("abcЙ😿".codepoint_ords()), "no len")  # unknown length
 
 # string.codepoints
-assert.eq(type("abcЙ😿".codepoints()), "codepoints")
+assert.eq(type("abcЙ😿".codepoints()), "string.codepoints")
 assert.eq(str("abcЙ😿".codepoints()), '"abcЙ😿".codepoints()')
 assert.eq(list("abcЙ😿".codepoints()), ["a", "b", "c", "Й", "😿"])
-assert.eq(list(("A" + "😿Z"[1:]).codepoints()), ["A", "\x9f", "\x98", "\xbf", "Z"])
+assert.eq(list(("A" + "😿Z"[1:]).codepoints()), ["A", "�", "�", "�", "Z"])
 assert.eq(list("".codepoints()), [])
+assert.fails(lambda: "abcЙ😿".codepoints()[2], "unhandled index")  # not indexable
+assert.fails(lambda: len("abcЙ😿".codepoints()), "no len")  # unknown length
 
 # string.elem_ords
-assert.eq(type("abcЙ😿".elem_ords()), "elems")
+assert.eq(type("abcЙ😿".elem_ords()), "string.elems")
 assert.eq(str("abcЙ😿".elem_ords()), '"abcЙ😿".elem_ords()')
 assert.eq(list("abcЙ😿".elem_ords()), [97, 98, 99, 208, 153, 240, 159, 152, 191])
 assert.eq(list(("A" + "😿Z"[1:]).elem_ords()), [65, 159, 152, 191, 90])
 assert.eq(list("".elem_ords()), [])
+assert.eq("abcЙ😿".elem_ords()[2], 99)  # indexable
+assert.eq(len("abcЙ😿".elem_ords()), 9)  # known length
 
-# string.elems
-assert.eq(type("abcЙ😿".elems()), "elems")
+# string.elems (1-byte substrings, which are invalid text)
+assert.eq(type("abcЙ😿".elems()), "string.elems")
 assert.eq(str("abcЙ😿".elems()), '"abcЙ😿".elems()')
 assert.eq(
-    list("abcЙ😿".elems()),
-    ["a", "b", "c", "\xd0", "\x99", "\xf0", "\x9f", "\x98", "\xbf"],
+    repr(list("abcЙ😿".elems())),
+    r'["a", "b", "c", "\xd0", "\x99", "\xf0", "\x9f", "\x98", "\xbf"]',
 )
 assert.eq(
-    list(("A" + "😿Z"[1:]).elems()),
-    ["A", "\x9f", "\x98", "\xbf", "Z"],
+    repr(list(("A" + "😿Z"[1:]).elems())),
+    r'["A", "\x9f", "\x98", "\xbf", "Z"]',
 )
 assert.eq(list("".elems()), [])
+assert.eq("abcЙ😿".elems()[2], "c")  # indexable
+assert.eq(len("abcЙ😿".elems()), 9)  # known length
 
 # indexing, x[i]
 assert.eq("Hello, 世界!"[0], "H")
-assert.eq("Hello, 世界!"[7], "\xe4")
+assert.eq(repr("Hello, 世界!"[7]), r'"\xe4"')  # (invalid text)
 assert.eq("Hello, 世界!"[13], "!")
 assert.fails(lambda: "abc"[-4], "out of range")
 assert.eq("abc"[-3], "a")
@@ -93,10 +102,8 @@ assert.eq("abc"[2], "c")
 assert.fails(lambda: "abc"[4], "out of range")
 
 # x[i] = ...
-x2 = "abc"
-
 def f():
-    x2[1] = "B"
+    "abc"[1] = "B"
 
 assert.fails(f, "string.*does not support.*assignment")
 
@@ -122,6 +129,7 @@ assert.eq("abc"[:3], "abc")
 assert.eq("abc"[:4], "abc")
 assert.eq("abc"[1:2], "b")
 assert.eq("abc"[2:1], "")
+assert.eq(repr("😿"[:1]), r'"\xf0"')  # (invalid text)
 
 # non-unit strides
 assert.eq("abcd"[0:4:1], "abcd")
diff --git a/starlark/value.go b/starlark/value.go
index bcec750..81e29ed 100644
--- a/starlark/value.go
+++ b/starlark/value.go
@@ -499,13 +499,20 @@ func (f Float) Unary(op syntax.Token) (Value, error) {
 	return nil, nil
 }
 
-// String is the type of a Starlark string.
+// String is the type of a Starlark text string.
 //
 // A String encapsulates an an immutable sequence of bytes,
 // but strings are not directly iterable. Instead, iterate
 // over the result of calling one of these four methods:
 // codepoints, codepoint_ords, elems, elem_ords.
 //
+// Strings typically contain text; use Bytes for binary strings.
+// The Starlark spec defines text strings as sequences of UTF-k
+// codes that encode Unicode code points. In this Go implementation,
+// k=8, whereas in a Java implementation, k=16. For portability,
+// operations on strings should aim to avoid assumptions about
+// the value of k.
+//
 // Warning: the contract of the Value interface's String method is that
 // it returns the value printed in Starlark notation,
 // so s.String() or fmt.Sprintf("%s", s) returns a quoted string.
@@ -513,7 +520,7 @@ func (f Float) Unary(op syntax.Token) (Value, error) {
 // of a Starlark string as a Go string.
 type String string
 
-func (s String) String() string        { return strconv.Quote(string(s)) }
+func (s String) String() string        { return syntax.Quote(string(s), false) }
 func (s String) GoString() string      { return string(s) }
 func (s String) Type() string          { return "string" }
 func (s String) Freeze()               {} // immutable
@@ -545,73 +552,106 @@ func (x String) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, err
 
 func AsString(x Value) (string, bool) { v, ok := x.(String); return string(v), ok }
 
-// A stringIterable is an iterable whose iterator yields a sequence of
-// either Unicode code points or elements (bytes),
-// either numerically or as successive substrings.
-type stringIterable struct {
-	s          String
-	ords       bool
-	codepoints bool
+// A stringElems is an iterable whose iterator yields a sequence of
+// elements (bytes), either numerically or as successive substrings.
+// It is an indexable sequence.
+type stringElems struct {
+	s    String
+	ords bool
 }
 
-var _ Iterable = (*stringIterable)(nil)
+var (
+	_ Iterable  = (*stringElems)(nil)
+	_ Indexable = (*stringElems)(nil)
+)
 
-func (si stringIterable) String() string {
-	var etype string
-	if si.codepoints {
-		etype = "codepoint"
+func (si stringElems) String() string {
+	if si.ords {
+		return si.s.String() + ".elem_ords()"
 	} else {
-		etype = "elem"
+		return si.s.String() + ".elems()"
 	}
+}
+func (si stringElems) Type() string          { return "string.elems" }
+func (si stringElems) Freeze()               {} // immutable
+func (si stringElems) Truth() Bool           { return True }
+func (si stringElems) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", si.Type()) }
+func (si stringElems) Iterate() Iterator     { return &stringElemsIterator{si, 0} }
+func (si stringElems) Len() int              { return len(si.s) }
+func (si stringElems) Index(i int) Value {
 	if si.ords {
-		return si.s.String() + "." + etype + "_ords()"
+		return MakeInt(int(si.s[i]))
 	} else {
-		return si.s.String() + "." + etype + "s()"
+		// TODO(adonovan): opt: preallocate canonical 1-byte strings
+		// to avoid interface allocation.
+		return si.s[i : i+1]
+	}
+}
+
+type stringElemsIterator struct {
+	si stringElems
+	i  int
+}
+
+func (it *stringElemsIterator) Next(p *Value) bool {
+	if it.i == len(it.si.s) {
+		return false
 	}
+	*p = it.si.Index(it.i)
+	it.i++
+	return true
+}
+
+func (*stringElemsIterator) Done() {}
+
+// A stringCodepoints is an iterable whose iterator yields a sequence of
+// Unicode code points, either numerically or as successive substrings.
+// It is not indexable.
+type stringCodepoints struct {
+	s    String
+	ords bool
 }
-func (si stringIterable) Type() string {
-	if si.codepoints {
-		return "codepoints"
+
+var _ Iterable = (*stringCodepoints)(nil)
+
+func (si stringCodepoints) String() string {
+	if si.ords {
+		return si.s.String() + ".codepoint_ords()"
 	} else {
-		return "elems"
+		return si.s.String() + ".codepoints()"
 	}
 }
-func (si stringIterable) Freeze()               {} // immutable
-func (si stringIterable) Truth() Bool           { return True }
-func (si stringIterable) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", si.Type()) }
-func (si stringIterable) Iterate() Iterator     { return &stringIterator{si, 0} }
+func (si stringCodepoints) Type() string          { return "string.codepoints" }
+func (si stringCodepoints) Freeze()               {} // immutable
+func (si stringCodepoints) Truth() Bool           { return True }
+func (si stringCodepoints) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", si.Type()) }
+func (si stringCodepoints) Iterate() Iterator     { return &stringCodepointsIterator{si, 0} }
 
-type stringIterator struct {
-	si stringIterable
+type stringCodepointsIterator struct {
+	si stringCodepoints
 	i  int
 }
 
-func (it *stringIterator) Next(p *Value) bool {
+func (it *stringCodepointsIterator) Next(p *Value) bool {
 	s := it.si.s[it.i:]
 	if s == "" {
 		return false
 	}
-	if it.si.codepoints {
-		r, sz := utf8.DecodeRuneInString(string(s))
-		if !it.si.ords {
-			*p = s[:sz]
+	r, sz := utf8.DecodeRuneInString(string(s))
+	if !it.si.ords {
+		if r == utf8.RuneError {
+			*p = String(r)
 		} else {
-			*p = MakeInt(int(r))
+			*p = s[:sz]
 		}
-		it.i += sz
 	} else {
-		b := int(s[0])
-		if !it.si.ords {
-			*p = s[:1]
-		} else {
-			*p = MakeInt(b)
-		}
-		it.i += 1
+		*p = MakeInt(int(r))
 	}
+	it.i += sz
 	return true
 }
 
-func (*stringIterator) Done() {}
+func (*stringCodepointsIterator) Done() {}
 
 // A Function is a function defined by a Starlark def statement or lambda expression.
 // The initialization behavior of a Starlark module is also represented by a Function.
@@ -1084,6 +1124,7 @@ func writeValue(out *strings.Builder, x Value, path []Value) {
 	case nil:
 		out.WriteString("<nil>") // indicates a bug
 
+	// These four cases are duplicates of T.String(), for efficiency.
 	case NoneType:
 		out.WriteString("None")
 
@@ -1098,7 +1139,7 @@ func writeValue(out *strings.Builder, x Value, path []Value) {
 		}
 
 	case String:
-		fmt.Fprintf(out, "%q", string(x))
+		out.WriteString(syntax.Quote(string(x), false))
 
 	case *List:
 		out.WriteByte('[')
@@ -1318,6 +1359,8 @@ func Len(x Value) int {
 	switch x := x.(type) {
 	case String:
 		return x.Len()
+	case Indexable:
+		return x.Len()
 	case Sequence:
 		return x.Len()
 	}
@@ -1335,3 +1378,54 @@ func Iterate(x Value) Iterator {
 	}
 	return nil
 }
+
+// Bytes is the type of a Starlark binary string.
+//
+// A Bytes encapsulates an immutable sequence of bytes.
+// It is comparable, indexable, and sliceable, but not directly iterable;
+// use bytes.elems() for an iterable view.
+//
+// In this Go implementation, the elements of 'string' and 'bytes' are
+// both bytes, but in other implementations, notably Java, the elements
+// of a 'string' are UTF-16 codes (Java chars). The spec abstracts text
+// strings as sequences of UTF-k codes that encode Unicode code points,
+// and operations that convert from text to binary incur UTF-k-to-UTF-8
+// transcoding; conversely, conversion from binary to text incurs
+// UTF-8-to-UTF-k transcoding. Because k=8 for Go, these operations
+// are the identity function, at least for valid encodings of text.
+type Bytes string
+
+var (
+	_ Comparable = Bytes("")
+	_ Sliceable  = Bytes("")
+	_ Indexable  = Bytes("")
+)
+
+func (b Bytes) String() string        { return syntax.Quote(string(b), true) }
+func (b Bytes) Type() string          { return "bytes" }
+func (b Bytes) Freeze()               {} // immutable
+func (b Bytes) Truth() Bool           { return len(b) > 0 }
+func (b Bytes) Hash() (uint32, error) { return String(b).Hash() }
+func (b Bytes) Len() int              { return len(b) }
+func (b Bytes) Index(i int) Value     { return b[i : i+1] }
+
+func (b Bytes) Attr(name string) (Value, error) { return builtinAttr(b, name, bytesMethods) }
+func (b Bytes) AttrNames() []string             { return builtinAttrNames(bytesMethods) }
+
+func (b Bytes) Slice(start, end, step int) Value {
+	if step == 1 {
+		return b[start:end]
+	}
+
+	sign := signum(step)
+	var str []byte
+	for i := start; signum(end-i) == sign; i += step {
+		str = append(str, b[i])
+	}
+	return Bytes(str)
+}
+
+func (x Bytes) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
+	y := y_.(Bytes)
+	return threeway(op, strings.Compare(string(x), string(y))), nil
+}
diff --git a/syntax/parse.go b/syntax/parse.go
index 50b8087..f4c8fff 100644
--- a/syntax/parse.go
+++ b/syntax/parse.go
@@ -771,8 +771,7 @@ func (p *parser) parseArgs() []Expr {
 }
 
 //  primary = IDENT
-//          | INT | FLOAT
-//          | STRING
+//          | INT | FLOAT | STRING | BYTES
 //          | '[' ...                    // list literal or comprehension
 //          | '{' ...                    // dict literal or comprehension
 //          | '(' ...                    // tuple or parenthesized expression
@@ -782,7 +781,7 @@ func (p *parser) parsePrimary() Expr {
 	case IDENT:
 		return p.parseIdent()
 
-	case INT, FLOAT, STRING:
+	case INT, FLOAT, STRING, BYTES:
 		var val interface{}
 		tok := p.tok
 		switch tok {
@@ -794,7 +793,7 @@ func (p *parser) parsePrimary() Expr {
 			}
 		case FLOAT:
 			val = p.tokval.float
-		case STRING:
+		case STRING, BYTES:
 			val = p.tokval.string
 		}
 		raw := p.tokval.raw
diff --git a/syntax/parse_test.go b/syntax/parse_test.go
index 6052e79..fedbb3e 100644
--- a/syntax/parse_test.go
+++ b/syntax/parse_test.go
@@ -361,9 +361,12 @@ func writeTree(out *bytes.Buffer, x reflect.Value) {
 	case reflect.Struct:
 		switch v := x.Interface().(type) {
 		case syntax.Literal:
-			if v.Token == syntax.STRING {
+			switch v.Token {
+			case syntax.STRING:
 				fmt.Fprintf(out, "%q", v.Value)
-			} else if v.Token == syntax.INT {
+			case syntax.BYTES:
+				fmt.Fprintf(out, "b%q", v.Value)
+			case syntax.INT:
 				fmt.Fprintf(out, "%d", v.Value)
 			}
 			return
diff --git a/syntax/quote.go b/syntax/quote.go
index 49cb259..741e106 100644
--- a/syntax/quote.go
+++ b/syntax/quote.go
@@ -10,6 +10,8 @@ import (
 	"fmt"
 	"strconv"
 	"strings"
+	"unicode"
+	"unicode/utf8"
 )
 
 // unesc maps single-letter chars following \ to their actual values.
@@ -41,15 +43,20 @@ var esc = [256]byte{
 }
 
 // unquote unquotes the quoted string, returning the actual
-// string value, whether the original was triple-quoted, and
-// an error describing invalid input.
-func unquote(quoted string) (s string, triple bool, err error) {
+// string value, whether the original was triple-quoted,
+// whether it was a byte string, and an error describing invalid input.
+func unquote(quoted string) (s string, triple, isByte bool, err error) {
 	// Check for raw prefix: means don't interpret the inner \.
 	raw := false
 	if strings.HasPrefix(quoted, "r") {
 		raw = true
 		quoted = quoted[1:]
 	}
+	// Check for bytes prefix.
+	if strings.HasPrefix(quoted, "b") {
+		isByte = true
+		quoted = quoted[1:]
+	}
 
 	if len(quoted) < 2 {
 		err = fmt.Errorf("string literal too short")
@@ -138,7 +145,7 @@ func unquote(quoted string) (s string, triple bool, err error) {
 			quoted = quoted[2:]
 
 		case '0', '1', '2', '3', '4', '5', '6', '7':
-			// Octal escape, up to 3 digits.
+			// Octal escape, up to 3 digits, \OOO.
 			n := int(quoted[1] - '0')
 			quoted = quoted[2:]
 			for i := 1; i < 3; i++ {
@@ -148,6 +155,10 @@ func unquote(quoted string) (s string, triple bool, err error) {
 				n = n*8 + int(quoted[0]-'0')
 				quoted = quoted[1:]
 			}
+			if !isByte && n > 127 {
+				err = fmt.Errorf(`non-ASCII octal escape \%o (use \u%04X for the UTF-8 encoding of U+%04X)`, n, n, n)
+				return
+			}
 			if n >= 256 {
 				// NOTE: Python silently discards the high bit,
 				// so that '\541' == '\141' == 'a'.
@@ -158,7 +169,7 @@ func unquote(quoted string) (s string, triple bool, err error) {
 			buf.WriteByte(byte(n))
 
 		case 'x':
-			// Hexadecimal escape, exactly 2 digits.
+			// Hexadecimal escape, exactly 2 digits, \xXX. [0-127]
 			if len(quoted) < 4 {
 				err = fmt.Errorf(`truncated escape sequence %s`, quoted)
 				return
@@ -168,8 +179,41 @@ func unquote(quoted string) (s string, triple bool, err error) {
 				err = fmt.Errorf(`invalid escape sequence %s`, quoted[:4])
 				return
 			}
+			if !isByte && n > 127 {
+				err = fmt.Errorf(`non-ASCII hex escape %s (use \u%04X for the UTF-8 encoding of U+%04X)`,
+					quoted[:4], n, n)
+				return
+			}
 			buf.WriteByte(byte(n))
 			quoted = quoted[4:]
+
+		case 'u', 'U':
+			// Unicode code point, 4 (\uXXXX) or 8 (\UXXXXXXXX) hex digits.
+			sz := 6
+			if quoted[1] == 'U' {
+				sz = 10
+			}
+			if len(quoted) < sz {
+				err = fmt.Errorf(`truncated escape sequence %s`, quoted)
+				return
+			}
+			n, err1 := strconv.ParseUint(quoted[2:sz], 16, 0)
+			if err1 != nil {
+				err = fmt.Errorf(`invalid escape sequence %s`, quoted[:sz])
+				return
+			}
+			if n > unicode.MaxRune {
+				err = fmt.Errorf(`code point out of range: %s (max \U%08x)`,
+					quoted[:sz], n)
+				return
+			}
+			// As in Go, surrogates are disallowed.
+			if 0xD800 <= n && n < 0xE000 {
+				err = fmt.Errorf(`invalid Unicode code point U+%04X`, n)
+				return
+			}
+			buf.WriteRune(rune(n))
+			quoted = quoted[sz:]
 		}
 	}
 
@@ -187,67 +231,79 @@ func indexByte(s string, b byte) int {
 	return -1
 }
 
-// hex is a list of the hexadecimal digits, for use in quoting.
-// We always print lower-case hexadecimal.
-const hex = "0123456789abcdef"
+// Quote returns a Starlark literal that denotes s.
+// If b, it returns a bytes literal.
+func Quote(s string, b bool) string {
+	const hex = "0123456789abcdef"
+	var runeTmp [utf8.UTFMax]byte
 
-// quote returns the quoted form of the string value "x".
-// If triple is true, quote uses the triple-quoted form """x""".
-func quote(unquoted string, triple bool) string {
-	q := `"`
-	if triple {
-		q = `"""`
+	buf := make([]byte, 0, 3*len(s)/2)
+	if b {
+		buf = append(buf, 'b')
 	}
-
-	buf := new(strings.Builder)
-	buf.WriteString(q)
-
-	for i := 0; i < len(unquoted); i++ {
-		c := unquoted[i]
-		if c == '"' && triple && (i+1 < len(unquoted) && unquoted[i+1] != '"' || i+2 < len(unquoted) && unquoted[i+2] != '"') {
-			// Can pass up to two quotes through, because they are followed by a non-quote byte.
-			buf.WriteByte(c)
-			if i+1 < len(unquoted) && unquoted[i+1] == '"' {
-				buf.WriteByte(c)
-				i++
-			}
-			continue
+	buf = append(buf, '"')
+	for width := 0; len(s) > 0; s = s[width:] {
+		r := rune(s[0])
+		width = 1
+		if r >= utf8.RuneSelf {
+			r, width = utf8.DecodeRuneInString(s)
 		}
-		if triple && c == '\n' {
-			// Can allow newline in triple-quoted string.
-			buf.WriteByte(c)
+		if width == 1 && r == utf8.RuneError {
+			// String (!b) literals accept \xXX escapes only for ASCII,
+			// but we must use them here to represent invalid bytes.
+			// The result is not a legal literal.
+			buf = append(buf, `\x`...)
+			buf = append(buf, hex[s[0]>>4])
+			buf = append(buf, hex[s[0]&0xF])
 			continue
 		}
-		if c == '\'' {
-			// Can allow ' since we always use ".
-			buf.WriteByte(c)
+		if r == '"' || r == '\\' { // always backslashed
+			buf = append(buf, '\\')
+			buf = append(buf, byte(r))
 			continue
 		}
-		if esc[c] != 0 {
-			buf.WriteByte('\\')
-			buf.WriteByte(esc[c])
+		if strconv.IsPrint(r) {
+			n := utf8.EncodeRune(runeTmp[:], r)
+			buf = append(buf, runeTmp[:n]...)
 			continue
 		}
-		if c < 0x20 || c >= 0x80 {
-			// BUILD files are supposed to be Latin-1, so escape all control and high bytes.
-			// I'd prefer to use \x here, but Blaze does not implement
-			// \x in quoted strings (b/7272572).
-			buf.WriteByte('\\')
-			buf.WriteByte(hex[c>>6]) // actually octal but reusing hex digits 0-7.
-			buf.WriteByte(hex[(c>>3)&7])
-			buf.WriteByte(hex[c&7])
-			/*
-				buf.WriteByte('\\')
-				buf.WriteByte('x')
-				buf.WriteByte(hex[c>>4])
-				buf.WriteByte(hex[c&0xF])
-			*/
-			continue
+		switch r {
+		case '\a':
+			buf = append(buf, `\a`...)
+		case '\b':
+			buf = append(buf, `\b`...)
+		case '\f':
+			buf = append(buf, `\f`...)
+		case '\n':
+			buf = append(buf, `\n`...)
+		case '\r':
+			buf = append(buf, `\r`...)
+		case '\t':
+			buf = append(buf, `\t`...)
+		case '\v':
+			buf = append(buf, `\v`...)
+		default:
+			switch {
+			case r < ' ' || r == 0x7f:
+				buf = append(buf, `\x`...)
+				buf = append(buf, hex[byte(r)>>4])
+				buf = append(buf, hex[byte(r)&0xF])
+			case r > utf8.MaxRune:
+				r = 0xFFFD
+				fallthrough
+			case r < 0x10000:
+				buf = append(buf, `\u`...)
+				for s := 12; s >= 0; s -= 4 {
+					buf = append(buf, hex[r>>uint(s)&0xF])
+				}
+			default:
+				buf = append(buf, `\U`...)
+				for s := 28; s >= 0; s -= 4 {
+					buf = append(buf, hex[r>>uint(s)&0xF])
+				}
+			}
 		}
-		buf.WriteByte(c)
-		continue
 	}
-
-	buf.WriteString(q)
-	return buf.String()
+	buf = append(buf, '"')
+	return string(buf)
 }
diff --git a/syntax/quote_test.go b/syntax/quote_test.go
index f9068ee..be7498b 100644
--- a/syntax/quote_test.go
+++ b/syntax/quote_test.go
@@ -22,17 +22,14 @@ var quoteTests = []struct {
 	{`'quote"here'`, `quote"here`, false},
 	{`"quote'here"`, `quote'here`, true},
 	{`'quote\'here'`, `quote'here`, false},
-	{`"""hello " ' world "" asdf ''' foo"""`, `hello " ' world "" asdf ''' foo`, true},
-	{`"""hello
-world"""`, "hello\nworld", true},
 
-	{`"\a\b\f\n\r\t\v\000\377"`, "\a\b\f\n\r\t\v\000\xFF", true},
-	{`"\a\b\f\n\r\t\v\x00\xff"`, "\a\b\f\n\r\t\v\000\xFF", false},
-	{`"\a\b\f\n\r\t\v\000\xFF"`, "\a\b\f\n\r\t\v\000\xFF", false},
-	{`"\a\b\f\n\r\t\v\000\377\"'\\\003\200"`, "\a\b\f\n\r\t\v\x00\xFF\"'\\\x03\x80", true},
-	{`"\a\b\f\n\r\t\v\x00\xff\"'\\\x03\x80"`, "\a\b\f\n\r\t\v\x00\xFF\"'\\\x03\x80", false},
-	{`"\a\b\f\n\r\t\v\000\xFF\"'\\\x03\x80"`, "\a\b\f\n\r\t\v\x00\xFF\"'\\\x03\x80", false},
-	{`"\a\b\f\n\r\t\v\000\xFF\"\\\x03\x80"`, "\a\b\f\n\r\t\v\x00\xFF\"\\\x03\x80", false},
+	{`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", true},
+	{`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", false},
+	{`"\a\b\f\n\r\t\v\x00\x7f"`, "\a\b\f\n\r\t\v\000\x7F", false},
+	{`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", true},
+	{`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", false},
+	{`"\a\b\f\n\r\t\v\x00\x7f\"'\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"'\\\x03", false},
+	{`"\a\b\f\n\r\t\v\x00\x7f\"\\\x03"`, "\a\b\f\n\r\t\v\x00\x7F\"\\\x03", false},
 	{
 		`"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/    \x27\\1\x27,/g' >> $@; "`,
 		"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/    '\\1',/g' >> $@; ",
@@ -50,7 +47,7 @@ func TestQuote(t *testing.T) {
 		if !tt.std {
 			continue
 		}
-		q := quote(tt.s, strings.HasPrefix(tt.q, `"""`))
+		q := Quote(tt.s, false)
 		if q != tt.q {
 			t.Errorf("quote(%#q) = %s, want %s", tt.s, q, tt.q)
 		}
@@ -59,7 +56,7 @@ func TestQuote(t *testing.T) {
 
 func TestUnquote(t *testing.T) {
 	for _, tt := range quoteTests {
-		s, triple, err := unquote(tt.q)
+		s, triple, _, err := unquote(tt.q)
 		wantTriple := strings.HasPrefix(tt.q, `"""`) || strings.HasPrefix(tt.q, `'''`)
 		if s != tt.s || triple != wantTriple || err != nil {
 			t.Errorf("unquote(%s) = %#q, %v, %v want %#q, %v, nil", tt.q, s, triple, err, tt.s, wantTriple)
diff --git a/syntax/scan.go b/syntax/scan.go
index a162264..bb4165e 100644
--- a/syntax/scan.go
+++ b/syntax/scan.go
@@ -35,6 +35,7 @@ const (
 	INT    // 123
 	FLOAT  // 1.23e45
 	STRING // "foo" or 'foo' or '''foo''' or r'foo' or r"foo"
+	BYTES  // b"foo", etc
 
 	// Punctuation
 	PLUS          // +
@@ -268,7 +269,7 @@ func newScanner(filename string, src interface{}, keepComments bool) (*scanner,
 		lineStart:    true,
 		keepComments: keepComments,
 	}
-	sc.readline, _ = src.(func() ([]byte, error)) // REPL only
+	sc.readline, _ = src.(func() ([]byte, error)) // ParseCompoundStmt (REPL) only
 	if sc.readline == nil {
 		data, err := readSource(filename, src)
 		if err != nil {
@@ -422,7 +423,7 @@ type tokenValue struct {
 	int    int64    // decoded int
 	bigInt *big.Int // decoded integers > int64
 	float  float64  // decoded float
-	string string   // decoded string
+	string string   // decoded string or bytes
 	pos    Position // start position of token
 }
 
@@ -642,8 +643,15 @@ start:
 
 	// identifier or keyword
 	if isIdentStart(c) {
-		// raw string literal
-		if c == 'r' && len(sc.rest) > 1 && (sc.rest[1] == '"' || sc.rest[1] == '\'') {
+		if (c == 'r' || c == 'b') && len(sc.rest) > 1 && (sc.rest[1] == '"' || sc.rest[1] == '\'') {
+			//  r"..."
+			//  b"..."
+			sc.readRune()
+			c = sc.peekRune()
+			return sc.scanString(val, c)
+		} else if c == 'r' && len(sc.rest) > 2 && sc.rest[1] == 'b' && (sc.rest[2] == '"' || sc.rest[2] == '\'') {
+			// rb"..."
+			sc.readRune()
 			sc.readRune()
 			c = sc.peekRune()
 			return sc.scanString(val, c)
@@ -887,12 +895,16 @@ func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
 	}
 	val.raw = raw.String()
 
-	s, _, err := unquote(val.raw)
+	s, _, isByte, err := unquote(val.raw)
 	if err != nil {
 		sc.error(start, err.Error())
 	}
 	val.string = s
-	return STRING
+	if isByte {
+		return BYTES
+	} else {
+		return STRING
+	}
 }
 
 func (sc *scanner) scanNumber(val *tokenValue, c rune) Token {
diff --git a/syntax/scan_test.go b/syntax/scan_test.go
index 0f2d9f2..9582bd7 100644
--- a/syntax/scan_test.go
+++ b/syntax/scan_test.go
@@ -10,6 +10,7 @@ import (
 	"go/build"
 	"io/ioutil"
 	"path/filepath"
+	"strings"
 	"testing"
 )
 
@@ -42,8 +43,8 @@ func scan(src interface{}) (tokens string, err error) {
 			}
 		case FLOAT:
 			fmt.Fprintf(&buf, "%e", val.float)
-		case STRING:
-			fmt.Fprintf(&buf, "%q", val.string)
+		case STRING, BYTES:
+			buf.WriteString(Quote(val.string, tok == BYTES))
 		default:
 			buf.WriteString(tok.String())
 		}
@@ -189,9 +190,34 @@ pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
 		{"i = 012934", `foo.star:1:5: invalid int literal`},
 		// octal escapes in string literals
 		{`"\037"`, `"\x1f" EOF`},
-		{`"\377"`, `"\xff" EOF`},
-		{`"\378"`, `"\x1f8" EOF`},                                // = '\37' + '8'
-		{`"\400"`, `foo.star:1:1: invalid escape sequence \400`}, // unlike Python 2 and 3
+		{`"\377"`, `foo.star:1:1: non-ASCII octal escape \377 (use \u00FF for the UTF-8 encoding of U+00FF)`},
+		{`"\378"`, `"\x1f8" EOF`},                               // = '\37' + '8'
+		{`"\400"`, `foo.star:1:1: non-ASCII octal escape \400`}, // unlike Python 2 and 3
+		// hex escapes
+		{`"\x00\x20\x09\x41\x7e\x7f"`, `"\x00 \tA~\x7f" EOF`}, // DEL is non-printable
+		{`"\x80"`, `foo.star:1:1: non-ASCII hex escape`},
+		{`"\xff"`, `foo.star:1:1: non-ASCII hex escape`},
+		{`"\xFf"`, `foo.star:1:1: non-ASCII hex escape`},
+		{`"\xF"`, `foo.star:1:1: truncated escape sequence \xF`},
+		{`"\x"`, `foo.star:1:1: truncated escape sequence \x`},
+		{`"\xfg"`, `foo.star:1:1: invalid escape sequence \xfg`},
+		// Unicode escapes
+		// \uXXXX
+		{`"\u0400"`, `"Ѐ" EOF`},
+		{`"\u100"`, `foo.star:1:1: truncated escape sequence \u100`},
+		{`"\u04000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
+		{`"\u100g"`, `foo.star:1:1: invalid escape sequence \u100g`},
+		{`"\u4E16"`, `"世" EOF`},
+		{`"\udc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
+		// \UXXXXXXXX
+		{`"\U00000400"`, `"Ѐ" EOF`},
+		{`"\U0000400"`, `foo.star:1:1: truncated escape sequence \U0000400`},
+		{`"\U000004000"`, `"Ѐ0" EOF`}, // = U+0400 + '0'
+		{`"\U1000000g"`, `foo.star:1:1: invalid escape sequence \U1000000g`},
+		{`"\U0010FFFF"`, `"\U0010ffff" EOF`},
+		{`"\U00110000"`, `foo.star:1:1: code point out of range: \U00110000 (max \U00110000)`},
+		{`"\U0001F63F"`, `"😿" EOF`},
+		{`"\U0000dc00"`, `foo.star:1:1: invalid Unicode code point U+DC00`}, // surrogate
 
 		// backslash escapes
 		// As in Go, a backslash must escape something.
@@ -218,6 +244,12 @@ pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
 		{`r'\"'`, `"\\\"" EOF`},
 		{`'a\zb'`, `foo.star:1:1: invalid escape sequence \z`},
 		{`"\o123"`, `foo.star:1:1: invalid escape sequence \o`},
+		// bytes literals (where they differ from text strings)
+		{`b"AЀ世😿"`, `b"AЀ世😿`},                                       // 1-4 byte encodings, literal
+		{`b"\x41\u0400\u4e16\U0001F63F"`, `b"AЀ世😿"`},                // same, as escapes
+		{`b"\377\378\x80\xff\xFf"`, `b"\xff\x1f8\x80\xff\xff" EOF`}, // hex/oct escapes allow non-ASCII
+		{`b"\400"`, `foo.star:1:2: invalid escape sequence \400`},
+		{`b"\udc00"`, `foo.star:1:2: invalid Unicode code point U+DC00`}, // (same as string)
 		// floats starting with octal digits
 		{"012934.", `1.293400e+04 EOF`},
 		{"012934.1", `1.293410e+04 EOF`},
@@ -243,7 +275,9 @@ pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated
 		if err != nil {
 			got = err.(Error).Error()
 		}
-		if test.want != got {
+		// Prefix match allows us to truncate errors in expectations.
+		// Success cases all end in EOF.
+		if !strings.HasPrefix(got, test.want) {
 			t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want)
 		}
 	}
diff --git a/syntax/syntax.go b/syntax/syntax.go
index 8bbf5c0..20b28bb 100644
--- a/syntax/syntax.go
+++ b/syntax/syntax.go
@@ -251,7 +251,7 @@ func (x *Ident) Span() (start, end Position) {
 // A Literal represents a literal string or number.
 type Literal struct {
 	commentsRef
-	Token    Token // = STRING | INT | FLOAT
+	Token    Token // = STRING | BYTES | INT | FLOAT
 	TokenPos Position
 	Raw      string      // uninterpreted text
 	Value    interface{} // = string | int64 | *big.Int | float64