diff options
author | Alan Donovan <adonovan@google.com> | 2017-10-02 10:10:28 -0400 |
---|---|---|
committer | Alan Donovan <adonovan@google.com> | 2017-10-02 10:10:28 -0400 |
commit | 312d1a5b5a9c50204aee186aeca0b7dbbd3eaaa0 (patch) | |
tree | b766f2d515a7a3abcb0ebc6da796e04ab9739a97 | |
download | starlark-go-312d1a5b5a9c50204aee186aeca0b7dbbd3eaaa0.tar.gz |
skylark: create GitHub repository from google3@170697745
45 files changed, 19417 insertions, 0 deletions
diff --git a/README.md b/README.md new file mode 100644 index 0000000..b9583fa --- /dev/null +++ b/README.md @@ -0,0 +1,139 @@ + +<!-- This file is the project homepage at github.com/google/skylark --> + +# Skylark in Go + +This is the home of the _Skylark in Go_ project. +Skylark in Go is an interpreter for Skylark, implemented in Go. + +Skylark is a dialect of Python intended for use as a configuration language. +Like Python, it is an untyped dynamic language with high-level data +types, first-class functions with lexical scope, and garbage collection. +Unlike CPython, independent Skylark threads execute in parallel, so +Skylark workloads scale well on parallel machines. +Skylark is a small and simple language with a familiar and highly +readable syntax. You can use it as an expressive notation for +structured data, defining functions to eliminate repetition, or you +can use it to add scripting capabilities to an existing application. + +A Skylark interpreter is typically embedded within a larger +application, and the application may define additional domain-specific +functions and data types beyond those provided by the core language. +For example, Skylark was originally developed for the +[Bazel build tool](https://bazel.build). +Bazel uses Skylark as the notation both for its BUILD files (like +Makefiles, these declare the executables, libraries, and tests in a +directory) and for [its macro +language](https://docs.bazel.build/versions/master/skylark/language.html), +through which Bazel is extended with custom logic to support new +languages and compilers. 
+ + +## Documentation + +* Language definition: [doc/spec.md](doc/spec.md) + +* About the Go implementation: [doc/impl.md](doc/impl.md) + +* API documentation: [godoc.org/github.com/google/skylark](https://godoc.org/github.com/google/skylark) + +* Mailing list: [skylark-go](https://groups.google.com/forum/#!forum/skylark-go) + +* Issue tracker: [https://github.com/google/skylark/issues](https://github.com/google/skylark/issues) + +### Getting started + +Build the code: + +```shell +$ go get github.com/google/skylark +$ go build github.com/google/skylark/cmd/skylark +``` + +Run the interpreter: + +``` +$ cat coins.sky +coins = { + 'dime': 10, + 'nickel': 5, + 'penny': 1, + 'quarter': 25, +} +print('By name:\t' + ', '.join(sorted(coins.keys()))) +print('By value:\t' + ', '.join(sorted(coins.keys(), cmp=lambda x, y: coins[x] - coins[y]))) + +$ ./skylark -lambda coins.sky +By name: dime, nickel, penny, quarter +By value: penny, nickel, dime, quarter +``` + +Interact with the read-eval-print loop (REPL): + +``` +$ ./skylark +>>> def fibonacci(n): +... res = range(n) +... for i in res[2:]: +... res[i] = res[i-2] + res[i-1] +... return res +... +>>> fibonacci(10) +[0, 1, 1, 2, 3, 5, 8, 13, 21, 34] +>>> +``` + +### Contributing + +We welcome submissions but please let us know what you're working on +if you want to change or add to the Skylark repository. + +Before undertaking to write something new for the Skylark project, +please file an issue or claim an existing issue. +All significant changes to the language or to the interpreter's Go +API must be discussed before they can be accepted. +This gives all participants a chance to validate the design and to +avoid duplication of effort. + +Despite some differences, the Go implementation of Skylark strives to +match the behavior of the Java implementation used by Bazel. +For that reason, proposals to change the language itself should +generally be directed to the Bazel team, not to the maintainers of +this project. 
+Only once there is consensus that a language change is desirable may +its Go implementation proceed. + +We use GitHub pull requests for contributions. + +Please complete Google's contributor license agreement (CLA) before +sending your first change to the project. If you are the copyright +holder, you will need to agree to the +[individual contributor license agreement](https://cla.developers.google.com/about/google-individual), +which can be completed online. +If your organization is the copyright holder, the organization will +need to agree to the [corporate contributor license agreement](https://cla.developers.google.com/about/google-corporate). +If the copyright holder for your contribution has already completed +the agreement in connection with another Google open source project, +it does not need to be completed again. + + +### Credits + +Skylark was designed and implemented in Java by Laurent Le Brun, +Dmitry Lomov, Jon Brandvin, and Damien Martin-Guillerez, standing on +the shoulders of the Python community. +The Go implementation was written by Alan Donovan and Jay Conrod; +its scanner was derived from one written by Russ Cox. + +### Legal + +Skylark in Go is Copyright (c) 2017 The Bazel Authors. +All rights reserved. + +It is provided under an Apache license. + +The name "Skylark" is a code name of the Bazel project. +We plan to rename the language before the end of 2017 to reflect its +applicability to projects unrelated to Bazel. + +Skylark in Go is not an official Google product. diff --git a/cmd/skylark/skylark.go b/cmd/skylark/skylark.go new file mode 100644 index 0000000..ca1ef45 --- /dev/null +++ b/cmd/skylark/skylark.go @@ -0,0 +1,144 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The skylark command interprets a Skylark file. +// With no arguments, it starts a read-eval-print loop (REPL). 
+package main + +import ( + "bufio" + "bytes" + "flag" + "fmt" + "log" + "os" + "runtime/pprof" + "sort" + "strings" + + "github.com/google/skylark" + "github.com/google/skylark/resolve" + "github.com/google/skylark/syntax" +) + +// flags +var ( + cpuprofile = flag.String("cpuprofile", "", "gather CPU profile in this file") + showenv = flag.Bool("showenv", false, "on success, print final global environment") +) + +// non-standard dialect flags +func init() { + flag.BoolVar(&resolve.AllowFloat, "fp", resolve.AllowFloat, "allow floating-point numbers") + flag.BoolVar(&resolve.AllowFreeze, "freeze", resolve.AllowFreeze, "add freeze built-in function") + flag.BoolVar(&resolve.AllowSet, "set", resolve.AllowSet, "allow set data type") + flag.BoolVar(&resolve.AllowLambda, "lambda", resolve.AllowLambda, "allow lambda expressions") + flag.BoolVar(&resolve.AllowNestedDef, "nesteddef", resolve.AllowNestedDef, "allow nested def statements") +} + +func main() { + log.SetPrefix("skylark: ") + log.SetFlags(0) + flag.Parse() + + if *cpuprofile != "" { + f, err := os.Create(*cpuprofile) + if err != nil { + log.Fatal(err) + } + if err := pprof.StartCPUProfile(f); err != nil { + log.Fatal(err) + } + defer pprof.StopCPUProfile() + } + + switch len(flag.Args()) { + case 0: + repl() + case 1: + execfile(flag.Args()[0]) + default: + log.Fatal("want at most one Skylark file name") + } +} + +func execfile(filename string) { + thread := new(skylark.Thread) + globals := make(skylark.StringDict) + if err := skylark.ExecFile(thread, filename, nil, globals); err != nil { + printError(err) + os.Exit(1) + } + + // Print the global environment. 
+ if *showenv { + var names []string + for name := range globals { + if !strings.HasPrefix(name, "_") { + names = append(names, name) + } + } + sort.Strings(names) + for _, name := range names { + fmt.Fprintf(os.Stderr, "%s = %s\n", name, globals[name]) + } + } +} + +func repl() { + thread := new(skylark.Thread) + globals := make(skylark.StringDict) + + sc := bufio.NewScanner(os.Stdin) +outer: + for { + fmt.Fprintf(os.Stderr, ">>> ") + if !sc.Scan() { + break + } + line := sc.Text() + if l := strings.TrimSpace(line); l == "" || l[0] == '#' { + continue // blank or comment + } + + // If the line contains a well-formed + // expression, evaluate it. + if _, err := syntax.ParseExpr("<stdin>", line); err == nil { + if v, err := skylark.Eval(thread, "<stdin>", line, globals); err != nil { + printError(err) + } else if v != skylark.None { + fmt.Println(v) + } + continue + } + + // Otherwise assume it is the first of several + // comprising a file, followed by a blank line. + var buf bytes.Buffer + fmt.Fprintln(&buf, line) + for { + fmt.Fprintf(os.Stderr, "... ") + if !sc.Scan() { + break outer + } + line := sc.Text() + if l := strings.TrimSpace(line); l == "" { + break // blank + } + fmt.Fprintln(&buf, line) + } + if err := skylark.ExecFile(thread, "<stdin>", &buf, globals); err != nil { + printError(err) + } + } + fmt.Println() +} + +func printError(err error) { + if evalErr, ok := err.(*skylark.EvalError); ok { + fmt.Fprintln(os.Stderr, evalErr.Backtrace()) + } else { + fmt.Fprintln(os.Stderr, err) + } +} diff --git a/doc/impl.md b/doc/impl.md new file mode 100644 index 0000000..c478cc0 --- /dev/null +++ b/doc/impl.md @@ -0,0 +1,158 @@ + +# Skylark in Go: Implementation + +This document describes some of the design choices of the Go +implementation of Skylark. + +[TOC] + +## Scanner + +The scanner is derived from Russ Cox's +[buildifier](https://github.com/bazelbuild/buildtools/tree/master/buildifier) +tool, which pretty-prints Bazel BUILD files. 
+ +Most of the work happens in `(*scanner).nextToken`. + +## Parser + +The parser is a hand-written recursive-descent parser. It uses the +technique of [precedence +climbing](http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing) +to reduce the number of productions. + +Because `load` is not a reserved word, Skylark's `load` statements are +created by post-processing `load(...)` function calls that appear in +an expression statement. + +In some places the parser accepts a larger set of programs than are +strictly valid, leaving the task of rejecting them to the subsequent +resolver pass. For example, in the function call `f(a, b=c)` the +parser accepts any expression for `a` and `b`, even though `b` may +legally be only an identifier. For the parser to distinguish these +cases would require additional lookahead. + +## Resolver + +The resolver reports structural errors in the program, such as the use +of `break` and `continue` outside of a loop. + +Skylark has stricter syntactic limitations than Python. For example, +it does not permit `for` loops or `if` statements at top level, nor +does it permit global variables to be bound more than once. +These limitations come from the Bazel project's desire to make it easy +to identify the sole statement that defines each global, permitting +accurate cross-reference documentation. + +In addition, the resolver validates all variable names, classifying +them as references to builtin, global, local, or free variables. +Local and free variables are mapped to a small integer, allowing the +evaluator to use an efficient (flat) representation for the +environment. + +Not all features of the Go implementation are "standard" (that is, +supported by Bazel's Java implementation), at least for now, so +non-standard features such as `lambda`, `float`, and `set` +are flag-controlled. The resolver reports +any uses of dialect features that have not been enabled. 
+ + +## Evaluator + +### Data types + +<b>Integers:</b> Integers are represented using `big.Int`, an +arbitrary precision integer. This representation was chosen because, +for many applications, Skylark must be able to handle without loss +protocol buffer values containing signed and unsigned 64-bit integers, +which requires 65 bits of precision. + +Small integers (<256) are preallocated, but all other values require +memory allocation. Integer performance is relatively poor, but it +matters little for Bazel-like workloads which depend much +more on lists of strings than on integers. (Recall that a typical loop +over a list in Skylark does not materialize the loop index as an `int`.) + +An optimization worth trying would be to represent integers using +either an `int32` or `big.Int`, with the `big.Int` used only when +`int32` does not suffice. Using `int32`, not `int64`, for "small" +numbers would make it easier to detect overflow from operations like +`int32 * int32`, which would trigger the use of `big.Int`. + +<b>Floating point</b>: +Floating point numbers are represented using Go's `float64`. +Again, `float` support is required to support protocol buffers. The +existence of floating-point NaN and its infamous comparison behavior +(`NaN != NaN`) had many ramifications for the API, since we cannot +assume the result of an ordered comparison is either less than, +greater than, or equal: it may also fail. + +<b>Strings</b>: + +TODO: discuss UTF-8 and string.bytes method. + +<b>Dictionaries and sets</b>: +Skylark dictionaries have predictable iteration order. +Furthermore, many Skylark values are hashable in Skylark even though +the Go values that represent them are not hashable in Go: big +integers, for example. +Consequently, we cannot use Go maps to implement Skylark's dictionary. + +We use a simple hash table whose buckets are linked lists, each +element of which holds up to 8 key/value pairs. In a well-distributed +table the list should rarely exceed length 1. 
In addition, each +key/value item is part of a doubly-linked list that maintains the +insertion order of the elements for iteration. + +``` +TODO +per object freeze +fail-fast iterators +Go extension interfaces +skylarkstruct +UnpackArgs +``` + +<b>Evaluation strategy:</b> +The evaluator uses a simple recursive tree walk, returning a value or +an error for each expression. We have experimented with just-in-time +compilation of syntax trees to bytecode, but two limitations in the +current Go compiler prevent this strategy from outperforming the +tree-walking evaluator. + +First, the Go compiler does not generate a "computed goto" for a +switch statement ([Go issue +5496](https://github.com/golang/go/issues/5496)). A bytecode +interpreter's main loop is a for-loop around a switch statement with +dozens or hundreds of cases, and the speed with which each case can be +dispatched strongly affects overall performance. +Currently, a switch statement generates a binary tree of ordered +comparisons, requiring several branches instead of one. + +Second, the Go compiler's escape analysis assumes that the underlying +array from a `make([]Value, n)` allocation always escapes +([Go issue 20533](https://github.com/golang/go/issues/20533)). +Because the bytecode interpreter's operand stack has a non-constant +length, it must be allocated with `make`. The resulting allocation +adds to the cost of each Skylark function call; this can be tolerated +by amortizing one very large stack allocation across many calls. +More problematic appears to be the cost of the additional GC write +barriers incurred by every VM operation: every intermediate result is +saved to the VM's operand stack, which is on the heap. +By contrast, intermediate results in the tree-walking evaluator are +never stored to the heap. + +``` +TODO +frames, backtrace, errors. 
+``` + +## Testing + +``` +TODO +skylarktest package +`assert` module +skylarkstruct +integration with Go testing.T +``` diff --git a/doc/spec.md b/doc/spec.md new file mode 100644 index 0000000..51fd4f7 --- /dev/null +++ b/doc/spec.md @@ -0,0 +1,3722 @@ +# Skylark in Go: Language definition + +Skylark is a dialect of Python intended for use as a configuration +language. A Skylark interpreter is typically embedded within a larger +application, and this application may define additional +domain-specific functions and data types beyond those provided by the +core language. For example, Skylark is embedded within (and was +originally developed for) the [Bazel build tool](https://bazel.build), +and [Bazel's build language](https://docs.bazel.build/versions/master/skylark/language.html) is based on Skylark. + +This document describes the Go implementation of Skylark +at github.com/google/skylark. +The language it defines is similar but not identical to +[the Java-based implementation](https://github.com/bazelbuild/bazel/blob/master/src/main/java/com/google/devtools/skylark/Skylark.java) +used by Bazel. +We identify places where their behaviors differ, and an +[appendix](#dialect-differences) provides a summary of those +differences. +We plan to converge both implementations on a single specification +in early 2018. + +This document is maintained by Alan Donovan <adonovan@google.com>. +It was influenced by the Python specification, +Copyright 1990–2017, Python Software Foundation, +and the Go specification, Copyright 2009–2017, The Go Authors. + +Skylark was designed and implemented in Java by Laurent Le Brun, +Dmitry Lomov, Jon Brandvin, and Damien Martin-Guillerez, standing on +the shoulders of the Python community. +The Go implementation was written by Alan Donovan and Jay Conrod; +its scanner was derived from one written by Russ Cox. + +The name "Skylark" is a code name of the Bazel project. 
+We plan to rename the language before the end of 2017 to reflect its +applicability to projects unrelated to Bazel. + +## Overview + +Skylark is an untyped dynamic language with high-level data types, +first-class functions with lexical scope, and automatic memory +management or _garbage collection_. + +Skylark is strongly influenced by Python, and is almost a subset of +that language. In particular, its data types and syntax for +statements and expressions will be very familiar to any Python +programmer. +However, Skylark is intended not for writing applications but for +expressing configuration: its programs are short-lived and have no +external side effects and their main result is structured data or side +effects on the host application. +As a result, Skylark has no need for classes, exceptions, reflection, +concurrency, and other such features of Python. + + +## Contents + +[TOC] + + +## Lexical elements + +A Skylark program consists of one or more modules. +Each module is defined by a single UTF-8-encoded text file. + +A complete grammar of Skylark can be found in [grammar.txt](../syntax/grammar.txt). +That grammar is presented piecemeal throughout this document +in boxes such as this one, which explains the notation: + +```grammar {.good} +Grammar notation + +- lowercase and 'quoted' items are lexical tokens. +- Capitalized names denote grammar productions. +- (...) implies grouping. +- x | y means either x or y. +- [x] means x is optional. +- {x} means x is repeated zero or more times. +- The end of each declaration is marked with a period. +``` + +The contents of a Skylark file are broken into a sequence of tokens of +five kinds: white space, punctuation, keywords, identifiers, and literals. +Each token is formed from the longest sequence of characters that +would form a valid token of each kind. + +```grammar {.good} +File = {Statement | newline} eof . 
+``` + +*White space* consists of spaces (U+0020), tabs (U+0009), carriage +returns (U+000D), and newlines (U+000A). Within a line, white space +has no effect other than to delimit the previous token, but newlines, +and spaces at the start of a line, are significant tokens. + +*Comments*: A hash character (`#`) appearing outside of a string +literal marks the start of a comment; the comment extends to the end +of the line, not including the newline character. +Comments are treated like other white space. + +*Punctuation*: The following punctuation characters or sequences of +characters are tokens: + +```text ++ - * / // % +& | ** +. , = ; : +( ) [ ] { } +< > >= <= == != ++= -= *= /= //= %= +``` + +*Keywords*: The following tokens are keywords and may not be used as +identifiers: + +```text +and if +break in +continue lambda +def not +elif or +else pass +for return +``` + +The tokens below also may not be used as identifiers although they do not +appear in the grammar; they are reserved as possible future keywords: + +<!-- and to remain a syntactic subset of Python --> + +```text +as import +assert is +class nonlocal +del raise +except try +finally while +from with +global yield +``` + +<b>Implementation note:</b> +The Go implementation permits `assert` to be used as an identifier, +and this feature is widely used in its tests. + +<b>Implementation note:</b> +The Java implementation does not recognize `&` as a valid token. + +*Identifiers*: an identifier is a sequence of Unicode letters, decimal + digits, and underscores (`_`), not starting with a digit. +Identifiers are used as names for values. + +Examples: + +```text +None True len +x index starts_with arg0 +``` + +*Literals*: literals are tokens that denote specific values. Skylark +has string, integer, and floating-point literals. + +```text +0 # int +123 # decimal int +0x7f # hexadecimal int +0755 # octal int + +0.0 0. 
.0 # float +1e10 1e+10 1e-10 +1.1e10 1.1e+10 1.1e-10 + +"hello" 'hello' # string +'''hello''' """hello""" # triple-quoted string +r'hello' r"hello" # raw string literal +``` + +Integer and floating-point literal tokens are defined by the following grammar: + +```grammar {.good} +int = decimal_lit | octal_lit | hex_lit . +decimal_lit = ('1' … '9') {decimal_digit} . +octal_lit = '0' {octal_digit} . +hex_lit = '0' ('x'|'X') hex_digit {hex_digit} . + +float = decimals '.' [decimals] [exponent] + | decimals exponent + | '.' decimals [exponent] + . +decimals = decimal_digit {decimal_digit} . +exponent = ('e'|'E') ['+'|'-'] decimals . + +decimal_digit = '0' … '9' . +octal_digit = '0' … '7' . +hex_digit = '0' … '9' | 'A' … 'F' | 'a' … 'f' . +``` + +TODO: define string_lit, indent, outdent, semicolon, newline, eof + +## Data types + +The following eleven data types are known to the interpreter: + +```shell +NoneType # the type of None +bool # True or False +int # a signed integer of arbitrary magnitude +float # an IEEE 754 double-precision floating point number +string # a byte string +list # a modifiable sequence of values +tuple # a fixed-length sequence of values, unmodifiable +dict # a mapping from values to values +set # a set of values +function # a function implemented in Skylark +builtin # a function or method implemented by the interpreter or host application +``` + +Additional data types may be defined by the host application into +which the interpreter is embedded, and those data types may +participate in basic operations of the language such as arithmetic, +comparison, indexing, and function calls. + +<!-- We needn't mention the stringIterable type here. --> + +Some operations can be applied to any Skylark value. For example, +every value has a type string that can be obtained with the expression +`type(x)`, and any value may be converted to a string using the +expression `str(x)`, or to a Boolean truth value using the expression +`bool(x)`. 
Other operations apply only to certain types. For +example, the indexing operation `a[i]` works only with strings, lists, +and tuples, and any application-defined types that are _indexable_. +The [_value concepts_](#value-concepts) section explains the groupings of +types by the operators they support. + + +### None + +`None` is a distinguished value used to indicate the absence of any other value. +For example, the result of a call to a function that contains no return statement is `None`. + +`None` is equal only to itself. Its [type](#type) is `"NoneType"`. +The truth value of `None` is `False`. + + +### Booleans + +There are two Boolean values, `True` and `False`, representing the +truth or falsehood of a predicate. The [type](#type) of a Boolean is `"bool"`. + +Boolean values are typically used as conditions in `if`-statements, +although any Skylark value used as a condition is implicitly +interpreted as a Boolean. +For example, the values `None`, `0`, `0.0`, and the empty sequences +`""`, `()`, `[]`, and `{}` have a truth value of `False`, whereas non-zero +numbers and non-empty sequences have a truth value of `True`. +Application-defined types determine their own truth value. +Any value may be explicitly converted to a Boolean using the built-in `bool` +function. + +```python +1 + 1 == 2 # True +2 + 2 == 5 # False + +if 1 + 1: + print("True") +else: + print("False") +``` + +### Integers + +The Skylark integer type represents integers. Its [type](#type) is `"int"`. + +Integers may be positive or negative, and arbitrarily large. +Integer arithmetic is exact. +Integers are totally ordered; comparisons follow mathematical +tradition. + +The `+` and `-` operators perform addition and subtraction, respectively. +The `*` operator performs multiplication. + +The `//` and `%` operations on integers compute floored division and +remainder of floored division, respectively. 
+If the signs of the operands differ, the sign of the remainder `x % y` +matches that of the divisor, `y`. +For all finite x and y (y ≠ 0), `(x // y) * y + (x % y) == x`. +The `/` operator implements real division, and +yields a `float` result even when its operands are both of type `int`. + +Integers, including negative values, may be interpreted as bit vectors. +The `|` and `&` operators implement bitwise OR and AND, respectively. +(This feature is not part of the Java implementation.) + +Any bool, number, or string may be interpreted as an integer by using +the `int` built-in function. + +An integer used in a Boolean context is considered true if it is +non-zero. + +```python +100 // 5 * 9 + 32 # 212 +3 // 2 # 1 +3 / 2 # 1.5 +111111111 * 111111111 # 12345678987654321 +"0x%x" % (0x1234 & 0xf00f) # "0x1004" +int("0xffff") # 65535 +``` + +<b>Implementation note:</b> +In the Go implementation of Skylark, integer representation and +arithmetic is exact, motivated by the need for lossless manipulation +of protocol messages which may contain signed and unsigned 64-bit +integers. +The Java implementation currently supports only signed 32-bit integers. + + +### Floating-point numbers + +The Skylark floating-point data type represents an IEEE 754 +double-precision floating-point number. Its [type](#type) is `"float"`. + +Arithmetic on floats using the `+`, `-`, `*`, `/`, `//`, and `%` + operators follows the IEEE 754 standard. +However, computing the division or remainder of division by zero is a dynamic error. + +An arithmetic operation applied to a mixture of `float` and `int` +operands works as if the `int` operand is first converted to a +`float`. For example, `3.141 + 1` is equivalent to `3.141 + +float(1)`. +There are two floating-point division operators: +`x / y` yields the floating-point quotient of `x` and `y`, +whereas `x // y` yields `floor(x / y)`, that is, the largest +integer value not greater than `x / y`. 
+Although the resulting number is integral, it is represented as a +`float` if either operand is a `float`. + +The infinite float values `+Inf` and `-Inf` represent numbers +greater/less than all finite float values. + +The non-finite `NaN` value represents the result of dubious operations +such as `Inf/Inf`. A NaN value compares neither less than, nor +greater than, nor equal to any value, including itself. + +All floats other than NaN are totally ordered, so they may be compared +using operators such as `==` and `<` and the `cmp` built-in function. + +Any bool, number, or string may be interpreted as a floating-point +number by using the `float` built-in function. + +A float used in a Boolean context is considered true if it is +non-zero. + +```python +1.23e45 * 1.23e45 # 1.5129e+90 +1.111111111111111 * 1.111111111111111 # 1.23457 +3.0 / 2 # 1.5 +3 / 2.0 # 1.5 +float(3) / 2 # 1.5 +3.0 // 2.0 # 1 +``` + +<b>Implementation note:</b> +The Go implementation of Skylark supports floating-point numbers as an +optional feature, motivated by the need for lossless manipulation of +protocol messages. +The Go implementation of the Skylark REPL requires the `-fp` flag to +enable support for floating-point literals, the `float` built-in +function, and the real division operator `/`. +The Java implementation does not yet support floating-point numbers. + + +### Strings + +A string represents an immutable sequence of bytes. +The [type](#type) of a string is `"string"`. + +Strings can represent arbitrary binary data, including zero bytes, but +most strings contain text, encoded by convention using UTF-8. + +The built-in `len` function returns the number of bytes in a string. + +Strings may be concatenated with the `+` operator. + +The substring expression `s[i:j]` returns the substring of `s` from +index `i` up to index `j`. The index expression `s[i]` returns the +1-byte substring `s[i:i+1]`. + +Strings are hashable, and thus may be used as keys in a dictionary. 
+ +Strings are totally ordered lexicographically, so strings may be +compared using operators such as `==` and `<` and the `cmp` built-in +function. + +Strings are _not_ iterable sequences, so they cannot be used as the operand of +a `for`-loop, list comprehension, or any other operation that requires +an iterable sequence. +To obtain a view of a string as an iterable sequence of numeric byte +values, 1-byte substrings, numeric Unicode code points, or 1-code +point substrings, you must explicitly call one of its four methods: +`bytes`, `split_bytes`, `codepoints`, or `split_codepoints`. + +Any value may be formatted as a string using the `str` or `repr` built-in +functions, the `str % tuple` operator, or the `str.format` method. + +A string used in a Boolean context is considered true if it is +non-empty. + +Strings have several built-in methods: + +* [`bytes`](#string·bytes) +* [`capitalize`](#string·capitalize) +* [`codepoints`](#string·codepoints) +* [`count`](#string·count) +* [`endswith`](#string·endswith) +* [`find`](#string·find) +* [`format`](#string·format) +* [`index`](#string·index) +* [`isalnum`](#string·isalnum) +* [`isalpha`](#string·isalpha) +* [`isdigit`](#string·isdigit) +* [`islower`](#string·islower) +* [`isspace`](#string·isspace) +* [`istitle`](#string·istitle) +* [`isupper`](#string·isupper) +* [`join`](#string·join) +* [`lower`](#string·lower) +* [`lstrip`](#string·lstrip) +* [`partition`](#string·partition) +* [`replace`](#string·replace) +* [`rfind`](#string·rfind) +* [`rindex`](#string·rindex) +* [`rpartition`](#string·rpartition) +* [`rsplit`](#string·rsplit) +* [`rstrip`](#string·rstrip) +* [`split`](#string·split) +* [`splitlines`](#string·splitlines) +* [`split_bytes`](#string·split_bytes) +* [`split_codepoints`](#string·split_codepoints) +* [`startswith`](#string·startswith) +* [`strip`](#string·strip) +* [`title`](#string·title) +* [`upper`](#string·upper) + +<b>Implementation note:</b> +The type of a string element varies across 
implementations. +There is agreement that byte strings, with text conventionally encoded +using UTF-8, are the ideal choice, but the Java implementation treats +strings as sequences of UTF-16 codes and changing it appears +intractable; see Google Issue b/36360490. + +<b>Implementation note:</b> +The Java implementation does not consistently treat strings as +iterable; see `testdata/string.sky` in the test suite and Google Issue +b/34385336 for further details. + +### Lists + +A list is a mutable sequence of values. +The [type](#type) of a list is `"list"`. + +Lists are indexable sequences: the elements of a list may be iterated +over by `for`-loops, list comprehensions, and various built-in +functions. + +Lists may be constructed using bracketed list notation: + +```python +[] # an empty list +[1] # a 1-element list +[1, 2] # a 2-element list +``` + +Lists can also be constructed from any iterable sequence by using the +built-in `list` function. + +The built-in `len` function applied to a list returns the number of elements. +The index expression `list[i]` returns the element at index i, +and the slice expression `list[i:j]` returns a new list consisting of +the elements at indices from i to j. + +List elements may be added using the `append` or `extend` methods, +removed using the `remove` method, or reordered by assignments such as +`list[i] = list[j]`. + +The concatenation operation `x + y` yields a new list containing all +the elements of the two lists x and y. + +For most types, `x += y` is equivalent to `x = x + y`, except that it +evaluates `x` only once, that is, it allocates a new list to hold +the concatenation of `x` and `y`. +However, if `x` refers to a list, the statement does not allocate a +new list but instead mutates the original list in place, similar to +`x.extend(y)`. + +Lists are not hashable, so may not be used in the keys of a dictionary. + +A list used in a Boolean context is considered true if it is +non-empty. 
+ +A [_list comprehension_](#comprehensions) creates a new list whose elements are the +result of some expression applied to each element of another sequence. + +```python +[x*x for x in [1, 2, 3, 4]] # [1, 4, 9, 16] +``` + +A list value has these methods: + +* [`append`](#list·append) +* [`clear`](#list·clear) +* [`extend`](#list·extend) +* [`index`](#list·index) +* [`insert`](#list·insert) +* [`pop`](#list·pop) +* [`remove`](#list·remove) + +### Tuples + +A tuple is an immutable sequence of values. +The [type](#type) of a tuple is `"tuple"`. + +Tuples are constructed using parenthesized list notation: + +```python +() # the empty tuple +(1,) # a 1-tuple +(1, 2) # a 2-tuple ("pair") +(1, 2, 3) # a 3-tuple +``` + +Observe that for the 1-tuple, the trailing comma is necessary to +distinguish it from the parenthesized expression `(1)`. +1-tuples are seldom used. + +Skylark, unlike Python, does not permit a trailing comma to appear in +an unparenthesized tuple expression: + +```python +for k, v, in dict.items(): pass # syntax error at 'in' +_ = [(v, k) for k, v, in dict.items()] # syntax error at 'in' +f = lambda a, b, : None # syntax error at ':' + +sorted(3, 1, 4, 1,) # ok +[1, 2, 3, ] # ok +{1: 2, 3:4, } # ok +``` + +Any iterable sequence may be converted to a tuple by using the +built-in `tuple` function. + +Like lists, tuples are indexed sequences, so they may be indexed and +sliced. The index expression `tuple[i]` returns the tuple element at +index i, and the slice expression `tuple[i:j]` returns a subsequence +of a tuple. + +Tuples are iterable sequences, so they may be used as the operand of a +`for`-loop, a list comprehension, or various built-in functions. + +Unlike lists, tuples cannot be modified. +However, the mutable elements of a tuple may be modified. + +Tuples are hashable (assuming their elements are hashable), +so they may be used as keys of a dictionary. + +Tuples may be concatenated using the `+` operator. 
+ +A tuple used in a Boolean context is considered true if it is +non-empty. + + +### Dictionaries + +A dictionary is a mutable mapping from keys to values. +The [type](#type) of a dictionary is `"dict"`. + +Dictionaries provide constant-time operations to insert an element, to +look up the value for a key, or to remove an element. Dictionaries +are implemented using hash tables, so keys must be hashable. Hashable +values include `None`, Booleans, numbers, strings, and tuples +composed from hashable values. Most mutable values, such as lists, +dictionaries, and sets, are not hashable, even when frozen. +Attempting to use a non-hashable value as a key in a dictionary +results in a dynamic error, as does passing one to the built-in +`hash` function. + +A [dictionary expression](#dictionary-expressions) specifies a +dictionary as a set of key/value pairs enclosed in braces: + +```python +coins = { + "penny": 1, + "nickel": 5, + "dime": 10, + "quarter": 25, +} +``` + +The expression `d[k]`, where `d` is a dictionary and `k` is a key, +retrieves the value associated with the key. If the dictionary +contains no such item, the operation fails: + +```python +coins["penny"] # 1 +coins["dime"] # 10 +coins["silver dollar"] # error: key not found +``` + +The number of items in a dictionary `d` is given by `len(d)`. +A key/value item may be added to a dictionary, or updated if the key +is already present, by using `d[k]` on the left side of an assignment: + +```python +len(coins) # 4 +coins["shilling"] = 20 +len(coins) # 5, item was inserted +coins["shilling"] = 5 +len(coins) # 5, existing item was updated +``` + +A dictionary can also be constructed using a [dictionary +comprehension](#comprehensions), which evaluates a pair of expressions, +the _key_ and the _value_, for every element of another iterable such +as a list. 
This example builds a mapping from each word to its length +in bytes: + +```python +words = ["able", "baker", "charlie"] +{x: len(x) for x in words} # {"able": 4, "baker": 5, "charlie": 7} +``` + +Dictionaries are iterable sequences, so they may be used as the +operand of a `for`-loop, a list comprehension, or various built-in +functions. +Iteration yields the dictionary's keys in the order in which they were +inserted; updating the value associated with an existing key does not +affect the iteration order. + +```python +x = dict([("a", 1), ("b", 2)]) # {"a": 1, "b": 2} +x.update([("a", 3), ("c", 4)]) # {"a": 3, "b": 2, "c": 4} +``` + +```python +for name in coins: + print(name, coins[name]) # prints "penny 1", "nickel 5", ... +``` + +Like all mutable values in Skylark, a dictionary can be frozen, and +once frozen, all subsequent operations that attempt to update it will +fail. + +A dictionary used in a Boolean context is considered true if it is +non-empty. + +The binary `+` operation may be applied to two dictionaries. It +yields a new dictionary whose elements are the union of the two +operands. If a key is present in both operands, the result contains +the value from the right operand. +<b>Note:</b> this feature is deprecated. Use the +`dict.update` method instead. + +Dictionaries may be compared for equality using `==` and `!=`. Two +dictionaries compare equal if they contain the same number of items +and each key/value item (k, v) found in one dictionary is also present +in the other. Dictionaries are not ordered; it is an error to compare +two dictionaries with `<`. + + +A dictionary value has these methods: + +* [`clear`](#dict·clear) +* [`get`](#dict·get) +* [`items`](#dict·items) +* [`keys`](#dict·keys) +* [`pop`](#dict·pop) +* [`popitem`](#dict·popitem) +* [`setdefault`](#dict·setdefault) +* [`update`](#dict·update) +* [`values`](#dict·values) + +### Sets + +A set is a mutable set of values. +The [type](#type) of a set is `"set"`. 
+ +Like dictionaries, sets are implemented using hash tables, so the +elements of a set must be hashable. + +Sets may be compared for equality or inequality using `==` and `!=`. +Two sets compare equal if they contain the same elements. + +Sets are iterable sequences, so they may be used as the operand of a +`for`-loop, a list comprehension, or various built-in functions. +Iteration yields the set's elements in the order in which they were +inserted. + +The binary `|` and `&` operators compute union and intersection when +applied to sets. The right operand of the `|` operator may be any +iterable value. The binary `in` operator performs a set membership +test when its right operand is a set. + +Sets are instantiated by calling the built-in `set` function, which +returns a set containing all the elements of its optional argument, +which must be an iterable sequence. Sets have no literal syntax. + +The only method of a set is `union`, which is equivalent to the `|` operator. + +A set used in a Boolean context is considered true if it is non-empty. + +<b>Implementation note:</b> +The Go implementation of the Skylark REPL requires the `-set` flag to +enable support for sets. +The Java implementation does not support sets. + + +### Functions + +A function value represents a function defined in Skylark. +Its [type](#type) is `"function"`. +A function value used in a Boolean context is always considered true. + +Functions defined by a [`def` statement](#function-definitions) are named; +functions defined by a [`lambda` expression](#lambda-expressions) are anonymous. + +Function definitions may be nested, and an inner function may refer to a local variable of an outer function. + +A function definition defines zero or more named parameters. +Skylark has a rich mechanism for passing arguments to functions. + +<!-- TODO break up this explanation into caller-side and callee-side + parts, and put the former under function calls and the latter + under function definitions. 
Also try to convey that the Callable + interface sees the flattened-out args and kwargs and that's what + built-ins get. +--> + +The example below shows a definition and call of a function of two +required parameters, `x` and `y`. + +```python +def idiv(x, y): + return x // y + +idiv(6, 3) # 2 +``` + +A call may provide arguments to function parameters either by +position, as in the example above, or by name, as in the first two calls +below, or by a mixture of the two forms, as in the third call below. +All the positional arguments must precede all the named arguments. +Named arguments may improve clarity, especially in functions of +several parameters. + +```python +idiv(x=6, y=3) # 2 +idiv(y=3, x=6) # 2 + +idiv(6, y=3) # 2 +``` + +<b>Optional parameters:</b> A parameter declaration may specify a +default value using `name=value` syntax; such a parameter is +_optional_. The default value expression is evaluated during +execution of the `def` statement or evaluation of the `lambda` +expression, and the default value forms part of the function value. +All optional parameters must follow all non-optional parameters. +A function call may omit arguments for any suffix of the optional +parameters; the effective values of those arguments are supplied by +the function's parameter defaults. + +```python +def f(x, y=3): + return x, y + +f(1, 2) # (1, 2) +f(1) # (1, 3) +``` + +If a function parameter's default value is a mutable expression, +modifications to the value during one call may be observed by +subsequent calls. +Beware of this when using lists or dicts as default values. +If the function becomes frozen, its parameters' default values become +frozen too. + +```python +def f(x, list=[]): + list.append(x) + return list + +f(4, [1,2,3]) # [1, 2, 3, 4] +f(1) # [1] +f(2) # [1, 2], not [2]! +freeze(f) +f(3) # error: cannot append to frozen list +``` + +<b>Variadic functions:</b> Some functions allow callers to provide an +arbitrary number of arguments. 
+After all required and optional parameters, a function definition may +specify a _variadic arguments_ or _varargs_ parameter, indicated by a +star preceding the parameter name: `*args`. +Any surplus positional arguments provided by the caller are formed +into a tuple and assigned to the `args` parameter. + +```python +def f(x, y, *args): + return x, y, args + +f(1, 2) # (1, 2, ()) +f(1, 2, 3, 4) # (1, 2, (3, 4)) +``` + +<b>Keyword-variadic functions:</b> Some functions allow callers to +provide an arbitrary sequence of `name=value` keyword arguments. +A function definition may include a final _keyworded arguments_ or +_kwargs_ parameter, indicated by a double-star preceding the parameter +name: `**kwargs`. +Any surplus named arguments that do not correspond to named parameters +are collected in a new dictionary and assigned to the `kwargs` parameter: + +```python +def f(x, y, **kwargs): + return x, y, kwargs + +f(1, 2) # (1, 2, {}) +f(x=2, y=1) # (2, 1, {}) +f(x=2, y=1, z=3) # (2, 1, {"z": 3}) +``` + +It is a static error if any two parameters of a function have the same name. + +Just as a function definition may accept an arbitrary number of +positional or keyworded arguments, a function call may provide an +arbitrary number of positional or keyworded arguments supplied by a +list or dictionary: + +```python +def f(a, b, c=5): + return a * b + c + +f(*[2, 3]) # 11 +f(*[2, 3, 7]) # 13 +f(*[2]) # error: f takes at least 2 arguments (1 given) + +f(**dict(b=3, a=2)) # 11 +f(**dict(c=7, a=2, b=3)) # 13 +f(**dict(a=2)) # error: f takes at least 2 arguments (1 given) +f(**dict(d=4)) # error: f got unexpected keyword argument "d" +``` + +Once the parameters have been successfully bound to the arguments +supplied by the call, the sequence of statements that comprise the +function body is executed. + +A function call completes normally after the execution of either a +`return` statement, or of the last statement in the function body. 
+The result of the function call is the value of the return statement's +operand, or `None` if the return statement had no operand or if the +function completed without executing a return statement. + +```python +def f(x): + if x == 0: + return + if x < 0: + return -x + print(x) + +f(1) # returns None after printing "1" +f(0) # returns None without printing +f(-1) # returns 1 without printing +``` + + +It is a dynamic error for a function to call itself or another +function value with the same declaration. + +```python +def fib(x): + if x < 2: + return x + return fib(x-2) + fib(x-1) # dynamic error: function fib called recursively + +fib(5) +``` + +This rule, combined with the invariant that all loops are iterations +over finite sequences, implies that Skylark programs are not Turing-complete. + +<!-- This rule is supposed to deter people from abusing Skylark for + inappropriate uses, especially in the build system. + It may work for that purpose, but it doesn't stop Skylark programs + from consuming too much time or space. Perhaps it should be a + dialect option. +--> + + + +### Built-ins + +A Built-in is a function or method implemented in Go by the interpreter +or the application into which the interpreter is embedded. +The [type](#type) of a built-in is `"builtin"`. +A builtin value used in a Boolean context is always considered true. + +Many built-ins are defined in the "universe" block of the environment +(see [Name Resolution](#name-resolution)), and are thus available to +all Skylark programs. + +Except where noted, built-ins accept only positional arguments. +The parameter names serve merely as documentation. + +## Name binding and variables + +After a Skylark file is parsed, but before its execution begins, the +Skylark interpreter checks statically that the program is well formed. 
+For example, `break` and `continue` statements may appear only within +a loop; `if`, `for`, and `return` statements may appear only within a +function; and `load` statements may appear only outside any function. + +_Name resolution_ is the static checking process that +resolves names to variable bindings. +During execution, names refer to variables. Statically, names denote +places in the code where variables are created; these places are +called _bindings_. A name may denote different bindings at different +places in the program. The region of text in which a particular name +refers to the same binding is called that binding's _scope_. + +Four Skylark constructs bind names, as illustrated in the example below: +`load` statements (`a` and `b`), +`def` statements (`c`), +function parameters (`d`), +and assignments (`e`, `h`, including the augmented assignment `e += h`). +Variables may be assigned or re-assigned explicitly (`e`, `h`), or implicitly, as +in a `for`-loop (`f`) or comprehension (`g`, `i`). + +```python +load("lib.sky", "a", b="B") + +def c(d): + e = 0 + for f in d: + print([True for g in f]) + e += 1 + +h = [2*i for i in a] +``` + +The environment of a Skylark program is structured as a tree of +_lexical blocks_, each of which may contain name bindings. +The tree of blocks is parallel to the syntax tree. +Blocks are of four kinds. + +<!-- Avoid the term "built-in block" since that's also a type. --> +At the root of the tree is the _universe_ block, which binds constant +values such as `None`, `True`, and `False`, and built-in functions +such as `len`, `list`, and so on. +Skylark programs cannot change the set of universe bindings. +Because the universe block is shared by all files (modules), +all values bound in it must be immutable and stateless +from the perspective of the Skylark program. + +Nested beneath the universe block is the _module_ block, which +contains the bindings of the current file. 
+Bindings in the module block (such as `a`, `b`, `c`, and `h` in the +example) are called _global_. +The module block is typically empty at the start of the file +and is populated by top-level binding statements, +but an application may pre-bind one or more global names, +to provide domain-specific functions to that file, for example. + +A module block contains a _function_ block for each top-level +function, and a _comprehension_ block for each top-level +comprehension. +Bindings inside either of these kinds of block are called _local_. +Additional functions and comprehensions, and their blocks, may be +nested in any order, to any depth. + +If a name is bound anywhere within a block, all uses of the name within +the block are treated as references to that binding, even uses that +appear before the binding. +The binding of `y` on the last line of the example below makes `y` +local to the function `hello`, so the use of `y` in the print +statement also refers to the local `y`, even though it appears +earlier. 
+ +```python +y = "goodbye" + +def hello(): + for x in (1, 2): + if x == 2: + print(y) # prints "hello" + if x == 1: + y = "hello" +``` + +It is a dynamic error to evaluate a reference to a local variable +before it has been bound: + +```python +def f(): + print(x) # dynamic error: local variable x referenced before assignment + x = "hello" +``` + +The same is true for global variables: + +```python +print(x) # dynamic error: global variable x referenced before assignment +x = "hello" +``` + +It is a static error to bind a global variable already explicitly bound in the file: + +```python +x = 1 +x = 2 # static error: cannot reassign global x declared on line 1 +``` + +<!-- The above rule, and the rule that forbids if-statements and loops at + toplevel, exist to ensure that there is exactly one statement + that binds each global variable, which makes cross-referenced + documentation more useful, the designers assure me, but + I am skeptical that it's worth the trouble. --> + +If a name was pre-bound by the application, the Skylark program may +explicitly bind it, but only once. + +<b>Implementation note</b>: +An augmented assignment statement such as `x += 1` is considered a +binding of `x`. +However, because of the special behavior of `+=` for lists, which acts +like a non-binding reference, the Go implementation suppresses the +"cannot reassign" error for all augmented assignments at toplevel, +whereas the Java implementation reports the error even when the +statement would apply `+=` to a list. + +A function may refer to variables defined in an enclosing function. +In this example, the inner function `f` refers to a variable `x` +that is local to the outer function `squarer`. +`x` is a _free variable_ of `f`. +The function value (`f`) created by a `def` statement holds a +reference to each of its free variables so it may use +them even after the enclosing function has returned. 
+ +```python +def squarer(): + x = [0] + def f(): + x[0] += 1 + return x[0]*x[0] + return f + +sq = squarer() +print(sq(), sq(), sq(), sq()) # "1 4 9 16" +``` + +An inner function cannot assign to a variable bound in an enclosing +function, because the assignment would bind the variable in the +inner function. +In the example below, the `x += 1` statement binds `x` within `f`, +hiding the outer `x`. +Execution fails because the inner `x` has not been assigned before the +attempt to increment it. + +```python +def squarer(): + x = 0 + def f(): + x += 1 # dynamic error: local variable x referenced before assignment + return x*x + return f + +sq = squarer() +``` + +(Skylark has no equivalent of Python's `nonlocal` or `global` +declarations, but as the first version of `squarer` showed, this +omission can be worked around by using a list of a single element.) + + +A name appearing after a dot, such as `split` in +`get_filename().split('/')`, is not resolved statically. +The [dot expression](#dot-expressions) `.split` is a dynamic operation +on the value returned by `get_filename()`. + + +## Value concepts {#value-concepts} + +Skylark has eleven core [data types](#data-types). An application +that embeds the Skylark interpreter may define additional types that +behave like Skylark values. All values, whether core or +application-defined, implement a few basic behaviors: + +```text +str(x) -- return a string representation of x +type(x) -- return a string describing the type of x +freeze(x) -- make x, and everything it transitively refers to, immutable +bool(x) -- convert x to a Boolean truth value +hash(x) -- return a hash code for x +``` + +### Identity and mutation + +Skylark is an imperative language: programs consist of sequences of +statements executed for their side effects. 
+For example, an assignment statement updates the value held by a +variable, and calls to some built-in functions such as `print` change +the state of the application that embeds the interpreter. + +Values of some data types, such as `NoneType`, `bool`, `int`, `float`, and +`string`, are _immutable_; they can never change. +Immutable values have no notion of _identity_: it is impossible for a +Skylark program to tell whether two integers, for instance, are +represented by the same object; it can tell only whether they are +equal. + +Values of other data types, such as `list`, `dict`, and `set`, are +_mutable_: they may be modified by a statement such as `a[i] = 0` or +`items.clear()`. Although `tuple` and `function` values are not +directly mutable, they may refer to mutable values indirectly, so for +this reason we consider them mutable too. Skylark values of these +types are actually _references_ to variables. + +Copying a reference to a variable, using an assignment statement for +instance, creates an _alias_ for the variable, and the effects of +operations applied to the variable through one alias are visible +through all others. + +```python +x = [] # x refers to a new empty list variable +y = x # y becomes an alias for x +x.append(1) # changes the variable referred to by x +print(y) # "[1]"; y observes the mutation +``` + +Skylark uses _call-by-value_ parameter passing: in a function call, +argument values are assigned to function parameters as if by +assignment statements. 
If the values are references, the caller and +callee may refer to the same variables, so if the called function +changes the variable referred to by a parameter, the effect may also +be observed by the caller: + +```python +def f(y): + y.append(1) # changes the variable referred to by x + +x = [] # x refers to a new empty list variable +f(x) # f's parameter y becomes an alias for x +print(x) # "[1]"; x observes the mutation +``` + + +As in all imperative languages, understanding _aliasing_, the +relationship between reference values and the variables to which they +refer, is crucial to writing correct programs. + +### Freezing a value + +Skylark has a feature unusual among imperative programming languages: +a mutable value may be _frozen_ so that all subsequent attempts to +mutate it fail with a dynamic error; the value, and all other values +reachable from it, become _immutable_. + +Immediately after execution of a Skylark module, all values in its +top-level environment are frozen. Because all the global variables of +an initialized Skylark module are immutable, the module may be published to +and used by other threads in a parallel program without the need for +locks. For example, the Bazel build system loads and executes BUILD +and .bzl files in parallel, and two modules being executed +concurrently may freely access variables or call functions from a +third without the possibility of a race condition. + +<b>Implementation note:</b> +The Go implementation of Skylark permits user code to freeze arbitrary +values by calling the `freeze` built-in function. +This feature must be enabled in the REPL by the `-freeze` flag. +This function is not present in the Java implementation, which freezes +values only _en masse_ at the end of module initialization. + +### Hashing + +The `dict` and `set` data types are implemented using hash tables, so +only _hashable_ values are suitable as keys of a `dict` or elements of +a `set`. 
Attempting to use a non-hashable value as the key in a hash +table, or as the operand of the `hash` built-in function, results in a +dynamic error. + +The hash of a value is an unspecified integer chosen so that two equal +values have the same hash, in other words, `x == y => hash(x) == hash(y)`. +A hashable value has the same hash throughout its lifetime. + +Values of the types `NoneType`, `bool`, `int`, `float`, and `string`, +which are all immutable, are hashable. + +Values of mutable types such as `list`, `dict`, and `set` are not +hashable. These values remain unhashable even if they have become +immutable due to _freezing_. + +A `tuple` value is hashable only if all its elements are hashable. +Thus `("localhost", 80)` is hashable but `([127, 0, 0, 1], 80)` is not. + +Values of the types `function` and `builtin` are also hashable. +Although functions are not necessarily immutable, as they may be +closures that refer to mutable variables, instances of these types +are compared by reference identity (see [Comparisons](#comparisons)), +so their hash values are derived from their identity. + + +### Sequence types + +Many Skylark data types represent a _sequence_ of values: lists, +tuples, and sets are sequences of arbitrary values, and in many +contexts dictionaries act like a sequence of their keys. + +We can classify different kinds of sequence types based on the +operations they support. +Each is listed below using the name of its corresponding interface in +the interpreter's Go API. + +* `Iterable`: an _iterable_ value lets us process each of its elements in a fixed order. + Examples: `dict`, `set`, `list`, `tuple`, but not `string`. +* `Sequence`: a _sequence of known length_ lets us know how many elements it + contains without processing them. + Examples: `dict`, `set`, `list`, `tuple`, but not `string`. +* `Indexable`: an _indexed_ type has a fixed length and provides efficient + random access to its elements, which are identified by integer indices. 
+ Examples: `string`, `tuple`. +* `SetIndexable`: a _settable indexed type_ additionally allows us to modify the + element at a given integer index. Example: `list`. +* `Mapping`: a mapping is an association of keys to values. Example: `dict`. + +Although all of Skylark's core data types for sequences implement at +least the `Sequence` contract, it's possible for an application +that embeds the Skylark interpreter to define additional data types +representing sequences whose length is unknown, or perhaps even infinite, that implement +only the `Iterable` contract. + +Strings are not iterable, though they do support the `len(s)` and +`s[i]` operations. Skylark deviates from Python here to avoid a common +pitfall in which a string is used by mistake where a list containing a +single string was intended, resulting in its interpretation as a sequence +of bytes. + +Most Skylark operators and built-in functions that need a sequence +of values will accept any iterable. + +It is a dynamic error to mutate a sequence such as a list, set, or +dictionary while iterating over it. + +```python +def increment_values(dict): + for k in dict: + dict[k] += 1 # error: cannot insert into hash table during iteration + +dict = {"one": 1, "two": 2} +increment_values(dict) +``` + + +### Indexing + +Many Skylark operators and functions require an index operand `i`, +such as `a[i]` or `list.insert(i, x)`. Others require two indices `i` +and `j` that indicate the start and end of a subsequence, such as +`a[i:j]`, `list.index(x, i, j)`, or `string.find(x, i, j)`. +All such operations follow similar conventions, described here. + +Indexing in Skylark is *zero-based*. The first element of a string +or list has index 0, the next 1, and so on. The last element of a +sequence of length `n` has index `n-1`. 
+ +```python +"hello"[0] # "h" +"hello"[4] # "o" +"hello"[5] # error: index out of range +``` + +For subsequence operations that require two indices, the first is +_inclusive_ and the second _exclusive_. Thus `a[i:j]` indicates the +sequence starting with element `i` up to but not including element +`j`. The length of this subsequence is `j-i`. This convention is known +as *half-open indexing*. + +```python +"hello"[1:4] # "ell" +``` + +Either or both of the index operands may be omitted. If omitted, the +first is treated equivalent to 0 and the second is equivalent to the +length of the sequence: + +```python +"hello"[1:] # "ello" +"hello"[:4] # "hell" +``` + +It is permissible to supply a negative integer to an indexing +operation. The effective index is computed from the supplied value by +the following two-step procedure. First, if the value is negative, the +length of the sequence is added to it. This provides a convenient way +to address the final elements of the sequence: + +```python +"hello"[-1] # "o", like "hello"[4] +"hello"[-3:-1] # "ll", like "hello"[2:4] +``` + +Second, for subsequence operations, if the value is still negative, it +is replaced by zero, or if it is greater than the length `n` of the +sequence, it is replaced by `n`. In effect, the index is "truncated" to +the nearest value in the range `[0:n]`. + +```python +"hello"[-1000:+1000] # "hello" +``` + +This truncation step does not apply to indices of individual elements: + +```python +"hello"[-6] # error: index out of range +"hello"[-5] # "h" +"hello"[4] # "o" +"hello"[5] # error: index out of range +``` + + +## Expressions + +An expression specifies the computation of a value. + +The Skylark grammar defines several categories of expression. +An _operand_ is an expression consisting of a single token (such as an +identifier or a literal), or a bracketed expression. +Operands are self-delimiting. 
+An operand may be followed by any number of dot, call, or slice +suffixes, to form a _primary_ expression. +In some places in the Skylark grammar where an expression is expected, +it is legal to provide a comma-separated list of expressions denoting +a tuple. +The grammar uses `Expression` where a multiple-component expression is allowed, +and `Test` where it accepts an expression of only a single component. + +```grammar {.good} +Expression = Test {',' Test} . + +Test = LambdaExpr | IfExpr | PrimaryExpr | UnaryExpr | BinaryExpr . + +PrimaryExpr = Operand + | PrimaryExpr DotSuffix + | PrimaryExpr CallSuffix + | PrimaryExpr SliceSuffix + . + +Operand = identifier + | int | float | string + | ListExpr | ListComp + | DictExpr | DictComp + | '(' [Expression] [','] ')' + | ('-' | '+') PrimaryExpr + . + +DotSuffix = '.' identifier . +CallSuffix = '(' [Arguments [',']] ')' . +SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' . +``` + +TODO: resolve position of +x, -x, and 'not x' in grammar: Operand or UnaryExpr? + +### Identifiers + +```grammar {.good} +Primary = identifier +``` + +An identifier is a name that identifies a value. + +Lookup of locals and globals may fail if not yet defined. + +### Literals + +Skylark supports literals of three different kinds: + +```grammar {.good} +Primary = int | float | string +``` + +Evaluation of a literal yields a value of the given type (string, int, +or float) with the given value. +See [Literals](#lexical-elements) for details. + +### Parenthesized expressions + +```grammar {.good} +Primary = '(' [Expression] ')' +``` + +A single expression enclosed in parentheses yields the result of that expression. +Explicit parentheses may be used for clarity, +or to override the default association of subexpressions. 
+ +```python +1 + 2 * 3 + 4 # 11 +(1 + 2) * (3 + 4) # 21 +``` + +If the parentheses are empty, or contain a single expression followed +by a comma, or contain two or more expressions, the expression yields a tuple. + +```python +() # (), the empty tuple +(1,) # (1,), a tuple of length 1 +(1, 2) # (1, 2), a 2-tuple or pair +(1, 2, 3) # (1, 2, 3), a 3-tuple or triple +``` + +In some contexts, such as a `return` or assignment statement or the +operand of a `for` statement, a tuple may be expressed without +parentheses. + +```python +x, y = 1, 2 + +return 1, 2 + +for x in 1, 2: + print(x) +``` + +Skylark (like Python 3) does not accept an unparenthesized tuple +expression as the operand of a list comprehension: + +```python +[2*x for x in 1, 2, 3] # parse error: unexpected ',' +``` + +### Dictionary expressions + +A dictionary expression is a comma-separated list of colon-separated +key/value expression pairs, enclosed in curly brackets, and it yields +a new dictionary object. +An optional comma may follow the final pair. + +```grammar {.good} +DictExpr = '{' [Entries [',']] '}' . +Entries = Entry {',' Entry} . +Entry = Test ':' Test . +``` + +Examples: + + +```python +{} +{"one": 1} +{"one": 1, "two": 2,} +``` + +The key and value expressions are evaluated in left-to-right order. +Evaluation fails if the same key is used multiple times. + +Only [hashable](#hashing) values may be used as the keys of a dictionary. +This includes all built-in types except dictionaries, sets, and lists; +a tuple is hashable only if its elements are hashable. + + +### List expressions + +A list expression is a comma-separated list of element expressions, +enclosed in square brackets, and it yields a new list object. +An optional comma may follow the last element expression. + +```grammar {.good} +ListExpr = '[' [Expression [',']] ']' . +``` + +Element expressions are evaluated in left-to-right order. 
+ +Examples: + +```python +[] # [], empty list +[1] # [1], a 1-element list +[1, 2, 3,] # [1, 2, 3], a 3-element list +``` + +### Unary operators + +There are three unary operators, all appearing before their operand: +`+`, `-`, and `not`. + +```grammar {.good} +UnaryExpr = '+' PrimaryExpr + | '-' PrimaryExpr + | 'not' Test + . +``` + +```text ++ number unary positive (int, float) +- number unary negation (int, float) +not x logical negation (any type) +``` + +The `+` and `-` operators may be applied to any number +(`int` or `float`): `+` returns the number unchanged, +and `-` returns its negation. +Unary `+` is never necessary in a correct program, +but may serve as an assertion that its operand is a number, +or as documentation. + +```python +if x > 0: + return +1 +elif x < 0: + return -1 +else: + return 0 +``` + +The `not` operator returns the negation of the truth value of its +operand. + +```python +not True # False +not False # True +not [1, 2, 3] # False +not "" # True +not 0 # True +``` + +<b>Implementation note:</b> +The parser in the Java implementation of Skylark does not accept unary +`+` expressions. + +### Binary operators + +Skylark has the following binary operators, arranged in order of increasing precedence: + +```text +or +and +not +== != < > <= >= in not in +| +& +- + +* / // % +``` + +Comparison operators, `in`, and `not in` are non-associative, +so the parser will not accept `0 <= i < n`. +All other binary operators of equal precedence associate to the left. + +```grammar {.good} +BinaryExpr = Test {Binop Test} . + +Binop = 'or' + | 'and' + | 'not' + | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in' + | '|' + | '&' + | '-' | '+' + | '*' | '%' | '/' | '//' + . +``` + +#### `or` and `and` + +The `or` and `and` operators yield, respectively, the logical disjunction and +conjunction of their arguments, which need not be Booleans. +The expression `x or y` yields the value of `x` if its truth value is `True`, +or the value of `y` otherwise.
+ +```skylark +False or False # False +False or True # True +True or False # True +True or True # True + +0 or "hello" # "hello" +1 or "hello" # 1 +``` + +Similarly, `x and y` yields the value of `x` if its truth value is +`False`, or the value of `y` otherwise. + +```skylark +False and False # False +False and True # False +True and False # False +True and True # True + +0 and "hello" # 0 +1 and "hello" # "hello" +``` + +These operators use "short circuit" evaluation, so the second +expression is not evaluated if the value of the first expression has +already determined the result, allowing constructions like these: + +```python +len(x) > 0 and x[0] == 1 # x[0] is not evaluated if x is empty +x and x[0] == 1 +len(x) == 0 or x[0] == "" +not x or not x[0] +``` + +#### Comparisons + +The `==` operator reports whether its operands are equal; the `!=` +operator is its negation. + +The operators `<`, `>`, `<=`, and `>=` perform an ordered comparison +of their operands. It is an error to apply these operators to +operands of unequal type, unless one of the operands is an `int` and +the other is a `float`. Of the built-in types, only the following +support ordered comparison, using the ordering relation shown: + +```shell +NoneType # None <= None +bool # False < True +int # mathematical +float # as defined by IEEE 754 +string # lexicographical +tuple # lexicographical +list # lexicographical +``` + +Comparison of floating point values follows the IEEE 754 standard, +which breaks several mathematical identities. For example, if `x` is +a `NaN` value, the comparisons `x < y`, `x == y`, and `x > y` all +yield false for all values of `y`. + +Applications may define additional types that support ordered +comparison. + +The remaining built-in types support only equality comparisons. +Values of type `dict` or `set` compare equal if their elements compare +equal, and values of type `function` or `builtin` are equal only to +themselves. 
+ +```shell +dict # equal contents +set # equal contents +function # identity +builtin # identity +``` + +#### Arithmetic operations + +The following table summarizes the binary arithmetic operations +available for built-in types: + +```shell +Arithmetic (int or float; result has type float unless both operands have type int) + number + number # addition + number - number # subtraction + number * number # multiplication + number / number # real division (result is always a float) + number // number # floored division + number % number # remainder of floored division + +Concatenation + string + string + list + list + tuple + tuple + dict + dict # (deprecated) + +Repetition (string/list/tuple) + int * sequence + sequence * int + +String interpolation + string % any # see String Interpolation + +Sets + int | int # bitwise union (OR) + set | iterable # set union + int & int # bitwise intersection (AND) + set & set # set intersection +``` + +The operands of the arithmetic operators `+`, `-`, `*`, `//`, and +`%` must both be numbers (`int` or `float`) but need not have the same type. +The type of the result has type `int` only if both operands have that type. +The result of real division `/` always has type `float`. + +The `+` operator may be applied to non-numeric operands of the same +type, such as two lists, two tuples, or two strings, in which case it +computes the concatenation of the two operands and yields a new value of +the same type. + +```python +"Hello, " + "world" # "Hello, world" +(1, 2) + (3, 4) # (1, 2, 3, 4) +[1, 2] + [3, 4] # [1, 2, 3, 4] +``` + +The `x + y` operation is deprecated for `dict` operands; see Google Issue b/31994014. 
+Use the [dict·update](dict·update) method instead: + +```python +# z = x + y +z = dict(x) +z.update(y) +``` + +The `*` operator may be applied to an integer _n_ and a value of type +`string`, `list`, or `tuple`, in which case it yields a new value +of the same sequence type consisting of _n_ repetitions of the original sequence. +The order of the operands is immaterial. +Negative values of _n_ behave like zero. + +```python +'mur' * 2 # 'murmur' +3 * range(3) # [0, 1, 2, 0, 1, 2, 0, 1, 2] +``` + +Applications may define additional types that support any subset of +these operators. + +The `&` operator requires two operands of the same type, either `int` or `set`. +For integers, it yields the bitwise intersection (AND) of its operands. +For sets, it yields a new set containing the intersection of the +elements of the operand sets, preserving the element order of the left +operand. + +The `|` operator likewise computes bitwise or set unions. +However, if the left operand of `|` is a set, the right operand may be +any iterable, not necessarily another set. +The result of `set | iterable` is a new set whose elements are the +union of the operands, preserving the order of the elements of the +operands, left before right. + +```python +0x12345678 & 0xFF # 0x00000078 +0x12345678 | 0xFF # 0x123456FF + +set([1, 2]) & set([2, 3]) # set([2]) +set([1, 2]) | set([2, 3]) # set([1, 2, 3]) +set([1, 2]) | [2,3] # set([1, 2, 3]) +``` + +<b>Implementation note:</b> +The Go implementation of the Skylark REPL requires the `-set` flag to +enable support for sets. +The Java implementation does not support sets, nor recognize `&` as a +token, nor support `int | int`. + + +#### Membership tests + +```text + any in sequence (list, tuple, dict, set, string) + any not in sequence +``` + +The `in` operator reports whether its first operand is a member of its +second operand, which must be a list, tuple, dict, set, or string. +The `not in` operator is its negation. +Both return a Boolean. 
+ +The meaning of membership varies by the type of the second operand: +the members of a list, tuple, or set are its elements; +the members of a dict are its keys; +the members of a string are all its substrings. + +```python +1 in [1, 2, 3] # True +4 in (1, 2, 3) # False +4 not in set([1, 2, 3]) # True + +d = {"one": 1, "two": 2} +"one" in d # True +"three" in d # False +1 in d # False + +"nasty" in "dynasty" # True +"a" in "banana" # True +"f" not in "way" # True +``` + +#### String interpolation + +The expression `format % args` performs _string interpolation_, a +simple form of template expansion. +The `format` string is interpreted as a sequence of literal portions +and _conversions_. +Each conversion, which starts with a `%` character, is replaced by its +corresponding value from `args`. +The characters following `%` in each conversion determine which +argument it uses and how to convert it to a string. + +Each `%` character marks the start of a conversion specifier, unless +it is immediately followed by another `%`, in which case both +characters denote a literal percent sign. + +If the `"%"` is immediately followed by `"(key)"`, the parenthesized +substring specifies the key of the `args` dictionary whose +corresponding value is the operand to convert. +Otherwise, the conversion's operand is the next element of `args`, +which must be a tuple with exactly one component per conversion, +unless the format string contains only a single conversion, in which +case `args` itself is its operand. + +Skylark does not support the flag, width, and padding specifiers +supported by Python's `%` and other variants of C's `printf`.
+ +After the optional `(key)` comes a single letter indicating what +operand types are valid and how to convert the operand `x` to a string: + +```text +% none literal percent sign +s any as if by str(x) +r any as if by repr(x) +d number signed integer decimal +i number signed integer decimal +o number signed octal +x number signed hexadecimal, lowercase +X number signed hexadecimal, uppercase +e number float exponential format, lowercase +E number float exponential format, uppercase +f number float decimal format, lowercase +F number float decimal format, uppercase +g number like %e for large exponents, %f otherwise +G number like %E for large exponents, %F otherwise +c string x (string must encode a single Unicode code point) + int as if by chr(x) +``` + +It is an error if the argument does not have the type required by the +conversion specifier. + +Examples: + +```python +"Hello %s, your score is %d" % ("Bob", 75) # "Hello Bob, your score is 75" + +"%d %o %x %c" % (65, 65, 65, 65) # "65 101 41 A" (decimal, octal, hexadecimal, Unicode) + +"%(greeting)s, %(audience)s" % dict( # "Hello, world" + greeting="Hello", + audience="world", +) + +"rate = %g%% APR" % 3.5 # "rate = 3.5% APR" +``` + +One subtlety: to use a tuple as the operand of a conversion in a format +string containing only a single conversion, you must wrap the tuple in +a singleton tuple: + +```python +"coordinates=%s" % (40.741491, -74.003680) # error: too many arguments for format string +"coordinates=%s" % ((40.741491, -74.003680),) # "coordinates=(40.741491, -74.003680)" +``` + +TODO: specify `%e` and `%f` more precisely. + +### Conditional expressions + +A conditional expression has the form `a if cond else b`. +It first evaluates the condition `cond`. +If it's true, it evaluates `a` and yields its value; +otherwise it yields the value of `b`. + +```grammar {.good} +IfExpr = Test 'if' Test 'else' Test .
+``` + +Example: + +```python +"yes" if enabled else "no" +``` + +### Comprehensions + +A comprehension constructs a new list or dictionary value by looping +over one or more iterables and evaluating a _body_ expression that produces +successive elements of the result. + +A list comprehension consists of a single expression followed by one +or more _clauses_, the first of which must be a `for` clause. +Each `for` clause resembles a `for` statement, and specifies an +iterable operand and a set of variables to be assigned by successive +values of the iterable. +An `if` clause resembles an `if` statement, and specifies a condition +that must be met for the body expression to be evaluated. +A sequence of `for` and `if` clauses acts like a nested sequence of +`for` and `if` statements. + +```grammar {.good} +ListComp = '[' Test {CompClause} ']' . +DictComp = '{' Entry {CompClause} '}' . + +CompClause = 'for' LoopVariables 'in' Test + | 'if' Test . + +LoopVariables = PrimaryExpr {',' PrimaryExpr} . +``` + +Examples: + +```python +[x*x for x in range(5)] # [0, 1, 4, 9, 16] +[x*x for x in range(5) if x%2 == 0] # [0, 4, 16] +[(x, y) for x in range(5) + if x%2 == 0 + for y in range(5) + if y > x] # [(0, 1), (0, 2), (0, 3), (0, 4), (2, 3), (2, 4)] +``` + +A dict comprehension resembles a list comprehension, but its body is a +pair of expressions, `key: value`, separated by a colon, +and its result is a dictionary containing the key/value pairs +for which the body expression was evaluated. +Evaluation fails if the value of any key is unhashable.
+ +As with a `for` loop, the loop variables may exploit compound +assignment: + +```python +[x*y+z for (x, y), z in [((2, 3), 5), (("o", 2), "!")]] # [11, 'oo!'] +``` + +Skylark, following Python 3, does not accept an unparenthesized +tuple as the operand of a `for` clause: + +```python +[x*x for x in 1, 2, 3] # parse error: unexpected comma +``` + +Comprehensions in Skylark, again following Python 3, define a new lexical +block, so assignments to loop variables have no effect on variables of +the same name in an enclosing block: + +```python +x = 1 +_ = [x for x in [2]] # new variable x is local to the comprehension +print(x) # 1 +``` + + +### Function and method calls + +```grammar {.good} +CallSuffix = '(' [Arguments] ')' . + +Arguments = Argument {',' Argument} . +Argument = identifier | identifier '=' Test | '*' identifier | '**' identifier . +``` + +A value `f` of type `function` or `builtin` may be called using the expression `f(...)`. +Applications may define additional types whose values may be called in the same way. + +A method call such as `filename.endswith(".sky")` is the composition +of two operations, `m = filename.endswith` and `m(".sky")`. +The first, a dot operation, yields a _bound method_, a function value +that pairs a receiver value (the `filename` string) with a choice of +method ([string·endswith](#string.endswith)). + +Only built-in or application-defined types may have methods. + +See [Functions](#functions) for an explanation of function parameter passing. + +### Dot expressions + +A dot expression `x.f` selects the attribute `f` (a field or method) +of the value `x`. + +Fields are possessed by none of the main Skylark [data types](#data-types), +but some application-defined types have them. +Methods belong to the built-in types `string`, `list`, `dict`, and +`set`, and to many application-defined types. + +```grammar {.good} +DotSuffix = '.' identifier . 
+``` + +A dot expression fails if the value does not have an attribute of the +specified name. + +Use the built-in `hasattr(x, "f")` function to ascertain whether a +value has a specific attribute, or `dir(x)` to enumerate all its +attributes. The `getattr(x, "f")` function can be used to select an +attribute when the name `"f"` is not known statically. + +A dot expression that selects a method typically appears within a call +expression, as in these examples: + +```python +["able", "baker", "charlie"].index("baker") # 1 +"banana".count("a") # 3 +"banana".reverse() # error: string has no .reverse field or method +``` + +But when not called immediately, the dot expression evaluates to a +_bound method_, that is, a method coupled to a specific receiver +value. A bound method can be called like an ordinary function, +without a receiver argument: + +```python +f = "banana".count +f # <built-in method count of string value> +f("a") # 3 +f("n") # 2 +``` + +<b>Implementation note:</b> +The Java implementation does not currently allow a method to be +selected but not immediately called. +See Google Issue b/21392896. + +### Index expressions + +An index expression `a[i]` yields the `i`th element of an _indexable_ +type such as a string, tuple, or list. The index `i` must be an `int` +value in the range -`n` ≤ `i` < `n`, where `n` is `len(a)`; any other +index results in an error. + +```grammar {.good} +SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' . +``` + +A valid negative index `i` behaves like the non-negative index `n+i`, +allowing for convenient indexing relative to the end of the +sequence. + +```python +"abc"[0] # "a" +"abc"[1] # "b" +"abc"[-1] # "c" + +("zero", "one", "two")[0] # "zero" +("zero", "one", "two")[1] # "one" +("zero", "one", "two")[-1] # "two" +``` + +An index expression `d[key]` may also be applied to a dictionary `d`, +to obtain the value associated with the specified key. It is an error +if the dictionary contains no such key. 
+ +An index expression appearing on the left side of an assignment causes +the specified list or dictionary element to be updated: + +```skylark +a = range(3) # a == [0, 1, 2] +a[2] = 7 # a == [0, 1, 7] + +coins["suzie b"] = 100 +``` + +It is a dynamic error to attempt to update an element of an immutable +type, such as a tuple or string, or a frozen value of a mutable type. + +### Slice expressions + +A slice expression `a[start:stop:stride]` yields a new value containing a +subsequence of `a`, which must be a string, tuple, or list. + +```grammar {.good} +SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' . +``` + +Each of the `start`, `stop`, and `stride` operands is optional; +if present, and not `None`, each must be an integer. +The `stride` value defaults to 1. +If the stride is not specified, the colon preceding it may be omitted too. +It is an error to specify a stride of zero. + +Conceptually, these operands specify a sequence of values `i` starting +at `start` and successively adding `stride` until `i` reaches or +passes `stop`. The result consists of the concatenation of values of +`a[i]` for which `i` is valid. + +The effective start and stop indices are computed from the three +operands as follows. Let `n` be the length of the sequence. + +<b>If the stride is positive:</b> +If the `start` operand was omitted, it defaults to -infinity. +If the `stop` operand was omitted, it defaults to +infinity. +For either operand, if a negative value was supplied, `n` is added to it. +The `start` and `stop` values are then "clamped" to the +nearest value in the range 0 to `n`, inclusive. + +<b>If the stride is negative:</b> +If the `start` operand was omitted, it defaults to +infinity. +If the `stop` operand was omitted, it defaults to -infinity. +For either operand, if a negative value was supplied, `n` is added to it. +The `start` and `stop` values are then "clamped" to the +nearest value in the range -1 to `n`-1, inclusive.
+ +```python +"abc"[1:] # "bc" (remove first element) +"abc"[:-1] # "ab" (remove last element) +"abc"[1:-1] # "b" (remove first and last element) +"banana"[1::2] # "aaa" (select alternate elements starting at index 1) +"banana"[4::-2] # "nnb" (select alternate elements in reverse, starting at index 4) +``` + +Unlike Python, Skylark does not allow a slice expression on the left +side of an assignment. + +Slicing a tuple or string may be more efficient than slicing a list +because tuples and strings are immutable, so the result of the +operation can share the underlying representation of the original +operand (when the stride is 1). By contrast, slicing a list requires +the creation of a new list and copying of the necessary elements. + +<!-- TODO tighten up this section --> + +### Lambda expressions + +A `lambda` expression yields a new function value. + +```grammar {.good} +LambdaExpr = 'lambda' [Parameters] ':' Test . + +Parameters = Parameter {',' Parameter} . +Parameter = identifier + | identifier '=' Test + | '*' identifier + | '**' identifier + . +``` + +Syntactically, a lambda expression consists of the keyword `lambda`, +followed by a parameter list like that of a `def` statement but +unparenthesized, then a colon `:`, and a single expression, the +_function body_. + +Example: + +```python +def map(f, list): + return [f(x) for x in list] + +map(lambda x: 2*x, range(3)) # [0, 2, 4] +``` + +As with functions created by a `def` statement, a lambda function +captures the syntax of its body, the default values of any optional +parameters, the value of each free variable appearing in its body, and +the global dictionary of the current module. + +The name of a function created by a lambda expression is `"lambda"`. + +The two statements below are essentially equivalent, except that the +function created by the `def` statement is named `twice` and the +function created by the lambda expression is called `lambda`.
+ +```python +def twice(x): + return x * 2 + +twice = lambda x: x * 2 +``` + +<b>Implementation note:</b> +The Go implementation of the Skylark REPL requires the `-lambda` flag +to enable support for lambda expressions. +The Java implementation does not support them. +See Google Issue b/36358844. + + +## Statements + +```grammar {.good} +Statement = DefStmt | IfStmt | ForStmt | SimpleStmt . +SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' . +SmallStmt = ReturnStmt + | BreakStmt | ContinueStmt | PassStmt + | AssignStmt + | ExprStmt + . +``` + +### Pass statements + +A `pass` statement does nothing. Use a `pass` statement when the +syntax requires a statement but no behavior is required, such as the +body of a function that does nothing. + +```grammar {.good} +PassStmt = 'pass' . +``` + +Example: + +```python +def noop(): + pass + +def list_to_dict(items): + # Convert list of tuples to dict + m = {} + for k, m[k] in items: + pass + return m +``` + +### Assignments + +An assignment statement has the form `lhs = rhs`. It evaluates the +expression on the right-hand side then assigns its value (or values) to +the variable (or variables) on the left-hand side. + +```grammar {.good} +AssignStmt = Expression '=' Expression . +``` + +The expression on the left-hand side is called a _target_. The +simplest target is the name of a variable, but a target may also have +the form of an index expression, to update the element of a list or +dictionary, or a dot expression, to update the field of an object: + +```python +k = 1 +a[i] = v +m.f = "" +``` + +Compound targets may consist of a comma-separated list of +subtargets, optionally surrounded by parentheses or square brackets, +and targets may be nested arbitrarily in this way. +An assignment to a compound target checks that the right-hand value is a +sequence with the same number of elements as the target.
+Each element of the sequence is then assigned to the corresponding +element of the target, recursively applying the same logic. +It is a static error if the sequence is empty. + +```python +pi, e = 3.141, 2.718 +(x, y) = f() +[zero, one, two] = range(3) + +[(a, b), (c, d)] = ("ab", "cd") +``` + +The same process for assigning a value to a target expression is used +in `for` loops and in comprehensions. + +<b>Implementation note:</b> +In the Java implementation, targets cannot be dot expressions. + + +### Augmented assignments + +An augmented assignment, which has the form `lhs op= rhs` updates the +variable `lhs` by applying a binary arithmetic operator `op` (one of +`+`, `-`, `*`, `/`, `//`, `%`) to the previous value of `lhs` and the value +of `rhs`. + +```grammar {.good} +AssignStmt = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=') Expression . +``` + +The left-hand side must be a simple target: +a name, an index expression, or a dot expression. + +```python +x -= 1 +x.filename += ".sky" +a[index()] *= 2 +``` + +Any subexpressions in the target on the left-hand side are evaluated +exactly once, before the evaluation of `rhs`. +The first two assignments above are thus equivalent to: + +```python +x = x - 1 +x.filename = x.filename + ".sky" +``` + +and the third assignment is similar in effect to the following two +statements but does not declare a new temporary variable `i`: + +```python +i = index() +a[i] = a[i] * 2 +``` + +### Function definitions + +A `def` statement creates a named function and assigns it to a variable. + +```grammar {.good} +DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite . 
+``` + +Example: + +```python +def twice(x): + return x * 2 + +str(twice) # "<function twice>" +twice(2) # 4 +twice("two") # "twotwo" +``` + +The function's name is preceded by the `def` keyword and followed by +the parameter list (which is enclosed in parentheses), a colon, and +then an indented block of statements which form the body of the function. + +The parameter list is a comma-separated list whose elements are of +four kinds. First come zero or more required parameters, which are +simple identifiers; all calls must provide an argument value for these parameters. + +The required parameters are followed by zero or more optional +parameters, of the form `name=expression`. The expression specifies +the default value for the parameter for use in calls that do not +provide an argument value for it. + +The required and optional parameters are optionally followed by a single parameter +name preceded by a `*`. This is called the _varargs_ parameter, +and it accumulates surplus positional arguments specified by a call. + +Finally, there may be an optional parameter name preceded by `**`. +This is called the _keyword arguments_ parameter, and accumulates in a +dictionary any surplus `name=value` arguments that do not match a +prior parameter. + +Here are some example parameter lists: + +```python +def f(): pass +def f(a, b, c): pass +def f(a, b, c=1): pass +def f(a, b, c=1, *args): pass +def f(a, b, c=1, *args, **kwargs): pass +def f(**kwargs): pass +``` + +Execution of a `def` statement creates a new function object. The +function object contains: the syntax of the function body; the default +value for each optional parameter; the value of each free variable +referenced within the function body; and the global dictionary of the +current module. + +<!-- this is too implementation-oriented; it's not a spec. --> + +<b>Implementation note:</b> +The Go implementation of the Skylark REPL requires the `-nesteddef` +flag to enable support for nested `def` statements.
+The Java implementation does not permit a `def` statement to be +nested within the body of another function. + + +### Return statements + +A `return` statement ends the execution of a function and returns a +value to the caller of the function. + +```grammar {.good} +ReturnStmt = 'return' [Expression] . +``` + +A return statement may have zero, one, or more +result expressions separated by commas. +With no expressions, the function has the result `None`. +With a single expression, the function's result is the value of that expression. +With multiple expressions, the function's result is a tuple. + +```python +return # returns None +return 1 # returns 1 +return 1, 2 # returns (1, 2) +``` + +### Expression statements + +An expression statement evaluates an expression and discards its result. + +```grammar {.good} +ExprStmt = Expression . +``` + +Any expression may be used as a statement, but an expression statement is +most often used to call a function for its side effects. + +```python +list.append(1) +``` + +### If statements + +An `if` statement evaluates an expression (the _condition_), then, if +the truth value of the condition is `True`, executes a list of +statements. + +```grammar {.good} +IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] . +``` + +Example: + +```python +if score >= 100: + print("You win!") + return +``` + +An `if` statement may have an `else` block defining a second list of +statements to be executed if the condition is false. + +```python +if score >= 100: + print("You win!") + return +else: + print("Keep trying...") + continue +``` + +It is common for the `else` block to contain another `if` statement. +To avoid increasing the nesting depth unnecessarily, the `else` and +following `if` may be combined as `elif`: + +```python +if x > 0: + result = +1 +elif x < 0: + result = -1 +else: + result = 0 +``` + +An `if` statement is permitted only within a function definition.
+An `if` statement at top level results in a static error. + +### For loops + +A `for` loop evaluates its operand, which must be an iterable value. +Then, for each element of the iterable's sequence, the loop assigns +the successive element values to one or more variables and executes a +list of statements, the _loop body_. + +```grammar {.good} +ForStmt = 'for' LoopVariables 'in' Expression ':' Suite . +``` + +Example: + +```python +for x in range(10): + print(10) +``` + +The assignment of each value to the loop variables follows the same +rules as an ordinary assignment. In this example, two-element lists +are repeatedly assigned to the pair of variables (a, i): + +```python +for a, i in [["a", 1], ["b", 2], ["c", 3]]: + print(a, i) # prints "a 1", "b 2", "c 3" +``` + +Because Skylark loops always iterate over a finite sequence, they are +guaranteed to terminate, unlike loops in most languages which can +execute an arbitrary and perhaps unbounded number of iterations. + +Within the body of a `for` loop, `break` and `continue` statements may +be used to stop the execution of the loop or advance to the next +iteration. + +In Skylark, a `for` loop is permitted only within a function definition. +A `for` loop at top level results in a static error. + + +### Break and Continue + +The `break` and `continue` statements terminate the current iteration +of a `for` loop. Whereas the `continue` statement resumes the loop at +the next iteration, a `break` statement terminates the entire loop. + +```grammar {.good} +BreakStmt = 'break' . +ContinueStmt = 'continue' . +``` + +Example: + +```python +for x in range(10): + if x%2 == 1: + continue # skip odd numbers + if x > 7: + break # stop at 8 + print(x) # prints "0", "2", "4", "6" +``` + +Both statements affect only the innermost lexically enclosing loop. +It is a static error to use a `break` or `continue` statement outside a +loop. 
+ + +### Load statements + +The `load` statement loads another Skylark module, extracts one or +more values from it, and binds them to names in the current module. + +Syntactically, a load statement looks like a function call `load(...)`. +However, `load` is not a keyword, so the parser converts any expression +statement containing a function call of the form `load(...)` into a +load statement. +In all other contexts, `load` acts like an ordinary identifier. + +<!-- +The awkwardness of load statements is a consequence of staying a +strict subset of Python syntax, which allows reuse of existing tools +such as editor support. Python import statements are inadequate for +Skylark because they don't allow arbitrary file names for module names. +--> + +A load statement within a function is a static error. + +A load statement requires at least two "arguments". +The first must be a literal string; it identifies the module to load. +Its interpretation is determined by the application into which the +Skylark interpreter is embedded, and is not specified here. + +During execution, the application determines what action to take for a +load statement. +A typical implementation locates and executes a Skylark file, +populating a cache of files executed so far to avoid duplicate work, +to obtain a module, which is a mapping from global names to values. + +The remaining arguments are a mixture of literal strings, such as +`"x"`, or named literal strings, such as `y="x"`. + +The literal string (`"x"`), which must denote a valid identifier not +starting with `_`, specifies the name to extract from the loaded +module. In effect, names starting with `_` are not exported. +The name (`y`) specifies the local name; +if no name is given, the local name matches the quoted name. 
+ +```python +load("module.sky", "x", "y", "z") # assigns x, y, and z +load("module.sky", "x", y2="y", "z") # assigns x, y2, and z +``` + + +## Module execution + +Each Skylark file defines a _module_, which is a mapping from the +names of global variables to their values. +When a Skylark file is executed, whether directly by the application +or indirectly through a `load` statement, a new Skylark thread is +created, and this thread executes all the top-level statements in the +file. +Because if-statements and for-loops cannot appear outside of a function, +control flows from top to bottom. + +If execution reaches the end of the file, module initialization is +successful. +At that point, the value of each of the module's global variables is +frozen, rendering subsequent mutation impossible. +The module is then ready for use by another Skylark thread, such as +one executing a load statement. +Such threads may access values or call functions defined in the loaded +module. + +A Skylark thread may carry state on behalf of the application into +which it is embedded, and application-defined functions may behave +differently depending on this thread state. +Because module initialization always occurs in a new thread, thread +state is never carried from a higher-level module into a lower-level +one. +The initialization behavior of a module is thus independent of +whichever module triggered its initialization. + +If a Skylark thread encounters an error, execution stops and the error +is reported to the application, along with a backtrace showing the +stack of active function calls at the time of the error. +If an error occurs during initialization of a Skylark module, any +active `load` statements waiting for initialization of the module also +fail. + +Skylark provides no mechanism by which errors can be handled within +the language. + + +## Built-in constants and functions + +The outermost block of the Skylark environment is known as the "universe" block. 
+It defines a number of fundamental values and functions needed by all Skylark programs, +such as `None`, `True`, `False`, and `len`. + +These names are not reserved words so Skylark programs are free to +redefine them in a smaller block such as a function body or even at +the top level of a module. However, doing so may be confusing to the +reader. Nonetheless, this rule permits names to be added to the +universe block in later versions of the language without breaking +existing programs. + + +### None + +`None` is the distinguished value of the type `NoneType`. + +### True and False + +`True` and `False` are the two values of type `bool`. + +### any + +`any(x)` returns `True` if any element of the iterable sequence x is true. +If the iterable is empty, it returns `False`. + +### all + +`all(x)` returns `False` if any element of the iterable sequence x is false. +If the iterable is empty, it returns `True`. + +### bool + +`bool(x)` interprets `x` as a Boolean value---`True` or `False`. +With no argument, `bool()` returns `False`. + + +### chr + +`chr(i)` returns a string that encodes the single Unicode code point +whose value is specified by the integer `i`. `chr` fails unless 0 ≤ +`i` ≤ 0x10FFFF. + +Example: + +```python +chr(65) # "A", +chr(1049) # "Й", CYRILLIC CAPITAL LETTER SHORT I +chr(0x1F63F) # "😿", CRYING CAT FACE +``` + +See also: `ord`. + +<b>Implementation note:</b> `chr` is not provided by the Java implementation. + +### cmp + +`cmp(x, y)` compares two values `x` and `y` and returns an integer according to the outcome. +The result is negative if `x < y`, positive if `x > y`, and zero otherwise. +Consequently, `cmp(x, x)` always returns zero, even for floating-point NaN values. + +<b>Implementation note:</b> `cmp` is not provided by the Java implementation. + +### dict + +`dict` creates a dictionary. 
It accepts up to one positional +argument, which is interpreted as an iterable of two-element +sequences (pairs), each specifying a key/value pair in +the resulting dictionary. + +`dict` also accepts any number of keyword arguments, each of which +specifies a key/value pair in the resulting dictionary; +each keyword is treated as a string. + +```python +dict() # {}, empty dictionary +dict([(1, 2), (3, 4)]) # {1: 2, 3: 4} +dict([(1, 2), ["a", "b"]]) # {1: 2, "a": "b"} +dict(one=1, two=2) # {"one": 1, "two": 2} +dict([(1, 2)], x=3) # {1: 2, "x": 3} +``` + +With no arguments, `dict()` returns a new empty dictionary. + +`dict(x)` where x is a dictionary returns a new copy of x. + +### dir + +`dir(x)` returns a list of the names of the attributes (fields and methods) of its operand. +The attributes of a value `x` are the names `f` such that `x.f` is a valid expression. + +For example, + +```python +dir("hello") # ['capitalize', 'count', ...], the methods of a string +``` + +Several types known to the interpreter, such as list, string, and dict, have methods, but none have fields. +However, an application may define types with fields that may be read or set by statements such as these: + +```text +y = x.f +x.f = y +``` + +### enumerate + +`enumerate(x)` returns a list of (index, value) pairs, each containing +successive values of the iterable sequence x and the index of the value +within the sequence. + +The optional second parameter, `start`, specifies an integer value to +add to each index. + +```python +enumerate(["zero", "one", "two"]) # [(0, "zero"), (1, "one"), (2, "two")] +enumerate(["one", "two"], 1) # [(1, "one"), (2, "two")] +``` + +### float + +`float(x)` interprets its argument as a floating-point number. + +If x is a `float`, the result is x. +If x is an `int`, the result is the nearest floating point value to x. +If x is a string, the string is interpreted as a floating-point literal. +With no arguments, `float()` returns `0.0`. 
+ +<b>Implementation note:</b> +Floating-point numbers are an optional feature. +The Go implementation of the Skylark REPL requires the `-fp` flag to +enable support for floating-point literals, the `float` built-in +function, and the real division operator `/`. +The Java implementation does not yet support floating-point numbers. + + +### freeze + +`freeze(x)` freezes x and all values transitively reachable from it. +Subsequent attempts to modify any of those values will fail. + +At the end of module execution, the value of each global in the module +dictionary is frozen as if by `freeze`. + +<b>Implementation note:</b> +The `freeze` function is an optional feature of the Go implementation, +and it must be enabled in the REPL using the `-freeze` flag. +It is not present in the Java implementation. + +### getattr + +`getattr(x, name)` returns the value of the attribute (field or method) of x named `name`. +It is a dynamic error if x has no such attribute. + +`getattr(x, "f")` is equivalent to `x.f`. + +```python +getattr("banana", "split")("a") # ["b", "n", "n", ""], equivalent to "banana".split("a") +``` + +### hasattr + +`hasattr(x, name)` reports whether x has an attribute (field or method) named `name`. + +### hash + +`hash(x)` returns an integer hash value for x such that `x == y` implies `hash(x) == hash(y)`. + +`hash` fails if x, or any value upon which its hash depends, is unhashable. + +<b>Implementation note:</b> the Java implementation of the `hash` +function accepts only strings. + +### int + +`int(x[, base])` interprets its argument as an integer. + +If x is an `int`, the result is x. +If x is a `float`, the result is the integer value nearest to x, +truncating towards zero; it is an error if x is not finite (`NaN`, +`+Inf`, `-Inf`). +If x is a `bool`, the result is 0 for `False` or 1 for `True`. + +If x is a string, it is interpreted like an integer literal; +an optional base prefix (`0`, `0x`, `0X`) determines which base to use. 
+The string may specify an arbitrarily large integer, +whereas true integer literals are restricted to 64 bits. +If a non-zero `base` argument is provided, the string is interpreted +in that base and no base prefix is permitted; the base argument may +be specified by name. + +`int()` with no arguments returns 0. + +### len + +`len(x)` returns the number of elements in its argument. + +It is a dynamic error if its argument is not a sequence. + +### list + +`list` constructs a list. + +`list(x)` returns a new list containing the elements of the +iterable sequence x. + +With no argument, `list()` returns a new empty list. + +### max + +`max(x)` returns the greatest element in the iterable sequence x. + +It is an error if any element does not support ordered comparison, +or if the sequence is empty. + +The optional named parameter `key` specifies a function to be applied +to each element prior to comparison. + +```python +max([3, 1, 4, 1, 5, 9]) # 9 +max("two", "three", "four") # "two", the lexicographically greatest +max("two", "three", "four", key=len) # "three", the longest +``` + +### min + +`min(x)` returns the least element in the iterable sequence x. + +It is an error if any element does not support ordered comparison, +or if the sequence is empty. + +```python +min([3, 1, 4, 1, 5, 9]) # 1 +min("two", "three", "four") # "four", the lexicographically least +min("two", "three", "four", key=len) # "two", the shortest +``` + + +### ord + +`ord(s)` returns the integer value of the sole Unicode code point encoded by the string `s`. + +If `s` does not encode exactly one Unicode code point, `ord` fails. +Each invalid code within the string is treated as if it encodes the +Unicode replacement character, U+FFFD. + +Example: + +```python +ord("A") # 65 +ord("Й") # 1049 +ord("😿") # 0x1F63F +ord("Й"[1:]) # 0xFFFD (Unicode replacement character) +``` + +See also: `chr`. + +<b>Implementation note:</b> `ord` is not provided by the Java implementation. 
+ +### print + +`print(*args, **kwargs)` prints its arguments, followed by a newline. +Arguments are formatted as if by `str(x)` and separated with a space. +Keyword arguments are preceded by their name. + +Example: + +```python +print(1, "hi", x=3) # "1 hi x=3\n" +``` + +### range + +`range` returns a new list of integers drawn from the specified interval and stride. + +```python +range(stop) # equivalent to range(0, stop) +range(start, stop) # equivalent to range(start, stop, 1) +range(start, stop, step) +``` + +`range` requires between one and three integer arguments. +With one argument, `range(stop)` returns the ascending sequence of non-negative integers less than `stop`. +With two arguments, `range(start, stop)` returns only integers not less than `start`. + +With three arguments, `range(start, stop, step)` returns integers +formed by successively adding `step` to `start` until the value meets or passes `stop`. + +```python +range(10) # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +range(3, 10) # [3, 4, 5, 6, 7, 8, 9] +range(3, 10, 2) # [3, 5, 7, 9] +range(10, 3, -2) # [10, 8, 6, 4] +``` + +### repr + +`repr(x)` formats its argument as a string. + +All strings in the result are double-quoted. + +```python +repr(1) # '1' +repr("x") # '"x"' +repr([1, "x"]) # '[1, "x"]' +``` + +### reversed + +`reversed(x)` returns a new list containing the elements of the iterable sequence x in reverse order. + +```python +reversed(range(5)) # [4, 3, 2, 1, 0] +reversed("stressed".split_codepoints()) # ["d", "e", "s", "s", "e", "r", "t", "s"] +reversed({"one": 1, "two": 2}.keys()) # ["two", "one"] +``` + +### set + +`set(x)` returns a new set containing the elements of the iterable x. +With no argument, `set()` returns a new empty set. + +```python +set([3, 1, 4, 1, 5, 9]) # set([3, 1, 4, 5, 9]) +``` + +<b>Implementation note:</b> +Sets are an optional feature of the Go implementation of Skylark. 
+ + +### sorted + +`sorted(x)` returns a new list containing the elements of the iterable sequence x, in sorted order. + +The optional named parameter `reverse`, if true, causes `sorted` to +return results in reverse sorted order. + +The optional named parameter `cmp` specifies an alternative function +for ordered comparison of two elements. + +```python +sorted(set("harbors".split_codepoints())) # ['a', 'b', 'h', 'o', 'r', 's'] +sorted([3, 1, 4, 1, 5, 9]) # [1, 1, 3, 4, 5, 9] +sorted([3, 1, 4, 1, 5, 9], reverse=True) # [9, 5, 4, 3, 1, 1] + +def cmplen(x, y): return len(x) - len(y) + +sorted(["two", "three", "four"], cmp=cmplen) # ["two", "four", "three"], shortest to longest +sorted(["two", "three", "four"], cmp=cmplen, reverse=True) # ["three", "four", "two"], longest to shortest +``` + +<b>Implementation note:</b> +The Java implementation does not support the `cmp`, `key`, and +`reverse` parameters. + +### str + +`str(x)` formats its argument as a string. + +If x is a string, the result is x (without quotation). +All other strings, such as elements of a list of strings, are double-quoted. + +```python +str(1) # '1' +str("x") # 'x' +str([1, "x"]) # '[1, "x"]' +``` + +### tuple + +`tuple(x)` returns a tuple containing the elements of the iterable x. + +With no arguments, `tuple()` returns the empty tuple. + +### type + +`type(x)` returns a string describing the type of its operand. + +```python +type(None) # "NoneType" +type(0) # "int" +type(0.0) # "float" +``` + +### zip + +`zip()` returns a new list of n-tuples formed from corresponding +elements of each of the n iterable sequences provided as arguments to +`zip`. That is, the first tuple contains the first element of each of +the sequences, the second tuple contains the second element of each +of the sequences, and so on. The result list is only as long as the +shortest of the input sequences. 
+ +```python +zip() # [] +zip(range(5)) # [(0,), (1,), (2,), (3,), (4,)] +zip(range(5), "abc") # [(0, "a"), (1, "b"), (2, "c")] +``` + +## Built-in methods + +This section lists the methods of built-in types. Methods are selected +using [dot expressions](#dot-expressions). +For example, strings have a `count` method that counts +occurrences of a substring; `"banana".count("a")` yields `3`. + +As with built-in functions, built-in methods accept only positional +arguments except where noted. +The parameter names serve merely as documentation. + + +### dict·clear + +`D.clear()` removes all the entries of dictionary D and returns `None`. +It fails if the dictionary is frozen or if there are active iterators. + +```python +x = {"one": 1, "two": 2} +x.clear() # None +print(x) # {} +``` + +<b>Implementation note:</b> +`Dict·clear` is not provided by the Java implementation. + +### dict·get + +`D.get(key[, default])` returns the dictionary value corresponding to the given key. +If the dictionary contains no such value, `get` returns `None`, or the +value of the optional `default` parameter if present. + +`get` fails if `key` is unhashable, or the dictionary is frozen or has active iterators. + +```python +x = {"one": 1, "two": 2} +x.get("one") # 1 +x.get("three") # None +x.get("three", 0) # 0 +``` + +### dict·items + +`D.items()` returns a new list of key/value pairs, one per element in +dictionary D, in the same order as they would be returned by a `for` loop. + +```python +x = {"one": 1, "two": 2} +x.items() # [("one", 1), ("two", 2)] +``` + +### dict·keys + +`D.keys()` returns a new list containing the keys of dictionary D, in the +same order as they would be returned by a `for` loop. + +```python +x = {"one": 1, "two": 2} +x.keys() # ["one", "two"] +``` + +### dict·pop + +`D.pop(key[, default])` returns the value corresponding to the specified +key, and removes it from the dictionary. 
If the dictionary contains no +such value, and the optional `default` parameter is present, `pop` +returns that value; otherwise, it fails. + +`pop` fails if `key` is unhashable, or the dictionary is frozen or has active iterators. + +```python +x = {"one": 1, "two": 2} +x.pop("one") # 1 +x # {"two": 2} +x.pop("three", 0) # 0 +x.pop("four") # error: missing key +``` + +### dict·popitem + +`D.popitem()` returns the first key/value pair, removing it from the dictionary. + +`popitem` fails if the dictionary is empty, frozen, or has active iterators. + +```python +x = {"one": 1, "two": 2} +x.popitem() # ("one", 1) +x.popitem() # ("two", 2) +x.popitem() # error: empty dict +``` + +### dict·setdefault + +`D.setdefault(key[, default])` returns the dictionary value corresponding to the given key. +If the dictionary contains no such value, `setdefault`, like `get`, +returns `None` or the value of the optional `default` parameter if +present; `setdefault` additionally inserts the new key/value entry into the dictionary. + +`setdefault` fails if the key is unhashable, or if the dictionary is frozen or has active iterators. + +```python +x = {"one": 1, "two": 2} +x.setdefault("one") # 1 +x.setdefault("three", 0) # 0 +x # {"one": 1, "two": 2, "three": 0} +x.setdefault("four") # None +x # {"one": 1, "two": 2, "three": 0, "four": None} +``` + +### dict·update + +`D.update([pairs][, name=value[, ...]])` makes a sequence of key/value +insertions into dictionary D, then returns `None`. + +If the positional argument `pairs` is present, it must be `None`, +another `dict`, or some other iterable. +If it is another `dict`, then its key/value pairs are inserted into D. +If it is an iterable, it must provide a sequence of pairs (or other iterables of length 2), +each of which is treated as a key/value pair to be inserted into D. + +For each `name=value` argument present, the name is converted to a +string and used as the key for an insertion into D, with its corresponding +value being `value`. 
+ +`update` fails if the dictionary is frozen or has active iterators. + +```python +x = {} +x.update([("a", 1), ("b", 2)], c=3) +x.update({"d": 4}) +x.update(e=5) +x # {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5} +``` + +### dict·values + +`D.values()` returns a new list containing the dictionary's values, in the +same order as they would be returned by a `for` loop over the +dictionary. + +```python +x = {"one": 1, "two": 2} +x.values() # [1, 2] +``` + +### list·append + +`L.append(x)` appends `x` to the list L, and returns `None`. + +`append` fails if the list is frozen or has active iterators. + +```python +x = [] +x.append(1) # None +x.append(2) # None +x.append(3) # None +x # [1, 2, 3] +``` + +### list·clear + +`L.clear()` removes all the elements of the list L and returns `None`. +It fails if the list is frozen or if there are active iterators. + +```python +x = [1, 2, 3] +x.clear() # None +x # [] +``` + +### list·extend + +`L.extend(x)` appends the elements of `x`, which must be iterable, to +the list L, and returns `None`. + +`extend` fails if `x` is not iterable, or if the list L is frozen or has active iterators. + +```python +x = [] +x.extend([1, 2, 3]) # None +x.extend(["foo"]) # None +x # [1, 2, 3, "foo"] +``` + +### list·index + +`L.index(x[, start[, end]])` finds `x` within the list L and returns its index. + +The optional `start` and `end` parameters restrict the portion of +list L that is inspected. If provided and not `None`, they must be list +indices of type `int`. If an index is negative, `len(L)` is effectively +added to it, then if the index is outside the range `[0:len(L)]`, the +nearest value within that range is used; see [Indexing](#indexing). + +`index` fails if `x` is not found in L, or if `start` or `end` +is not a valid index (`int` or `None`). 
+ +```python +x = list("banana".split_bytes()) +x.index("a") # 1 (bAnana) +x.index("a", 2) # 3 (banAna) +x.index("a", -2) # 5 (bananA) +``` + +### list·insert + +`L.insert(i, x)` inserts the value `x` in the list L at index `i`, moving +higher-numbered elements along by one. It returns `None`. + +As usual, the index `i` must be an `int`. If its value is negative, +the length of the list is added, then its value is clamped to the +nearest value in the range `[0:len(L)]` to yield the effective index. + +`insert` fails if the list is frozen or has active iterators. + +```python +x = ["b", "c", "e"] +x.insert(0, "a") # None +x.insert(-1, "d") # None +x # ["a", "b", "c", "d", "e"] +``` + +### list·pop + +`L.pop([index])` removes and returns the last element of the list L, or, +if the optional index is provided, at that index. + +`pop` fails if the index is negative or not less than the length of +the list, or if the list is frozen or has active iterators. + +```python +x = [1, 2, 3] +x.pop() # 3 +x.pop() # 2 +x # [1] +``` + +### list·remove + +`L.remove(x)` removes the first occurrence of the value `x` from the list L, and returns `None`. + +`remove` fails if the list does not contain `x`, is frozen, or has active iterators. + +```python +x = [1, 2, 3, 2] +x.remove(2) # None (x == [1, 3, 2]) +x.remove(2) # None (x == [1, 3]) +x.remove(2) # error: element not found +``` + +### set·union + +`S.union(iterable)` returns a new set into which have been inserted +all the elements of set S and all the elements of the argument, which +must be iterable. + +`union` fails if any element of the iterable is not hashable. + +```python +x = set([1, 2]) +y = set([2, 3]) +x.union(y) # set([1, 2, 3]) +``` + +### string·bytes + +`S.bytes()` returns an iterable value containing the +sequence of numeric byte values in the string S. + +To materialize the entire sequence of bytes, apply `list(...)` to the result. 
+ +Example: + +```python +list("Hello, 世界".bytes()) # [72, 101, 108, 108, 111, 44, 32, 228, 184, 150, 231, 149, 140] +``` + +See also: `string·split_bytes`. + +<b>Implementation note:</b> `bytes` is not provided by the Java implementation. + +### string·capitalize + +`S.capitalize()` returns a copy of string S with all Unicode letters +that begin words changed to their title case. + +```python +"hello, world!".capitalize() # "Hello, World!" +``` + +### string·codepoints + +`S.codepoints()` returns an iterable value containing the +sequence of integer Unicode code points encoded by the string S. +Each invalid code within the string is treated as if it encodes the +Unicode replacement character, U+FFFD. + +By returning an iterable, not a list, the cost of decoding the string +is deferred until actually needed; apply `list(...)` to the result to +materialize the entire sequence. + +Example: + +```python +list("Hello, 世界".codepoints()) # [72, 101, 108, 108, 111, 44, 32, 19990, 30028] + +for cp in "Hello, 世界".codepoints(): + if cp == ord(','): + break + print(chr(cp)) # prints "H", "e", "l", "l", "o" +``` + +See also: `string·split_codepoints`. + +<b>Implementation note:</b> `codepoints` is not provided by the Java implementation. + +### string·count + +`S.count(sub[, start[, end]])` returns the number of occurrences of +`sub` within the string S, or, if the optional substring indices +`start` and `end` are provided, within the designated substring of S. +They are interpreted according to Skylark's [indexing conventions](#indexing). + +```python +"hello, world!".count("o") # 2 +"hello, world!".count("o", 7, 12) # 1 (in "world") +``` + +### string·endswith + +`S.endswith(suffix)` reports whether the string S has the specified suffix. + +```python +"filename.sky".endswith(".sky") # True +``` + +### string·find + +`S.find(sub[, start[, end]])` returns the index of the first +occurrence of the substring `sub` within S. 
+ +If either or both of `start` or `end` are specified, +they specify a subrange of S to which the search should be restricted. +They are interpreted according to Skylark's [indexing conventions](#indexing). + +If no occurrence is found, `find` returns -1. + +```python +"bonbon".find("on") # 1 +"bonbon".find("on", 2) # 4 +"bonbon".find("on", 2, 5) # -1 +``` + +### string·format + +`S.format(*args, **kwargs)` returns a version of the format string S +in which bracketed portions `{...}` are replaced +by arguments from `args` and `kwargs`. + +Within the format string, a pair of braces `{{` or `}}` is treated as +a literal open or close brace. +Each unpaired open brace must be matched by a close brace `}`. +The optional text between corresponding open and close braces +specifies which argument to use and how to format it, and consists of +three components, all optional: +a field name, a conversion preceded by '`!`', and a format specifier +preceded by '`:`'. + +```text +{field} +{field:spec} +{field!conv} +{field!conv:spec} +``` + +The *field name* may be either a decimal number or a keyword. +A number is interpreted as the index of a positional argument; +a keyword specifies the value of a keyword argument. +If all the numeric field names form the sequence 0, 1, 2, and so on, +they may be omitted and those values will be implied; however, +the explicit and implicit forms may not be mixed. + +The *conversion* specifies how to convert an argument value `x` to a +string. It may be either `!r`, which converts the value using +`repr(x)`, or `!s`, which converts the value using `str(x)` and is +the default. + +The *format specifier*, after a colon, specifies field width, +alignment, padding, and numeric precision. +Currently it must be empty, but it is reserved for future use. 
+ +```python +"a{x}b{y}c{}".format(1, x=2, y=3) # "a2b3c1" +"a{}b{}c".format(1, 2) # "a1b2c" +"({1}, {0})".format("zero", "one") # "(one, zero)" +"Is {0!r} {0!s}?".format('heterological') # 'Is "heterological" heterological?' +``` + +### string·index + +`S.index(sub[, start[, end]])` returns the index of the first +occurrence of the substring `sub` within S, like `S.find`, except +that if the substring is not found, the operation fails. + +```python +"bonbon".index("on") # 1 +"bonbon".index("on", 2) # 4 +"bonbon".index("on", 2, 5) # error: substring not found (in "nbo") +``` + +### string·isalnum + +`S.isalnum()` reports whether the string S is non-empty and consists only of +Unicode letters and digits. + +```python +"base64".isalnum() # True +"Catch-22".isalnum() # False +``` + +### string·isalpha + +`S.isalpha()` reports whether the string S is non-empty and consists only of Unicode letters. + +```python +"ABC".isalpha() # True +"Catch-22".isalpha() # False +"".isalpha() # False +``` + +### string·isdigit + +`S.isdigit()` reports whether the string S is non-empty and consists only of Unicode digits. + +```python +"123".isdigit() # True +"Catch-22".isdigit() # False +"".isdigit() # False +``` + +### string·islower + +`S.islower()` reports whether the string S contains at least one cased Unicode +letter, and all such letters are lowercase. + +```python +"hello, world".islower() # True +"Catch-22".islower() # False +"123".islower() # False +``` + +### string·isspace + +`S.isspace()` reports whether the string S is non-empty and consists only of Unicode spaces. + +```python +" ".isspace() # True +"\r\t\n".isspace() # True +"".isspace() # False +``` + +### string·istitle + +`S.istitle()` reports whether the string S contains at least one cased Unicode +letter, and all such letters that begin a word are in title case. 
+ +```python +"Hello, World!".istitle() # True +"Catch-22".istitle() # True +"HAL-9000".istitle() # False +"123".istitle() # False +``` + +### string·isupper + +`S.isupper()` reports whether the string S contains at least one cased Unicode +letter, and all such letters are uppercase. + +```python +"HAL-9000".isupper() # True +"Catch-22".isupper() # False +"123".isupper() # False +``` + +### string·join + +`S.join(iterable)` returns the string formed by concatenating each +element of its argument, with a copy of the string S between +successive elements. The argument must be an iterable whose elements +are strings. + +```python +", ".join(["one", "two", "three"]) # "one, two, three" +"a".join("ctmrn") # "catamaran" +``` + +### string·lower + +`S.lower()` returns a copy of the string S with letters converted to lowercase. + +```python +"Hello, World!".lower() # "hello, world!" +``` + +### string·lstrip + +`S.lstrip()` returns a copy of the string S with leading whitespace removed. + +```python +" hello ".lstrip() # "hello " +``` + +### string·partition + +`S.partition(x)` splits string S into three parts and returns them as +a tuple: the portion before the first occurrence of string `x`, `x` itself, +and the portion following it. +If S does not contain `x`, `partition` returns `(S, "", "")`. + +`partition` fails if `x` is not a string, or is the empty string. + +```python +"one/two/three".partition("/") # ("one", "/", "two/three") +``` + +### string·replace + +`S.replace(old, new[, count])` returns a copy of string S with all +occurrences of substring `old` replaced by `new`. If the optional +argument `count`, which must be an `int`, is non-negative, it +specifies a maximum number of occurrences to replace. 
+ +```python +"banana".replace("a", "o") # "bonono" +"banana".replace("a", "o", 2) # "bonona" +``` + +### string·rfind + +`S.rfind(sub[, start[, end]])` returns the index of the substring `sub` within +S, like `S.find`, except that `rfind` returns the index of the substring's +_last_ occurrence. + +```python +"bonbon".rfind("on") # 4 +"bonbon".rfind("on", None, 5) # 1 +"bonbon".rfind("on", 2, 5) # -1 +``` + +### string·rindex + +`S.rindex(sub[, start[, end]])` returns the index of the substring `sub` within +S, like `S.index`, except that `rindex` returns the index of the substring's +_last_ occurrence. + +```python +"bonbon".rindex("on") # 4 +"bonbon".rindex("on", None, 5) # 1 (in "bonbo") +"bonbon".rindex("on", 2, 5) # error: substring not found (in "nbo") +``` + +### string·rpartition + +`S.rpartition(x)` is like `partition`, but splits `S` at the last occurrence of `x`. + +```python +"one/two/three".rpartition("/") # ("one/two", "/", "three") +``` + +### string·rsplit + +`S.rsplit([sep[, maxsplit]])` splits a string into substrings like `S.split`, +except that when a maximum number of splits is specified, `rsplit` chooses the +rightmost splits. + +```python +"banana".rsplit("n") # ["ba", "a", "a"] +"banana".rsplit("n", 1) # ["bana", "a"] +"one two three".rsplit(None, 1) # ["one two", "three"] +``` + +TODO: `rsplit(None, maxsplit)` where `maxsplit > 0` (as in the last +example above) is not yet implemented and currently returns an error. + +### string·rstrip + +`S.rstrip()` returns a copy of the string S with trailing whitespace removed. + +```python +" hello ".rstrip() # " hello" +``` + +### string·split + +`S.split([sep [, maxsplit]])` returns the list of substrings of S, +splitting at occurrences of `sep`. +If `sep` is not specified or is `None`, `split` splits the string +between space characters and discards empty substrings. +If `sep` is the empty string, `split` fails. + +If `maxsplit` is given, it specifies the maximum number of splits. 
+ +```python +"one two three".split() # ["one", "two", "three"] +"one two  three".split(" ") # ["one", "two", "", "three"] +"one two three".split(None, 1) # ["one", "two three"] +"banana".split("n") # ["ba", "a", "a"] +"banana".split("n", 1) # ["ba", "ana"] +``` + +### string·split_bytes + +`S.split_bytes()` returns an iterable value containing successive +1-byte substrings of S. +To materialize the entire sequence, apply `list(...)` to the result. + +Example: + +```python +list('Hello, 世界'.split_bytes()) # ["H", "e", "l", "l", "o", ",", " ", "\xe4", "\xb8", "\x96", "\xe7", "\x95", "\x8c"] +``` + +See also: `string·bytes`. + +<b>Implementation note:</b> `split_bytes` is not provided by the Java implementation. + +### string·split_codepoints + +`S.split_codepoints()` returns an iterable value containing the sequence of +substrings of S that each encode a single Unicode code point. +Each invalid code within the string is treated as if it encodes the +Unicode replacement character, U+FFFD. + +By returning an iterable, not a list, the cost of decoding the string +is deferred until actually needed; apply `list(...)` to the result to +materialize the entire sequence. + +Example: + +```python +list('Hello, 世界'.split_codepoints()) # ['H', 'e', 'l', 'l', 'o', ',', ' ', '世', '界'] + +for cp in 'Hello, 世界'.split_codepoints(): + if cp == ',': + break + print(cp) # prints 'H', 'e', 'l', 'l', 'o' +``` + +See also: `string·codepoints`. + +<b>Implementation note:</b> `split_codepoints` is not provided by the Java implementation. + +### string·splitlines + +`S.splitlines([keepends])` returns a list whose elements are the +successive lines of S, that is, the strings formed by splitting S at +line terminators (currently assumed to be a single newline, `\n`, +regardless of platform). + +The optional argument, `keepends`, is interpreted as a Boolean. +If true, line terminators are preserved in the result, though +the final element does not necessarily end with a line terminator. 
+ +```python +"one\n\ntwo".splitlines() # ["one", "", "two"] +"one\n\ntwo".splitlines(True) # ["one\n", "\n", "two"] +``` + + +### string·startswith + +`S.startswith(prefix)` reports whether the string S has the specified prefix. + +```python +"filename.sky".startswith("filename") # True +``` + +### string·strip + +`S.strip()` returns a copy of the string S with leading and trailing whitespace removed. + +```python +" hello ".strip() # "hello" +``` + +### string·title + +`S.title()` returns a copy of the string S with letters converted to titlecase. + +Letters are converted to uppercase at the start of words, lowercase elsewhere. + +```python +"hElLo, WoRlD!".title() # "Hello, World!" +``` + +### string·upper + +`S.upper()` returns a copy of the string S with letters converted to uppercase. + +```python +"Hello, World!".upper() # "HELLO, WORLD!" +``` + +## Dialect differences + +The list below summarizes features of the Go implementation that are +known to differ from the Java implementation of Skylark used by Bazel. +Some of these features may be controlled by global options to allow +applications to mimic the Bazel dialect more closely. Our goal is +eventually to eliminate all such differences on a case-by-case basis. + +* Integers are represented with infinite precision. +* Integer arithmetic is exact. +* Floating-point literals are supported (option: `-float`). +* The `float` built-in is provided (option: `-float`). +* Real division using `float / float` is supported (option: `-float`). +* `def` statements may be nested (option: `-nesteddef`). +* `lambda` expressions are supported (option: `-lambda`). +* String elements are bytes. +* Non-ASCII strings are encoded using UTF-8. +* Strings have the additional methods `bytes`, `split_bytes`, `codepoints`, and `split_codepoints`. +* The `chr` and `ord` built-ins are supported. +* The `set` built-in is provided (option: `-set`). +* `x += y` rebindings are permitted at top level. +* `assert` is a valid identifier. 
+* `&` is a token; `int & int` and `set & set` are supported. +* `int | int` is supported. +* The `freeze` built-in is provided (option: `-freeze`). +* The parser accepts unary `+` expressions. +* A method call `x.f()` may be separated into two steps: `y = x.f; y()`. +* Dot expressions may appear on the left side of an assignment: `x.f = 1`. +* `hash` accepts operands besides strings. +* `sorted` accepts the additional parameters `cmp`, `key`, and `reversed`. +* The `dict` type has a `clear` method. @@ -0,0 +1,1999 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package skylark + +import ( + "bytes" + "fmt" + "log" + "math" + "sort" + "strings" + "unicode" + "unicode/utf8" + + "github.com/google/skylark/resolve" + "github.com/google/skylark/syntax" +) + +const debug = false + +// A Thread contains the state of a Skylark thread, +// such as its call stack and thread-local storage. +// The Thread is threaded throughout the evaluator +type Thread struct { + // frame is the current Skylark execution frame. + frame *Frame + + // Print is the client-supplied implementation of the Skylark + // 'print' function. If nil, fmt.Fprintln(os.Stderr, msg) is + // used instead. + Print func(thread *Thread, msg string) + + // Load is the client-supplied implementation of module loading. + // Repeated calls with the same module name must return the same + // module environment or error. + Load func(thread *Thread, module string) (StringDict, error) + + // locals holds arbitrary "thread-local" values belonging to the client. + locals map[string]interface{} +} + +// SetLocal sets the thread-local value associated with the specified key. +// It must not be called after execution begins. 
+func (thread *Thread) SetLocal(key string, value interface{}) { + if thread.locals == nil { + thread.locals = make(map[string]interface{}) + } + thread.locals[key] = value +} + +// Local returns the thread-local value associated with the specified key. +func (thread *Thread) Local(key string) interface{} { + return thread.locals[key] +} + +// Caller returns the frame of the innermost enclosing Skylark function. +// It should only be used in built-ins called from Skylark code. +func (thread *Thread) Caller() *Frame { + return thread.frame +} + +// A StringDict is a mapping from names to values, and represents +// an environment such as the global variables of a module. +// It is not a true skylark.Value. +type StringDict map[string]Value + +func (d StringDict) String() string { + names := make([]string, 0, len(d)) + for name := range d { + names = append(names, name) + } + sort.Strings(names) + + var buf bytes.Buffer + path := make([]Value, 0, 4) + buf.WriteByte('{') + sep := "" + for _, name := range names { + buf.WriteString(sep) + buf.WriteString(name) + buf.WriteString(": ") + writeValue(&buf, d[name], path) + sep = ", " + } + buf.WriteByte('}') + return buf.String() +} + +func (d StringDict) Freeze() { + for _, v := range d { + v.Freeze() + } +} + +func (d StringDict) has(name string) bool { _, ok := d[name]; return ok } + +// A Frame holds the execution state of a single Skylark function call +// or module toplevel. 
+type Frame struct { + thread *Thread // thread-associated state + parent *Frame // caller's frame (or nil) + posn syntax.Position // source position of PC (set during call and error) + fn *Function // current function (nil at toplevel) + globals StringDict // current global environment + locals []Value // local variables, starting with parameters + result Value // operand of current function's return statement +} + +func (fr *Frame) errorf(posn syntax.Position, format string, args ...interface{}) *EvalError { + fr.posn = posn + msg := fmt.Sprintf(format, args...) + return &EvalError{Msg: msg, Frame: fr} +} + +// Position returns the source position of the current point of execution in this frame. +func (fr *Frame) Position() syntax.Position { return fr.posn } + +// Function returns the frame's function, or nil for the top-level of a module. +func (fr *Frame) Function() *Function { return fr.fn } + +// Parent returns the frame of the enclosing function call, if any. +func (fr *Frame) Parent() *Frame { return fr.parent } + +// set updates the environment binding for name to value. +func (fr *Frame) set(id *syntax.Ident, v Value) { + switch resolve.Scope(id.Scope) { + case resolve.Local: + fr.locals[id.Index] = v + case resolve.Global: + fr.globals[id.Name] = v + default: + log.Fatalf("%s: set(%s): neither global nor local (%d)", id.NamePos, id.Name, id.Scope) + } +} + +// lookup returns the value of name in the environment. +func (fr *Frame) lookup(id *syntax.Ident) (Value, error) { + switch resolve.Scope(id.Scope) { + case resolve.Local: + if v := fr.locals[id.Index]; v != nil { + return v, nil + } + case resolve.Free: + return fr.fn.freevars[id.Index], nil + case resolve.Global: + if v := fr.globals[id.Name]; v != nil { + return v, nil + } + if id.Name == "PACKAGE_NAME" { + // Gross spec, gross hack. + // Users should just call package_name() function. 
+ if v, ok := fr.globals["package_name"].(*Builtin); ok { + return v.fn(fr.thread, v, nil, nil) + } + } + case resolve.Builtin: + return Universe[id.Name], nil + } + return nil, fr.errorf(id.NamePos, "%s variable %s referenced before assignment", + resolve.Scope(id.Scope), id.Name) +} + +// An EvalError is a Skylark evaluation error and its associated call stack. +type EvalError struct { + Msg string + Frame *Frame +} + +func (e *EvalError) Error() string { return e.Msg } + +// Backtrace returns a user-friendly error message describing the stack +// of calls that led to this error. +func (e *EvalError) Backtrace() string { + var buf bytes.Buffer + e.Frame.WriteBacktrace(&buf) + fmt.Fprintf(&buf, "Error: %s", e.Msg) + return buf.String() +} + +// WriteBacktrace writes a user-friendly description of the stack to buf. +func (fr *Frame) WriteBacktrace(out *bytes.Buffer) { + fmt.Fprintf(out, "Traceback (most recent call last):\n") + var print func(fr *Frame) + print = func(fr *Frame) { + if fr != nil { + print(fr.parent) + + name := "<toplevel>" + if fr.fn != nil { + name = fr.fn.Name() + } + fmt.Fprintf(out, " %s:%d:%d: in %s\n", + fr.posn.Filename(), + fr.posn.Line, + fr.posn.Col, + name) + } + } + print(fr) +} + +// Stack returns the stack of frames, innermost first. +func (e *EvalError) Stack() []*Frame { + var stack []*Frame + for fr := e.Frame; fr != nil; fr = fr.parent { + stack = append(stack, fr) + } + return stack +} + +// ExecFile parses, resolves, and executes a Skylark file in the +// specified global environment, which may be modified during execution. +// +// The filename and src parameters are as for syntax.Parse. +// +// If ExecFile fails during evaluation, it returns an *EvalError +// containing a backtrace. +func ExecFile(thread *Thread, filename string, src interface{}, globals StringDict) error { + return Exec(ExecOptions{Thread: thread, Filename: filename, Source: src, Globals: globals}) +} + +// ExecOptions specifies the arguments to Exec. 
+// +// TODO(adonovan): give Eval the same treatment? +type ExecOptions struct { + // Thread is the state associated with the Skylark thread. + Thread *Thread + + // Filename is the name of the file to execute, + // and the name that appears in error messages. + Filename string + + // Source is an optional source of bytes to use + // instead of Filename. See syntax.Parse for details. + Source interface{} + + // Globals is the environment of the module. + // It may be modified during execution. + Globals StringDict + + // BeforeExec is an optional function that is called after the + // syntax tree has been resolved but before execution. If it + // returns an error, execution is not attempted. + BeforeExec func(*Thread, syntax.Node) error +} + +// Exec is a variant of ExecFile that gives the client greater control +// over optional features. +func Exec(opts ExecOptions) error { + if debug { + fmt.Printf("ExecFile %s\n", opts.Filename) + defer fmt.Printf("ExecFile %s done\n", opts.Filename) + } + f, err := syntax.Parse(opts.Filename, opts.Source) + if err != nil { + return err + } + + globals := opts.Globals + if err := resolve.File(f, globals.has, Universe.has); err != nil { + return err + } + + thread := opts.Thread + + if opts.BeforeExec != nil { + if err := opts.BeforeExec(thread, f); err != nil { + return err + } + } + + fr := &Frame{ + thread: thread, + parent: thread.frame, + globals: globals, + locals: make([]Value, len(f.Locals)), + } + thread.frame = fr + err = execStmts(fr, f.Stmts) + thread.frame = fr.parent + + // Freeze the global environment. + globals.Freeze() + + return err +} + +// Eval parses, resolves, and evaluates an expression within the +// specified global environment. +// +// Evaluation cannot mutate the globals dictionary itself, though it may +// modify variables reachable from the dictionary. +// +// The filename and src parameters are as for syntax.Parse. 
+// +// If Eval fails during evaluation, it returns an *EvalError +// containing a backtrace. +func Eval(thread *Thread, filename string, src interface{}, globals StringDict) (Value, error) { + expr, err := syntax.ParseExpr(filename, src) + if err != nil { + return nil, err + } + + locals, err := resolve.Expr(expr, globals.has, Universe.has) + if err != nil { + return nil, err + } + + fr := &Frame{ + thread: thread, + parent: thread.frame, + globals: globals, + locals: make([]Value, len(locals)), + } + thread.frame = fr + v, err := eval(fr, expr) + thread.frame = fr.parent + return v, err +} + +// Sentinel values used for control flow. Internal use only. +var ( + errContinue = fmt.Errorf("continue") + errBreak = fmt.Errorf("break") + errReturn = fmt.Errorf("return") +) + +func execStmts(fr *Frame, stmts []syntax.Stmt) error { + for _, stmt := range stmts { + if err := exec(fr, stmt); err != nil { + return err + } + } + return nil +} + +func exec(fr *Frame, stmt syntax.Stmt) error { + switch stmt := stmt.(type) { + case *syntax.ExprStmt: + _, err := eval(fr, stmt.X) + return err + + case *syntax.BranchStmt: + switch stmt.Token { + case syntax.PASS: + return nil // no-op + case syntax.BREAK: + return errBreak + case syntax.CONTINUE: + return errContinue + } + + case *syntax.IfStmt: + cond, err := eval(fr, stmt.Cond) + if err != nil { + return err + } + if cond.Truth() { + return execStmts(fr, stmt.True) + } else { + return execStmts(fr, stmt.False) + } + + case *syntax.AssignStmt: + switch stmt.Op { + case syntax.EQ: + // simple assignment: x = y + y, err := eval(fr, stmt.RHS) + if err != nil { + return err + } + return assign(fr, stmt.OpPos, stmt.LHS, y) + + case syntax.PLUS_EQ, + syntax.MINUS_EQ, + syntax.STAR_EQ, + syntax.SLASH_EQ, + syntax.SLASHSLASH_EQ, + syntax.PERCENT_EQ: + // augmented assignment: x += y + + var old Value // old value loaded from "address" x + var set func(fr *Frame, new Value) error + + // Evaluate "address" of x exactly once to avoid 
duplicate side-effects. + switch lhs := stmt.LHS.(type) { + case *syntax.Ident: + // x += ... + x, err := fr.lookup(lhs) + if err != nil { + return err + } + old = x + set = func(fr *Frame, new Value) error { + fr.set(lhs, new) + return nil + } + + case *syntax.IndexExpr: + // x[y] += ... + x, err := eval(fr, lhs.X) + if err != nil { + return err + } + y, err := eval(fr, lhs.Y) + if err != nil { + return err + } + old, err = getIndex(fr, lhs.Lbrack, x, y) + if err != nil { + return err + } + set = func(fr *Frame, new Value) error { + return setIndex(fr, lhs.Lbrack, x, y, new) + } + + case *syntax.DotExpr: + // x.f += ... + x, err := eval(fr, lhs.X) + if err != nil { + return err + } + old, err = getAttr(fr, x, lhs) + if err != nil { + return err + } + set = func(fr *Frame, new Value) error { + return setField(fr, x, lhs, new) + } + } + + y, err := eval(fr, stmt.RHS) + if err != nil { + return err + } + + // Special case, following Python: + // If x is a list, x += y is sugar for x.extend(y). 
+ if xlist, ok := old.(*List); ok && stmt.Op == syntax.PLUS_EQ { + yiter, ok := y.(Iterable) + if !ok { + return fr.errorf(stmt.OpPos, "invalid operation: list += %s", y.Type()) + } + if err := xlist.checkMutable("apply += to", true); err != nil { + return fr.errorf(stmt.OpPos, "%v", err) + } + listExtend(xlist, yiter) + return nil + } + + new, err := Binary(stmt.Op-syntax.PLUS_EQ+syntax.PLUS, old, y) + if err != nil { + return fr.errorf(stmt.OpPos, "%v", err) + } + return set(fr, new) + + default: + log.Fatalf("%s: unexpected assignment operator: %s", stmt.OpPos, stmt.Op) + } + + case *syntax.DefStmt: + f, err := evalFunction(fr, stmt.Def, stmt.Name.Name, &stmt.Function) + if err != nil { + return err + } + fr.set(stmt.Name, f) + return nil + + case *syntax.ForStmt: + x, err := eval(fr, stmt.X) + if err != nil { + return err + } + iter := Iterate(x) + if iter == nil { + return fr.errorf(stmt.For, "%s value is not iterable", x.Type()) + } + defer iter.Done() + var elem Value + for iter.Next(&elem) { + if err := assign(fr, stmt.For, stmt.Vars, elem); err != nil { + return err + } + if err := execStmts(fr, stmt.Body); err != nil { + if err == errBreak { + break + } else if err == errContinue { + continue + } else { + return err + } + } + } + return nil + + case *syntax.ReturnStmt: + if stmt.Result != nil { + x, err := eval(fr, stmt.Result) + if err != nil { + return err + } + fr.result = x + } else { + fr.result = None + } + return errReturn + + case *syntax.LoadStmt: + module := stmt.Module.Value.(string) + if fr.thread.Load == nil { + return fr.errorf(stmt.Load, "load not implemented by this application") + } + fr.posn = stmt.Load + dict, err := fr.thread.Load(fr.thread, module) + if err != nil { + return fr.errorf(stmt.Load, "cannot load %s: %v", module, err) + } + for i, from := range stmt.From { + v, ok := dict[from.Name] + if !ok { + return fr.errorf(stmt.From[i].NamePos, "load: name %s not found in module %s", from.Name, module) + } + fr.set(stmt.To[i], v) + } 
+ return nil + } + + start, _ := stmt.Span() + log.Fatalf("%s: exec: unexpected statement %T", start, stmt) + panic("unreachable") +} + +// list += iterable +func listExtend(x *List, y Iterable) { + if ylist, ok := y.(*List); ok { + // fast path: list += list + x.elems = append(x.elems, ylist.elems...) + } else { + iter := y.Iterate() + defer iter.Done() + var z Value + for iter.Next(&z) { + x.elems = append(x.elems, z) + } + } +} + +// getAttr implements x.dot. +func getAttr(fr *Frame, x Value, dot *syntax.DotExpr) (Value, error) { + name := dot.Name.Name + + // field or method? + if x, ok := x.(HasAttrs); ok { + if v, err := x.Attr(name); v != nil || err != nil { + return v, wrapError(fr, dot.Dot, err) + } + } + + return nil, fr.errorf(dot.Dot, "%s has no .%s field or method", x.Type(), name) +} + +// setField implements x.name = y. +func setField(fr *Frame, x Value, dot *syntax.DotExpr, y Value) error { + if x, ok := x.(HasSetField); ok { + err := x.SetField(dot.Name.Name, y) + return wrapError(fr, dot.Dot, err) + } + return fr.errorf(dot.Dot, "can't assign to .%s field of %s", dot.Name.Name, x.Type()) +} + +// getIndex implements x[y]. +func getIndex(fr *Frame, lbrack syntax.Position, x, y Value) (Value, error) { + switch x := x.(type) { + case Mapping: // dict + z, found, err := x.Get(y) + if err != nil { + return nil, fr.errorf(lbrack, "%v", err) + } + if !found { + return nil, fr.errorf(lbrack, "key %v not in %s", y, x.Type()) + } + return z, nil + + case Indexable: // string, list, tuple + n := x.Len() + i, err := AsInt32(y) + if err != nil { + return nil, fr.errorf(lbrack, "%s index: %s", x.Type(), err) + } + if i < 0 { + i += n + } + if i < 0 || i >= n { + return nil, fr.errorf(lbrack, "%s index %d out of range [0:%d]", + x.Type(), i, n) + } + return x.Index(i), nil + } + return nil, fr.errorf(lbrack, "unhandled index operation %s[%s]", x.Type(), y.Type()) +} + +// setIndex implements x[y] = z. 
+func setIndex(fr *Frame, lbrack syntax.Position, x, y, z Value) error { + switch x := x.(type) { + case *Dict: + if err := x.Set(y, z); err != nil { + return fr.errorf(lbrack, "%v", err) + } + + case HasSetIndex: + i, err := AsInt32(y) + if err != nil { + return wrapError(fr, lbrack, err) + } + if i < 0 { + i += x.Len() + } + if i < 0 || i >= x.Len() { + return fr.errorf(lbrack, "%s index %d out of range [0:%d]", x.Type(), i, x.Len()) + } + return wrapError(fr, lbrack, x.SetIndex(i, z)) + + default: + return fr.errorf(lbrack, "%s value does not support item assignment", x.Type()) + } + return nil +} + +// assign implements lhs = rhs for arbitrary expressions lhs. +func assign(fr *Frame, pos syntax.Position, lhs syntax.Expr, rhs Value) error { + switch lhs := lhs.(type) { + case *syntax.Ident: + // x = rhs + fr.set(lhs, rhs) + + case *syntax.TupleExpr: + // (x, y) = rhs + return assignSequence(fr, pos, lhs.List, rhs) + + case *syntax.ListExpr: + // [x, y] = rhs + return assignSequence(fr, pos, lhs.List, rhs) + + case *syntax.IndexExpr: + // x[y] = rhs + x, err := eval(fr, lhs.X) + if err != nil { + return err + } + y, err := eval(fr, lhs.Y) + if err != nil { + return err + } + return setIndex(fr, lhs.Lbrack, x, y, rhs) + + case *syntax.DotExpr: + // x.f = rhs + x, err := eval(fr, lhs.X) + if err != nil { + return err + } + return setField(fr, x, lhs, rhs) + + default: + return fr.errorf(pos, "ill-formed assignment: %T", lhs) + } + return nil +} + +func assignSequence(fr *Frame, pos syntax.Position, lhs []syntax.Expr, rhs Value) error { + nlhs := len(lhs) + n := Len(rhs) + if n < 0 { + return fr.errorf(pos, "got %s in sequence assignment", rhs.Type()) + } else if n > nlhs { + return fr.errorf(pos, "too many values to unpack (got %d, want %d)", n, nlhs) + } else if n < nlhs { + return fr.errorf(pos, "too few values to unpack (got %d, want %d)", n, nlhs) + } + + // If the rhs is not indexable, extract its elements into a + // temporary tuple before doing the 
assignment. + ix, ok := rhs.(Indexable) + if !ok { + tuple := make(Tuple, n) + iter := Iterate(rhs) + if iter == nil { + return fr.errorf(pos, "non-iterable sequence: %s", rhs.Type()) + } + for i := 0; i < n; i++ { + iter.Next(&tuple[i]) + } + iter.Done() + ix = tuple + } + + for i := 0; i < n; i++ { + if err := assign(fr, pos, lhs[i], ix.Index(i)); err != nil { + return err + } + } + return nil +} + +func eval(fr *Frame, e syntax.Expr) (Value, error) { + switch e := e.(type) { + case *syntax.Ident: + return fr.lookup(e) + + case *syntax.Literal: + switch e.Token { + case syntax.INT: + return MakeInt64(e.Value.(int64)), nil + case syntax.FLOAT: + return Float(e.Value.(float64)), nil + case syntax.STRING: + return String(e.Value.(string)), nil + } + + case *syntax.ListExpr: + vals := make([]Value, len(e.List)) + for i, x := range e.List { + v, err := eval(fr, x) + if err != nil { + return nil, err + } + vals[i] = v + } + return NewList(vals), nil + + case *syntax.CondExpr: + cond, err := eval(fr, e.Cond) + if err != nil { + return nil, err + } + if cond.Truth() { + return eval(fr, e.True) + } else { + return eval(fr, e.False) + } + + case *syntax.IndexExpr: + x, err := eval(fr, e.X) + if err != nil { + return nil, err + } + y, err := eval(fr, e.Y) + if err != nil { + return nil, err + } + return getIndex(fr, e.Lbrack, x, y) + + case *syntax.SliceExpr: + return evalSliceExpr(fr, e) + + case *syntax.Comprehension: + var result Value + if e.Curly { + result = new(Dict) + } else { + result = new(List) + } + return result, evalComprehension(fr, e, result, 0) + + case *syntax.TupleExpr: + n := len(e.List) + tuple := make(Tuple, n) + for i, x := range e.List { + v, err := eval(fr, x) + if err != nil { + return nil, err + } + tuple[i] = v + } + return tuple, nil + + case *syntax.DictExpr: + dict := new(Dict) + for i, entry := range e.List { + entry := entry.(*syntax.DictEntry) + k, err := eval(fr, entry.Key) + if err != nil { + return nil, err + } + v, err := eval(fr, 
entry.Value) + if err != nil { + return nil, err + } + if err := dict.Set(k, v); err != nil { + return nil, fr.errorf(e.Lbrace, "%v", err) + } + if dict.Len() != i+1 { + return nil, fr.errorf(e.Lbrace, "duplicate key: %v", k) + } + } + return dict, nil + + case *syntax.UnaryExpr: + x, err := eval(fr, e.X) + if err != nil { + return nil, err + } + y, err := Unary(e.Op, x) + if err != nil { + return nil, fr.errorf(e.OpPos, "%s", err) + } + return y, nil + + case *syntax.BinaryExpr: + x, err := eval(fr, e.X) + if err != nil { + return nil, err + } + + // short-circuit operators + switch e.Op { + case syntax.OR: + if x.Truth() { + return x, nil + } + return eval(fr, e.Y) + case syntax.AND: + if !x.Truth() { + return x, nil + } + return eval(fr, e.Y) + } + + y, err := eval(fr, e.Y) + if err != nil { + return nil, err + } + + // comparisons + switch e.Op { + case syntax.EQL, syntax.NEQ, syntax.GT, syntax.LT, syntax.LE, syntax.GE: + if ok, err := Compare(e.Op, x, y); err != nil { + return nil, fr.errorf(e.OpPos, "%s", err) + } else { + return Bool(ok), nil + } + } + + // binary operators + z, err := Binary(e.Op, x, y) + if err != nil { + return nil, fr.errorf(e.OpPos, "%s", err) + } + return z, nil + + case *syntax.DotExpr: + x, err := eval(fr, e.X) + if err != nil { + return nil, err + } + return getAttr(fr, x, e) + + case *syntax.CallExpr: + return evalCall(fr, e) + + case *syntax.LambdaExpr: + return evalFunction(fr, e.Lambda, "lambda", &e.Function) + } + + start, _ := e.Span() + log.Fatalf("%s: unexpected expr %T", start, e) + panic("unreachable") +} + +// Unary applies a unary operator (+, -, not) to its operand. 
+func Unary(op syntax.Token, x Value) (Value, error) { + switch op { + case syntax.MINUS: + switch x := x.(type) { + case Int: + return zero.Sub(x), nil + case Float: + return -x, nil + } + case syntax.PLUS: + switch x.(type) { + case Int, Float: + return x, nil + } + case syntax.NOT: + return !x.Truth(), nil + } + return nil, fmt.Errorf("unknown unary op: %s %s", op, x.Type()) +} + +// Binary applies a strict binary operator (not AND or OR) to its operands. +// For equality tests or ordered comparisons, use Compare instead. +func Binary(op syntax.Token, x, y Value) (Value, error) { + switch op { + case syntax.PLUS: + switch x := x.(type) { + case String: + if y, ok := y.(String); ok { + return x + y, nil + } + case Int: + switch y := y.(type) { + case Int: + return x.Add(y), nil + case Float: + return x.Float() + y, nil + } + case Float: + switch y := y.(type) { + case Float: + return x + y, nil + case Int: + return x + y.Float(), nil + } + case *List: + if y, ok := y.(*List); ok { + z := make([]Value, 0, x.Len()+y.Len()) + z = append(z, x.elems...) + z = append(z, y.elems...) + return NewList(z), nil + } + case Tuple: + if y, ok := y.(Tuple); ok { + z := make(Tuple, 0, len(x)+len(y)) + z = append(z, x...) + z = append(z, y...) + return z, nil + } + case *Dict: + // Python doesn't have dict+dict, and I can't find + // it documented for Skylark. But it is used; see: + // tools/build_defs/haskell/def.bzl:448 + // TODO(adonovan): clarify spec; see b/36360157. 
+ if y, ok := y.(*Dict); ok { + z := new(Dict) + for _, item := range x.Items() { + z.Set(item[0], item[1]) + } + for _, item := range y.Items() { + z.Set(item[0], item[1]) + } + return z, nil + } + } + + case syntax.MINUS: + switch x := x.(type) { + case Int: + switch y := y.(type) { + case Int: + return x.Sub(y), nil + case Float: + return x.Float() - y, nil + } + case Float: + switch y := y.(type) { + case Float: + return x - y, nil + case Int: + return x - y.Float(), nil + } + } + + case syntax.STAR: + switch x := x.(type) { + case Int: + switch y := y.(type) { + case Int: + return x.Mul(y), nil + case Float: + return x.Float() * y, nil + case String: + if i, err := AsInt32(x); err == nil { + if i < 1 { + return String(""), nil + } + return String(strings.Repeat(string(y), i)), nil + } + case *List: + if i, err := AsInt32(x); err == nil { + return NewList(repeat(y.elems, i)), nil + } + case Tuple: + if i, err := AsInt32(x); err == nil { + return Tuple(repeat([]Value(y), i)), nil + } + } + case Float: + switch y := y.(type) { + case Float: + return x * y, nil + case Int: + return x * y.Float(), nil + } + case String: + if y, ok := y.(Int); ok { + if i, err := AsInt32(y); err == nil { + if i < 1 { + return String(""), nil + } + return String(strings.Repeat(string(x), i)), nil + } + } + case *List: + if y, ok := y.(Int); ok { + if i, err := AsInt32(y); err == nil { + return NewList(repeat(x.elems, i)), nil + } + } + case Tuple: + if y, ok := y.(Int); ok { + if i, err := AsInt32(y); err == nil { + return Tuple(repeat([]Value(x), i)), nil + } + } + + } + + case syntax.SLASH: + switch x := x.(type) { + case Int: + switch y := y.(type) { + case Int: + yf := y.Float() + if yf == 0.0 { + return nil, fmt.Errorf("real division by zero") + } + return x.Float() / yf, nil + case Float: + if y == 0.0 { + return nil, fmt.Errorf("real division by zero") + } + return x.Float() / y, nil + } + case Float: + switch y := y.(type) { + case Float: + if y == 0.0 { + return nil, 
fmt.Errorf("real division by zero") + } + return x / y, nil + case Int: + yf := y.Float() + if yf == 0.0 { + return nil, fmt.Errorf("real division by zero") + } + return x / yf, nil + } + } + + case syntax.SLASHSLASH: + switch x := x.(type) { + case Int: + switch y := y.(type) { + case Int: + if y.Sign() == 0 { + return nil, fmt.Errorf("floored division by zero") + } + return x.Div(y), nil + case Float: + if y == 0.0 { + return nil, fmt.Errorf("floored division by zero") + } + return floor((x.Float() / y)), nil + } + case Float: + switch y := y.(type) { + case Float: + if y == 0.0 { + return nil, fmt.Errorf("floored division by zero") + } + return floor(x / y), nil + case Int: + yf := y.Float() + if yf == 0.0 { + return nil, fmt.Errorf("floored division by zero") + } + return floor(x / yf), nil + } + } + + case syntax.PERCENT: + switch x := x.(type) { + case Int: + switch y := y.(type) { + case Int: + if y.Sign() == 0 { + return nil, fmt.Errorf("integer modulo by zero") + } + return x.Mod(y), nil + case Float: + if y == 0 { + return nil, fmt.Errorf("float modulo by zero") + } + return x.Float().Mod(y), nil + } + case Float: + switch y := y.(type) { + case Float: + if y == 0.0 { + return nil, fmt.Errorf("float modulo by zero") + } + return Float(math.Mod(float64(x), float64(y))), nil + case Int: + if y.Sign() == 0 { + return nil, fmt.Errorf("float modulo by zero") + } + return x.Mod(y.Float()), nil + } + case String: + return interpolate(string(x), y) + } + + case syntax.NOT_IN: + z, err := Binary(syntax.IN, x, y) + if err != nil { + return nil, err + } + return !z.Truth(), nil + + case syntax.IN: + switch y := y.(type) { + case *List: + for _, elem := range y.elems { + if eq, err := Equal(elem, x); err != nil { + return nil, err + } else if eq { + return True, nil + } + } + return False, nil + case Tuple: + for _, elem := range y { + if eq, err := Equal(elem, x); err != nil { + return nil, err + } else if eq { + return True, nil + } + } + return False, nil + case 
Mapping: // e.g. dict + _, found, err := y.Get(x) + return Bool(found), err + case *Set: + ok, err := y.Has(x) + return Bool(ok), err + case String: + needle, ok := x.(String) + if !ok { + return nil, fmt.Errorf("'in <string>' requires string as left operand, not %s", x.Type()) + } + return Bool(strings.Contains(string(y), string(needle))), nil + } + + case syntax.PIPE: + switch x := x.(type) { + case Int: + if y, ok := y.(Int); ok { + return x.Or(y), nil + } + case *Set: // union + if iter := Iterate(y); iter != nil { + defer iter.Done() + return x.Union(iter) + } + } + + case syntax.AMP: + switch x := x.(type) { + case Int: + if y, ok := y.(Int); ok { + return x.And(y), nil + } + case *Set: // intersection + if y, ok := y.(*Set); ok { + set := new(Set) + if x.Len() > y.Len() { + x, y = y, x // opt: range over smaller set + } + for _, xelem := range x.elems() { + // Has, Insert cannot fail here. + if found, _ := y.Has(xelem); found { + set.Insert(xelem) + } + } + return set, nil + } + } + + default: + // unknown operator + goto unknown + } + + // user-defined types + if x, ok := x.(HasBinary); ok { + z, err := x.Binary(op, y, Left) + if z != nil || err != nil { + return z, err + } + } + if y, ok := y.(HasBinary); ok { + z, err := y.Binary(op, x, Right) + if z != nil || err != nil { + return z, err + } + } + + // unsupported operand types +unknown: + return nil, fmt.Errorf("unknown binary op: %s %s %s", x.Type(), op, y.Type()) +} + +func repeat(elems []Value, n int) (res []Value) { + if n > 0 { + res = make([]Value, 0, len(elems)*n) + for i := 0; i < n; i++ { + res = append(res, elems...) + } + } + return res +} + +func evalCall(fr *Frame, call *syntax.CallExpr) (Value, error) { + var fn Value + + // Use optimized path for calling methods of built-ins: x.f(...) 
+ if dot, ok := call.Fn.(*syntax.DotExpr); ok { + recv, err := eval(fr, dot.X) + if err != nil { + return nil, err + } + + name := dot.Name.Name + if method := builtinMethodOf(recv, name); method != nil { + args, kwargs, err := evalArgs(fr, call) + if err != nil { + return nil, err + } + + // Make the call. + res, err := method(name, recv, args, kwargs) + return res, wrapError(fr, call.Lparen, err) + } + + // Fall back to usual path. + fn, err = getAttr(fr, recv, dot) + if err != nil { + return nil, err + } + } else { + var err error + fn, err = eval(fr, call.Fn) + if err != nil { + return nil, err + } + } + + args, kwargs, err := evalArgs(fr, call) + if err != nil { + return nil, err + } + + // Make the call. + fr.posn = call.Lparen + res, err := Call(fr.thread, fn, args, kwargs) + return res, wrapError(fr, call.Lparen, err) +} + +// wrapError wraps the error in a skylark.EvalError only if needed. +func wrapError(fr *Frame, posn syntax.Position, err error) error { + switch err := err.(type) { + case nil, *EvalError: + return err + } + return fr.errorf(posn, "%s", err.Error()) +} + +func evalArgs(fr *Frame, call *syntax.CallExpr) (args Tuple, kwargs []Tuple, err error) { + // evaluate arguments. 
+ var kwargsAlloc Tuple // allocate a single backing array + for i, arg := range call.Args { + // keyword argument, k=v + if binop, ok := arg.(*syntax.BinaryExpr); ok && binop.Op == syntax.EQ { + k := binop.X.(*syntax.Ident).Name + v, err := eval(fr, binop.Y) + if err != nil { + return nil, nil, err + } + if kwargs == nil { + nkwargs := len(call.Args) - i // more than enough + kwargsAlloc = make(Tuple, 2*nkwargs) + kwargs = make([]Tuple, 0, nkwargs) + } + pair := kwargsAlloc[:2:2] + kwargsAlloc = kwargsAlloc[2:] + pair[0], pair[1] = String(k), v + kwargs = append(kwargs, pair) + continue + } + + // *args and **kwargs arguments + if unop, ok := arg.(*syntax.UnaryExpr); ok { + if unop.Op == syntax.STAR { + // *args + x, err := eval(fr, unop.X) + if err != nil { + return nil, nil, err + } + iter := Iterate(x) + if iter == nil { + return nil, nil, fr.errorf(unop.OpPos, "argument after * must be iterable, not %s", x.Type()) + } + defer iter.Done() + var elem Value + for iter.Next(&elem) { + args = append(args, elem) + } + continue + } + + if unop.Op == syntax.STARSTAR { + // **kwargs + x, err := eval(fr, unop.X) + if err != nil { + return nil, nil, err + } + xdict, ok := x.(*Dict) + if !ok { + return nil, nil, fr.errorf(unop.OpPos, "argument after ** must be a mapping, not %s", x.Type()) + } + items := xdict.Items() + for _, item := range items { + if _, ok := item[0].(String); !ok { + return nil, nil, fr.errorf(unop.OpPos, "keywords must be strings, not %s", item[0].Type()) + } + } + if kwargs == nil { + kwargs = items + } else { + kwargs = append(kwargs, items...) + } + continue + } + } + + // ordinary argument + v, err := eval(fr, arg) + if err != nil { + return nil, nil, err + } + args = append(args, v) + } + return args, kwargs, err +} + +// Call calls the function fn with the specified positional and keyword arguments. 
+func Call(thread *Thread, fn Value, args Tuple, kwargs []Tuple) (Value, error) { + c, ok := fn.(Callable) + if !ok { + return nil, fmt.Errorf("invalid call of non-function (%s)", fn.Type()) + } + res, err := c.Call(thread, args, kwargs) + // Sanity check: nil is not a valid Skylark value. + if err == nil && res == nil { + return nil, fmt.Errorf("internal error: nil (not None) returned from %s", fn) + } + return res, err +} + +func evalSliceExpr(fr *Frame, e *syntax.SliceExpr) (Value, error) { + // Unlike Python, Skylark does not allow a slice on the LHS of + // an assignment statement. + + x, err := eval(fr, e.X) + if err != nil { + return nil, err + } + + var lo, hi, step Value = None, None, None + if e.Lo != nil { + lo, err = eval(fr, e.Lo) + if err != nil { + return nil, err + } + } + if e.Hi != nil { + hi, err = eval(fr, e.Hi) + if err != nil { + return nil, err + } + } + if e.Step != nil { + step, err = eval(fr, e.Step) + if err != nil { + return nil, err + } + } + res, err := slice(x, lo, hi, step) + if err != nil { + return nil, fr.errorf(e.Lbrack, "%s", err) + } + return res, nil +} + +func slice(x, lo, hi, step_ Value) (Value, error) { + n := Len(x) + if n < 0 { + n = 0 // n < 0 => invalid operand; will be rejected by type switch + } + + step := 1 + if step_ != None { + var err error + step, err = AsInt32(step_) + if err != nil { + return nil, fmt.Errorf("got %s for slice step, want int", step_.Type()) + } + if step == 0 { + return nil, fmt.Errorf("zero is not a valid slice step") + } + } + + // TODO(adonovan): opt: preallocate result array. + + var start, end int + if step > 0 { + // positive stride + // default indices are [0:n]. 
+ var err error + start, end, err = indices(lo, hi, n) + if err != nil { + return nil, err + } + + if end < start { + end = start // => empty result + } + + if step == 1 { + // common case: simple subsequence + switch x := x.(type) { + case String: + return String(x[start:end]), nil + case *List: + elems := append([]Value{}, x.elems[start:end]...) + return NewList(elems), nil + case Tuple: + return x[start:end], nil + } + } + } else { + // negative stride + // default indices are effectively [n-1:-1], though to + // get this effect using explicit indices requires + // [n-1:-1-n:-1] because of the treatment of -ve values. + start = n - 1 + if err := asIndex(lo, n, &start); err != nil { + return nil, fmt.Errorf("invalid start index: %s", err) + } + if start >= n { + start = n - 1 + } + + end = -1 + if err := asIndex(hi, n, &end); err != nil { + return nil, fmt.Errorf("invalid end index: %s", err) + } + if end < -1 { + end = -1 + } + + if start < end { + start = end // => empty result + } + } + + // For positive strides, the loop condition is i < end. + // For negative strides, the loop condition is i > end. + sign := signum(step) + switch x := x.(type) { + case String: + var str []byte + for i := start; signum(end-i) == sign; i += step { + str = append(str, x[i]) + } + return String(str), nil + case *List: + var list []Value + for i := start; signum(end-i) == sign; i += step { + list = append(list, x.elems[i]) + } + return NewList(list), nil + case Tuple: + var tuple Tuple + for i := start; signum(end-i) == sign; i += step { + tuple = append(tuple, x[i]) + } + return tuple, nil + } + + return nil, fmt.Errorf("invalid slice operand %s", x.Type()) +} + +// From Hacker's Delight, section 2.8. +func signum(x int) int { return int(uint64(int64(x)>>63) | (uint64(-x) >> 63)) } + +// indices converts start_ and end_ to indices in the range [0:len]. +// The start index defaults to 0 and the end index defaults to len. +// An index -len < i < 0 is treated like i+len. 
+// All other indices outside the range are clamped to the nearest value in the range. +// Beware: start may be greater than end. +// This function is suitable only for slices with positive strides. +func indices(start_, end_ Value, len int) (start, end int, err error) { + start = 0 + if err := asIndex(start_, len, &start); err != nil { + return 0, 0, fmt.Errorf("invalid start index: %s", err) + } + // Clamp to [0:len]. + if start < 0 { + start = 0 + } else if start > len { + start = len + } + + end = len + if err := asIndex(end_, len, &end); err != nil { + return 0, 0, fmt.Errorf("invalid end index: %s", err) + } + // Clamp to [0:len]. + if end < 0 { + end = 0 + } else if end > len { + end = len + } + + return start, end, nil +} + +// asIndex sets *result to the integer value of v, adding len to it +// if it is negative. If v is nil or None, *result is unchanged. +func asIndex(v Value, len int, result *int) error { + if v != nil && v != None { + var err error + *result, err = AsInt32(v) + if err != nil { + return fmt.Errorf("got %s, want int", v.Type()) + } + if *result < 0 { + *result += len + } + } + return nil +} + +func evalComprehension(fr *Frame, comp *syntax.Comprehension, result Value, clauseIndex int) error { + if clauseIndex == len(comp.Clauses) { + if comp.Curly { + // dict: {k:v for ...} + // Parser ensures that body is of form k:v. + // Python-style set comprehensions {body for vars in x} + // are not supported. 
+ entry := comp.Body.(*syntax.DictEntry) + k, err := eval(fr, entry.Key) + if err != nil { + return err + } + v, err := eval(fr, entry.Value) + if err != nil { + return err + } + if err := result.(*Dict).Set(k, v); err != nil { + return fr.errorf(entry.Colon, "%v", err) + } + } else { + // list: [body for vars in x] + x, err := eval(fr, comp.Body) + if err != nil { + return err + } + list := result.(*List) + list.elems = append(list.elems, x) + } + return nil + } + + clause := comp.Clauses[clauseIndex] + switch clause := clause.(type) { + case *syntax.IfClause: + cond, err := eval(fr, clause.Cond) + if err != nil { + return err + } + if cond.Truth() { + return evalComprehension(fr, comp, result, clauseIndex+1) + } + return nil + + case *syntax.ForClause: + x, err := eval(fr, clause.X) + if err != nil { + return err + } + iter := Iterate(x) + if iter == nil { + return fr.errorf(clause.For, "%s value is not iterable", x.Type()) + } + defer iter.Done() + var elem Value + for iter.Next(&elem) { + if err := assign(fr, clause.For, clause.Vars, elem); err != nil { + return err + } + + if err := evalComprehension(fr, comp, result, clauseIndex+1); err != nil { + return err + } + } + return nil + } + + start, _ := clause.Span() + log.Fatalf("%s: unexpected comprehension clause %T", start, clause) + panic("unreachable") +} + +func evalFunction(fr *Frame, pos syntax.Position, name string, function *syntax.Function) (Value, error) { + // Example: f(x, y=dflt, *args, **kwargs) + + // Evaluate parameter defaults. + var defaults Tuple // parameter default values + for _, param := range function.Params { + if binary, ok := param.(*syntax.BinaryExpr); ok { + // e.g. y=dflt + dflt, err := eval(fr, binary.Y) + if err != nil { + return nil, err + } + defaults = append(defaults, dflt) + } + } + + // Capture the values of the function's + // free variables from the lexical environment. 
+ freevars := make([]Value, len(function.FreeVars)) + for i, freevar := range function.FreeVars { + v, err := fr.lookup(freevar) + if err != nil { + return nil, fr.errorf(pos, "%s", err) + } + freevars[i] = v + } + + return &Function{ + name: name, + position: pos, + syntax: function, + globals: fr.globals, + defaults: defaults, + freevars: freevars, + }, nil +} + +func (fn *Function) Call(thread *Thread, args Tuple, kwargs []Tuple) (Value, error) { + if debug { + fmt.Printf("call of %s %v %v\n", fn.Name(), args, kwargs) + } + + // detect recursion + for fr := thread.frame; fr != nil; fr = fr.parent { + // We look for the same syntactic function, + // not function value, otherwise the user could + // defeat it by writing the Y combinator. + if fr.fn != nil && fr.fn.syntax == fn.syntax { + return nil, fmt.Errorf("function %s called recursively", fn.Name()) + } + } + + fr := &Frame{ + thread: thread, + parent: thread.frame, + fn: fn, + globals: fn.globals, + locals: make([]Value, len(fn.syntax.Locals)), + } + + if err := fn.setArgs(fr, args, kwargs); err != nil { + return nil, err + } + + thread.frame = fr + err := execStmts(fr, fn.syntax.Body) + thread.frame = fr.parent // restore + + if err != nil { + if err == errReturn { + return fr.result, nil + } + return nil, err + } + return None, nil +} + +// setArgs sets the values of the formal parameters of function fn in +// frame fr based on the actual parameter values in args and kwargs. +func (fn *Function) setArgs(fr *Frame, args Tuple, kwargs []Tuple) error { + cond := func(x bool, y, z interface{}) interface{} { + if x { + return y + } + return z + } + + // nparams is the number of ordinary parameters (sans * or **). + nparams := len(fn.syntax.Params) + if fn.syntax.HasVarargs { + nparams-- + } + if fn.syntax.HasKwargs { + nparams-- + } + + // This is the algorithm from PyEval_EvalCodeEx. 
+ var kwdict *Dict + n := len(args) + if nparams > 0 || fn.syntax.HasVarargs || fn.syntax.HasKwargs { + if fn.syntax.HasKwargs { + kwdict = new(Dict) + fr.locals[len(fn.syntax.Params)-1] = kwdict + } + + // too many args? + if len(args) > nparams { + if !fn.syntax.HasVarargs { + return fr.errorf(fn.position, "function %s takes %s %d argument%s (%d given)", + fn.Name(), + cond(len(fn.defaults) > 0, "at most", "exactly"), + nparams, + cond(nparams == 1, "", "s"), + len(args)+len(kwargs)) + } + n = nparams + } + + // set of defined (regular) parameters + var defined intset + defined.init(nparams) + + // ordinary parameters + for i := 0; i < n; i++ { + fr.locals[i] = args[i] + defined.set(i) + } + + // variadic arguments + if fn.syntax.HasVarargs { + tuple := make(Tuple, len(args)-n) + for i := n; i < len(args); i++ { + tuple[i-n] = args[i] + } + fr.locals[nparams] = tuple + } + + // keyword arguments + paramIdents := fn.syntax.Locals[:nparams] + for _, pair := range kwargs { + k, v := pair[0].(String), pair[1] + if i := findParam(paramIdents, string(k)); i >= 0 { + if defined.set(i) { + return fr.errorf(fn.position, "function %s got multiple values for keyword argument %s", fn.Name(), k) + } + fr.locals[i] = v + continue + } + if kwdict == nil { + return fr.errorf(fn.position, "function %s got an unexpected keyword argument %s", fn.Name(), k) + } + kwdict.Set(k, v) + } + + // default values + if len(args) < nparams { + m := nparams - len(fn.defaults) // first default + + // report errors for missing non-optional arguments + i := len(args) + for ; i < m; i++ { + if !defined.get(i) { + return fr.errorf(fn.position, "function %s takes %s %d argument%s (%d given)", + fn.Name(), + cond(fn.syntax.HasVarargs || len(fn.defaults) > 0, "at least", "exactly"), + m, + cond(m == 1, "", "s"), + defined.len()) + } + } + + // set default values + for ; i < nparams; i++ { + if !defined.get(i) { + fr.locals[i] = fn.defaults[i-m] + } + } + } + } else if nactual := len(args) + 
len(kwargs); nactual > 0 { + return fr.errorf(fn.position, "function %s takes no arguments (%d given)", fn.Name(), nactual) + } + return nil +} + +func findParam(params []*syntax.Ident, name string) int { + for i, param := range params { + if param.Name == name { + return i + } + } + return -1 +} + +type intset struct { + small uint64 // bitset, used if n < 64 + large map[int]bool // set, used if n >= 64 +} + +func (is *intset) init(n int) { + if n >= 64 { + is.large = make(map[int]bool) + } +} + +func (is *intset) set(i int) (prev bool) { + if is.large == nil { + prev = is.small&(1<<uint(i)) != 0 + is.small |= 1 << uint(i) + } else { + prev = is.large[i] + is.large[i] = true + } + return +} + +func (is *intset) get(i int) bool { + if is.large == nil { + return is.small&(1<<uint(i)) != 0 + } + return is.large[i] +} + +func (is *intset) len() int { + if is.large == nil { + // Suboptimal, but used only for error reporting. + len := 0 + for i := 0; i < 64; i++ { + if is.small&(1<<uint(i)) != 0 { + len++ + } + } + return len + } + return len(is.large) +} + +// See https://docs.python.org/2/library/stdtypes.html#string-formatting. +func interpolate(format string, x Value) (Value, error) { + var buf bytes.Buffer + path := make([]Value, 0, 4) + index := 0 + for { + i := strings.IndexByte(format, '%') + if i < 0 { + buf.WriteString(format) + break + } + buf.WriteString(format[:i]) + format = format[i+1:] + + if format != "" && format[0] == '%' { + buf.WriteByte('%') + format = format[1:] + continue + } + + var arg Value + if format != "" && format[0] == '(' { + // keyword argument: %(name)s. 
+ format = format[1:] + j := strings.IndexByte(format, ')') + if j < 0 { + return nil, fmt.Errorf("incomplete format key") + } + key := format[:j] + if dict, ok := x.(Mapping); !ok { + return nil, fmt.Errorf("format requires a mapping") + } else if v, found, _ := dict.Get(String(key)); found { + arg = v + } else { + return nil, fmt.Errorf("key not found: %s", key) + } + format = format[j+1:] + } else { + // positional argument: %s. + if tuple, ok := x.(Tuple); ok { + if index >= len(tuple) { + return nil, fmt.Errorf("not enough arguments for format string") + } + arg = tuple[index] + } else if index > 0 { + return nil, fmt.Errorf("not enough arguments for format string") + } else { + arg = x + } + } + + // NOTE: Skylark does not support any of these optional Python features: + // - optional conversion flags: [#0- +], etc. + // - optional minimum field width (number or *). + // - optional precision (.123 or *) + // - optional length modifier + + // conversion type + if format == "" { + return nil, fmt.Errorf("incomplete format") + } + switch c := format[0]; c { + case 's', 'r': + if str, ok := AsString(arg); ok && c == 's' { + buf.WriteString(str) + } else { + writeValue(&buf, arg, path) + } + case 'd', 'i', 'o', 'x', 'X': + i, err := ConvertToInt(arg) + if err != nil { + return nil, fmt.Errorf("%%%c format requires integer: %v", c, err) + } + switch c { + case 'd', 'i': + buf.WriteString(i.bigint.Text(10)) + case 'o': + buf.WriteString(i.bigint.Text(8)) + case 'x': + buf.WriteString(i.bigint.Text(16)) + case 'X': + buf.WriteString(strings.ToUpper(i.bigint.Text(16))) + } + case 'e', 'f', 'g', 'E', 'F', 'G': + f, ok := AsFloat(arg) + if !ok { + return nil, fmt.Errorf("%%%c format requires float, not %s", c, arg.Type()) + } + switch c { + case 'e': + fmt.Fprintf(&buf, "%e", f) + case 'f': + fmt.Fprintf(&buf, "%f", f) + case 'g': + fmt.Fprintf(&buf, "%g", f) + case 'E': + fmt.Fprintf(&buf, "%E", f) + case 'F': + fmt.Fprintf(&buf, "%F", f) + case 'G': + 
fmt.Fprintf(&buf, "%G", f) + } + case 'c': + switch arg := arg.(type) { + case Int: + // chr(int) + r, err := AsInt32(arg) + if err != nil || r < 0 || r > unicode.MaxRune { + return nil, fmt.Errorf("%%c format requires a valid Unicode code point, got %s", arg) + } + buf.WriteRune(rune(r)) + case String: + r, size := utf8.DecodeRuneInString(string(arg)) + if size != len(arg) { + return nil, fmt.Errorf("%%c format requires a single-character string") + } + buf.WriteRune(r) + default: + return nil, fmt.Errorf("%%c format requires int or single-character string, not %s", arg.Type()) + } + case '%': + buf.WriteByte('%') + default: + return nil, fmt.Errorf("unknown conversion %%%c", c) + } + format = format[1:] + index++ + } + + if tuple, ok := x.(Tuple); ok && index < len(tuple) { + return nil, fmt.Errorf("too many arguments for format string") + } + + return String(buf.String()), nil +} diff --git a/eval_test.go b/eval_test.go new file mode 100644 index 0000000..41cb716 --- /dev/null +++ b/eval_test.go @@ -0,0 +1,435 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package skylark_test + +import ( + "bytes" + "fmt" + "math" + "path/filepath" + "strings" + "testing" + + "github.com/google/skylark" + "github.com/google/skylark/internal/chunkedfile" + "github.com/google/skylark/resolve" + "github.com/google/skylark/skylarktest" +) + +func init() { + // The tests make extensive use of these not-yet-standard features. + resolve.AllowLambda = true + resolve.AllowNestedDef = true + resolve.AllowFloat = true + resolve.AllowFreeze = true + resolve.AllowSet = true +} + +func TestEvalExpr(t *testing.T) { + // This is mostly redundant with the new *.sky tests. + // TODO(adonovan): move checks into *.sky files and + // reduce this to a mere unit test of skylark.Eval. 
+ thread := new(skylark.Thread) + for _, test := range []struct{ src, want string }{ + {`123`, `123`}, + {`-1`, `-1`}, + {`"a"+"b"`, `"ab"`}, + {`1+2`, `3`}, + + // lists + {`[]`, `[]`}, + {`[1]`, `[1]`}, + {`[1,]`, `[1]`}, + {`[1, 2]`, `[1, 2]`}, + {`[2 * x for x in [1, 2, 3]]`, `[2, 4, 6]`}, + {`[2 * x for x in [1, 2, 3] if x > 1]`, `[4, 6]`}, + {`[(x, y) for x in [1, 2] for y in [3, 4]]`, + `[(1, 3), (1, 4), (2, 3), (2, 4)]`}, + {`[(x, y) for x in [1, 2] if x == 2 for y in [3, 4]]`, + `[(2, 3), (2, 4)]`}, + // tuples + {`()`, `()`}, + {`(1)`, `1`}, + {`(1,)`, `(1,)`}, + {`(1, 2)`, `(1, 2)`}, + {`(1, 2, 3, 4, 5)`, `(1, 2, 3, 4, 5)`}, + // dicts + {`{}`, `{}`}, + {`{"a": 1}`, `{"a": 1}`}, + {`{"a": 1,}`, `{"a": 1}`}, + + // conditional + {`1 if 3 > 2 else 0`, `1`}, + {`1 if "foo" else 0`, `1`}, + {`1 if "" else 0`, `0`}, + + // indexing + {`["a", "b"][0]`, `"a"`}, + {`["a", "b"][1]`, `"b"`}, + {`("a", "b")[0]`, `"a"`}, + {`("a", "b")[1]`, `"b"`}, + {`"aΩb"[0]`, `"a"`}, + {`"aΩb"[1]`, `"\xce"`}, + {`"aΩb"[3]`, `"b"`}, + {`{"a": 1}["a"]`, `1`}, + {`{"a": 1}["b"]`, `key "b" not in dict`}, + {`{}[[]]`, `unhashable type: list`}, + {`{"a": 1}[[]]`, `unhashable type: list`}, + {`[x for x in range(3)]`, "[0, 1, 2]"}, + } { + var got string + if v, err := skylark.Eval(thread, "<expr>", test.src, nil); err != nil { + got = err.Error() + } else { + got = v.String() + } + if got != test.want { + t.Errorf("eval %s = %s, want %s", test.src, got, test.want) + } + } +} + +func TestExecFile(t *testing.T) { + testdata := skylarktest.DataFile("skylark", ".") + thread := &skylark.Thread{Load: load} + skylarktest.SetReporter(thread, t) + for _, file := range []string{ + "testdata/assign.sky", + "testdata/bool.sky", + "testdata/builtins.sky", + "testdata/control.sky", + "testdata/dict.sky", + "testdata/float.sky", + "testdata/function.sky", + "testdata/int.sky", + "testdata/list.sky", + "testdata/misc.sky", + "testdata/set.sky", + "testdata/string.sky", + "testdata/tuple.sky", + } { + 
filename := filepath.Join(testdata, file) + for _, chunk := range chunkedfile.Read(filename, t) { + globals := skylark.StringDict{ + "hasfields": skylark.NewBuiltin("hasfields", newHasFields), + "fibonacci": fib{}, + } + err := skylark.ExecFile(thread, filename, chunk.Source, globals) + switch err := err.(type) { + case *skylark.EvalError: + found := false + for _, fr := range err.Stack() { + posn := fr.Position() + if posn.Filename() == filename { + chunk.GotError(int(posn.Line), err.Error()) + found = true + break + } + } + if !found { + t.Error(err.Backtrace()) + } + case nil: + // success + default: + t.Error(err) + } + chunk.Done() + } + } +} + +// A fib is an iterable value representing the infinite Fibonacci sequence. +type fib struct{} + +func (t fib) Freeze() {} +func (t fib) String() string { return "fib" } +func (t fib) Type() string { return "fib" } +func (t fib) Truth() skylark.Bool { return true } +func (t fib) Hash() (uint32, error) { return 0, fmt.Errorf("fib is unhashable") } +func (t fib) Iterate() skylark.Iterator { return &fibIterator{0, 1} } + +type fibIterator struct{ x, y int } + +func (it *fibIterator) Next(p *skylark.Value) bool { + *p = skylark.MakeInt(it.x) + it.x, it.y = it.y, it.x+it.y + return true +} +func (it *fibIterator) Done() {} + +// load implements the 'load' operation as used in the evaluator tests. +func load(thread *skylark.Thread, module string) (skylark.StringDict, error) { + if module == "assert.sky" { + return skylarktest.LoadAssertModule() + } + + // TODO(adonovan): test load() using this execution path. 
+ globals := make(skylark.StringDict) + filename := filepath.Join(filepath.Dir(thread.Caller().Position().Filename()), module) + err := skylark.ExecFile(thread, filename, nil, globals) + return globals, err +} + +func newHasFields(thread *skylark.Thread, _ *skylark.Builtin, args skylark.Tuple, kwargs []skylark.Tuple) (skylark.Value, error) { + return &hasfields{attrs: make(map[string]skylark.Value)}, nil +} + +// hasfields is a test-only implementation of HasAttrs. +// It permits any field to be set. +// Clients will likely want to provide their own implementation, +// so we don't have any public implementation. +type hasfields struct { + attrs skylark.StringDict + frozen bool +} + +var _ skylark.HasAttrs = (*hasfields)(nil) + +func (hf *hasfields) String() string { return "hasfields" } +func (hf *hasfields) Type() string { return "hasfields" } +func (hf *hasfields) Truth() skylark.Bool { return true } +func (hf *hasfields) Hash() (uint32, error) { return 42, nil } + +func (hf *hasfields) Freeze() { + if !hf.frozen { + hf.frozen = true + for _, v := range hf.attrs { + v.Freeze() + } + } +} + +func (hf *hasfields) Attr(name string) (skylark.Value, error) { return hf.attrs[name], nil } + +func (hf *hasfields) SetField(name string, val skylark.Value) error { + if hf.frozen { + return fmt.Errorf("cannot set field on a frozen hasfields") + } + hf.attrs[name] = val + return nil +} + +func (hf *hasfields) AttrNames() []string { + names := make([]string, 0, len(hf.attrs)) + for key := range hf.attrs { + names = append(names, key) + } + return names +} + +func TestParameterPassing(t *testing.T) { + const filename = "parameters.go" + const src = ` +def a(): + return +def b(a, b): + return a, b +def c(a, b=42): + return a, b +def d(*args): + return args +def e(**kwargs): + return kwargs +def f(a, b=42, *args, **kwargs): + return a, b, args, kwargs +` + + thread := new(skylark.Thread) + globals := make(skylark.StringDict) + if err := skylark.ExecFile(thread, filename, src, 
globals); err != nil { + t.Fatal(err) + } + + for _, test := range []struct{ src, want string }{ + {`a()`, `None`}, + {`a(1)`, `function a takes no arguments (1 given)`}, + {`b()`, `function b takes exactly 2 arguments (0 given)`}, + {`b(1)`, `function b takes exactly 2 arguments (1 given)`}, + {`b(1, 2)`, `(1, 2)`}, + {`b`, `<function b>`}, // asserts that b's parameter b was treated as a local variable + {`b(1, 2, 3)`, `function b takes exactly 2 arguments (3 given)`}, + {`b(1, b=2)`, `(1, 2)`}, + {`b(1, a=2)`, `function b got multiple values for keyword argument "a"`}, + {`b(1, x=2)`, `function b got an unexpected keyword argument "x"`}, + {`b(a=1, b=2)`, `(1, 2)`}, + {`b(b=1, a=2)`, `(2, 1)`}, + {`b(b=1, a=2, x=1)`, `function b got an unexpected keyword argument "x"`}, + {`b(x=1, b=1, a=2)`, `function b got an unexpected keyword argument "x"`}, + {`c()`, `function c takes at least 1 argument (0 given)`}, + {`c(1)`, `(1, 42)`}, + {`c(1, 2)`, `(1, 2)`}, + {`c(1, 2, 3)`, `function c takes at most 2 arguments (3 given)`}, + {`c(1, b=2)`, `(1, 2)`}, + {`c(1, a=2)`, `function c got multiple values for keyword argument "a"`}, + {`c(a=1, b=2)`, `(1, 2)`}, + {`c(b=1, a=2)`, `(2, 1)`}, + {`d()`, `()`}, + {`d(1)`, `(1,)`}, + {`d(1, 2)`, `(1, 2)`}, + {`d(1, 2, k=3)`, `function d got an unexpected keyword argument "k"`}, + {`d(args=[])`, `function d got an unexpected keyword argument "args"`}, + {`e()`, `{}`}, + {`e(1)`, `function e takes exactly 0 arguments (1 given)`}, + {`e(k=1)`, `{"k": 1}`}, + {`e(kwargs={})`, `{"kwargs": {}}`}, + {`f()`, `function f takes at least 1 argument (0 given)`}, + {`f(0)`, `(0, 42, (), {})`}, + {`f(0)`, `(0, 42, (), {})`}, + {`f(0, 1)`, `(0, 1, (), {})`}, + {`f(0, 1, 2)`, `(0, 1, (2,), {})`}, + {`f(0, 1, 2, 3)`, `(0, 1, (2, 3), {})`}, + {`f(a=0)`, `(0, 42, (), {})`}, + {`f(0, b=1)`, `(0, 1, (), {})`}, + {`f(0, a=1)`, `function f got multiple values for keyword argument "a"`}, + {`f(0, b=1, c=2)`, `(0, 1, (), {"c": 2})`}, + } { + var got 
string + if v, err := skylark.Eval(thread, "<expr>", test.src, globals); err != nil { + got = err.Error() + } else { + got = v.String() + } + if got != test.want { + t.Errorf("eval %s = %s, want %s", test.src, got, test.want) + } + } +} + +// TestPrint ensures that the Skylark print function calls +// Thread.Print, if provided. +func TestPrint(t *testing.T) { + const src = ` +print("hello") +def f(): print("world") +f() +` + buf := new(bytes.Buffer) + print := func(thread *skylark.Thread, msg string) { + caller := thread.Caller() + name := "<module>" + if caller.Function() != nil { + name = caller.Function().Name() + } + fmt.Fprintf(buf, "%s: %s: %s\n", caller.Position(), name, msg) + } + thread := &skylark.Thread{Print: print} + globals := make(skylark.StringDict) + if err := skylark.ExecFile(thread, "foo.go", src, globals); err != nil { + t.Fatal(err) + } + want := "foo.go:2:6: <module>: hello\n" + + "foo.go:3:15: f: world\n" + if got := buf.String(); got != want { + t.Errorf("output was %s, want %s", got, want) + } +} + +func Benchmark(b *testing.B) { + testdata := skylarktest.DataFile("skylark", ".") + thread := new(skylark.Thread) + for _, file := range []string{ + "testdata/benchmark.sky", + // ... + } { + filename := filepath.Join(testdata, file) + + // Evaluate the file once. + globals := make(skylark.StringDict) + if err := skylark.ExecFile(thread, filename, nil, globals); err != nil { + reportEvalError(b, err) + } + + // Repeatedly call each global function named bench_* as a benchmark. 
+ for name, value := range globals { + if fn, ok := value.(*skylark.Function); ok && strings.HasPrefix(name, "bench_") { + b.Run(name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, err := skylark.Call(thread, fn, nil, nil) + if err != nil { + reportEvalError(b, err) + } + } + }) + } + } + } +} + +func reportEvalError(tb testing.TB, err error) { + if err, ok := err.(*skylark.EvalError); ok { + tb.Fatal(err.Backtrace()) + } + tb.Fatal(err) +} + +// TestInt exercises the Int.Int64 and Int.Uint64 methods. +// If we can move their logic into math/big, delete this test. +func TestInt(t *testing.T) { + one := skylark.MakeInt(1) + + for _, test := range []struct { + i skylark.Int + wantInt64 string + wantUint64 string + }{ + {skylark.MakeInt64(math.MinInt64).Sub(one), "error", "error"}, + {skylark.MakeInt64(math.MinInt64), "-9223372036854775808", "error"}, + {skylark.MakeInt64(-1), "-1", "error"}, + {skylark.MakeInt64(0), "0", "0"}, + {skylark.MakeInt64(1), "1", "1"}, + {skylark.MakeInt64(math.MaxInt64), "9223372036854775807", "9223372036854775807"}, + {skylark.MakeUint64(math.MaxUint64), "error", "18446744073709551615"}, + {skylark.MakeUint64(math.MaxUint64).Add(one), "error", "error"}, + } { + gotInt64, gotUint64 := "error", "error" + if i, ok := test.i.Int64(); ok { + gotInt64 = fmt.Sprint(i) + } + if u, ok := test.i.Uint64(); ok { + gotUint64 = fmt.Sprint(u) + } + if gotInt64 != test.wantInt64 { + t.Errorf("(%s).Int64() = %s, want %s", test.i, gotInt64, test.wantInt64) + } + if gotUint64 != test.wantUint64 { + t.Errorf("(%s).Uint64() = %s, want %s", test.i, gotUint64, test.wantUint64) + } + } +} + +func TestBacktrace(t *testing.T) { + // This test ensures continuity of the stack of active Skylark + // functions, including propagation through built-ins such as 'min' + // (though min does not itself appear in the stack). 
+ const src = ` +def f(x): return 1//x +def g(x): f(x) +def h(): return min([1, 2, 0], key=g) +def i(): return h() +i() +` + thread := new(skylark.Thread) + globals := make(skylark.StringDict) + err := skylark.ExecFile(thread, "crash.go", src, globals) + switch err := err.(type) { + case *skylark.EvalError: + got := err.Backtrace() + const want = `Traceback (most recent call last): + crash.go:6:2: in <toplevel> + crash.go:5:18: in i + crash.go:4:20: in h + crash.go:3:12: in g + crash.go:2:19: in f +Error: floored division by zero` + if got != want { + t.Errorf("error was %s, want %s", got, want) + } + case nil: + t.Error("ExecFile succeeded unexpectedly") + default: + t.Errorf("ExecFile failed with %v, wanted *EvalError", err) + } +} diff --git a/example_test.go b/example_test.go new file mode 100644 index 0000000..7ab6c43 --- /dev/null +++ b/example_test.go @@ -0,0 +1,295 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package skylark_test + +import ( + "fmt" + "log" + "sort" + "strings" + "sync" + "sync/atomic" + "unsafe" + + "github.com/google/skylark" +) + +// ExampleEmbedding demonstrates a simple embedding +// of the Skylark interpreter into a Go program. +func ExampleEmbedding() { + const data = ` +print(greeting + ", world") + +squares = [x*x for x in range(10)] +` + + thread := &skylark.Thread{ + Print: func(_ *skylark.Thread, msg string) { fmt.Println(msg) }, + } + globals := skylark.StringDict{ + "greeting": skylark.String("hello"), + } + if err := skylark.ExecFile(thread, "apparent/filename.sky", data, globals); err != nil { + if evalErr, ok := err.(*skylark.EvalError); ok { + log.Fatal(evalErr.Backtrace()) + } + log.Fatal(err) + } + + // Print the global environment. 
+ var names []string + for name := range globals { + names = append(names, name) + } + sort.Strings(names) + fmt.Println("\nGlobals:") + for _, name := range names { + v := globals[name] + fmt.Printf("%s (%s) = %s\n", name, v.Type(), v.String()) + } + + // Output: + // hello, world + // + // Globals: + // greeting (string) = "hello" + // squares (list) = [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] +} + +// ExampleLoadSequential demonstrates a simple caching +// implementation of 'load' that works sequentially. +func ExampleLoadSequential() { + fakeFilesystem := map[string]string{ + "c.sky": `load("b.sky", "b"); c = b + "!"`, + "b.sky": `load("a.sky", "a"); b = a + ", world"`, + "a.sky": `a = "Hello"`, + } + + type entry struct { + globals skylark.StringDict + err error + } + + cache := make(map[string]*entry) + + load := func(thread *skylark.Thread, module string) (skylark.StringDict, error) { + e, ok := cache[module] + if e == nil { + if ok { + // request for package whose loading is in progress + return nil, fmt.Errorf("cycle in load graph") + } + + // Add a placeholder to indicate "load in progress". + cache[module] = nil + + // Load it. + data := fakeFilesystem[module] + globals := make(skylark.StringDict) + err := skylark.ExecFile(thread, module, data, globals) + e = &entry{globals, err} + + // Update the cache. + cache[module] = e + } + return e.globals, e.err + } + + thread := &skylark.Thread{Load: load} + globals, err := load(thread, "c.sky") + if err != nil { + log.Fatal(err) + } + fmt.Println(globals["c"]) + + // Output: + // "Hello, world!" +} + +// ExampleLoadParallel demonstrates a parallel implementation +// of 'load' with caching, duplicate suppression, and cycle detection. 
+func ExampleLoadParallel() { + cache := &cache{ + cache: make(map[string]*entry), + fakeFilesystem: map[string]string{ + "c.sky": `load("a.sky", "a"); c = a * 2`, + "b.sky": `load("a.sky", "a"); b = a * 3`, + "a.sky": `a = 1; print("loaded a")`, + }, + } + + // We load modules b and c in parallel by concurrent calls to + // cache.Load. Both of them load module a, but a is executed + // only once, as witnessed by the sole output of its print + // statement. + + ch := make(chan string) + for _, name := range []string{"b", "c"} { + go func(name string) { + globals, err := cache.Load(name + ".sky") + if err != nil { + log.Fatal(err) + } + ch <- fmt.Sprintf("%s = %s", name, globals[name]) + }(name) + } + got := []string{<-ch, <-ch} + sort.Strings(got) + fmt.Println(strings.Join(got, "\n")) + + // Output: + // loaded a + // b = 3 + // c = 2 +} + +// ExampleLoadParallelCycle demonstrates detection +// of cycles during parallel loading. +func ExampleLoadParallelCycle() { + cache := &cache{ + cache: make(map[string]*entry), + fakeFilesystem: map[string]string{ + "c.sky": `load("b.sky", "b"); c = b * 2`, + "b.sky": `load("a.sky", "a"); b = a * 3`, + "a.sky": `load("c.sky", "c"); a = c * 5; print("loaded a")`, + }, + } + + ch := make(chan string) + for _, name := range "bc" { + name := string(name) + go func() { + _, err := cache.Load(name + ".sky") + if err == nil { + log.Fatalf("Load of %s.sky succeeded unexpectedly", name) + } + ch <- err.Error() + }() + } + got := []string{<-ch, <-ch} + sort.Strings(got) + fmt.Println(strings.Join(got, "\n")) + + // Output: + // cannot load a.sky: cannot load c.sky: cycle in load graph + // cannot load b.sky: cannot load a.sky: cannot load c.sky: cycle in load graph +} + +// cache is a concurrency-safe, duplicate-suppressing, +// non-blocking cache of the doLoad function. +// See Section 9.7 of gopl.io for an explanation of this structure. +// It also features online deadlock (load cycle) detection. 
+type cache struct { + cacheMu sync.Mutex + cache map[string]*entry + + fakeFilesystem map[string]string +} + +type entry struct { + owner unsafe.Pointer // a *cycleChecker; see cycleCheck + globals skylark.StringDict + err error + ready chan struct{} +} + +func (c *cache) Load(module string) (skylark.StringDict, error) { + return c.get(new(cycleChecker), module) +} + +// get loads and returns an entry (if not already loaded). +func (c *cache) get(cc *cycleChecker, module string) (skylark.StringDict, error) { + c.cacheMu.Lock() + e := c.cache[module] + if e != nil { + c.cacheMu.Unlock() + // Some other goroutine is getting this module. + // Wait for it to become ready. + + // Detect load cycles to avoid deadlocks. + if err := cycleCheck(e, cc); err != nil { + return nil, err + } + + cc.setWaitsFor(e) + <-e.ready + cc.setWaitsFor(nil) + } else { + // First request for this module. + e = &entry{ready: make(chan struct{})} + c.cache[module] = e + c.cacheMu.Unlock() + + e.setOwner(cc) + e.globals, e.err = c.doLoad(cc, module) + e.setOwner(nil) + + // Broadcast that the entry is now ready. + close(e.ready) + } + return e.globals, e.err +} + +func (c *cache) doLoad(cc *cycleChecker, module string) (skylark.StringDict, error) { + thread := &skylark.Thread{ + Print: func(_ *skylark.Thread, msg string) { fmt.Println(msg) }, + Load: func(_ *skylark.Thread, module string) (skylark.StringDict, error) { + // Tunnel the cycle-checker state for this "thread of loading". + return c.get(cc, module) + }, + } + data := c.fakeFilesystem[module] + globals := make(skylark.StringDict) + err := skylark.ExecFile(thread, module, data, globals) + if err != nil { + return nil, err + } + return globals, nil +} + +// -- concurrent cycle checking -- + +// A cycleChecker is used for concurrent deadlock detection. +// Each top-level call to Load creates its own cycleChecker, +// which is passed to all recursive calls it makes. 
+// It corresponds to a logical thread in the deadlock detection literature. +type cycleChecker struct { + waitsFor unsafe.Pointer // an *entry; see cycleCheck +} + +func (cc *cycleChecker) setWaitsFor(e *entry) { + atomic.StorePointer(&cc.waitsFor, unsafe.Pointer(e)) +} + +func (e *entry) setOwner(cc *cycleChecker) { + atomic.StorePointer(&e.owner, unsafe.Pointer(cc)) +} + +// cycleCheck reports whether there is a path in the waits-for graph +// from resource 'e' to thread 'me'. +// +// The waits-for graph (WFG) is a bipartite graph whose nodes are +// alternately of type entry and cycleChecker. Each node has at most +// one outgoing edge. An entry has an "owner" edge to a cycleChecker +// while it is being readied by that cycleChecker, and a cycleChecker +// has a "waits-for" edge to an entry while it is waiting for that entry +// to become ready. +// +// Before adding a waits-for edge, the cache checks whether the new edge +// would form a cycle. If so, this indicates that the load graph is +// cyclic and that the following wait operation would deadlock. +func cycleCheck(e *entry, me *cycleChecker) error { + for e != nil { + cc := (*cycleChecker)(atomic.LoadPointer(&e.owner)) + if cc == nil { + break + } + if cc == me { + return fmt.Errorf("cycle in load graph") + } + e = (*entry)(atomic.LoadPointer(&cc.waitsFor)) + } + return nil +} diff --git a/hashtable.go b/hashtable.go new file mode 100644 index 0000000..e5c3cbf --- /dev/null +++ b/hashtable.go @@ -0,0 +1,342 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package skylark + +import "fmt" + +// hashtable is used to represent Skylark dict and set values. +// It is a hash table whose key/value entries form a doubly-linked list +// in the order the entries were inserted. 
+type hashtable struct { + table []bucket // len is zero or a power of two + bucket0 [1]bucket // inline allocation for small maps. + len uint32 + itercount uint32 // number of active iterators (ignored if frozen) + head *entry // insertion order doubly-linked list; may be nil + tailLink **entry // address of nil link at end of list (perhaps &head) + frozen bool +} + +const bucketSize = 8 + +type bucket struct { + entries [bucketSize]entry + next *bucket // linked list of buckets +} + +type entry struct { + hash uint32 // nonzero => in use + key, value Value + next *entry // insertion order doubly-linked list; may be nil + prevLink **entry // address of link to this entry (perhaps &head) +} + +func (ht *hashtable) freeze() { + if !ht.frozen { + ht.frozen = true + for i := range ht.table { + for p := &ht.table[i]; p != nil; p = p.next { + for i := range p.entries { + e := &p.entries[i] + if e.hash != 0 { + e.key.Freeze() + e.value.Freeze() + } + } + } + } + } +} + +func (ht *hashtable) insert(k, v Value) error { + if ht.frozen { + return fmt.Errorf("cannot insert into frozen hash table") + } + if ht.itercount > 0 { + return fmt.Errorf("cannot insert into hash table during iteration") + } + if ht.table == nil { + ht.table = ht.bucket0[:1] + ht.tailLink = &ht.head + } + h, err := k.Hash() + if err != nil { + return err + } + if h == 0 { + h = 1 // zero is reserved + } + +retry: + var insert *entry + + // Inspect each bucket in the bucket list. + p := &ht.table[h&(uint32(len(ht.table)-1))] + for { + for i := range p.entries { + e := &p.entries[i] + if e.hash != h { + if e.hash == 0 { + // Found empty entry; make a note. + insert = e + } + continue + } + if eq, err := Equal(k, e.key); err != nil { + return err // e.g. excessively recursive tuple + } else if !eq { + continue + } + // Key already present; update value. + e.value = v + return nil + } + if p.next == nil { + break + } + p = p.next + } + + // Key not found. p points to the last bucket. 
+ + // Does the number of elements exceed the buckets' load factor? + if overloaded(int(ht.len), len(ht.table)) { + ht.grow() + goto retry + } + + if insert == nil { + // No space in existing buckets. Add a new one to the bucket list. + b := new(bucket) + p.next = b + insert = &b.entries[0] + } + + // Insert key/value pair. + insert.hash = h + insert.key = k + insert.value = v + + // Append entry to doubly-linked list. + insert.prevLink = ht.tailLink + *ht.tailLink = insert + ht.tailLink = &insert.next + + ht.len++ + + return nil +} + +func overloaded(elems, buckets int) bool { + const loadFactor = 6.5 // just a guess + return elems >= bucketSize && float64(elems) >= loadFactor*float64(buckets) +} + +func (ht *hashtable) grow() { + // Double the number of buckets and rehash. + // TODO(adonovan): opt: + // - avoid reentrant calls to ht.insert, and specialize it. + // e.g. we know the calls to Equals will return false since + // there are no duplicates among the old keys. + // - saving the entire hash in the bucket would avoid the need to + // recompute the hash. + // - save the old buckets on a free list. + ht.table = make([]bucket, len(ht.table)<<1) + oldhead := ht.head + ht.head = nil + ht.tailLink = &ht.head + ht.len = 0 + for e := oldhead; e != nil; e = e.next { + ht.insert(e.key, e.value) + } + ht.bucket0[0] = bucket{} // clear out unused initial bucket +} + +func (ht *hashtable) lookup(k Value) (v Value, found bool, err error) { + h, err := k.Hash() + if err != nil { + return nil, false, err // unhashable + } + if h == 0 { + h = 1 // zero is reserved + } + if ht.table == nil { + return None, false, nil // empty + } + + // Inspect each bucket in the bucket list. + for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { + for i := range p.entries { + e := &p.entries[i] + if e.hash == h { + if eq, err := Equal(k, e.key); err != nil { + return nil, false, err // e.g. 
excessively recursive tuple + } else if eq { + return e.value, true, nil // found + } + } + } + } + return None, false, nil // not found +} + +// Items returns all the items in the map (as key/value pairs) in insertion order. +func (ht *hashtable) items() []Tuple { + items := make([]Tuple, 0, ht.len) + array := make([]Value, ht.len*2) // allocate a single backing array + for e := ht.head; e != nil; e = e.next { + pair := Tuple(array[:2:2]) + array = array[2:] + pair[0] = e.key + pair[1] = e.value + items = append(items, pair) + } + return items +} + +func (ht *hashtable) first() (Value, bool) { + if ht.head != nil { + return ht.head.key, true + } + return None, false +} + +func (ht *hashtable) keys() []Value { + keys := make([]Value, 0, ht.len) + for e := ht.head; e != nil; e = e.next { + keys = append(keys, e.key) + } + return keys +} + +func (ht *hashtable) delete(k Value) (v Value, found bool, err error) { + if ht.frozen { + return nil, false, fmt.Errorf("cannot delete from frozen hash table") + } + if ht.itercount > 0 { + return nil, false, fmt.Errorf("cannot delete from hash table during iteration") + } + if ht.table == nil { + return None, false, nil // empty + } + h, err := k.Hash() + if err != nil { + return nil, false, err // unhashable + } + if h == 0 { + h = 1 // zero is reserved + } + + // Inspect each bucket in the bucket list. + for p := &ht.table[h&(uint32(len(ht.table)-1))]; p != nil; p = p.next { + for i := range p.entries { + e := &p.entries[i] + if e.hash == h { + if eq, err := Equal(k, e.key); err != nil { + return nil, false, err + } else if eq { + // Remove e from doubly-linked list. + *e.prevLink = e.next + if e.next == nil { + ht.tailLink = e.prevLink // deletion of last entry + } else { + e.next.prevLink = e.prevLink + } + + v := e.value + *e = entry{} + ht.len-- + return v, true, nil // found + } + } + } + } + + // TODO(adonovan): opt: remove completely empty bucket from bucket list. 
+ + return None, false, nil // not found +} + +func (ht *hashtable) clear() error { + if ht.frozen { + return fmt.Errorf("cannot clear frozen hash table") + } + if ht.itercount > 0 { + return fmt.Errorf("cannot clear hash table during iteration") + } + if ht.table != nil { + for i := range ht.table { + ht.table[i] = bucket{} + } + } + ht.head = nil + ht.tailLink = &ht.head + ht.len = 0 + return nil +} + +// dump is provided as an aid to debugging. +func (ht *hashtable) dump() { + fmt.Printf("hashtable %p len=%d head=%p tailLink=%p", + ht, ht.len, ht.head, ht.tailLink) + if ht.tailLink != nil { + fmt.Printf(" *tailLink=%p", *ht.tailLink) + } + fmt.Println() + for j := range ht.table { + fmt.Printf("bucket chain %d\n", j) + for p := &ht.table[j]; p != nil; p = p.next { + fmt.Printf("bucket %p\n", p) + for i := range p.entries { + e := &p.entries[i] + fmt.Printf("\tentry %d @ %p hash=%d key=%v value=%v\n", + i, e, e.hash, e.key, e.value) + fmt.Printf("\t\tnext=%p &next=%p prev=%p", + e.next, &e.next, e.prevLink) + if e.prevLink != nil { + fmt.Printf(" *prev=%p", *e.prevLink) + } + fmt.Println() + } + } + } +} + +func (ht *hashtable) iterate() *keyIterator { + if !ht.frozen { + ht.itercount++ + } + return &keyIterator{ht: ht, e: ht.head} +} + +type keyIterator struct { + ht *hashtable + e *entry +} + +func (it *keyIterator) Next(k *Value) bool { + if it.e != nil { + *k = it.e.key + it.e = it.e.next + return true + } + return false +} + +func (it *keyIterator) Done() { + if !it.ht.frozen { + it.ht.itercount-- + } +} + +// hashString computes the FNV hash of s. +func hashString(s string) uint32 { + var h uint32 + for i := 0; i < len(s); i++ { + h ^= uint32(s[i]) + h *= 16777619 + } + return h +} diff --git a/hashtable_test.go b/hashtable_test.go new file mode 100644 index 0000000..2d41997 --- /dev/null +++ b/hashtable_test.go @@ -0,0 +1,77 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package skylark + +import ( + "math/rand" + "testing" +) + +func TestHashtable(t *testing.T) { + testHashtable(t, make(map[int]bool)) +} + +func BenchmarkHashtable(b *testing.B) { + // TODO(adonovan): MakeInt probably dominates the cost of this benchmark. + // Optimise or avoid it. + for i := 0; i < b.N; i++ { + testHashtable(b, nil) + } +} + +// testHashtable is both a test and a benchmark of hashtable. +// When sane != nil, it acts as a test against the semantics of Go's map. +func testHashtable(tb testing.TB, sane map[int]bool) { + zipf := rand.NewZipf(rand.New(rand.NewSource(0)), 1.1, 1.0, 1000.0) + var ht hashtable + + // Insert 10000 random ints into the map. + for j := 0; j < 10000; j++ { + k := int(zipf.Uint64()) + if err := ht.insert(MakeInt(k), None); err != nil { + tb.Fatal(err) + } + if sane != nil { + sane[k] = true + } + } + + // Do 10000 random lookups in the map. + for j := 0; j < 10000; j++ { + k := int(zipf.Uint64()) + _, found, err := ht.lookup(MakeInt(k)) + if err != nil { + tb.Fatal(err) + } + if sane != nil { + _, found2 := sane[k] + if found != found2 { + tb.Fatal("sanity check failed") + } + } + } + + // Do 10000 random deletes from the map. + for j := 0; j < 10000; j++ { + k := int(zipf.Uint64()) + _, found, err := ht.delete(MakeInt(k)) + if err != nil { + tb.Fatal(err) + } + if sane != nil { + _, found2 := sane[k] + if found != found2 { + tb.Fatal("sanity check failed") + } + delete(sane, k) + } + } + + if sane != nil { + if int(ht.len) != len(sane) { + tb.Fatal("sanity check failed") + } + } +} @@ -0,0 +1,223 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package skylark + +import ( + "fmt" + "math" + "math/big" + + "github.com/google/skylark/syntax" +) + +// Int is the type of a Skylark int. 
+type Int struct{ bigint *big.Int } + +// MakeInt returns a Skylark int for the specified signed integer. +func MakeInt(x int) Int { return MakeInt64(int64(x)) } + +// MakeInt64 returns a Skylark int for the specified int64. +func MakeInt64(x int64) Int { + if 0 <= x && x < int64(len(smallint)) { + if !smallintok { + panic("MakeInt64 used before initialization") + } + return Int{&smallint[x]} + } + return Int{new(big.Int).SetInt64(x)} +} + +// MakeUint returns a Skylark int for the specified unsigned integer. +func MakeUint(x uint) Int { return MakeUint64(uint64(x)) } + +// MakeUint64 returns a Skylark int for the specified uint64. +func MakeUint64(x uint64) Int { + if x < uint64(len(smallint)) { + if !smallintok { + panic("MakeUint64 used before initialization") + } + return Int{&smallint[x]} + } + return Int{new(big.Int).SetUint64(uint64(x))} +} + +var ( + smallint [256]big.Int + smallintok bool + zero, one Int +) + +func init() { + for i := range smallint { + smallint[i].SetInt64(int64(i)) + } + smallintok = true + + zero = MakeInt64(0) + one = MakeInt64(1) +} + +// Int64 returns the value as an int64. +// If it is not exactly representable the result is undefined and ok is false. +func (i Int) Int64() (_ int64, ok bool) { + x, acc := bigintToInt64(i.bigint) + if acc != big.Exact { + return // inexact + } + return x, true +} + +// Uint64 returns the value as a uint64. +// If it is not exactly representable the result is undefined and ok is false. +func (i Int) Uint64() (_ uint64, ok bool) { + x, acc := bigintToUint64(i.bigint) + if acc != big.Exact { + return // inexact + } + return x, true +} + +// The math/big API should provide this function. 
+func bigintToInt64(i *big.Int) (int64, big.Accuracy) { + sign := i.Sign() + if sign > 0 { + if i.Cmp(maxint64) > 0 { + return math.MaxInt64, big.Below + } + } else if sign < 0 { + if i.Cmp(minint64) < 0 { + return math.MinInt64, big.Above + } + } + return i.Int64(), big.Exact +} + +// The math/big API should provide this function. +func bigintToUint64(i *big.Int) (uint64, big.Accuracy) { + sign := i.Sign() + if sign > 0 { + if i.BitLen() > 64 { + return math.MaxUint64, big.Below + } + } else if sign < 0 { + return 0, big.Above + } + return i.Uint64(), big.Exact +} + +var ( + minint64 = new(big.Int).SetInt64(math.MinInt64) + maxint64 = new(big.Int).SetInt64(math.MaxInt64) +) + +func (i Int) String() string { return i.bigint.String() } +func (i Int) Type() string { return "int" } +func (i Int) Freeze() {} // immutable +func (i Int) Truth() Bool { return i.Sign() != 0 } +func (i Int) Hash() (uint32, error) { + var lo big.Word + if i.bigint.Sign() != 0 { + lo = i.bigint.Bits()[0] + } + return 12582917 * uint32(lo+3), nil +} +func (x Int) CompareSameType(op syntax.Token, y Value, depth int) (bool, error) { + return threeway(op, x.bigint.Cmp(y.(Int).bigint)), nil +} + +// Float returns the float value nearest i. +func (i Int) Float() Float { + // TODO(adonovan): opt: handle common values without allocation. + f, _ := new(big.Float).SetInt(i.bigint).Float64() + return Float(f) +} + +func (x Int) Sign() int { return x.bigint.Sign() } +func (x Int) Add(y Int) Int { return Int{new(big.Int).Add(x.bigint, y.bigint)} } +func (x Int) Sub(y Int) Int { return Int{new(big.Int).Sub(x.bigint, y.bigint)} } +func (x Int) Mul(y Int) Int { return Int{new(big.Int).Mul(x.bigint, y.bigint)} } +func (x Int) Or(y Int) Int { return Int{new(big.Int).Or(x.bigint, y.bigint)} } +func (x Int) And(y Int) Int { return Int{new(big.Int).And(x.bigint, y.bigint)} } + +// Precondition: y is nonzero. 
+func (x Int) Div(y Int) Int { + // http://python-history.blogspot.com/2010/08/why-pythons-integer-division-floors.html + var quo, rem big.Int + quo.QuoRem(x.bigint, y.bigint, &rem) + if (x.bigint.Sign() < 0) != (y.bigint.Sign() < 0) && rem.Sign() != 0 { + quo.Sub(&quo, one.bigint) + } + return Int{&quo} +} + +// Precondition: y is nonzero. +func (x Int) Mod(y Int) Int { + var quo, rem big.Int + quo.QuoRem(x.bigint, y.bigint, &rem) + if (x.bigint.Sign() < 0) != (y.bigint.Sign() < 0) && rem.Sign() != 0 { + rem.Add(&rem, y.bigint) + } + return Int{&rem} +} + +func (i Int) rational() *big.Rat { return new(big.Rat).SetInt(i.bigint) } + +// AsInt32 returns the value of x if is representable as an int32. +func AsInt32(x Value) (int, error) { + i, ok := x.(Int) + if !ok { + return 0, fmt.Errorf("got %s, want int", x.Type()) + } + if i.bigint.BitLen() <= 32 { + v := i.bigint.Int64() + if v >= math.MinInt32 && v <= math.MaxInt32 { + return int(v), nil + } + } + return 0, fmt.Errorf("%s out of range", i) +} + +// ConvertToInt converts x to an integer value. An int is returned +// unchanged, a bool becomes 0 or 1, a float is truncated towards +// zero. ConvertToInt reports an error for all other values. +func ConvertToInt(x Value) (Int, error) { + switch x := x.(type) { + case Bool: + if x { + return one, nil + } else { + return zero, nil + } + case Int: + return x, nil + case Float: + f := float64(x) + if math.IsInf(f, 0) { + return zero, fmt.Errorf("cannot convert float infinity to integer") + } else if math.IsNaN(f) { + return zero, fmt.Errorf("cannot convert float NaN to integer") + } else { + return finiteFloatToInt(x), nil + } + } + return zero, fmt.Errorf("cannot convert %s to int", x.Type()) +} + +// finiteFloatToInt converts f to an Int, truncating towards zero. +// f must be finite. 
+func finiteFloatToInt(f Float) Int { + var i big.Int + if math.MinInt64 <= f && f <= math.MaxInt64 { + // small values + i.SetInt64(int64(f)) + } else { + rat := f.rational() + if rat == nil { + panic(f) // non-finite + } + i.Div(rat.Num(), rat.Denom()) + } + return Int{&i} +} diff --git a/internal/chunkedfile/chunkedfile.go b/internal/chunkedfile/chunkedfile.go new file mode 100644 index 0000000..9200537 --- /dev/null +++ b/internal/chunkedfile/chunkedfile.go @@ -0,0 +1,121 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package chunkedfile provides utilities for testing that source code +// errors are reported in the appropriate places. +// +// A chunked file consists of several chunks of input text separated by +// "---" lines. Each chunk is an input to the program under test, such +// as an evaluator. Lines containing "###" are interpreted as +// expectations of failure: the following text is a Go string literal +// denoting a regular expression that should match the failure message. +// +// Example: +// +// x = 1 / 0 ### "division by zero" +// --- +// x = 1 +// print(x + "") ### "int + string not supported" +// +// A client test feeds each chunk of text into the program under test, +// then calls chunk.GotError for each error that actually occurred. Any +// discrepancy between the actual and expected errors is reported using +// the client's reporter, which is typically a testing.T. +package chunkedfile + +import ( + "fmt" + "io/ioutil" + "regexp" + "strconv" + "strings" +) + +const debug = false + +// A Chunk is a portion of a source file. +// It contains a set of expected errors. +type Chunk struct { + Source string + filename string + report Reporter + wantErrs map[int]*regexp.Regexp +} + +// Reporter is implemented by *testing.T. 
+type Reporter interface { + Errorf(format string, args ...interface{}) +} + +// Read parses a chunked file and returns its chunks. +// It reports failures using the reporter. +func Read(filename string, report Reporter) (chunks []Chunk) { + data, err := ioutil.ReadFile(filename) + if err != nil { + report.Errorf("%s", err) + return + } + linenum := 1 + for i, chunk := range strings.Split(string(data), "\n---\n") { + chunk := string(chunk) + if debug { + fmt.Printf("chunk %d at line %d: %s\n", i, linenum, chunk) + } + // Pad with newlines so the line numbers match the original file. + src := strings.Repeat("\n", linenum-1) + chunk + + wantErrs := make(map[int]*regexp.Regexp) + + // Parse comments of the form: + // ### "expected error". + lines := strings.Split(chunk, "\n") + for j := 0; j < len(lines); j, linenum = j+1, linenum+1 { + line := lines[j] + hashes := strings.Index(line, "###") + if hashes < 0 { + continue + } + rest := strings.TrimSpace(line[hashes+len("###"):]) + pattern, err := strconv.Unquote(rest) + if err != nil { + report.Errorf("%s:%d: not a quoted regexp: %s", filename, linenum, rest) + continue + } + rx, err := regexp.Compile(pattern) + if err != nil { + report.Errorf("%s:%d: %v", filename, linenum, err) + continue + } + wantErrs[linenum] = rx + if debug { + fmt.Printf("\t%d\t%s\n", linenum, rx) + } + } + linenum++ + + chunks = append(chunks, Chunk{src, filename, report, wantErrs}) + } + return chunks +} + +// GotError should be called by the client to report an error at a particular line. +// GotError reports unexpected errors to the chunk's reporter. 
+func (chunk *Chunk) GotError(linenum int, msg string) { + if rx, ok := chunk.wantErrs[linenum]; ok { + delete(chunk.wantErrs, linenum) + if !rx.MatchString(msg) { + chunk.report.Errorf("%s:%d: error %q does not match pattern %q", chunk.filename, linenum, msg, rx) + } + } else { + chunk.report.Errorf("%s:%d: unexpected error: %v", chunk.filename, linenum, msg) + } +} + +// Done should be called by the client to indicate that the chunk has no more errors. +// Done reports expected errors that did not occur to the chunk's reporter. +func (chunk *Chunk) Done() { + for linenum, rx := range chunk.wantErrs { + chunk.report.Errorf("%s:%d: expected error matching %q", chunk.filename, linenum, rx) + } +} diff --git a/library.go b/library.go new file mode 100644 index 0000000..db978e0 --- /dev/null +++ b/library.go @@ -0,0 +1,1963 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package skylark + +// This file defines the library of built-ins. +// +// Built-ins must explicitly check the "frozen" flag before updating +// mutable types such as lists and dicts. + +import ( + "bytes" + "fmt" + "log" + "math/big" + "os" + "sort" + "strconv" + "strings" + "unicode" + "unicode/utf8" + + "github.com/google/skylark/syntax" +) + +// Universe defines the set of universal built-ins, such as None, True, and len. +// +// The Go application may add or remove items from the +// universe dictionary before Skylark evaluation begins. +// All values in the dictionary must be immutable. +// Skylark programs cannot modify the dictionary. 
+var Universe StringDict + +func init() { + // See https://bazel.build/versions/master/docs/skylark/lib/globals.html#XYZ + Universe = StringDict{ + "None": None, + "True": True, + "False": False, + "any": NewBuiltin("any", any), + "all": NewBuiltin("all", all), + "bool": NewBuiltin("bool", bool_), + "chr": NewBuiltin("chr", chr), + "cmp": NewBuiltin("cmp", cmp), + "dict": NewBuiltin("dict", dict), + "dir": NewBuiltin("dir", dir), + "enumerate": NewBuiltin("enumerate", enumerate), + "float": NewBuiltin("float", float), // requires resolve.AllowFloat + "freeze": NewBuiltin("freeze", freeze), // requires resolve.AllowFreeze + "getattr": NewBuiltin("getattr", getattr), + "hasattr": NewBuiltin("hasattr", hasattr), + "hash": NewBuiltin("hash", hash), + "int": NewBuiltin("int", int_), + "len": NewBuiltin("len", len_), + "list": NewBuiltin("list", list), + "max": NewBuiltin("max", minmax), + "min": NewBuiltin("min", minmax), + "ord": NewBuiltin("ord", ord), + "print": NewBuiltin("print", print), + "range": NewBuiltin("range", range_), + "repr": NewBuiltin("repr", repr), + "reversed": NewBuiltin("reversed", reversed), + "set": NewBuiltin("set", set), // requires resolve.AllowSet + "sorted": NewBuiltin("sorted", sorted), + "str": NewBuiltin("str", str), + "tuple": NewBuiltin("tuple", tuple), + "type": NewBuiltin("type", type_), + "zip": NewBuiltin("zip", zip), + } +} + +type builtinMethod func(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) + +// methods of built-in types +var ( + // See https://bazel.build/versions/master/docs/skylark/lib/dict.html. + dictMethods = map[string]builtinMethod{ + "clear": dict_clear, + "get": dict_get, + "items": dict_items, + "keys": dict_keys, + "pop": dict_pop, + "popitem": dict_popitem, + "setdefault": dict_setdefault, + "update": dict_update, + "values": dict_values, + } + + // See https://bazel.build/versions/master/docs/skylark/lib/list.html. 
+ listMethods = map[string]builtinMethod{ + "append": list_append, + "clear": list_clear, + "extend": list_extend, + "index": list_index, + "insert": list_insert, + "pop": list_pop, + "remove": list_remove, + } + + // See https://bazel.build/versions/master/docs/skylark/lib/string.html. + stringMethods = map[string]builtinMethod{ + "bytes": string_iterable, + "capitalize": string_capitalize, + "codepoints": string_iterable, + "count": string_count, + "endswith": string_endswith, + "find": string_find, + "format": string_format, + "index": string_index, + "isalnum": string_isalnum, + "isalpha": string_isalpha, + "isdigit": string_isdigit, + "islower": string_islower, + "isspace": string_isspace, + "istitle": string_istitle, + "isupper": string_isupper, + "join": string_join, + "lower": string_lower, + "lstrip": string_strip, // sic + "partition": string_partition, + "replace": string_replace, + "rfind": string_rfind, + "rindex": string_rindex, + "rpartition": string_partition, // sic + "rsplit": string_split, // sic + "rstrip": string_strip, // sic + "split": string_split, + "splitlines": string_splitlines, + "split_bytes": string_iterable, // sic + "split_codepoints": string_iterable, // sic + "startswith": string_startswith, + "strip": string_strip, + "title": string_title, + "upper": string_upper, + } + + // See https://bazel.build/versions/master/docs/skylark/lib/set.html. + setMethods = map[string]builtinMethod{ + "union": set_union, + } +) + +func builtinMethodOf(recv Value, name string) builtinMethod { + switch recv.(type) { + case String: + return stringMethods[name] + case *List: + return listMethods[name] + case *Dict: + return dictMethods[name] + case *Set: + return setMethods[name] + } + return nil +} + +func builtinAttr(recv Value, name string, methods map[string]builtinMethod) (Value, error) { + method := methods[name] + if method == nil { + return nil, nil // no such method + } + + // Allocate a closure over 'method'. 
+ impl := func(thread *Thread, b *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + return method(b.Name(), b.Receiver(), args, kwargs) + } + return NewBuiltin(name, impl).BindReceiver(recv), nil +} + +func builtinAttrNames(methods map[string]builtinMethod) []string { + names := make([]string, 0, len(methods)) + for name := range methods { + names = append(names, name) + } + sort.Strings(names) + return names +} + +// UnpackArgs unpacks the positional and keyword arguments into the +// supplied parameter variables. pairs is an alternating list of names +// and pointers to variables. +// +// If the variable is a bool, int, string, *List, *Dict, Callable, or Iterable, +// UnpackArgs performs the appropriate type check. (An int uses the +// AsInt32 check.) If the parameter name ends with "?", it and all +// following parameters are optional. +// +// Beware: an optional *List, *Dict, Callable, Iterable, or Value variable that is +// not assigned is not a valid Skylark Value, so the caller must +// explicitly handle such cases by interpreting nil as None or some +// computed default. +func UnpackArgs(fnname string, args Tuple, kwargs []Tuple, pairs ...interface{}) error { + nparams := len(pairs) / 2 + var defined intset + defined.init(nparams) + + // positional arguments + if len(args) > nparams { + return fmt.Errorf("%s: got %d arguments, want at most %d", + fnname, len(args), nparams) + } + for i, arg := range args { + defined.set(i) + if err := unpackOneArg(arg, pairs[2*i+1]); err != nil { + return fmt.Errorf("%s: for parameter %d: %s", fnname, i+1, err) + } + } + + // keyword arguments +kwloop: + for _, item := range kwargs { + name, arg := item[0].(String), item[1] + for i := 0; i < nparams; i++ { + paramName := pairs[2*i].(string) + if paramName[len(paramName)-1] == '?' 
{ + paramName = paramName[:len(paramName)-1] + } + if paramName == string(name) { + // found it + if defined.set(i) { + return fmt.Errorf("%s: got multiple values for keyword argument %s", + fnname, name) + } + ptr := pairs[2*i+1] + if err := unpackOneArg(arg, ptr); err != nil { + return fmt.Errorf("%s: for parameter %s: %s", fnname, name, err) + } + continue kwloop + } + } + return fmt.Errorf("%s: unexpected keyword argument %s", fnname, name) + } + + // Check that all non-optional parameters are defined. + // (We needn't check the first len(args).) + for i := len(args); i < nparams; i++ { + name := pairs[2*i].(string) + if strings.HasSuffix(name, "?") { + break // optional + } + if !defined.get(i) { + return fmt.Errorf("%s: missing argument for %s", fnname, name) + } + } + + return nil +} + +// UnpackPositionalArgs unpacks the positional arguments into +// corresponding variables. Each element of vars is a pointer; see +// UnpackArgs for allowed types and conversions. +// +// UnpackPositionalArgs reports an error if the number of arguments is +// less than min or greater than len(vars), if kwargs is nonempty, or if +// any conversion fails. 
+func UnpackPositionalArgs(fnname string, args Tuple, kwargs []Tuple, min int, vars ...interface{}) error { + if len(kwargs) > 0 { + return fmt.Errorf("%s: unexpected keyword arguments", fnname) + } + max := len(vars) + if len(args) < min { + var atleast string + if min < max { + atleast = "at least " + } + return fmt.Errorf("%s: got %d arguments, want %s%d", fnname, len(args), atleast, min) + } + if len(args) > max { + var atmost string + if max > min { + atmost = "at most " + } + return fmt.Errorf("%s: got %d arguments, want %s%d", fnname, len(args), atmost, max) + } + for i, arg := range args { + if err := unpackOneArg(arg, vars[i]); err != nil { + return fmt.Errorf("%s: for parameter %d: %s", fnname, i+1, err) + } + } + return nil +} + +func unpackOneArg(v Value, ptr interface{}) error { + ok := true + switch ptr := ptr.(type) { + case *Value: + *ptr = v + case *string: + *ptr, ok = AsString(v) + if !ok { + return fmt.Errorf("got %s, want string", v.Type()) + } + case *bool: + *ptr = bool(v.Truth()) + case *int: + var err error + *ptr, err = AsInt32(v) + if err != nil { + return err + } + case **List: + *ptr, ok = v.(*List) + if !ok { + return fmt.Errorf("got %s, want list", v.Type()) + } + case **Dict: + *ptr, ok = v.(*Dict) + if !ok { + return fmt.Errorf("got %s, want dict", v.Type()) + } + case *Callable: + *ptr, ok = v.(Callable) + if !ok { + return fmt.Errorf("got %s, want callable", v.Type()) + } + case *Iterable: + *ptr, ok = v.(Iterable) + if !ok { + return fmt.Errorf("got %s, want iterable", v.Type()) + } + default: + log.Fatalf("internal error: invalid ptr type: %T", ptr) + } + return nil +} + +// ---- builtin functions ---- + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#all +func all(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs("all", args, kwargs, 1, &iterable); err != nil { + return nil, err + } + iter := iterable.Iterate() + defer 
iter.Done() + var x Value + for iter.Next(&x) { + if !x.Truth() { + return False, nil + } + } + return True, nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#any +func any(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs("all", args, kwargs, 1, &iterable); err != nil { + return nil, err + } + iter := iterable.Iterate() + defer iter.Done() + var x Value + for iter.Next(&x) { + if x.Truth() { + return True, nil + } + } + return False, nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#bool +func bool_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var x Value = False + if err := UnpackPositionalArgs("bool", args, kwargs, 0, &x); err != nil { + return nil, err + } + return x.Truth(), nil +} + +func chr(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("chr does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("chr: got %d arguments, want 1", len(args)) + } + i, err := AsInt32(args[0]) + if err != nil { + return nil, fmt.Errorf("chr: got %s, want int", args[0].Type()) + } + if i < 0 { + return nil, fmt.Errorf("chr: Unicode code point %d out of range (<0)", i) + } + if i > unicode.MaxRune { + return nil, fmt.Errorf("chr: Unicode code point U+%X out of range (>0x10FFFF)", i) + } + return String(string(i)), nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#cmp +func cmp(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("cmp does not accept keyword arguments") + } + if len(args) != 2 { + return nil, fmt.Errorf("cmp: got %d arguments, want 2", len(args)) + } + x := args[0] + y := args[1] + if lt, err := Compare(syntax.LT, x, y); err != nil { + return nil, err + } else if lt { + return MakeInt(+1), 
nil // x < y + } + if gt, err := Compare(syntax.GT, x, y); err != nil { + return nil, err + } else if gt { + return MakeInt(-1), nil // x > y + } + return zero, nil // x == y or one of the operands is NaN +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#dict +func dict(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(args) > 1 { + return nil, fmt.Errorf("dict: got %d arguments, want at most 1", len(args)) + } + dict := new(Dict) + if err := updateDict(dict, args, kwargs); err != nil { + return nil, fmt.Errorf("dict: %v", err) + } + return dict, nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#dir +func dir(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("dir does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("dir: got %d arguments, want 1", len(args)) + } + + var names []string + if x, ok := args[0].(HasAttrs); ok { + names = x.AttrNames() + } + elems := make([]Value, len(names)) + for i, name := range names { + elems[i] = String(name) + } + return NewList(elems), nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#enumerate +func enumerate(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + var start int + if err := UnpackPositionalArgs("enumerate", args, kwargs, 1, &iterable, &start); err != nil { + return nil, err + } + + iter := iterable.Iterate() + if iter == nil { + return nil, fmt.Errorf("enumerate: got %s, want iterable", iterable.Type()) + } + defer iter.Done() + + var pairs []Value + var x Value + + if n := Len(iterable); n >= 0 { + // common case: known length + pairs = make([]Value, 0, n) + array := make(Tuple, 2*n) // allocate a single backing array + for i := 0; iter.Next(&x); i++ { + pair := array[:2:2] + array = array[2:] + pair[0] = MakeInt(start + i) + pair[1] = x + 
pairs = append(pairs, pair) + } + } else { + // non-sequence (unknown length) + for i := 0; iter.Next(&x); i++ { + pair := Tuple{MakeInt(start + i), x} + pairs = append(pairs, pair) + } + } + + return NewList(pairs), nil +} + +func float(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("float does not accept keyword arguments") + } + if len(args) == 0 { + return Float(0.0), nil + } + if len(args) != 1 { + return nil, fmt.Errorf("float got %d arguments, wants 1", len(args)) + } + switch x := args[0].(type) { + case Bool: + if x { + return Float(1.0), nil + } else { + return Float(0.0), nil + } + case Int: + return x.Float(), nil + case Float: + return x, nil + case String: + f, err := strconv.ParseFloat(string(x), 64) + if err != nil { + return nil, err + } + return Float(f), nil + default: + return nil, fmt.Errorf("float got %s, want number or string", x.Type()) + } +} + +func freeze(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("freeze does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("freeze got %d arguments, wants 1", len(args)) + } + args[0].Freeze() + return args[0], nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#getattr +func getattr(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var object, dflt Value + var name string + if err := UnpackPositionalArgs("getattr", args, kwargs, 2, &object, &name, &dflt); err != nil { + return nil, err + } + if o, ok := object.(HasAttrs); ok { + if v, err := o.Attr(name); v != nil || err != nil { + return v, err + } + } + if dflt != nil { + return dflt, nil + } + return nil, fmt.Errorf("%s has no .%s field or method", object.Type(), name) +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#hasattr +func hasattr(thread *Thread, _ *Builtin, args Tuple, kwargs 
[]Tuple) (Value, error) { + var object Value + var name string + if err := UnpackPositionalArgs("hasattr", args, kwargs, 2, &object, &name); err != nil { + return nil, err + } + if object, ok := object.(HasAttrs); ok { + if v, err := object.Attr(name); v != nil || err != nil { + return True, nil + } + } + return False, nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#hash +func hash(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var x Value + if err := UnpackPositionalArgs("hash", args, kwargs, 1, &x); err != nil { + return nil, err + } + h, err := x.Hash() + return MakeUint(uint(h)), err +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#int +func int_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var x Value = zero + var base Value + if err := UnpackArgs("int", args, kwargs, "x?", &x, "base?", &base); err != nil { + return nil, err + } + + // "If x is not a number or base is given, x must be a string." + if s, ok := AsString(x); ok { + b := 10 + if base != nil { + var err error + b, err = AsInt32(base) + if err != nil || b != 0 && (b < 2 || b > 36) { + return nil, fmt.Errorf("int: base must be an integer >= 2 && <= 36") + } + } + + orig := s // save original for error message + + if len(s) > 1 { + var sign string + i := 0 + if s[0] == '+' || s[0] == '-' { + sign = s[:1] + i++ + } + + if i < len(s) && s[i] == '0' { + hasbase := 0 + if i+2 < len(s) { + switch s[i+1] { + case 'o', 'O': + // SetString doesn't understand "0o755" + // so modify s to "0755". + // Octals are rare, so allocation is fine. + s = sign + "0" + s[i+2:] + hasbase = 8 + case 'x', 'X': + hasbase = 16 + } + + if hasbase != 0 && b != 0 { + // Explicit base doesn't match prefix, + // e.g. int("0o755", 16). + if hasbase != b { + goto invalid + } + + // SetString requires base=0 + // if there's a base prefix. 
+ b = 0 + } + } + + // For automatic base detection, + // a string starting with zero + // must be all zeros. + // Thus we reject "0755". + if hasbase == 0 && b == 0 { + for ; i < len(s); i++ { + if s[i] != '0' { + goto invalid + } + } + } + } + } + + // NOTE: int(x) permits arbitrary precision, unlike the scanner. + if i, ok := new(big.Int).SetString(s, b); ok { + return Int{i}, nil + } + + invalid: + return nil, fmt.Errorf("int: invalid literal with base %d: %s", b, orig) + } + + if base != nil { + return nil, fmt.Errorf("int: can't convert non-string with explicit base") + } + i, err := ConvertToInt(x) + if err != nil { + return nil, fmt.Errorf("int: %s", err) + } + return i, nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#len +func len_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var x Value + if err := UnpackPositionalArgs("len", args, kwargs, 1, &x); err != nil { + return nil, err + } + len := Len(x) + if len < 0 { + return nil, fmt.Errorf("value of type %s has no len", x.Type()) + } + return MakeInt(len), nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#list +func list(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs("list", args, kwargs, 0, &iterable); err != nil { + return nil, err + } + var elems []Value + if iterable != nil { + iter := iterable.Iterate() + defer iter.Done() + if n := Len(iterable); n > 0 { + elems = make([]Value, 0, n) // preallocate if length known + } + var x Value + for iter.Next(&x) { + elems = append(elems, x) + } + } + return NewList(elems), nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#min +func minmax(thread *Thread, fn *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(args) == 0 { + return nil, fmt.Errorf("%s requires at least one positional argument", fn.Name()) + } + var keyFunc Callable + if err := 
UnpackArgs(fn.Name(), nil, kwargs, "key?", &keyFunc); err != nil { + return nil, err + } + var op syntax.Token + if fn.Name() == "max" { + op = syntax.GT + } else { + op = syntax.LT + } + var iterable Value + if len(args) == 1 { + iterable = args[0] + } else { + iterable = args + } + iter := Iterate(iterable) + if iter == nil { + return nil, fmt.Errorf("%s: %s value is not iterable", fn.Name(), iterable.Type()) + } + defer iter.Done() + var extremum Value + if !iter.Next(&extremum) { + return nil, fmt.Errorf("%s: argument is an empty sequence", fn.Name()) + } + + var extremeKey Value + var keyargs Tuple + if keyFunc == nil { + extremeKey = extremum + } else { + keyargs = Tuple{extremum} + res, err := Call(thread, keyFunc, keyargs, nil) + if err != nil { + return nil, err + } + extremeKey = res + } + + var x Value + for iter.Next(&x) { + var key Value + if keyFunc == nil { + key = x + } else { + keyargs[0] = x + res, err := Call(thread, keyFunc, keyargs, nil) + if err != nil { + return nil, err + } + key = res + } + + if ok, err := Compare(op, key, extremeKey); err != nil { + return nil, err + } else if ok { + extremum = x + extremeKey = key + } + } + return extremum, nil +} + +func ord(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("ord does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("ord: got %d arguments, want 1", len(args)) + } + s, ok := AsString(args[0]) + if !ok { + return nil, fmt.Errorf("ord: got %s, want string", args[0].Type()) + } + r, sz := utf8.DecodeRuneInString(s) + if sz == 0 || sz != len(s) { + n := utf8.RuneCountInString(s) + return nil, fmt.Errorf("ord: string encodes %d Unicode code points, want 1", n) + } + return MakeInt(int(r)), nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#print +func print(thread *Thread, fn *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var buf bytes.Buffer + path 
:= make([]Value, 0, 4) + sep := "" + for _, v := range args { + buf.WriteString(sep) + if s, ok := AsString(v); ok { + buf.WriteString(s) + } else { + writeValue(&buf, v, path) + } + sep = " " + } + for _, pair := range kwargs { + buf.WriteString(sep) + buf.WriteString(string(pair[0].(String))) + buf.WriteString("=") + if s, ok := AsString(pair[1]); ok { + buf.WriteString(s) + } else { + writeValue(&buf, pair[1], path) + } + sep = " " + } + + if thread.Print != nil { + thread.Print(thread, buf.String()) + } else { + fmt.Fprintln(os.Stderr, &buf) + } + return None, nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#range +func range_(thread *Thread, fn *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var start, stop, step int + step = 1 + if err := UnpackPositionalArgs("range", args, kwargs, 1, &start, &stop, &step); err != nil { + return nil, err + } + list := new(List) + switch len(args) { + case 1: + // range(stop) + start, stop = 0, start + fallthrough + case 2: + // range(start, stop) + for i := start; i < stop; i += step { + list.elems = append(list.elems, MakeInt(i)) + } + case 3: + // range(start, stop, step) + if step == 0 { + return nil, fmt.Errorf("range: step argument must not be zero") + } + if step > 0 { + for i := start; i < stop; i += step { + list.elems = append(list.elems, MakeInt(i)) + } + } else { + for i := start; i >= stop; i += step { + list.elems = append(list.elems, MakeInt(i)) + } + } + } + return list, nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#repr +func repr(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var x Value + if err := UnpackPositionalArgs("repr", args, kwargs, 1, &x); err != nil { + return nil, err + } + return String(x.String()), nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#reversed. 
+func reversed(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs("reversed", args, kwargs, 1, &iterable); err != nil { + return nil, err + } + iter := iterable.Iterate() + defer iter.Done() + var elems []Value + if n := Len(args[0]); n >= 0 { + elems = make([]Value, 0, n) // preallocate if length known + } + var x Value + for iter.Next(&x) { + elems = append(elems, x) + } + n := len(elems) + for i := 0; i < n>>1; i++ { + elems[i], elems[n-1-i] = elems[n-1-i], elems[i] + } + return NewList(elems), nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#set +func set(thread *Thread, fn *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs("set", args, kwargs, 0, &iterable); err != nil { + return nil, err + } + set := new(Set) + if iterable != nil { + iter := iterable.Iterate() + defer iter.Done() + var x Value + for iter.Next(&x) { + if err := set.Insert(x); err != nil { + return nil, err + } + } + } + return set, nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#sorted +func sorted(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + var cmp Callable + var reverse bool + if err := UnpackArgs("sorted", args, kwargs, + "iterable", &iterable, + "cmp?", &cmp, + "reverse?", &reverse, + ); err != nil { + return nil, err + } + + iter := iterable.Iterate() + defer iter.Done() + var elems []Value + if n := Len(iterable); n > 0 { + elems = make(Tuple, 0, n) // preallocate if length is known + } + var x Value + for iter.Next(&x) { + elems = append(elems, x) + } + slice := &sortSlice{thread: thread, elems: elems, cmp: cmp} + if reverse { + sort.Sort(sort.Reverse(slice)) + } else { + sort.Sort(slice) + } + return NewList(slice.elems), slice.err +} + +type sortSlice struct { + thread *Thread + elems []Value + cmp Callable + err error + pair 
[2]Value +} + +func (s *sortSlice) Len() int { return len(s.elems) } +func (s *sortSlice) Less(i, j int) bool { + x, y := s.elems[i], s.elems[j] + if s.cmp != nil { + // Strange things will happen if cmp fails, or returns a non-int. + s.pair[0], s.pair[1] = x, y // avoid allocation + res, err := Call(s.thread, s.cmp, Tuple(s.pair[:]), nil) + if err != nil { + s.err = err + } + cmp, ok := res.(Int) + return ok && cmp.Sign() < 0 + } else { + ok, err := Compare(syntax.LT, x, y) + if err != nil { + s.err = err + } + return ok + } +} +func (s *sortSlice) Swap(i, j int) { + s.elems[i], s.elems[j] = s.elems[j], s.elems[i] +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#str +func str(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("str does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("str: got %d arguments, want exactly 1", len(args)) + } + x := args[0] + if _, ok := AsString(x); !ok { + x = String(x.String()) + } + return x, nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#tuple +func tuple(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs("tuple", args, kwargs, 0, &iterable); err != nil { + return nil, err + } + if len(args) == 0 { + return Tuple(nil), nil + } + iter := iterable.Iterate() + defer iter.Done() + var elems Tuple + if n := Len(iterable); n > 0 { + elems = make(Tuple, 0, n) // preallocate if length is known + } + var x Value + for iter.Next(&x) { + elems = append(elems, x) + } + return elems, nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#type +func type_(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) { + if len(kwargs) > 0 { + return nil, fmt.Errorf("type does not accept keyword arguments") + } + if len(args) != 1 { + return nil, fmt.Errorf("type: 
got %d arguments, want exactly 1", len(args))
	}
	return String(args[0].Type()), nil
}

// zip returns a list of tuples in which the i'th tuple holds the i'th
// element of each argument. The result has as many tuples as the
// shortest argument has elements. Keyword arguments, non-iterable
// arguments, and iterables of unknown length (Len < 0) are errors.
//
// See https://bazel.build/versions/master/docs/skylark/lib/globals.html#zip
func zip(thread *Thread, _ *Builtin, args Tuple, kwargs []Tuple) (Value, error) {
	if len(kwargs) > 0 {
		return nil, fmt.Errorf("zip does not accept keyword arguments")
	}
	rows, cols := 0, len(args)
	iters := make([]Iterator, cols)
	// Call Done on every iterator obtained below, on success and on the
	// early error returns alike, matching the `defer iter.Done()` that
	// the other builtins pair with each Iterate call. Slots not yet
	// filled when an error occurs are still nil and are skipped.
	defer func() {
		for _, iter := range iters {
			if iter != nil {
				iter.Done()
			}
		}
	}()
	for i, seq := range args {
		it := Iterate(seq)
		if it == nil {
			return nil, fmt.Errorf("zip: argument #%d is not iterable: %s", i+1, seq.Type())
		}
		iters[i] = it
		n := Len(seq)
		if n < 0 {
			// TODO(adonovan): support iterables of unknown length.
			return nil, fmt.Errorf("zip: argument #%d has unknown length", i+1)
		}
		if i == 0 || n < rows {
			rows = n
		}
	}
	result := make([]Value, rows)
	array := make(Tuple, cols*rows) // allocate a single backing array
	for i := 0; i < rows; i++ {
		tuple := array[:cols:cols]
		array = array[cols:]
		for j, iter := range iters {
			iter.Next(&tuple[j])
		}
		result[i] = tuple
	}
	return NewList(result), nil
}

// ---- methods of built-in types ---

// https://docs.python.org/2/library/stdtypes.html#dict.get
func dict_get(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) {
	var key, dflt Value
	if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &key, &dflt); err != nil {
		return nil, err
	}
	if v, ok, err := recv.(*Dict).Get(key); err != nil {
		return nil, err
	} else if ok {
		return v, nil
	} else if dflt != nil {
		return dflt, nil
	} else {
		return None, nil
	}
}

// https://docs.python.org/2/library/stdtypes.html#dict.clear
func dict_clear(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) {
	if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil {
		return nil, err
	}
	return None, recv.(*Dict).Clear()
}

// https://docs.python.org/2/library/stdtypes.html#dict.items
func dict_items(fnname string, recv Value, args Tuple,
kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + items := recv.(*Dict).Items() + res := make([]Value, len(items)) + for i, item := range items { + res[i] = item // convert [2]Value to Value + } + return NewList(res), nil +} + +// https://docs.python.org/2/library/stdtypes.html#dict.keys +func dict_keys(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + return NewList(recv.(*Dict).Keys()), nil +} + +// https://docs.python.org/2/library/stdtypes.html#dict.pop +func dict_pop(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := recv_.(*Dict) + var k, d Value + if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &k, &d); err != nil { + return nil, err + } + if v, found, err := recv.Delete(k); err != nil { + return nil, err // dict is frozen or key is unhashable + } else if found { + return v, nil + } else if d != nil { + return d, nil + } else { + return nil, fmt.Errorf("pop: missing key") + } +} + +// https://docs.python.org/2/library/stdtypes.html#dict.popitem +func dict_popitem(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + recv := recv_.(*Dict) + k, ok := recv.ht.first() + if !ok { + return nil, fmt.Errorf("popitem: empty dict") + } + v, _, err := recv.Delete(k) + if err != nil { + return nil, err // dict is frozen + } + return Tuple{k, v}, nil +} + +// https://docs.python.org/2/library/stdtypes.html#dict.setdefault +func dict_setdefault(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + var key, dflt Value = nil, None + if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &key, &dflt); err != nil { + return nil, err + } + dict := recv.(*Dict) + if v, ok, err := dict.Get(key); err != nil { + 
return nil, err + } else if ok { + return v, nil + } else { + return dflt, dict.Set(key, dflt) + } +} + +// https://docs.python.org/2/library/stdtypes.html#dict.update +func dict_update(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + if len(args) > 1 { + return nil, fmt.Errorf("update: got %d arguments, want at most 1", len(args)) + } + if err := updateDict(recv.(*Dict), args, kwargs); err != nil { + return nil, fmt.Errorf("update: %v", err) + } + return None, nil +} + +// https://docs.python.org/2/library/stdtypes.html#dict.update +func dict_values(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + items := recv.(*Dict).Items() + res := make([]Value, len(items)) + for i, item := range items { + res[i] = item[1] + } + return NewList(res), nil +} + +// https://docs.python.org/2/library/stdtypes.html#list.append +func list_append(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := recv_.(*List) + var object Value + if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &object); err != nil { + return nil, err + } + if err := recv.checkMutable("append to", true); err != nil { + return nil, err + } + recv.elems = append(recv.elems, object) + return None, nil +} + +// https://docs.python.org/2/library/stdtypes.html#list.clear +func list_clear(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + return None, recv_.(*List).Clear() +} + +// https://docs.python.org/2/library/stdtypes.html#list.extend +func list_extend(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := recv_.(*List) + var iterable Iterable + if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &iterable); err != nil { + return nil, err + } + if err := recv.checkMutable("extend", true); 
err != nil { + return nil, err + } + listExtend(recv, iterable) + return None, nil +} + +// https://docs.python.org/2/library/stdtypes.html#list.index +func list_index(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := recv_.(*List) + var value, start_, end_ Value + if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &value, &start_, &end_); err != nil { + return nil, err + } + + start, end, err := indices(start_, end_, recv.Len()) + if err != nil { + return nil, fmt.Errorf("%s: %s", fnname, err) + } + + for i := start; i < end; i++ { + if eq, err := Equal(recv.elems[i], value); err != nil { + return nil, fmt.Errorf("index: %s", err) + } else if eq { + return MakeInt(i), nil + } + } + return nil, fmt.Errorf("index: value not in list") +} + +// https://docs.python.org/2/library/stdtypes.html#list.insert +func list_insert(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := recv_.(*List) + var index int + var object Value + if err := UnpackPositionalArgs(fnname, args, kwargs, 2, &index, &object); err != nil { + return nil, err + } + if err := recv.checkMutable("insert into", true); err != nil { + return nil, err + } + + if index < 0 { + index += recv.Len() + } + + if index >= recv.Len() { + // end + recv.elems = append(recv.elems, object) + } else { + if index < 0 { + index = 0 // start + } + recv.elems = append(recv.elems, nil) + copy(recv.elems[index+1:], recv.elems[index:]) // slide up one + recv.elems[index] = object + } + return None, nil +} + +// https://docs.python.org/2/library/stdtypes.html#list.remove +func list_remove(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := recv_.(*List) + var value Value + if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &value); err != nil { + return nil, err + } + if err := recv.checkMutable("remove from", true); err != nil { + return nil, err + } + for i, elem := range recv.elems { + if eq, err := Equal(elem, value); err 
!= nil { + return nil, fmt.Errorf("remove: %v", err) + } else if eq { + recv.elems = append(recv.elems[:i], recv.elems[i+1:]...) + return None, nil + } + } + return nil, fmt.Errorf("remove: element not found") +} + +// https://docs.python.org/2/library/stdtypes.html#list.pop +func list_pop(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + list := recv.(*List) + index := list.Len() - 1 + if err := UnpackPositionalArgs(fnname, args, kwargs, 0, &index); err != nil { + return nil, err + } + if index < 0 || index >= list.Len() { + return nil, fmt.Errorf("pop: index %d is out of range [0:%d]", index, list.Len()) + } + if err := list.checkMutable("pop from", true); err != nil { + return nil, err + } + res := list.elems[index] + list.elems = append(list.elems[:index], list.elems[index+1:]...) + return res, nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.capitalize +func string_capitalize(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + return String(strings.Title(string(recv.(String)))), nil +} + +// string_iterable returns an unspecified iterable value whose iterator yields: +// - bytes: numeric values of successive bytes +// - codepoints: numeric values of successive Unicode code points +// - split_bytes: successive 1-byte substrings +// - split_codepoints: successive substrings that encode a single Unicode code point. 
+func string_iterable(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + return stringIterable{ + s: recv.(String), + split: fnname[0] == 's', + codepoints: fnname[len(fnname)-2] == 't', + }, nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.count +func string_count(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := string(recv_.(String)) + + var sub string + var start_, end_ Value + if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &sub, &start_, &end_); err != nil { + return nil, err + } + + start, end, err := indices(start_, end_, len(recv)) + if err != nil { + return nil, fmt.Errorf("%s: %s", fnname, err) + } + + var slice string + if start < end { + slice = recv[start:end] + } + return MakeInt(strings.Count(slice, sub)), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.endswith +func string_endswith(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := string(recv_.(String)) + var suffix string + if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &suffix); err != nil { + return nil, err + } + return Bool(strings.HasSuffix(recv, suffix)), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.isalnum +func string_isalnum(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + recv := string(recv_.(String)) + for _, r := range recv { + if !unicode.IsLetter(r) && !unicode.IsDigit(r) { + return False, nil + } + } + return Bool(recv != ""), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.isalpha +func string_isalpha(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + recv := string(recv_.(String)) 
+ for _, r := range recv { + if !unicode.IsLetter(r) { + return False, nil + } + } + return Bool(recv != ""), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.isdigit +func string_isdigit(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + recv := string(recv_.(String)) + for _, r := range recv { + if !unicode.IsDigit(r) { + return False, nil + } + } + return Bool(recv != ""), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.islower +func string_islower(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + recv := string(recv_.(String)) + return Bool(isCasedString(recv) && recv == strings.ToLower(recv)), nil +} + +// isCasedString reports whether its argument contains any cased characters. +func isCasedString(s string) bool { + for _, r := range s { + if 'a' <= r && r <= 'z' || 'A' <= r && r <= 'Z' || unicode.SimpleFold(r) != r { + return true + } + } + return false +} + +// https://docs.python.org/2/library/stdtypes.html#str.isspace +func string_isspace(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + recv := string(recv_.(String)) + for _, r := range recv { + if !unicode.IsSpace(r) { + return False, nil + } + } + return Bool(recv != ""), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.istitle +func string_istitle(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + recv := string(recv_.(String)) + + // Python semantics differ from x==strings.{To,}Title(x) in Go: + // "uppercase characters may only follow uncased characters and + // lowercase characters 
only cased ones." + var cased, prevCased bool + for _, r := range recv { + if unicode.IsUpper(r) { + if prevCased { + return False, nil + } + cased = true + prevCased = true + } else if unicode.IsLower(r) { + if !prevCased { + return False, nil + } + prevCased = true + cased = true + } else { + prevCased = false + } + } + return Bool(cased), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.isupper +func string_isupper(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + recv := string(recv_.(String)) + return Bool(isCasedString(recv) && recv == strings.ToUpper(recv)), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.find +func string_find(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + return string_find_impl(fnname, string(recv.(String)), args, kwargs, true, false) +} + +// https://docs.python.org/2/library/stdtypes.html#str.format +func string_format(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + format := string(recv_.(String)) + var auto, manual bool // kinds of positional indexing used + path := make([]Value, 0, 4) + var buf bytes.Buffer + index := 0 + for { + // TODO(adonovan): replace doubled "}}" with "}" and reject single '}'. 
+ i := strings.IndexByte(format, '{') + if i < 0 { + buf.WriteString(format) + break + } + buf.WriteString(format[:i]) + + if i+1 < len(format) && format[i+1] == '{' { + // "{{" means a literal '{' + buf.WriteByte('{') + format = format[i+2:] + continue + } + + format = format[i+1:] + i = strings.IndexByte(format, '}') + if i < 0 { + return nil, fmt.Errorf("unmatched '{' in format") + } + + var arg Value + conv := "s" + var spec string + + field := format[:i] + format = format[i+1:] + + var name string + if i := strings.IndexByte(field, '!'); i < 0 { + // "name" or "name:spec" + if i := strings.IndexByte(field, ':'); i < 0 { + name = field + } else { + name = field[:i] + spec = field[i+1:] + } + } else { + // "name!conv" or "name!conv:spec" + name = field[:i] + field = field[i+1:] + // "conv" or "conv:spec" + if i := strings.IndexByte(field, ':'); i < 0 { + conv = field + } else { + conv = field[:i] + spec = field[i+1:] + } + } + + if name == "" { + // "{}": automatic indexing + if manual { + return nil, fmt.Errorf("cannot switch from manual field specification to automatic field numbering") + } + auto = true + if index >= len(args) { + return nil, fmt.Errorf("tuple index out of range") + } + arg = args[index] + index++ + } else if num, err := strconv.Atoi(name); err == nil { + // positional argument + if auto { + return nil, fmt.Errorf("cannot switch from automatic field numbering to manual field specification") + } + manual = true + if num >= len(args) { + return nil, fmt.Errorf("tuple index out of range") + } else { + arg = args[num] + } + } else { + // keyword argument + for _, kv := range kwargs { + if string(kv[0].(String)) == name { + arg = kv[1] + break + } + } + if arg == nil { + // Skylark does not support Python's x.y or a[i] syntaxes. 
+ if strings.Contains(name, ".") { + return nil, fmt.Errorf("attribute syntax x.y is not supported in replacement fields: %s", name) + } + if strings.Contains(name, "[") { + return nil, fmt.Errorf("element syntax a[i] is not supported in replacement fields: %s", name) + } + return nil, fmt.Errorf("keyword %s not found", name) + } + } + + if spec != "" { + // Skylark does not support Python's format_spec features. + return nil, fmt.Errorf("format spec features not supported in replacement fields: %s", spec) + } + + switch conv { + case "s": + if str, ok := AsString(arg); ok { + buf.WriteString(str) + } else { + writeValue(&buf, arg, path) + } + case "r": + writeValue(&buf, arg, path) + default: + return nil, fmt.Errorf("unknown conversion %q", conv) + } + } + return String(buf.String()), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.index +func string_index(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + return string_find_impl(fnname, string(recv.(String)), args, kwargs, false, false) +} + +// https://docs.python.org/2/library/stdtypes.html#str.join +func string_join(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := string(recv_.(String)) + var iterable Iterable + if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &iterable); err != nil { + return nil, err + } + iter := iterable.Iterate() + defer iter.Done() + var buf bytes.Buffer + var x Value + for i := 0; iter.Next(&x); i++ { + if i > 0 { + buf.WriteString(recv) + } + if s, ok := AsString(x); !ok { + return nil, fmt.Errorf("in list, want string, got %s", x.Type()) + } else { + buf.WriteString(s) + } + } + return String(buf.String()), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.lower +func string_lower(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + return 
String(strings.ToLower(string(recv.(String)))), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.lstrip +func string_lstrip(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + return String(strings.TrimLeftFunc(string(recv.(String)), unicode.IsSpace)), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.partition +func string_partition(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := string(recv_.(String)) + var sep string + if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &sep); err != nil { + return nil, err + } + if sep == "" { + return nil, fmt.Errorf("%s: empty separator", fnname) + } + var i int + if fnname[0] == 'p' { + i = strings.Index(recv, sep) // partition + } else { + i = strings.LastIndex(recv, sep) // rpartition + } + tuple := make(Tuple, 0, 3) + if i < 0 { + if fnname[0] == 'p' { + tuple = append(tuple, String(recv), String(""), String("")) + } else { + tuple = append(tuple, String(""), String(""), String(recv)) + } + } else { + tuple = append(tuple, String(recv[:i]), String(sep), String(recv[i+len(sep):])) + } + return tuple, nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.replace +func string_replace(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := string(recv_.(String)) + var old, new string + count := -1 + if err := UnpackPositionalArgs(fnname, args, kwargs, 2, &old, &new, &count); err != nil { + return nil, err + } + return String(strings.Replace(recv, old, new, count)), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.rfind +func string_rfind(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + return string_find_impl(fnname, string(recv.(String)), args, kwargs, true, true) +} + +// https://docs.python.org/2/library/stdtypes.html#str.rindex +func string_rindex(fnname string, 
recv Value, args Tuple, kwargs []Tuple) (Value, error) { + return string_find_impl(fnname, string(recv.(String)), args, kwargs, false, true) +} + +// https://docs.python.org/2/library/stdtypes.html#str.rstrip +func string_rstrip(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + return String(strings.TrimRightFunc(string(recv.(String)), unicode.IsSpace)), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.startswith +func string_startswith(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := string(recv_.(String)) + var prefix string + if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &prefix); err != nil { + return nil, err + } + return Bool(strings.HasPrefix(recv, prefix)), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.strip +// https://docs.python.org/2/library/stdtypes.html#str.lstrip +// https://docs.python.org/2/library/stdtypes.html#str.rstrip +func string_strip(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + var chars string + if err := UnpackPositionalArgs(fnname, args, kwargs, 0, &chars); err != nil { + return nil, err + } + recv := string(recv_.(String)) + var s string + switch fnname[0] { + case 's': // strip + if chars != "" { + s = strings.Trim(recv, chars) + } else { + s = strings.TrimSpace(recv) + } + case 'l': // lstrip + if chars != "" { + s = strings.TrimLeft(recv, chars) + } else { + s = strings.TrimLeftFunc(recv, unicode.IsSpace) + } + case 'r': // rstrip + if chars != "" { + s = strings.TrimRight(recv, chars) + } else { + s = strings.TrimRightFunc(recv, unicode.IsSpace) + } + } + return String(s), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.title +func string_title(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + 
return nil, err + } + return String(strings.Title(strings.ToLower(string(recv.(String))))), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.upper +func string_upper(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + if err := UnpackPositionalArgs(fnname, args, kwargs, 0); err != nil { + return nil, err + } + return String(strings.ToUpper(string(recv.(String)))), nil +} + +// https://docs.python.org/2/library/stdtypes.html#str.split +// https://docs.python.org/2/library/stdtypes.html#str.rsplit +func string_split(fnname string, recv_ Value, args Tuple, kwargs []Tuple) (Value, error) { + recv := string(recv_.(String)) + var sep_ Value + maxsplit := -1 + if err := UnpackPositionalArgs(fnname, args, kwargs, 0, &sep_, &maxsplit); err != nil { + return nil, err + } + + var res []string + + if sep_ == nil || sep_ == None { + // special case: split on whitespace + if maxsplit == 0 { + res = append(res, recv) + } else if maxsplit < 0 { + res = strings.Fields(recv) + } else if fnname == "split" { + res = splitspace(recv, maxsplit+1) + } else { // rsplit + // TODO(adonovan): implement. + return nil, fmt.Errorf("rsplit(None, %d): maxsplit > 0 not yet supported", maxsplit) + } + + } else if sep, ok := AsString(sep_); ok { + if sep == "" { + return nil, fmt.Errorf("split: empty seperator") + } + // usual case: split on non-empty separator + if maxsplit == 0 { + res = append(res, recv) + } else if maxsplit < 0 { + res = strings.Split(recv, sep) + } else if fnname == "split" { + res = strings.SplitN(recv, sep, maxsplit+1) + } else { // rsplit + res = strings.Split(recv, sep) + if excess := len(res) - maxsplit; excess > 0 { + res[0] = strings.Join(res[:excess], sep) + res = append(res[:1], res[excess:]...) 
+ } + } + + } else { + return nil, fmt.Errorf("split: got %s for seperator, want string", sep_.Type()) + } + + list := make([]Value, len(res)) + for i, x := range res { + list[i] = String(x) + } + return NewList(list), nil +} + +func splitspace(s string, max int) []string { + var res []string + start := -1 // index of field start, or -1 in a region of spaces + for i, r := range s { + if unicode.IsSpace(r) { + if start >= 0 { + if len(res)+1 == max { + break // let this field run to the end + } + res = append(res, s[start:i]) + start = -1 + } + } else if start == -1 { + start = i + } + } + if start >= 0 { + res = append(res, s[start:]) + } + return res +} + +// https://docs.python.org/2/library/stdtypes.html#str.splitlines +func string_splitlines(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + var keepends bool + if err := UnpackPositionalArgs(fnname, args, kwargs, 0, &keepends); err != nil { + return nil, err + } + s := string(recv.(String)) + var lines []string + // TODO(adonovan): handle CRLF correctly. + if keepends { + lines = strings.SplitAfter(s, "\n") + } else { + lines = strings.Split(s, "\n") + } + if strings.HasSuffix(s, "\n") { + lines = lines[:len(lines)-1] + } + list := make([]Value, len(lines)) + for i, x := range lines { + list[i] = String(x) + } + return NewList(list), nil +} + +// See https://bazel.build/versions/master/docs/skylark/lib/set.html#union. +func set_union(fnname string, recv Value, args Tuple, kwargs []Tuple) (Value, error) { + var iterable Iterable + if err := UnpackPositionalArgs(fnname, args, kwargs, 0, &iterable); err != nil { + return nil, err + } + iter := iterable.Iterate() + defer iter.Done() + union, err := recv.(*Set).Union(iter) + if err != nil { + return nil, fmt.Errorf("union: %v", err) + } + return union, nil +} + +// Common implementation of string_{r}{find,index}. 
+func string_find_impl(fnname string, s string, args Tuple, kwargs []Tuple, allowError, last bool) (Value, error) { + var sub string + var start_, end_ Value + if err := UnpackPositionalArgs(fnname, args, kwargs, 1, &sub, &start_, &end_); err != nil { + return nil, err + } + + start, end, err := indices(start_, end_, len(s)) + if err != nil { + return nil, fmt.Errorf("%s: %s", fnname, err) + } + var slice string + if start < end { + slice = s[start:end] + } + + var i int + if last { + i = strings.LastIndex(slice, sub) + } else { + i = strings.Index(slice, sub) + } + if i < 0 { + if !allowError { + return nil, fmt.Errorf("substring not found") + } + return MakeInt(-1), nil + } + return MakeInt(i + start), nil +} + +// Common implementation of builtin dict function and dict.update method. +// Precondition: len(updates) == 0 or 1. +func updateDict(dict *Dict, updates Tuple, kwargs []Tuple) error { + if len(updates) == 1 { + switch updates := updates[0].(type) { + case NoneType: + // no-op + case *Dict: + // Iterate over dict's key/value pairs, not just keys. 
+ for _, item := range updates.Items() { + if err := dict.Set(item[0], item[1]); err != nil { + return err // dict is frozen + } + } + default: + // all other sequences + iter := Iterate(updates) + if iter == nil { + return fmt.Errorf("got %s, want iterable", updates.Type()) + } + defer iter.Done() + var pair Value + for i := 0; iter.Next(&pair); i++ { + iter2 := Iterate(pair) + if iter2 == nil { + return fmt.Errorf("dictionary update sequence element #%d is not iterable (%s)", i, pair.Type()) + + } + defer iter2.Done() + len := Len(pair) + if len < 0 { + return fmt.Errorf("dictionary update sequence element #%d has unknown length (%s)", i, pair.Type()) + } else if len != 2 { + return fmt.Errorf("dictionary update sequence element #%d has length %d, want 2", i, len) + } + var k, v Value + iter2.Next(&k) + iter2.Next(&v) + if err := dict.Set(k, v); err != nil { + return err + } + } + } + } + + // Then add the kwargs. + for _, pair := range kwargs { + if err := dict.Set(pair[0], pair[1]); err != nil { + return err // dict is frozen + } + } + + return nil +} diff --git a/resolve/resolve.go b/resolve/resolve.go new file mode 100644 index 0000000..9ccf482 --- /dev/null +++ b/resolve/resolve.go @@ -0,0 +1,769 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package resolve defines a name-resolution pass for Skylark abstract +// syntax trees. +// +// The resolver sets the Locals and FreeVars arrays of each DefStmt and +// the LocalIndex field of each syntax.Ident that refers to a local or +// free variable. It also sets the Locals array of a File for locals +// bound by comprehensions outside any function. Identifiers for global +// variables do not get an index. +package resolve + +// All references to names are statically resolved. Names may be +// built-in (e.g. None, len), global and predeclared (e.g. 
glob in the +// build language), global and user-defined (e.g. x=1 at toplevel), or +// local to a function or module-level comprehension. The resolver maps +// each local name to a small integer for fast and compact +// representation in the evaluator. +// +// The lexical environment is a tree of blocks with the module block at +// its root. The module's child blocks may be of two kinds: functions +// and comprehensions, and these may have further children of either +// kind. +// +// Python-style resolution requires multiple passes because a name is +// determined to be local to a function only if the function contains a +// "binding" use of it, and this use may lexically follow a non-binding +// use. In the first pass, we inspect each function, recording in +// 'uses' each identifier and the environment block in which it occurs. +// If a use of a name is binding, such as a function parameter or +// assignment, we add the name to the block's bindings mapping and add a +// local variable to the enclosing function. +// +// As we finish resolving each function, we inspect all the uses within +// that function and discard ones that were found to be local. The +// remaining ones must be either free (local to some lexically enclosing +// function) or global/built-in, but we cannot tell which until we have +// finished inspecting the outermost enclosing function. At that point, +// we can distinguish local from global names (and this is when Python +// would compute free variables). +// +// However, Skylark additionally requires that all references to global +// names are satisfied by some declaration in the current module; +// Skylark permits a function to forward-reference a global that has not +// been declared yet so long as it is declared before the end of the +// module. 
So, instead of re-resolving the unresolved references after +// each top-level function, we defer this until the end of the module +// and ensure that all such references are satisfied by some definition. +// +// At the end of the module, we visit each of the nested function blocks +// in bottom-up order, doing a recursive lexical lookup for each +// unresolved name. If the name is found to be local to some enclosing +// function, we must create a DefStmt.FreeVar (capture) parameter for +// each intervening function. We enter these synthetic bindings into +// the bindings map so that we create at most one freevar per name. If +// the name was not local, we check that it was defined at module level. +// +// We resolve all uses of locals in the module (due to comprehensions) +// in a similar way and compute the set of its local variables. +// +// Skylark enforces that all global names are assigned at most once on +// all control flow paths by forbidding if/else statements and loops at +// top level. +// +// TODO(adonovan): opt: reuse local slots once locals go out of scope. + +import ( + "fmt" + "log" + "strings" + + "github.com/google/skylark/syntax" +) + +const debug = false +const doesnt = "this Skylark dialect does not " + +// global options +// These features are either not standard Skylark (yet), or deprecated +// features of the BUILD language, so we put them behind flags. +var ( + AllowNestedDef = false // allow def statements within function bodies + AllowLambda = false // allow lambda expressions + AllowFloat = false // allow floating point literals, the 'float' built-in, and x / y + AllowFreeze = false // allow the 'freeze' built-in + AllowSet = false // allow the 'set' built-in + AllowGlobalReassign = false // allow reassignment to globals declared in same file (deprecated) +) + +// File resolves the specified file. 
+func File(file *syntax.File, isPredeclaredGlobal, isBuiltin func(name string) bool) error { + r := newResolver(isPredeclaredGlobal, isBuiltin) + r.stmts(file.Stmts) + + r.env.resolveLocalUses() + + // At the end of the module, resolve all non-local variable references, + // computing closures. + // Function bodies may contain forward references to later global declarations. + r.resolveNonLocalUses(r.env) + + file.Locals = r.moduleLocals + + if len(r.errors) > 0 { + return r.errors + } + return nil +} + +// Expr resolves the specified expression. +// It returns the local variables bound within the expression. +func Expr(expr syntax.Expr, isPredeclaredGlobal, isBuiltin func(name string) bool) ([]*syntax.Ident, error) { + r := newResolver(isPredeclaredGlobal, isBuiltin) + r.expr(expr) + r.env.resolveLocalUses() + r.resolveNonLocalUses(r.env) // globals & builtins + if len(r.errors) > 0 { + return nil, r.errors + } + return r.moduleLocals, nil +} + +// An ErrorList is a non-empty list of resolver error messages. +type ErrorList []Error // len > 0 + +func (e ErrorList) Error() string { return e[0].Error() } + +// An Error describes the nature and position of a resolver error. +type Error struct { + Pos syntax.Position + Msg string +} + +func (e Error) Error() string { return e.Pos.String() + ": " + e.Msg } + +// The Scope of a syntax.Ident indicates what kind of scope it has. +type Scope uint8 + +const ( + Undefined Scope = iota // name is not defined + Local // name is local to its function + Free // name is local to some enclosing function + Global // name is global to module + Builtin // name is universal (e.g. 
len) +) + +var scopeNames = [...]string{ + Undefined: "undefined", + Local: "local", + Free: "free", + Global: "global", + Builtin: "builtin", +} + +func (scope Scope) String() string { return scopeNames[scope] } + +func newResolver(isPredeclaredGlobal, isBuiltin func(name string) bool) *resolver { + return &resolver{ + env: new(block), // module block + isPredeclaredGlobal: isPredeclaredGlobal, + isBuiltin: isBuiltin, + globals: make(map[string]syntax.Position), + } +} + +type resolver struct { + // env is the current local environment: + // a linked list of blocks, innermost first. + // The tail of the list is the module block. + env *block + + // moduleLocals contains the local variables of the module + // (due to comprehensions outside any function). + moduleLocals []*syntax.Ident + + // globals contains the names of global variables defined + // within the module (but not predeclared ones). + // The position is that of the original binding. + globals map[string]syntax.Position + + // These predicates report whether a name is + // a pre-declared global or built-in. + isPredeclaredGlobal, isBuiltin func(name string) bool + + loops int // number of enclosing for loops + + errors ErrorList +} + +// container returns the innermost enclosing "container" block: +// a function (function != nil) or module (function == nil). +// Container blocks accumulate local variable bindings. +func (r *resolver) container() *block { + for b := r.env; ; b = b.parent { + if b.function != nil || b.isModule() { + return b + } + } +} + +func (r *resolver) push(b *block) { + r.env.children = append(r.env.children, b) + b.parent = r.env + r.env = b +} + +func (r *resolver) pop() { r.env = r.env.parent } + +type block struct { + parent *block // nil for module block + + // In the module (root) block, both these fields are nil. + function *syntax.Function // only for function blocks + comp *syntax.Comprehension // only for comprehension blocks + + // bindings maps a name to its binding. 
+ // A local binding has an index into its innermost enclosing container's locals array. + // A free binding has an index into its innermost enclosing function's freevars array. + bindings map[string]binding + + // children records the child blocks of the current one. + children []*block + + // uses records all identifiers seen in this container (function or module), + // and a reference to the environment in which they appear. + // As we leave each container block, we resolve them, + // so that only free and global ones remain. + // At the end of each top-level function we compute closures. + uses []use +} + +type binding struct { + scope Scope + index int +} + +func (b *block) isModule() bool { return b.parent == nil } + +func (b *block) bind(name string, bind binding) { + if b.bindings == nil { + b.bindings = make(map[string]binding) + } + b.bindings[name] = bind +} + +func (b *block) String() string { + if b.function != nil { + return "function block at " + fmt.Sprint(b.function.Span()) + } + if b.comp != nil { + return "comprehension block at " + fmt.Sprint(b.function.Span()) + } + return "module block" +} + +func (r *resolver) errorf(posn syntax.Position, format string, args ...interface{}) { + r.errors = append(r.errors, Error{posn, fmt.Sprintf(format, args...)}) +} + +// A use records an identifier and the environment in which it appears. +type use struct { + id *syntax.Ident + env *block +} + +// bind creates a binding for id in the current block, +// if there is not one already, and reports an error if +// a global was re-bound and allowRebind is false. +// It returns whether a binding already existed. +func (r *resolver) bind(id *syntax.Ident, allowRebind bool) bool { + // Binding outside any local (comprehension/function) block? + if r.env.isModule() { + prevPos, ok := r.globals[id.Name] + if ok { + id.Scope = uint8(Global) + + // Global reassignments are permitted only if + // they are of the form x += y. 
We can't tell + // statically whether it's a reassignment + // (e.g. int += int) or a mutation (list += list). + if !allowRebind && !AllowGlobalReassign { + r.errorf(id.NamePos, "cannot reassign global %s declared at %s", id.Name, prevPos) + } + } else { + id.Scope = uint8(Global) + r.globals[id.Name] = id.NamePos + } + return ok + } + + // Mark this name as local to current block. + // Assign it a new local (positive) index in the current container. + _, ok := r.env.bindings[id.Name] + if !ok { + var locals *[]*syntax.Ident + if fn := r.container().function; fn != nil { + locals = &fn.Locals + } else { + locals = &r.moduleLocals + } + r.env.bind(id.Name, binding{Local, len(*locals)}) + *locals = append(*locals, id) + } + + r.use(id) + return ok +} + +func (r *resolver) use(id *syntax.Ident) { + // Reference outside any local (comprehension/function) block? + if r.env.isModule() { + r.useGlobal(id) + return + } + + b := r.container() + b.uses = append(b.uses, use{id, r.env}) +} + +func (r *resolver) useGlobal(id *syntax.Ident) (scope Scope) { + if _, ok := r.globals[id.Name]; ok { + scope = Global // use of global declared by module + } else if r.isPredeclaredGlobal(id.Name) { + scope = Global // use of pre-declared global + } else if id.Name == "PACKAGE_NAME" { + scope = Global // nasty hack in Skylark spec; will go away (b/34240042). + } else if r.isBuiltin(id.Name) { + scope = Builtin // use of built-in + if !AllowFloat && id.Name == "float" { + r.errorf(id.NamePos, doesnt+"support floating point") + } + if !AllowSet && id.Name == "set" { + r.errorf(id.NamePos, doesnt+"support sets") + } + if !AllowFreeze && id.Name == "freeze" { + r.errorf(id.NamePos, doesnt+"provide the 'freeze' built-in function") + } + } else { + scope = Undefined + r.errorf(id.NamePos, "undefined: %s", id.Name) + } + id.Scope = uint8(scope) + return scope +} + +// resolveLocalUses is called when leaving a container (function/module) +// block. 
It resolves all uses of locals within that block. +func (b *block) resolveLocalUses() { + unresolved := b.uses[:0] + for _, use := range b.uses { + if bind := lookupLocal(use); bind.scope == Local { + use.id.Scope = uint8(bind.scope) + use.id.Index = bind.index + } else { + unresolved = append(unresolved, use) + } + } + b.uses = unresolved +} + +func (r *resolver) stmts(stmts []syntax.Stmt) { + for _, stmt := range stmts { + r.stmt(stmt) + } +} + +func (r *resolver) stmt(stmt syntax.Stmt) { + switch stmt := stmt.(type) { + case *syntax.ExprStmt: + r.expr(stmt.X) + + case *syntax.BranchStmt: + if r.loops == 0 && (stmt.Token == syntax.BREAK || stmt.Token == syntax.CONTINUE) { + r.errorf(stmt.TokenPos, "%s not in a loop", stmt.Token) + } + + case *syntax.IfStmt: + if r.container().function == nil { + r.errorf(stmt.If, "if statement not within a function") + } + r.expr(stmt.Cond) + r.stmts(stmt.True) + r.stmts(stmt.False) + + case *syntax.AssignStmt: + r.expr(stmt.RHS) + // x += y may be a re-binding of a global variable, + // but we cannot tell without knowing the type of x. + // (If x is a list it's equivalent to x.extend(y).) + // The use is conservatively treated as binding, + // but we suppress the error if it's an already-bound global. 
+ isAugmented := stmt.Op != syntax.EQ + r.assign(stmt.LHS, isAugmented) + + case *syntax.DefStmt: + if !AllowNestedDef && r.container().function != nil { + r.errorf(stmt.Def, doesnt+"support nested def") + } + const allowRebind = false + r.bind(stmt.Name, allowRebind) + r.function(stmt.Def, stmt.Name.Name, &stmt.Function) + + case *syntax.ForStmt: + if r.container().function == nil { + r.errorf(stmt.For, "for loop not within a function") + } + r.expr(stmt.X) + const allowRebind = false + r.assign(stmt.Vars, allowRebind) + r.loops++ + r.stmts(stmt.Body) + r.loops-- + + case *syntax.ReturnStmt: + if r.container().function == nil { + r.errorf(stmt.Return, "return statement not within a function") + } + if stmt.Result != nil { + r.expr(stmt.Result) + } + + case *syntax.LoadStmt: + if r.container().function != nil { + r.errorf(stmt.Load, "load statement within a function") + } + + const allowRebind = false + for i, from := range stmt.From { + if from.Name == "" { + r.errorf(from.NamePos, "load: empty identifier") + continue + } + if from.Name[0] == '_' { + r.errorf(from.NamePos, "load: names with leading underscores are not exported: %s", from.Name) + } + r.bind(stmt.To[i], allowRebind) + } + + default: + log.Fatalf("unexpected stmt %T", stmt) + } +} + +func (r *resolver) assign(lhs syntax.Expr, isAugmented bool) { + switch lhs := lhs.(type) { + case *syntax.Ident: + // x = ... + allowRebind := isAugmented + r.bind(lhs, allowRebind) + + case *syntax.IndexExpr: + // x[i] = ... + r.expr(lhs.X) + r.expr(lhs.Y) + + case *syntax.DotExpr: + // x.f = ... + r.expr(lhs.X) + + case *syntax.TupleExpr: + // (x, y) = ... + if len(lhs.List) == 0 { + r.errorf(syntax.Start(lhs), "can't assign to ()") + } + if isAugmented { + r.errorf(syntax.Start(lhs), "can't use tuple expression in augmented assignment") + } + for _, elem := range lhs.List { + r.assign(elem, isAugmented) + } + + case *syntax.ListExpr: + // [x, y, z] = ... 
+ if len(lhs.List) == 0 { + r.errorf(syntax.Start(lhs), "can't assign to []") + } + if isAugmented { + r.errorf(syntax.Start(lhs), "can't use list expression in augmented assignment") + } + for _, elem := range lhs.List { + r.assign(elem, isAugmented) + } + + default: + name := strings.ToLower(strings.TrimPrefix(fmt.Sprintf("%T", lhs), "*syntax.")) + r.errorf(syntax.Start(lhs), "can't assign to %s", name) + } +} + +func (r *resolver) expr(e syntax.Expr) { + switch e := e.(type) { + case *syntax.Ident: + r.use(e) + + case *syntax.Literal: + if !AllowFloat && e.Token == syntax.FLOAT { + r.errorf(e.TokenPos, doesnt+"support floating point") + } + + case *syntax.ListExpr: + for _, x := range e.List { + r.expr(x) + } + + case *syntax.CondExpr: + r.expr(e.Cond) + r.expr(e.True) + r.expr(e.False) + + case *syntax.IndexExpr: + r.expr(e.X) + r.expr(e.Y) + + case *syntax.DictEntry: + r.expr(e.Key) + r.expr(e.Value) + + case *syntax.SliceExpr: + r.expr(e.X) + if e.Lo != nil { + r.expr(e.Lo) + } + if e.Hi != nil { + r.expr(e.Hi) + } + if e.Step != nil { + r.expr(e.Step) + } + + case *syntax.Comprehension: + // A list/dict comprehension defines a new lexical block. + // Locals defined within the block will be allotted + // distinct slots in the locals array of the innermost + // enclosing container (function/module) block. 
+ r.push(&block{comp: e}) + const allowRebind = false + for _, clause := range e.Clauses { + switch clause := clause.(type) { + case *syntax.IfClause: + r.expr(clause.Cond) + case *syntax.ForClause: + r.assign(clause.Vars, allowRebind) + r.expr(clause.X) + } + } + r.expr(e.Body) // body may be *DictEntry + r.pop() + + case *syntax.TupleExpr: + for _, x := range e.List { + r.expr(x) + } + + case *syntax.DictExpr: + for _, entry := range e.List { + entry := entry.(*syntax.DictEntry) + r.expr(entry.Key) + r.expr(entry.Value) + } + + case *syntax.UnaryExpr: + r.expr(e.X) + + case *syntax.BinaryExpr: + if !AllowFloat && e.Op == syntax.SLASH { + r.errorf(e.OpPos, doesnt+"support floating point (use //)") + } + r.expr(e.X) + r.expr(e.Y) + + case *syntax.DotExpr: + r.expr(e.X) + // ignore e.Name + + case *syntax.CallExpr: + r.expr(e.Fn) + seenVarargs := false + seenKwargs := false + for _, arg := range e.Args { + pos, _ := arg.Span() + if unop, ok := arg.(*syntax.UnaryExpr); ok && unop.Op == syntax.STARSTAR { + // **kwargs + if seenKwargs { + r.errorf(pos, "multiple **kwargs not allowed") + } + seenKwargs = true + r.expr(arg) + } else if ok && unop.Op == syntax.STAR { + // *args + if seenKwargs { + r.errorf(pos, "*args may not follow **kwargs") + } else if seenVarargs { + r.errorf(pos, "multiple *args not allowed") + } + seenVarargs = true + r.expr(arg) + } else if binop, ok := arg.(*syntax.BinaryExpr); ok && binop.Op == syntax.EQ { + // k=v + if seenKwargs { + r.errorf(pos, "argument may not follow **kwargs") + } + // ignore binop.X + r.expr(binop.Y) + } else { + // positional argument + if seenVarargs { + r.errorf(pos, "argument may not follow *args") + } else if seenKwargs { + r.errorf(pos, "argument may not follow **kwargs") + } + r.expr(arg) + } + } + + case *syntax.LambdaExpr: + if !AllowLambda { + r.errorf(e.Lambda, doesnt+"support lambda") + } + r.function(e.Lambda, "lambda", &e.Function) + + default: + log.Fatalf("unexpected expr %T", e) + } +} + +func (r 
*resolver) function(pos syntax.Position, name string, function *syntax.Function) { + // Resolve defaults in enclosing environment. + for _, param := range function.Params { + if binary, ok := param.(*syntax.BinaryExpr); ok { + r.expr(binary.Y) + } + } + + // Enter function block. + b := &block{function: function} + r.push(b) + + const allowRebind = false + seenVarargs := false + seenKwargs := false + for _, param := range function.Params { + switch param := param.(type) { + case *syntax.Ident: + // e.g. x + if seenKwargs { + r.errorf(pos, "parameter may not follow **kwargs") + } else if seenVarargs { + r.errorf(pos, "parameter may not follow *args") + } + if r.bind(param, allowRebind) { + r.errorf(pos, "duplicate parameter: %s", param.Name) + } + + case *syntax.BinaryExpr: + // e.g. y=dflt + if seenKwargs { + r.errorf(pos, "parameter may not follow **kwargs") + } else if seenVarargs { + r.errorf(pos, "parameter may not follow *args") + } + if id := param.X.(*syntax.Ident); r.bind(id, allowRebind) { + r.errorf(pos, "duplicate parameter: %s", id.Name) + } + + case *syntax.UnaryExpr: + // *args or **kwargs + if param.Op == syntax.STAR { + if seenKwargs { + r.errorf(pos, "*args may not follow **kwargs") + } else if seenVarargs { + r.errorf(pos, "multiple *args not allowed") + } + seenVarargs = true + } else { + if seenKwargs { + r.errorf(pos, "multiple **kwargs not allowed") + } + seenKwargs = true + } + if id := param.X.(*syntax.Ident); r.bind(id, allowRebind) { + r.errorf(pos, "duplicate parameter: %s", id.Name) + } + } + } + function.HasVarargs = seenVarargs + function.HasKwargs = seenKwargs + r.stmts(function.Body) + + // Resolve all uses of this function's local vars, + // and keep just the remaining uses of free/global vars. + b.resolveLocalUses() + + // Leave function block. + r.pop() + + // References within the function body to globals are not + // resolved until the end of the module. 
+} + +func (r *resolver) resolveNonLocalUses(b *block) { + // First resolve inner blocks. + for _, child := range b.children { + r.resolveNonLocalUses(child) + } + for _, use := range b.uses { + bind := r.lookupLexical(use.id, use.env) + use.id.Scope = uint8(bind.scope) + use.id.Index = bind.index + } +} + +// lookupLocal looks up an identifier within its immediately enclosing function. +func lookupLocal(use use) binding { + for env := use.env; env != nil; env = env.parent { + if bind, ok := env.bindings[use.id.Name]; ok { + if bind.scope == Free { + // shouldn't exist till later + log.Fatalf("%s: internal error: %s, %d", use.id.NamePos, use.id.Name, bind) + } + return bind // found + } + if env.function != nil { + break + } + } + return binding{} // not found in this function +} + +// lookupLexical looks up an identifier within its lexically enclosing environment. +func (r *resolver) lookupLexical(id *syntax.Ident, env *block) (bind binding) { + if debug { + fmt.Printf("lookupLexical %s in %s = ...\n", id.Name, env) + defer func() { fmt.Printf("= %d\n", bind) }() + } + + // Is this the module block? + if env.isModule() { + return binding{r.useGlobal(id), 0} // global or builtin, or not found + } + + // Defined in this block? + bind, ok := env.bindings[id.Name] + if !ok { + // Defined in parent block? + bind = r.lookupLexical(id, env.parent) + if env.function != nil && (bind.scope == Local || bind.scope == Free) { + // Found in parent block, which belongs to enclosing function. + id := &syntax.Ident{ + Name: id.Name, + Scope: uint8(bind.scope), + Index: bind.index, + } + bind.scope = Free + bind.index = len(env.function.FreeVars) + env.function.FreeVars = append(env.function.FreeVars, id) + if debug { + fmt.Printf("creating freevar %v in function at %s: %s\n", + len(env.function.FreeVars), fmt.Sprint(env.function.Span()), id.Name) + } + } + + // Memoize, to avoid duplicate free vars + // and redundant global (failing) lookups. 
+ env.bind(id.Name, bind) + } + return bind +} diff --git a/resolve/resolve_test.go b/resolve/resolve_test.go new file mode 100644 index 0000000..6223515 --- /dev/null +++ b/resolve/resolve_test.go @@ -0,0 +1,87 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package resolve_test + +import ( + "strings" + "testing" + + "github.com/google/skylark/internal/chunkedfile" + "github.com/google/skylark/resolve" + "github.com/google/skylark/skylarktest" + "github.com/google/skylark/syntax" +) + +func TestResolve(t *testing.T) { + filename := skylarktest.DataFile("skylark/resolve", "testdata/resolve.sky") + for _, chunk := range chunkedfile.Read(filename, t) { + f, err := syntax.Parse(filename, chunk.Source) + if err != nil { + t.Error(err) + continue + } + + // A chunk may set options by containing e.g. "option:float". + resolve.AllowNestedDef = option(chunk.Source, "nesteddef") + resolve.AllowLambda = option(chunk.Source, "lambda") + resolve.AllowFloat = option(chunk.Source, "float") + resolve.AllowFreeze = option(chunk.Source, "freeze") + resolve.AllowSet = option(chunk.Source, "set") + resolve.AllowGlobalReassign = option(chunk.Source, "global_reassign") + + if err := resolve.File(f, isPredeclaredGlobal, isBuiltin); err != nil { + for _, err := range err.(resolve.ErrorList) { + chunk.GotError(int(err.Pos.Line), err.Msg) + } + } + chunk.Done() + } +} + +func option(chunk, name string) bool { + return strings.Contains(chunk, "option:"+name) +} + +func TestDefVarargsAndKwargsSet(t *testing.T) { + source := "def f(*args, **kwargs): pass\n" + file, err := syntax.Parse("foo.sky", source) + if err != nil { + t.Fatal(err) + } + if err := resolve.File(file, isPredeclaredGlobal, isBuiltin); err != nil { + t.Fatal(err) + } + fn := file.Stmts[0].(*syntax.DefStmt) + if !fn.HasVarargs { + t.Error("HasVarargs not set") + } + if !fn.HasKwargs { + t.Error("HasKwargs 
not set") + } +} + +func TestLambdaVarargsAndKwargsSet(t *testing.T) { + resolve.AllowLambda = true + source := "f = lambda *args, **kwargs: 0\n" + file, err := syntax.Parse("foo.sky", source) + if err != nil { + t.Fatal(err) + } + if err := resolve.File(file, isPredeclaredGlobal, isBuiltin); err != nil { + t.Fatal(err) + } + lam := file.Stmts[0].(*syntax.AssignStmt).RHS.(*syntax.LambdaExpr) + if !lam.HasVarargs { + t.Error("HasVarargs not set") + } + if !lam.HasKwargs { + t.Error("HasKwargs not set") + } +} + +func isPredeclaredGlobal(name string) bool { return strings.HasPrefix(name, "G") } +func isBuiltin(name string) bool { + return strings.HasPrefix(name, "B") || name == "float" +} diff --git a/resolve/testdata/resolve.sky b/resolve/testdata/resolve.sky new file mode 100644 index 0000000..c48af1c --- /dev/null +++ b/resolve/testdata/resolve.sky @@ -0,0 +1,246 @@ +# Tests of resolver errors. + +# use of declared global +x = 1 +_ = x + +--- +# premature use of global +_ = x ### "undefined: x" +x = 1 + +--- +# use of undefined global +_ = x ### "undefined: x" + +--- +# redeclaration of global +x = 1 +x = 2 ### "cannot reassign global x declared at .*resolve.sky:18:1" + +--- +# redeclaration of predeclared global or built-in + +# This rule permits tool maintainers to add members to the global +# environment without breaking existing programs. 
+ +G = 1 # ok +G = 2 ### "cannot reassign global G declared at .*/resolve.sky" + +B = 1 # ok +B = 1 ### "cannot reassign global B declared at .*/resolve.sky" + +--- +# reference to built-in +B() + +--- +# locals may be referenced before they are defined + +def f(): + G(x) # dynamic error + x = 1 + +--- +# Various forms of assignment: + +def f(x): # parameter + G(x) + G(y) ### "undefined: y" + +(a, b) = 1, 2 +G(a) +G(b) +G(c) ### "undefined: c" + +[p, q] = 1, 2 +G(p) +G(q) +G(r) ### "undefined: r" + +--- +# a comprehension introduces a separate lexical block + +_ = [x for x in "abc"] +G(x) ### "undefined: x" + +--- +# Functions may have forward refs. (option:lambda option:nesteddef) +def f(): + g() + h() ### "undefined: h" + def inner(): + i() + i = lambda: 0 + + +def g(): + f() + +--- +# It's permitted to rebind a global using a += assignment. + +x = [1] +x.extend([2]) # ok +x += [3] # ok (a list mutation, not a global rebinding) + +def f(): + x += [4] # x is local to f + +y = 1 +y += 2 # ok (even though it is in fact a global rebinding) + +z += 3 # ok (but fails dynamically because z is undefined) + +--- +def f(a): + if 1==1: + b = 1 + c = 1 + G(a) # ok: param + G(b) # ok: maybe bound local + G(c) # ok: bound local + G(d) # NB: we don't do a use-before-def check on local vars! + G(e) # ok: global + G(f) # ok: global + d = 1 + +e = 1 + +--- +# This program should resolve successfully but fail dynamically. +# However, the Java implementation currently reports the dynamic +# error at the x=2 statement. 
+x = 1 + +def f(): + G(x) # dynamic error: reference to undefined local + x = 2 + +f() + +--- + +def f(): + load("foo", "bar") ### "load statement within a function" + +load("foo", + "", ### "load: empty identifier" + "_a", ### "load: names with leading underscores are not exported: _a" + b="", ### "load: empty identifier" + c="_d", ### "load: names with leading underscores are not exported: _d" + _e="f") # ok + +--- +# A load() call as an expression statement is converted into a +# load statement, but load is not currently a reserved word. +# TODO(adonovan): clarify the Skylark spec on this issue. + +def load(): # doesn't affect following call + pass + +_ = 1 + load() # ok + +load("foo.sky", "") ### "load: empty identifier" + +--- + +def f(load): + _ = (load, load()) # ok + load("foo.sky", "x") ### "load statement within a function" + +--- +# return, if statements and for loops at top-level are forbidden + +for x in "abc": ### "for loop not within a function" + pass + +if x: ### "if statement not within a function" + pass + +return ### "return statement not within a function" + +--- +# The parser allows any expression on the LHS of an assignment. 
+ +1 = 2 ### "can't assign to literal" +1+2 = 3 ### "can't assign to binaryexpr" +f() = 4 ### "can't assign to callexpr" + +[a, b] = [1, 2] +[a, b] += [3, 4] ### "can't use list expression in augmented assignment" +(a, b) += [3, 4] ### "can't use tuple expression in augmented assignment" +[] = [] ### "can't assign to \\[\\]" +() = () ### "can't assign to ()" + +--- +# break and continue statements must appear within a loop + +break ### "break not in a loop" + +continue ### "continue not in a loop" + +pass + +--- +# No parameters may follow **kwargs + +def f(**kwargs, x): ### `parameter may not follow \*\*kwargs` + pass + +def g(**kwargs, *args): ### `\*args may not follow \*\*kwargs` + pass + +def h(**kwargs1, **kwargs2): ### `multiple \*\*kwargs not allowed` + pass + +--- +# Only **kwargs may follow *args + +def f(*args, x): ### `parameter may not follow \*args` + pass + +def g(*args1, *args2): ### `multiple \*args not allowed` + pass + +def h(*args, **kwargs): # ok + pass + +--- +# No arguments may follow **kwargs +def f(*args, **kwargs): + pass + +f(**{}, 1) ### `argument may not follow \*\*kwargs` +f(**{}, x=1) ### `argument may not follow \*\*kwargs` +f(**{}, *[]) ### `\*args may not follow \*\*kwargs` +f(**{}, **{}) ### `multiple \*\*kwargs not allowed` + +--- +# Only keyword arguments may follow *args +def f(*args, **kwargs): + pass + +f(*[], 1) ### `argument may not follow \*args` +f(*[], a=1) # ok +f(*[], *[]) ### `multiple \*args not allowed` +f(*[], **{}) # ok + +--- +# Parameter names must be unique. 
+ +def f(a, b, a): pass ### "duplicate parameter: a" +def g(args, b, *args): pass ### "duplicate parameter: args" +def h(kwargs, a, **kwargs): pass ### "duplicate parameter: kwargs" +def i(*x, **x): pass ### "duplicate parameter: x" + +--- +# No floating point +a = float("3.141") ### `dialect does not support floating point` +b = 1 / 2 ### `dialect does not support floating point \(use //\)` +c = 3.141 ### `dialect does not support floating point` +--- +# Floating point support (option:float) +a = float("3.141") +b = 1 / 2 +c = 3.141 diff --git a/skylarkstruct/struct.go b/skylarkstruct/struct.go new file mode 100644 index 0000000..38e4e97 --- /dev/null +++ b/skylarkstruct/struct.go @@ -0,0 +1,337 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package skylarkstruct defines the Skylark 'struct' type, +// an optional language extension. +package skylarkstruct + +// TODO(adonovan): This package is implicitly covered by other +// packages' tests, but it should really have some of its own. + +// TODO(adonovan): the deprecated struct methods "to_json" and +// "to_proto" do not appear in AttrNames, and hence dir(struct), since +// that would force the majority to have to ignore them, but they may +// nonetheless be called if the struct does not have fields of these +// names. Ideally these will go away soon. See b/36412967. + +import ( + "bytes" + "fmt" + "sort" + + "github.com/google/skylark" + "github.com/google/skylark/syntax" +) + +// Make is the implementation of a built-in function that instantiates +// an immutable struct from the specified keyword arguments. 
+// +// An application can add 'struct' to the Skylark environment like so: +// +// globals := skylark.StringDict{ +// "struct": skylark.NewBuiltin("struct", skylarkstruct.Make), +// } +// +func Make(_ *skylark.Thread, _ *skylark.Builtin, args skylark.Tuple, kwargs []skylark.Tuple) (skylark.Value, error) { + if len(args) > 0 { + return nil, fmt.Errorf("struct: unexpected positional arguments") + } + return FromKeywords(Default, kwargs), nil +} + +// FromKeywords returns a new struct instance whose fields are specified by the +// key/value pairs in kwargs. (Each kwargs[i][0] must be a skylark.String.) +func FromKeywords(constructor skylark.Value, kwargs []skylark.Tuple) *Struct { + if constructor == nil { + panic("nil constructor") + } + s := &Struct{ + constructor: constructor, + entries: make(entries, 0, len(kwargs)), + } + for _, kwarg := range kwargs { + k := string(kwarg[0].(skylark.String)) + v := kwarg[1] + s.entries = append(s.entries, entry{k, v}) + } + sort.Sort(s.entries) + return s +} + +// FromStringDict returns a new struct instance whose fields are those of d. +// The constructor parameter specifies the constructor; use Default for an ordinary struct. +func FromStringDict(constructor skylark.Value, d skylark.StringDict) *Struct { + if constructor == nil { + panic("nil constructor") + } + s := &Struct{ + constructor: constructor, + entries: make(entries, 0, len(d)), + } + for k, v := range d { + s.entries = append(s.entries, entry{k, v}) + } + sort.Sort(s.entries) + return s +} + +// Struct is an immutable Skylark type that maps field names to values. +// It is not iterable. +// +// A struct has a constructor, a distinct value that identifies a class +// of structs, and which appears in the struct's string representation. +// +// Operations such as x+y fail if the constructors of the two operands +// are not equal. +// +// The default constructor, Default, is the string "struct", but +// clients may wish to 'brand' structs for their own purposes. 
+// The constructor value appears in the printed form of the value, +// and is accessible using the Constructor method. +// +// Use Attr to access its fields and AttrNames to enumerate them. +type Struct struct { + constructor skylark.Value + entries entries // sorted by name +} + +// Default is the default constructor for structs. +// It is merely the string "struct". +const Default = skylark.String("struct") + +type entries []entry + +func (a entries) Len() int { return len(a) } +func (a entries) Less(i, j int) bool { return a[i].name < a[j].name } +func (a entries) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +type entry struct { + name string // not to_{proto,json} + value skylark.Value +} + +var ( + _ skylark.HasAttrs = (*Struct)(nil) + _ skylark.HasBinary = (*Struct)(nil) +) + +// ToStringDict adds a name/value entry to d for each field of the struct. +func (s *Struct) ToStringDict(d skylark.StringDict) { + for _, e := range s.entries { + d[e.name] = e.value + } +} + +func (s *Struct) String() string { + var buf bytes.Buffer + buf.WriteString(s.constructor.String()) + buf.WriteByte('(') + for i, e := range s.entries { + if i > 0 { + buf.WriteString(", ") + } + buf.WriteString(e.name) + buf.WriteString(" = ") + buf.WriteString(e.value.String()) + } + buf.WriteByte(')') + return buf.String() +} + +// Constructor returns the constructor used to create this struct. +func (s *Struct) Constructor() skylark.Value { return s.constructor } + +func (s *Struct) Type() string { return "struct" } +func (s *Struct) Truth() skylark.Bool { return true } // even when empty +func (s *Struct) Hash() (uint32, error) { + // Same algorithm as Tuple.hash, but with different primes. 
+ var x, m uint32 = 8731, 9839 + for _, e := range s.entries { + namehash, _ := skylark.String(e.name).Hash() + x = x ^ 3*namehash + y, err := e.value.Hash() + if err != nil { + return 0, err + } + x = x ^ y*m + m += 7349 + } + return x, nil +} +func (s *Struct) Freeze() { + for _, e := range s.entries { + e.value.Freeze() + } +} + +func (x *Struct) Binary(op syntax.Token, y skylark.Value, side skylark.Side) (skylark.Value, error) { + if y, ok := y.(*Struct); ok && op == syntax.PLUS { + if side == skylark.Right { + x, y = y, x + } + + if eq, err := skylark.Equal(x.constructor, y.constructor); err != nil { + return nil, fmt.Errorf("in %s + %s: error comparing constructors: %v", + x.constructor, y.constructor, err) + } else if !eq { + return nil, fmt.Errorf("cannot add structs of different constructors: %s + %s", + x.constructor, y.constructor) + } + + z := make(skylark.StringDict, x.Len()+y.Len()) + for _, e := range x.entries { + z[e.name] = e.value + } + for _, e := range y.entries { + z[e.name] = e.value + } + + return FromStringDict(x.constructor, z), nil + } + return nil, nil // unhandled +} + +// Attr returns the value of the specified field, +// or deprecated method if the name is "to_json" or "to_proto" +// and the struct has no field of that name. +func (s *Struct) Attr(name string) (skylark.Value, error) { + // Binary search the entries. + // This implementation is a specialization of + // sort.Search that avoids dynamic dispatch. + n := len(s.entries) + i, j := 0, n + for i < j { + h := int(uint(i+j) >> 1) + if s.entries[h].name < name { + i = h + 1 + } else { + j = h + } + } + if i < n && s.entries[i].name == name { + return s.entries[i].value, nil + } + + // TODO(adonovan): to_{json,proto} are deprecated (b/36412967). 
+ switch name { + case "to_json", "to_proto": + return skylark.NewBuiltin(name, func(thread *skylark.Thread, fn *skylark.Builtin, args skylark.Tuple, kwargs []skylark.Tuple) (skylark.Value, error) { + var buf bytes.Buffer + if name == "to_json" { + writeJSON(&buf, s) + } else { + writeTextProto(&buf, s) + } + return skylark.String(buf.String()), nil + }), nil + } + + return nil, fmt.Errorf("%v has no .%s attribute", s.constructor, name) +} + +func writeTextProto(out *bytes.Buffer, v skylark.Value) error { + return fmt.Errorf("to_proto not yet implemented") +} + +// writeJSON writes the JSON representation of a Skylark value to out. +// TODO(adonovan): there may be a nice feature for core skylark.Value here, +// but the current feature is incomplete and underspecified. +func writeJSON(out *bytes.Buffer, v skylark.Value) error { + // TODO(adonovan): improve error messages to show the path + // through the object graph. + switch v := v.(type) { + case skylark.NoneType: + out.WriteString("null") + case skylark.Bool: + fmt.Fprintf(out, "%t", v) + case skylark.Int: + // TODO(adonovan): test large numbers. + out.WriteString(v.String()) + case skylark.Float: + // TODO(adonovan): test. + fmt.Fprintf(out, "%g", v) + case skylark.String: + // TODO(adonovan): test with all bytes. + // I think JSON can represent only UTF-16 encoded Unicode. 
+ fmt.Fprintf(out, "%q", string(v)) + case skylark.Indexable: // Tuple, List + out.WriteByte('[') + for i, n := 0, skylark.Len(v); i < n; i++ { + if i > 0 { + out.WriteString(", ") + } + if err := writeJSON(out, v.Index(i)); err != nil { + return err + } + } + out.WriteByte(']') + case *Struct: + out.WriteByte('{') + for i, e := range v.entries { + if i > 0 { + out.WriteString(", ") + } + if err := writeJSON(out, skylark.String(e.name)); err != nil { + return err + } + out.WriteString(": ") + if err := writeJSON(out, e.value); err != nil { + return err + } + } + out.WriteByte('}') + default: + // function, builtin, set, dict, and all user-defined types. + return fmt.Errorf("cannot convert %s to JSON", v.Type()) + } + return nil +} + +func (s *Struct) Len() int { return len(s.entries) } + +// AttrNames returns a new sorted list of the struct fields. +func (s *Struct) AttrNames() []string { + names := make([]string, len(s.entries)) + for i, e := range s.entries { + names[i] = e.name + } + return names +} + +func (x *Struct) CompareSameType(op syntax.Token, y_ skylark.Value, depth int) (bool, error) { + y := y_.(*Struct) + switch op { + case syntax.EQL: + return structsEqual(x, y, depth) + case syntax.NEQ: + eq, err := structsEqual(x, y, depth) + return !eq, err + default: + return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) + } +} + +func structsEqual(x, y *Struct, depth int) (bool, error) { + if x.Len() != y.Len() { + return false, nil + } + + if eq, err := skylark.Equal(x.constructor, y.constructor); err != nil { + return false, fmt.Errorf("error comparing struct constructors: %v", + x.constructor, y.constructor, err) + } else if !eq { + return false, nil + } + + for i, n := 0, x.Len(); i < n; i++ { + if x.entries[i].name != y.entries[i].name { + return false, nil + } else if eq, err := skylark.EqualDepth(x.entries[i].value, y.entries[i].value, depth-1); err != nil { + return false, err + } else if !eq { + return false, nil + } + } + 
return true, nil +} diff --git a/skylarktest/assert.sky b/skylarktest/assert.sky new file mode 100644 index 0000000..d1dfeee --- /dev/null +++ b/skylarktest/assert.sky @@ -0,0 +1,47 @@ + +# Built-ins defined in this module: +# +# error(msg): report an error in Go's test framework without halting execution. +# catch(f): evaluates f() and returns its evaluation error message, if any +# matches(pattern, str): report whether str matches regular expression pattern. +# struct: a constructor for a simple HasAttrs implementation. +# +# Clients may use these functions to define their own testing abstractions. + +def _eq(x, y): + if x != y: + error("%r != %r" % (x, y)) + +def _ne(x, y): + if x == y: + error("%r == %r" % (x, y)) + +def _true(cond, msg="assertion failed"): + if not cond: + error(msg) + +def _lt(x, y): + if not (x < y): + error("%s is not less than %s" % (x, y)) + +def _contains(x, y): + if y not in x: + error("%s does not contain %s" % (x, y)) + +def _fails(f, pattern): + "assert_fails asserts that evaluation of f() fails with the specified error." + msg = catch(f) + if msg == None: + error("evaluation succeeded unexpectedly (want error matching %r)" % pattern) + elif not matches(pattern, msg): + error("regular expression (%s) did not match error (%s)" % (pattern, msg)) + +assert = struct( + fail = error, + eq = _eq, + ne = _ne, + true = _true, + lt = _lt, + contains = _contains, + fails = _fails, +) diff --git a/skylarktest/skylarktest.go b/skylarktest/skylarktest.go new file mode 100644 index 0000000..3c09571 --- /dev/null +++ b/skylarktest/skylarktest.go @@ -0,0 +1,125 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package skylarktest defines utilities for testing Skylark programs. +// +// Clients can call LoadAssertModule to load a module that defines +// several functions useful for testing. See assert.sky for its +// definition. 
+// +// The assert.error function, which reports errors to the current Go +// testing.T, requires that clients call SetReporter(thread, t) before use. +package skylarktest + +import ( + "bytes" + "fmt" + "go/build" + "path/filepath" + "regexp" + "sync" + + "github.com/google/skylark" + "github.com/google/skylark/skylarkstruct" +) + +const localKey = "Reporter" + +// A Reporter is a value to which errors may be reported. +// It is satisfied by *testing.T. +type Reporter interface { + Error(args ...interface{}) +} + +// SetReporter associates an error reporter (such as a testing.T in +// a Go test) with the Skylark thread so that Skylark programs may +// report errors to it. +func SetReporter(thread *skylark.Thread, r Reporter) { + thread.SetLocal(localKey, r) +} + +// GetReporter returns the Skylark thread's error reporter. +// It must be preceded by a call to SetReporter. +func GetReporter(thread *skylark.Thread) Reporter { + r, ok := thread.Local(localKey).(Reporter) + if !ok { + panic("internal error: skylarktest.SetReporter was not called") + } + return r +} + +var ( + once sync.Once + assert skylark.StringDict + assertErr error +) + +// LoadAssertModule loads the assert module. +// It is concurrency-safe and idempotent. +func LoadAssertModule() (skylark.StringDict, error) { + once.Do(func() { + globals := skylark.StringDict{ + "error": skylark.NewBuiltin("error", error_), + "catch": skylark.NewBuiltin("catch", catch), + "matches": skylark.NewBuiltin("matches", matches), + "struct": skylark.NewBuiltin("struct", skylarkstruct.Make), + } + filename := DataFile("skylark/skylarktest", "assert.sky") + thread := new(skylark.Thread) + err := skylark.ExecFile(thread, filename, nil, globals) + assert, assertErr = globals, err + }) + return assert, assertErr +} + +// catch(f) evaluates f() and returns its evaluation error message +// if it failed or None if it succeeded. 
+func catch(thread *skylark.Thread, _ *skylark.Builtin, args skylark.Tuple, kwargs []skylark.Tuple) (skylark.Value, error) { + var fn skylark.Callable + if err := skylark.UnpackArgs("catch", args, kwargs, "fn", &fn); err != nil { + return nil, err + } + if _, err := fn.Call(thread, nil, nil); err != nil { + return skylark.String(err.Error()), nil + } + return skylark.None, nil +} + +// matches(pattern, str) reports whether string str matches the regular expression pattern. +func matches(thread *skylark.Thread, _ *skylark.Builtin, args skylark.Tuple, kwargs []skylark.Tuple) (skylark.Value, error) { + var pattern, str string + if err := skylark.UnpackArgs("matches", args, kwargs, "pattern", &pattern, "str", &str); err != nil { + return nil, err + } + ok, err := regexp.MatchString(pattern, str) + if err != nil { + return nil, fmt.Errorf("matches: %s", err) + } + return skylark.Bool(ok), nil +} + +// error(x) reports an error to the Go test framework. +func error_(thread *skylark.Thread, _ *skylark.Builtin, args skylark.Tuple, kwargs []skylark.Tuple) (skylark.Value, error) { + if len(args) != 1 { + return nil, fmt.Errorf("error: got %d arguments, want 1", len(args)) + } + var buf bytes.Buffer + thread.Caller().WriteBacktrace(&buf) + buf.WriteString("Error: ") + if s, ok := skylark.AsString(args[0]); ok { + buf.WriteString(s) + } else { + buf.WriteString(args[0].String()) + } + GetReporter(thread).Error(buf.String()) + return skylark.None, nil +} + +// DataFile returns the effective filename of the specified +// test data resource. The function abstracts differences between +// 'go build', under which a test runs in its package directory, +// and Blaze, under which a test runs in the root of the tree. 
+var DataFile = func(pkgdir, filename string) string { + return filepath.Join(build.Default.GOPATH, "src/github.com/google", pkgdir, filename) +} diff --git a/syntax/grammar.txt b/syntax/grammar.txt new file mode 100644 index 0000000..4e6f788 --- /dev/null +++ b/syntax/grammar.txt @@ -0,0 +1,126 @@ + +Grammar of Skylark +================== + +File = {Statement | newline} eof . + +Statement = DefStmt | IfStmt | ForStmt | SimpleStmt . + +DefStmt = 'def' identifier '(' [Parameters [',']] ')' ':' Suite . + +Parameters = Parameter {',' Parameter}. + +Parameter = identifier | identifier '=' Test | '*' identifier | '**' identifier . + +IfStmt = 'if' Test ':' Suite {'elif' Test ':' Suite} ['else' ':' Suite] . + +ForStmt = 'for' LoopVariables 'in' Expression ':' Suite . + +Suite = [newline indent {Statement} outdent] SimpleStmt . + +SimpleStmt = SmallStmt {';' SmallStmt} [';'] '\n' . +# NOTE: '\n' optional at EOF + +SmallStmt = ReturnStmt + | BreakStmt | ContinueStmt | PassStmt + | AssignStmt + | ExprStmt + . + +ReturnStmt = 'return' Expression . +BreakStmt = 'break' . +ContinueStmt = 'continue' . +PassStmt = 'pass' . +AssignStmt = Expression ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=') Expression . +ExprStmt = Expression . + +Test = LambdaExpr + | IfExpr + | PrimaryExpr + | UnaryExpr + | BinaryExpr + . + +LambdaExpr = 'lambda' [Parameters] ':' Test . + +IfExpr = Test 'if' Test 'else' Test . + +PrimaryExpr = Operand + | PrimaryExpr DotSuffix + | PrimaryExpr CallSuffix + | PrimaryExpr SliceSuffix + . + +Operand = identifier + | int | float | string + | ListExpr | ListComp + | DictExpr | DictComp + | '(' [Expression [',']] ')' + | ('-' | '+') PrimaryExpr + . + +DotSuffix = '.' identifier . +CallSuffix = '(' [Arguments [',']] ')' . +SliceSuffix = '[' [Expression] [':' Test [':' Test]] ']' . + +Arguments = Argument {',' Argument} . +Argument = identifier | identifier '=' Test | '*' identifier | '**' identifier . + +ListExpr = '[' [Expression [',']] ']' . 
+ListComp = '[' Test {CompClause} ']'. + +DictExpr = '{' [Entries [',']] '}' . +DictComp = '{' Entry {CompClause} '}' . +Entries = Entry {',' Entry} . +Entry = Test ':' Test . + +CompClause = 'for' LoopVariables 'in' Test | 'if' Test . + +UnaryExpr = 'not' Test . + +BinaryExpr = Test {Binop Test} . + +Binop = 'or' + | 'and' + | '==' | '!=' | '<' | '>' | '<=' | '>=' | 'in' | 'not' 'in' + | '|' + | '&' + | '-' | '+' + | '*' | '%' | '/' | '//' + . + +Expression = Test {',' Test} . +# NOTE: trailing comma permitted only when within [...] or (...). + +LoopVariables = PrimaryExpr {',' PrimaryExpr} . + + +# Notation (similar to Go spec): +- lowercase and 'quoted' items are lexical tokens. +- Capitalized names denote grammar productions. +- (...) implies grouping +- x | y means either x or y. +- [x] means x is optional +- {x} means x is repeated zero or more times +- The end of each declaration is marked with a period. + +# Tokens +- spaces: newline, eof, indent, outdent. +- identifier. +- literals: string, int, float. +- plus all quoted tokens such as '+=', 'return'. + +# Notes: +- 'load' is an identifier, not a reserved word. + Load statements are synthesized when reducing 'small_stmt = expr' + if the name of the identifier is 'load'. This is a wart. +- Ambiguity is resolved using operator precedence. +- The grammar does not enforce the legal order of params and args, + nor that the first compclause must be a 'for'. + +TODO: +- explain how the lexer generates indent, outdent, and newline tokens. +- why is unary NOT separated from unary - and +? +- the grammar is (mostly) in LL(1) style so, for example, + dot expressions are formed suffixes, not complete expressions, + which makes the spec harder to read. Reorganize into non-LL(1) form? diff --git a/syntax/parse.go b/syntax/parse.go new file mode 100644 index 0000000..e3be151 --- /dev/null +++ b/syntax/parse.go @@ -0,0 +1,904 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +// This file defines a recursive-descent parser for Skylark. +// The LL(1) grammar of Skylark and the names of many productions follow Python 2.7. +// +// TODO(adonovan): use syntax.Error more systematically throughout the +// package. Verify that error positions are correct using the +// chunkedfile mechanism. + +import "log" + +// Enable this flag to print the token stream and log.Fatal on the first error. +const debug = false + +// Parse parses the input data and returns the corresponding parse tree. +// +// If src != nil, Parse parses the source from src and the filename +// is only used when recording position information. +// The type of the argument for the src parameter must be string, +// []byte, or io.Reader. +// If src == nil, Parse parses the file specified by filename. +func Parse(filename string, src interface{}) (f *File, err error) { + in, err := newScanner(filename, src) + if err != nil { + return nil, err + } + p := parser{in: in} + defer p.in.recover(&err) + + p.nextToken() // read first lookahead token + f = p.parseFile() + if f != nil { + f.Path = filename + } + return f, nil +} + +// ParseExpr parses a Skylark expression. +// See Parse for explanation of parameters. +func ParseExpr(filename string, src interface{}) (expr Expr, err error) { + in, err := newScanner(filename, src) + if err != nil { + return nil, err + } + p := parser{in: in} + defer p.in.recover(&err) + + p.nextToken() // read first lookahead token + expr = p.parseTest() + + if p.tok != EOF { + p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok) + } + + return expr, nil +} + +type parser struct { + in *scanner + tok Token + tokval tokenValue +} + +// nextToken advances the scanner and returns the position of the +// previous token. 
+func (p *parser) nextToken() Position { + oldpos := p.tokval.pos + p.tok = p.in.nextToken(&p.tokval) + // enable to see the token stream + if debug { + log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos) + } + return oldpos +} + +// file_input = (NEWLINE | stmt)* EOF +func (p *parser) parseFile() *File { + var stmts []Stmt + for p.tok != EOF { + if p.tok == NEWLINE { + p.nextToken() + continue + } + stmts = p.parseStmt(stmts) + } + return &File{Stmts: stmts} +} + +func (p *parser) parseStmt(stmts []Stmt) []Stmt { + if p.tok == DEF { + return append(stmts, p.parseDefStmt()) + } else if p.tok == IF { + return append(stmts, p.parseIfStmt()) + } else if p.tok == FOR { + return append(stmts, p.parseForStmt()) + } else { + return p.parseSimpleStmt(stmts) + } +} + +func (p *parser) parseDefStmt() Stmt { + defpos := p.nextToken() // consume DEF + id := p.parseIdent() + p.consume(LPAREN) + params := p.parseParams() + p.consume(RPAREN) + p.consume(COLON) + body := p.parseSuite() + return &DefStmt{ + Def: defpos, + Name: id, + Function: Function{ + StartPos: defpos, + Params: params, + Body: body, + }, + } +} + +func (p *parser) parseIfStmt() Stmt { + ifpos := p.nextToken() // consume IF + cond := p.parseTest() + p.consume(COLON) + body := p.parseSuite() + ifStmt := &IfStmt{ + If: ifpos, + Cond: cond, + True: body, + } + tail := ifStmt + for p.tok == ELIF { + elifpos := p.nextToken() // consume ELIF + cond := p.parseTest() + p.consume(COLON) + body := p.parseSuite() + elif := &IfStmt{ + If: elifpos, + Cond: cond, + True: body, + } + tail.ElsePos = elifpos + tail.False = []Stmt{elif} + tail = elif + } + if p.tok == ELSE { + tail.ElsePos = p.nextToken() // consume ELSE + p.consume(COLON) + tail.False = p.parseSuite() + } + return ifStmt +} + +func (p *parser) parseForStmt() Stmt { + forpos := p.nextToken() // consume FOR + vars := p.parseForLoopVariables() + p.consume(IN) + x := p.parseExpr(false) + p.consume(COLON) + body := p.parseSuite() + return &ForStmt{ + For: 
forpos, + Vars: vars, + X: x, + Body: body, + } +} + +// Equivalent to 'exprlist' production in Python grammar. +// +// loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA? +func (p *parser) parseForLoopVariables() Expr { + // Avoid parseExpr because it would consume the IN token + // following x in "for x in y: ...". + v := p.parsePrimaryWithSuffix() + if p.tok != COMMA { + return v + } + + list := []Expr{v} + for p.tok == COMMA { + p.nextToken() + if terminatesExprList(p.tok) { + break + } + list = append(list, p.parsePrimaryWithSuffix()) + } + return &TupleExpr{List: list} +} + +// simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE +func (p *parser) parseSimpleStmt(stmts []Stmt) []Stmt { + for { + stmts = append(stmts, p.parseSmallStmt()) + if p.tok != SEMI { + break + } + p.nextToken() // consume SEMI + if p.tok == NEWLINE || p.tok == EOF { + break + } + } + // EOF without NEWLINE occurs in `if x: pass`, for example. + if p.tok != EOF { + p.consume(NEWLINE) + } + return stmts +} + +// small_stmt = RETURN expr? +// | PASS | BREAK | CONTINUE +// | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=') expr // assign +// | expr +func (p *parser) parseSmallStmt() Stmt { + if p.tok == RETURN { + pos := p.nextToken() // consume RETURN + var result Expr + if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI { + result = p.parseExpr(false) + } + return &ReturnStmt{Return: pos, Result: result} + } + + switch p.tok { + case BREAK, CONTINUE, PASS: + tok := p.tok + pos := p.nextToken() // consume it + return &BranchStmt{Token: tok, TokenPos: pos} + } + + // Assignment + x := p.parseExpr(false) + switch p.tok { + case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ: + op := p.tok + pos := p.nextToken() // consume op + rhs := p.parseExpr(false) + return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs} + } + + // Convert load(...) call into LoadStmt special form. + // TODO(adonovan): This affects all calls to load, not just at toplevel. 
+ // Spec clarification needed. + if call, ok := x.(*CallExpr); ok { + if id, ok := call.Fn.(*Ident); ok && id.Name == "load" { + return p.convertCallToLoad(call, id.NamePos) + } + } + + // Expression statement (e.g. function call, doc string). + return &ExprStmt{X: x} +} + +// Because load is not a reserved word, it is impossible to parse a load +// statement using an LL(1) grammar. Instead we parse load(...) as a function +// call and then convert it to a LoadStmt. +func (p *parser) convertCallToLoad(call *CallExpr, loadPos Position) *LoadStmt { + if len(call.Args) < 2 { + p.in.errorf(call.Lparen, "load statement needs at least 2 operands, got %d", len(call.Args)) + } + module, ok := call.Args[0].(*Literal) + if !ok || module.Token != STRING { + start, _ := call.Args[0].Span() + p.in.errorf(start, "first operand of load statement must be a string literal") + } + + // synthesize identifiers + args := call.Args[1:] + to := make([]*Ident, len(args)) + from := make([]*Ident, len(args)) + for i, arg := range args { + if lit, ok := arg.(*Literal); ok && lit.Token == STRING { + // load("module", "id") + // To name is same as original. + id := &Ident{ + NamePos: lit.TokenPos.add(`"`), + Name: lit.Value.(string), + } + to[i] = id + from[i] = id + continue + } else if binary, ok := arg.(*BinaryExpr); ok && binary.Op == EQ { + // load("module", to="from") + // Symbol is locally renamed. + if lit, ok := binary.Y.(*Literal); ok && lit.Token == STRING { + id := &Ident{ + NamePos: lit.TokenPos.add(`"`), + Name: lit.Value.(string), + } + to[i] = binary.X.(*Ident) + from[i] = id + continue + } + } + + start, _ := arg.Span() + p.in.errorf(start, `load operand must be "name" or localname="name"`) + } + return &LoadStmt{ + Load: loadPos, + Module: module, + To: to, + From: from, + Rparen: call.Rparen, + } + +} + +// suite is typically what follows a COLON (e.g. after DEF or FOR). 
+// suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT +func (p *parser) parseSuite() []Stmt { + if p.tok == NEWLINE { + p.nextToken() // consume NEWLINE + p.consume(INDENT) + var stmts []Stmt + for p.tok != OUTDENT && p.tok != EOF { + stmts = p.parseStmt(stmts) + } + p.consume(OUTDENT) + return stmts + } + + return p.parseSimpleStmt(nil) +} + +func (p *parser) parseIdent() *Ident { + if p.tok != IDENT { + p.in.error(p.in.pos, "not an identifier") + } + id := &Ident{ + NamePos: p.tokval.pos, + Name: p.tokval.raw, + } + p.nextToken() + return id +} + +func (p *parser) consume(t Token) Position { + if p.tok != t { + p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t) + } + return p.nextToken() +} + +// params = (param COMMA)* param +// | +// +// param = IDENT +// | IDENT EQ test +// | STAR IDENT +// | STARSTAR IDENT +// +// parseParams parses a parameter list. The resulting expressions are of the form: +// +// *Ident +// *Binary{Op: EQ, X: *Ident, Y: Expr} +// *Unary{Op: STAR, X: *Ident} +// *Unary{Op: STARSTAR, X: *Ident} +func (p *parser) parseParams() []Expr { + var params []Expr + stars := false + for p.tok != RPAREN && p.tok != COLON && p.tok != EOF { + if len(params) > 0 { + p.consume(COMMA) + } + if p.tok == RPAREN { + // list can end with a COMMA if there is neither * nor ** + if stars { + p.in.errorf(p.in.pos, "got %#v, want parameter", p.tok) + } + break + } + + // *args + if p.tok == STAR { + stars = true + pos := p.nextToken() + id := p.parseIdent() + params = append(params, &UnaryExpr{ + OpPos: pos, + Op: STAR, + X: id, + }) + continue + } + + // **kwargs + if p.tok == STARSTAR { + stars = true + pos := p.nextToken() + id := p.parseIdent() + params = append(params, &UnaryExpr{ + OpPos: pos, + Op: STARSTAR, + X: id, + }) + continue + } + + // IDENT + // IDENT = test + id := p.parseIdent() + if p.tok == EQ { // default value + eq := p.nextToken() + dflt := p.parseTest() + params = append(params, &BinaryExpr{ + X: id, + OpPos: eq, + Op: EQ, + Y: dflt, + }) 
+ continue + } + + params = append(params, id) + } + return params +} + +// parseExpr parses an expression, possible consisting of a +// comma-separated list of 'test' expressions. +// +// In many cases we must use parseTest to avoid ambiguity such as +// f(x, y) vs. f((x, y)). +func (p *parser) parseExpr(inParens bool) Expr { + x := p.parseTest() + if p.tok != COMMA { + return x + } + + // tuple + exprs := p.parseExprs([]Expr{x}, inParens) + return &TupleExpr{List: exprs} +} + +// parseExprs parses a comma-separated list of expressions, starting with the comma. +// It is used to parse tuples and list elements. +// expr_list = (',' expr)* ','? +func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr { + for p.tok == COMMA { + pos := p.nextToken() + if terminatesExprList(p.tok) { + if !allowTrailingComma { + p.in.error(pos, "unparenthesized tuple with trailing comma") + } + break + } + exprs = append(exprs, p.parseTest()) + } + return exprs +} + +// parseTest parses a 'test', a single-component expression. 
+func (p *parser) parseTest() Expr { + if p.tok == LAMBDA { + lambda := p.nextToken() + var params []Expr + if p.tok != COLON { + params = p.parseParams() + } + p.consume(COLON) + body := p.parseTest() + return &LambdaExpr{ + Lambda: lambda, + Function: Function{ + StartPos: lambda, + Params: params, + Body: []Stmt{&ReturnStmt{Result: body}}, + }, + } + } + + x := p.parseTestPrec(0) + + // conditional expression (t IF cond ELSE f) + if p.tok == IF { + ifpos := p.nextToken() + cond := p.parseTestPrec(0) + if p.tok != ELSE { + p.in.error(ifpos, "conditional expression without else clause") + } + elsepos := p.nextToken() + else_ := p.parseTest() + return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_} + } + + return x +} + +func (p *parser) parseTestPrec(prec int) Expr { + if prec >= len(preclevels) { + return p.parsePrimaryWithSuffix() + } + + // expr = NOT expr + if p.tok == NOT && prec == int(precedence[NOT]) { + pos := p.nextToken() + x := p.parseTestPrec(prec + 1) + return &UnaryExpr{ + OpPos: pos, + Op: NOT, + X: x, + } + } + + return p.parseBinopExpr(prec) +} + +// expr = test (OP test)* +// Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing. +func (p *parser) parseBinopExpr(prec int) Expr { + x := p.parseTestPrec(prec + 1) + for first := true; ; first = false { + if p.tok == NOT { + p.nextToken() // consume NOT + // In this context, NOT must be followed by IN. + // Replace NOT IN by a single NOT_IN token. + if p.tok != IN { + p.in.errorf(p.in.pos, "got %#v, want in", p.tok) + } + p.tok = NOT_IN + } + + // Binary operator of specified precedence? + opprec := int(precedence[p.tok]) + if opprec < prec { + return x + } + + // Comparisons are non-associative. 
+ if !first && opprec == int(precedence[EQL]) { + p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)", + x.(*BinaryExpr).Op, p.tok) + } + + op := p.tok + pos := p.nextToken() + y := p.parseTestPrec(opprec + 1) + x = makeBinaryExpr(op, pos, x, y) + } +} + +// precedence maps each operator to its precedence (0-7), or -1 for other tokens. +var precedence [maxToken]int8 + +// preclevels groups operators of equal precedence. +// Comparisons are nonassociative; other binary operators associate to the left. +// Unary MINUS and PLUS have higher precedence so are handled in parsePrimary. +// See http://docs.python.org/2/reference/expressions.html#operator-precedence +var preclevels = [...][]Token{ + {OR}, // or + {AND}, // and + {NOT}, // not (unary) + {EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in + {PIPE}, // | + {AMP}, // & + {MINUS, PLUS}, // - + {STAR, PERCENT, SLASH, SLASHSLASH}, // * % / // +} + +func init() { + // populate precedence table + for i := range precedence { + precedence[i] = -1 + } + for level, tokens := range preclevels { + for _, tok := range tokens { + precedence[tok] = int8(level) + } + } +} + +func makeBinaryExpr(op Token, pos Position, x, y Expr) Expr { + // Concatenate literal strings during parsing. + if op == PLUS { + if x, ok := x.(*Literal); ok && x.Token == STRING { + if y, ok := y.(*Literal); ok && y.Token == STRING { + // The Span of this synthetic node will be wrong. + return &Literal{ + Token: STRING, + TokenPos: x.TokenPos, + Raw: x.Raw + " + " + y.Raw, // ugh + Value: x.Value.(string) + y.Value.(string), + } + } + } + } + return &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y} +} + +// primary_with_suffix = primary +// | primary '.' 
IDENT +// | primary slice_suffix +// | primary call_suffix +func (p *parser) parsePrimaryWithSuffix() Expr { + x := p.parsePrimary() + for { + switch p.tok { + case DOT: + dot := p.nextToken() + id := p.parseIdent() + x = &DotExpr{Dot: dot, X: x, Name: id} + case LBRACK: + x = p.parseSliceSuffix(x) + case LPAREN: + x = p.parseCallSuffix(x) + default: + return x + } + } +} + +// slice_suffix = '[' expr? ':' expr? ':' expr? ']' +func (p *parser) parseSliceSuffix(x Expr) Expr { + lbrack := p.nextToken() + var lo, hi, step Expr + if p.tok != COLON { + y := p.parseExpr(false) + + // index x[y] + if p.tok == RBRACK { + rbrack := p.nextToken() + return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack} + } + + lo = y + } + + // slice or substring x[lo:hi:step] + if p.tok == COLON { + p.nextToken() + if p.tok != COLON && p.tok != RBRACK { + hi = p.parseTest() + } + } + if p.tok == COLON { + p.nextToken() + if p.tok != RBRACK { + step = p.parseTest() + } + } + rbrack := p.consume(RBRACK) + return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack} +} + +// call_suffix = '(' arg_list? ')' +func (p *parser) parseCallSuffix(fn Expr) Expr { + lparen := p.consume(LPAREN) + var rparen Position + var args []Expr + if p.tok == RPAREN { + rparen = p.nextToken() + } else { + args = p.parseArgs() + rparen = p.consume(RPAREN) + } + return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen} +} + +// parseArgs parses a list of actual parameter values (arguments). +// It mirrors the structure of parseParams. +// arg_list = ((arg COMMA)* arg COMMA?)? 
+func (p *parser) parseArgs() []Expr { + var args []Expr + stars := false + for p.tok != RPAREN && p.tok != EOF { + if len(args) > 0 { + p.consume(COMMA) + } + if p.tok == RPAREN { + // list can end with a COMMA if there is neither * nor ** + if stars { + p.in.errorf(p.in.pos, `got %#v, want argument`, p.tok) + } + break + } + + // *args + if p.tok == STAR { + stars = true + pos := p.nextToken() + x := p.parseTest() + args = append(args, &UnaryExpr{ + OpPos: pos, + Op: STAR, + X: x, + }) + continue + } + + // **kwargs + if p.tok == STARSTAR { + stars = true + pos := p.nextToken() + x := p.parseTest() + args = append(args, &UnaryExpr{ + OpPos: pos, + Op: STARSTAR, + X: x, + }) + continue + } + + // We use a different strategy from Bazel here to stay within LL(1). + // Instead of looking ahead two tokens (IDENT, EQ) we parse + // 'test = test' then check that the first was an IDENT. + x := p.parseTest() + + if p.tok == EQ { + // name = value + if _, ok := x.(*Ident); !ok { + p.in.errorf(p.in.pos, "keyword argument must have form name=expr") + } + eq := p.nextToken() + y := p.parseTest() + x = &BinaryExpr{ + X: x, + OpPos: eq, + Op: EQ, + Y: y, + } + } + + args = append(args, x) + } + return args +} + +// primary = IDENT +// | INT | FLOAT +// | STRING +// | '[' ... // list literal or comprehension +// | '{' ... // dict literal or comprehension +// | '(' ... 
// tuple or parenthesized expression +// | ('-'|'+') primary_with_suffix +func (p *parser) parsePrimary() Expr { + switch p.tok { + case IDENT: + return p.parseIdent() + + case INT, FLOAT, STRING: + var val interface{} + tok := p.tok + switch tok { + case INT: + val = p.tokval.int + case FLOAT: + val = p.tokval.float + case STRING: + val = p.tokval.string + } + raw := p.tokval.raw + pos := p.nextToken() + return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val} + + case LBRACK: + return p.parseList() + + case LBRACE: + return p.parseDict() + + case LPAREN: + lparen := p.nextToken() + if p.tok == RPAREN { + // empty tuple + rparen := p.nextToken() + return &TupleExpr{Lparen: lparen, Rparen: rparen} + } + e := p.parseExpr(true) // allow trailing comma + p.consume(RPAREN) + return e + + case MINUS, PLUS: + // unary minus/plus: + tok := p.tok + pos := p.nextToken() + x := p.parsePrimaryWithSuffix() + return &UnaryExpr{ + OpPos: pos, + Op: tok, + X: x, + } + } + p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok) + panic("unreachable") +} + +// list = '[' ']' +// | '[' expr ']' +// | '[' expr expr_list ']' +// | '[' expr (FOR loop_variables IN expr)+ ']' +func (p *parser) parseList() Expr { + lbrack := p.nextToken() + if p.tok == RBRACK { + // empty List + rbrack := p.nextToken() + return &ListExpr{Lbrack: lbrack, Rbrack: rbrack} + } + + x := p.parseTest() + + if p.tok == FOR { + // list comprehension + return p.parseComprehensionSuffix(lbrack, x, RBRACK) + } + + exprs := []Expr{x} + if p.tok == COMMA { + // multi-item list literal + exprs = p.parseExprs(exprs, true) // allow trailing comma + } + + rbrack := p.consume(RBRACK) + return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack} +} + +// dict = '{' '}' +// | '{' dict_entry_list '}' +// | '{' dict_entry FOR loop_variables IN expr '}' +func (p *parser) parseDict() Expr { + lbrace := p.nextToken() + if p.tok == RBRACE { + // empty dict + rbrace := p.nextToken() + return &DictExpr{Lbrace: 
lbrace, Rbrace: rbrace} + } + + x := p.parseDictEntry() + + if p.tok == FOR { + // dict comprehension + return p.parseComprehensionSuffix(lbrace, x, RBRACE) + } + + entries := []Expr{x} + for p.tok == COMMA { + p.nextToken() + if p.tok == RBRACE { + break + } + entries = append(entries, p.parseDictEntry()) + } + + rbrace := p.consume(RBRACE) + return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace} +} + +// dict_entry = test ':' test +func (p *parser) parseDictEntry() *DictEntry { + k := p.parseTest() + colon := p.consume(COLON) + v := p.parseTest() + return &DictEntry{Key: k, Colon: colon, Value: v} +} + +// comp_suffix = FOR loopvars IN expr comp_suffix +// | IF expr comp_suffix +// | ']' or ')' (end) +// +// There can be multiple FOR/IF clauses; the first is always a FOR. +func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr { + var clauses []Node + for p.tok != endBrace { + if p.tok == FOR { + pos := p.nextToken() + vars := p.parseForLoopVariables() + in := p.consume(IN) + // Following Python 3, the operand of IN cannot be: + // - a conditional expression ('x if y else z'), + // due to conflicts in Python grammar + // ('if' is used by the comprehension); + // - a lambda expression + // - an unparenthesized tuple. 
+ x := p.parseTestPrec(0) + clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x}) + } else if p.tok == IF { + pos := p.nextToken() + cond := p.parseTest() + clauses = append(clauses, &IfClause{If: pos, Cond: cond}) + } else { + p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace) + } + } + rbrace := p.nextToken() + + return &Comprehension{ + Curly: endBrace == RBRACE, + Lbrack: lbrace, + Body: body, + Clauses: clauses, + Rbrack: rbrace, + } +} + +func terminatesExprList(tok Token) bool { + switch tok { + case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI: + return true + } + return false +} diff --git a/syntax/parse_test.go b/syntax/parse_test.go new file mode 100644 index 0000000..a5900cc --- /dev/null +++ b/syntax/parse_test.go @@ -0,0 +1,398 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax_test + +import ( + "bytes" + "fmt" + "reflect" + "strings" + "testing" + + "github.com/google/skylark/internal/chunkedfile" + "github.com/google/skylark/skylarktest" + "github.com/google/skylark/syntax" +) + +func TestExprParseTrees(t *testing.T) { + for _, test := range []struct { + input, want string + }{ + {`print(1)`, + `(CallExpr Fn=print Args=(1))`}, + {`x + 1`, + `(BinaryExpr X=x Op=+ Y=1)`}, + {`[x for x in y]`, + `(Comprehension Body=x Clauses=((ForClause Vars=x X=y)))`}, + {`[x for x in (a if b else c)]`, + `(Comprehension Body=x Clauses=((ForClause Vars=x X=(CondExpr Cond=b True=a False=c))))`}, + {`x[i].f(42)`, + `(CallExpr Fn=(DotExpr X=(IndexExpr X=x Y=i) Name=f) Args=(42))`}, + {`x.f()`, + `(CallExpr Fn=(DotExpr X=x Name=f))`}, + {`x+y*z`, + `(BinaryExpr X=x Op=+ Y=(BinaryExpr X=y Op=* Y=z))`}, + {`x%y-z`, + `(BinaryExpr X=(BinaryExpr X=x Op=% Y=y) Op=- Y=z)`}, + {`a + b not in c`, + `(BinaryExpr X=(BinaryExpr X=a Op=+ Y=b) Op=not in Y=c)`}, + {`lambda x, *args, **kwargs: None`, + 
`(LambdaExpr Function=(Function Params=(x (UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)) Body=((ReturnStmt Result=None))))`}, + {`{"one": 1}`, + `(DictExpr List=((DictEntry Key="one" Value=1)))`}, + {`a[i]`, + `(IndexExpr X=a Y=i)`}, + {`a[i:]`, + `(SliceExpr X=a Lo=i)`}, + {`a[:j]`, + `(SliceExpr X=a Hi=j)`}, + {`a[::]`, + `(SliceExpr X=a)`}, + {`a[::k]`, + `(SliceExpr X=a Step=k)`}, + {`[]`, + `(ListExpr)`}, + {`[1]`, + `(ListExpr List=(1))`}, + {`[1,]`, + `(ListExpr List=(1))`}, + {`[1, 2]`, + `(ListExpr List=(1 2))`}, + {`()`, + `(TupleExpr)`}, + {`(4,)`, + `(TupleExpr List=(4))`}, + {`(4)`, + `4`}, + {`(4, 5)`, + `(TupleExpr List=(4 5))`}, + {`{}`, + `(DictExpr)`}, + {`{"a": 1}`, + `(DictExpr List=((DictEntry Key="a" Value=1)))`}, + {`{"a": 1,}`, + `(DictExpr List=((DictEntry Key="a" Value=1)))`}, + {`{"a": 1, "b": 2}`, + `(DictExpr List=((DictEntry Key="a" Value=1) (DictEntry Key="b" Value=2)))`}, + {`{x: y for (x, y) in z}`, + `(Comprehension Curly Body=(DictEntry Key=x Value=y) Clauses=((ForClause Vars=(TupleExpr List=(x y)) X=z)))`}, + {`{x: y for a in b if c}`, + `(Comprehension Curly Body=(DictEntry Key=x Value=y) Clauses=((ForClause Vars=a X=b) (IfClause Cond=c)))`}, + {`-1 + +2`, + `(BinaryExpr X=(UnaryExpr Op=- X=1) Op=+ Y=(UnaryExpr Op=+ X=2))`}, + {`"foo" + "bar"`, + `"foobar"`}, // concatenated + {`-1 * 2`, // prec(unary -) > prec(binary *) + `(BinaryExpr X=(UnaryExpr Op=- X=1) Op=* Y=2)`}, + {`-x[i]`, // prec(unary -) < prec(x[i]) + `(UnaryExpr Op=- X=(IndexExpr X=x Y=i))`}, + {`a | b & c | d`, // prec(|) < prec(&) + `(BinaryExpr X=(BinaryExpr X=a Op=| Y=(BinaryExpr X=b Op=& Y=c)) Op=| Y=d)`}, + {`a or b and c or d`, + `(BinaryExpr X=(BinaryExpr X=a Op=or Y=(BinaryExpr X=b Op=and Y=c)) Op=or Y=d)`}, + {`a and b or c and d`, + `(BinaryExpr X=(BinaryExpr X=a Op=and Y=b) Op=or Y=(BinaryExpr X=c Op=and Y=d))`}, + {`f(1, x=y)`, + `(CallExpr Fn=f Args=(1 (BinaryExpr X=x Op== Y=y)))`}, + {`f(*args, **kwargs)`, + `(CallExpr Fn=f Args=((UnaryExpr Op=* 
X=args) (UnaryExpr Op=** X=kwargs)))`}, + {`a if b else c`, + `(CondExpr Cond=b True=a False=c)`}, + {`a and not b`, + `(BinaryExpr X=a Op=and Y=(UnaryExpr Op=not X=b))`}, + } { + e, err := syntax.ParseExpr("foo.sky", test.input) + if err != nil { + t.Errorf("parse `%s` failed: %v", test.input, stripPos(err)) + continue + } + if got := treeString(e); test.want != got { + t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want) + } + } +} + +func TestStmtParseTrees(t *testing.T) { + for _, test := range []struct { + input, want string + }{ + {`print(1)`, + `(ExprStmt X=(CallExpr Fn=print Args=(1)))`}, + {`return 1, 2`, + `(ReturnStmt Result=(TupleExpr List=(1 2)))`}, + {`return`, + `(ReturnStmt)`}, + {`for i in "abc": break`, + `(ForStmt Vars=i X="abc" Body=((BranchStmt Token=break)))`}, + {`for i in "abc": continue`, + `(ForStmt Vars=i X="abc" Body=((BranchStmt Token=continue)))`}, + {`for x, y in z: pass`, + `(ForStmt Vars=(TupleExpr List=(x y)) X=z Body=((BranchStmt Token=pass)))`}, + {`if True: pass`, + `(IfStmt Cond=True True=((BranchStmt Token=pass)))`}, + {`if True: break`, + `(IfStmt Cond=True True=((BranchStmt Token=break)))`}, + {`if True: continue`, + `(IfStmt Cond=True True=((BranchStmt Token=continue)))`}, + {`if True: pass +else: + pass`, + `(IfStmt Cond=True True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))`}, + {"if a: pass\nelif b: pass\nelse: pass", + `(IfStmt Cond=a True=((BranchStmt Token=pass)) False=((IfStmt Cond=b True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))))`}, + {`x, y = 1, 2`, + `(AssignStmt Op== LHS=(TupleExpr List=(x y)) RHS=(TupleExpr List=(1 2)))`}, + {`x[i] = 1`, + `(AssignStmt Op== LHS=(IndexExpr X=x Y=i) RHS=1)`}, + {`x.f = 1`, + `(AssignStmt Op== LHS=(DotExpr X=x Name=f) RHS=1)`}, + {`(x, y) = 1`, + `(AssignStmt Op== LHS=(TupleExpr List=(x y)) RHS=1)`}, + {`load("", "a", b="c")`, + `(LoadStmt Module="" From=(a c) To=(a b))`}, + {`load = 1`, // load is not a reserved word + `(AssignStmt Op== 
LHS=load RHS=1)`}, + {`if True: load("", "a", b="c")`, // load needn't be at toplevel + `(IfStmt Cond=True True=((LoadStmt Module="" From=(a c) To=(a b))))`}, + {`def f(x, *args, **kwargs): + pass`, + `(DefStmt Name=f Function=(Function Params=(x (UnaryExpr Op=* X=args) (UnaryExpr Op=** X=kwargs)) Body=((BranchStmt Token=pass))))`}, + {`def f(**kwargs, *args): pass`, + `(DefStmt Name=f Function=(Function Params=((UnaryExpr Op=** X=kwargs) (UnaryExpr Op=* X=args)) Body=((BranchStmt Token=pass))))`}, + {`def f(a, b, c=d): pass`, + `(DefStmt Name=f Function=(Function Params=(a b (BinaryExpr X=c Op== Y=d)) Body=((BranchStmt Token=pass))))`}, + {`def f(a, b=c, d): pass`, + `(DefStmt Name=f Function=(Function Params=(a (BinaryExpr X=b Op== Y=c) d) Body=((BranchStmt Token=pass))))`}, // TODO(adonovan): fix this + {`def f(): + def g(): + pass + pass +def h(): + pass`, + `(DefStmt Name=f Function=(Function Body=((DefStmt Name=g Function=(Function Body=((BranchStmt Token=pass)))) (BranchStmt Token=pass))))`}, + } { + f, err := syntax.Parse("foo.sky", test.input) + if err != nil { + t.Errorf("parse `%s` failed: %v", test.input, stripPos(err)) + continue + } + if got := treeString(f.Stmts[0]); test.want != got { + t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want) + } + } +} + +// TestFileParseTrees tests sequences of statements, and particularly +// handling of indentation, newlines, line continuations, and blank lines. 
+func TestFileParseTrees(t *testing.T) { + for _, test := range []struct { + input, want string + }{ + {`x = 1 +print(x)`, + `(AssignStmt Op== LHS=x RHS=1) +(ExprStmt X=(CallExpr Fn=print Args=(x)))`}, + {"if cond:\n\tpass", + `(IfStmt Cond=cond True=((BranchStmt Token=pass)))`}, + {"if cond:\n\tpass\nelse:\n\tpass", + `(IfStmt Cond=cond True=((BranchStmt Token=pass)) False=((BranchStmt Token=pass)))`}, + {`def f(): + pass +pass + +pass`, + `(DefStmt Name=f Function=(Function Body=((BranchStmt Token=pass)))) +(BranchStmt Token=pass) +(BranchStmt Token=pass)`}, + {`pass; pass`, + `(BranchStmt Token=pass) +(BranchStmt Token=pass)`}, + {"pass\npass", + `(BranchStmt Token=pass) +(BranchStmt Token=pass)`}, + {"pass\n\npass", + `(BranchStmt Token=pass) +(BranchStmt Token=pass)`}, + {`x = (1 + +2)`, + `(AssignStmt Op== LHS=x RHS=(BinaryExpr X=1 Op=+ Y=2))`}, + {`x = 1 \ ++ 2`, + `(AssignStmt Op== LHS=x RHS=(BinaryExpr X=1 Op=+ Y=2))`}, + } { + f, err := syntax.Parse("foo.sky", test.input) + if err != nil { + t.Errorf("parse `%s` failed: %v", test.input, stripPos(err)) + continue + } + var buf bytes.Buffer + for i, stmt := range f.Stmts { + if i > 0 { + buf.WriteByte('\n') + } + writeTree(&buf, reflect.ValueOf(stmt)) + } + if got := buf.String(); test.want != got { + t.Errorf("parse `%s` = %s, want %s", test.input, got, test.want) + } + } +} + +func stripPos(err error) string { + s := err.Error() + if i := strings.Index(s, ": "); i >= 0 { + s = s[i+len(": "):] // strip file:line:col + } + return s +} + +// treeString prints a syntax node as a parenthesized tree. +// Idents are printed as foo and Literals as "foo" or 42. +// Structs are printed as (type name=value ...). +// Only non-empty fields are shown. 
+func treeString(n syntax.Node) string { + var buf bytes.Buffer + writeTree(&buf, reflect.ValueOf(n)) + return buf.String() +} + +func writeTree(out *bytes.Buffer, x reflect.Value) { + switch x.Kind() { + case reflect.String, reflect.Int, reflect.Bool: + fmt.Fprintf(out, "%v", x.Interface()) + case reflect.Ptr, reflect.Interface: + if elem := x.Elem(); elem.Kind() == 0 { + out.WriteString("nil") + } else { + writeTree(out, elem) + } + case reflect.Struct: + switch v := x.Interface().(type) { + case syntax.Literal: + if v.Token == syntax.STRING { + fmt.Fprintf(out, "%q", v.Value) + } else if v.Token == syntax.INT { + fmt.Fprintf(out, "%d", v.Value) + } + return + case syntax.Ident: + out.WriteString(v.Name) + return + } + fmt.Fprintf(out, "(%s", strings.TrimPrefix(x.Type().String(), "syntax.")) + for i, n := 0, x.NumField(); i < n; i++ { + f := x.Field(i) + if f.Type() == reflect.TypeOf(syntax.Position{}) { + continue // skip positions + } + name := x.Type().Field(i).Name + if f.Type() == reflect.TypeOf(syntax.Token(0)) { + fmt.Fprintf(out, " %s=%s", name, f.Interface()) + continue + } + + switch f.Kind() { + case reflect.Slice: + if n := f.Len(); n > 0 { + fmt.Fprintf(out, " %s=(", name) + for i := 0; i < n; i++ { + if i > 0 { + out.WriteByte(' ') + } + writeTree(out, f.Index(i)) + } + out.WriteByte(')') + } + continue + case reflect.Ptr, reflect.Interface: + if f.IsNil() { + continue + } + case reflect.Bool: + if f.Bool() { + fmt.Fprintf(out, " %s", name) + } + continue + } + fmt.Fprintf(out, " %s=", name) + writeTree(out, f) + } + fmt.Fprintf(out, ")") + default: + fmt.Fprintf(out, "%T", x.Interface()) + } +} + +func TestParseErrors(t *testing.T) { + filename := skylarktest.DataFile("skylark/syntax", "testdata/errors.sky") + for _, chunk := range chunkedfile.Read(filename, t) { + _, err := syntax.Parse(filename, chunk.Source) + switch err := err.(type) { + case nil: + // ok + case syntax.Error: + chunk.GotError(int(err.Pos.Line), err.Msg) + default: + 
t.Error(err) + } + chunk.Done() + } +} + +func TestWalk(t *testing.T) { + const src = ` +for x in y: + if x: + pass + else: + f([2*x for x in "abc"]) +` + // TODO(adonovan): test that it finds all syntax.Nodes + // (compare against a reflect-based implementation). + // TODO(adonovan): test that the result of f is used to prune + // the descent. + f, err := syntax.Parse("hello.go", src) + if err != nil { + t.Fatal(err) + } + + var buf bytes.Buffer + var depth int + syntax.Walk(f, func(n syntax.Node) bool { + if n == nil { + depth-- + return true + } + fmt.Fprintf(&buf, "%s%s\n", + strings.Repeat(" ", depth), + strings.TrimPrefix(reflect.TypeOf(n).String(), "*syntax.")) + depth++ + return true + }) + got := buf.String() + want := ` +File + ForStmt + Ident + Ident + IfStmt + Ident + BranchStmt + ExprStmt + CallExpr + Ident + Comprehension + ForClause + Ident + Literal + BinaryExpr + Literal + Ident` + got = strings.TrimSpace(got) + want = strings.TrimSpace(want) + if got != want { + t.Errorf("got %s, want %s", got, want) + } +} diff --git a/syntax/quote.go b/syntax/quote.go new file mode 100644 index 0000000..0a8321a --- /dev/null +++ b/syntax/quote.go @@ -0,0 +1,270 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +// Skylark quoted string utilities. + +import ( + "bytes" + "fmt" + "strconv" + "strings" +) + +// unesc maps single-letter chars following \ to their actual values. +var unesc = [256]byte{ + 'a': '\a', + 'b': '\b', + 'f': '\f', + 'n': '\n', + 'r': '\r', + 't': '\t', + 'v': '\v', + '\\': '\\', + '\'': '\'', + '"': '"', +} + +// esc maps escape-worthy bytes to the char that should follow \. 
+var esc = [256]byte{ + '\a': 'a', + '\b': 'b', + '\f': 'f', + '\n': 'n', + '\r': 'r', + '\t': 't', + '\v': 'v', + '\\': '\\', + '\'': '\'', + '"': '"', +} + +// notEsc is a list of characters that can follow a \ in a string value +// without having to escape the \. That is, since ( is in this list, we +// quote the Go string "foo\\(bar" as the Python literal "foo\(bar". +// This really does happen in BUILD files, especially in strings +// being used as shell arguments containing regular expressions. +const notEsc = " !#$%&()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~" + +// unquote unquotes the quoted string, returning the actual +// string value, whether the original was triple-quoted, and +// an error describing invalid input. +func unquote(quoted string) (s string, triple bool, err error) { + // Check for raw prefix: means don't interpret the inner \. + raw := false + if strings.HasPrefix(quoted, "r") { + raw = true + quoted = quoted[1:] + } + + if len(quoted) < 2 { + err = fmt.Errorf("string literal too short") + return + } + + if quoted[0] != '"' && quoted[0] != '\'' || quoted[0] != quoted[len(quoted)-1] { + err = fmt.Errorf("string literal has invalid quotes") + return + } + + // Check for triple quoted string. + quote := quoted[0] + if len(quoted) >= 6 && quoted[1] == quote && quoted[2] == quote && quoted[:3] == quoted[len(quoted)-3:] { + triple = true + quoted = quoted[3 : len(quoted)-3] + } else { + quoted = quoted[1 : len(quoted)-1] + } + + // Now quoted is the quoted data, but no quotes. + // If we're in raw mode or there are no escapes or + // carriage returns, we're done. + var unquoteChars string + if raw { + unquoteChars = "\r" + } else { + unquoteChars = "\\\r" + } + if !strings.ContainsAny(quoted, unquoteChars) { + s = quoted + return + } + + // Otherwise process quoted string. + // Each iteration processes one escape sequence along with the + // plain text leading up to it. + var buf bytes.Buffer + for { + // Remove prefix before escape sequence. 
+ i := strings.IndexAny(quoted, unquoteChars) + if i < 0 { + i = len(quoted) + } + buf.WriteString(quoted[:i]) + quoted = quoted[i:] + + if len(quoted) == 0 { + break + } + + // Process carriage return. + if quoted[0] == '\r' { + buf.WriteByte('\n') + if len(quoted) > 1 && quoted[1] == '\n' { + quoted = quoted[2:] + } else { + quoted = quoted[1:] + } + continue + } + + // Process escape sequence. + if len(quoted) == 1 { + err = fmt.Errorf(`truncated escape sequence \`) + return + } + + switch quoted[1] { + default: + // In Python, if \z (for some byte z) is not a known escape sequence + // then it appears as literal text in the string. + buf.WriteString(quoted[:2]) + quoted = quoted[2:] + + case '\n': + // Ignore the escape and the line break. + quoted = quoted[2:] + + case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '\'', '"': + // One-char escape + buf.WriteByte(unesc[quoted[1]]) + quoted = quoted[2:] + + case '0', '1', '2', '3', '4', '5', '6', '7': + // Octal escape, up to 3 digits. + n := int(quoted[1] - '0') + quoted = quoted[2:] + for i := 1; i < 3; i++ { + if len(quoted) == 0 || quoted[0] < '0' || '7' < quoted[0] { + break + } + n = n*8 + int(quoted[0]-'0') + quoted = quoted[1:] + } + if n >= 256 { + // NOTE: Python silently discards the high bit, + // so that '\541' == '\141' == 'a'. + // Let's see if we can avoid doing that in BUILD files. + err = fmt.Errorf(`invalid escape sequence \%03o`, n) + return + } + buf.WriteByte(byte(n)) + + case 'x': + // Hexadecimal escape, exactly 2 digits. + if len(quoted) < 4 { + err = fmt.Errorf(`truncated escape sequence %s`, quoted) + return + } + n, err1 := strconv.ParseInt(quoted[2:4], 16, 0) + if err1 != nil { + err = fmt.Errorf(`invalid escape sequence %s`, quoted[:4]) + return + } + buf.WriteByte(byte(n)) + quoted = quoted[4:] + } + } + + s = buf.String() + return +} + +// indexByte returns the index of the first instance of b in s, or else -1. 
+func indexByte(s string, b byte) int { + for i := 0; i < len(s); i++ { + if s[i] == b { + return i + } + } + return -1 +} + +// hex is a list of the hexadecimal digits, for use in quoting. +// We always print lower-case hexadecimal. +const hex = "0123456789abcdef" + +// quote returns the quoted form of the string value "x". +// If triple is true, quote uses the triple-quoted form """x""". +func quote(unquoted string, triple bool) string { + q := `"` + if triple { + q = `"""` + } + + var buf bytes.Buffer + buf.WriteString(q) + + for i := 0; i < len(unquoted); i++ { + c := unquoted[i] + if c == '"' && triple && (i+1 < len(unquoted) && unquoted[i+1] != '"' || i+2 < len(unquoted) && unquoted[i+2] != '"') { + // Can pass up to two quotes through, because they are followed by a non-quote byte. + buf.WriteByte(c) + if i+1 < len(unquoted) && unquoted[i+1] == '"' { + buf.WriteByte(c) + i++ + } + continue + } + if triple && c == '\n' { + // Can allow newline in triple-quoted string. + buf.WriteByte(c) + continue + } + if c == '\'' { + // Can allow ' since we always use ". + buf.WriteByte(c) + continue + } + if c == '\\' { + if i+1 < len(unquoted) && indexByte(notEsc, unquoted[i+1]) >= 0 { + // Can pass \ through when followed by a byte that + // known not to be a valid escape sequence and also + // that does not trigger an escape sequence of its own. + // Use this, because various BUILD files do. + buf.WriteByte('\\') + buf.WriteByte(unquoted[i+1]) + i++ + continue + } + } + if esc[c] != 0 { + buf.WriteByte('\\') + buf.WriteByte(esc[c]) + continue + } + if c < 0x20 || c >= 0x80 { + // BUILD files are supposed to be Latin-1, so escape all control and high bytes. + // I'd prefer to use \x here, but Blaze does not implement + // \x in quoted strings (b/7272572). + buf.WriteByte('\\') + buf.WriteByte(hex[c>>6]) // actually octal but reusing hex digits 0-7. 
+ buf.WriteByte(hex[(c>>3)&7]) + buf.WriteByte(hex[c&7]) + /* + buf.WriteByte('\\') + buf.WriteByte('x') + buf.WriteByte(hex[c>>4]) + buf.WriteByte(hex[c&0xF]) + */ + continue + } + buf.WriteByte(c) + continue + } + + buf.WriteString(q) + return buf.String() +} diff --git a/syntax/quote_test.go b/syntax/quote_test.go new file mode 100644 index 0000000..a39d217 --- /dev/null +++ b/syntax/quote_test.go @@ -0,0 +1,71 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import ( + "strings" + "testing" +) + +var quoteTests = []struct { + q string // quoted + s string // unquoted (actual string) + std bool // q is standard form for s +}{ + {`""`, "", true}, + {`''`, "", false}, + {`"hello"`, `hello`, true}, + {`'hello'`, `hello`, false}, + {`"quote\"here"`, `quote"here`, true}, + {`'quote\"here'`, `quote"here`, false}, + {`'quote"here'`, `quote"here`, false}, + {`"quote'here"`, `quote'here`, true}, + {`"quote\'here"`, `quote'here`, false}, + {`'quote\'here'`, `quote'here`, false}, + {`"""hello " ' world "" asdf ''' foo"""`, `hello " ' world "" asdf ''' foo`, true}, + {`"foo\(bar"`, `foo\(bar`, true}, + {`"""hello +world"""`, "hello\nworld", true}, + + {`"\a\b\f\n\r\t\v\000\377"`, "\a\b\f\n\r\t\v\000\xFF", true}, + {`"\a\b\f\n\r\t\v\x00\xff"`, "\a\b\f\n\r\t\v\000\xFF", false}, + {`"\a\b\f\n\r\t\v\000\xFF"`, "\a\b\f\n\r\t\v\000\xFF", false}, + {`"\a\b\f\n\r\t\v\000\377\"'\\\003\200"`, "\a\b\f\n\r\t\v\x00\xFF\"'\\\x03\x80", true}, + {`"\a\b\f\n\r\t\v\x00\xff\"'\\\x03\x80"`, "\a\b\f\n\r\t\v\x00\xFF\"'\\\x03\x80", false}, + {`"\a\b\f\n\r\t\v\000\xFF\"'\\\x03\x80"`, "\a\b\f\n\r\t\v\x00\xFF\"'\\\x03\x80", false}, + {`"\a\b\f\n\r\t\v\000\xFF\"\'\\\x03\x80"`, "\a\b\f\n\r\t\v\x00\xFF\"'\\\x03\x80", false}, + { + `"cat $(SRCS) | grep '\s*ip_block:' | sed -e 's/\s*ip_block: \"\([^ ]*\)\"/ \x27\\1\x27,/g' >> $@; "`, + "cat $(SRCS) | grep 
'\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ '\\1',/g' >> $@; ", + false, + }, + { + `"cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\([^ ]*\)\"/ '\\1',/g' >> $@; "`, + "cat $(SRCS) | grep '\\s*ip_block:' | sed -e 's/\\s*ip_block: \"\\([^ ]*\\)\"/ '\\1',/g' >> $@; ", + true, + }, +} + +func TestQuote(t *testing.T) { + for _, tt := range quoteTests { + if !tt.std { + continue + } + q := quote(tt.s, strings.HasPrefix(tt.q, `"""`)) + if q != tt.q { + t.Errorf("quote(%#q) = %s, want %s", tt.s, q, tt.q) + } + } +} + +func TestUnquote(t *testing.T) { + for _, tt := range quoteTests { + s, triple, err := unquote(tt.q) + wantTriple := strings.HasPrefix(tt.q, `"""`) || strings.HasPrefix(tt.q, `'''`) + if s != tt.s || triple != wantTriple || err != nil { + t.Errorf("unquote(%s) = %#q, %v, %v want %#q, %v, nil", tt.q, s, triple, err, tt.s, wantTriple) + } + } +} diff --git a/syntax/scan.go b/syntax/scan.go new file mode 100644 index 0000000..516f865 --- /dev/null +++ b/syntax/scan.go @@ -0,0 +1,910 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +// A lexical scanner for Skylark. + +import ( + "fmt" + "io" + "io/ioutil" + "log" + "strconv" + "strings" + "unicode" + "unicode/utf8" +) + +// A Token represents a Skylark lexical token. +type Token int8 + +const ( + ILLEGAL Token = iota + EOF + + NEWLINE + INDENT + OUTDENT + + // Tokens with values + IDENT // x + INT // 123 + FLOAT // 1.23e45 + STRING // "foo" or 'foo' or '''foo''' or r'foo' or r"foo" + + // Punctuation + PLUS // + + MINUS // - + STAR // * + SLASH // / + SLASHSLASH // // + PERCENT // % + AMP // & + PIPE // | + DOT // . 
+ COMMA // , + EQ // = + SEMI // ; + COLON // : + LPAREN // ( + RPAREN // ) + LBRACK // [ + RBRACK // ] + LBRACE // { + RBRACE // } + LT // < + GT // > + GE // >= + LE // <= + EQL // == + NEQ // != + PLUS_EQ // += (keep order consistent with PLUS..PERCENT) + MINUS_EQ // -= + STAR_EQ // *= + SLASH_EQ // /= + SLASHSLASH_EQ // //= + PERCENT_EQ // %= + STARSTAR // ** + + // Keywords + AND + BREAK + CONTINUE + DEF + ELIF + ELSE + FOR + IF + IN + LAMBDA + NOT + NOT_IN // synthesized by parser from NOT IN + OR + PASS + RETURN + + maxToken +) + +func (tok Token) String() string { return tokenNames[tok] } + +// GoString is like String but quotes punctuation tokens. +// Use Sprintf("%#v", tok) when constructing error messages. +func (tok Token) GoString() string { + if tok >= PLUS && tok <= STARSTAR { + return "'" + tokenNames[tok] + "'" + } + return tokenNames[tok] +} + +var tokenNames = [...]string{ + ILLEGAL: "illegal token", + EOF: "end of file", + NEWLINE: "newline", + INDENT: "indent", + OUTDENT: "outdent", + IDENT: "identifier", + INT: "int literal", + FLOAT: "float literal", + STRING: "string literal", + PLUS: "+", + MINUS: "-", + STAR: "*", + SLASH: "/", + SLASHSLASH: "//", + PERCENT: "%", + AMP: "&", + PIPE: "|", + DOT: ".", + COMMA: ",", + EQ: "=", + SEMI: ";", + COLON: ":", + LPAREN: "(", + RPAREN: ")", + LBRACK: "[", + RBRACK: "]", + LBRACE: "{", + RBRACE: "]", + LT: "<", + GT: ">", + GE: ">=", + LE: "<=", + EQL: "==", + NEQ: "!=", + PLUS_EQ: "+=", + MINUS_EQ: "-=", + STAR_EQ: "*=", + SLASH_EQ: "/=", + SLASHSLASH_EQ: "//=", + PERCENT_EQ: "%=", + STARSTAR: "**", + AND: "and", + BREAK: "break", + CONTINUE: "continue", + DEF: "def", + ELIF: "elif", + ELSE: "else", + FOR: "for", + IF: "if", + IN: "in", + LAMBDA: "lambda", + NOT: "not", + NOT_IN: "not in", + OR: "or", + PASS: "pass", + RETURN: "return", +} + +// A Position describes the location of a rune of input. 
type Position struct {
	file *string // filename (indirect for compactness)
	Line int32   // 1-based line number
	Col  int32   // 1-based column number (strictly: rune)
}

// IsValid reports whether the position is valid.
// A position is valid when its line number is at least 1.
func (p Position) IsValid() bool {
	return p.Line >= 1
}

// Filename returns the name of the file containing this position,
// or "<unknown>" if no file is recorded.
func (p Position) Filename() string {
	if p.file != nil {
		return *p.file
	}
	return "<unknown>"
}

// add returns the position at the end of s, assuming it starts at p.
// Each "\n" in s advances the line; the column is then counted in
// runes from the start of the last line of s.
func (p Position) add(s string) Position {
	if n := strings.Count(s, "\n"); n > 0 {
		p.Line += int32(n)
		s = s[strings.LastIndex(s, "\n")+1:]
		p.Col = 1
	}
	p.Col += int32(utf8.RuneCountInString(s))
	return p
}

// String formats the position as "file:line:col".
func (p Position) String() string {
	return fmt.Sprintf("%s:%d:%d", p.Filename(), p.Line, p.Col)
}

// A scanner represents a single input file being parsed.
type scanner struct {
	complete  []byte   // entire input
	rest      []byte   // rest of input
	token     []byte   // token being scanned
	pos       Position // current input position
	depth     int      // nesting of [ ] { } ( )
	indentstk []int    // stack of indentation levels
	dents     int      // number of saved INDENT (>0) or OUTDENT (<0) tokens to return
	lineStart bool     // after NEWLINE; convert spaces to indentation tokens
}

// newScanner returns a scanner positioned at line 1, column 1 of the
// input obtained from readSource(filename, src). The indentation stack
// starts with the single level 0.
func newScanner(filename string, src interface{}) (*scanner, error) {
	data, err := readSource(filename, src)
	if err != nil {
		return nil, err
	}
	return &scanner{
		complete:  data,
		rest:      data,
		pos:       Position{file: &filename, Line: 1, Col: 1},
		indentstk: make([]int, 1, 10), // []int{0} + spare capacity
		lineStart: true,
	}, nil
}

// readSource returns the bytes of the input. src may be a string,
// a []byte (used without copying), an io.Reader (read to EOF), or nil,
// in which case the file named by filename is read. Any other type
// is an error.
func readSource(filename string, src interface{}) (data []byte, err error) {
	switch src := src.(type) {
	case string:
		data = []byte(src)
	case []byte:
		data = src
	case io.Reader:
		data, err = ioutil.ReadAll(src)
	case nil:
		data, err = ioutil.ReadFile(filename)
	default:
		return nil, fmt.Errorf("invalid source: %T", src)
	}
	if err != nil {
		return nil, fmt.Errorf("reading %s: %s", filename, err)
	}
	return data, nil
}

// An Error describes the nature and position of a scanner or parser error.
type Error struct {
	Pos Position
	Msg string
}

// Error returns the message prefixed by the position.
func (e Error) Error() string { return e.Pos.String() + ": " + e.Msg }

// error is called to report an error.
// error does not return: it panics with an Error value,
// which the recover method turns back into an error result.
func (sc *scanner) error(pos Position, s string) {
	panic(Error{pos, s})
}

// errorf is like error but formats the message with fmt.Sprintf.
// Like error, it does not return.
func (sc *scanner) errorf(pos Position, format string, args ...interface{}) {
	sc.error(pos, fmt.Sprintf(format, args...))
}

// recover must be called via defer. It stores in *err the Error carried
// by an in-flight panic, if any; other panic values are wrapped as an
// "internal error" at the current scanner position.
func (sc *scanner) recover(err *error) {
	// The scanner and parser panic both for routine errors like
	// syntax errors and for programmer bugs like array index
	// errors.  Turn both into error returns.  Catching bug panics
	// is especially important when processing many files.
	switch e := recover().(type) {
	case nil:
		// no panic
	case Error:
		*err = e
	default:
		*err = Error{sc.pos, fmt.Sprintf("internal error: %v", e)}
		// debug is declared elsewhere in the package; when set,
		// an internal error aborts the process instead of being returned.
		if debug {
			log.Fatal(*err)
		}
	}
}

// eof reports whether the input has reached end of file.
func (sc *scanner) eof() bool {
	return len(sc.rest) == 0
}

// peekRune returns the next rune in the input without consuming it,
// or 0 if the input is exhausted.
// Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
func (sc *scanner) peekRune() rune {
	if len(sc.rest) == 0 {
		return 0
	}

	// fast path: ASCII
	if b := sc.rest[0]; b < utf8.RuneSelf {
		if b == '\r' {
			return '\n'
		}
		return rune(b)
	}

	r, _ := utf8.DecodeRune(sc.rest)
	return r
}

// readRune consumes and returns the next rune in the input.
// Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
func (sc *scanner) readRune() rune {
	// Reading past the end is a scanner bug; error panics.
	if len(sc.rest) == 0 {
		sc.error(sc.pos, "internal scanner error: readRune at EOF")
	}

	// fast path: ASCII
	if b := sc.rest[0]; b < utf8.RuneSelf {
		r := rune(b)
		sc.rest = sc.rest[1:]
		if r == '\r' {
			// Fold "\r\n" and a lone "\r" into a single '\n'.
			if len(sc.rest) > 0 && sc.rest[0] == '\n' {
				sc.rest = sc.rest[1:]
			}
			r = '\n'
		}
		if r == '\n' {
			sc.pos.Line++
			sc.pos.Col = 1
		} else {
			sc.pos.Col++
		}
		return r
	}

	// Multi-byte rune: advances the column by one regardless of byte length.
	r, size := utf8.DecodeRune(sc.rest)
	sc.rest = sc.rest[size:]
	sc.pos.Col++
	return r
}

// tokenValue records the position and value associated with each token.
type tokenValue struct {
	raw    string   // raw text of token
	int    int64    // decoded int
	float  float64  // decoded float
	string string   // decoded string
	pos    Position // start position of token
	triple bool     // was string triple quoted?
}

// startToken marks the beginning of the next input token.
// It must be followed by a call to endToken once the token has
// been consumed using readRune.
func (sc *scanner) startToken(val *tokenValue) {
	sc.token = sc.rest
	val.raw = ""
	val.pos = sc.pos
}

// endToken marks the end of an input token.
// It records the actual token string in val.raw if the caller
// has not done that already.
func (sc *scanner) endToken(val *tokenValue) {
	if val.raw == "" {
		// The token text is whatever was consumed from sc.token
		// since startToken, i.e. the bytes no longer in sc.rest.
		val.raw = string(sc.token[:len(sc.token)-len(sc.rest)])
	}
}

// nextToken is called by the parser to obtain the next input token.
// It returns the token value and sets val to the data associated with
// the token.
//
// For all our input tokens, the associated data is val.pos (the
// position where the token begins), val.raw (the input string
// corresponding to the token).  For string and int tokens, the string
// and int fields additionally contain the token's interpreted value.
+func (sc *scanner) nextToken(val *tokenValue) Token { + + // The following distribution of tokens guides case ordering: + // + // COMMA 27 % + // STRING 23 % + // IDENT 15 % + // EQL 11 % + // LBRACK 5.5 % + // RBRACK 5.5 % + // NEWLINE 3 % + // LPAREN 2.9 % + // RPAREN 2.9 % + // INT 2 % + // others < 1 % + // + // Although NEWLINE tokens are infrequent, and lineStart is + // usually (~97%) false on entry, skipped newlines account for + // about 50% of all iterations of the 'start' loop. + +start: + var c rune + + // Deal with leading spaces and indentation. + blank := false + savedLineStart := sc.lineStart + if sc.lineStart { + sc.lineStart = false + col := 0 + for { + c = sc.peekRune() + if c == ' ' { + col++ + sc.readRune() + } else if c == '\t' { + const tab = 8 + col += int(tab - (sc.pos.Col-1)%tab) + sc.readRune() + } else { + break + } + } + // The third clause is "trailing spaces without newline at EOF". + if c == '#' || c == '\n' || c == 0 && col > 0 { + blank = true + } + + // Compute indentation level for non-blank lines not + // inside an expression. This is not the common case. + if !blank && sc.depth == 0 { + cur := sc.indentstk[len(sc.indentstk)-1] + if col > cur { + // indent + sc.dents++ + sc.indentstk = append(sc.indentstk, col) + } else if col < cur { + // dedent(s) + for len(sc.indentstk) > 0 && col < sc.indentstk[len(sc.indentstk)-1] { + sc.dents-- + sc.indentstk = sc.indentstk[:len(sc.indentstk)-1] // pop + } + if col != sc.indentstk[len(sc.indentstk)-1] { + sc.error(sc.pos, "unindent does not match any outer indentation level") + } + } + } + } + + // Return saved indentation tokens. + if sc.dents != 0 { + sc.startToken(val) + sc.endToken(val) + if sc.dents < 0 { + sc.dents++ + return OUTDENT + } else { + sc.dents-- + return INDENT + } + } + + // start of line proper + c = sc.peekRune() + + // Skip spaces. 
+ for c == ' ' || c == '\t' { + sc.readRune() + c = sc.peekRune() + } + + // comment + if c == '#' { + // Consume up to (but not including) newline. + for c != 0 && c != '\n' { + sc.readRune() + c = sc.peekRune() + } + } + + // newline + if c == '\n' { + sc.lineStart = true + if blank || sc.depth > 0 { + // Ignore blank lines, or newlines within expressions (common case). + sc.readRune() + goto start + } + // At top-level (not in an expression). + sc.startToken(val) + sc.readRune() + val.raw = "\n" + return NEWLINE + } + + // end of file + if c == 0 { + // Emit OUTDENTs for unfinished indentation, + // preceded by a NEWLINE if we haven't just emitted one. + if len(sc.indentstk) > 1 { + if savedLineStart { + sc.dents = 1 - len(sc.indentstk) + sc.indentstk = sc.indentstk[1:] + goto start + } else { + sc.lineStart = true + sc.startToken(val) + val.raw = "\n" + return NEWLINE + } + } + + sc.startToken(val) + sc.endToken(val) + return EOF + } + + // line continuation + if c == '\\' { + sc.readRune() + if sc.peekRune() != '\n' { + sc.errorf(sc.pos, "stray backslash in program") + } + sc.readRune() + goto start + } + + // start of the next token + sc.startToken(val) + + // comma (common case) + if c == ',' { + sc.readRune() + sc.endToken(val) + return COMMA + } + + // string literal + if c == '"' || c == '\'' { + return sc.scanString(val, c) + } + + // identifier or keyword + if isIdentStart(c) { + // raw string literal + if c == 'r' && len(sc.rest) > 1 && (sc.rest[1] == '"' || sc.rest[1] == '\'') { + sc.readRune() + c = sc.peekRune() + return sc.scanString(val, c) + } + + for isIdent(c) { + sc.readRune() + c = sc.peekRune() + } + sc.endToken(val) + if k, ok := keywordToken[val.raw]; ok { + return k + } + + return IDENT + } + + // brackets + switch c { + case '[', '(', '{': + sc.depth++ + sc.readRune() + sc.endToken(val) + switch c { + case '[': + return LBRACK + case '(': + return LPAREN + case '{': + return LBRACE + } + panic("unreachable") + + case ']', ')', '}': + if 
sc.depth == 0 { + sc.error(sc.pos, "indentation error") + } else { + sc.depth-- + } + sc.readRune() + sc.endToken(val) + switch c { + case ']': + return RBRACK + case ')': + return RPAREN + case '}': + return RBRACE + } + panic("unreachable") + } + + // int or float literal, or period + if isdigit(c) || c == '.' { + return sc.scanNumber(val, c) + } + + // other punctuation + defer sc.endToken(val) + switch c { + case '=', '<', '>', '!', '+', '-', '%', '/': // possibly followed by '=' + sc.readRune() + if sc.peekRune() == '=' { + sc.readRune() + switch c { + case '<': + return LE + case '>': + return GE + case '=': + return EQL + case '!': + return NEQ + case '+': + return PLUS_EQ + case '-': + return MINUS_EQ + case '/': + return SLASH_EQ + case '%': + return PERCENT_EQ + } + } + switch c { + case '=': + return EQ + case '<': + return LT + case '>': + return GT + case '!': + sc.error(sc.pos, "unexpected input character '!'") + case '+': + return PLUS + case '-': + return MINUS + case '/': + if sc.peekRune() == '/' { + sc.readRune() + if sc.peekRune() == '=' { + sc.readRune() + return SLASHSLASH_EQ + } else { + return SLASHSLASH + } + } + return SLASH + case '%': + return PERCENT + } + panic("unreachable") + + case ':', ';', '|', '&': // single-char tokens (except comma) + sc.readRune() + switch c { + case ':': + return COLON + case ';': + return SEMI + case '|': + return PIPE + case '&': + return AMP + } + panic("unreachable") + + case '*': // possibly followed by '*' or '=' + sc.readRune() + switch sc.peekRune() { + case '*': + sc.readRune() + return STARSTAR + case '=': + sc.readRune() + return STAR_EQ + } + return STAR + } + + sc.errorf(sc.pos, "unexpected input character %#q", c) + panic("unreachable") +} + +func (sc *scanner) scanString(val *tokenValue, quote rune) Token { + triple := len(sc.rest) >= 3 && sc.rest[0] == byte(quote) && sc.rest[1] == byte(quote) && sc.rest[2] == byte(quote) + sc.readRune() + if triple { + sc.readRune() + sc.readRune() + } + + 
quoteCount := 0 + for { + if sc.eof() { + sc.error(val.pos, "unexpected EOF in string") + } + c := sc.readRune() + if c == '\n' && !triple { + sc.error(val.pos, "unexpected newline in string") + } + if c == quote { + quoteCount++ + if !triple || quoteCount == 3 { + break + } + } else { + quoteCount = 0 + } + if c == '\\' { + if sc.eof() { + sc.error(val.pos, "unexpected EOF in string") + } + sc.readRune() + } + } + + sc.endToken(val) + s, _, err := unquote(val.raw) + if err != nil { + sc.error(sc.pos, err.Error()) + } + val.string = s + return STRING +} + +func (sc *scanner) scanNumber(val *tokenValue, c rune) Token { + // https://docs.python.org/2/reference/lexical_analysis.html#integer-and-long-integer-literals + // Not supported: + // - integer literals of >64 bits of precision + // - 123L or 123l long suffix + // - traditional octal: 0755 + + fraction, exponent := false, false + + if c == '.' { + // dot or start of fraction + sc.readRune() + c = sc.peekRune() + if !isdigit(c) { + sc.endToken(val) + return DOT + } + fraction = true + } else if c == '0' { + // hex, octal, or float + sc.readRune() + c = sc.peekRune() + + if c == '.' { + fraction = true + } else if c == 'x' || c == 'X' { + // hex + sc.readRune() + c = sc.peekRune() + if !isxdigit(c) { + sc.error(sc.pos, "invalid hex literal") + } + for isxdigit(c) { + sc.readRune() + c = sc.peekRune() + } + } else if c == 'o' || c == 'O' { + // octal + sc.readRune() + c = sc.peekRune() + if !isodigit(c) { + sc.error(sc.pos, "invalid octal literal") + } + for isodigit(c) { + sc.readRune() + c = sc.peekRune() + } + } else { + // float (or obsolete octal "0755") + allzeros, octal := true, true + for isdigit(c) { + if c != '0' { + allzeros = false + } + if c > '7' { + octal = false + } + sc.readRune() + c = sc.peekRune() + } + if c == '.' 
{ + fraction = true + } else if c == 'e' || c == 'E' { + exponent = true + } else if octal && !allzeros { + // We must support old octal until the Java + // implementation groks the new one. + // TODO(adonovan): reenable the check. + if false { + sc.endToken(val) + sc.errorf(sc.pos, "obsolete form of octal literal; use 0o%s", val.raw[1:]) + } + } + } + } else { + // decimal + for isdigit(c) { + sc.readRune() + c = sc.peekRune() + } + + if c == '.' { + fraction = true + } else if c == 'e' || c == 'E' { + exponent = true + } + } + + if fraction { + sc.readRune() // consume '.' + c = sc.peekRune() + for isdigit(c) { + sc.readRune() + c = sc.peekRune() + } + + if c == 'e' || c == 'E' { + exponent = true + } + } + + if exponent { + sc.readRune() // consume [eE] + c = sc.peekRune() + if c == '+' || c == '-' { + sc.readRune() + c = sc.peekRune() + if !isdigit(c) { + sc.error(sc.pos, "invalid float literal") + } + } + for isdigit(c) { + sc.readRune() + c = sc.peekRune() + } + } + + sc.endToken(val) + if fraction || exponent { + var err error + val.float, err = strconv.ParseFloat(val.raw, 64) + if err != nil { + sc.error(sc.pos, "invalid float literal") + } + return FLOAT + } else { + var err error + s := val.raw + if len(s) > 2 && s[0] == '0' && (s[1] == 'o' || s[1] == 'O') { + val.int, err = strconv.ParseInt(s[2:], 8, 64) + } else { + val.int, err = strconv.ParseInt(s, 0, 64) + } + if err != nil { + sc.error(sc.pos, "invalid int literal") + } + return INT + } +} + +// isIdent reports whether c is an identifier rune. 
+func isIdent(c rune) bool { + return isdigit(c) || isIdentStart(c) +} + +func isIdentStart(c rune) bool { + return 'a' <= c && c <= 'z' || + 'A' <= c && c <= 'Z' || + c == '_' || + unicode.IsLetter(c) +} + +func isdigit(c rune) bool { return '0' <= c && c <= '9' } +func isodigit(c rune) bool { return '0' <= c && c <= '7' } +func isxdigit(c rune) bool { return isdigit(c) || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' } + +// keywordToken records the special tokens for +// strings that should not be treated as ordinary identifiers. +var keywordToken = map[string]Token{ + "and": AND, + "break": BREAK, + "continue": CONTINUE, + "def": DEF, + "elif": ELIF, + "else": ELSE, + "for": FOR, + "if": IF, + "in": IN, + "lambda": LAMBDA, + "not": NOT, + "or": OR, + "pass": PASS, + "return": RETURN, + + // reserved words: + "as": ILLEGAL, + // "assert": ILLEGAL, // heavily used by our tests + "class": ILLEGAL, + "del": ILLEGAL, + "except": ILLEGAL, + "finally": ILLEGAL, + "from": ILLEGAL, + "global": ILLEGAL, + "import": ILLEGAL, + "is": ILLEGAL, + "nonlocal": ILLEGAL, + "raise": ILLEGAL, + "try": ILLEGAL, + "while": ILLEGAL, + "with": ILLEGAL, + "yield": ILLEGAL, +} diff --git a/syntax/scan_test.go b/syntax/scan_test.go new file mode 100644 index 0000000..b5dd738 --- /dev/null +++ b/syntax/scan_test.go @@ -0,0 +1,226 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package syntax + +import ( + "bytes" + "fmt" + "go/build" + "io/ioutil" + "path/filepath" + "testing" +) + +func scan(src interface{}) (tokens string, err error) { + sc, err := newScanner("foo.sky", src) + if err != nil { + return "", err + } + + defer sc.recover(&err) + + var buf bytes.Buffer + var val tokenValue + for { + tok := sc.nextToken(&val) + + if buf.Len() > 0 { + buf.WriteByte(' ') + } + switch tok { + case EOF: + buf.WriteString("EOF") + case IDENT: + buf.WriteString(val.raw) + case INT: + fmt.Fprintf(&buf, "%d", val.int) + case FLOAT: + fmt.Fprintf(&buf, "%e", val.float) + case STRING: + fmt.Fprintf(&buf, "%q", val.string) + default: + buf.WriteString(tok.String()) + } + if tok == EOF { + break + } + } + return buf.String(), nil +} + +func TestScanner(t *testing.T) { + for _, test := range []struct { + input, want string + }{ + {``, "EOF"}, + {`123`, "123 EOF"}, + {`x.y`, "x . y EOF"}, + {`chocolate.éclair`, `chocolate . éclair EOF`}, + {`123 "foo" hello x.y`, `123 "foo" hello x . y EOF`}, + {`print(x)`, "print ( x ) EOF"}, + {`print(x); print(y)`, "print ( x ) ; print ( y ) EOF"}, + {`/ // /= //= ///=`, "/ // /= //= // /= EOF"}, + {`# hello +print(x)`, "print ( x ) EOF"}, + {`# hello +print(1) +cc_binary(name="foo") +def f(x): + return x+1 +print(1) +`, + `print ( 1 ) newline ` + + `cc_binary ( name = "foo" ) newline ` + + `def f ( x ) : newline ` + + `indent return x + 1 newline ` + + `outdent print ( 1 ) newline ` + + `EOF`}, + // EOF should act line an implicit newline. 
+ {`def f(): pass`, + "def f ( ) : pass EOF"}, + {`def f(): + pass`, + "def f ( ) : newline indent pass newline outdent EOF"}, + {`def f(): + pass +# oops`, + "def f ( ) : newline indent pass newline outdent EOF"}, + {`def f(): + pass \ +`, + "def f ( ) : newline indent pass newline outdent EOF"}, + {`def f(): + pass +`, + "def f ( ) : newline indent pass newline outdent EOF"}, + {`pass + + +pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated + {`def f(): + pass + `, "def f ( ) : newline indent pass newline outdent EOF"}, + {`def f(): + pass + ` + "\n", "def f ( ) : newline indent pass newline outdent EOF"}, + {"pass", "pass EOF"}, + {"pass\n", "pass newline EOF"}, + {"pass\n ", "pass newline EOF"}, + {"pass\n \n", "pass newline EOF"}, + {"if x:\n pass\n ", "if x : newline indent pass newline outdent EOF"}, + {`x = 1 + \ +2`, `x = 1 + 2 EOF`}, + {`x = 'a\nb'`, `x = "a\nb" EOF`}, + {`x = 'a\zb'`, `x = "a\\zb" EOF`}, + {`x = r'a\nb'`, `x = "a\\nb" EOF`}, + {`x = '\''`, `x = "'" EOF`}, + {`x = "\""`, `x = "\"" EOF`}, + {`x = r'\''`, `x = "\\'" EOF`}, + {`x = '''\''''`, `x = "'" EOF`}, + {`x = r'''\''''`, `x = "\\'" EOF`}, + {`x = ''''a'b'c'''`, `x = "'a'b'c" EOF`}, + {"x = '''a\nb'''", `x = "a\nb" EOF`}, + {"x = '''a\rb'''", `x = "a\nb" EOF`}, + {"x = '''a\r\nb'''", `x = "a\nb" EOF`}, + {"x = '''a\n\rb'''", `x = "a\n\nb" EOF`}, + {"x = r'a\\\nb'", `x = "a\\\nb" EOF`}, + {"x = r'a\\\rb'", `x = "a\\\nb" EOF`}, + {"x = r'a\\\r\nb'", `x = "a\\\nb" EOF`}, + {"a\rb", `a newline b EOF`}, + {"a\nb", `a newline b EOF`}, + {"a\r\nb", `a newline b EOF`}, + {"a\n\nb", `a newline b EOF`}, + // numbers + {"0", `0 EOF`}, + {"00", `0 EOF`}, + {"0.", `0.000000e+00 EOF`}, + {"0.e1", `0.000000e+00 EOF`}, + {".0", `0.000000e+00 EOF`}, + {"0.0", `0.000000e+00 EOF`}, + {".e1", `. 
e1 EOF`}, + {"1", `1 EOF`}, + {"1.", `1.000000e+00 EOF`}, + {".1", `1.000000e-01 EOF`}, + {".1e1", `1.000000e+00 EOF`}, + {".1e+1", `1.000000e+00 EOF`}, + {".1e-1", `1.000000e-02 EOF`}, + {"1e1", `1.000000e+01 EOF`}, + {"1e+1", `1.000000e+01 EOF`}, + {"1e-1", `1.000000e-01 EOF`}, + {"123", `123 EOF`}, + {"123e45", `1.230000e+47 EOF`}, + // hex + {"0xA", `10 EOF`}, + {"0xAAG", `170 G EOF`}, + {"0xG", `invalid hex literal`}, + {"0XA", `10 EOF`}, + {"0XG", `invalid hex literal`}, + {"0xA.", `10 . EOF`}, + {"0xA.e1", `10 . e1 EOF`}, + // octal + {"0o123", `83 EOF`}, + {"0o12834", `10 834 EOF`}, + {"0o12934", `10 934 EOF`}, + {"0o12934.", `10 9.340000e+02 EOF`}, + {"0o12934.1", `10 9.341000e+02 EOF`}, + {"0o12934e1", `10 9.340000e+03 EOF`}, + {"0o123.", `83 . EOF`}, + {"0o123.1", `83 1.000000e-01 EOF`}, + // TODO(adonovan): reenable later. + // {"0123", `obsolete form of octal literal; use 0o123`}, + {"0123", `83 EOF`}, + {"012834", `invalid int literal`}, + {"012934", `invalid int literal`}, + // octal escapes in string literals + {`"\037"`, `"\x1f" EOF`}, + {`"\377"`, `"\xff" EOF`}, + {`"\378"`, `"\x1f8" EOF`}, // = '\37' + '8' + {`"\400"`, `invalid escape sequence \400`}, // unlike Python 2 and 3 + // Backslashes that are not part of escapes are treated literally, + // but this behavior will change; see b/34519173. + {`"\+"`, `"\\+" EOF`}, + {`"\o123"`, `"\\o123" EOF`}, + // floats starting with octal digits + {"012934.", `1.293400e+04 EOF`}, + {"012934.1", `1.293410e+04 EOF`}, + {"012934e1", `1.293400e+05 EOF`}, + {"0123.", `1.230000e+02 EOF`}, + {"0123.1", `1.231000e+02 EOF`}, + } { + got, err := scan(test.input) + if err != nil { + got = err.(Error).Msg + } + if test.want != got { + t.Errorf("scan `%s` = [%s], want [%s]", test.input, got, test.want) + } + } +} + +// dataFile is the same as skylarktest.DataFile. +// We make a copy to avoid a dependency cycle. 
+var dataFile = func(pkgdir, filename string) string { + return filepath.Join(build.Default.GOPATH, "src/github.com/google", pkgdir, filename) +} + +func BenchmarkScan(b *testing.B) { + filename := dataFile("skylark/syntax", "testdata/def.bzl") + b.StopTimer() + data, err := ioutil.ReadFile(filename) + if err != nil { + b.Fatal(err) + } + b.StartTimer() + + for i := 0; i < b.N; i++ { + sc, err := newScanner(filename, data) + if err != nil { + b.Fatal(err) + } + var val tokenValue + for sc.nextToken(&val) != EOF { + } + } +} diff --git a/syntax/syntax.go b/syntax/syntax.go new file mode 100644 index 0000000..7850a41 --- /dev/null +++ b/syntax/syntax.go @@ -0,0 +1,436 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package syntax provides a Skylark parser and abstract syntax tree. +package syntax + +// A Node is a node in a Skylark syntax tree. +type Node interface { + // Span returns the start and end position of the expression. + Span() (start, end Position) +} + +// Start returns the start position of the expression. +func Start(n Node) Position { + start, _ := n.Span() + return start +} + +// End returns the end position of the expression. +func End(n Node) Position { + _, end := n.Span() + return end +} + +// A File represents a Skylark file. +type File struct { + Path string + Stmts []Stmt + + // set by resolver: + Locals []*Ident // this file's (comprehension-)local variables +} + +func (x *File) Span() (start, end Position) { + if len(x.Stmts) == 0 { + return + } + start, _ = x.Stmts[0].Span() + _, end = x.Stmts[len(x.Stmts)-1].Span() + return start, end +} + +// A Stmt is a Skylark statement. 
type Stmt interface {
	Node
	stmt() // unexported marker method: only types in this package can be a Stmt
}

func (*AssignStmt) stmt() {}
func (*BranchStmt) stmt() {}
func (*DefStmt) stmt()    {}
func (*ExprStmt) stmt()   {}
func (*ForStmt) stmt()    {}
func (*IfStmt) stmt()     {}
func (*LoadStmt) stmt()   {}
func (*ReturnStmt) stmt() {}

// An AssignStmt represents an assignment:
//	x = 0
//	x, y = y, x
//	x += 1
type AssignStmt struct {
	OpPos Position
	// NOTE(review): the scanner also defines SLASH_EQ and SLASHSLASH_EQ;
	// confirm against the parser whether they can appear in Op as well.
	Op  Token // = EQ | {PLUS,MINUS,STAR,PERCENT}_EQ
	LHS Expr
	RHS Expr
}

// Span returns the start of the LHS and the end of the RHS.
func (x *AssignStmt) Span() (start, end Position) {
	start, _ = x.LHS.Span()
	_, end = x.RHS.Span()
	return
}

// A Function represents the common parts of LambdaExpr and DefStmt.
type Function struct {
	StartPos Position // position of DEF or LAMBDA token
	Params   []Expr   // param = ident | ident=expr | *ident | **ident
	Body     []Stmt

	// set by resolver:
	HasVarargs bool     // whether params includes *args (convenience)
	HasKwargs  bool     // whether params includes **kwargs (convenience)
	Locals     []*Ident // this function's local variables, parameters first
	FreeVars   []*Ident // enclosing local variables to capture in closure
}

// Span returns StartPos and the end of the last statement of Body.
// It indexes Body[len(Body)-1], so Body must be non-empty.
func (x *Function) Span() (start, end Position) {
	_, end = x.Body[len(x.Body)-1].Span()
	return x.StartPos, end
}

// A DefStmt represents a function definition.
type DefStmt struct {
	Def  Position
	Name *Ident
	Function
}

// Span returns the Def position and the end of the last statement
// of the body. Like Function.Span, it requires a non-empty Body.
func (x *DefStmt) Span() (start, end Position) {
	_, end = x.Function.Body[len(x.Body)-1].Span()
	return x.Def, end
}

// An ExprStmt is an expression evaluated for side effects.
type ExprStmt struct {
	X Expr
}

// Span returns the span of the underlying expression.
func (x *ExprStmt) Span() (start, end Position) {
	return x.X.Span()
}

// An IfStmt is a conditional: If Cond: True; else: False.
// 'elif' is desugared into a chain of IfStmts.
+type IfStmt struct { + If Position // IF or ELIF + Cond Expr + True []Stmt + ElsePos Position // ELSE or ELIF + False []Stmt // optional +} + +func (x *IfStmt) Span() (start, end Position) { + body := x.False + if body == nil { + body = x.True + } + _, end = body[len(body)-1].Span() + return x.If, end +} + +// A LoadStmt loads another module and binds names from it: +// load(Module, "x", y="foo"). +// +// The AST is slightly unfaithful to the concrete syntax here because +// Skylark's load statement, so that it can be implemented in Python, +// binds some names (like y above) with an identifier and some (like x) +// without. For consistency we create fake identifiers for all the +// strings. +type LoadStmt struct { + Load Position + Module *Literal // a string + From []*Ident // name defined in loading module + To []*Ident // name in loaded module + Rparen Position +} + +func (x *LoadStmt) Span() (start, end Position) { + return x.Load, x.Rparen +} + +// A BranchStmt changes the flow of control: break, continue, pass. +type BranchStmt struct { + Token Token // = BREAK | CONTINUE | PASS + TokenPos Position +} + +func (x *BranchStmt) Span() (start, end Position) { + return x.TokenPos, x.TokenPos.add(x.Token.String()) +} + +// A ReturnStmt returns from a function. +type ReturnStmt struct { + Return Position + Result Expr // may be nil +} + +func (x *ReturnStmt) Span() (start, end Position) { + if x.Result == nil { + return x.Return, x.Return.add("return") + } + _, end = x.Result.Span() + return x.Return, end +} + +// An Expr is a Skylark expression. 
+type Expr interface { + Node + expr() +} + +func (*BinaryExpr) expr() {} +func (*CallExpr) expr() {} +func (*Comprehension) expr() {} +func (*CondExpr) expr() {} +func (*DictEntry) expr() {} +func (*DictExpr) expr() {} +func (*DotExpr) expr() {} +func (*Ident) expr() {} +func (*IndexExpr) expr() {} +func (*LambdaExpr) expr() {} +func (*ListExpr) expr() {} +func (*Literal) expr() {} +func (*SliceExpr) expr() {} +func (*TupleExpr) expr() {} +func (*UnaryExpr) expr() {} + +// An Ident represents an identifier. +type Ident struct { + NamePos Position + Name string + + // set by resolver: + + Scope uint8 // one of resolve.{Undefined,Local,Free,Global,Builtin} + Index int // index into enclosing {DefStmt,File}.Locals (if scope==Local) or DefStmt.FreeVars (if scope==Free) +} + +func (x *Ident) Span() (start, end Position) { + return x.NamePos, x.NamePos.add(x.Name) +} + +// A Literal represents a literal string or number. +type Literal struct { + Token Token // = STRING | INT + TokenPos Position + Raw string // uninterpreted text + Value interface{} // = string | int +} + +func (x *Literal) Span() (start, end Position) { + return x.TokenPos, x.TokenPos.add(x.Raw) +} + +// A CallExpr represents a function call expression: Fn(Args). +type CallExpr struct { + Fn Expr + Lparen Position + Args []Expr + Rparen Position +} + +func (x *CallExpr) Span() (start, end Position) { + start, _ = x.Fn.Span() + return start, x.Rparen.add(")") +} + +// A DotExpr represents a field or method selector: X.Name. +type DotExpr struct { + X Expr + Dot Position + NamePos Position + Name *Ident +} + +func (x *DotExpr) Span() (start, end Position) { + start, _ = x.X.Span() + _, end = x.Name.Span() + return +} + +// A Comprehension represents a list or dict comprehension: +// [Body for ... if ...] or {Body for ... if ...} +type Comprehension struct { + Curly bool // {x:y for ...} or {x for ...}, not [x for ...] 
+ Lbrack Position + Body Expr + Clauses []Node // = *ForClause | *IfClause + Rbrack Position +} + +func (x *Comprehension) Span() (start, end Position) { + return x.Lbrack, x.Rbrack.add("]") +} + +// A ForStmt represents a loop: for Vars in X: Body. +type ForStmt struct { + For Position + Vars Expr // name, or tuple of names + X Expr + Body []Stmt +} + +func (x *ForStmt) Span() (start, end Position) { + _, end = x.Body[len(x.Body)-1].Span() + return x.For, end +} + +// A ForClause represents a for clause in a list comprehension: for Vars in X. +type ForClause struct { + For Position + Vars Expr // name, or tuple of names + In Position + X Expr +} + +func (x *ForClause) Span() (start, end Position) { + _, end = x.X.Span() + return x.For, end +} + +// An IfClause represents an if clause in a list comprehension: if Cond. +type IfClause struct { + If Position + Cond Expr +} + +func (x *IfClause) Span() (start, end Position) { + _, end = x.Cond.Span() + return x.If, end +} + +// A DictExpr represents a dictionary literal: { List }. +type DictExpr struct { + Lbrace Position + List []Expr // all *DictEntrys + Rbrace Position +} + +func (x *DictExpr) Span() (start, end Position) { + return x.Lbrace, x.Rbrace.add("}") +} + +// A DictEntry represents a dictionary entry: Key: Value. +// Used only within a DictExpr. +type DictEntry struct { + Key Expr + Colon Position + Value Expr +} + +func (x *DictEntry) Span() (start, end Position) { + start, _ = x.Key.Span() + _, end = x.Value.Span() + return start, end +} + +// A LambdaExpr represents an inline function abstraction. +// +// Although they may be added in future, lambda expressions are not +// currently part of the Skylark spec, so their use is controlled by the +// resolver.AllowLambda flag. 
+type LambdaExpr struct { + Lambda Position + Function +} + +func (x *LambdaExpr) Span() (start, end Position) { + _, end = x.Function.Body[len(x.Body)-1].Span() + return x.Lambda, end +} + +// A ListExpr represents a list literal: [ List ]. +type ListExpr struct { + Lbrack Position + List []Expr + Rbrack Position +} + +func (x *ListExpr) Span() (start, end Position) { + return x.Lbrack, x.Rbrack.add("]") +} + +// CondExpr represents the conditional: X if COND else ELSE. +type CondExpr struct { + If Position + Cond Expr + True Expr + ElsePos Position + False Expr +} + +func (x *CondExpr) Span() (start, end Position) { + start, _ = x.True.Span() + _, end = x.False.Span() + return start, end +} + +// A TupleExpr represents a tuple literal: (List). +type TupleExpr struct { + Lparen Position // optional (e.g. in x, y = 0, 1), but required if List is empty + List []Expr + Rparen Position +} + +func (x *TupleExpr) Span() (start, end Position) { + if x.Lparen.IsValid() { + return x.Lparen, x.Rparen + } else { + return Start(x.List[0]), End(x.List[len(x.List)-1]) + } +} + +// A UnaryExpr represents a unary expression: Op X. +type UnaryExpr struct { + OpPos Position + Op Token + X Expr +} + +func (x *UnaryExpr) Span() (start, end Position) { + _, end = x.X.Span() + return x.OpPos, end +} + +// A BinaryExpr represents a binary expression: X Op Y. +type BinaryExpr struct { + X Expr + OpPos Position + Op Token + Y Expr +} + +func (x *BinaryExpr) Span() (start, end Position) { + start, _ = x.X.Span() + _, end = x.Y.Span() + return start, end +} + +// A SliceExpr represents a slice or substring expression: X[Lo:Hi:Step]. +type SliceExpr struct { + X Expr + Lbrack Position + Lo, Hi, Step Expr // all optional + Rbrack Position +} + +func (x *SliceExpr) Span() (start, end Position) { + start, _ = x.X.Span() + return start, x.Rbrack +} + +// An IndexExpr represents an index expression: X[Y]. 
+type IndexExpr struct { + X Expr + Lbrack Position + Y Expr + Rbrack Position +} + +func (x *IndexExpr) Span() (start, end Position) { + start, _ = x.X.Span() + return start, x.Rbrack +} diff --git a/syntax/testdata/def.bzl b/syntax/testdata/def.bzl new file mode 100644 index 0000000..22b1902 --- /dev/null +++ b/syntax/testdata/def.bzl @@ -0,0 +1,1273 @@ +# Copyright 2014 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# (From https://github.com/bazelbuild/rules_go/blob/master/go/def.bzl@a6f9d0c) + +load("//go/private:repositories.bzl", "go_repositories") +load("//go/private:go_repository.bzl", "go_repository", "new_go_repository") +load("//go/private:go_prefix.bzl", "go_prefix") +load("//go/private:json.bzl", "json_marshal") + +"""These are bare-bones Go rules. + +In order of priority: + +- BUILD file must be written by hand. + +- No support for SWIG + +- No test sharding or test XML. + +""" + +_DEFAULT_LIB = "go_default_library" + +_VENDOR_PREFIX = "/vendor/" + +go_filetype = FileType([ + ".go", + ".s", + ".S", + ".h", # may be included by .s +]) + +# be consistent to cc_library. +hdr_exts = [ + ".h", + ".hh", + ".hpp", + ".hxx", + ".inc", +] + +cc_hdr_filetype = FileType(hdr_exts) + +# Extensions of files we can build with the Go compiler or with cc_library. +# This is a subset of the extensions recognized by go/build. 
cgo_filetype = FileType([
    ".go",
    ".c",
    ".cc",
    ".cxx",
    ".cpp",
    ".s",
    ".S",
    ".h",
    ".hh",
    ".hpp",
    ".hxx",
])

################

def go_environment_vars(ctx):
  """Return a map of environment variables for use with actions, based on
  the arguments. Uses the ctx.fragments.cpp.cpu attribute, if present,
  and picks a default of target_os="linux" and target_arch="amd64"
  otherwise.

  Args:
    ctx: The skylark Context.

  Returns:
    A dict of environment variables for running Go tool commands that build for
    the target OS and architecture.
  """
  default_toolchain = {"GOOS": "linux", "GOARCH": "amd64"}
  bazel_to_go_toolchain = {
    "k8": {"GOOS": "linux", "GOARCH": "amd64"},
    "piii": {"GOOS": "linux", "GOARCH": "386"},
    "darwin": {"GOOS": "darwin", "GOARCH": "amd64"},
    "darwin_x86_64": {"GOOS": "darwin", "GOARCH": "amd64"},
    "freebsd": {"GOOS": "freebsd", "GOARCH": "amd64"},
    "armeabi-v7a": {"GOOS": "linux", "GOARCH": "arm"},
    "arm": {"GOOS": "linux", "GOARCH": "arm"}
  }
  env = {}
  if hasattr(ctx.file, "go_tool"):
    env["GOROOT"] = ctx.file.go_tool.dirname + "/.."
  env.update(bazel_to_go_toolchain.get(ctx.fragments.cpp.cpu, default_toolchain))
  return env

def _is_darwin_cpu(ctx):
  """Reports whether the configured cpu is "darwin" or "darwin_x86_64"."""
  cpu = ctx.fragments.cpp.cpu
  return cpu == "darwin" or cpu == "darwin_x86_64"

def _emit_generate_params_action(cmds, ctx, fn):
  """Writes cmds as an executable bash script named fn under bin_dir.

  Args:
    cmds: a list of shell command lines; a bash shebang and "set -e" are
      prepended.
    ctx: The skylark Context.
    fn: name of the script file to create.

  Returns:
    The generated script file.
  """
  cmds_all = [
      # Use bash explicitly. /bin/sh is default, and it may be linked to a
      # different shell, e.g., /bin/dash on Ubuntu.
      "#!/bin/bash",
      "set -e",
  ]
  cmds_all += cmds
  cmds_all_str = "\n".join(cmds_all) + "\n"
  f = ctx.new_file(ctx.configuration.bin_dir, fn)
  ctx.file_action(
      output = f,
      content = cmds_all_str,
      executable = True)
  return f

def _emit_go_asm_action(ctx, source, hdrs, out_obj):
  """Construct the command line for compiling Go Assembly code.
  Constructs a symlink tree to accommodate for workspace name.
  Args:
    ctx: The skylark Context.
    source: a source code artifact
    hdrs: list of .h files that may be included
    out_obj: the artifact (configured target?) that should be produced
  """
  params = {
      "go_tool": ctx.file.go_tool.path,
      "includes": [f.dirname for f in hdrs] + [ctx.file.go_include.path],
      "source": source.path,
      "out": out_obj.path,
  }

  inputs = hdrs + ctx.files.toolchain + [source]
  ctx.action(
      inputs = inputs,
      outputs = [out_obj],
      mnemonic = "GoAsmCompile",
      executable = ctx.executable._asm,
      arguments = [json_marshal(params)],
  )

def _go_importpath(ctx):
  """Returns the expected importpath of the go_library being built.

  Args:
    ctx: The skylark Context

  Returns:
    Go importpath of the library
  """
  path = ctx.attr.importpath
  if path != "":
    return path
  path = ctx.attr.go_prefix.go_prefix
  if path.endswith("/"):
    path = path[:-1]
  if ctx.label.package:
    path += "/" + ctx.label.package
  if ctx.label.name != _DEFAULT_LIB:
    path += "/" + ctx.label.name
  if path.rfind(_VENDOR_PREFIX) != -1:
    path = path[len(_VENDOR_PREFIX) + path.rfind(_VENDOR_PREFIX):]
  if path[0] == "/":
    path = path[1:]
  return path

def _emit_go_compile_action(ctx, sources, deps, libpaths, out_object, gc_goopts):
  """Construct the command line for compiling Go code.

  Args:
    ctx: The skylark Context.
    sources: an iterable of source code artifacts (or CTs? or labels?)
    deps: an iterable of dependencies. Each dependency d should have an
      artifact in d.transitive_go_libraries representing all imported libraries.
    libpaths: the set of paths to search for imported libraries.
    out_object: the object file that should be produced
    gc_goopts: additional flags to pass to the compiler.

  Returns:
    The sources that were compiled: the coverage-instrumented replacements
    when coverage is enabled, otherwise the sources as passed in.
  """
  if ctx.coverage_instrumented():
    sources = _emit_go_cover_action(ctx, sources)

  # Compile filtered files.
  args = [
      "-cgo",
      ctx.file.go_tool.path,
      "tool", "compile",
      "-o", out_object.path,
      "-trimpath", "-abs-.",
      "-I", "-abs-.",
  ]
  inputs = depset(sources + ctx.files.toolchain)
  for dep in deps:
    inputs += dep.transitive_go_libraries
  for path in libpaths:
    args += ["-I", path]
  args += gc_goopts + [("" if i.basename.startswith("_cgo") else "-filter-") + i.path for i in sources]
  ctx.action(
      inputs = list(inputs),
      outputs = [out_object],
      mnemonic = "GoCompile",
      executable = ctx.executable._filter_exec,
      arguments = args,
      env = go_environment_vars(ctx),
  )

  return sources

def _emit_go_pack_action(ctx, out_lib, objects):
  """Construct the command line for packing objects together.

  Args:
    ctx: The skylark Context.
    out_lib: the archive that should be produced
    objects: an iterable of object files to be added to the output archive file.
  """
  ctx.action(
      inputs = objects + ctx.files.toolchain,
      outputs = [out_lib],
      mnemonic = "GoPack",
      executable = ctx.file.go_tool,
      arguments = ["tool", "pack", "c", out_lib.path] + [a.path for a in objects],
      env = go_environment_vars(ctx),
  )

def _emit_go_cover_action(ctx, sources):
  """Construct the command line for test coverage instrument.

  Args:
    ctx: The skylark Context.
    sources: an iterable of Go source files.

  Returns:
    A list of Go source code files which might be coverage instrumented.
  """
  outputs = []
  # TODO(linuxerwang): make the mode configurable.
  count = 0

  for src in sources:
    # Non-Go files and test files are passed through uninstrumented.
    if not src.path.endswith(".go") or src.path.endswith("_test.go"):
      outputs += [src]
      continue

    cover_var = "GoCover_%d" % count
    out = ctx.new_file(src, src.basename[:-3] + '_' + cover_var + '.cover.go')
    outputs += [out]
    ctx.action(
        inputs = [src] + ctx.files.toolchain,
        outputs = [out],
        mnemonic = "GoCover",
        executable = ctx.file.go_tool,
        arguments = ["tool", "cover", "--mode=set", "-var=%s" % cover_var, "-o", out.path, src.path],
        env = go_environment_vars(ctx),
    )
    count += 1

  return outputs

def go_library_impl(ctx):
  """Implements the go_library() rule."""

  sources = depset(ctx.files.srcs)
  go_srcs = depset([s for s in sources if s.basename.endswith('.go')])
  asm_srcs = [s for s in sources if s.basename.endswith('.s') or s.basename.endswith('.S')]
  asm_hdrs = [s for s in sources if s.basename.endswith('.h')]
  deps = ctx.attr.deps
  dep_runfiles = [d.data_runfiles for d in deps]

  cgo_object = None
  if hasattr(ctx.attr, "cgo_object"):
    cgo_object = ctx.attr.cgo_object

  if ctx.attr.library:
    go_srcs += ctx.attr.library.go_sources
    asm_srcs += ctx.attr.library.asm_sources
    asm_hdrs += ctx.attr.library.asm_headers
    deps += ctx.attr.library.direct_deps
    dep_runfiles += [ctx.attr.library.data_runfiles]
    if ctx.attr.library.cgo_object:
      if cgo_object:
        # Parenthesize the concatenation: "%" binds tighter than "+", so
        # without the parentheses only the second literal would be formatted
        # and its single "%s" would receive two arguments.
        fail(("go_library %s cannot have cgo_object because the package " +
              "already has cgo_object in %s") % (ctx.label.name,
                                                  ctx.attr.library.name))
      cgo_object = ctx.attr.library.cgo_object
  if not go_srcs:
    fail("may not be empty", "srcs")

  transitive_cgo_deps = depset([], order="topological")
  if cgo_object:
    dep_runfiles += [cgo_object.data_runfiles]
    transitive_cgo_deps += cgo_object.cgo_deps

  extra_objects = [cgo_object.cgo_obj] if cgo_object else []
  for src in asm_srcs:
    obj = ctx.new_file(src, "%s.dir/%s.o" % (ctx.label.name, src.basename[:-2]))
    _emit_go_asm_action(ctx, src, asm_hdrs, obj)
    extra_objects += [obj]

  lib_name = _go_importpath(ctx) + ".a"
  out_lib = ctx.new_file(lib_name)
  out_object = ctx.new_file(ctx.label.name + ".o")
  search_path = out_lib.path[:-len(lib_name)]
  gc_goopts = _gc_goopts(ctx)
  transitive_go_libraries = depset([out_lib])
  transitive_go_library_paths = depset([search_path])
  for dep in deps:
    transitive_go_libraries += dep.transitive_go_libraries
    transitive_cgo_deps += dep.transitive_cgo_deps
    transitive_go_library_paths += dep.transitive_go_library_paths

  go_srcs = _emit_go_compile_action(ctx,
      sources = go_srcs,
      deps = deps,
      libpaths = transitive_go_library_paths,
      out_object = out_object,
      gc_goopts = gc_goopts,
  )
  _emit_go_pack_action(ctx, out_lib, [out_object] + extra_objects)

  dylibs = []
  if cgo_object:
    dylibs += [d for d in cgo_object.cgo_deps if d.path.endswith(".so")]

  runfiles = ctx.runfiles(files = dylibs, collect_data = True)
  for d in dep_runfiles:
    runfiles = runfiles.merge(d)

  return struct(
    label = ctx.label,
    files = depset([out_lib]),
    runfiles = runfiles,
    go_sources = go_srcs,
    asm_sources = asm_srcs,
    asm_headers = asm_hdrs,
    cgo_object = cgo_object,
    direct_deps = ctx.attr.deps,
    transitive_cgo_deps = transitive_cgo_deps,
    transitive_go_libraries = transitive_go_libraries,
    transitive_go_library_paths = transitive_go_library_paths,
    gc_goopts = gc_goopts,
  )

def _c_linker_options(ctx, blacklist=[]):
  """Extracts flags to pass to $(CC) on link from the current context

  Args:
    ctx: the current context
    blacklist: Any flags starts with any of these prefixes are filtered out from
      the return value.

  Returns:
    A list of command line flags
  """
  cpp = ctx.fragments.cpp
  features = ctx.features
  options = cpp.compiler_options(features)
  options += cpp.unfiltered_compiler_options(features)
  options += cpp.link_options
  options += cpp.mostly_static_link_options(ctx.features, False)
  filtered = []
  for opt in options:
    if any([opt.startswith(prefix) for prefix in blacklist]):
      continue
    filtered.append(opt)
  return filtered

def _gc_goopts(ctx):
  """Returns the expanded gc_goopts of this rule plus those of its library."""
  gc_goopts = [ctx.expand_make_variables("gc_goopts", f, {})
               for f in ctx.attr.gc_goopts]
  if ctx.attr.library:
    gc_goopts += ctx.attr.library.gc_goopts
  return gc_goopts

def _gc_linkopts(ctx):
  """Returns the expanded gc_linkopts plus one -X flag per x_defs entry."""
  gc_linkopts = [ctx.expand_make_variables("gc_linkopts", f, {})
                 for f in ctx.attr.gc_linkopts]
  for k, v in ctx.attr.x_defs.items():
    gc_linkopts += ["-X", "%s='%s'" % (k, v)]
  return gc_linkopts

def _extract_extldflags(gc_linkopts, extldflags):
  """Extracts -extldflags from gc_linkopts and combines them into a single list.

  Args:
    gc_linkopts: a list of flags passed in through the gc_linkopts attributes.
      ctx.expand_make_variables should have already been applied.
    extldflags: a list of flags to be passed to the external linker.

  Return:
    A tuple containing the filtered gc_linkopts with external flags removed,
    and a combined list of external flags.
  """
  filtered_gc_linkopts = []
  is_extldflags = False
  for opt in gc_linkopts:
    if is_extldflags:
      # The previous option was "-extldflags"; this one is its value.
      is_extldflags = False
      extldflags += [opt]
    elif opt == "-extldflags":
      is_extldflags = True
    else:
      filtered_gc_linkopts += [opt]
  return filtered_gc_linkopts, extldflags

def _emit_go_link_action(ctx, transitive_go_library_paths, transitive_go_libraries, cgo_deps, libs,
                         executable, gc_linkopts):
  """Sets up a symlink tree to libraries to link together."""
  config_strip = len(ctx.configuration.bin_dir.path) + 1
  pkg_depth = executable.dirname[config_strip:].count('/') + 1

  # The params file name and the action inputs are keyed on the last library
  # in libs. Select it explicitly instead of relying on the comprehension
  # variable below still being bound after the comprehension has finished.
  lib_list = list(libs)
  main_lib = lib_list[-1]

  ld = "%s" % ctx.fragments.cpp.compiler_executable
  extldflags = _c_linker_options(ctx) + [
      "-Wl,-rpath,$ORIGIN/" + ("../" * pkg_depth),
  ]
  for d in cgo_deps:
    if d.basename.endswith('.so'):
      short_dir = d.dirname[len(d.root.path):]
      extldflags += ["-Wl,-rpath,$ORIGIN/" + ("../" * pkg_depth) + short_dir]
  gc_linkopts, extldflags = _extract_extldflags(gc_linkopts, extldflags)

  link_cmd = [
      ctx.file.go_tool.path,
      "tool", "link",
      "-L", "."
  ]
  for path in transitive_go_library_paths:
    link_cmd += ["-L", path]
  link_cmd += [
      "-o", executable.path,
  ] + gc_linkopts + ['"${STAMP_XDEFS[@]}"']

  # workaround for a bug in ld(1) on Mac OS X.
  # http://lists.apple.com/archives/Darwin-dev/2006/Sep/msg00084.html
  # TODO(yugui) Remove this workaround once rules_go stops supporting XCode 7.2
  # or earlier.
  if not _is_darwin_cpu(ctx):
    link_cmd += ["-s"]

  link_cmd += [
      "-extld", ld,
      "-extldflags", "'%s'" % " ".join(extldflags),
  ] + [lib.path for lib in lib_list]

  # Avoided -s on OSX but it requires dsymutil to be on $PATH.
  # TODO(yugui) Remove this workaround once rules_go stops supporting XCode 7.2
  # or earlier.
  cmds = ["export PATH=$PATH:/usr/bin"]

  cmds += [
      "STAMP_XDEFS=()",
  ]

  stamp_inputs = []
  if ctx.attr.linkstamp:
    # read workspace status files, converting "KEY value" lines
    # to "-X $linkstamp.KEY=value" arguments to the go linker.
    stamp_inputs = [ctx.info_file, ctx.version_file]
    for f in stamp_inputs:
      cmds += [
          "while read -r key value || [[ -n $key ]]; do",
          " STAMP_XDEFS+=(-X \"%s.$key=$value\")" % ctx.attr.linkstamp,
          "done < " + f.path,
      ]

  cmds += [' '.join(link_cmd)]

  f = _emit_generate_params_action(cmds, ctx, main_lib.basename + ".GoLinkFile.params")

  ctx.action(
      inputs = [f] + (list(transitive_go_libraries) + [main_lib] + list(cgo_deps) +
                      ctx.files.toolchain + ctx.files._crosstool) + stamp_inputs,
      outputs = [executable],
      command = f.path,
      mnemonic = "GoLink",
      env = go_environment_vars(ctx),
  )

def go_binary_impl(ctx):
  """go_binary_impl emits actions for compiling and linking a go executable."""
  lib_result = go_library_impl(ctx)
  _emit_go_link_action(
    ctx,
    transitive_go_libraries=lib_result.transitive_go_libraries,
    transitive_go_library_paths=lib_result.transitive_go_library_paths,
    cgo_deps=lib_result.transitive_cgo_deps,
    libs=lib_result.files,
    executable=ctx.outputs.executable,
    gc_linkopts=_gc_linkopts(ctx))

  return struct(
      files = depset([ctx.outputs.executable]),
      runfiles = lib_result.runfiles,
      cgo_object = lib_result.cgo_object,
  )

def go_test_impl(ctx):
  """go_test_impl implements go testing.

  It emits an action to run the test generator, and then compiles the
  test into a binary."""

  lib_result = go_library_impl(ctx)
  main_go = ctx.new_file(ctx.label.name + "_main_test.go")
  main_object = ctx.new_file(ctx.label.name + "_main_test.o")
  main_lib = ctx.new_file(ctx.label.name + "_main_test.a")
  go_import = _go_importpath(ctx)

  cmds = [
      'UNFILTERED_TEST_FILES=(%s)' %
          ' '.join(["'%s'" % f.path for f in lib_result.go_sources]),
      'FILTERED_TEST_FILES=()',
      'while read -r line; do',
      ' if [ -n "$line" ]; then',
      ' FILTERED_TEST_FILES+=("$line")',
      ' fi',
      'done < <(\'%s\' -cgo "${UNFILTERED_TEST_FILES[@]}")' %
          ctx.executable._filter_tags.path,
      ' '.join([
          "'%s'" % ctx.executable.test_generator.path,
          '--package',
          go_import,
          '--output',
          "'%s'" % main_go.path,
          '"${FILTERED_TEST_FILES[@]}"',
      ]),
  ]
  f = _emit_generate_params_action(
      cmds, ctx, ctx.label.name + ".GoTestGenTest.params")
  inputs = (list(lib_result.go_sources) + list(ctx.files.toolchain) +
            [f, ctx.executable._filter_tags, ctx.executable.test_generator])
  ctx.action(
      inputs = inputs,
      outputs = [main_go],
      command = f.path,
      mnemonic = "GoTestGenTest",
      env = dict(go_environment_vars(ctx), RUNDIR=ctx.label.package))

  _emit_go_compile_action(
    ctx,
    sources=depset([main_go]),
    deps=ctx.attr.deps + [lib_result],
    libpaths=lib_result.transitive_go_library_paths,
    out_object=main_object,
    gc_goopts=_gc_goopts(ctx),
  )
  _emit_go_pack_action(ctx, main_lib, [main_object])
  _emit_go_link_action(
    ctx,
    transitive_go_library_paths=lib_result.transitive_go_library_paths,
    transitive_go_libraries=lib_result.transitive_go_libraries,
    cgo_deps=lib_result.transitive_cgo_deps,
    libs=[main_lib],
    executable=ctx.outputs.executable,
    gc_linkopts=_gc_linkopts(ctx))

  # TODO(bazel-team): the Go tests should do a chdir to the directory
  # holding the data files, so open-source go tests continue to work
  # without code changes.
  runfiles = ctx.runfiles(files = [ctx.outputs.executable])
  runfiles = runfiles.merge(lib_result.runfiles)
  return struct(
      files = depset([ctx.outputs.executable]),
      runfiles = runfiles,
  )

go_env_attrs = {
    "toolchain": attr.label(
        default = Label("//go/toolchain:toolchain"),
        allow_files = True,
        cfg = "host",
    ),
    "go_tool": attr.label(
        default = Label("//go/toolchain:go_tool"),
        single_file = True,
        allow_files = True,
        cfg = "host",
    ),
    "go_prefix": attr.label(
        providers = ["go_prefix"],
        default = Label(
            "//:go_prefix",
            relative_to_caller_repository = True,
        ),
        allow_files = False,
        cfg = "host",
    ),
    "go_src": attr.label(
        default = Label("//go/toolchain:go_src"),
        allow_files = True,
        cfg = "host",
    ),
    "go_include": attr.label(
        default = Label("//go/toolchain:go_include"),
        single_file = True,
        allow_files = True,
        cfg = "host",
    ),
    "go_root": attr.label(
        providers = ["go_root"],
        default = Label(
            "//go/toolchain:go_root",
        ),
        allow_files = False,
        cfg = "host",
    ),
    "_filter_tags": attr.label(
        default = Label("//go/tools/filter_tags"),
        cfg = "host",
        executable = True,
        single_file = True,
    ),
    "_filter_exec": attr.label(
        default = Label("//go/tools/filter_exec"),
        cfg = "host",
        executable = True,
        single_file = True,
    ),
    "_asm": attr.label(
        default = Label("//go/tools/builders:asm"),
        cfg = "host",
        executable = True,
        single_file = True,
    ),
}

go_library_attrs = go_env_attrs + {
    "data": attr.label_list(
        allow_files = True,
        cfg = "data",
    ),
    "srcs": attr.label_list(allow_files = go_filetype),
    "deps": attr.label_list(
        providers = [
            "transitive_go_library_paths",
            "transitive_go_libraries",
            "transitive_cgo_deps",
        ],
    ),
    "importpath": attr.string(),
    "library": attr.label(
        providers = [
            "direct_deps",
            "go_sources",
            "asm_sources",
            "cgo_object",
            "gc_goopts",
        ],
    ),
    "gc_goopts": attr.string_list(),
}

_crosstool_attrs = {
    "_crosstool": attr.label(
        default = Label("//tools/defaults:crosstool"),
    ),
}

go_link_attrs = go_library_attrs + _crosstool_attrs + {
    "gc_linkopts": attr.string_list(),
    "linkstamp": attr.string(),
    "x_defs": attr.string_dict(),
}

go_library = rule(
    go_library_impl,
    attrs = go_library_attrs + {
        "cgo_object": attr.label(
            providers = [
                "cgo_obj",
                "cgo_deps",
            ],
        ),
    },
    fragments = ["cpp"],
)

go_binary = rule(
    go_binary_impl,
    attrs = go_library_attrs + _crosstool_attrs + go_link_attrs,
    executable = True,
    fragments = ["cpp"],
)

go_test = rule(
    go_test_impl,
    attrs = go_library_attrs + _crosstool_attrs + go_link_attrs + {
        "test_generator": attr.label(
            executable = True,
            default = Label(
                "//go/tools:generate_test_main",
            ),
            cfg = "host",
        ),
    },
    executable = True,
    fragments = ["cpp"],
    test = True,
)

def _pkg_dir(workspace_root, package_name):
  """Joins workspace_root and package_name with "/", skipping empty parts.

  Returns "." when both are empty.
  """
  if workspace_root and package_name:
    return workspace_root + "/" + package_name
  if workspace_root:
    return workspace_root
  if package_name:
    return package_name
  return "."
+ +def _exec_path(path): + if path.startswith('/'): + return path + return '${execroot}/' + path + +def _cgo_filter_srcs_impl(ctx): + srcs = ctx.files.srcs + dsts = [] + cmds = [] + for src in srcs: + stem, _, ext = src.path.rpartition('.') + dst_basename = "%s.filtered.%s" % (stem, ext) + dst = ctx.new_file(src, dst_basename) + cmds += [ + "if '%s' -cgo -quiet '%s'; then" % + (ctx.executable._filter_tags.path, src.path), + " cp '%s' '%s'" % (src.path, dst.path), + "else", + " echo -n >'%s'" % dst.path, + "fi", + ] + dsts.append(dst) + + if ctx.label.package == "": + script_name = ctx.label.name + ".CGoFilterSrcs.params" + else: + script_name = ctx.label.package + "/" + ctx.label.name + ".CGoFilterSrcs.params" + f = _emit_generate_params_action(cmds, ctx, script_name) + ctx.action( + inputs = [f, ctx.executable._filter_tags] + srcs, + outputs = dsts, + command = f.path, + mnemonic = "CgoFilterSrcs", + ) + return struct( + files = depset(dsts), + ) + +_cgo_filter_srcs = rule( + implementation = _cgo_filter_srcs_impl, + attrs = { + "srcs": attr.label_list( + allow_files = cgo_filetype, + ), + "_filter_tags": attr.label( + default = Label("//go/tools/filter_tags"), + cfg = "host", + executable = True, + single_file = True, + ), + }, + fragments = ["cpp"], +) + +def _cgo_codegen_impl(ctx): + go_srcs = ctx.files.srcs + srcs = go_srcs + ctx.files.c_hdrs + linkopts = ctx.attr.linkopts + copts = ctx.fragments.cpp.c_options + ctx.attr.copts + deps = depset([], order="topological") + for d in ctx.attr.deps: + srcs += list(d.cc.transitive_headers) + deps += d.cc.libs + copts += ['-D' + define for define in d.cc.defines] + for inc in d.cc.include_directories: + copts += ['-I', _exec_path(inc)] + for hdr in ctx.files.c_hdrs: + copts += ['-iquote', hdr.dirname] + for inc in d.cc.quote_include_directories: + copts += ['-iquote', _exec_path(inc)] + for inc in d.cc.system_include_directories: + copts += ['-isystem', _exec_path(inc)] + for lib in d.cc.libs: + if 
lib.basename.startswith('lib') and lib.basename.endswith('.so'): + linkopts += ['-L', lib.dirname, '-l', lib.basename[3:-3]] + else: + linkopts += [lib.path] + linkopts += d.cc.link_flags + + p = _pkg_dir(ctx.label.workspace_root, ctx.label.package) + "/" + if p == "./": + p = "" # workaround when cgo_library in repository root + out_dir = (ctx.configuration.genfiles_dir.path + '/' + + p + ctx.attr.outdir) + cc = ctx.fragments.cpp.compiler_executable + cmds = [ + # We cannot use env for CC because $(CC) on OSX is relative + # and '../' does not work fine due to symlinks. + 'export CC=$(cd $(dirname {cc}); pwd)/$(basename {cc})'.format(cc=cc), + 'export CXX=$CC', + 'objdir="%s/gen"' % out_dir, + 'execroot=$(pwd)', + 'mkdir -p "$objdir"', + 'unfiltered_go_files=(%s)' % ' '.join(["'%s'" % f.path for f in go_srcs]), + 'filtered_go_files=()', + 'for file in "${unfiltered_go_files[@]}"; do', + ' stem=$(basename "$file" .go)', + ' if %s -cgo -quiet "$file"; then' % ctx.executable._filter_tags.path, + ' filtered_go_files+=("$file")', + ' else', + ' grep --max-count 1 "^package " "$file" >"$objdir/$stem.go"', + ' echo -n >"$objdir/$stem.c"', + ' fi', + 'done', + 'if [ ${#filtered_go_files[@]} -eq 0 ]; then', + ' echo no buildable Go source files in %s >&1' % str(ctx.label), + ' exit 1', + 'fi', + '"$GOROOT/bin/go" tool cgo -objdir "$objdir" -- %s "${filtered_go_files[@]}"' % + ' '.join(['"%s"' % copt for copt in copts]), + # Rename the outputs using glob so we don't have to understand cgo's mangling + # TODO(#350): might be fixed by this?. 
def _cgo_codegen(name, srcs, c_hdrs=[], deps=[], copts=[], linkopts=[],
                 go_tool=None, toolchain=None):
  """Declares the cgo code-generation target and names its outputs.

  Every output lives under "<name>.dir/gen". For each Go source "dir/x.go"
  a Go thunk "x.go" and a C thunk "x.c" are declared there, next to the
  fixed cgo outputs (_cgo_export.c, _cgo_export.h, _cgo_main.c and
  _cgo_gotypes.go).

  Args:
    name: A unique name for the generated rule.
    srcs: Go source files; each must end in ".go" and contain `import "C"`.
    c_hdrs: C/C++ headers needed to resolve the C identifiers used in srcs.
    deps: cc_library rules the generated code is expected to be linked with.
    copts: Options forwarded to the code-generation rule.
    linkopts: Linker options used when the generated code is linked into
      the target binary.
    go_tool: Forwarded unchanged to the code-generation rule.
    toolchain: Forwarded unchanged to the code-generation rule.

  Returns:
    A struct with fields name, outdir, go_thunks, c_thunks, c_exports,
    c_dummy and gotypes describing the declared outputs.
  """
  rule_outdir = name + ".dir"
  gen_dir = rule_outdir + "/gen"

  # One stem per source: strip the ".go" suffix, then any directory part.
  stems = []
  for src in srcs:
    if not src.endswith('.go'):
      fail("not a .go file: %s" % src)
    stems.append(src[:-3].split("/")[-1])

  outs = struct(
      name = name,
      outdir = gen_dir,
      go_thunks = [gen_dir + "/" + stem + ".go" for stem in stems],
      c_thunks = [gen_dir + "/" + stem + ".c" for stem in stems],
      c_exports = [
          gen_dir + "/_cgo_export.c",
          gen_dir + "/_cgo_export.h",
      ],
      c_dummy = gen_dir + "/_cgo_main.c",
      gotypes = gen_dir + "/_cgo_gotypes.go",
  )

  all_outputs = outs.go_thunks + outs.c_thunks + outs.c_exports + [
      outs.c_dummy,
      outs.gotypes,
  ]
  _cgo_codegen_rule(
      name = name,
      srcs = srcs,
      c_hdrs = c_hdrs,
      deps = deps,
      copts = copts,
      linkopts = linkopts,
      go_tool = go_tool,
      toolchain = toolchain,
      outdir = rule_outdir,
      outs = all_outputs,
      visibility = ["//visibility:private"],
  )
  return outs
def _cgo_genrule_impl(ctx):
  """Implementation of the _cgo_genrule rule.

  Registers no actions; it only repackages the rule's attributes into a
  struct.

  Args:
    ctx: The rule context.

  Returns:
    A struct carrying the target label, the files of `srcs` as `go_sources`,
    the `cgo_object` attribute, the `deps` attribute as `direct_deps`, and
    empty lists for `asm_sources`, `asm_headers` and `gc_goopts`.
  """
  return struct(
    label = ctx.label,
    go_sources = ctx.files.srcs,
    asm_sources = [],
    asm_headers = [],
    cgo_object = ctx.attr.cgo_object,
    direct_deps = ctx.attr.deps,
    gc_goopts = [],
  )
def _setup_cgo_library(name, srcs, cdeps, copts, clinkopts, go_tool, toolchain):
  """Declares the intermediate targets shared by cgo_library and cgo_genrule.

  Splits `srcs` into Go sources, C/C++ headers and remaining C sources, then
  declares: the cgo code generator, an optional source filter for the C
  sources, a cc_library bundling the generated and user C code, the
  `_cgo_.o` cc_binary, the `_cgo_import.go` generator and the `_all.o`
  object.

  Args:
    name: Base name; every generated target name is derived from it.
    srcs: Go, header and C/C++ source files. At least one must end in ".go".
    cdeps: C/C++ library dependencies.
    copts: C/C++ compiler options.
    clinkopts: C/C++ linker options.
    go_tool: Forwarded to the code-generation and import-generation rules.
    toolchain: Forwarded to the code-generation and import-generation rules.

  Returns:
    The struct returned by _cgo_codegen, describing the generated outputs.
  """
  go_srcs = [s for s in srcs if s.endswith('.go')]
  if not go_srcs:
    # Fail early with a readable message; otherwise go_srcs[0] below dies
    # with an opaque index error after targets have already been declared.
    fail("%s: srcs must contain at least one .go file" % name)
  c_hdrs = [s for s in srcs if any([s.endswith(ext) for ext in hdr_exts])]
  # Built once instead of once per element of srcs.
  go_and_hdrs = go_srcs + c_hdrs
  c_srcs = [s for s in srcs if not s in go_and_hdrs]

  # Split cgo files into .go parts and .c parts (plus some other files).
  cgogen = _cgo_codegen(
      name = name + ".cgo",
      srcs = go_srcs,
      c_hdrs = c_hdrs,
      deps = cdeps,
      copts = copts,
      linkopts = clinkopts,
      go_tool = go_tool,
      toolchain = toolchain,
  )

  # Filter c_srcs with build constraints.
  c_filtered_srcs = []
  if len(c_srcs) > 0:
    c_filtered_srcs_name = name + "_filter_cgo_srcs"
    _cgo_filter_srcs(
        name = c_filtered_srcs_name,
        srcs = c_srcs,
    )
    c_filtered_srcs.append(":" + c_filtered_srcs_name)

  pkg_dir = _pkg_dir(
      "external/" + REPOSITORY_NAME[1:] if len(REPOSITORY_NAME) > 1 else "",
      PACKAGE_NAME)

  # Platform-specific settings
  native.config_setting(
      name = name + "_windows_setting",
      values = {
          "cpu": "x64_windows_msvc",
      },
  )
  platform_copts = select({
      ":" + name + "_windows_setting": ["-mthreads"],
      "//conditions:default": ["-pthread"],
  })
  platform_linkopts = select({
      ":" + name + "_windows_setting": ["-mthreads"],
      "//conditions:default": ["-pthread"],
  })

  # Bundles objects into an archive so that _cgo_.o and _all.o can share them.
  native.cc_library(
      name = cgogen.outdir + "/_cgo_lib",
      srcs = cgogen.c_thunks + cgogen.c_exports + c_filtered_srcs + c_hdrs,
      deps = cdeps,
      copts = copts + platform_copts + [
          "-I", pkg_dir,
          "-I", "$(GENDIR)/" + pkg_dir + "/" + cgogen.outdir,
          # The generated thunks often contain unused variables.
          "-Wno-unused-variable",
      ],
      linkopts = clinkopts + platform_linkopts,
      linkstatic = 1,
      # _cgo_.o and _all.o keep all objects in this archive.
      # But it should not be very annoying in the final binary target
      # because _cgo_object rule does not propagate alwayslink=1
      alwayslink = 1,
      visibility = ["//visibility:private"],
  )

  # Loadable object which cgo reads when it generates _cgo_import.go
  native.cc_binary(
      name = cgogen.outdir + "/_cgo_.o",
      srcs = [cgogen.c_dummy],
      deps = cdeps + [cgogen.outdir + "/_cgo_lib"],
      copts = copts,
      linkopts = clinkopts,
      visibility = ["//visibility:private"],
  )
  _cgo_import(
      name = "%s.cgo.importgen" % name,
      cgo_o = cgogen.outdir + "/_cgo_.o",
      out = cgogen.outdir + "/_cgo_import.go",
      # Any Go source will do: only its package name is extracted.
      sample_go_src = go_srcs[0],
      go_tool = go_tool,
      toolchain = toolchain,
      visibility = ["//visibility:private"],
  )

  _cgo_object(
      name = cgogen.outdir + "/_cgo_object",
      src = cgogen.outdir + "/_cgo_lib",
      out = cgogen.outdir + "/_all.o",
      cgogen = cgogen.name,
      visibility = ["//visibility:private"],
  )
  return cgogen
+ deps: List of other libraries to be linked to this library target. + data: List of files needed by this rule at runtime. + + NOTE: + `srcs` cannot contain pure-Go files, which do not have `import "C"`. + So you need to define another `go_library` when you build a go package with + both cgo-enabled and pure-Go sources. + + ``` + cgo_library( + name = "cgo_enabled", + srcs = ["cgo-enabled.go", "foo.cc", "bar.S", "baz.a"], + ) + + go_library( + name = "go_default_library", + srcs = ["pure-go.go"], + library = ":cgo_enabled", + ) + ``` + """ + cgogen = _setup_cgo_library( + name = name, + srcs = srcs, + cdeps = cdeps, + copts = copts, + clinkopts = clinkopts, + go_tool = go_tool, + toolchain = toolchain, + ) + + go_library( + name = name, + srcs = cgogen.go_thunks + [ + cgogen.gotypes, + cgogen.outdir + "/_cgo_import.go", + ], + cgo_object = cgogen.outdir + "/_cgo_object", + go_tool = go_tool, + toolchain = toolchain, + **kwargs + ) diff --git a/syntax/testdata/errors.sky b/syntax/testdata/errors.sky new file mode 100644 index 0000000..71fb4da --- /dev/null +++ b/syntax/testdata/errors.sky @@ -0,0 +1,165 @@ +# Tests of parse errors. +# This is a "chunked" file; each "---" line demarcates a new parser input. +# +# TODO(adonovan): lots more tests. 
+ +x = 1 + +2 ### "got newline, want primary expression" + +--- + +_ = *x ### `got '\*', want primary` + +--- + +def f(a, ): # trailing comma is ok + pass + +--- + +def f(*args, ): ### `got '\)', want parameter` + pass + +--- + +def f(**kwargs, ): ### `got '\)', want parameter` + pass + +--- + +def pass(): ### "not an identifier" + pass + +--- + +def f : ### `got ':', want '\('` + +--- + +f(a, ) # trailing comma is ok + +--- + +f(*args, ) ### `got '\)', want argument` + +--- + +f(**kwargs, ) ### `got '\)', want argument` + +--- + +_ = {x:y for y in z} # ok +_ = {x for y in z} ### `got for, want ':'` + +--- + +def f(): + pass + pass ### `unindent does not match any outer indentation level` + +--- +def f(): pass +--- +# Blank line after pass => outdent. +def f(): + pass + +--- +# No blank line after pass; EOF acts like a newline. +def f(): + pass +--- +# This is a well known parsing ambiguity in Python. +# Python 2.7 accepts it but Python3 and Skylark reject it. +_ = [x for x in lambda: True, lambda: False if x()] ### "got lambda, want primary" + +_ = [x for x in (lambda: True, lambda: False) if x()] # ok in all dialects + +--- +# Skylark, following Python 3, allows an unparenthesized +# tuple after 'in' only in a for statement but not in a comprehension. +# (Python 2.7 allows both.) +for x in 1, 2, 3: + print(x) + +_ = [x for x in 1, 2, 3] ### `got ',', want ']', for, or if` +--- +# Unparenthesized tuple is not allowed as operand of 'if' in comprehension. + +_ = [a for b in c if 1, 2] ### `got ',', want ']', for, or if` +--- +# Comparison operations are not associative. 
+ +_ = (0 == 1) == 2 # ok +_ = 0 == (1 == 2) # ok +_ = 0 == 1 == 2 ### "== does not associate with ==" + +--- + +_ = (0 <= i) < n # ok +_ = 0 <= (i < n) # ok +_ = 0 <= i < n ### "<= does not associate with <" + +--- + +_ = (a in b) not in c # ok +_ = a in (b not in c) # ok +_ = a in b not in c ### "in does not associate with not in" + +--- +# shift/reduce ambiguity is reduced +_ = [x for x in a if b else c] ### `got else, want ']', for, or if` +--- +[a for b in c else d] ### `got else, want ']', for, or if` +--- +_ = a + b not c ### "got identifier, want in" +--- +f(1+2 = 3) ### "keyword argument must have form name=expr" +--- +print(1, 2, 3 ### `got end of file, want '\)'` +--- +_ = a if b ### "conditional expression without else clause" +--- +load("") ### "load statement needs at least 2 operands, got 1" +--- +load("", 1) ### `load operand must be "name" or localname="name"` +--- +load(1, 2) ### "first operand of load statement must be a string literal" +--- +load("a", x) ### `load operand must be "name" or localname="name"` +--- +load("a", x2=x) ### `load operand must be "name" or localname="name"` +--- +# All of these parse. +load("a", "x") +load("a", "x", y2="y") +load("a", x2="x", "y") # => positional-before-named arg check happens later (!) +--- +x = 1 + +2 ### "got newline, want primary expression" +--- +def f(): + pass +# this used to cause a spurious indentation error +--- +print 1 2 ### `got int literal, want newline` + +--- +# newlines are not allowed in raw string literals +raw = r'a ### `unexpected newline in string` +b' + +--- +# The parser permits an unparenthesized tuple expression for the first index. +x[1, 2:] # ok +--- +# But not if it has a trailing comma. +x[1, 2,:] ### `got ':', want primary` +--- +# Trailing tuple commas are permitted only within parens; see b/28867036. 
+(a, b,) = 1, 2 # ok +c, d = 1, 2 # ok +--- +a, b, = 1, 2 ### `unparenthesized tuple with trailing comma` +--- +a, b = 1, 2, ### `unparenthesized tuple with trailing comma` diff --git a/syntax/walk.go b/syntax/walk.go new file mode 100644 index 0000000..aeda473 --- /dev/null +++ b/syntax/walk.go @@ -0,0 +1,155 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +// Walk traverses a syntax tree in depth-first order. +// It starts by calling f(n); n must not be nil. +// If f returns true, Walk calls itself +// recursively for each non-nil child of n. +// Walk then calls f(nil). +func Walk(n Node, f func(Node) bool) { + if !f(n) { + return + } + + // TODO(adonovan): opt: order cases using profile data. + switch n := n.(type) { + case *File: + walkStmts(n.Stmts, f) + + case *ExprStmt: + Walk(n.X, f) + + case *BranchStmt: + // no-op + + case *IfStmt: + Walk(n.Cond, f) + walkStmts(n.True, f) + walkStmts(n.False, f) + + case *AssignStmt: + Walk(n.RHS, f) + Walk(n.LHS, f) + + case *DefStmt: + Walk(n.Name, f) + for _, param := range n.Function.Params { + Walk(param, f) + } + walkStmts(n.Function.Body, f) + + case *ForStmt: + Walk(n.Vars, f) + Walk(n.X, f) + walkStmts(n.Body, f) + + case *ReturnStmt: + if n.Result != nil { + Walk(n.Result, f) + } + + case *LoadStmt: + Walk(n.Module, f) + for _, from := range n.From { + Walk(from, f) + } + for _, to := range n.To { + Walk(to, f) + } + + case *Ident, *Literal: + // no-op + + case *ListExpr: + for _, x := range n.List { + Walk(x, f) + } + + case *CondExpr: + Walk(n.Cond, f) + Walk(n.True, f) + Walk(n.False, f) + + case *IndexExpr: + Walk(n.X, f) + Walk(n.Y, f) + + case *DictEntry: + Walk(n.Key, f) + Walk(n.Value, f) + + case *SliceExpr: + Walk(n.X, f) + if n.Lo != nil { + Walk(n.Lo, f) + } + if n.Hi != nil { + Walk(n.Hi, f) + } + if n.Step != nil { + Walk(n.Step, f) + } + + case *Comprehension: 
+ for _, clause := range n.Clauses { + Walk(clause, f) + } + Walk(n.Body, f) + + case *IfClause: + Walk(n.Cond, f) + + case *ForClause: + Walk(n.Vars, f) + Walk(n.X, f) + + case *TupleExpr: + for _, x := range n.List { + Walk(x, f) + } + + case *DictExpr: + for _, entry := range n.List { + entry := entry.(*DictEntry) + Walk(entry.Key, f) + Walk(entry.Value, f) + } + + case *UnaryExpr: + Walk(n.X, f) + + case *BinaryExpr: + Walk(n.X, f) + Walk(n.Y, f) + + case *DotExpr: + Walk(n.X, f) + Walk(n.Name, f) + + case *CallExpr: + Walk(n.Fn, f) + for _, arg := range n.Args { + Walk(arg, f) + } + + case *LambdaExpr: + for _, param := range n.Function.Params { + Walk(param, f) + } + walkStmts(n.Function.Body, f) + + default: + panic(n) + } + + f(nil) +} + +func walkStmts(stmts []Stmt, f func(Node) bool) { + for _, stmt := range stmts { + Walk(stmt, f) + } +} diff --git a/testdata/assign.sky b/testdata/assign.sky new file mode 100644 index 0000000..90101d9 --- /dev/null +++ b/testdata/assign.sky @@ -0,0 +1,277 @@ +# Tests of Skylark assignment. + +# This is a "chunked" file: each "---" effectively starts a new file. 
+ +# tuple assignment +load("assert.sky", "assert") + +a, b, c = 1, 2, 3 +assert.eq(a, 1) +assert.eq(b, 2) +assert.eq(c, 3) + +def f1(): (x,) = 1 +assert.fails(f1, "int in sequence assignment") +def f2(): a, b, c = 1, 2 +assert.fails(f2, "too few values to unpack") +def f3(): a, b = 1, 2, 3 +assert.fails(f3, "too many values to unpack") +def f4(): a, b = (1,) +assert.fails(f4, "too few values to unpack") +def f5(): (a,) = [1, 2, 3] +assert.fails(f5, "too many values to unpack") + +--- +# list assignment +load("assert.sky", "assert") + +[a, b, c] = [1, 2, 3] +assert.eq(a, 1) +assert.eq(b, 2) +assert.eq(c, 3) + +def f1(): [a, b, c,] = 1 +assert.fails(f1, "got int in sequence assignment") +def f2(): [a, b, c] = 1, 2 +assert.fails(f2, "too few values to unpack") +def f3(): [a, b] = 1, 2, 3 +assert.fails(f3, "too many values to unpack") +def f4(): [a, b] = (1,) +assert.fails(f4, "too few values to unpack") + +--- +# list-tuple assignment +load("assert.sky", "assert") + +[a, b, c] = (1, 2, 3) +assert.eq(a, 1) +assert.eq(b, 2) +assert.eq(c, 3) + +(d, e, f) = [1, 2, 3] +assert.eq(d, 1) +assert.eq(e, 2) +assert.eq(f, 3) + +[g, h, (i, j)] = (1, 2, [3, 4]) +assert.eq(g, 1) +assert.eq(h, 2) +assert.eq(i, 3) +assert.eq(j, 4) + +(k, l, [m, n]) = [1, 2, (3, 4)] +assert.eq(k, 1) +assert.eq(l, 2) +assert.eq(m, 3) +assert.eq(n, 4) + +--- +# misc assignment +load("assert.sky", "assert") + +def assignment(): + a = [1, 2, 3] + a[1] = 5 + assert.eq(a, [1, 5, 3]) + a[-2] = 2 + assert.eq(a, [1, 2, 3]) + assert.eq("%d %d" % (5, 7), "5 7") + x={} + x[1] = 2 + x[1] += 3 + assert.eq(x[1], 5) + def f12(): x[(1, "abc", {})] = 1 + assert.fails(f12, "unhashable type: dict") + +assignment() + +--- +# augmented assignment + +load("assert.sky", "assert") + +def f(): + x = 1 + x += 1 + assert.eq(x, 2) + x *= 3 + assert.eq(x, 6) +f() + +--- +# effects of evaluating LHS occur only once + +load("assert.sky", "assert") + +count = [0] # count[0] is the number of calls to f + +def f(): + count[0] += 1 + 
return count[0] + +x = [1, 2, 3] +x[f()] += 1 + +assert.eq(x, [1, 3, 3]) # sole call to f returned 1 +assert.eq(count[0], 1) # f was called only once + +--- +# Order of evaluation. + +load("assert.sky", "assert") + +calls = [] + +def f(name, result): + calls.append(name) + return result + +# The right side is evaluated before the left in an ordinary assignment. +calls.clear() +f("array", [0])[f("index", 0)] = f("rhs", 0) +assert.eq(calls, ["rhs", "array", "index"]) + +calls.clear() +f("lhs1", [0])[0], f("lhs2", [0])[0] = f("rhs1", 0), f("rhs2", 0) +assert.eq(calls, ["rhs1", "rhs2", "lhs1", "lhs2"]) + +# Left side is evaluated first (and only once) in an augmented assignment. +calls.clear() +f("array", [0])[f("index", 0)] += f("addend", 1) +assert.eq(calls, ["array", "index", "addend"]) + +--- +# global referenced before assignment + +def f(): + return g ### "global variable g referenced before assignment" + +f() + +g = 1 + +--- +# free variable captured before assignment + +def f(): + def g(): ### "local variable outer referenced before assignment" + return outer + outer = 1 + +f() + +--- +load("assert.sky", "assert") + +printok = [False] + +# This program should resolve successfully but fail dynamically. +# However, the Java implementation currently reports the dynamic +# error at the x=1 statement (b/33975425). I think we need to simplify +# the resolver algorithm to what we have implemented. +def use_before_def(): + print(x) # dynamic error: local var referenced before assignment + printok[0] = True + x = 1 # makes 'x' local + +assert.fails(use_before_def, 'local variable x referenced before assignment') +assert.true(not printok[0]) # execution of print statement failed + +--- +x = [1] +x.extend([2]) # ok + +def f(): + x += [4] ### "local variable x referenced before assignment" + +f() + +--- + +z += 3 ### "global variable z referenced before assignment" + +--- +# It's ok to define a global that shadows a built-in. 
+ +load("assert.sky", "assert") + +assert.eq(type(list), "builtin") +list = [] +assert.eq(type(list), "list") + +# set and float are dialect-specific, +# but we shouldn't notice any difference. + +assert.eq(type(float), "builtin") +float = 1.0 +assert.eq(type(float), "float") + +assert.eq(type(set), "builtin") +set = [1, 2, 3] +assert.eq(type(set), "list") + +--- +# All 'in x' expressions in a comprehension are evaluated +# in the comprehension's lexical block. +# +# By contrast, Python yields [[1, 2], [1, 2]] because it evaluates +# the first 'in x' in the environment enclosing the comprehension. +x = [[1, 2]] +_ = [x for x in x for y in x] ### "local variable x referenced before assignment" + +--- +# A comprehension establishes a single new lexical block, +# not one per 'for' clause. +x = [1, 2] +_ = [x for _ in [3] for x in x] ### "local variable x referenced before assignment" + +--- +load("assert.sky", "assert") + +# assign singleton sequence to 1-tuple +(x,) = (1,) +assert.eq(x, 1) +(y,) = [1] +assert.eq(y, 1) + +# assign 1-tuple to variable +z = (1,) +assert.eq(type(z), "tuple") +assert.eq(len(z), 1) +assert.eq(z[0], 1) + +--- +# assignment to/from fields. +load("assert.sky", "assert") + +hf = hasfields() +hf.x = 1 +assert.eq(hf.x, 1) +hf.x = [1, 2] +hf.x += [3, 4] +assert.eq(hf.x, [1, 2, 3, 4]) +freeze(hf) +def setX(hf): + hf.x = 2 +def setY(hf): + hf.y = 3 +assert.fails(lambda: setX(hf), "cannot set field on a frozen hasfields") +assert.fails(lambda: setY(hf), "cannot set field on a frozen hasfields") + +--- +# destructuring assignment in a for loop. 
+load("assert.sky", "assert") + +def f(): + res = [] + for (x, y), z in [(["a", "b"], 3), (["c", "d"], 4)]: + res.append((x, y, z)) + return res +assert.eq(f(), [("a", "b", 3), ("c", "d", 4)]) + +def g(): + a = {} + for i, a[i] in [("one", 1), ("two", 2)]: + pass + return a +assert.eq(g(), {"one": 1, "two": 2}) diff --git a/testdata/benchmark.sky b/testdata/benchmark.sky new file mode 100644 index 0000000..e0d260a --- /dev/null +++ b/testdata/benchmark.sky @@ -0,0 +1,22 @@ +# Benchmarks of Skylark execution + +def bench_range(): + return range(200) + +# Make a 2-level call tree of 100 * 100 calls. +def bench_calling(): + list = range(100) + def g(): + for x in list: + pass + def f(): + for x in list: + g() + f() + +# Measure overhead of calling a trivial built-in method. +emptydict = {} +range1000 = range(1000) +def bench_builtin_method(): + for _ in range1000: + emptydict.get(None) diff --git a/testdata/bool.sky b/testdata/bool.sky new file mode 100644 index 0000000..19de9c2 --- /dev/null +++ b/testdata/bool.sky @@ -0,0 +1,43 @@ +# Tests of Skylark 'bool' + +load("assert.sky", "assert") + +# truth +assert.true(True) +assert.true(not False) + +# bool conversion +assert.eq([bool(), bool(1), bool(0), bool("hello"), bool("")], + [False, True, False, True, False]) + +# comparison +assert.true(None == None) +assert.true(None != False) +assert.true(None != True) +assert.eq(1==1, True) +assert.eq(1==2, False) +assert.true(False == False) +assert.true(True == True) + +# ordered comparison +assert.true(False < True) +assert.true(False <= True) +assert.true(False <= False) +assert.true(True > False) +assert.true(True >= False) +assert.true(True >= True) + +# conditional expression +assert.eq(1 if 3 > 2 else 0, 1) +assert.eq(1 if "foo" else 0, 1) +assert.eq(1 if "" else 0, 0) + +# short-circuit evaluation of 'and' and 'or': +# 'or' yields the first true operand, or the last if all are false. 
+assert.eq(0 or "" or [] or 0, 0) +assert.eq(0 or "" or [] or 123 or 1/0, 123) +assert.fails(lambda: 0 or "" or [] or 0 or 1/0, "division by zero") +# 'and' yields the first false operand, or the last if all are true. +assert.eq(1 and "a" and [1] and 123, 123) +assert.eq(1 and "a" and [1] and 0 and 1/0, 0) +assert.fails(lambda: 1 and "a" and [1] and 123 and 1/0, "division by zero") diff --git a/testdata/builtins.sky b/testdata/builtins.sky new file mode 100644 index 0000000..9fe7c91 --- /dev/null +++ b/testdata/builtins.sky @@ -0,0 +1,140 @@ +# Tests of Skylark built-in functions + +load("assert.sky", "assert") + +# len +assert.eq(len([1, 2, 3]), 3) +assert.eq(len((1, 2, 3)), 3) +assert.eq(len({1: 2}), 1) +assert.fails(lambda: len(1), "int.*has no len") + +# and, or +assert.eq(123 or "foo", 123) +assert.eq(0 or "foo", "foo") +assert.eq(123 and "foo", "foo") +assert.eq(0 and "foo", 0) +none = None +_1 = none and none[0] # rhs is not evaluated +_2 = (not none) or none[0] # rhs is not evaluated + +# any, all +assert.true(all([])) +assert.true(all([1, True, "foo"])) +assert.true(not all([1, True, ""])) +assert.true(not any([])) +assert.true(any([0, False, "foo"])) +assert.true(not any([0, False, ""])) + +# in +assert.true(3 in [1, 2, 3]) +assert.true(4 not in [1, 2, 3]) +assert.true(3 in (1, 2, 3)) +assert.true(4 not in (1, 2, 3)) +assert.fails(lambda: 3 in "foo", "in.*requires string as left operand") +assert.true(123 in {123: ""}) +assert.true(456 not in {123:""}) +assert.fails(lambda: [] in {123: ""}, "unhashable") + +# sorted +assert.eq(sorted([42, 123, 3]), [3, 42, 123]) +assert.eq(sorted([42, 123, 3], reverse=True), [123, 42, 3]) +assert.eq(sorted(["wiz", "foo", "bar"]), ["bar", "foo", "wiz"]) +assert.eq(sorted(["wiz", "foo", "bar"], reverse=True), ["wiz", "foo", "bar"]) +assert.fails(lambda: sorted([1, 2, None, 3]), "int < NoneType not implemented") +assert.fails(lambda: sorted([1, "one"]), "string < int not implemented") +# custom cmp +def cmplen(x, y): return 
len(x) - len(y) +assert.eq(sorted(["two", "three", "four"], cmp=cmplen), + ["two", "four", "three"]) +assert.eq(sorted(["two", "three", "four"], cmp=cmplen, reverse=True), + ["three", "four", "two"]) +assert.fails(lambda: sorted([1, 2, 3], cmp=None), "got NoneType, want callable") + +# reversed +assert.eq(reversed([1, 144, 81, 16]), [16, 81, 144, 1]) + +# set +assert.contains(set([1, 2, 3]), 1) +assert.true(4 not in set([1, 2, 3])) +assert.eq(len(set([1, 2, 3])), 3) +assert.eq(sorted([x for x in set([1, 2, 3])]), [1, 2, 3]) + +# dict +assert.eq(dict([(1, 2), (3, 4)]), {1: 2, 3: 4}) +assert.eq(dict([(1, 2), (3, 4)], foo="bar"), {1: 2, 3: 4, "foo": "bar"}) +assert.eq(dict({1:2, 3:4}), {1: 2, 3: 4}) +assert.eq(dict({1:2, 3:4}.items()), {1: 2, 3: 4}) + +# range +assert.eq(range(5), [0, 1, 2, 3, 4]) +assert.eq(range(-5), []) +assert.eq(range(2, 5), [2, 3, 4]) +assert.eq(range(5, 2), []) +assert.eq(range(-2, -5), []) +assert.eq(range(-5, -2), [-5, -4, -3]) +assert.eq(range(2, 10, 3), [2, 5, 8]) +assert.eq(range(10, 2, -3), [10, 7, 4]) +assert.eq(range(-2, -10, -3), [-2, -5, -8]) +assert.eq(range(-10, -2, 3), [-10, -7, -4]) + +# list +assert.eq(list("abc".split_bytes()), ["a", "b", "c"]) +assert.eq(sorted(list({"a": 1, "b": 2})), ['a', 'b']) + +# min, max +assert.eq(min(5, -2, 1, 7, 3), -2) +assert.eq(max(5, -2, 1, 7, 3), 7) +assert.eq(min([5, -2, 1, 7, 3]), -2) +assert.eq(min("one", "two", "three", "four"), "four") +assert.eq(max("one", "two", "three", "four"), "two") +assert.fails(min, "min requires at least one positional argument") +assert.fails(lambda: min(1), "not iterable") +assert.fails(lambda: min([]), "empty") +assert.eq(min(5, -2, 1, 7, 3, key=lambda x: x*x), 1) # min absolute value +assert.eq(min(5, -2, 1, 7, 3, key=lambda x: -x), 7) # min negated value + +# enumerate +assert.eq(enumerate("abc".split_bytes()), [(0, "a"), (1, "b"), (2, "c")]) +assert.eq(enumerate([False, True, None], 42), [(42, False), (43, True), (44, None)]) + +# zip +assert.eq(zip(), []) 
+assert.eq(zip([1, 2, 3]), [(1,), (2,), (3,)]) +assert.eq(zip(list("abc".split_bytes()), + list("def".split_bytes()), + list("hijk".split_bytes())), + [("a", "d", "h"), ("b", "e", "i"), ("c", "f", "j")]) + +# dir for builtins +assert.eq(dir(None), []) +assert.eq(dir({})[:3], ["clear", "get", "items"]) # etc +assert.eq(dir(1), []) +assert.eq(dir([])[:3], ["append", "clear", "extend"]) # etc + +# hasattr, getattr, dir +# hasfields is an application-defined type defined in eval_test.go. +hf = hasfields() +assert.eq(dir(hf), []) +assert.true(not hasattr(hf, "x")) +assert.fails(lambda: getattr(hf, "x"), "no .x field or method") +assert.eq(getattr(hf, "x", 42), 42) +hf.x = 1 +assert.true(hasattr(hf, "x")) +assert.eq(getattr(hf, "x"), 1) +assert.eq(hf.x, 1) +hf.x = 2 +assert.eq(getattr(hf, "x"), 2) +assert.eq(hf.x, 2) +# built-in types can have attributes (methods) too. +myset = set([]) +assert.eq(dir(myset), ["union"]) +assert.true(hasattr(myset, "union")) +assert.true(not hasattr(myset, "onion")) +assert.eq(str(getattr(myset, "union")), "<built-in method union of set value>") +assert.fails(lambda: getattr(myset, "onion"), "no .onion field or method") +assert.eq(getattr(myset, "onion", 42), 42) + +# repr +assert.eq(repr(1), "1") +assert.eq(repr("x"), '"x"') +assert.eq(repr(["x", 1]), '["x", 1]') diff --git a/testdata/control.sky b/testdata/control.sky new file mode 100644 index 0000000..9affacf --- /dev/null +++ b/testdata/control.sky @@ -0,0 +1,64 @@ +# Tests of Skylark control flow + +load("assert.sky", "assert") + +def controlflow(): + # elif + x = 0 + if True: + x=1 + elif False: + assert.fail("else of true") + else: + assert.fail("else of else of true") + assert.true(x) + + x = 0 + if False: + assert.fail("then of false") + elif True: + x = 1 + else: + assert.fail("else of true") + assert.true(x) + + x = 0 + if False: + assert.fail("then of false") + elif False: + assert.fail("then of false") + else: + x = 1 + assert.true(x) +controlflow() + +def loops(): + y = "" + 
for x in [1, 2, 3, 4, 5]: + if x == 2: + continue + if x == 4: + break + y = y + str(x) + return y +assert.eq(loops(), "13") + +# return +g = 123 +def f(x): + for g in (1, 2, 3): + if g == x: + return g +assert.eq(f(2), 2) +assert.eq(f(4), None) # falling off end => return None +assert.eq(g, 123) # unchanged by local use of g in function + +# infinite sequences +def fib(n): + seq = [] + for x in fibonacci: # fibonacci is an infinite iterable defined in eval_test.go + if len(seq) == n: + break + seq.append(x) + return seq +assert.eq(fib(10), [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]) diff --git a/testdata/dict.sky b/testdata/dict.sky new file mode 100644 index 0000000..6e6f1e5 --- /dev/null +++ b/testdata/dict.sky @@ -0,0 +1,208 @@ +# Tests of Skylark 'dict' + +load("assert.sky", "assert") + +# literals +assert.eq({}, {}) +assert.eq({"a": 1}, {"a": 1}) +assert.eq({"a": 1,}, {"a": 1}) + +# truth +assert.true({False: False}) +assert.true(not {}) + +# dict + dict (undocumented and deprecated; see b/36360157). 
+assert.eq({"a": 1, "b": 2} + {"a": 3, "c": 4}, {"a": 3, "b": 2, "c": 4}) + +# dict comprehension +assert.eq({x: x*x for x in range(3)}, {0: 0, 1: 1, 2: 4}) + +# dict.pop +x6 = {"a": 1, "b": 2} +assert.eq(x6.pop("a"), 1) +assert.eq(str(x6), '{"b": 2}') +assert.fails(lambda: x6.pop("c"), "pop: missing key") +assert.eq(x6.pop("c", 3), 3) +assert.eq(x6.pop("c", None), None) # default=None tests an edge case of UnpackArgs +assert.eq(x6.pop("b"), 2) +assert.eq(len(x6), 0) + +# dict.popitem +x7 = {"a": 1, "b": 2} +assert.eq([x7.popitem(), x7.popitem()], [("a", 1), ("b", 2)]) +assert.fails(x7.popitem, "empty dict") +assert.eq(len(x7), 0) + +# dict.keys, dict.values +x8 = {"a": 1, "b": 2} +assert.eq(x8.keys(), ["a", "b"]) +assert.eq(x8.values(), [1, 2]) + +# equality +assert.eq({"a": 1, "b": 2}, {"a": 1, "b": 2}) +assert.eq({"a": 1, "b": 2,}, {"a": 1, "b": 2}) +assert.eq({"a": 1, "b": 2}, {"b": 2, "a": 1}) + +# insertion order is preserved +assert.eq(dict([("a", 0), ("b", 1), ("c", 2), ("b", 3)]).keys(), ["a", "b", "c"]) +assert.eq(dict([("b", 0), ("a", 1), ("b", 2), ("c", 3)]).keys(), ["b", "a", "c"]) +assert.eq(dict([("b", 0), ("a", 1), ("b", 2), ("c", 3)])["b"], 2) +# ...even after rehashing (which currently occurs after key 'i'): +small = dict([("a", 0), ("b", 1), ("c", 2)]) +small.update([("d", 4), ("e", 5), ("f", 6), ("g", 7), ("h", 8), ("i", 9), ("j", 10), ("k", 11)]) +assert.eq(small.keys(), ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"]) + +# duplicate keys are not permitted in dictionary expressions (see b/35698444). 
+assert.fails(lambda: {"aa": 1, "bb": 2, "cc": 3, "bb": 4}, 'duplicate key: "bb"') + +# index +def setIndex(d, k, v): + d[k] = v + +x9 = {} +assert.fails(lambda: x9["a"], 'key "a" not in dict') +x9["a"] = 1 +assert.eq(x9["a"], 1) +assert.eq(x9, {"a": 1}) +assert.fails(lambda: setIndex(x9, [], 2), 'unhashable type: list') +freeze(x9) +assert.fails(lambda: setIndex(x9, "a", 3), 'cannot insert into frozen hash table') + +x9a = {} +x9a[1, 2] = 3 # unparenthesized tuple is allowed here +assert.eq(x9a.keys()[0], (1, 2)) + +# dict.get +x10 = {"a": 1} +assert.eq(x10.get("a"), 1) +assert.eq(x10.get("b"), None) +assert.eq(x10.get("a", 2), 1) +assert.eq(x10.get("b", 2), 2) + +# dict.clear +x11 = {"a": 1} +assert.contains(x11, "a") +assert.eq(x11["a"], 1) +x11.clear() +assert.fails(lambda: x11["a"], 'key "a" not in dict') +assert.true("a" not in x11) +freeze(x11) +assert.fails(x11.clear, "cannot clear frozen hash table") + +# dict.setdefault +x12 = {"a": 1} +assert.eq(x12.setdefault("a"), 1) +assert.eq(x12["a"], 1) +assert.eq(x12.setdefault("b"), None) +assert.eq(x12["b"], None) +assert.eq(x12.setdefault("c", 2), 2) +assert.eq(x12["c"], 2) +assert.eq(x12.setdefault("c", 3), 2) +assert.eq(x12["c"], 2) +freeze(x12) +assert.eq(x12.setdefault("a", 1), 1) # no change, no error +assert.fails(lambda: x12.setdefault("d", 1), "cannot insert into frozen hash table") + +# dict.update +x13 = {"a": 1} +x13.update(a=2, b=3) +assert.eq(x13, {"a": 2, "b": 3}) +x13.update([("b", 4), ("c", 5)]) +assert.eq(x13, {"a": 2, "b": 4, "c": 5}) +x13.update({"c": 6, "d": 7}) +assert.eq(x13, {"a": 2, "b": 4, "c": 6, "d": 7}) +freeze(x13) +assert.fails(lambda: x13.update({"a": 8}), "cannot insert into frozen hash table") + +# dict as a sequence +# +# for loop +x14 = {1:2, 3:4} +def keys(dict): + keys = [] + for k in dict: keys.append(k) + return keys +assert.eq(keys(x14), [1, 3]) +# +# comprehension +assert.eq([x for x in x14], [1, 3]) +# +# varargs +def varargs(*args): return args +x15 = {"one": 1} 
+assert.eq(varargs(*x15), ("one",)) + +# kwargs parameter does not alias the **kwargs dict +def kwargs(**kwargs): return kwargs +x16 = kwargs(**x15) +assert.eq(x16, x15) +x15["two"] = 2 # mutate +assert.ne(x16, x15) + +# iterator invalidation +def iterator1(): + dict = {1:1, 2:1} + for k in dict: + dict[2*k] = dict[k] +assert.fails(iterator1, "insert.*during iteration") + +def iterator2(): + dict = {1:1, 2:1} + for k in dict: + dict.pop(k) +assert.fails(iterator2, "delete.*during iteration") + +def iterator3(): + def f(d): + d[3] = 3 + dict = {1:1, 2:1} + _ = [f(dict) for x in dict] +assert.fails(iterator3, "insert.*during iteration") + +# This assignment is not a modification-during-iteration: +# the sequence x should be completely iterated before +# the assignment occurs. +def f(): + x = {1:2, 2:4} + a, x[0] = x + # There are two possible outcomes, depending on iteration order: + if not (a == 1 and x == {0: 2, 1: 2, 2: 4} or + a == 2 and x == {0: 1, 1: 2, 2: 4}): + assert.fail("unexpected results: a=%s x=%s" % (a, x)) +f() + +# Regression test for a bug in hashtable.delete +def test_delete(): + d = {} + + # delete tail first + d["one"] = 1 + d["two"] = 2 + assert.eq(str(d), '{"one": 1, "two": 2}') + d.pop("two") + assert.eq(str(d), '{"one": 1}') + d.pop("one") + assert.eq(str(d), '{}') + + # delete head first + d["one"] = 1 + d["two"] = 2 + assert.eq(str(d), '{"one": 1, "two": 2}') + d.pop("one") + assert.eq(str(d), '{"two": 2}') + d.pop("two") + assert.eq(str(d), '{}') + + # delete middle + d["one"] = 1 + d["two"] = 2 + d["three"] = 3 + assert.eq(str(d), '{"one": 1, "two": 2, "three": 3}') + d.pop("two") + assert.eq(str(d), '{"one": 1, "three": 3}') + d.pop("three") + assert.eq(str(d), '{"one": 1}') + d.pop("one") + assert.eq(str(d), '{}') + +test_delete() diff --git a/testdata/float.sky b/testdata/float.sky new file mode 100644 index 0000000..ea3ee94 --- /dev/null +++ b/testdata/float.sky @@ -0,0 +1,277 @@ +# Tests of Skylark 'float' + +load("assert.sky", 
"assert") + +# TODO(adonovan): more tests: +# - precision +# - limits + +# literals +assert.eq(type(1.234), "float") +assert.eq(type(1e10), "float") +assert.eq(type(1e+10), "float") +assert.eq(type(1e-10), "float") +assert.eq(type(1.234e10), "float") +assert.eq(type(1.234e+10), "float") +assert.eq(type(1.234e-10), "float") + +# truth +assert.true(123.0) +assert.true(-1.0) +assert.true(not 0.0) + +# addition +assert.eq(0.0 + 1.0, 1.0) +assert.eq(1.0 + 1.0, 2.0) +assert.eq(1.25 + 2.75, 4.0) +assert.eq(5.0 + 7.0, 12.0) +assert.eq(5.1 + 7, 12.1) # float + int +assert.eq(7 + 5.1, 12.1) # int + float + +# subtraction +assert.eq(5.0 - 7.0, -2.0) +assert.eq(5.1 - 7.1, -2.0) +assert.eq(5.5 - 7, -1.5) +assert.eq(5 - 7.5, -2.5) +assert.eq(0.0 - 1.0, -1.0) + +# multiplication +assert.eq(5.0 * 7.0, 35.0) +assert.eq(5.5 * 2.5, 13.75) +assert.eq(5.5 * 7, 38.5) +assert.eq(5 * 7.1, 35.5) + +# real division (like Python 3) +# The / operator is available only when the 'fp' dialect option is enabled. +assert.eq(100.0 / 8.0, 12.5) +assert.eq(100.0 / -8.0, -12.5) +assert.eq(-100.0 / 8.0, -12.5) +assert.eq(-100.0 / -8.0, 12.5) +assert.eq(98.0 / 8.0, 12.25) +assert.eq(98.0 / -8.0, -12.25) +assert.eq(-98.0 / 8.0, -12.25) +assert.eq(-98.0 / -8.0, 12.25) +assert.eq(2.5 / 2.0, 1.25) +assert.eq(2.5 / 2, 1.25) +assert.eq(5 / 4.0, 1.25) +assert.eq(5 / 4, 1.25) +assert.fails(lambda: 1.0 / 0, "real division by zero") +assert.fails(lambda: 1.0 / 0.0, "real division by zero") +assert.fails(lambda: 1 / 0.0, "real division by zero") + +# floored division +assert.eq(100.0 // 8.0, 12.0) +assert.eq(100.0 // -8.0, -13.0) +assert.eq(-100.0 // 8.0, -13.0) +assert.eq(-100.0 // -8.0, 12.0) +assert.eq(98.0 // 8.0, 12.0) +assert.eq(98.0 // -8.0, -13.0) +assert.eq(-98.0 // 8.0, -13.0) +assert.eq(-98.0 // -8.0, 12.0) +assert.eq(2.5 // 2.0, 1.0) +assert.eq(2.5 // 2, 1.0) +assert.eq(5 // 4.0, 1.0) +assert.eq(5 // 4, 1) +assert.eq(type(5 // 4), "int") +assert.fails(lambda: 1.0 // 0, "floored division by zero") 
+assert.fails(lambda: 1.0 // 0.0, "floored division by zero") +assert.fails(lambda: 1 // 0.0, "floored division by zero") + +# remainder +assert.eq(100.0 % 8.0, 4.0) +assert.eq(100.0 % -8.0, 4.0) +assert.eq(-100.0 % 8.0, -4.0) +assert.eq(-100.0 % -8.0, -4.0) +assert.eq(98.0 % 8.0, 2.0) +assert.eq(98.0 % -8.0, 2.0) +assert.eq(-98.0 % 8.0, -2.0) +assert.eq(-98.0 % -8.0, -2.0) +assert.eq(2.5 % 2.0, 0.5) +assert.eq(2.5 % 2, 0.5) +assert.eq(5 % 4.0, 1.0) +assert.fails(lambda: 1.0 % 0, "float modulo by zero") +assert.fails(lambda: 1.0 % 0.0, "float modulo by zero") +assert.fails(lambda: 1 % 0.0, "float modulo by zero") + +# floats cannot be used as indices, even if integral +assert.fails(lambda: "abc"[1.0], "want int") +assert.fails(lambda: ["A", "B", "C"].insert(1.0, "D"), "want int") + +# nan +nan = float("NaN") +def isnan(x): return x != x +assert.true(nan != nan) +assert.true(not (nan == nan)) + +# ordered comparisons with NaN +assert.true(not nan < nan) +assert.true(not nan > nan) +assert.true(not nan <= nan) +assert.true(not nan >= nan) +assert.true(not nan == nan) # use explicit operator, not assert.ne +assert.true(nan != nan) +assert.eq(cmp(nan, nan), 0) +assert.true(not nan < 0) +assert.true(not nan > 0) +assert.true(not [nan] < [nan]) +assert.true(not [nan] > [nan]) + +# Even a value containing NaN is not equal to itself. +nanlist = [nan] +assert.true(not nanlist < nanlist) +assert.true(not nanlist > nanlist) +assert.ne(nanlist, nanlist) + +# Since NaN values never compare equal, +# a dict may have any number of NaN keys. 
+nandict = {nan: 1, nan: 2, nan: 3} +assert.eq(len(nandict), 3) +assert.eq(str(nandict), "{NaN: 1, NaN: 2, NaN: 3}") +assert.true(nan not in nandict) +assert.eq(nandict.get(nan, None), None) + +# inf +inf = float("Inf") +neginf = float("-Inf") +assert.true(isnan(+inf / +inf)) +assert.true(isnan(+inf / -inf)) +assert.true(isnan(-inf / +inf)) +assert.eq(0.0 / +inf, 0.0) +assert.eq(0.0 / -inf, 0.0) +assert.true(inf > -inf) +assert.eq(inf, -neginf) +assert.eq(float(int("2" + "0" * 308)), inf) # 2e308 is too large to represent as a float +assert.eq(float(int("-2" + "0" * 308)), -inf) +# TODO(adonovan): assert inf > any finite number, etc. + +# negative zero +negz = -0 +assert.eq(negz, 0) + +# float/float comparisons +fltmax = 1.7976931348623157e+308 # approx +fltmin = 4.9406564584124654e-324 # approx +assert.lt(-inf, -fltmax) +assert.lt(-fltmax, -1.0) +assert.lt(-1.0, -fltmin) +assert.lt(-fltmin, 0.0) +assert.lt(0, fltmin) +assert.lt(fltmin, 1.0) +assert.lt(1.0, fltmax) +assert.lt(fltmax, inf) + +# int/float comparisons +assert.eq(0, 0.0) +assert.eq(1, 1.0) +assert.eq(-1, -1.0) +assert.ne(-1, -1.0 + 1e-7) +assert.lt(-2, -2 + 1e-15) + +# int conversion (rounds towards zero) +assert.eq(int(100.1), 100) +assert.eq(int(100.0), 100) +assert.eq(int(99.9), 99) +assert.eq(int(-99.9), -99) +assert.eq(int(-100.0), -100) +assert.eq(int(-100.1), -100) +assert.eq(int(1e100), int("10000000000000000159028911097599180468360808563945281389781327557747838772170381060813469985856815104")) +assert.fails(lambda: int(inf), "cannot convert.*infinity") +assert.fails(lambda: int(nan), "cannot convert.*NaN") + +# float conversion +assert.eq(float(), 0.0) +assert.eq(float(False), 0.0) +assert.eq(float(True), 1.0) +assert.eq(float(0), 0.0) +assert.eq(float(1), 1.0) +assert.eq(float(1.1), 1.1) +assert.eq(float("1.1"), 1.1) +assert.fails(lambda: float("1.1abc"), "invalid syntax") +assert.fails(lambda: float("1e100.0"), "invalid syntax") +assert.fails(lambda: float("1e1000"), "out of range") 
+assert.fails(lambda: float(None), "want number or string") +assert.eq(float("-1.1"), -1.1) +assert.eq(float("+1.1"), +1.1) +assert.eq(float("+Inf"), inf) +assert.eq(float("-Inf"), neginf) +assert.true(isnan(float("NaN"))) +assert.fails(lambda: float("+NaN"), "invalid syntax") +assert.fails(lambda: float("-NaN"), "invalid syntax") + +# hash +# Check that equal float and int values have the same hash. +def checkhash(): + for a in [1.23e100, 1.23e10, 1.23e1, 1.23, + 1, 4294967295, 8589934591, 9223372036854775807]: + for b in [a, -a, 1/a, -1/a]: + f = float(b) + i = int(b) + if f == i: + fh = hash(f) + ih = hash(i) + if fh != ih: + assert.true(False, "hash(%s) = %d, hash(%s) = %s" % (f, fh, i, ih)) +checkhash() + +# string formatting +assert.eq("%s" % 123.45e67, "1.2345e+69") +assert.eq("%r" % 123.45e67, "1.2345e+69") +assert.eq("%e" % 123.45e67, "1.234500e+69") +assert.eq("%f" % 123.45e67, "1234500000000000033987094856609369647752433474509923447907937257783296.000000") +assert.eq("%g" % 123.45e67, "1.2345e+69") +assert.eq("%e" % 123, "1.230000e+02") +assert.eq("%f" % 123, "123.000000") +assert.eq("%g" % 123, "123") +assert.fails(lambda: "%e" % "123", "requires float, not str") +assert.fails(lambda: "%f" % "123", "requires float, not str") +assert.fails(lambda: "%g" % "123", "requires float, not str") + +i0 = 1 +f0 = 1.0 +assert.eq(type(i0), "int") +assert.eq(type(f0), "float") + +ops = { + '+': lambda x, y: x + y, + '-': lambda x, y: x - y, + '*': lambda x, y: x * y, + '/': lambda x, y: x / y, + '//': lambda x, y: x // y, + '%': lambda x, y: x % y, +} + +# Check that if either argument is a float, so too is the result. 
+def checktypes(): + want = set(""" +int + int = int +int + float = float +float + int = float +float + float = float +int - int = int +int - float = float +float - int = float +float - float = float +int * int = int +int * float = float +float * int = float +float * float = float +int / int = float +int / float = float +float / int = float +float / float = float +int // int = int +int // float = float +float // int = float +float // float = float +int % int = int +int % float = float +float % int = float +float % float = float +"""[1:].splitlines()) + for opname in ("+", "-", "*", "/", "//", "%"): + for x in [i0, f0]: + for y in [i0, f0]: + op = ops[opname] + got = "%s %s %s = %s" % (type(x), opname, type(y), type(op(x, y))) + assert.contains(want, got) +checktypes() diff --git a/testdata/function.sky b/testdata/function.sky new file mode 100644 index 0000000..5cdcca2 --- /dev/null +++ b/testdata/function.sky @@ -0,0 +1,175 @@ +# Tests of Skylark 'function' + +# TODO(adonovan): +# - add some introspection functions for looking at function values +# and test that functions have correct position, free vars, names of locals, etc. +# - move the hard-coded tests of parameter passing from eval_test.go to here. 
+ +load("assert.sky", "assert") + +# Test lexical scope and closures: +def outer(x): + def inner(y): + return x + x + y # multiple occurrences of x should create only 1 freevar + return inner + +z = outer(3) +assert.eq(z(5), 11) +assert.eq(z(7), 13) +z2 = outer(4) +assert.eq(z2(5), 13) +assert.eq(z2(7), 15) +assert.eq(z(5), 11) +assert.eq(z(7), 13) + +# Function name +assert.eq(str(outer), '<function outer>') +assert.eq(str(z), '<function inner>') +assert.eq(str(str), '<built-in function str>') +assert.eq(str("".startswith), '<built-in method startswith of string value>') + +# Stateful closure +def squares(): + x = [0] + def f(): + x[0] += 1 + return x[0] * x[0] + return f + +sq = squares() +assert.eq(sq(), 1) +assert.eq(sq(), 4) +assert.eq(sq(), 9) +assert.eq(sq(), 16) + +# Freezing a closure +sq2 = freeze(sq) +assert.fails(sq2, "frozen list") + +# recursion detection, simple +def fib(x): + if x < 2: + return x + return fib(x-2) + fib(x-1) +assert.fails(lambda: fib(10), "function fib called recursively") + +# recursion detection, advanced +# +# A simplistic recursion check that looks for repeated calls to the +# same function value will not detect recursion using the Y +# combinator, which creates a new closure at each step of the +# recursion. To truly prohibit recursion, the dynamic check must look +# for repeated calls of the same syntactic function body. +Y = lambda f: (lambda x: x(x))(lambda y: f(lambda *args: y(y)(*args))) +fibgen = lambda fib: lambda x: (x if x<2 else fib(x-1)+fib(x-2)) +fib2 = Y(fibgen) +assert.fails(lambda: [fib2(x) for x in range(10)], "function lambda called recursively") + +# call of function not through its name +# (regression test for parsing suffixes of primary expressions) +hf = hasfields() +hf.x = [len] +assert.eq(hf.x[0]("abc"), 3) +def f(): + return lambda: 1 +assert.eq(f()(), 1) +assert.eq(["abc"][0][0].upper(), "A") + +# functions may be recursively defined, +# so long as they don't dynamically recur. 
+calls = [] +def yin(x): + calls.append("yin") + if x: + yang(False) + +def yang(x): + calls.append("yang") + if x: + yin(False) + +yin(True) +assert.eq(calls, ["yin", "yang"]) + +calls.clear() +yang(True) +assert.eq(calls, ["yang", "yin"]) + + +# hash(builtin) should be deterministic. +closures = set(["".count for _ in range(10)]) +assert.eq(len(closures), 10) +hashes = set([hash("".count) for _ in range(10)]) +assert.eq(len(hashes), 1) + +--- +# Default values of function parameters are mutable. +load("assert.sky", "assert") + +def f(x=[0]): + return x + +assert.eq(f(), [0]) + +f().append(1) +assert.eq(f(), [0, 1]) + +# Freezing a function value freezes its parameter defaults. +freeze(f) +assert.fails(lambda: f().append(2), "cannot append to frozen list") + +--- +# This is a well known corner case of parsing in Python. +load("assert.sky", "assert") + +f = lambda x: 1 if x else 0 +assert.eq(f(True), 1) +assert.eq(f(False), 0) + +x = True +f2 = (lambda x: 1) if x else 0 +assert.eq(f2(123), 1) + +tf = lambda: True, lambda: False +assert.true(tf[0]()) +assert.true(not tf[1]()) + +--- +# Missing parameters are correctly reported +# in functions of more than 64 parameters. 
+# (This tests a corner case of the implementation: +# we avoid a map allocation for <64 parameters) + +load("assert.sky", "assert") + +def f(a, b, c, d, e, f, g, h, + i, j, k, l, m, n, o, p, + q, r, s, t, u, v, w, x, + y, z, A, B, C, D, E, F, + G, H, I, J, K, L, M, N, + O, P, Q, R, S, T, U, V, + W, X, Y, Z, aa, bb, cc, dd, + ee, ff, gg, hh, ii, jj, kk, ll, + mm): + pass + +assert.fails(lambda: f( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64), "takes exactly 65 arguments .64 given.") + +assert.fails(lambda: f( + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64, 65, + mm = 100), 'multiple values for keyword argument "mm"') diff --git a/testdata/int.sky b/testdata/int.sky new file mode 100644 index 0000000..da64a99 --- /dev/null +++ b/testdata/int.sky @@ -0,0 +1,153 @@ +# Tests of Skylark 'int' + +load("assert.sky", "assert") + +# basic arithmetic +assert.eq(0 - 1, -1) +assert.eq(0 + 1, +1) +assert.eq(1 + 1, 2) +assert.eq(5 + 7, 12) +assert.eq(5 * 7, 35) +assert.eq(5 - 7, -2) + +# truth +assert.true(123) +assert.true(-1) +assert.true(not 0) + +# floored division +# (For real division, see float.sky.) 
+assert.eq(100 // 7, 14) +assert.eq(100 // -7, -15) # NB: different from Go/Java +assert.eq(-100 // 7, -15) # NB: different from Go/Java +assert.eq(-100 // -7, 14) +assert.eq(98 // 7, 14) +assert.eq(98 // -7, -14) +assert.eq(-98 // 7, -14) +assert.eq(-98 // -7, 14) + +# remainder +assert.eq(100 % 7, 2) +assert.eq(100 % -7, -5) # NB: different from Go/Java +assert.eq(-100 % 7, 5) # NB: different from Go/Java +assert.eq(-100 % -7, -2) +assert.eq(98 % 7, 0) +assert.eq(98 % -7, 0) +assert.eq(-98 % 7, 0) +assert.eq(-98 % -7, 0) + +# compound assignment +def compound(): + x = 1 + x += 1 + assert.eq(x, 2) + x -= 3 + assert.eq(x, -1) + x *= 39 + assert.eq(x, -39) + x //= 4 + assert.eq(x, -10) + x /= -2 + assert.eq(x, 5) + x %= 3 + assert.eq(x, 2) + +compound() + +# int conversion +# See float.sky for float-to-int conversions. +# We follow Python 3 here, but I can't see the method in its madness. +# int from bool/int/float +assert.eq(int(False), 0) +assert.eq(int(True), 1) +assert.eq(int(3), 3) +assert.eq(int(3.1), 3) +assert.fails(lambda: int(3, base=10), "non-string with explicit base") +# int from string, base implicitly 10 +assert.eq(int("100000000000000000000"), 10000000000 * 10000000000) +assert.eq(int("-100000000000000000000"), -10000000000 * 10000000000) +assert.eq(int("123"), 123) +assert.eq(int("-123"), -123) +assert.eq(int("0123"), 123) # not octal +assert.eq(int("-0123"), -123) +assert.fails(lambda: int("0x12"), "invalid literal with base 10") +assert.fails(lambda: int("-0x12"), "invalid literal with base 10") +assert.fails(lambda: int("0o123"), "invalid literal.*base 10") +assert.fails(lambda: int("-0o123"), "invalid literal.*base 10") +# int from string, explicit base +assert.eq(int("11", base=9), 10) +assert.eq(int("-11", base=9), -10) +assert.eq(int("10011", base=2), 19) +assert.eq(int("-10011", base=2), -19) +assert.eq(int("123", 8), 83) +assert.eq(int("-123", 8), -83) +assert.eq(int("0123", 8), 83) # redundant zeros permitted +assert.eq(int("-0123", 8), 
-83) +assert.eq(int("00123", 8), 83) +assert.eq(int("-00123", 8), -83) +assert.eq(int("0o123", 8), 83) +assert.eq(int("-0o123", 8), -83) +assert.eq(int("123", 7), 66) # 1*7*7 + 2*7 + 3 +assert.eq(int("-123", 7), -66) +assert.eq(int("12", 16), 18) +assert.eq(int("-12", 16), -18) +assert.eq(int("0x12", 16), 18) +assert.eq(int("-0x12", 16), -18) +assert.fails(lambda: int("0x123", 8), "invalid literal.*base 8") +assert.fails(lambda: int("-0x123", 8), "invalid literal.*base 8") +assert.fails(lambda: int("0o123", 16), "invalid literal.*base 16") +assert.fails(lambda: int("-0o123", 16), "invalid literal.*base 16") +# int from string, auto detect base +assert.eq(int("123", 0), 123) +assert.eq(int("+123", 0), +123) +assert.eq(int("-123", 0), -123) +assert.eq(int("0x12", 0), 18) +assert.eq(int("+0x12", 0), +18) +assert.eq(int("-0x12", 0), -18) +assert.eq(int("0o123", 0), 83) +assert.eq(int("+0o123", 0), +83) +assert.eq(int("-0o123", 0), -83) +assert.fails(lambda: int("0123", 0), "invalid literal.*base 0") # valid in Python 2.7 +assert.fails(lambda: int("-0123", 0), "invalid literal.*base 0") + +# bitwise union (int|int) and intersection (int&int). +# TODO(adonovan): this is not yet in the Skylark spec, +# but there is consensus that it should be. 
+assert.eq(1|2, 3) +assert.eq(3|6, 7) +assert.eq((1|2) & (2|4), 2) + +# comparisons +# TODO(adonovan): test: < > == != etc +assert.lt(-2, -1) +assert.lt(-1, 0) +assert.lt(0, 1) +assert.lt(1, 2) +assert.true(2 >= 2) +assert.true(2 > 1) +assert.true(1 >= 1) +assert.true(1 > 0) +assert.true(0 >= 0) +assert.true(0 > -1) +assert.true(-1 >= -1) +assert.true(-1 > -2) + +# precision +maxint64 = 9223372036854775807 # = 2^63 - 1 +minint64 = -maxint64 - 1 # = -2^63 +assert.eq(str(maxint64), "9223372036854775807") +assert.eq(str(maxint64+1), "9223372036854775808") +assert.eq(str(minint64), "-9223372036854775808") +assert.eq(str(minint64-1), "-9223372036854775809") +assert.eq(str(minint64 * minint64), "85070591730234615865843651857942052864") + +# string formatting +assert.eq("%o %x %d" % (0o755, 0xDEADBEEF, 42), "755 deadbeef 42") +nums = [-95, -1, 0, +1, +95] +assert.eq(' '.join(["%o" % x for x in nums]), "-137 -1 0 1 137") +assert.eq(' '.join(["%d" % x for x in nums]), "-95 -1 0 1 95") +assert.eq(' '.join(["%i" % x for x in nums]), "-95 -1 0 1 95") +assert.eq(' '.join(["%x" % x for x in nums]), "-5f -1 0 1 5f") +assert.eq(' '.join(["%X" % x for x in nums]), "-5F -1 0 1 5F") +assert.eq("%o %x %d" % (123, 123, 123), "173 7b 123") +assert.eq("%o %x %d" % (123.1, 123.1, True), "173 7b 1") # non-int operands are acceptable diff --git a/testdata/list.sky b/testdata/list.sky new file mode 100644 index 0000000..fdeba58 --- /dev/null +++ b/testdata/list.sky @@ -0,0 +1,232 @@ +# Tests of Skylark 'list' + +load("assert.sky", "assert") + +# literals +assert.eq([], []) +assert.eq([1], [1]) +assert.eq([1,], [1]) +assert.eq([1, 2], [1, 2]) +assert.ne([1, 2, 3], [1, 2, 4]) + +# truth +assert.true([0]) +assert.true(not []) + +# indexing, x[i] +abc = list("abc".split_bytes()) +assert.fails(lambda: abc[-4], "list index -1 out of range \\[0:3\\]") +assert.eq(abc[-3], "a") +assert.eq(abc[-2], "b") +assert.eq(abc[-1], "c") +assert.eq(abc[0], "a") +assert.eq(abc[1], "b") +assert.eq(abc[2], "c") 
+assert.fails(lambda: abc[3], "list index 3 out of range \\[0:3\\]") + +# x[i] = ... +x3 = [0, 1, 2] +x3[1] = 2 +x3[2] += 3 +assert.eq(x3, [0, 2, 5]) +def f2(): x3[3] = 4 +assert.fails(f2, "out of range") +freeze(x3) +def f3(): x3[0] = 0 +assert.fails(f3, "cannot assign to element of frozen list") +assert.fails(x3.clear, "cannot clear frozen list") + +# list + list +assert.eq([1, 2, 3] + [3, 4, 5], [1, 2, 3, 3, 4, 5]) +assert.fails(lambda: [1, 2] + (3, 4), "unknown.*list \+ tuple") +assert.fails(lambda: (1, 2) + [3, 4], "unknown.*tuple \+ list") + +# list * int, int * list +assert.eq(abc * 0, []) +assert.eq(abc * -1, []) +assert.eq(abc * 1, abc) +assert.eq(abc * 3, ["a", "b", "c", "a", "b", "c", "a", "b", "c"]) +assert.eq(0 * abc, []) +assert.eq(-1 * abc, []) +assert.eq(1 * abc, abc) +assert.eq(3 * abc, ["a", "b", "c", "a", "b", "c", "a", "b", "c"]) + +# list comprehensions +assert.eq([2 * x for x in [1, 2, 3]], [2, 4, 6]) +assert.eq([2 * x for x in [1, 2, 3] if x > 1], [4, 6]) +assert.eq([(x, y) for x in [1, 2] for y in [3, 4]], + [(1, 3), (1, 4), (2, 3), (2, 4)]) +assert.eq([(x, y) for x in [1, 2] if x == 2 for y in [3, 4]],[(2, 3), (2, 4)]) +assert.eq([2 * x for x in (1, 2, 3)], [2, 4, 6]) +assert.eq([x for x in "abc".split_bytes()], ["a", "b", "c"]) +assert.eq([x for x in {"a": 1, "b": 2}], ["a", "b"]) +assert.eq([(y, x) for x, y in {1: 2, 3: 4}.items()], [(2, 1), (4, 3)]) + +# list function +assert.eq(list(), []) +assert.eq(list("ab".split_bytes()), ["a", "b"]) + +# A list comprehension defines a separate lexical block, +# whether at top-level... +a = [1, 2] +b = [a for a in [3, 4]] +assert.eq(a, [1, 2]) +assert.eq(b, [3, 4]) +# ...or local to a function. 
+def listcompblock(): + c = [1, 2] + d = [c for c in [3, 4]] + assert.eq(c, [1, 2]) + assert.eq(d, [3, 4]) +listcompblock() + +# list.pop +x4 = [1,2,3,4,5] +assert.eq(x4.pop(), 5) +assert.eq(x4, [1,2,3,4]) +assert.eq(x4.pop(1), 2) +assert.eq(x4, [1,3,4]) +assert.eq(x4.pop(0), 1) +assert.eq(x4, [3,4]) + +# TODO(adonovan): test uses of list as sequence +# (for loop, comprehension, library functions). + +# x += y for lists is equivalent to x.extend(y). +# y may be a sequence. +# TODO: Test that side-effects of 'x' occur only once. +def list_extend(): + a = [1, 2, 3] + b = a + a = a + [4] # creates a new list + assert.eq(a, [1, 2, 3, 4]) + assert.eq(b, [1, 2, 3]) # b is unchanged + + a = [1, 2, 3] + b = a + a += [4] # updates a (and thus b) in place + assert.eq(a, [1, 2, 3, 4]) + assert.eq(b, [1, 2, 3, 4]) # alias observes the change + + a = [1, 2, 3] + b = a + a.extend([4]) # updates existing list + assert.eq(a, [1, 2, 3, 4]) + assert.eq(b, [1, 2, 3, 4]) # alias observes the change +list_extend() + +# Unlike list.extend(iterable), list += iterable makes its LHS name local. 
+a_list = [] +def f4(): + a_list += [1] # binding use => a_list is a local var +assert.fails(f4, "local variable a_list referenced before assignment") + +# list += <not iterable> +def f5(): + x = [] + x += 1 +assert.fails(f5, "invalid operation: list \\+= int") + +# frozen list += iterable +def f6(): + x = [] + freeze(x) + x += [1] +assert.fails(f6, "cannot apply \\+= to frozen list") + +# append +x5 = [1, 2, 3] +x5.append(4) +x5.append("abc") +assert.eq(x5, [1, 2, 3, 4, "abc"]) + +# extend +x5a = [1, 2, 3] +x5a.extend("abc".split_bytes()) # string +x5a.extend((True, False)) # tuple +assert.eq(x5a, [1, 2, 3, "a", "b", "c", True, False]) + +# list.insert +def insert_at(index): + x = range(3) + x.insert(index, 42) + return x +assert.eq(insert_at(-99), [42, 0, 1, 2]) +assert.eq(insert_at(-2), [0, 42, 1, 2]) +assert.eq(insert_at(-1), [0, 1, 42, 2]) +assert.eq(insert_at( 0), [42, 0, 1, 2]) +assert.eq(insert_at( 1), [0, 42, 1, 2]) +assert.eq(insert_at( 2), [0, 1, 42, 2]) +assert.eq(insert_at( 3), [0, 1, 2, 42]) +assert.eq(insert_at( 4), [0, 1, 2, 42]) + +# list.remove +def remove(v): + x = [3, 1, 4, 1] + x.remove(v) + return x +assert.eq(remove(3), [1, 4, 1]) +assert.eq(remove(1), [3, 4, 1]) +assert.eq(remove(4), [3, 1, 1]) +assert.fails(lambda: [3, 1, 4, 1].remove(42), "remove: element not found") + +# list.index +bananas = list("bananas".split_bytes()) +assert.eq(bananas.index('a'), 1) # bAnanas +assert.fails(lambda: bananas.index('d'), "value not in list") +# start +assert.eq(bananas.index('a', -1000), 1) # bAnanas +assert.eq(bananas.index('a', 0), 1) # bAnanas +assert.eq(bananas.index('a', 1), 1) # bAnanas +assert.eq(bananas.index('a', 2), 3) # banAnas +assert.eq(bananas.index('a', 3), 3) # banAnas +assert.eq(bananas.index('b', 0), 0) # Bananas +assert.eq(bananas.index('n', -3), 4) # banaNas +assert.fails(lambda: bananas.index('n', -2), "value not in list") +assert.eq(bananas.index('s', -2), 6) # bananaS +assert.fails(lambda: bananas.index('b', 1), "value not in 
list") +# start, end +assert.eq(bananas.index('s', -1000, 7), 6) # bananaS +assert.fails(lambda: bananas.index('s', -1000, 6), "value not in list") +assert.fails(lambda: bananas.index('d', -1000, 1000), "value not in list") + +# slicing, x[i:j:k] +assert.eq(bananas[6::-2], list("snnb".split_bytes())) +assert.eq(bananas[5::-2], list("aaa".split_bytes())) +assert.eq(bananas[4::-2], list("nnb".split_bytes())) +assert.eq(bananas[99::-2], list("snnb".split_bytes())) +assert.eq(bananas[100::-2], list("snnb".split_bytes())) +# TODO(adonovan): many more tests + +# iterator invalidation +def iterator1(): + list = [0, 1, 2] + for x in list: + list[x] = 2 * x + return list +assert.eq(iterator1(), [0, 2, 4]) # element updates are allowed + +def iterator2(): + list = [0, 1, 2] + for x in list: + list.remove(x) +assert.fails(iterator2, "remove.*during iteration") + +def iterator3(): + list = [0, 1, 2] + for x in list: + list.append(3) +assert.fails(iterator3, "append.*during iteration") + +def iterator4(): + list = [0, 1, 2] + for x in list: + list.extend([3, 4]) +assert.fails(iterator4, "extend.*during iteration") + +def iterator5(): + def f(x): + x.append(4) + list = [1, 2, 3] + _ = [f(list) for x in list] +assert.fails(iterator5, "append.*during iteration") diff --git a/testdata/misc.sky b/testdata/misc.sky new file mode 100644 index 0000000..17e4b51 --- /dev/null +++ b/testdata/misc.sky @@ -0,0 +1,110 @@ +# Miscellaneous tests of Skylark evaluation. +# This is a "chunked" file: each "---" effectively starts a new file. + +# TODO(adonovan): move these tests into more appropriate files. +# TODO(adonovan): test coverage: +# - stmts: pass; if cond fail; += and failures; +# for x fail; for x not iterable; for can't assign; for +# error in loop body +# - subassign fail +# - x[i]=x fail in both operands; frozen x; list index not int; boundscheck +# - x.f = ... 
+# - failure in list expr [...]; tuple expr; dict expr (bad key) +# - cond expr semantics; failures +# - x[i] failures in both args; dict and iterator key and range checks; +# unhandled operand types +# - +: list/list, int/int, string/string, tuple+tuple, dict/dict; +# - * and ** calls: various errors +# - call of non-function +# - slice x[ijk] +# - comprehension: unhashable dict key; +# scope of vars (local and toplevel); noniterable for clause +# - unknown unary op +# - ordering of values +# - freeze, transitivity of its effect. +# - add an application-defined type to the environment so we can test it. +# - even more: +# +# eval +# pass statement +# assign to tuple l-value -- illegal +# assign to list l-value -- illegal +# assign to field +# tuple + tuple +# call with *args, **kwargs +# slice with step +# tuple slice +# interpolate with %c, %% + +load("assert.sky", "assert") + +# Ordered comparisons require values of the same type. +assert.fails(lambda: None < False, "not impl") +assert.fails(lambda: False < list, "not impl") +assert.fails(lambda: list < {}, "not impl") +assert.fails(lambda: {} < (lambda: None), "not impl") +assert.fails(lambda: (lambda: None) < 0, "not impl") +assert.fails(lambda: 0 < [], "not impl") +assert.fails(lambda: [] < "", "not impl") +assert.fails(lambda: "" < (), "not impl") +# Except int < float: +assert.lt(1, 2.0) +assert.lt(2.0, 3) + +--- +# cyclic data structures +load("assert.sky", "assert") + +cyclic = [1, 2, 3] # list cycle +cyclic[1] = cyclic +assert.eq(str(cyclic), "[1, [...], 3]") +assert.fails(lambda: cmp(cyclic, cyclic), "maximum recursion") +assert.fails(lambda: cyclic < cyclic, "maximum recursion") +assert.fails(lambda: cyclic == cyclic, "maximum recursion") +cyclic2 = [1, 2, 3] +cyclic2[1] = cyclic2 +assert.fails(lambda: cyclic2 == cyclic, "maximum recursion") + +cyclic3 = [1, [2, 3]] # list-list cycle +cyclic3[1][0] = cyclic3 +assert.eq(str(cyclic3), "[1, [[...], 3]]") +cyclic4 = {"x": 1} +cyclic4["x"] = cyclic4 
+assert.eq(str(cyclic4), "{\"x\": {...}}") +cyclic5 = [0, {"x": 1}] # list-dict cycle +cyclic5[1]["x"] = cyclic5 +assert.eq(str(cyclic5), "[0, {\"x\": [...]}]") +assert.eq(str(cyclic5), "[0, {\"x\": [...]}]") +assert.fails(lambda: cyclic5 == cyclic5 ,"maximum recursion") +cyclic6 = [0, {"x": 1}] +cyclic6[1]["x"] = cyclic6 +assert.fails(lambda: cyclic5 == cyclic6, "maximum recursion") + +--- +# regression +load("assert.sky", "assert") + +# was a parse error: +assert.eq(("ababab"[2:]).replace("b", "c"), "acac") +assert.eq("ababab"[2:].replace("b", "c"), "acac") + +# test parsing of line continuation, at toplevel and in expression. +three = 1 + \ + 2 +assert.eq(1 + \ + 2, three) + +--- +# A regression test for error position information. + +_ = {}.get(1, default=2) ### "get: unexpected keyword arguments" + +--- +load("assert.sky", "assert") + +# load(...) calls outside an expression statement are not load statements. + +assert.eq([load("123") for load in [int, len]], [123, 3]) + +load = lambda x: x +assert.eq(load("abc"), "abc") diff --git a/testdata/set.sky b/testdata/set.sky new file mode 100644 index 0000000..7d96a69 --- /dev/null +++ b/testdata/set.sky @@ -0,0 +1,92 @@ +# Tests of Skylark 'set' + +# Sets are not (yet) a standard part of Skylark, so the features +# tested in this file must be enabled in the application by setting +# resolve.AllowSet. (All sets are created by calls to the 'set' +# built-in or derived from operations on existing sets.) +# The semantics are subject to change as the spec evolves. + +# TODO(adonovan): support set mutation: +# - del set[k] +# - set.remove +# - set.update +# - set.clear +# - set += iterable, perhaps? +# Test iterator invalidation. + +load("assert.sky", "assert") + +# literals +# Parser does not currently support {1, 2, 3}. +# TODO(adonovan): add test to syntax/testdata/errors.sky. + +# set comprehensions +# Parser does not currently support {x for x in y}. +# See syntax/testdata/errors.sky. 
+ +# set constructor +assert.eq(type(set()), "set") +assert.eq(list(set()), []) +assert.eq(type(set([1, 3, 2, 3])), "set") +assert.eq(list(set([1, 3, 2, 3])), [1, 3, 2]) +assert.eq(type(set("hello".split_bytes())), "set") +assert.eq(list(set("hello".split_bytes())), ["h", "e", "l", "o"]) +assert.fails(lambda: set(1), "got int, want iterable") +assert.fails(lambda: set(1, 2, 3), "got 3 arguments") + +# truth +assert.true(not set()) +assert.true(set([False])) +assert.true(set([1, 2, 3])) + +x = set([1, 2, 3]) +y = set([3, 4, 5]) + +# set + any is not defined +assert.fails(lambda: x + y, "unknown.*: set \+ set") + +# union, set | iterable +assert.eq(list(set("a".split_bytes()) | set("b".split_bytes())), ["a", "b"]) +assert.eq(list(set("ab".split_bytes()) | set("bc".split_bytes())), ["a", "b", "c"]) +assert.eq(list(set("ab".split_bytes()) | "bc".split_bytes()), ["a", "b", "c"]) +assert.eq(type(x | y), "set") +assert.eq(list(x | y), [1, 2, 3, 4, 5]) +assert.eq(list(x | [5, 1]), [1, 2, 3, 5]) +assert.eq(list(x | (6, 5, 4)), [1, 2, 3, 6, 5, 4]) +assert.fails(lambda: x | [1, 2, {}], "unhashable type: dict") + +# intersection, set & set +assert.eq(list(set("a".split_bytes()) & set("b".split_bytes())), []) +assert.eq(list(set("ab".split_bytes()) & set("bc".split_bytes())), ["b"]) + +# set.union +assert.eq(list(x.union(y)), [1, 2, 3, 4, 5]) + +# len +assert.eq(len(x), 3) +assert.eq(len(y), 3) +assert.eq(len(x | y), 5) + +# str +# TODO(adonovan): make output deterministic when len > 1? 
+assert.eq(str(set([1])), "set([1])")
+
+# comparison
+assert.eq(x, x)
+assert.eq(y, y)
+assert.true(x != y)
+assert.eq(set([1, 2, 3]), set([3, 2, 1]))
+assert.fails(lambda: x < y, "set < set not implemented")
+
+# iteration (checked with assert.eq so that the type and contents are actually compared)
+assert.eq(type([elem for elem in x]), "list")
+assert.eq(list([elem for elem in x]), [1, 2, 3])
+def iter():
+  list = []
+  for elem in x:
+    list.append(elem)
+  return list
+assert.eq(iter(), [1, 2, 3])
+
+# sets are not indexable
+assert.fails(lambda: x[0], "unhandled.*operation")
diff --git a/testdata/string.sky b/testdata/string.sky
new file mode 100644
index 0000000..47d160f
--- /dev/null
+++ b/testdata/string.sky
@@ -0,0 +1,305 @@
+# Tests of Skylark 'string'
+
+load("assert.sky", "assert")
+
+# raw string literals:
+assert.eq(r'a\bc', "a\\bc")
+
+# truth
+assert.true("abc")
+assert.true("\0")
+assert.true(not "")
+
+# str + str
+assert.eq("a"+"b"+"c", "abc")
+
+# str * int, int * str
+assert.eq("abc" * 0, "")
+assert.eq("abc" * -1, "")
+assert.eq("abc" * 1, "abc")
+assert.eq("abc" * 5, "abcabcabcabcabc")
+assert.eq(0 * "abc", "")
+assert.eq(-1 * "abc", "")
+assert.eq(1 * "abc", "abc")
+assert.eq(5 * "abc", "abcabcabcabcabc")
+assert.fails(lambda: 1.0 * "abc", "unknown.*float \\* str")
+
+# len
+assert.eq(len("Hello, 世界!"), 14)
+
+# chr & ord
+assert.eq(chr(65), "A")  # 1-byte UTF-8 encoding
+assert.eq(chr(1049), "Й")  # 2-byte UTF-8 encoding
+assert.eq(chr(0x1F63F), "😿")  # 4-byte UTF-8 encoding
+assert.fails(lambda: chr(-1), "Unicode code point -1 out of range \\(<0\\)")
+assert.fails(lambda: chr(0x110000), "Unicode code point U\\+110000 out of range \\(>0x10FFFF\\)")
+assert.eq(ord("A"), 65)
+assert.eq(ord("Й"), 1049)
+assert.eq(ord("😿"), 0x1F63F)
+assert.eq(ord("Й"[1:]), 0xFFFD)  # = Unicode replacement character
+assert.fails(lambda: ord("abc"), "string encodes 3 Unicode code points, want 1")
+assert.fails(lambda: ord(""), "string encodes 0 Unicode code points, want 1")
+assert.fails(lambda: ord("😿"[1:]), "string encodes 3 Unicode code points, want 1")  # 3 x 0xFFFD
+
+# string.codepoints
+assert.eq(type("abcЙ😿".codepoints()), "codepoints")
+assert.eq(str("abcЙ😿".codepoints()), '"abcЙ😿".codepoints()')
+assert.eq(list("abcЙ😿".codepoints()), [97, 98, 99, 1049, 128575])
+assert.eq(list(("A" + "😿Z"[1:]).codepoints()), [ord("A"), 0xFFFD, 0xFFFD, 0xFFFD, ord("Z")])
+assert.eq(list("".codepoints()), [])
+
+# string.split_codepoints
+assert.eq(type("abcЙ😿".split_codepoints()), "codepoints")
+assert.eq(str("abcЙ😿".split_codepoints()), '"abcЙ😿".split_codepoints()')
+assert.eq(list("abcЙ😿".split_codepoints()), ["a", "b", "c", "Й", "😿"])
+assert.eq(list(("A" + "😿Z"[1:]).split_codepoints()), ["A", "\x9f", "\x98", "\xbf", "Z"])
+assert.eq(list("".split_codepoints()), [])
+
+# string.bytes
+assert.eq(type("abcЙ😿".bytes()), "bytes")
+assert.eq(str("abcЙ😿".bytes()), '"abcЙ😿".bytes()')
+assert.eq(list("abcЙ😿".bytes()), [97, 98, 99, 208, 153, 240, 159, 152, 191])
+assert.eq(list(("A" + "😿Z"[1:]).bytes()), [65, 159, 152, 191, 90])
+assert.eq(list("".bytes()), [])
+
+# string.split_bytes
+assert.eq(type("abcЙ😿".split_bytes()), "bytes")
+assert.eq(str("abcЙ😿".split_bytes()), '"abcЙ😿".split_bytes()')
+assert.eq(list("abcЙ😿".split_bytes()),
+          ["a", "b", "c", "\xd0", "\x99", "\xf0", "\x9f", "\x98", "\xbf"])
+assert.eq(list(("A" + "😿Z"[1:]).split_bytes()),
+          ["A", "\x9f", "\x98", "\xbf", "Z"])
+assert.eq(list("".split_bytes()), [])
+
+# indexing, x[i]
+assert.eq("Hello, 世界!"[0], "H")
+assert.eq("Hello, 世界!"[7], "\xe4")
+assert.eq("Hello, 世界!"[13], "!")
+assert.fails(lambda: "abc"[-4], "out of range")
+assert.eq("abc"[-3], "a")
+assert.eq("abc"[-2], "b")
+assert.eq("abc"[-1], "c")
+assert.eq("abc"[0], "a")
+assert.eq("abc"[1], "b")
+assert.eq("abc"[2], "c")
+assert.fails(lambda: "abc"[4], "out of range")
+
+# x[i] = ...
+x2 = "abc" +def f(): x2[1] = 'B' +assert.fails(f, "string.*does not support.*assignment") + +# slicing, x[i:j] +assert.eq("abc"[:], "abc") +assert.eq("abc"[-4:], "abc") +assert.eq("abc"[-3:], "abc") +assert.eq("abc"[-2:], "bc") +assert.eq("abc"[-1:], "c") +assert.eq("abc"[0:], "abc") +assert.eq("abc"[1:], "bc") +assert.eq("abc"[2:], "c") +assert.eq("abc"[3:], "") +assert.eq("abc"[4:], "") +assert.eq("abc"[:-4], "") +assert.eq("abc"[:-3], "") +assert.eq("abc"[:-2], "a") +assert.eq("abc"[:-1], "ab") +assert.eq("abc"[:0], "") +assert.eq("abc"[:1], "a") +assert.eq("abc"[:2], "ab") +assert.eq("abc"[:3], "abc") +assert.eq("abc"[:4], "abc") +assert.eq("abc"[1:2], "b") +assert.eq("abc"[2:1], "") +# non-unit strides +assert.eq("abcd"[0:4:1], "abcd") +assert.eq("abcd"[::2], "ac") +assert.eq("abcd"[1::2], "bd") +assert.eq("abcd"[4:0:-1], "dcb") +assert.eq("banana"[7::-2], "aaa") +assert.eq("banana"[6::-2], "aaa") +assert.eq("banana"[5::-2], "aaa") +assert.eq("banana"[4::-2], "nnb") +assert.eq("banana"[::-1], "ananab") +assert.eq("banana"[None:None:-2], "aaa") +assert.fails(lambda: "banana"[1.0::], "invalid start index: got float, want int") +assert.fails(lambda: "banana"[:"":], "invalid end index: got string, want int") +assert.fails(lambda: "banana"[:"":True], "got bool for slice step, want int") + +# in, not in +assert.true("oo" in "food") +assert.true("ox" not in "food") +assert.true("" in "food") +assert.true("" in "") +assert.fails(lambda: 1 in "", "requires string as left operand") +assert.fails(lambda: "" in 1, "unknown binary op: string in int") + +# ==, != +assert.eq("hello", "he"+"llo") +assert.ne("hello", "Hello") + +# TODO(adonovan): ordered comparisons + +# string % tuple formatting +assert.eq("A %d %x Z" % (123, 456), "A 123 1c8 Z") +assert.eq("A %(foo)d %(bar)s Z" % {"foo": 123, "bar":"hi"}, "A 123 hi Z") +assert.eq("%s %r" % ("hi", "hi"), 'hi "hi"') # TODO(adonovan): use ''-quotation +assert.eq("%%d %d" % 1, "%d 1") +assert.fails(lambda: "%d %d" % 1, "not 
enough arguments for format string") +assert.fails(lambda: "%d %d" % (1, 2, 3), "too many arguments for format string") +# %c +assert.eq("%c" % 65, "A") +assert.eq("%c" % 0x3b1, "α") +assert.eq("%c" % "A", "A") +assert.eq("%c" % "α", "α") +assert.fails(lambda: "%c" % "abc", "requires a single-character string") +assert.fails(lambda: "%c" % 65.0, "requires int or single-character string") +assert.fails(lambda: "%c" % 10000000, "requires a valid Unicode code point") +assert.fails(lambda: "%c" % -1, "requires a valid Unicode code point") +# TODO(adonovan): more tests + +# str.format +assert.eq("a{}b".format(123), "a123b") +assert.eq("a{}b{}c{}d{}".format(1, 2, 3, 4), "a1b2c3d4") +assert.eq("a{{b".format(), "a{b") +assert.eq("a{x}b{y}c{}".format(1, x=2, y=3), "a2b3c1") +assert.fails(lambda: "a{z}b".format(x=1), "keyword z not found") +assert.fails(lambda: "a{123}b".format(), "tuple index out of range") +assert.fails(lambda: "a{}b{}c".format(1), "tuple index out of range") +assert.eq("a{010}b".format(0,1,2,3,4,5,6,7,8,9,10), "a10b") # index is decimal +assert.fails(lambda: "a{}b{1}c".format(1, 2), "cannot switch from automatic field numbering to manual") +assert.eq("a{!s}c".format("b"), "abc") +assert.eq("a{!r}c".format("b"), r'a"b"c') +assert.eq("a{x!r}c".format(x='b'), r'a"b"c') +assert.fails(lambda: "{x!}".format(x=1), "unknown conversion") +assert.fails(lambda: "{x!:}".format(x=1), "unknown conversion") + +# str.split, str.rsplit +assert.eq("a.b.c.d".split("."), ["a", "b", "c", "d"]) +assert.eq("a.b.c.d".rsplit("."), ["a", "b", "c", "d"]) +assert.eq("a.b.c.d".split(".", -1), ["a", "b", "c", "d"]) +assert.eq("a.b.c.d".rsplit(".", -1), ["a", "b", "c", "d"]) +assert.eq("a.b.c.d".split(".", 0), ["a.b.c.d"]) +assert.eq("a.b.c.d".rsplit(".", 0), ["a.b.c.d"]) +assert.eq("a.b.c.d".split(".", 1), ["a", "b.c.d"]) +assert.eq("a.b.c.d".rsplit(".", 1), ["a.b.c", "d"]) +assert.eq("a.b.c.d".split(".", 2), ["a", "b", "c.d"]) +assert.eq("a.b.c.d".rsplit(".", 2), ["a.b", "c", "d"]) + 
+assert.eq(" a bc\n def \t ghi".split(), ["a", "bc", "def", "ghi"]) +assert.eq(" a bc\n def \t ghi".split(None), ["a", "bc", "def", "ghi"]) +assert.eq(" a bc\n def \t ghi".split(None, 0), [" a bc\n def \t ghi"]) +assert.eq(" a bc\n def \t ghi".split(None, 1), ["a", "bc\n def \t ghi"]) +assert.eq(" a bc\n def \t ghi".split(None, 2), ["a", "bc", "def \t ghi"]) +assert.eq(" a bc\n def \t ghi".split(None, 3), ["a", "bc", "def", "ghi"]) +assert.eq(" a bc\n def \t ghi".split(None, 4), ["a", "bc", "def", "ghi"]) + +assert.eq("localhost:80".rsplit(":", 1)[-1], "80") + +# str.splitlines +assert.eq("\nabc\ndef".splitlines(), ["", "abc", "def"]) +assert.eq("\nabc\ndef\n".splitlines(), ["", "abc", "def"]) +assert.eq("\nabc\ndef".splitlines(True), ["\n", "abc\n", "def"]) +assert.eq("\nabc\ndef\n".splitlines(True), ["\n", "abc\n", "def\n"]) + +# str.{,l,r}strip +assert.eq(" \tfoo\n ".strip(), "foo") +assert.eq(" \tfoo\n ".lstrip(), "foo\n ") +assert.eq(" \tfoo\n ".rstrip(), " \tfoo") +assert.eq(" \tfoo\n ".strip(""), "foo") +assert.eq(" \tfoo\n ".lstrip(""), "foo\n ") +assert.eq(" \tfoo\n ".rstrip(""), " \tfoo") +assert.eq("blah.h".strip("b.h"), "la") +assert.eq("blah.h".lstrip("b.h"), "lah.h") +assert.eq("blah.h".rstrip("b.h"), "bla") + +# str.count +assert.eq("banana".count("a"), 3) +assert.eq("banana".count("a", 2), 2) +assert.eq("banana".count("a", -4, -2), 1) +assert.eq("banana".count("a", 1, 4), 2) +assert.eq("banana".count("a", 0, -100), 0) + +# str.{starts,ends}with +assert.true("foo".endswith("oo")) +assert.true(not "foo".endswith("x")) +assert.true("foo".startswith("fo")) +assert.true(not "foo".startswith("x")) +assert.fails(lambda: "foo".startswith(1), "got int.*want string") + +# str.replace +assert.eq("banana".replace("a", "o", 1), "bonana") +assert.eq("banana".replace("a", "o"), "bonono") +# TODO(adonovan): more tests + +# str.{,r}find +assert.eq("foofoo".find("oo"), 1) +assert.eq("foofoo".find("ox"), -1) +assert.eq("foofoo".find("oo", 2), 4) 
+assert.eq("foofoo".rfind("oo"), 4)
+assert.eq("foofoo".rfind("ox"), -1)
+assert.eq("foofoo".rfind("oo", 1, 4), 1)
+assert.eq("foofoo".find(""), 0)
+assert.eq("foofoo".rfind(""), 6)
+
+# str.{,r}partition
+assert.eq("foo/bar/wiz".partition("/"), ("foo", "/", "bar/wiz"))
+assert.eq("foo/bar/wiz".rpartition("/"), ("foo/bar", "/", "wiz"))
+assert.eq("foo/bar/wiz".partition("."), ("foo/bar/wiz", "", ""))
+assert.eq("foo/bar/wiz".rpartition("."), ("", "", "foo/bar/wiz"))
+assert.fails(lambda: "foo/bar/wiz".partition(""), "empty separator")
+assert.fails(lambda: "foo/bar/wiz".rpartition(""), "empty separator")
+
+assert.eq('?'.join(["foo", "a/b/c.go".rpartition("/")[0]]), 'foo?a/b')
+
+# str.is{alpha,...}
+# For each sample string, collect the names of the is<name> predicates
+# that return true and compare the space-joined result with the table.
+def test_predicates():
+  predicates = ["alnum", "alpha", "digit", "lower", "space", "title", "upper"]
+  table = {
+    "Hello, World!": "title",
+    "hello, world!": "lower",
+    "base64": "alnum lower",
+    "HAL-9000": "upper",
+    "Catch-22": "title",
+    "": "",
+    "\n\t\r": "space",
+    "abc": "alnum alpha lower",
+    "ABC": "alnum alpha upper",
+    "123": "alnum digit",
+  }
+  for str, want in table.items():
+    got = ' '.join([name for name in predicates if getattr(str, "is"+name)()])
+    if got != want:
+      # Report the observed predicates first, then the expected ones.
+      assert.fail("%r matched [%s], want [%s]" % (str, got, want))
+test_predicates()
+
+# Strings are not iterable.
+# ok +assert.eq(len("abc"), 3) # len +assert.true("a" in "abc") # str in str +assert.eq("abc"[1], "b") # indexing +# not ok +def for_string(): + for x in "abc": + pass +def args(*args): return args +assert.fails(lambda: args(*"abc"), "must be iterable, not string") # varargs +assert.fails(lambda: list("abc"), "got string, want iterable") # list(str) +assert.fails(lambda: tuple("abc"), "got string, want iterable") # tuple(str) +assert.fails(lambda: set("abc"), "got string, want iterable") # set(str) +assert.fails(lambda: set() | "abc", "unknown binary op: set | string") # set union +assert.fails(lambda: enumerate("ab"), "got string, want iterable") # enumerate +assert.fails(lambda: sorted("abc"), "got string, want iterable") # sorted +assert.fails(lambda: [].extend("bc"), "got string, want iterable") # list.extend +assert.fails(lambda: ",".join("abc"), "got string, want iterable") # string.join +assert.fails(lambda: dict(["ab"]), "not iterable .*string") # dict +# The Java implementation does not correctly reject the following cases: +# (See Google Issue b/34385336) +assert.fails(for_string, "string value is not iterable") # for loop +assert.fails(lambda: [x for x in "abc"], "string value is not iterable") # comprehension +assert.fails(lambda: all("abc"), "got string, want iterable") # all +assert.fails(lambda: any("abc"), "got string, want iterable") # any +assert.fails(lambda: reversed("abc"), "got string, want iterable") # reversed +assert.fails(lambda: zip("ab", "cd"), "not iterable: string") # zip + +# TODO(adonovan): tests for: {,r}index join {capitalize,lower,title,upper} diff --git a/testdata/tuple.sky b/testdata/tuple.sky new file mode 100644 index 0000000..3a8980f --- /dev/null +++ b/testdata/tuple.sky @@ -0,0 +1,53 @@ +# Tests of Skylark 'tuple' + +load("assert.sky", "assert") + +# literal +assert.eq((), ()) +assert.eq((1), 1) +assert.eq((1,), (1,)) +assert.ne((1), (1,)) +assert.eq((1, 2), (1, 2)) +assert.eq((1, 2, 3, 4, 5), (1, 2, 3, 4, 5)) 
+assert.ne((1, 2, 3), (1, 2, 4)) + +# truth +assert.true((False,)) +assert.true((False, False)) +assert.true(not ()) + +# indexing, x[i] +assert.eq(("a", "b")[0], "a") +assert.eq(("a", "b")[1], "b") + +# slicing, x[i:j] +assert.eq("abcd"[0:4:1], "abcd") +assert.eq("abcd"[::2], "ac") +assert.eq("abcd"[1::2], "bd") +assert.eq("abcd"[4:0:-1], "dcb") +banana = tuple("banana".split_bytes()) +assert.eq(banana[7::-2], tuple("aaa".split_bytes())) +assert.eq(banana[6::-2], tuple("aaa".split_bytes())) +assert.eq(banana[5::-2], tuple("aaa".split_bytes())) +assert.eq(banana[4::-2], tuple("nnb".split_bytes())) + +# tuple +assert.eq(tuple(), ()) +assert.eq(tuple("abc".split_bytes()), ("a", "b", "c")) +assert.eq(tuple(["a", "b", "c"]), ("a", "b", "c")) +assert.eq(tuple([1]), (1,)) +assert.fails(lambda: tuple(1), "got int, want iterable") + +# tuple * int, int * tuple +abc = tuple("abc".split_bytes()) +assert.eq(abc * 0, ()) +assert.eq(abc * -1, ()) +assert.eq(abc * 1, abc) +assert.eq(abc * 3, ("a", "b", "c", "a", "b", "c", "a", "b", "c")) +assert.eq(0 * abc, ()) +assert.eq(-1 * abc, ()) +assert.eq(1 * abc, abc) +assert.eq(3 * abc, ("a", "b", "c", "a", "b", "c", "a", "b", "c")) + +# TODO(adonovan): test use of tuple as sequence +# (for loop, comprehension, library functions). diff --git a/value.go b/value.go new file mode 100644 index 0000000..62724c7 --- /dev/null +++ b/value.go @@ -0,0 +1,1081 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package skylark provides a Skylark interpreter. +// +// Skylark values are represented by the Value interface. 
+// The following built-in Value types are known to the evaluator: +// +// NoneType -- NoneType +// Bool -- bool +// Int -- int +// Float -- float +// String -- string +// *List -- list +// Tuple -- tuple +// *Dict -- dict +// *Set -- set +// *Function -- function (implemented in Skylark) +// *Builtin -- builtin (function or method implemented in Go) +// +// Client applications may define new data types that satisfy at least +// the Value interface. Such types may provide additional operations by +// implementing any of these optional interfaces: +// +// Callable -- value is callable like a function +// Comparable -- value defines its own comparison operations +// Iterable -- value is iterable using 'for' loops +// Sequence -- value is iterable sequence of known length +// Indexable -- value is sequence with efficient random access +// HasBinary -- value defines binary operations such as * and + +// HasAttrs -- value has readable fields or methods x.f +// HasSetField -- value has settable fields x.f +// HasSetIndex -- value supports element update using x[i]=y +// +// Client applications may also define domain-specific functions in Go +// and make them available to Skylark programs. Use NewBuiltin to +// construct a built-in value that wraps a Go function. The +// implementation of the Go function may use UnpackArgs to make sense of +// the positional and keyword arguments provided by the caller. +// +// Skylark's None value is not equal to Go's nil, but nil may be +// assigned to a Skylark Value. Be careful to avoid allowing Go nil +// values to leak into Skylark data structures. +// +// The Compare operation requires two arguments of the same +// type, but this constraint cannot be expressed in Go's type system. +// (This is the classic "binary method problem".) +// So, each Value type's CompareSameType method is a partial function +// that compares a value only against others of the same type. 
+// Use the package's standalone Compare (or Equal) function to compare +// an arbitrary pair of values. +// +// To parse and evaluate a Skylark source file, use ExecFile. The Eval +// function evaluates a single expression. All evaluator functions +// require a Thread parameter which defines the "thread-local storage" +// of a Skylark thread and may be used to plumb application state +// through Skylark code and into callbacks. When evaluation fails it +// returns an EvalError from which the application may obtain a +// backtrace of active Skylark calls. +// +package skylark + +// This file defines the data types of Skylark and their basic operations. + +import ( + "bytes" + "fmt" + "math" + "math/big" + "reflect" + "strconv" + "strings" + "unicode/utf8" + + "github.com/google/skylark/syntax" +) + +// Value is a value in the Skylark interpreter. +type Value interface { + // String returns the string representation of the value. + // Skylark string values are quoted as if by Python's repr. + String() string + + // Type returns a short string describing the value's type. + Type() string + + // Freeze causes the value, and all values transitively + // reachable from it through collections and closures, to be + // marked as frozen. All subsequent mutations to the data + // structure through this API will fail dynamically, making the + // data structure immutable and safe for publishing to other + // Skylark interpreters running concurrently. + Freeze() + + // Truth returns the truth value of an object, according to Python rules. + // http://docs.python.org/2/library/stdtypes.html#truth-value-testing + Truth() Bool + + // Hash returns a function of x such that Equal(x, y) => Hash(x) == Hash(y). + // Hash may fail if the value's type is not hashable, or if the value + // contains a non-hashable value. + Hash() (uint32, error) +} + +// A Comparable is a value that defines its own equivalence relation and +// perhaps ordered comparisons. 
+type Comparable interface { + Value + // CompareSameType compares one value to another of the same Type(). + // The comparison operation must be one of EQL, NEQ, LT, LE, GT, or GE. + // CompareSameType returns an error if an ordered comparison was + // requested for a type that does not support it. + // + // Implementations that recursively compare subcomponents of + // the value should use the CompareDepth function, not Compare, to + // avoid infinite recursion on cyclic structures. + // + // The depth parameter is used to bound comparisons of cyclic + // data structures. Implementations should decrement depth + // before calling CompareDepth and should return an error if depth + // < 1. + // + // Client code should not call this method. Instead, use the + // standalone Compare or Equals functions, which are defined for + // all pairs of operands. + CompareSameType(op syntax.Token, y Value, depth int) (bool, error) +} + +var ( + _ Comparable = None + _ Comparable = Int{} + _ Comparable = False + _ Comparable = Float(0) + _ Comparable = String("") + _ Comparable = (*Dict)(nil) + _ Comparable = (*List)(nil) + _ Comparable = Tuple(nil) + _ Comparable = (*Set)(nil) +) + +// A Callable value f may be the operand of a function call, f(x). +type Callable interface { + Value + Name() string + Call(thread *Thread, args Tuple, kwargs []Tuple) (Value, error) +} + +var ( + _ Callable = (*Builtin)(nil) + _ Callable = (*Function)(nil) +) + +// An Iterable abstracts a sequence of values. +// An iterable value may be iterated over by a 'for' loop or used where +// any other Skylark iterable is allowed. Unlike a Sequence, the length +// of an Iterable is not necessarily known in advance of iteration. +type Iterable interface { + Value + Iterate() Iterator // must be followed by call to Iterator.Done +} + +// A Sequence is a sequence of values of known length. 
+type Sequence interface { + Iterable + Len() int +} + +var ( + _ Sequence = (*Dict)(nil) + _ Sequence = (*Set)(nil) +) + +// An Indexable is a sequence of known length that supports efficient random access. +// It is not necessarily iterable. +type Indexable interface { + Value + Index(i int) Value // requires 0 <= i < Len() + Len() int +} + +// A HasSetIndex is an Indexable value whose elements may be assigned (x[i] = y). +// +// The implementation should not add Len to a negative index as the +// evaluator does this before the call. +type HasSetIndex interface { + Indexable + SetIndex(index int, v Value) error +} + +var ( + _ HasSetIndex = (*List)(nil) + _ Indexable = Tuple(nil) + _ Indexable = String("") +) + +// An Iterator provides a sequence of values to the caller. +// +// The caller must call Done when the iterator is no longer needed. +// Operations that modify a sequence will fail if it has active iterators. +// +// Example usage: +// +// iter := iterable.Iterate() +// defer iter.Done() +// var x Value +// for iter.Next(&x) { +// ... +// } +// +type Iterator interface { + // If the iterator is exhausted, Next returns false. + // Otherwise it sets *p to the current element of the sequence, + // advances the iterator, and returns true. + Next(p *Value) bool + Done() +} + +// A Mapping is a mapping from keys to values, such as a dictionary. +type Mapping interface { + Value + // Get returns the value corresponding to the specified key, + // or !found if the mapping does not contain the key. + Get(Value) (v Value, found bool, err error) +} + +var _ Mapping = (*Dict)(nil) + +// A HasBinary value may be used as either operand of these binary operators: +// + - * / % in not in | & +// The Side argument indicates whether the receiver is the left or right operand. +// +// An implementation may decline to handle an operation by returning (nil, nil). 
+// For this reason, clients should always call the standalone Binary(op, x, y) +// function rather than calling the method directly. +type HasBinary interface { + Value + Binary(op syntax.Token, y Value, side Side) (Value, error) +} + +type Side bool + +const ( + Left Side = false + Right Side = true +) + +// A HasAttrs value has fields or methods that may be read by a dot expression (y = x.f). +// Attribute names may be listed using the built-in 'dir' function. +// +// For implementation convenience, a result of (nil, nil) from Attr is +// interpreted as a "no such field or method" error. Implementations are +// free to return a more precise error. +type HasAttrs interface { + Value + Attr(name string) (Value, error) // returns (nil, nil) if attribute not present + AttrNames() []string // callers must not modify the result. +} + +var ( + _ HasAttrs = String("") + _ HasAttrs = new(List) + _ HasAttrs = new(Dict) + _ HasAttrs = new(Set) +) + +// A HasSetField value has fields that may be written by a dot expression (x.f = y). +type HasSetField interface { + HasAttrs + SetField(name string, val Value) error +} + +// NoneType is the type of None. Its only legal value is None. +// (We represent it as a number, not struct{}, so that None may be constant.) +type NoneType byte + +const None = NoneType(0) + +func (NoneType) String() string { return "None" } +func (NoneType) Type() string { return "NoneType" } +func (NoneType) Freeze() {} // immutable +func (NoneType) Truth() Bool { return False } +func (NoneType) Hash() (uint32, error) { return 0, nil } +func (NoneType) CompareSameType(op syntax.Token, y Value, depth int) (bool, error) { + return threeway(op, 0), nil +} + +// Bool is the type of a Skylark bool. 
+type Bool bool
+
+const (
+	False Bool = false
+	True  Bool = true
+)
+
+// String returns "True" or "False".
+func (b Bool) String() string {
+	if b {
+		return "True"
+	}
+	return "False"
+}
+func (b Bool) Type() string          { return "bool" }
+func (b Bool) Freeze()               {} // immutable
+func (b Bool) Truth() Bool           { return b }
+func (b Bool) Hash() (uint32, error) { return uint32(b2i(bool(b))), nil }
+
+// CompareSameType compares x with y_, which must be a Bool (the type
+// assertion panics otherwise). The difference of the operands' b2i
+// values is passed to threeway together with op.
+func (x Bool) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
+	y := y_.(Bool)
+	return threeway(op, b2i(bool(x))-b2i(bool(y))), nil
+}
+
+// Float is the type of a Skylark float.
+type Float float64
+
+func (f Float) String() string { return strconv.FormatFloat(float64(f), 'g', 6, 64) }
+func (f Float) Type() string   { return "float" }
+func (f Float) Freeze()        {} // immutable
+func (f Float) Truth() Bool    { return f != 0.0 }
+
+// Hash returns the hash of f. Finite values are hashed through their
+// Int conversion; NaN and the infinities share one constant hash.
+func (f Float) Hash() (uint32, error) {
+	// Equal float and int values must yield the same hash.
+	// TODO(adonovan): opt: if f is non-integral, and thus not equal
+	// to any Int, we can avoid the Int conversion and use a cheaper hash.
+	if isFinite(float64(f)) {
+		return finiteFloatToInt(f).Hash()
+	}
+	return 1618033, nil // NaN, +/-Inf
+}
+
+// floor returns the greatest integral Float not exceeding f (math.Floor).
+func floor(f Float) Float { return Float(math.Floor(float64(f))) }
+
+// isFinite reports whether f represents a finite rational value.
+// It is equivalent to !math.IsNaN(f) && !math.IsInf(f, 0).
+func isFinite(f float64) bool {
+	// NaN fails every ordered comparison and |±Inf| exceeds MaxFloat64,
+	// so this single test rejects both.
+	return math.Abs(f) <= math.MaxFloat64
+}
+
+// CompareSameType compares x with y_, which must be a Float (the type
+// assertion panics otherwise), using Go's float64 comparison for op.
+// It panics if op is not one of EQL, NEQ, LE, LT, GE, or GT.
+func (x Float) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) {
+	y := y_.(Float)
+	switch op {
+	case syntax.EQL:
+		return x == y, nil
+	case syntax.NEQ:
+		return x != y, nil
+	case syntax.LE:
+		return x <= y, nil
+	case syntax.LT:
+		return x < y, nil
+	case syntax.GE:
+		return x >= y, nil
+	case syntax.GT:
+		return x > y, nil
+	}
+	panic(op)
+}
+
+// rational returns f as a newly allocated *big.Rat (see big.Rat.SetFloat64).
+func (f Float) rational() *big.Rat { return new(big.Rat).SetFloat64(float64(f)) }
+
+// AsFloat returns the float64 value closest to x.
+// The f result is undefined if x is not a float or int. +func AsFloat(x Value) (f float64, ok bool) { + switch x := x.(type) { + case Float: + return float64(x), true + case Int: + return float64(x.Float()), true + } + return 0, false +} + +func (x Float) Mod(y Float) Float { return Float(math.Mod(float64(x), float64(y))) } + +// String is the type of a Skylark string. +// +// A String is an immutable sequence of bytes. Strings are indexable but +// not iterable; indexing a string yields one of its 1-byte substrings. +type String string + +func (s String) String() string { return strconv.Quote(string(s)) } +func (s String) Type() string { return "string" } +func (s String) Freeze() {} // immutable +func (s String) Truth() Bool { return len(s) > 0 } +func (s String) Hash() (uint32, error) { return hashString(string(s)), nil } +func (s String) Len() int { return len(s) } // bytes +func (s String) Index(i int) Value { return s[i : i+1] } + +func (s String) Attr(name string) (Value, error) { return builtinAttr(s, name, stringMethods) } +func (s String) AttrNames() []string { return builtinAttrNames(stringMethods) } + +func (x String) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(String) + return threeway(op, strings.Compare(string(x), string(y))), nil +} + +func AsString(x Value) (string, bool) { v, ok := x.(String); return string(v), ok } + +// A stringIterable is an iterable whose iterator yields a sequence of +// either Unicode code points or bytes, +// either numerically or as successive substrings. +type stringIterable struct { + s String + split bool + codepoints bool +} + +var _ Iterable = (*stringIterable)(nil) + +func (si stringIterable) String() string { + if si.split { + return si.s.String() + ".split_" + si.Type() + "()" + } else { + return si.s.String() + "." 
+ si.Type() + "()" + } +} +func (si stringIterable) Type() string { + if si.codepoints { + return "codepoints" + } else { + return "bytes" + } +} +func (si stringIterable) Freeze() {} // immutable +func (si stringIterable) Truth() Bool { return True } +func (si stringIterable) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: %s", si.Type()) } +func (si stringIterable) Iterate() Iterator { return &stringIterator{si, 0} } + +type stringIterator struct { + si stringIterable + i int +} + +func (it *stringIterator) Next(p *Value) bool { + s := it.si.s[it.i:] + if s == "" { + return false + } + if it.si.codepoints { + r, sz := utf8.DecodeRuneInString(string(s)) + if it.si.split { + *p = s[:sz] + } else { + *p = MakeInt(int(r)) + } + it.i += sz + } else { + b := int(s[0]) + if it.si.split { + *p = s[:1] + } else { + *p = MakeInt(b) + } + it.i += 1 + } + return true +} + +func (*stringIterator) Done() {} + +// A Function is a function defined by a Skylark def statement. +type Function struct { + name string // "lambda" for anonymous functions + position syntax.Position // position of def or lambda token + syntax *syntax.Function + globals StringDict + defaults Tuple + freevars Tuple +} + +func (fn *Function) Name() string { return fn.name } +func (fn *Function) Hash() (uint32, error) { return hashString(fn.name), nil } +func (fn *Function) Freeze() { fn.defaults.Freeze(); fn.freevars.Freeze() } +func (fn *Function) String() string { return toString(fn) } +func (fn *Function) Type() string { return "function" } +func (fn *Function) Truth() Bool { return true } + +func (fn *Function) Syntax() *syntax.Function { return fn.syntax } + +// A Builtin is a function implemented in Go. +type Builtin struct { + name string + fn func(thread *Thread, fn *Builtin, args Tuple, kwargs []Tuple) (Value, error) + recv Value // for bound methods (e.g. 
"".startswith) +} + +func (b *Builtin) Name() string { return b.name } +func (b *Builtin) Freeze() { + if b.recv != nil { + b.recv.Freeze() + } +} +func (b *Builtin) Hash() (uint32, error) { + h := hashString(b.name) + if b.recv != nil { + h ^= 5521 + } + return h, nil +} +func (b *Builtin) Receiver() Value { return b.recv } +func (b *Builtin) String() string { return toString(b) } +func (b *Builtin) Type() string { return "builtin" } +func (b *Builtin) Call(thread *Thread, args Tuple, kwargs []Tuple) (Value, error) { + return b.fn(thread, b, args, kwargs) +} +func (b *Builtin) Truth() Bool { return true } + +// NewBuiltin returns a new 'builtin' value with the specified name +// and implementation. It compares unequal with all other values. +func NewBuiltin(name string, fn func(thread *Thread, fn *Builtin, args Tuple, kwargs []Tuple) (Value, error)) *Builtin { + return &Builtin{name: name, fn: fn} +} + +// BindReceiver returns a new Builtin value representing a method +// closure, that is, a built-in function bound to a receiver value. +// +// In the example below, the value of f is the string.index builtin bound to +// the receiver value "abc": +// +// f = "abc".index; f("a"); f("b") +// +// In the common case, the receiver is bound only during the call, +// but this still results in the creation of a temporary method closure: +// +// "abc".index("a") +// +func (b *Builtin) BindReceiver(recv Value) *Builtin { + return &Builtin{name: b.name, fn: b.fn, recv: recv} +} + +// A *Dict represents a Skylark dictionary. 
+type Dict struct { + ht hashtable +} + +func (d *Dict) Clear() error { return d.ht.clear() } +func (d *Dict) Delete(k Value) (v Value, found bool, err error) { return d.ht.delete(k) } +func (d *Dict) Get(k Value) (v Value, found bool, err error) { return d.ht.lookup(k) } +func (d *Dict) Items() []Tuple { return d.ht.items() } +func (d *Dict) Keys() []Value { return d.ht.keys() } +func (d *Dict) Len() int { return int(d.ht.len) } +func (d *Dict) Iterate() Iterator { return d.ht.iterate() } +func (d *Dict) Set(k, v Value) error { return d.ht.insert(k, v) } +func (d *Dict) String() string { return toString(d) } +func (d *Dict) Type() string { return "dict" } +func (d *Dict) Freeze() { d.ht.freeze() } +func (d *Dict) Truth() Bool { return d.Len() > 0 } +func (d *Dict) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable type: dict") } + +func (d *Dict) Attr(name string) (Value, error) { return builtinAttr(d, name, dictMethods) } +func (d *Dict) AttrNames() []string { return builtinAttrNames(dictMethods) } + +func (x *Dict) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(*Dict) + switch op { + case syntax.EQL: + ok, err := dictsEqual(x, y, depth) + return ok, err + case syntax.NEQ: + ok, err := dictsEqual(x, y, depth) + return !ok, err + default: + return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) + } +} + +func dictsEqual(x, y *Dict, depth int) (bool, error) { + if x.Len() != y.Len() { + return false, nil + } + for _, xitem := range x.Items() { + key, xval := xitem[0], xitem[1] + + if yval, found, _ := y.Get(key); !found { + return false, nil + } else if eq, err := EqualDepth(xval, yval, depth-1); err != nil { + return false, err + } else if !eq { + return false, nil + } + } + return true, nil +} + +// A *List represents a Skylark list value. 
+type List struct { + elems []Value + frozen bool + itercount uint32 // number of active iterators (ignored if frozen) +} + +// NewList returns a list containing the specified elements. +// Callers should not subsequently modify elems. +func NewList(elems []Value) *List { return &List{elems: elems} } + +func (l *List) Freeze() { + if !l.frozen { + l.frozen = true + for _, elem := range l.elems { + elem.Freeze() + } + } +} + +// checkMutable reports an error if the list should not be mutated. +// verb+" list" should describe the operation. +// Structural mutations are not permitted during iteration. +func (l *List) checkMutable(verb string, structural bool) error { + if l.frozen { + return fmt.Errorf("cannot %s frozen list", verb) + } + if structural && l.itercount > 0 { + return fmt.Errorf("cannot %s list during iteration", verb) + } + return nil +} + +func (l *List) String() string { return toString(l) } +func (l *List) Type() string { return "list" } +func (l *List) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable type: list") } +func (l *List) Truth() Bool { return l.Len() > 0 } +func (l *List) Len() int { return len(l.elems) } +func (l *List) Index(i int) Value { return l.elems[i] } + +func (l *List) Attr(name string) (Value, error) { return builtinAttr(l, name, listMethods) } +func (l *List) AttrNames() []string { return builtinAttrNames(listMethods) } + +func (l *List) Iterate() Iterator { + if !l.frozen { + l.itercount++ + } + return &listIterator{l: l} +} + +func (x *List) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(*List) + // It's tempting to check x == y as an optimization here, + // but wrong because a list containing NaN is not equal to itself. + return sliceCompare(op, x.elems, y.elems, depth) +} + +func sliceCompare(op syntax.Token, x, y []Value, depth int) (bool, error) { + // Fast path: check length. 
+ if len(x) != len(y) && (op == syntax.EQL || op == syntax.NEQ) { + return op == syntax.NEQ, nil + } + + // Find first element that is not equal in both lists. + for i := 0; i < len(x) && i < len(y); i++ { + if eq, err := EqualDepth(x[i], y[i], depth-1); err != nil { + return false, err + } else if !eq { + switch op { + case syntax.EQL: + return false, nil + case syntax.NEQ: + return true, nil + default: + return CompareDepth(op, x[i], y[i], depth-1) + } + } + } + + return threeway(op, len(x)-len(y)), nil +} + +type listIterator struct { + l *List + i int +} + +func (it *listIterator) Next(p *Value) bool { + if it.i < it.l.Len() { + *p = it.l.elems[it.i] + it.i++ + return true + } + return false +} + +func (it *listIterator) Done() { + if !it.l.frozen { + it.l.itercount-- + } +} + +func (l *List) SetIndex(i int, v Value) error { + if err := l.checkMutable("assign to element of", false); err != nil { + return err + } + l.elems[i] = v + return nil +} + +func (l *List) Append(v Value) error { + if err := l.checkMutable("append to", true); err != nil { + return err + } + l.elems = append(l.elems, v) + return nil +} + +func (l *List) Clear() error { + if err := l.checkMutable("clear", true); err != nil { + return err + } + for i := range l.elems { + l.elems[i] = nil // aid GC + } + l.elems = l.elems[:0] + return nil +} + +// A Tuple represents a Skylark tuple value. 
+type Tuple []Value + +func (t Tuple) Len() int { return len(t) } +func (t Tuple) Index(i int) Value { return t[i] } +func (t Tuple) Iterate() Iterator { return &tupleIterator{elems: t} } +func (t Tuple) Freeze() { + for _, elem := range t { + elem.Freeze() + } +} +func (t Tuple) String() string { return toString(t) } +func (t Tuple) Type() string { return "tuple" } +func (t Tuple) Truth() Bool { return len(t) > 0 } + +func (x Tuple) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(Tuple) + return sliceCompare(op, x, y, depth) +} + +func (t Tuple) Hash() (uint32, error) { + // Use same algorithm as Python. + var x, mult uint32 = 0x345678, 1000003 + for _, elem := range t { + y, err := elem.Hash() + if err != nil { + return 0, err + } + x = x ^ y*mult + mult += 82520 + uint32(len(t)+len(t)) + } + return x, nil +} + +type tupleIterator struct{ elems Tuple } + +func (it *tupleIterator) Next(p *Value) bool { + if len(it.elems) > 0 { + *p = it.elems[0] + it.elems = it.elems[1:] + return true + } + return false +} + +func (it *tupleIterator) Done() {} + +// A Set represents a Skylark set value. 
+type Set struct { + ht hashtable // values are all None +} + +func (s *Set) Delete(k Value) (found bool, err error) { _, found, err = s.ht.delete(k); return } +func (s *Set) Clear() error { return s.ht.clear() } +func (s *Set) Has(k Value) (found bool, err error) { _, found, err = s.ht.lookup(k); return } +func (s *Set) Insert(k Value) error { return s.ht.insert(k, None) } +func (s *Set) Len() int { return int(s.ht.len) } +func (s *Set) Iterate() Iterator { return s.ht.iterate() } +func (s *Set) String() string { return toString(s) } +func (s *Set) Type() string { return "set" } +func (s *Set) elems() []Value { return s.ht.keys() } +func (s *Set) Freeze() { s.ht.freeze() } +func (s *Set) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable type: set") } +func (s *Set) Truth() Bool { return s.Len() > 0 } + +func (s *Set) Attr(name string) (Value, error) { return builtinAttr(s, name, setMethods) } +func (s *Set) AttrNames() []string { return builtinAttrNames(setMethods) } + +func (x *Set) CompareSameType(op syntax.Token, y_ Value, depth int) (bool, error) { + y := y_.(*Set) + switch op { + case syntax.EQL: + ok, err := setsEqual(x, y, depth) + return ok, err + case syntax.NEQ: + ok, err := setsEqual(x, y, depth) + return !ok, err + default: + return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) + } +} + +func setsEqual(x, y *Set, depth int) (bool, error) { + if x.Len() != y.Len() { + return false, nil + } + for _, elem := range x.elems() { + if found, _ := y.Has(elem); !found { + return false, nil + } + } + return true, nil +} + +func (s *Set) Union(iter Iterator) (Value, error) { + set := new(Set) + for _, elem := range s.elems() { + set.Insert(elem) // can't fail + } + var x Value + for iter.Next(&x) { + if err := set.Insert(x); err != nil { + return nil, err + } + } + return set, nil +} + +// toString returns the string form of value v. +// It may be more efficient than v.String() for larger values. 
+func toString(v Value) string { + var buf bytes.Buffer + path := make([]Value, 0, 4) + writeValue(&buf, v, path) + return buf.String() +} + +// path is the list of *List and *Dict values we're currently printing. +// (These are the only potentially cyclic structures.) +func writeValue(out *bytes.Buffer, x Value, path []Value) { + switch x := x.(type) { + case NoneType: + out.WriteString("None") + + case Int: + out.WriteString(x.String()) + + case Bool: + if x { + out.WriteString("True") + } else { + out.WriteString("False") + } + + case String: + fmt.Fprintf(out, "%q", string(x)) + + case *List: + out.WriteByte('[') + if pathContains(path, x) { + out.WriteString("...") // list contains itself + } else { + for i, elem := range x.elems { + if i > 0 { + out.WriteString(", ") + } + writeValue(out, elem, append(path, x)) + } + } + out.WriteByte(']') + + case Tuple: + out.WriteByte('(') + for i, elem := range x { + if i > 0 { + out.WriteString(", ") + } + writeValue(out, elem, path) + } + if len(x) == 1 { + out.WriteByte(',') + } + out.WriteByte(')') + + case *Function: + fmt.Fprintf(out, "<function %s>", x.Name()) + + case *Builtin: + if x.recv != nil { + fmt.Fprintf(out, "<built-in method %s of %s value>", x.Name(), x.recv.Type()) + } else { + fmt.Fprintf(out, "<built-in function %s>", x.Name()) + } + + case *Dict: + out.WriteByte('{') + if pathContains(path, x) { + out.WriteString("...") // dict contains itself + } else { + sep := "" + for _, item := range x.Items() { + k, v := item[0], item[1] + out.WriteString(sep) + writeValue(out, k, path) + out.WriteString(": ") + writeValue(out, v, append(path, x)) // cycle check + sep = ", " + } + } + out.WriteByte('}') + + case *Set: + out.WriteString("set([") + for i, elem := range x.elems() { + if i > 0 { + out.WriteString(", ") + } + writeValue(out, elem, path) + } + out.WriteString("])") + + default: + out.WriteString(x.String()) + } +} + +func pathContains(path []Value, x Value) bool { + for _, y := range path { + if x 
== y { + return true + } + } + return false +} + +const maxdepth = 10 + +// Equal reports whether two Skylark values are equal. +func Equal(x, y Value) (bool, error) { + return EqualDepth(x, y, maxdepth) +} + +// EqualDepth reports whether two Skylark values are equal. +// +// Recursive comparisons by implementations of Value.CompareSameType +// should use EqualDepth to prevent infinite recursion. +func EqualDepth(x, y Value, depth int) (bool, error) { + return CompareDepth(syntax.EQL, x, y, depth) +} + +// Compare compares two Skylark values. +// The comparison operation must be one of EQL, NEQ, LT, LE, GT, or GE. +// Compare returns an error if an ordered comparison was +// requested for a type that does not support it. +// +// Recursive comparisons by implementations of Value.CompareSameType +// should use CompareDepth to prevent infinite recursion. +func Compare(op syntax.Token, x, y Value) (bool, error) { + return CompareDepth(op, x, y, maxdepth) +} + +// CompareDepth compares two Skylark values. +// The comparison operation must be one of EQL, NEQ, LT, LE, GT, or GE. +// CompareDepth returns an error if an ordered comparison was +// requested for a pair of values that do not support it. +// +// The depth parameter limits the maximum depth of recursion +// in cyclic data structures. 
+func CompareDepth(op syntax.Token, x, y Value, depth int) (bool, error) { + if depth < 1 { + return false, fmt.Errorf("comparison exceeded maximum recursion depth") + } + if sameType(x, y) { + if xcomp, ok := x.(Comparable); ok { + return xcomp.CompareSameType(op, y, depth) + } + + // use identity comparison + switch op { + case syntax.EQL: + return x == y, nil + case syntax.NEQ: + return x != y, nil + } + return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) + } + + // different types + + // int/float ordered comparisons + switch x := x.(type) { + case Int: + if y, ok := y.(Float); ok { + if y != y { + return false, nil // y is NaN + } + var cmp int + if !math.IsInf(float64(y), 0) { + cmp = x.rational().Cmp(y.rational()) // y is finite + } else if y > 0 { + cmp = -1 // y is +Inf + } else { + cmp = +1 // y is -Inf + } + return threeway(op, cmp), nil + } + case Float: + if y, ok := y.(Int); ok { + if x != x { + return false, nil // x is NaN + } + var cmp int + if !math.IsInf(float64(x), 0) { + cmp = x.rational().Cmp(y.rational()) // x is finite + } else if x > 0 { + cmp = -1 // x is +Inf + } else { + cmp = +1 // x is -Inf + } + return threeway(op, cmp), nil + } + } + + // All other values of different types compare unequal. + switch op { + case syntax.EQL: + return false, nil + case syntax.NEQ: + return true, nil + } + return false, fmt.Errorf("%s %s %s not implemented", x.Type(), op, y.Type()) +} + +func sameType(x, y Value) bool { + return reflect.TypeOf(x) == reflect.TypeOf(y) || x.Type() == y.Type() +} + +// threeway interprets a three-way comparison value cmp (-1, 0, +1) +// as a boolean comparison (e.g. x < y). 
+func threeway(op syntax.Token, cmp int) bool { + switch op { + case syntax.EQL: + return cmp == 0 + case syntax.NEQ: + return cmp != 0 + case syntax.LE: + return cmp <= 0 + case syntax.LT: + return cmp < 0 + case syntax.GE: + return cmp >= 0 + case syntax.GT: + return cmp > 0 + } + panic(op) +} + +func b2i(b bool) int { + if b { + return 1 + } else { + return 0 + } +} + +// Len returns the length of a string or sequence value, +// and -1 for all others. +// +// Warning: Len(x) >= 0 does not imply Iterate(x) != nil. +// A string has a known length but is not directly iterable. +func Len(x Value) int { + switch x := x.(type) { + case String: + return x.Len() + case Sequence: + return x.Len() + } + return -1 +} + +// Iterate return a new iterator for the value if iterable, nil otherwise. +// If the result is non-nil, the caller must call Done when finished with it. +// +// Warning: Iterate(x) != nil does not imply Len(x) >= 0. +// Some iterables may have unknown length. +func Iterate(x Value) Iterator { + if x, ok := x.(Iterable); ok { + return x.Iterate() + } + return nil +} diff --git a/value_test.go b/value_test.go new file mode 100644 index 0000000..6b24432 --- /dev/null +++ b/value_test.go @@ -0,0 +1,22 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file defines tests of the Value API. +package skylark + +import ( + "testing" +) + +func TestListAppend(t *testing.T) { + l := NewList(nil) + l.Append(String("hello")) + res, ok := AsString(l.Index(0)) + if !ok { + t.Errorf("failed list.Append() got: %s, want: skylark.String", l.Index(0).Type()) + } + if res != "hello" { + t.Errorf("failed list.Append() got: %+v, want: hello", res) + } +} |