summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSasha Smundak <asmundak@google.com>2022-04-13 17:48:41 -0700
committerSasha Smundak <asmundak@google.com>2022-04-21 14:53:42 -0700
commit8a2bb4d4c8d9d122fe6e2e3373bca43f8a9a45a7 (patch)
treeaf91d62a82c671d4278e66ecc4367aecc76e818c
parentb4791ed73cbd9c5b502a4d9ee8c7cf3ef5d33c10 (diff)
downloadkythe-8a2bb4d4c8d9d122fe6e2e3373bca43f8a9a45a7.tar.gz
Implement Rust extractor for Soong.
Bug: 222044478 Test: manual Change-Id: Ib50e259af5716b5bd035d1e85490119ebeb489d9
-rw-r--r--Android.gen.bp15
-rw-r--r--kythe/go/extractors/cmd/rust_extractor/rust_extractor.go307
-rw-r--r--kythe/go/extractors/cmd/rust_extractor/rust_extractor_test.go97
3 files changed, 418 insertions, 1 deletions
diff --git a/Android.gen.bp b/Android.gen.bp
index 00b44ecd1..4f30e42ea 100644
--- a/Android.gen.bp
+++ b/Android.gen.bp
@@ -1,5 +1,5 @@
// Automatically generated with:
-// go2bp -rewrite github.com/google/go-cmp/cmp=go-cmp -rewrite google.golang.org/protobuf=golang-protobuf -rewrite github.com/beevik/etree=go-etree -rewrite bitbucket.org/creachadair=creachadair -rewrite github.com/google/subcommands=go-subcommands -rewrite golang.org/x=golang-x -rewrite kythe.io/kythe=kythe -rewrite gotool=go_extractor -limit kythe.io/kythe/go/extractors/config/runextractor -limit kythe.io/kythe/go/extractors/cmd/gotool -skip-tests
+// go2bp -rewrite github.com/google/go-cmp/cmp=go-cmp -rewrite google.golang.org/protobuf=golang-protobuf -rewrite github.com/beevik/etree=go-etree -rewrite bitbucket.org/creachadair=creachadair -rewrite github.com/google/subcommands=go-subcommands -rewrite golang.org/x=golang-x -rewrite kythe.io/kythe=kythe -rewrite gotool=go_extractor -limit kythe.io/kythe/go/extractors/config/runextractor -limit kythe.io/kythe/go/extractors/cmd/gotool -limit kythe.io/kythe/go/extractors/cmd/rust_extractor -skip-tests
blueprint_go_binary {
name: "go_extractor",
@@ -17,6 +17,19 @@ blueprint_go_binary {
],
}
+blueprint_go_binary {
+ name: "rust_extractor",
+ deps: [
+ "kythe-go-platform-kzip",
+ "kythe-go-util-vnameutil",
+ "kythe-proto-analysis_go_proto",
+ "kythe-proto-storage_go_proto",
+ ],
+ srcs: [
+ "kythe/go/extractors/cmd/rust_extractor/rust_extractor.go",
+ ],
+}
+
bootstrap_go_package {
name: "kythe-go-extractors-config-preprocessor-modifier",
pkgPath: "kythe.io/kythe/go/extractors/config/preprocessor/modifier",
diff --git a/kythe/go/extractors/cmd/rust_extractor/rust_extractor.go b/kythe/go/extractors/cmd/rust_extractor/rust_extractor.go
new file mode 100644
index 000000000..a7a7f7b3e
--- /dev/null
+++ b/kythe/go/extractors/cmd/rust_extractor/rust_extractor.go
@@ -0,0 +1,307 @@
+// rust_extractor prepares a kzip for a given Rust compilation.
+// It is a "prefix tool", i.e. if the Rust compiler is invoked with
+// rustc --emit link foo.rs ...
+// then running
+// KYTHE_OUTPUT_FILE=foo.kzip rust_extractor rustc --emit link foo.rs ...
+// creates foo.kzip.
+// The Kythe corpus is picked from the KYTHE_CORPUS environment variable.
+// The KYTHE_VNAMES environment variable points to the JSON file with path rewrite rules.
+
+package main
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "kythe.io/kythe/go/platform/kzip"
+ "kythe.io/kythe/go/util/vnameutil"
+ apb "kythe.io/kythe/proto/analysis_go_proto"
+ spb "kythe.io/kythe/proto/storage_go_proto"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "regexp"
+ "strings"
+)
+
+// maybeFatal is the program's single error exit: a nil err is a no-op, any
+// other err is printed to standard error and the process exits with status 2.
+func maybeFatal(err error) {
+	if err != nil {
+		_, _ = fmt.Fprintln(os.Stderr, err)
+		os.Exit(2)
+	}
+}
+
+// Context maintains extraction state and implements extraction.
+type Context struct {
+ cu *apb.CompilationUnit // compilation unit being assembled; written to the kzip by saveKzip
+ kzipPath string // output kzip path; also the prefix of the .sa.json/.deps/.metadummy side files
+ kzipFile *kzip.Writer // archive writer, created in saveKzip
+ rustCompiler string // first command-line word that is not NAME=VALUE; executed by runCompiler
+ inputs []string // ".rs" files collected by getInputFiles
+ rewriteRules vnameutil.Rules // vname rewrite rules set by loadRules; zero value if never loaded
+}
+
+// newContext builds the extraction context for a command line.
+//
+// Leading NAME=VALUE words of args are recorded as the compilation unit's
+// environment. The first word that is not of that form is taken as the Rust
+// compiler, and every word after it becomes the compilation unit's argument
+// list. The unit's corpus is read from the KYTHE_CORPUS environment variable
+// and its language is fixed to "rust".
+func newContext(args []string, kzipPath string) *Context {
+	unit := &apb.CompilationUnit{
+		VName: &spb.VName{
+			Corpus:   os.Getenv("KYTHE_CORPUS"),
+			Language: "rust",
+		},
+	}
+	ctx := &Context{kzipPath: kzipPath, cu: unit}
+
+	// Process command line arguments.
+	assignment := regexp.MustCompile("^([A-Za-z_]\\w*)=(.*)$")
+	for pos := 0; pos < len(args); pos++ {
+		parts := assignment.FindStringSubmatch(args[pos])
+		if parts == nil {
+			// First non-assignment word: the compiler, followed by its arguments.
+			ctx.rustCompiler = args[pos]
+			unit.Argument = args[pos+1:]
+			break
+		}
+		unit.Environment = append(unit.Environment, &apb.CompilationUnit_Env{Name: parts[1], Value: parts[2]})
+	}
+	return ctx
+}
+
+// actualCompilerArgs derives the arguments the extractor passes to the
+// compiler from the recorded ones: every "--emit" and "-o" item is dropped,
+// and "--emit dep-info=<deps>,metadata=<metadata> -Z save-analysis" is
+// prepended so that the compiler writes the dependency and metadata files
+// next to the kzip and saves its analysis.
+func (ctx *Context) actualCompilerArgs() []string {
+	emit := fmt.Sprintf("dep-info=%s,metadata=%s", ctx.depsPath(), ctx.metadataPath())
+	result := []string{"--emit", emit, "-Z", "save-analysis"}
+
+	for items := newArgReader(ctx.cu.Argument); !items.atEnd(); items.read() {
+		if k := items.key(); k == "--emit" || k == "-o" {
+			continue
+		}
+		result = append(result, items.currentArg()...)
+	}
+	return result
+}
+
+// runCompiler runs the Rust compiler with the arguments produced by
+// actualCompilerArgs and then collects the list of input files from the
+// dependency file the compiler wrote.
+//
+// Stale outputs of a previous run are removed first (failures are ignored:
+// the files may simply not exist) and the kzip's directory is created. The
+// compiler's combined output, if any, is echoed to standard output. Any
+// failure terminates the program via maybeFatal.
+func (ctx *Context) runCompiler() {
+	stale := []string{ctx.savedAnalysisPath(), ctx.depsPath(), ctx.metadataPath(), ctx.kzipPath}
+	for _, p := range stale {
+		_ = os.Remove(p)
+	}
+	maybeFatal(os.MkdirAll(filepath.Dir(ctx.kzipPath), 0755))
+
+	rustc := exec.Command(ctx.rustCompiler, ctx.actualCompilerArgs()...)
+
+	// Environment: ours, then the assignments given on the command line,
+	// then the save-analysis configuration (which names the JSON output file).
+	env := os.Environ()
+	for _, v := range ctx.cu.Environment {
+		env = append(env, v.Name+"="+v.Value)
+	}
+	saveAnalysisConfig := fmt.Sprintf(`RUST_SAVE_ANALYSIS_CONFIG={`+
+		`"output_file": %q,`+
+		`"full_docs":true,`+
+		`"pub_only":false,`+
+		`"reachable_only":false,`+
+		`"distro_crate":false,`+
+		`"signatures":false,`+
+		`"borrow_data":false}`, ctx.savedAnalysisPath())
+	rustc.Env = append(env, saveAnalysisConfig)
+
+	out, runErr := rustc.CombinedOutput()
+	if len(out) > 0 {
+		fmt.Printf("%s\n", out)
+	}
+	maybeFatal(runErr)
+
+	// Build the list of the source files
+	depsFile, err := os.Open(ctx.depsPath())
+	maybeFatal(err)
+	ctx.getInputFiles(depsFile, ctx.depsPath())
+	_ = depsFile.Close()
+}
+
+// getInputFiles scans a Makefile-style dependency listing read from f and
+// appends to ctx.inputs every ".rs" prerequisite of the rule whose target is
+// dependentsOf, i.e. of each line of the form
+//	<dependentsOf>: file ...
+// Rules for other targets and prerequisites that are not .rs files are
+// ignored. A read error (including an over-long line) terminates the program
+// via maybeFatal instead of silently truncating the input list.
+func (ctx *Context) getInputFiles(f io.Reader, dependentsOf string) {
+	// Upper bound on a single line of the listing.
+	const maxLineSize = 64 << 20
+
+	s := bufio.NewScanner(f)
+	// A rule's prerequisites presumably all sit on one line, which for a large
+	// crate can exceed bufio.Scanner's default 64 KiB token limit.
+	s.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineSize)
+
+	// Matching "<target>:" as a whole avoids indexing past the end of a line
+	// that consists of the target name alone.
+	prefix := dependentsOf + ":"
+	for s.Scan() {
+		ln := s.Text()
+		if !strings.HasPrefix(ln, prefix) {
+			continue
+		}
+		for _, dep := range strings.Fields(ln[len(prefix):]) {
+			if strings.HasSuffix(dep, ".rs") {
+				ctx.inputs = append(ctx.inputs, dep)
+			}
+		}
+	}
+	maybeFatal(s.Err())
+}
+
+// loadRules reads the vname rewriting rules from r and installs them in the
+// context. Rules that cannot be read terminate the program via maybeFatal.
+func (ctx *Context) loadRules(r io.Reader) {
+	rules, err := vnameutil.ReadRules(r)
+	maybeFatal(err)
+	ctx.rewriteRules = rules
+}
+
+// makeVname returns the VName for a file path. The rewrite rules are tried
+// first; when none applies, the VName is just the path in the compilation
+// unit's corpus. A rule result that leaves the corpus empty also gets the
+// compilation unit's corpus.
+func (ctx *Context) makeVname(path string) *spb.VName {
+	unitCorpus := ctx.cu.VName.Corpus
+	rewritten, matched := ctx.rewriteRules.Apply(path)
+	if !matched {
+		rewritten = &spb.VName{Path: path, Corpus: unitCorpus}
+	}
+	// By default, the corpus is the same as compilation unit's.
+	if rewritten.Corpus == "" {
+		rewritten.Corpus = unitCorpus
+	}
+	return rewritten
+}
+
+// saveKzip writes the kzip archive at ctx.kzipPath.
+//
+// The archive contains an entry with the save-analysis contents, an entry for
+// each input file, and the compilation unit message. Every archived file is
+// also listed in the unit's required inputs; the input files are additionally
+// listed as source files and their VNames are marked with language "rust".
+// Any failure terminates the program via maybeFatal.
+func (ctx *Context) saveKzip() {
+	kzf, err := os.Create(ctx.kzipPath)
+	maybeFatal(err)
+	ctx.kzipFile, err = kzip.NewWriteCloser(kzf)
+	maybeFatal(err)
+
+	// First, save analysis
+	ctx.addRequiredInput(ctx.savedAnalysisPath())
+
+	// Input files
+	for _, input := range ctx.inputs {
+		fi := ctx.addRequiredInput(input)
+		fi.VName.Language = "rust"
+		ctx.cu.SourceFile = append(ctx.cu.SourceFile, input)
+	}
+
+	// Finally, compilation unit
+	_, err = ctx.kzipFile.AddUnit(ctx.cu, nil)
+	maybeFatal(err)
+	maybeFatal(ctx.kzipFile.Close())
+}
+
+// addRequiredInput stores the file at path in the kzip, appends the matching
+// entry to the compilation unit's required inputs and returns that entry.
+func (ctx *Context) addRequiredInput(path string) *apb.CompilationUnit_FileInput {
+	f, err := os.Open(path)
+	maybeFatal(err)
+	digest, err := ctx.kzipFile.AddFile(f)
+	// Check the archive error before Close can overwrite it.
+	maybeFatal(err)
+	maybeFatal(f.Close())
+
+	fi := &apb.CompilationUnit_FileInput{
+		VName: ctx.makeVname(path),
+		Info:  &apb.FileInfo{Path: path, Digest: digest},
+	}
+	ctx.cu.RequiredInput = append(ctx.cu.RequiredInput, fi)
+	return fi
+}
+
+// savedAnalysisPath returns the path of the save-analysis JSON file, which
+// sits next to the kzip. Pointer receiver: consistent with the other Context
+// methods and avoids copying the struct on every call.
+func (ctx *Context) savedAnalysisPath() string {
+	return ctx.kzipPath + ".sa.json"
+}
+
+// depsPath returns the path of the dependency (dep-info) file, which sits
+// next to the kzip. Pointer receiver: consistent with the other Context
+// methods and avoids copying the struct on every call.
+func (ctx *Context) depsPath() string {
+	return ctx.kzipPath + ".deps"
+}
+
+// metadataPath returns the path given to the compiler for its metadata
+// output, which sits next to the kzip. Pointer receiver: consistent with the
+// other Context methods and avoids copying the struct on every call.
+func (ctx *Context) metadataPath() string {
+	return ctx.kzipPath + ".metadummy"
+}
+
+// argReader iterates over rustc command-line "items".
+// An item can be:
+//	--foo val
+//	--foo=val
+//	-C val
+//	-Cval
+//	--test        (a switch that never takes a value)
+//	arg
+//
+// Typical use:
+//	for r := newArgReader(args); !r.atEnd(); r.read() { ... r.key() ... r.currentArg() ... }
+type argReader struct {
+	inArgs       []string // words being scanned
+	currentIndex int      // index of the first word of the current item
+	nextIndex    int      // index one past the last word of the current item
+	argKey       string   // option name of the current item, or the word itself for a plain argument
+}
+
+// newArgReader returns a reader positioned at the first item of args.
+func newArgReader(args []string) *argReader {
+	ret := &argReader{inArgs: args}
+	ret.read()
+	return ret
+}
+
+// atEnd reports whether all items have been consumed.
+func (ar *argReader) atEnd() bool {
+	return ar.currentIndex >= len(ar.inArgs)
+}
+
+// isRustcSwitch reports whether key is a rustc option that never takes a
+// value, so the word following it must not be consumed as one.
+func isRustcSwitch(key string) bool {
+	switch key {
+	case "-h", "--help", "-g", "-O", "--test", "-v", "--verbose", "-V", "--version":
+		return true
+	}
+	return false
+}
+
+// read advances to the next item, if any.
+func (ar *argReader) read() {
+	ar.currentIndex = ar.nextIndex
+	if ar.atEnd() {
+		return
+	}
+	ar.nextIndex++
+	ar.argKey = ar.inArgs[ar.currentIndex]
+	// HasPrefix rather than argKey[0]: an empty word is a plain argument.
+	if !strings.HasPrefix(ar.argKey, "-") {
+		return
+	}
+	// n is the length of the option name when the value is attached to it.
+	n := -1
+	if strings.HasPrefix(ar.argKey, "--") {
+		n = strings.Index(ar.argKey, "=")
+	} else if len(ar.argKey) > 2 {
+		n = 2
+	}
+	switch {
+	case n >= 0:
+		ar.argKey = ar.argKey[:n]
+	case !isRustcSwitch(ar.argKey) && ar.nextIndex < len(ar.inArgs):
+		// The value is the next word; a trailing option has none to take.
+		ar.nextIndex++
+	}
+}
+
+// currentArg returns the words that make up the current item.
+func (ar *argReader) currentArg() []string {
+	return ar.inArgs[ar.currentIndex:ar.nextIndex]
+}
+
+// key returns the option name of the current item ("--foo", "-C"), or the
+// word itself when the item is a plain argument.
+func (ar *argReader) key() string {
+	return ar.argKey
+}
+
+// main extracts one Rust compilation: it reads the output kzip path from
+// KYTHE_OUTPUT_FILE, parses "[NAME=VALUE ...] compiler args..." from the
+// command line, optionally loads vname rewrite rules from the file named by
+// KYTHE_VNAMES, runs the compiler and writes the kzip.
+func main() {
+	if len(os.Args) < 2 {
+		maybeFatal(fmt.Errorf("at least the rust compiler path should be present"))
+	}
+	const kzipEnv = "KYTHE_OUTPUT_FILE"
+	kzipPath := os.Getenv(kzipEnv)
+	if kzipPath == "" {
+		maybeFatal(fmt.Errorf("%s is not set", kzipEnv))
+	}
+	ctx := newContext(os.Args[1:], kzipPath)
+	if ctx.rustCompiler == "" {
+		// Every word was a NAME=VALUE assignment (or the compiler word was
+		// empty); fail here rather than with an obscure exec error later.
+		maybeFatal(fmt.Errorf("at least the rust compiler path should be present"))
+	}
+	if vnamesPath := os.Getenv("KYTHE_VNAMES"); vnamesPath != "" {
+		vf, err := os.Open(vnamesPath)
+		maybeFatal(err)
+		ctx.loadRules(vf)
+		// Opened read-only, so a close error cannot lose data.
+		_ = vf.Close()
+	}
+	ctx.runCompiler()
+	ctx.saveKzip()
+}
diff --git a/kythe/go/extractors/cmd/rust_extractor/rust_extractor_test.go b/kythe/go/extractors/cmd/rust_extractor/rust_extractor_test.go
new file mode 100644
index 000000000..ef6c6c50f
--- /dev/null
+++ b/kythe/go/extractors/cmd/rust_extractor/rust_extractor_test.go
@@ -0,0 +1,97 @@
+package main
+
+import (
+ "kythe.io/kythe/go/test/testutil"
+ apb "kythe.io/kythe/proto/analysis_go_proto"
+ spb "kythe.io/kythe/proto/storage_go_proto"
+ "os"
+ "reflect"
+ "strings"
+ "testing"
+)
+
+// TestNewContext checks that newContext splits a command line into leading
+// NAME=VALUE environment entries, the compiler, and the compiler arguments.
+func TestNewContext(t *testing.T) {
+	t.Run("processCommandLine", func(t *testing.T) {
+		line := "FOO=x RUST_VERSION=1.59 compiler arg"
+		ctx := newContext(strings.Split(line, " "), "foo.kzip")
+		if err := testutil.DeepEqual(
+			[]*apb.CompilationUnit_Env{
+				{Name: "FOO", Value: "x"},
+				{Name: "RUST_VERSION", Value: "1.59"}},
+			ctx.cu.Environment); err != nil {
+			t.Errorf("wrong environment: %s", err)
+		}
+		if err := testutil.DeepEqual("compiler", ctx.rustCompiler); err != nil {
+			t.Errorf("wrong compiler: %s", err)
+		}
+		// Compare against the real expected value and fail on a mismatch.
+		if err := testutil.DeepEqual([]string{"arg"}, ctx.cu.Argument); err != nil {
+			t.Errorf("wrong arguments: %s", err)
+		}
+	})
+}
+
+// TestContext_compilerArgs checks that actualCompilerArgs drops the original
+// --emit and -o items, keeps everything else in order, and prepends the
+// extractor's own --emit and -Z save-analysis options.
+func TestContext_compilerArgs(t *testing.T) {
+	const cmdLine = "rustc -C linker=lld --emit link -o foo lib.rs --extern std=bar.so -Zremap-cwd-prefix= --crate-type=rlib"
+	const expected = "--emit dep-info=foo.kzip.deps,metadata=foo.kzip.metadummy -Z save-analysis " +
+		"-C linker=lld lib.rs --extern std=bar.so -Zremap-cwd-prefix= --crate-type=rlib"
+
+	t.Run("parseArgs1", func(t *testing.T) {
+		want := strings.Split(expected, " ")
+		got := newContext(strings.Split(cmdLine, " "), "foo.kzip").actualCompilerArgs()
+		if err := testutil.DeepEqual(want, got); err != nil {
+			t.Errorf("wrong compiler arguments: %s", err)
+		}
+	})
+}
+
+// TestContext_getInputFiles checks that only the .rs prerequisites of the
+// requested target are collected from a dependency listing.
+func TestContext_getInputFiles(t *testing.T) {
+	t.Run("input files", func(t *testing.T) {
+		// Split on a single space so the compiler and its argument are separate words.
+		ctx := newContext(strings.Split("rustc foo.rs", " "), "foo.kzip")
+		deps := `
+foo.rmeta: x.rs x.der
+foo.deps: foo.rs y.der
+`
+		ctx.getInputFiles(strings.NewReader(deps), "foo.deps")
+		if err := testutil.DeepEqual([]string{"foo.rs"}, ctx.inputs); err != nil {
+			t.Errorf("wrong input files: %s", err)
+		}
+	})
+}
+
+// TestContext_makeVname checks vname rewriting: a rule that sets the root,
+// a catch-all rule, and the corpus defaulting to the compilation unit's.
+func TestContext_makeVname(t *testing.T) {
+	t.Run("name rewriting", func(t *testing.T) {
+		vnameJSON := `[
+	{
+		"pattern": "out/(.*)",
+		"vname": {
+			"root": "out",
+			"path": "@1@"
+		}
+	},
+	{
+		"pattern": "(.*)",
+		"vname": {
+			"path": "@1@"
+		}
+	}
+]
+`
+		tests := []struct {
+			path string
+			want *spb.VName
+		}{
+			// static
+			{"foo/bar.rs", &spb.VName{Path: "foo/bar.rs", Corpus: "aosp"}},
+			{"out/path.json", &spb.VName{Root: "out", Path: "path.json", Corpus: "aosp"}},
+		}
+
+		// Set the corpus for this test only and restore the previous state
+		// afterwards so other tests are not affected.
+		const corpusEnv = "KYTHE_CORPUS"
+		if old, ok := os.LookupEnv(corpusEnv); ok {
+			defer os.Setenv(corpusEnv, old)
+		} else {
+			defer os.Unsetenv(corpusEnv)
+		}
+		if err := os.Setenv(corpusEnv, "aosp"); err != nil {
+			t.Fatalf("setting %s: %s", corpusEnv, err)
+		}
+
+		// Split on a single space so the compiler and its argument are separate words.
+		ctx := newContext(strings.Split("rustc foo.rs", " "), "foo.kzip")
+		ctx.loadRules(strings.NewReader(vnameJSON))
+		for _, test := range tests {
+			if got := ctx.makeVname(test.path); !reflect.DeepEqual(got, test.want) {
+				t.Errorf("makeVname(%q) = %v, want %v", test.path, got, test.want)
+			}
+		}
+	})
+}