aboutsummaryrefslogtreecommitdiff
path: root/bazel/aquery.go
blob: 4d39e8f55529bacd231cfacf78622a941d7bfd26 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
// Copyright 2020 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bazel

import (
	"crypto/sha256"
	"encoding/base64"
	"fmt"
	"path/filepath"
	"reflect"
	"sort"
	"strings"
	"sync"

	analysis_v2_proto "prebuilts/bazel/common/proto/analysis_v2"

	"github.com/google/blueprint/metrics"
	"github.com/google/blueprint/proptools"
	"google.golang.org/protobuf/proto"
)

// artifactId is the integer identifier type used for artifacts, e.g. as the key of
// aqueryArtifactHandler.artifactIdToPath.
type artifactId int
// depsetId is the integer identifier type used for depsets, e.g. as the key of
// aqueryArtifactHandler.depsetIdToAqueryDepset.
type depsetId int
// pathFragmentId is the integer identifier type used for path fragments. expandPathFragment
// treats only positive values as valid fragment ids.
type pathFragmentId int

// artifact contains relevant portions of Bazel's aquery proto, Artifact.
// Represents a single artifact, whether it's a source file or a derived output file.
type artifact struct {
	// Id is the identifier of this artifact.
	Id             artifactId
	// PathFragmentId refers to a path fragment. Presumably this is the last fragment of the
	// artifact's path, mirroring how newAqueryHandler expands the proto Artifact — confirm.
	PathFragmentId pathFragmentId
}

// pathFragment holds one component of a path together with a link to its parent component.
// NOTE(review): this appears to mirror Bazel's aquery proto PathFragment, which
// expandPathFragment walks from leaf to root — confirm this struct is still used.
type pathFragment struct {
	// Id is the identifier of this fragment.
	Id       pathFragmentId
	// Label is the text of this fragment.
	Label    string
	// ParentId is the identifier of the parent fragment.
	ParentId pathFragmentId
}

// KeyValuePair represents Bazel's aquery proto, KeyValuePair: a single string key paired
// with a single string value.
type KeyValuePair struct {
	Key   string
	Value string
}

// AqueryDepset is a depset definition from Bazel's aquery response. This is
// akin to the `depSetOfFiles` in the response proto, except:
//   - direct artifacts are enumerated by full path instead of by ID
//   - it has a hash of the depset contents, instead of an int ID (for determinism)
//
// A depset is a data structure for efficient transitive handling of artifact
// paths. A single depset consists of one or more artifact paths and one or
// more "child" depsets.
type AqueryDepset struct {
	// ContentHash identifies the depset. It is computed by depsetContentHash from
	// DirectArtifacts and TransitiveDepSetHashes.
	ContentHash            string
	// DirectArtifacts holds the full paths of the artifacts directly contained in the depset.
	DirectArtifacts        []string
	// TransitiveDepSetHashes holds the ContentHash of each child depset.
	TransitiveDepSetHashes []string
}

// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
// data structure for storing large numbers of file paths.
type depSetOfFiles struct {
	// Id is the identifier of this depset.
	Id                  depsetId
	// DirectArtifactIds lists the ids of the artifacts contained directly in this depset.
	DirectArtifactIds   []artifactId
	// TransitiveDepSetIds lists the ids of the child depsets of this depset.
	TransitiveDepSetIds []depsetId
}

// action contains relevant portions of Bazel's aquery proto, Action.
// Represents a single command line invocation in the Bazel build graph.
type action struct {
	// Arguments is the command line of the action, one entry per argument.
	Arguments            []string
	EnvironmentVariables []KeyValuePair
	// InputDepSetIds lists the ids of the depsets holding the action's inputs.
	InputDepSetIds       []depsetId
	// Mnemonic names the kind of action (for example "Middleman" or "TemplateExpand").
	Mnemonic             string
	// OutputIds lists the ids of the artifacts produced by the action.
	OutputIds            []artifactId
	// TemplateContent and Substitutions presumably apply to TemplateExpand actions only,
	// by analogy with expandTemplateContent — confirm.
	TemplateContent      string
	Substitutions        []KeyValuePair
	// FileContents presumably applies to file-writing actions only — confirm.
	FileContents         string
}

// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
// An aquery response from Bazel contains a single ActionGraphContainer proto.
// The four slices are cross-referenced by id: actions refer to artifacts and depsets,
// depsets refer to artifacts and other depsets, and artifacts refer to path fragments.
type actionGraphContainer struct {
	Artifacts     []artifact
	Actions       []action
	DepSetOfFiles []depSetOfFiles
	PathFragments []pathFragment
}

// BuildStatement contains information to register a build statement corresponding (one to one)
// with a Bazel action from Bazel's action graph.
type BuildStatement struct {
	// Command is the shell command of the statement. It is left empty for file-write and
	// symlink-tree statements.
	Command      string
	// Depfile is the path of the action's output with a ".d" extension, or nil if there is
	// none. Such an output is not listed in OutputPaths.
	Depfile      *string
	// OutputPaths holds the paths of the action's outputs, excluding the depfile.
	OutputPaths  []string
	// SymlinkPaths is only set for symlink actions, where it equals OutputPaths.
	SymlinkPaths []string
	// Env holds the environment variables of the action, as reported by aquery.
	Env          []*analysis_v2_proto.KeyValuePair
	// Mnemonic is the mnemonic of the originating Bazel action.
	Mnemonic     string

	// Inputs of this build statement, either as unexpanded depsets or expanded
	// input paths. There should be no overlap between these fields; an input
	// path should either be included as part of an unexpanded depset or a raw
	// input path string, but not both.
	InputDepsetHashes []string
	InputPaths        []string
	// FileContents is only set for file-write statements and holds the contents reported
	// by aquery for the action.
	FileContents      string
}

// aqueryArtifactHandler is a helper type for aquery processing which facilitates retrieval of
// artifact paths from their less readable Bazel structures (depset and path fragment).
type aqueryArtifactHandler struct {
	// Maps depset id to AqueryDepset, a representation of depset which is
	// post-processed for middleman artifact handling, unhandled artifact
	// dropping, content hashing, etc.
	depsetIdToAqueryDepset map[depsetId]AqueryDepset
	// Set of ids of depsets which, after post-processing, contain neither direct artifacts
	// nor non-empty child depsets. Such depsets have no entry in depsetIdToAqueryDepset.
	emptyDepsetIds         map[depsetId]struct{}
	// Maps content hash to AqueryDepset.
	depsetHashToAqueryDepset map[string]AqueryDepset

	// depsetHashToArtifactPathsCache is a memoization of depset flattening, because flattening
	// may be an expensive operation. Keys are depset content hashes (string) and values are
	// the flattened artifact paths ([]string).
	depsetHashToArtifactPathsCache sync.Map
	// Maps artifact ids to fully expanded paths.
	artifactIdToPath map[artifactId]string
}

// templateActionOverriddenTokens maps a template token to the value that must be substituted
// for it, instead of the one returned in 'substitutions' of a TemplateExpand action.
// It is consulted by expandTemplateContent for every substitution key.
var templateActionOverriddenTokens = map[string]string{
	// Uses "python3" for %python_binary% instead of the value returned by aquery
	// which is "py3wrapper.sh". See removePy3wrapperScript.
	// NOTE(review): removePy3wrapperScript is not defined in the visible part of this file;
	// the reference may be stale.
	"%python_binary%": "python3",
}

const (
	// middlemanMnemonic is the mnemonic of Bazel middleman actions. Such actions produce no
	// build statement; their outputs are replaced by their input depsets instead.
	middlemanMnemonic = "Middleman"
	// The file name of py3wrapper.sh, which is used by py_binary targets. Artifacts whose
	// path ends with this suffix are dropped from depsets.
	py3wrapperFileName = "/py3wrapper.sh"
)

// indexBy builds a lookup table from values, keyed by the result of keyFn for each element.
// When several elements produce the same key, the element that appears last in values wins.
// The returned map is never nil, even for empty input.
func indexBy[K comparable, V any](values []V, keyFn func(v V) K) map[K]V {
	indexed := make(map[K]V)
	for i := range values {
		indexed[keyFn(values[i])] = values[i]
	}
	return indexed
}

// newAqueryHandler builds an aqueryArtifactHandler from a parsed aquery response.
//
// It resolves every artifact to its full path, records which artifacts are outputs of
// middleman actions, and then post-processes every depset of the response into the
// handler's depset maps. It returns an error if an artifact's path fragments cannot be
// expanded or if a depset refers to an undefined artifact or depset.
func newAqueryHandler(aqueryResult *analysis_v2_proto.ActionGraphContainer) (*aqueryArtifactHandler, error) {
	fragmentsById := indexBy(aqueryResult.PathFragments, func(fragment *analysis_v2_proto.PathFragment) pathFragmentId {
		return pathFragmentId(fragment.Id)
	})

	pathsByArtifact := make(map[artifactId]string, len(aqueryResult.Artifacts))
	for _, entry := range aqueryResult.Artifacts {
		fullPath, err := expandPathFragment(pathFragmentId(entry.PathFragmentId), fragmentsById)
		if err != nil {
			return nil, err
		}
		pathsByArtifact[artifactId(entry.Id)] = fullPath
	}

	// Map each middleman output artifact id to the input depset ids of the middleman action
	// that produces it. Middleman artifacts are treated as "substitute" artifacts for mixed
	// builds: given a middleman action with inputs [foo, bar] and output [baz_middleman],
	// every other action with input [baz_middleman] gets [foo, bar] as inputs instead.
	middlemanInputs := make(map[artifactId][]uint32)
	for _, candidate := range aqueryResult.Actions {
		if candidate.Mnemonic != middlemanMnemonic {
			continue
		}
		for _, out := range candidate.OutputIds {
			middlemanInputs[artifactId(out)] = candidate.InputDepSetIds
		}
	}

	rawDepsets := indexBy(aqueryResult.DepSetOfFiles, func(raw *analysis_v2_proto.DepSetOfFiles) depsetId {
		return depsetId(raw.Id)
	})

	handler := &aqueryArtifactHandler{
		depsetIdToAqueryDepset:         make(map[depsetId]AqueryDepset),
		depsetHashToAqueryDepset:       make(map[string]AqueryDepset),
		depsetHashToArtifactPathsCache: sync.Map{},
		emptyDepsetIds:                 make(map[depsetId]struct{}),
		artifactIdToPath:               pathsByArtifact,
	}

	// Validate and post-process every depset of the response.
	for _, raw := range aqueryResult.DepSetOfFiles {
		if _, err := handler.populateDepsetMaps(raw, middlemanInputs, rawDepsets); err != nil {
			return nil, err
		}
	}

	return handler, nil
}

// populateDepsetMaps ensures that the handler's depset maps contain an entry for the given
// depset and, recursively, for every depset it references.
//
// middlemanIdToDepsetIds maps the output artifact id of each middleman action to the input
// depset ids of that action. depsetIdToDepset maps every depset id of the aquery response to
// its proto definition.
//
// It returns the post-processed depset, or (nil, nil) if the depset turns out to contain
// neither direct artifacts nor non-empty child depsets; such depsets are recorded in
// emptyDepsetIds. An error is returned if the depset refers to an undefined artifact or
// depset.
func (a *aqueryArtifactHandler) populateDepsetMaps(depset *analysis_v2_proto.DepSetOfFiles, middlemanIdToDepsetIds map[artifactId][]uint32, depsetIdToDepset map[depsetId]*analysis_v2_proto.DepSetOfFiles) (*AqueryDepset, error) {
	currentId := depsetId(depset.Id)
	if aqueryDepset, containsDepset := a.depsetIdToAqueryDepset[currentId]; containsDepset {
		return &aqueryDepset, nil
	}
	// A depset already found to be empty is skipped here, so that shared empty depsets are
	// not walked again for every depset that references them.
	if _, empty := a.emptyDepsetIds[currentId]; empty {
		return nil, nil
	}

	// Work on a copy of the transitive ids: appending middleman depsets below must never
	// write into the backing array owned by the proto message.
	transitiveDepsetIds := make([]uint32, 0, len(depset.TransitiveDepSetIds))
	transitiveDepsetIds = append(transitiveDepsetIds, depset.TransitiveDepSetIds...)

	directArtifactPaths := make([]string, 0, len(depset.DirectArtifactIds))
	for _, id := range depset.DirectArtifactIds {
		aId := artifactId(id)
		path, pathExists := a.artifactIdToPath[aId]
		if !pathExists {
			return nil, fmt.Errorf("undefined input artifactId %d", aId)
		}
		if depsetsToUse, isMiddleman := middlemanIdToDepsetIds[aId]; isMiddleman {
			// Swap middleman artifacts with their corresponding depsets and drop the
			// middleman artifacts themselves.
			transitiveDepsetIds = append(transitiveDepsetIds, depsetsToUse...)
			continue
		}
		if strings.HasSuffix(path, py3wrapperFileName) || strings.HasPrefix(path, "../bazel_tools") {
			// Drop these artifacts.
			// See go/python-binary-host-mixed-build for more details.
			// 1) Drop py3wrapper.sh, just use python binary, the launcher script generated by
			// the TemplateExpandAction handles everything necessary to launch a Python
			// application.
			// 2) ../bazel_tools: they have MODIFY timestamp 10 years in the future and would
			// cause the containing depset to always be considered newer than their outputs.
			continue
		}
		directArtifactPaths = append(directArtifactPaths, path)
	}

	childDepsetHashes := make([]string, 0, len(transitiveDepsetIds))
	for _, id := range transitiveDepsetIds {
		childDepsetId := depsetId(id)
		childDepset, exists := depsetIdToDepset[childDepsetId]
		if !exists {
			if _, empty := a.emptyDepsetIds[childDepsetId]; empty {
				continue
			}
			return nil, fmt.Errorf("undefined input depsetId %d (referenced by depsetId %d)", childDepsetId, depset.Id)
		}
		childAqueryDepset, err := a.populateDepsetMaps(childDepset, middlemanIdToDepsetIds, depsetIdToDepset)
		if err != nil {
			return nil, err
		}
		if childAqueryDepset == nil {
			// The child is empty after post-processing and contributes nothing.
			continue
		}
		childDepsetHashes = append(childDepsetHashes, childAqueryDepset.ContentHash)
	}

	if len(directArtifactPaths) == 0 && len(childDepsetHashes) == 0 {
		a.emptyDepsetIds[currentId] = struct{}{}
		return nil, nil
	}
	aqueryDepset := AqueryDepset{
		ContentHash:            depsetContentHash(directArtifactPaths, childDepsetHashes),
		DirectArtifacts:        directArtifactPaths,
		TransitiveDepSetHashes: childDepsetHashes,
	}
	a.depsetIdToAqueryDepset[currentId] = aqueryDepset
	a.depsetHashToAqueryDepset[aqueryDepset.ContentHash] = aqueryDepset
	return &aqueryDepset, nil
}

// getInputPaths flattens the depsets of the given IDs and returns all transitive
// input paths contained in these depsets.
// This is a potentially expensive operation, and should not be invoked except
// for actions which need specialized input handling.
//
// Depsets which are known to be empty contribute no paths, consistent with
// depsetContentHashes. An error is returned for a depset id which is neither a known
// depset nor a known empty depset, or if flattening a depset fails.
func (a *aqueryArtifactHandler) getInputPaths(depsetIds []uint32) ([]string, error) {
	var inputPaths []string

	for _, id := range depsetIds {
		inputDepSetId := depsetId(id)
		depset, exists := a.depsetIdToAqueryDepset[inputDepSetId]
		if !exists {
			if _, empty := a.emptyDepsetIds[inputDepSetId]; empty {
				continue
			}
			// Report the offending id rather than looking up an empty content hash.
			return nil, fmt.Errorf("undefined (not even empty) input depsetId %d", inputDepSetId)
		}
		inputArtifacts, err := a.artifactPathsFromDepsetHash(depset.ContentHash)
		if err != nil {
			return nil, err
		}
		inputPaths = append(inputPaths, inputArtifacts...)
	}

	return inputPaths, nil
}

// artifactPathsFromDepsetHash returns the paths of all artifacts contained, directly or
// transitively, in the depset with the given content hash. Direct artifacts come first,
// followed by the flattened contents of each child depset in order. Duplicates are not
// removed.
//
// Results are memoized in depsetHashToArtifactPathsCache; the returned slice may be shared
// with other callers and must not be modified. An error is returned if the hash, or the
// hash of any transitive child, is unknown to the handler.
func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
	if result, exists := a.depsetHashToArtifactPathsCache.Load(depsetHash); exists {
		return result.([]string), nil
	}
	depset, exists := a.depsetHashToAqueryDepset[depsetHash]
	if !exists {
		return nil, fmt.Errorf("undefined input depset hash %s", depsetHash)
	}
	// Start from a private copy of the direct artifacts. Appending to
	// depset.DirectArtifacts itself could write into spare capacity of its backing array,
	// which is shared with the depset stored in the handler and with any other goroutine
	// flattening the same depset.
	result := append([]string(nil), depset.DirectArtifacts...)
	for _, childHash := range depset.TransitiveDepSetHashes {
		childArtifactPaths, err := a.artifactPathsFromDepsetHash(childHash)
		if err != nil {
			return nil, err
		}
		result = append(result, childArtifactPaths...)
	}
	a.depsetHashToArtifactPathsCache.Store(depsetHash, result)
	return result, nil
}

// AqueryBuildStatements returns a slice of BuildStatements and a slice of AqueryDepset
// which should be registered (and output to a ninja file) to correspond with Bazel's
// action graph, as described by the given serialized action graph proto.
//
// aqueryJsonProto is parsed with proto.Unmarshal into an ActionGraphContainer. eventHandler
// records one timing scope per processing phase.
//
// The returned build statement slice has one entry per action of the action graph. Actions
// which produce no build statement (for example middleman actions) yield nil entries, which
// are sorted to the end of the slice. The returned depsets are the non-empty post-processed
// depsets, deduplicated by content hash and sorted by content hash.
//
// An error is returned if the proto cannot be parsed, if the action graph is inconsistent,
// if any action cannot be converted, or if two different depsets share a content hash.
func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHandler) ([]*BuildStatement, []AqueryDepset, error) {
	aqueryProto := &analysis_v2_proto.ActionGraphContainer{}
	err := proto.Unmarshal(aqueryJsonProto, aqueryProto)
	if err != nil {
		return nil, nil, err
	}

	// Each phase runs inside eventHandler.Do so that its timing scope ends when the phase
	// ends. A `defer eventHandler.End(...)` inside a bare block would only run when this
	// whole function returns, making every scope span the rest of the function.
	var aqueryHandler *aqueryArtifactHandler
	eventHandler.Do("init_handler", func() {
		aqueryHandler, err = newAqueryHandler(aqueryProto)
	})
	if err != nil {
		return nil, nil, err
	}

	// allocate both length and capacity so each goroutine can write to an index independently without
	// any need for synchronization for slice access.
	buildStatements := make([]*BuildStatement, len(aqueryProto.Actions))
	eventHandler.Do("build_statements", func() {
		var wg sync.WaitGroup
		var errOnce sync.Once

		for i, actionEntry := range aqueryProto.Actions {
			wg.Add(1)
			go func(i int, actionEntry *analysis_v2_proto.Action) {
				defer wg.Done()
				buildStatement, aErr := aqueryHandler.actionToBuildStatement(actionEntry)
				if aErr != nil {
					// Only the first reported error is kept.
					errOnce.Do(func() {
						err = aErr
					})
					return
				}
				// set build statement at an index rather than appending such that each goroutine does not
				// impact other goroutines
				buildStatements[i] = buildStatement
			}(i, actionEntry)
		}
		wg.Wait()
	})
	if err != nil {
		return nil, nil, err
	}

	depsetsByHash := map[string]AqueryDepset{}
	depsets := make([]AqueryDepset, 0, len(aqueryHandler.depsetIdToAqueryDepset))
	eventHandler.Do("depsets", func() {
		for _, aqueryDepset := range aqueryHandler.depsetIdToAqueryDepset {
			prevEntry, hasKey := depsetsByHash[aqueryDepset.ContentHash]
			if !hasKey {
				depsetsByHash[aqueryDepset.ContentHash] = aqueryDepset
				depsets = append(depsets, aqueryDepset)
				continue
			}
			// Two depsets collide on hash. Ensure that their contents are identical.
			if !reflect.DeepEqual(aqueryDepset, prevEntry) {
				err = fmt.Errorf("two different depsets have the same hash: %v, %v", prevEntry, aqueryDepset)
				return
			}
		}
	})
	if err != nil {
		return nil, nil, err
	}

	eventHandler.Do("build_statement_sort", func() {
		// Build Statements and depsets must be sorted by their content hash to
		// preserve determinism between builds (this will result in consistent ninja file
		// output). Note they are not sorted by their original IDs nor their Bazel ordering,
		// as Bazel gives nondeterministic ordering / identifiers in aquery responses.
		sort.Slice(buildStatements, func(i, j int) bool {
			// Sort all nil statements to the end of the slice
			if buildStatements[i] == nil {
				return false
			} else if buildStatements[j] == nil {
				return true
			}
			// For build statements, compare output lists. In Bazel, each output file
			// may only have one action which generates it, so this will provide
			// a deterministic ordering.
			outputs_i := buildStatements[i].OutputPaths
			outputs_j := buildStatements[j].OutputPaths
			if len(outputs_i) != len(outputs_j) {
				return len(outputs_i) < len(outputs_j)
			}
			if len(outputs_i) == 0 {
				// No outputs for these actions, so compare commands.
				return buildStatements[i].Command < buildStatements[j].Command
			}
			// There may be multiple outputs, but the output ordering is deterministic.
			return outputs_i[0] < outputs_j[0]
		})
	})
	eventHandler.Do("depset_sort", func() {
		sort.Slice(depsets, func(i, j int) bool {
			return depsets[i].ContentHash < depsets[j].ContentHash
		})
	})
	return buildStatements, depsets, nil
}

// depsetContentHash derives an identifier for a depset from its contents: the SHA256
// checksum of the direct paths followed by the hashes of the transitive depsets, encoded
// as unpadded URL-safe base64.
// A content-derived identifier is deterministic, unlike e.g. an integer identifier which
// would depend on the order in which the depsets are created.
func depsetContentHash(directPaths []string, transitiveDepsetHashes []string) string {
	var content strings.Builder
	// Paths cannot contain a newline, which makes it a safe delimiter between them.
	content.WriteString(strings.Join(directPaths, "\n"))
	// The transitive hashes are appended back to back, without any delimiter.
	content.WriteString(strings.Join(transitiveDepsetHashes, ""))
	digest := sha256.Sum256([]byte(content.String()))
	return base64.RawURLEncoding.EncodeToString(digest[:])
}

// depsetContentHashes translates depset ids into the content hashes of the corresponding
// post-processed depsets, preserving order. Ids of depsets known to be empty are skipped.
// An error is returned for an id which is neither a known depset nor a known empty depset.
func (a *aqueryArtifactHandler) depsetContentHashes(inputDepsetIds []uint32) ([]string, error) {
	var contentHashes []string
	for _, rawId := range inputDepsetIds {
		id := depsetId(rawId)
		if known, ok := a.depsetIdToAqueryDepset[id]; ok {
			contentHashes = append(contentHashes, known.ContentHash)
			continue
		}
		if _, isEmpty := a.emptyDepsetIds[id]; isEmpty {
			continue
		}
		return nil, fmt.Errorf("undefined (not even empty) input depsetId %d", id)
	}
	return contentHashes, nil
}

// normalActionBuildStatement converts an ordinary command-line action into a build
// statement. The command is the action's arguments, each shell-escaped, joined by single
// spaces; inputs are referenced by depset hash rather than expanded.
func (a *aqueryArtifactHandler) normalActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
	escapedArgs := proptools.ShellEscapeListIncludingSpaces(actionEntry.Arguments)

	depsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
	if err != nil {
		return nil, err
	}
	outputs, depfile, err := a.getOutputPaths(actionEntry)
	if err != nil {
		return nil, err
	}

	return &BuildStatement{
		Command:           strings.Join(escapedArgs, " "),
		Depfile:           depfile,
		OutputPaths:       outputs,
		InputDepsetHashes: depsetHashes,
		Env:               actionEntry.EnvironmentVariables,
		Mnemonic:          actionEntry.Mnemonic,
	}, nil
}

// templateExpandActionBuildStatement converts a TemplateExpand action into a build
// statement whose command writes the expanded template to the action's single output and
// makes that output executable. An error is returned unless the action has exactly one
// output besides a possible depfile.
func (a *aqueryArtifactHandler) templateExpandActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
	// The template content is escaped for use inside double quotes, with newlines turned into
	// \\n so that Ninja does not turn them into spaces and break the layout of Python
	// programs. sed converts \\n back to \n before the content is written to the output.
	// See go/python-binary-host-mixed-build for more details.
	const commandFormat = `/bin/bash -c 'echo "%[1]s" | sed "s/\\\\n/\\n/g" > %[2]s && chmod a+x %[2]s'`

	outputs, depfile, err := a.getOutputPaths(actionEntry)
	if err != nil {
		return nil, err
	}
	if len(outputs) != 1 {
		return nil, fmt.Errorf("Expect 1 output to template expand action, got: output %q", outputs)
	}

	escapedContent := escapeCommandlineArgument(expandTemplateContent(actionEntry))
	command := fmt.Sprintf(commandFormat, escapedContent, outputs[0])

	depsetHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
	if err != nil {
		return nil, err
	}

	return &BuildStatement{
		Command:           command,
		Depfile:           depfile,
		OutputPaths:       outputs,
		InputDepsetHashes: depsetHashes,
		Env:               actionEntry.EnvironmentVariables,
		Mnemonic:          actionEntry.Mnemonic,
	}, nil
}

// fileWriteActionBuildStatement converts a file-writing action into a build statement that
// carries the file contents instead of a command. Any depfile among the outputs is ignored.
func (a *aqueryArtifactHandler) fileWriteActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
	outputs, _, err := a.getOutputPaths(actionEntry)
	if err != nil {
		return nil, err
	}
	inputHashes, err := a.depsetContentHashes(actionEntry.InputDepSetIds)
	if err != nil {
		return nil, err
	}

	statement := &BuildStatement{
		Depfile:           nil,
		OutputPaths:       outputs,
		Env:               actionEntry.EnvironmentVariables,
		Mnemonic:          actionEntry.Mnemonic,
		InputDepsetHashes: inputHashes,
		FileContents:      actionEntry.FileContents,
	}
	return statement, nil
}

// symlinkTreeActionBuildStatement converts a SymlinkTree action into a build statement
// without a command, carrying the single expanded input path and the single output path.
// An error is returned unless the action has exactly one input and exactly one output
// besides a possible depfile.
func (a *aqueryArtifactHandler) symlinkTreeActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
	outputs, _, err := a.getOutputPaths(actionEntry)
	if err != nil {
		return nil, err
	}
	inputs, err := a.getInputPaths(actionEntry.InputDepSetIds)
	if err != nil {
		return nil, err
	}
	if !(len(inputs) == 1 && len(outputs) == 1) {
		return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputs, outputs)
	}

	// The actual command is generated in bazelSingleton.GenerateBuildActions
	statement := &BuildStatement{
		Depfile:     nil,
		OutputPaths: outputs,
		Env:         actionEntry.EnvironmentVariables,
		Mnemonic:    actionEntry.Mnemonic,
		InputPaths:  inputs,
	}
	return statement, nil
}

// symlinkActionBuildStatement converts a symlink action into a build statement whose
// command creates the output's directory, removes any existing output and links the output
// to the input. An error is returned unless the action has exactly one input and exactly
// one output besides a possible depfile.
func (a *aqueryArtifactHandler) symlinkActionBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
	outputs, depfile, err := a.getOutputPaths(actionEntry)
	if err != nil {
		return nil, err
	}
	inputs, err := a.getInputPaths(actionEntry.InputDepSetIds)
	if err != nil {
		return nil, err
	}
	if !(len(inputs) == 1 && len(outputs) == 1) {
		return nil, fmt.Errorf("Expect 1 input and 1 output to symlink action, got: input %q, output %q", inputs, outputs)
	}

	linkPath := outputs[0]
	escapedLinkDir := proptools.ShellEscapeIncludingSpaces(filepath.Dir(linkPath))
	escapedLinkPath := proptools.ShellEscapeIncludingSpaces(linkPath)
	// Use absolute paths, because some soong actions don't play well with relative paths (for example, `cp -d`).
	linkTarget := filepath.Join("$PWD", proptools.ShellEscapeIncludingSpaces(inputs[0]))

	return &BuildStatement{
		Command:      fmt.Sprintf("mkdir -p %[1]s && rm -f %[2]s && ln -sf %[3]s %[2]s", escapedLinkDir, escapedLinkPath, linkTarget),
		Depfile:      depfile,
		OutputPaths:  outputs,
		InputPaths:   inputs,
		Env:          actionEntry.EnvironmentVariables,
		Mnemonic:     actionEntry.Mnemonic,
		SymlinkPaths: outputs,
	}, nil
}

// getOutputPaths resolves the output artifact ids of an action to paths. An output with a
// ".d" extension is returned separately as the depfile; all other outputs are returned in
// outputPaths, in order. An error is returned for an output id with no known path, or if
// more than one output has a ".d" extension.
func (a *aqueryArtifactHandler) getOutputPaths(actionEntry *analysis_v2_proto.Action) (outputPaths []string, depfile *string, err error) {
	for _, outputId := range actionEntry.OutputIds {
		resolved, known := a.artifactIdToPath[artifactId(outputId)]
		if !known {
			return outputPaths, depfile, fmt.Errorf("undefined outputId %d", outputId)
		}
		if filepath.Ext(resolved) != ".d" {
			outputPaths = append(outputPaths, resolved)
			continue
		}
		if depfile != nil {
			return outputPaths, depfile, fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, resolved)
		}
		depfile = &resolved
	}
	return outputPaths, depfile, nil
}

// expandTemplateContent returns the template content of the action with every substitution
// key replaced by its value. Keys listed in templateActionOverriddenTokens are replaced by
// the overriding value instead of the one supplied by the action.
func expandTemplateContent(actionEntry *analysis_v2_proto.Action) string {
	oldNewPairs := make([]string, 0, 2*len(actionEntry.Substitutions))
	for _, substitution := range actionEntry.Substitutions {
		replacement, overridden := templateActionOverriddenTokens[substitution.Key]
		if !overridden {
			replacement = substitution.Value
		}
		oldNewPairs = append(oldNewPairs, substitution.Key, replacement)
	}
	return strings.NewReplacer(oldNewPairs...).Replace(actionEntry.TemplateContent)
}

// commandLineArgumentReplacer escapes the characters handled by escapeCommandlineArgument:
// backslash, dollar sign, backtick and double quote are prefixed with a backslash, a
// newline becomes the two characters backslash and n, and a single quote becomes '"'"'.
var commandLineArgumentReplacer = strings.NewReplacer(
	"\\", "\\\\",
	"$", "\\$",
	"`", "\\`",
	"\"", "\\\"",
	"\n", "\\n",
	"'", "'\"'\"'",
)

// escapeCommandlineArgument returns str with every character covered by
// commandLineArgumentReplacer replaced by its escaped form.
func escapeCommandlineArgument(str string) string {
	escaped := commandLineArgumentReplacer.Replace(str)
	return escaped
}

// actionToBuildStatement converts a single action into a build statement, dispatching on
// the action's mnemonic. It returns (nil, nil) for actions which deliberately produce no
// build statement, and an error for an action without arguments that has no specialized
// handling.
func (a *aqueryArtifactHandler) actionToBuildStatement(actionEntry *analysis_v2_proto.Action) (*BuildStatement, error) {
	hasCommand := len(actionEntry.Arguments) >= 1

	switch actionEntry.Mnemonic {
	// Skipped mnemonics:
	//   - Middleman actions are handled separately as a preparatory step, so that their
	//     inputs may be relayed to actions depending on middleman artifacts.
	//   - PythonZipper is bogus action returned by aquery, ignore it (b/236198693)
	//   - "Fail" actions are placeholder actions designed to always fail.
	//   - BaselineCoverage actions are skipped as well.
	case middlemanMnemonic, "PythonZipper", "Fail", "BaselineCoverage":
		return nil, nil
	case "Symlink", "SolibSymlink", "ExecutableSymlink":
		return a.symlinkActionBuildStatement(actionEntry)
	case "FileWrite", "SourceSymlinkManifest":
		return a.fileWriteActionBuildStatement(actionEntry)
	case "SymlinkTree":
		return a.symlinkTreeActionBuildStatement(actionEntry)
	case "TemplateExpand":
		// A TemplateExpand action which does carry arguments is treated as a normal action.
		if !hasCommand {
			return a.templateExpandActionBuildStatement(actionEntry)
		}
	}

	if !hasCommand {
		return nil, fmt.Errorf("received action with no command: [%s]", actionEntry.Mnemonic)
	}
	return a.normalActionBuildStatement(actionEntry)
}

// expandPathFragment returns the full path denoted by the path fragment with the given id,
// built by following parent links up to a terminal node and joining the labels from the
// outermost ancestor down to the given fragment.
//
// Only positive ids are valid; a non-positive id yields an empty path. An error is returned
// if a referenced fragment is missing from pathFragmentsMap, if a fragment names itself as
// its parent, or if the parent links form a longer cycle.
func expandPathFragment(id pathFragmentId, pathFragmentsMap map[pathFragmentId]*analysis_v2_proto.PathFragment) (string, error) {
	// Labels are collected leaf-first and reversed once at the end, which avoids
	// re-allocating the slice for every prepended label.
	var labels []string
	currId := id
	// Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
	for currId > 0 {
		currFragment, ok := pathFragmentsMap[currId]
		if !ok {
			return "", fmt.Errorf("undefined path fragment id %d", currId)
		}
		parentId := pathFragmentId(currFragment.ParentId)
		if currId == parentId {
			return "", fmt.Errorf("fragment cannot refer to itself as parent %#v", currFragment)
		}
		labels = append(labels, currFragment.Label)
		// A chain without cycles visits each fragment at most once, so visiting more
		// fragments than exist means the parent links loop back on themselves.
		if len(labels) > len(pathFragmentsMap) {
			return "", fmt.Errorf("cycle detected among parents of path fragment id %d", id)
		}
		currId = parentId
	}
	for i, j := 0, len(labels)-1; i < j; i, j = i+1, j-1 {
		labels[i], labels[j] = labels[j], labels[i]
	}
	return filepath.Join(labels...), nil
}