grpc/tools/distrib/update_flakes_query.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147

# Copyright 2022 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

QUERY = """
#standardSQL

-- See https://console.cloud.google.com/bigquery?sq=830293263384:5a8549832dfb48d9b2c04312a4ae3181 for the original query

WITH

  runs AS (
  SELECT
    RTRIM(LTRIM(REGEXP_REPLACE(test_target, r'(@poller=.+)', ''))) AS test_binary,
    REGEXP_EXTRACT(test_target, r'poller=(\w+)') AS poll_strategy,
    job_name,
    test_target,
    test_class_name,
    CASE
      # in case of timeout / retry / segfault the "test_case" fields will contain weird stuff
      # e.g. "test_shard2_run0_attempt0" or "shard_2/20"
      # when aggregating, we want to display all of these as a single category of problems
      WHEN test_case like 'test_shard_%_run%_attempt%' THEN 'CANNOT_DETERMINE'
      WHEN test_case like '%shard_%/%' THEN 'CANNOT_DETERMINE'
      # when test_case looks suspiciously like test_target
      # its value is probably meaningless and it means that the entire target has failed
      # e.g. test_target="//test/cpp/client:destroy_grpclb_channel_with_active_connect_stress_test" and test_case="test/cpp/client/destroy_grpclb_channel_with_active_connect_stress_test.exe"                         
      WHEN STRPOS(test_case, REPLACE(SUBSTR(test_target, 3), ":", "/")) != 0 THEN 'CANNOT_DETERMINE'
      ELSE test_case
    END AS test_case,
    result,
    build_id,
    timestamp
  FROM
    `grpc-testing.jenkins_test_results.rbe_test_results`
  WHERE
    DATETIME_DIFF(CURRENT_DATETIME(),
      dateTIME(timestamp),
      HOUR) < {lookback_hours}
    ),

  results_counts_per_build AS (
  SELECT
    test_binary,
    #test_target, # aggregate data over all pollers
    test_class_name,
    test_case,
    SUM(SAFE_CAST(result != 'PASSED'
        AND result != 'SKIPPED' AS INT64)) AS runs_failed,
    SUM(SAFE_CAST(result != 'SKIPPED' AS INT64)) AS runs_total,
    job_name,
    build_id
  FROM
    runs
  GROUP BY
    test_binary,
    test_class_name,
    test_case,
    job_name,
    build_id),
    
  builds_with_missing_cannot_determine_testcase_entry AS (
    SELECT 
      test_binary,
      job_name,
      build_id,
    FROM
      results_counts_per_build
    GROUP BY
      test_binary,
      job_name,
      build_id
    HAVING COUNTIF(test_case = 'CANNOT_DETERMINE') = 0
  ),
  
  # for each test target and build, generate a fake entry with "CANNOT_DETERMINE" test_case
  # if not already present.
  # this is because in many builds, there will be no "CANNOT_DETERMINE" entry
  # and we want to avoid skewing the statistics
  results_counts_per_build_with_fake_cannot_determine_test_case_entries AS (
    (SELECT * FROM results_counts_per_build)
    UNION ALL
    (SELECT
      test_binary,
      '' AS test_class_name,  # when test_case is 'CANNOT_DETERMINE', test class is empty string
      'CANNOT_DETERMINE' AS test_case,  # see table "runs"
      0 AS runs_failed,
      1 AS runs_total,
      job_name,
      build_id
    FROM
      builds_with_missing_cannot_determine_testcase_entry)
  ),
    
  results_counts AS (
  SELECT
    test_binary,
    test_class_name,
    test_case,
    job_name,
    SUM(runs_failed) AS runs_failed,
    SUM(runs_total) AS runs_total,
    SUM(SAFE_CAST(runs_failed > 0 AS INT64)) AS builds_failed,
    COUNT(build_id) AS builds_total,
    STRING_AGG(CASE
        WHEN runs_failed > 0 THEN 'X'
        ELSE '_' END, ''
    ORDER BY
      build_id ASC) AS build_failure_pattern,
    FORMAT("%T", ARRAY_AGG(build_id
      ORDER BY
        build_id ASC)) AS builds
  FROM
    #results_counts_per_build
    results_counts_per_build_with_fake_cannot_determine_test_case_entries
  GROUP BY
    test_binary,
    test_class_name,
    test_case,
    job_name
  HAVING
    runs_failed > 0)

SELECT
  ROUND(100*builds_failed / builds_total, 2) AS pct_builds_failed,
  ROUND(100*runs_failed / runs_total, 2) AS pct_runs_failed,
  test_binary,
  test_class_name,
  test_case,
  job_name,
  build_failure_pattern
           
FROM
  results_counts
ORDER BY
  pct_builds_failed DESC
"""