1
0
mirror of https://github.com/google/nomulus synced 2026-01-04 04:04:22 +00:00

Only use GKE logs in ICANN reports (#2738)

We no longer need to union GKE+GAE logs since we've moved all production
traffic to GKE only.

For testing, I copied the affected *_test.sql files to Bigquery, removed
all the "-alpha" bits, and changed the dates to 20250301 and 20250331
and ran them to make sure they returned the expected data.
This commit is contained in:
gbrodman
2025-04-09 13:12:02 -04:00
committed by GitHub
parent 03872b508f
commit 3cd50dc929
8 changed files with 39 additions and 120 deletions

View File

@@ -85,9 +85,6 @@ public final class ActivityReportingQueryBuilder implements QueryBuilder {
String monthlyLogsQuery =
SqlTemplate.create(getQueryFromFile(MONTHLY_LOGS + ".sql"))
.put("PROJECT_ID", projectId)
.put("APPENGINE_LOGS_DATA_SET", "appengine_logs")
.put("GKE_LOGS_DATA_SET", "gke_logs")
.put("REQUEST_TABLE", "appengine_googleapis_com_request_log_")
.put("FIRST_DAY_OF_MONTH", logTableFormatter.print(firstDayOfMonth))
.put("LAST_DAY_OF_MONTH", logTableFormatter.print(lastDayOfMonth))
.build();
@@ -96,9 +93,6 @@ public final class ActivityReportingQueryBuilder implements QueryBuilder {
String eppQuery =
SqlTemplate.create(getQueryFromFile(EPP_METRICS + ".sql"))
.put("PROJECT_ID", projectId)
.put("APPENGINE_LOGS_DATA_SET", "appengine_logs")
.put("GKE_LOGS_DATA_SET", "gke_logs")
.put("APP_LOGS_TABLE", "_var_log_app_")
.put("FIRST_DAY_OF_MONTH", logTableFormatter.print(firstDayOfMonth))
.put("LAST_DAY_OF_MONTH", logTableFormatter.print(lastDayOfMonth))
.build();

View File

@@ -112,9 +112,6 @@ public final class TransactionsReportingQueryBuilder implements QueryBuilder {
String attemptedAddsQuery =
SqlTemplate.create(getQueryFromFile(ATTEMPTED_ADDS + ".sql"))
.put("PROJECT_ID", projectId)
.put("APPENGINE_LOGS_DATA_SET", "appengine_logs")
.put("GKE_LOGS_DATA_SET", "gke_logs")
.put("APP_LOGS_TABLE", "_var_log_app_")
.put("FIRST_DAY_OF_MONTH", logTableFormatter.print(earliestReportTime))
.put("LAST_DAY_OF_MONTH", logTableFormatter.print(latestReportTime))
.build();

View File

@@ -16,7 +16,7 @@
-- Determine the number of attempted adds each registrar made.
-- Since the specification requests all 'attempted' adds, we regex the
-- monthly App Engine logs, searching for all create commands and associating
-- monthly GKE logs, searching for all create commands and associating
-- them with their corresponding registrars.
-- Example log generated by FlowReporter in App Engine and GKE logs:
@@ -27,7 +27,7 @@
-- ,"targetId":"","targetIds":[],"tld":"",
-- "tlds":[],"icannActivityReportField":""}
-- This outer select just converts the registrar's clientId to their name.
-- This outer select just converts the registrar's ID to their name.
SELECT
tld,
registrar_table.registrar_name AS registrar_name,
@@ -38,34 +38,20 @@ FROM (
JSON_EXTRACT_SCALAR(json, '$.tld') AS tld,
JSON_EXTRACT_SCALAR(json, '$.clientId') AS clientId,
COUNT(json) AS count
FROM ((
-- Extract JSON metadata package from monthly logs
FROM (
-- Extract JSON metadata package from monthly logs
SELECT
REGEXP_EXTRACT(textPayload, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') AS json
REGEXP_EXTRACT(jsonPayload.message, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') AS json
FROM
`%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%APP_LOGS_TABLE%*`
`%PROJECT_ID%.gke_logs.stderr_*`
WHERE
_TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%'
AND '%LAST_DAY_OF_MONTH%'
AND STARTS_WITH(textPayload, "FLOW-LOG-SIGNATURE-METADATA")
AND STARTS_WITH(jsonPayload.message, "FLOW-LOG-SIGNATURE-METADATA")
-- Look for domain creates
AND REGEXP_CONTAINS(textPayload, r'"commandType":"create","resourceType":"domain"')
AND REGEXP_CONTAINS(jsonPayload.message, r'"commandType":"create","resourceType":"domain"')
-- Filter prober data
AND NOT REGEXP_CONTAINS(textPayload, r'"prober-[a-z]{2}-((any)|(canary))"'))
UNION ALL (
-- Extract JSON metadata package from monthly logs
SELECT
REGEXP_EXTRACT(jsonPayload.message, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') AS json
FROM
`%PROJECT_ID%.%GKE_LOGS_DATA_SET%.stderr_*`
WHERE
_TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%'
AND '%LAST_DAY_OF_MONTH%'
AND STARTS_WITH(jsonPayload.message, "FLOW-LOG-SIGNATURE-METADATA")
-- Look for domain creates
AND REGEXP_CONTAINS(jsonPayload.message, r'"commandType":"create","resourceType":"domain"')
-- Filter prober data
AND NOT REGEXP_CONTAINS(jsonPayload.message, r'"prober-[a-z]{2}-((any)|(canary))"')))
AND NOT REGEXP_CONTAINS(jsonPayload.message, r'"prober-[a-z]{2}-((any)|(canary))"'))
GROUP BY
tld,
clientId ) AS logs_table

View File

@@ -15,7 +15,7 @@
-- Query FlowReporter JSON log messages and calculate SRS metrics.
-- We use ugly regex's over the monthly appengine logs to determine how many
-- We use ugly regexes over the monthly GKE logs to determine how many
-- EPP requests we received for each command. For example:
-- {"commandType":"check"...,"targetIds":["ais.a.how"],
-- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"}
@@ -35,30 +35,15 @@ FROM (
JSON_EXTRACT_SCALAR(json,
'$.icannActivityReportField') AS activityReportField
FROM (
-- For reasons that I don't understand, if I directly select the three columns
-- from the union, BigQuery complains about column number mismatch, so I have to
-- make a temporary union table and select on it.
SELECT
*
FROM (
SELECT
-- Extract the logged JSON payload.
REGEXP_EXTRACT(textPayload, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%APP_LOGS_TABLE%*`
WHERE
STARTS_WITH(textPayload, "FLOW-LOG-SIGNATURE-METADATA")
AND _TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%')
UNION ALL (
SELECT
-- Extract the logged JSON payload.
REGEXP_EXTRACT(jsonPayload.message, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `%PROJECT_ID%.%GKE_LOGS_DATA_SET%.stderr_*`
WHERE
STARTS_WITH(jsonPayload.message, "FLOW-LOG-SIGNATURE-METADATA")
AND _TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%')
)) AS regexes
-- Extract the logged JSON payload.
REGEXP_EXTRACT(jsonPayload.message, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `%PROJECT_ID%.gke_logs.stderr_*`
WHERE
STARTS_WITH(jsonPayload.message, "FLOW-LOG-SIGNATURE-METADATA")
AND _TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%')
) AS regexes
JOIN
-- Unnest the JSON-parsed tlds.
UNNEST(regexes.tlds) AS tld

View File

@@ -13,7 +13,7 @@
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Query to fetch AppEngine and GKE request logs for the report month.
-- Query to fetch GKE request logs for the report month.
-- START_OF_MONTH and END_OF_MONTH should be in YYYYMM01 format.
@@ -23,13 +23,6 @@ FROM (
SELECT
jsonPayload.httrequest.requesturl AS requestPath
FROM
`%PROJECT_ID%.%GKE_LOGS_DATA_SET%.stderr_*`
WHERE
_TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%')
UNION ALL (
SELECT
protoPayload.resource AS requestPath
FROM
`%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%*`
`%PROJECT_ID%.gke_logs.stderr_*`
WHERE
_TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%')

View File

@@ -16,7 +16,7 @@
-- Determine the number of attempted adds each registrar made.
-- Since the specification requests all 'attempted' adds, we regex the
-- monthly App Engine logs, searching for all create commands and associating
-- monthly GKE logs, searching for all create commands and associating
-- them with their corresponding registrars.
-- Example log generated by FlowReporter in App Engine and GKE logs:
@@ -27,7 +27,7 @@
-- ,"targetId":"","targetIds":[],"tld":"",
-- "tlds":[],"icannActivityReportField":""}
-- This outer select just converts the registrar's clientId to their name.
-- This outer select just converts the registrar's ID to their name.
SELECT
tld,
registrar_table.registrar_name AS registrar_name,
@@ -38,34 +38,20 @@ FROM (
JSON_EXTRACT_SCALAR(json, '$.tld') AS tld,
JSON_EXTRACT_SCALAR(json, '$.clientId') AS clientId,
COUNT(json) AS count
FROM ((
-- Extract JSON metadata package from monthly logs
FROM (
-- Extract JSON metadata package from monthly logs
SELECT
REGEXP_EXTRACT(textPayload, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') AS json
REGEXP_EXTRACT(jsonPayload.message, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') AS json
FROM
`domain-registry-alpha.appengine_logs._var_log_app_*`
`domain-registry-alpha.gke_logs.stderr_*`
WHERE
_TABLE_SUFFIX BETWEEN '20170901'
AND '20170930'
AND STARTS_WITH(textPayload, "FLOW-LOG-SIGNATURE-METADATA")
AND STARTS_WITH(jsonPayload.message, "FLOW-LOG-SIGNATURE-METADATA")
-- Look for domain creates
AND REGEXP_CONTAINS(textPayload, r'"commandType":"create","resourceType":"domain"')
AND REGEXP_CONTAINS(jsonPayload.message, r'"commandType":"create","resourceType":"domain"')
-- Filter prober data
AND NOT REGEXP_CONTAINS(textPayload, r'"prober-[a-z]{2}-((any)|(canary))"'))
UNION ALL (
-- Extract JSON metadata package from monthly logs
SELECT
REGEXP_EXTRACT(jsonPayload.message, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') AS json
FROM
`domain-registry-alpha.gke_logs.stderr_*`
WHERE
_TABLE_SUFFIX BETWEEN '20170901'
AND '20170930'
AND STARTS_WITH(jsonPayload.message, "FLOW-LOG-SIGNATURE-METADATA")
-- Look for domain creates
AND REGEXP_CONTAINS(jsonPayload.message, r'"commandType":"create","resourceType":"domain"')
-- Filter prober data
AND NOT REGEXP_CONTAINS(jsonPayload.message, r'"prober-[a-z]{2}-((any)|(canary))"')))
AND NOT REGEXP_CONTAINS(jsonPayload.message, r'"prober-[a-z]{2}-((any)|(canary))"'))
GROUP BY
tld,
clientId ) AS logs_table

View File

@@ -15,7 +15,7 @@
-- Query FlowReporter JSON log messages and calculate SRS metrics.
-- We use ugly regex's over the monthly appengine logs to determine how many
-- We use ugly regexes over the monthly GKE logs to determine how many
-- EPP requests we received for each command. For example:
-- {"commandType":"check"...,"targetIds":["ais.a.how"],
-- "tld":"","tlds":["a.how"],"icannActivityReportField":"srs-dom-check"}
@@ -35,30 +35,15 @@ FROM (
JSON_EXTRACT_SCALAR(json,
'$.icannActivityReportField') AS activityReportField
FROM (
-- For reasons that I don't understand, if I directly select the three columns
-- from the union, BigQuery complains about column number mismatch, so I have to
-- make a temporary union table and select on it.
SELECT
*
FROM (
SELECT
-- Extract the logged JSON payload.
REGEXP_EXTRACT(textPayload, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `domain-registry-alpha.appengine_logs._var_log_app_*`
WHERE
STARTS_WITH(textPayload, "FLOW-LOG-SIGNATURE-METADATA")
AND _TABLE_SUFFIX BETWEEN '20170901' AND '20170930')
UNION ALL (
SELECT
-- Extract the logged JSON payload.
REGEXP_EXTRACT(jsonPayload.message, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `domain-registry-alpha.gke_logs.stderr_*`
WHERE
STARTS_WITH(jsonPayload.message, "FLOW-LOG-SIGNATURE-METADATA")
AND _TABLE_SUFFIX BETWEEN '20170901' AND '20170930')
)) AS regexes
-- Extract the logged JSON payload.
REGEXP_EXTRACT(jsonPayload.message, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `domain-registry-alpha.gke_logs.stderr_*`
WHERE
STARTS_WITH(jsonPayload.message, "FLOW-LOG-SIGNATURE-METADATA")
AND _TABLE_SUFFIX BETWEEN '20170901' AND '20170930')
) AS regexes
JOIN
-- Unnest the JSON-parsed tlds.
UNNEST(regexes.tlds) AS tld

View File

@@ -13,7 +13,7 @@
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Query to fetch AppEngine and GKE request logs for the report month.
-- Query to fetch GKE request logs for the report month.
-- START_OF_MONTH and END_OF_MONTH should be in YYYYMM01 format.
@@ -26,10 +26,3 @@ FROM (
`domain-registry-alpha.gke_logs.stderr_*`
WHERE
_TABLE_SUFFIX BETWEEN '20170901' AND '20170930')
UNION ALL (
SELECT
protoPayload.resource AS requestPath
FROM
`domain-registry-alpha.appengine_logs.appengine_googleapis_com_request_log_*`
WHERE
_TABLE_SUFFIX BETWEEN '20170901' AND '20170930')