1
0
mirror of https://github.com/google/nomulus synced 2025-12-23 06:15:42 +00:00

Read GKE logs in ICANN reports (#2673)

GKE logs are routed to a different dataset and the table is different.
The structs to look for are also different (jsonPayload vs textPayload
or protoPayload).

TESTED=Ran the resulting query in crash.
This commit is contained in:
Lai Jiang
2025-02-12 15:41:44 -05:00
committed by GitHub
parent c918258fb1
commit e78de98060
16 changed files with 164 additions and 85 deletions

View File

@@ -70,10 +70,10 @@ public final class ActivityReportingQueryBuilder implements QueryBuilder {
ImmutableMap.Builder<String, String> queriesBuilder = ImmutableMap.builder();
String operationalRegistrarsQuery;
operationalRegistrarsQuery =
SqlTemplate.create(getQueryFromFile("cloud_sql_registrar_operating_status.sql"))
.put("PROJECT_ID", projectId)
.build();
operationalRegistrarsQuery =
SqlTemplate.create(getQueryFromFile(REGISTRAR_OPERATING_STATUS + ".sql"))
.put("PROJECT_ID", projectId)
.build();
queriesBuilder.put(
getTableName(REGISTRAR_OPERATING_STATUS, yearMonth), operationalRegistrarsQuery);
@@ -83,9 +83,10 @@ public final class ActivityReportingQueryBuilder implements QueryBuilder {
// Convert reportingMonth into YYYYMMDD format for Bigquery table partition pattern-matching.
DateTimeFormatter logTableFormatter = DateTimeFormat.forPattern("yyyyMMdd");
String monthlyLogsQuery =
SqlTemplate.create(getQueryFromFile("monthly_logs.sql"))
SqlTemplate.create(getQueryFromFile(MONTHLY_LOGS + ".sql"))
.put("PROJECT_ID", projectId)
.put("APPENGINE_LOGS_DATA_SET", "appengine_logs")
.put("GKE_LOGS_DATA_SET", "gke_logs")
.put("REQUEST_TABLE", "appengine_googleapis_com_request_log_")
.put("FIRST_DAY_OF_MONTH", logTableFormatter.print(firstDayOfMonth))
.put("LAST_DAY_OF_MONTH", logTableFormatter.print(lastDayOfMonth))
@@ -93,9 +94,10 @@ public final class ActivityReportingQueryBuilder implements QueryBuilder {
queriesBuilder.put(getTableName(MONTHLY_LOGS, yearMonth), monthlyLogsQuery);
String eppQuery =
SqlTemplate.create(getQueryFromFile("epp_metrics.sql"))
SqlTemplate.create(getQueryFromFile(EPP_METRICS + ".sql"))
.put("PROJECT_ID", projectId)
.put("APPENGINE_LOGS_DATA_SET", "appengine_logs")
.put("GKE_LOGS_DATA_SET", "gke_logs")
.put("APP_LOGS_TABLE", "_var_log_app_")
.put("FIRST_DAY_OF_MONTH", logTableFormatter.print(firstDayOfMonth))
.put("LAST_DAY_OF_MONTH", logTableFormatter.print(lastDayOfMonth))
@@ -103,7 +105,7 @@ public final class ActivityReportingQueryBuilder implements QueryBuilder {
queriesBuilder.put(getTableName(EPP_METRICS, yearMonth), eppQuery);
String whoisQuery =
SqlTemplate.create(getQueryFromFile("whois_counts.sql"))
SqlTemplate.create(getQueryFromFile(WHOIS_COUNTS + ".sql"))
.put("PROJECT_ID", projectId)
.put("ICANN_REPORTING_DATA_SET", icannReportingDataSet)
.put("MONTHLY_LOGS_TABLE", getTableName(MONTHLY_LOGS, yearMonth))
@@ -111,7 +113,7 @@ public final class ActivityReportingQueryBuilder implements QueryBuilder {
queriesBuilder.put(getTableName(WHOIS_COUNTS, yearMonth), whoisQuery);
SqlTemplate aggregateQuery =
SqlTemplate.create(getQueryFromFile("cloud_sql_activity_report_aggregation.sql"))
SqlTemplate.create(getQueryFromFile(ACTIVITY_REPORT_AGGREGATION + ".sql"))
.put("PROJECT_ID", projectId)
.put(
"REGISTRAR_OPERATING_STATUS_TABLE",

View File

@@ -71,27 +71,27 @@ public final class TransactionsReportingQueryBuilder implements QueryBuilder {
ImmutableMap.Builder<String, String> queriesBuilder = ImmutableMap.builder();
String registrarIanaIdQuery =
SqlTemplate.create(getQueryFromFile("cloud_sql_registrar_iana_id.sql"))
SqlTemplate.create(getQueryFromFile(REGISTRAR_IANA_ID + ".sql"))
.put("PROJECT_ID", projectId)
.build();
queriesBuilder.put(getTableName(REGISTRAR_IANA_ID, yearMonth), registrarIanaIdQuery);
String totalDomainsQuery =
SqlTemplate.create(getQueryFromFile("cloud_sql_total_domains.sql"))
SqlTemplate.create(getQueryFromFile(TOTAL_DOMAINS + ".sql"))
.put("PROJECT_ID", projectId)
.build();
queriesBuilder.put(getTableName(TOTAL_DOMAINS, yearMonth), totalDomainsQuery);
DateTimeFormatter timestampFormatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS");
String totalNameserversQuery =
SqlTemplate.create(getQueryFromFile("cloud_sql_total_nameservers.sql"))
SqlTemplate.create(getQueryFromFile(TOTAL_NAMESERVERS + ".sql"))
.put("PROJECT_ID", projectId)
.put("LATEST_REPORT_TIME", timestampFormatter.print(latestReportTime))
.build();
queriesBuilder.put(getTableName(TOTAL_NAMESERVERS, yearMonth), totalNameserversQuery);
String transactionCountsQuery =
SqlTemplate.create(getQueryFromFile("cloud_sql_transaction_counts.sql"))
SqlTemplate.create(getQueryFromFile(TRANSACTION_COUNTS + ".sql"))
.put("PROJECT_ID", projectId)
.put("EARLIEST_REPORT_TIME", timestampFormatter.print(earliestReportTime))
.put("LATEST_REPORT_TIME", timestampFormatter.print(latestReportTime))
@@ -99,7 +99,7 @@ public final class TransactionsReportingQueryBuilder implements QueryBuilder {
queriesBuilder.put(getTableName(TRANSACTION_COUNTS, yearMonth), transactionCountsQuery);
String transactionTransferLosingQuery =
SqlTemplate.create(getQueryFromFile("cloud_sql_transaction_transfer_losing.sql"))
SqlTemplate.create(getQueryFromFile(TRANSACTION_TRANSFER_LOSING + ".sql"))
.put("PROJECT_ID", projectId)
.put("EARLIEST_REPORT_TIME", timestampFormatter.print(earliestReportTime))
.put("LATEST_REPORT_TIME", timestampFormatter.print(latestReportTime))
@@ -110,9 +110,10 @@ public final class TransactionsReportingQueryBuilder implements QueryBuilder {
// App Engine log table suffixes use YYYYMMDD format
DateTimeFormatter logTableFormatter = DateTimeFormat.forPattern("yyyyMMdd");
String attemptedAddsQuery =
SqlTemplate.create(getQueryFromFile("cloud_sql_attempted_adds.sql"))
SqlTemplate.create(getQueryFromFile(ATTEMPTED_ADDS + ".sql"))
.put("PROJECT_ID", projectId)
.put("APPENGINE_LOGS_DATA_SET", "appengine_logs")
.put("GKE_LOGS_DATA_SET", "gke_logs")
.put("APP_LOGS_TABLE", "_var_log_app_")
.put("FIRST_DAY_OF_MONTH", logTableFormatter.print(earliestReportTime))
.put("LAST_DAY_OF_MONTH", logTableFormatter.print(latestReportTime))
@@ -120,7 +121,7 @@ public final class TransactionsReportingQueryBuilder implements QueryBuilder {
queriesBuilder.put(getTableName(ATTEMPTED_ADDS, yearMonth), attemptedAddsQuery);
String aggregateQuery =
SqlTemplate.create(getQueryFromFile("cloud_sql_transactions_report_aggregation.sql"))
SqlTemplate.create(getQueryFromFile(TRANSACTIONS_REPORT_AGGREGATION + ".sql"))
.put("PROJECT_ID", projectId)
.put("ICANN_REPORTING_DATA_SET", icannReportingDataSet)
.put("REGISTRAR_IANA_ID_TABLE", getTableName(REGISTRAR_IANA_ID, yearMonth))

View File

@@ -19,7 +19,7 @@
-- monthly App Engine logs, searching for all create commands and associating
-- them with their corresponding registrars.
-- Example log generated by FlowReporter in App Engine logs:
-- Example log generated by FlowReporter in App Engine and GKE logs:
--google.registry.flows.FlowReporter
-- recordToLogs: FLOW-LOG-SIGNATURE-METADATA:
--{"serverTrid":"oNwL2J2eRya7bh7c9oHIzg==-2360a","clientId":"ipmirror"
@@ -38,30 +38,42 @@ FROM (
JSON_EXTRACT_SCALAR(json, '$.tld') AS tld,
JSON_EXTRACT_SCALAR(json, '$.clientId') AS clientId,
COUNT(json) AS count
FROM (
FROM ((
-- Extract JSON metadata package from monthly logs
SELECT
REGEXP_EXTRACT(textPayload, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM (
REGEXP_EXTRACT(textPayload, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') AS json
FROM
`%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%APP_LOGS_TABLE%*`
WHERE
_TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%'
AND '%LAST_DAY_OF_MONTH%'
AND STARTS_WITH(textPayload, "FLOW-LOG-SIGNATURE-METADATA")
-- Look for domain creates
AND REGEXP_CONTAINS(textPayload, r'"commandType":"create","resourceType":"domain"')
-- Filter prober data
AND NOT REGEXP_CONTAINS(textPayload, r'"prober-[a-z]{2}-((any)|(canary))"'))
UNION ALL (
-- Extract JSON metadata package from monthly logs
SELECT
textPayload
REGEXP_EXTRACT(jsonPayload.message, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') AS json
FROM
`%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%APP_LOGS_TABLE%*`
WHERE _TABLE_SUFFIX
BETWEEN '%FIRST_DAY_OF_MONTH%'
AND '%LAST_DAY_OF_MONTH%')
WHERE STARTS_WITH(textPayload, "FLOW-LOG-SIGNATURE-METADATA")
-- Look for domain creates
AND REGEXP_CONTAINS(
textPayload, r'"commandType":"create","resourceType":"domain"')
-- Filter prober data
AND NOT REGEXP_CONTAINS(
textPayload, r'"prober-[a-z]{2}-((any)|(canary))"') )
GROUP BY tld, clientId ) AS logs_table
`%PROJECT_ID%.%GKE_LOGS_DATA_SET%.stderr_*`
WHERE
_TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%'
AND '%LAST_DAY_OF_MONTH%'
AND STARTS_WITH(jsonPayload.message, "FLOW-LOG-SIGNATURE-METADATA")
-- Look for domain creates
AND REGEXP_CONTAINS(jsonPayload.message, r'"commandType":"create","resourceType":"domain"')
-- Filter prober data
AND NOT REGEXP_CONTAINS(jsonPayload.message, r'"prober-[a-z]{2}-((any)|(canary))"')))
GROUP BY
tld,
clientId ) AS logs_table
JOIN
EXTERNAL_QUERY("projects/%PROJECT_ID%/locations/us/connections/%PROJECT_ID%-sql",
'''SELECT registrar_id, registrar_name FROM "Registrar";''') AS registrar_table
ON
logs_table.clientId = registrar_table.registrar_id
ORDER BY tld, registrar_name
'''SELECT registrar_id, registrar_name FROM "Registrar";''') AS registrar_table
ON
logs_table.clientId = registrar_table.registrar_id
ORDER BY
tld,
registrar_name

View File

@@ -35,14 +35,30 @@ FROM (
JSON_EXTRACT_SCALAR(json,
'$.icannActivityReportField') AS activityReportField
FROM (
-- For reasons that I don't understand, if I directly select the three columns
-- from the union, BigQuery complains about column number mismatch, so I have to
-- make a temporary union table and select on it.
SELECT
-- Extract the logged JSON payload.
REGEXP_EXTRACT(textPayload, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%APP_LOGS_TABLE%*`
WHERE
STARTS_WITH(textPayload, "FLOW-LOG-SIGNATURE-METADATA")
AND _TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%')) AS regexes
*
FROM (
SELECT
-- Extract the logged JSON payload.
REGEXP_EXTRACT(textPayload, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%APP_LOGS_TABLE%*`
WHERE
STARTS_WITH(textPayload, "FLOW-LOG-SIGNATURE-METADATA")
AND _TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%')
UNION ALL (
SELECT
-- Extract the logged JSON payload.
REGEXP_EXTRACT(jsonPayload.message, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `%PROJECT_ID%.%GKE_LOGS_DATA_SET%.stderr_*`
WHERE
STARTS_WITH(jsonPayload.message, "FLOW-LOG-SIGNATURE-METADATA")
AND _TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%')
)) AS regexes
JOIN
-- Unnest the JSON-parsed tlds.
UNNEST(regexes.tlds) AS tld

View File

@@ -13,13 +13,23 @@
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Query to fetch AppEngine request logs for the report month.
-- Query to fetch AppEngine and GKE request logs for the report month.
-- START_OF_MONTH and END_OF_MONTH should be in YYYYMM01 format.
SELECT
protoPayload.resource AS requestPath,
FROM
`%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%*`
WHERE
_TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%'
*
FROM (
SELECT
jsonPayload.httrequest.requesturl AS requestPath
FROM
`%PROJECT_ID%.%GKE_LOGS_DATA_SET%.stderr_*`
WHERE
_TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%')
UNION ALL (
SELECT
protoPayload.resource AS requestPath
FROM
`%PROJECT_ID%.%APPENGINE_LOGS_DATA_SET%.%REQUEST_TABLE%*`
WHERE
_TABLE_SUFFIX BETWEEN '%FIRST_DAY_OF_MONTH%' AND '%LAST_DAY_OF_MONTH%')

View File

@@ -19,7 +19,7 @@
-- monthly App Engine logs, searching for all create commands and associating
-- them with their corresponding registrars.
-- Example log generated by FlowReporter in App Engine logs:
-- Example log generated by FlowReporter in App Engine and GKE logs:
--google.registry.flows.FlowReporter
-- recordToLogs: FLOW-LOG-SIGNATURE-METADATA:
--{"serverTrid":"oNwL2J2eRya7bh7c9oHIzg==-2360a","clientId":"ipmirror"
@@ -38,30 +38,42 @@ FROM (
JSON_EXTRACT_SCALAR(json, '$.tld') AS tld,
JSON_EXTRACT_SCALAR(json, '$.clientId') AS clientId,
COUNT(json) AS count
FROM (
FROM ((
-- Extract JSON metadata package from monthly logs
SELECT
REGEXP_EXTRACT(textPayload, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM (
REGEXP_EXTRACT(textPayload, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') AS json
FROM
`domain-registry-alpha.appengine_logs._var_log_app_*`
WHERE
_TABLE_SUFFIX BETWEEN '20170901'
AND '20170930'
AND STARTS_WITH(textPayload, "FLOW-LOG-SIGNATURE-METADATA")
-- Look for domain creates
AND REGEXP_CONTAINS(textPayload, r'"commandType":"create","resourceType":"domain"')
-- Filter prober data
AND NOT REGEXP_CONTAINS(textPayload, r'"prober-[a-z]{2}-((any)|(canary))"'))
UNION ALL (
-- Extract JSON metadata package from monthly logs
SELECT
textPayload
REGEXP_EXTRACT(jsonPayload.message, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$') AS json
FROM
`domain-registry-alpha.appengine_logs._var_log_app_*`
WHERE _TABLE_SUFFIX
BETWEEN '20170901'
AND '20170930')
WHERE STARTS_WITH(textPayload, "FLOW-LOG-SIGNATURE-METADATA")
-- Look for domain creates
AND REGEXP_CONTAINS(
textPayload, r'"commandType":"create","resourceType":"domain"')
-- Filter prober data
AND NOT REGEXP_CONTAINS(
textPayload, r'"prober-[a-z]{2}-((any)|(canary))"') )
GROUP BY tld, clientId ) AS logs_table
`domain-registry-alpha.gke_logs.stderr_*`
WHERE
_TABLE_SUFFIX BETWEEN '20170901'
AND '20170930'
AND STARTS_WITH(jsonPayload.message, "FLOW-LOG-SIGNATURE-METADATA")
-- Look for domain creates
AND REGEXP_CONTAINS(jsonPayload.message, r'"commandType":"create","resourceType":"domain"')
-- Filter prober data
AND NOT REGEXP_CONTAINS(jsonPayload.message, r'"prober-[a-z]{2}-((any)|(canary))"')))
GROUP BY
tld,
clientId ) AS logs_table
JOIN
EXTERNAL_QUERY("projects/domain-registry-alpha/locations/us/connections/domain-registry-alpha-sql",
'''SELECT registrar_id, registrar_name FROM "Registrar";''') AS registrar_table
ON
logs_table.clientId = registrar_table.registrar_id
ORDER BY tld, registrar_name
'''SELECT registrar_id, registrar_name FROM "Registrar";''') AS registrar_table
ON
logs_table.clientId = registrar_table.registrar_id
ORDER BY
tld,
registrar_name

View File

@@ -35,14 +35,30 @@ FROM (
JSON_EXTRACT_SCALAR(json,
'$.icannActivityReportField') AS activityReportField
FROM (
-- For reasons that I don't understand, if I directly select the three columns
-- from the union, BigQuery complains about column number mismatch, so I have to
-- make a temporary union table and select on it.
SELECT
-- Extract the logged JSON payload.
REGEXP_EXTRACT(textPayload, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `domain-registry-alpha.appengine_logs._var_log_app_*`
WHERE
STARTS_WITH(textPayload, "FLOW-LOG-SIGNATURE-METADATA")
AND _TABLE_SUFFIX BETWEEN '20170901' AND '20170930')) AS regexes
*
FROM (
SELECT
-- Extract the logged JSON payload.
REGEXP_EXTRACT(textPayload, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `domain-registry-alpha.appengine_logs._var_log_app_*`
WHERE
STARTS_WITH(textPayload, "FLOW-LOG-SIGNATURE-METADATA")
AND _TABLE_SUFFIX BETWEEN '20170901' AND '20170930')
UNION ALL (
SELECT
-- Extract the logged JSON payload.
REGEXP_EXTRACT(jsonPayload.message, r'FLOW-LOG-SIGNATURE-METADATA: (.*)\n?$')
AS json
FROM `domain-registry-alpha.gke_logs.stderr_*`
WHERE
STARTS_WITH(jsonPayload.message, "FLOW-LOG-SIGNATURE-METADATA")
AND _TABLE_SUFFIX BETWEEN '20170901' AND '20170930')
)) AS regexes
JOIN
-- Unnest the JSON-parsed tlds.
UNNEST(regexes.tlds) AS tld

View File

@@ -13,13 +13,23 @@
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Query to fetch AppEngine request logs for the report month.
-- Query to fetch AppEngine and GKE request logs for the report month.
-- START_OF_MONTH and END_OF_MONTH should be in YYYYMM01 format.
SELECT
protoPayload.resource AS requestPath,
FROM
`domain-registry-alpha.appengine_logs.appengine_googleapis_com_request_log_*`
WHERE
_TABLE_SUFFIX BETWEEN '20170901' AND '20170930'
*
FROM (
SELECT
jsonPayload.httrequest.requesturl AS requestPath
FROM
`domain-registry-alpha.gke_logs.stderr_*`
WHERE
_TABLE_SUFFIX BETWEEN '20170901' AND '20170930')
UNION ALL (
SELECT
protoPayload.resource AS requestPath
FROM
`domain-registry-alpha.appengine_logs.appengine_googleapis_com_request_log_*`
WHERE
_TABLE_SUFFIX BETWEEN '20170901' AND '20170930')