BigQuery ๊ฒฐ๊ณผ ์„ธํŠธ ํŽ˜์ด์ง€ ๋‚˜๋ˆ„๊ธฐ

์ฟผ๋ฆฌ ๊ฒฐ๊ณผ๋ฅผ Workflows ๋ฉ”๋ชจ๋ฆฌ ํ•œ๋„๋ณด๋‹ค ํฐ BigQuery ๋ฐ์ดํ„ฐ ์„ธํŠธ๋กœ ๊ฒ€์ƒ‰ํ•˜๋ ค๊ณ  ์‹œ๋„ํ•  ๊ฒฝ์šฐ ํŽ˜์ด์ง€ ํ† ํฐ์„ ์‚ฌ์šฉํ•˜์—ฌ ๊ฒฐ๊ณผ๋ฅผ ํŽ˜์ด์ง€๋กœ ๋‚˜๋ˆŒ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ํŽ˜์ด์ง€ ํ† ํฐ์€ ๊ฒฐ๊ณผ ์ง‘ํ•ฉ์˜ ์œ„์น˜๋ฅผ ๋‚˜ํƒ€๋‚ด๋ฉฐ, ์ถ”๊ฐ€ ๊ฒฐ๊ณผ๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์„ ๋•Œ ๋ฐ˜ํ™˜๋ฉ๋‹ˆ๋‹ค. ์ด๋ ‡๊ฒŒ ํ•˜๋ฉด ํ•œ ํŽ˜์ด์ง€์˜ ๊ฒฐ๊ณผ๋ฅผ ํ•œ ๋ฒˆ์— ๋ฐ˜๋ณตํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.

BigQuery๋Š” ์ผ๋ฐ˜ ๋Œ€์ค‘์ด ์ฟผ๋ฆฌํ•  ์ˆ˜ ์žˆ๋Š” ์—ฌ๋Ÿฌ ๊ณต๊ฐœ ๋ฐ์ดํ„ฐ์„ธํŠธ๋ฅผ ํ˜ธ์ŠคํŒ…ํ•ฉ๋‹ˆ๋‹ค. ๋‹ค์Œ ์˜ˆ์‹œ์—์„œ๋Š” ๋ฏธ๊ตญ ์ด๋ฆ„ ๋ฐ์ดํ„ฐ ๊ณต๊ฐœ ๋ฐ์ดํ„ฐ์„ธํŠธ๋ฅผ ์ฟผ๋ฆฌํ•ด 1910๋…„๋ถ€ํ„ฐ 2013๋…„๊นŒ์ง€ ๋ฏธ๊ตญ์—์„œ ๊ฐ€์žฅ ํ”ํ•œ ์ด๋ฆ„์„ ํ™•์ธํ•ฉ๋‹ˆ๋‹ค.

YAML

# Use a page token to loop through a page of results at a time when
# querying a BigQuery dataset larger than the Workflows memory limit
# This workflow queries a public dataset to determine the most common
# names in the US between 1910 and 2013
main:
  params: [input]
  steps:
    - init:
        assign:
          - pageToken: null
    - startQuery:
        call: googleapis.bigquery.v2.jobs.insert
        args:
          projectId: ${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")}
          body:
            configuration:
              query:
                useLegacySql: false
                # Remove LIMIT from the query to iterate through all results
                query: SELECT name, SUM(number) AS total FROM `bigquery-public-data.usa_names.usa_1910_2013` GROUP BY name ORDER BY total DESC LIMIT 50
        result: query
    - getPage:
        call: googleapis.bigquery.v2.jobs.getQueryResults
        args:
          projectId: ${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")}
          jobId: ${query.jobReference.jobId}
          maxResults: 10
          pageToken: ${pageToken}
        result: page
    - processPage:
        for:
          value: row
          in: ${page.rows}
          steps:
            - processRow:
                call: sys.log
                args:
                  data: ${row}
    - checkIfDone:
        switch:
          - condition: ${"pageToken" in page and page.pageToken != ""}
            assign:
              - pageToken: ${page.pageToken}
            next: getPage

JSON

{
  "main": {
    "params": [
      "input"
    ],
    "steps": [
      {
        "init": {
          "assign": [
            {
              "pageToken": null
            }
          ]
        }
      },
      {
        "startQuery": {
          "call": "googleapis.bigquery.v2.jobs.insert",
          "args": {
            "projectId": "${sys.get_env(\"GOOGLE_CLOUD_PROJECT_ID\")}",
            "body": {
              "configuration": {
                "query": {
                  "useLegacySql": false,
                  "query": "SELECT name, SUM(number) AS total FROM `bigquery-public-data.usa_names.usa_1910_2013` GROUP BY name ORDER BY total DESC LIMIT 50"
                }
              }
            }
          },
          "result": "query"
        }
      },
      {
        "getPage": {
          "call": "googleapis.bigquery.v2.jobs.getQueryResults",
          "args": {
            "projectId": "${sys.get_env(\"GOOGLE_CLOUD_PROJECT_ID\")}",
            "jobId": "${query.jobReference.jobId}",
            "maxResults": 10,
            "pageToken": "${pageToken}"
          },
          "result": "page"
        }
      },
      {
        "processPage": {
          "for": {
            "value": "row",
            "in": "${page.rows}",
            "steps": [
              {
                "processRow": {
                  "call": "sys.log",
                  "args": {
                    "data": "${row}"
                  }
                }
              }
            ]
          }
        }
      },
      {
        "checkIfDone": {
          "switch": [
            {
              "condition": "${\"pageToken\" in page and page.pageToken != \"\"}",
              "assign": [
                {
                  "pageToken": "${page.pageToken}"
                }
              ],
              "next": "getPage"
            }
          ]
        }
      }
    ]
  }
}

๋‹ค์Œ ๋‹จ๊ณ„