# specific language governing permissions and limitations
# under the License.
"""
-Example Airflow DAG that demonstrates interactions with Google Cloud Transfer. This DAG relies on
-the following OS environment variables
-
-Note that you need to provide a large enough set of data so that operations do not execute too quickly.
-Otherwise, DAG will fail.
-
-* GCP_PROJECT_ID - Google Cloud Project to use for the Google Cloud Transfer Service.
-* GCP_DESCRIPTION - Description of transfer job
-* GCP_TRANSFER_SOURCE_AWS_BUCKET - Amazon Web Services Storage bucket from which files are copied.
-* GCP_TRANSFER_SECOND_TARGET_BUCKET - Google Cloud Storage bucket to which files are copied
-* WAIT_FOR_OPERATION_POKE_INTERVAL - interval of what to check the status of the operation
-  A smaller value than the default value accelerates the system test and ensures its correct execution with
-  smaller quantities of files in the source bucket
-  Look at documentation of :class:`~airflow.operators.sensors.BaseSensorOperator` for more information
-
+Example Airflow DAG that demonstrates interactions with Google Cloud Transfer.
"""
|
35 | 21 | from __future__ import annotations
|
36 | 22 |
|
37 | 23 | import os
|
38 | 24 | from datetime import datetime, timedelta
|
39 | 25 |
|
+from copy import deepcopy
+
from airflow import models
-from airflow.models.baseoperator import chain
+from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, S3DeleteBucketOperator
+from airflow.providers.amazon.aws.transfers.gcs_to_s3 import GCSToS3Operator
from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
    ALREADY_EXISTING_IN_SINK,
    AWS_S3_DATA_SOURCE,
...
    CloudDataTransferServicePauseOperationOperator,
    CloudDataTransferServiceResumeOperationOperator,
)
+from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
from airflow.providers.google.cloud.sensors.cloud_storage_transfer_service import (
    CloudDataTransferServiceJobStatusSensor,
)
+from airflow.utils.trigger_rule import TriggerRule

-GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")
-GCP_DESCRIPTION = os.environ.get("GCP_DESCRIPTION", "description")
-GCP_TRANSFER_TARGET_BUCKET = os.environ.get("GCP_TRANSFER_TARGET_BUCKET")
-WAIT_FOR_OPERATION_POKE_INTERVAL = int(os.environ.get("WAIT_FOR_OPERATION_POKE_INTERVAL", 5))
+ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
+GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT")

-GCP_TRANSFER_SOURCE_AWS_BUCKET = os.environ.get("GCP_TRANSFER_SOURCE_AWS_BUCKET")
-GCP_TRANSFER_FIRST_TARGET_BUCKET = os.environ.get(
-    "GCP_TRANSFER_FIRST_TARGET_BUCKET", "gcp-transfer-first-target"
-)
+DAG_ID = "example_gcp_transfer_aws"
+
+EXAMPLE_BUCKET = "airflow-system-tests-resources"
+EXAMPLE_FILE = "storage-transfer/big_file.dat"
+BUCKET_SOURCE_AWS = f"bucket-aws-{DAG_ID}-{ENV_ID}".replace("_", "-")
+BUCKET_TARGET_GCS = f"bucket-gcs-{DAG_ID}-{ENV_ID}".replace("_", "-")
+WAIT_FOR_OPERATION_POKE_INTERVAL = int(os.environ.get("WAIT_FOR_OPERATION_POKE_INTERVAL", 5))

-GCP_TRANSFER_JOB_NAME = os.environ.get("GCP_TRANSFER_JOB_NAME", "transferJobs/sampleJob")
+GCP_DESCRIPTION = "description"
+GCP_TRANSFER_JOB_NAME = f"transferJobs/sampleJob-{DAG_ID}-{ENV_ID}".replace("-", "_")
+GCP_TRANSFER_JOB_2_NAME = f"transferJobs/sampleJob2-{DAG_ID}-{ENV_ID}".replace("-", "_")

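For orientation, a worked example of the names these expressions produce, assuming a hypothetical ENV_ID of "dev1" (underscores are replaced in the bucket names because S3 bucket names may not contain them):

    # Hypothetical ENV_ID = "dev1"
    #   BUCKET_SOURCE_AWS     -> "bucket-aws-example-gcp-transfer-aws-dev1"
    #   BUCKET_TARGET_GCS     -> "bucket-gcs-example-gcp-transfer-aws-dev1"
    #   GCP_TRANSFER_JOB_NAME -> "transferJobs/sampleJob_example_gcp_transfer_aws_dev1"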
# [START howto_operator_gcp_transfer_create_job_body_aws]
aws_to_gcs_transfer_body = {
...
    SCHEDULE: {
        SCHEDULE_START_DATE: datetime(2015, 1, 1).date(),
        SCHEDULE_END_DATE: datetime(2030, 1, 1).date(),
-        START_TIME_OF_DAY: (datetime.utcnow() + timedelta(minutes=2)).time(),
+        START_TIME_OF_DAY: (datetime.utcnow() + timedelta(minutes=1)).time(),
    },
    TRANSFER_SPEC: {
-        AWS_S3_DATA_SOURCE: {BUCKET_NAME: GCP_TRANSFER_SOURCE_AWS_BUCKET},
-        GCS_DATA_SINK: {BUCKET_NAME: GCP_TRANSFER_FIRST_TARGET_BUCKET},
+        AWS_S3_DATA_SOURCE: {BUCKET_NAME: BUCKET_SOURCE_AWS},
+        GCS_DATA_SINK: {BUCKET_NAME: BUCKET_TARGET_GCS},
        TRANSFER_OPTIONS: {ALREADY_EXISTING_IN_SINK: True},
    },
}
# [END howto_operator_gcp_transfer_create_job_body_aws]

+aws_to_gcs_transfer_body_2 = deepcopy(aws_to_gcs_transfer_body)
+aws_to_gcs_transfer_body_2[JOB_NAME] = GCP_TRANSFER_JOB_2_NAME

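The second request body is a deep copy of the first with only its job name swapped, so later mutation cannot leak back into the original body. A minimal, self-contained illustration of why the copy matters, using plain dict stand-ins rather than the transfer-body constants above:

    from copy import deepcopy

    body = {"name": "transferJobs/sample_job_1"}
    body_2 = deepcopy(body)
    body_2["name"] = "transferJobs/sample_job_2"
    assert body["name"] == "transferJobs/sample_job_1"  # the original body is untouched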
with models.DAG(
-    "example_gcp_transfer_aws",
+    dag_id=DAG_ID,
    start_date=datetime(2021, 1, 1),
    catchup=False,
-    tags=["example"],
+    tags=["example", "aws", "gcs", "transfer"],
) as dag:
+    create_bucket_s3 = S3CreateBucketOperator(
+        task_id="create_bucket_s3", bucket_name=BUCKET_SOURCE_AWS, region_name="us-east-1"
+    )
+
+    upload_file_to_s3 = GCSToS3Operator(
+        task_id="upload_file_to_s3",
+        gcp_user_project=GCP_PROJECT_ID,
+        bucket=EXAMPLE_BUCKET,
+        prefix=EXAMPLE_FILE,
+        dest_s3_key=f"s3://{BUCKET_SOURCE_AWS}",
+        replace=True,
+    )
+
+    create_bucket_gcs = GCSCreateBucketOperator(
+        task_id="create_bucket_gcs",
+        bucket_name=BUCKET_TARGET_GCS,
+        project_id=GCP_PROJECT_ID,
+    )

    # [START howto_operator_gcp_transfer_create_job]
-    create_transfer_job_from_aws = CloudDataTransferServiceCreateJobOperator(
-        task_id="create_transfer_job_from_aws", body=aws_to_gcs_transfer_body
+    create_transfer_job_s3_to_gcs = CloudDataTransferServiceCreateJobOperator(
+        task_id="create_transfer_job_s3_to_gcs", body=aws_to_gcs_transfer_body
    )
    # [END howto_operator_gcp_transfer_create_job]

    wait_for_operation_to_start = CloudDataTransferServiceJobStatusSensor(
        task_id="wait_for_operation_to_start",
-        job_name="{{task_instance.xcom_pull('create_transfer_job_from_aws')['name']}}",
+        job_name="{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs')['name']}}",
        project_id=GCP_PROJECT_ID,
        expected_statuses={GcpTransferOperationStatus.IN_PROGRESS},
        poke_interval=WAIT_FOR_OPERATION_POKE_INTERVAL,
...
        task_id="list_operations",
        request_filter={
            FILTER_PROJECT_ID: GCP_PROJECT_ID,
-            FILTER_JOB_NAMES: ["{{task_instance.xcom_pull('create_transfer_job_from_aws')['name']}}"],
+            FILTER_JOB_NAMES: ["{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs')['name']}}"],
        },
    )
    # [END howto_operator_gcp_transfer_list_operations]
...
|
159 | 173 | wait_for_operation_to_end = CloudDataTransferServiceJobStatusSensor(
|
160 | 174 | task_id="wait_for_operation_to_end",
|
161 |
| - job_name="{{task_instance.xcom_pull('create_transfer_job_from_aws')['name']}}", |
| 175 | + job_name="{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs')['name']}}", |
162 | 176 | project_id=GCP_PROJECT_ID,
|
163 | 177 | expected_statuses={GcpTransferOperationStatus.SUCCESS},
|
164 | 178 | poke_interval=WAIT_FOR_OPERATION_POKE_INTERVAL,
|
165 | 179 | )
|
166 | 180 | # [END howto_operator_gcp_transfer_wait_operation]
|
167 | 181 |
|
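Both `CloudDataTransferServiceJobStatusSensor` tasks above keep re-checking the latest transfer operation until it reports one of the `expected_statuses`, sleeping `poke_interval` seconds between checks, which is why a small interval speeds the test up when the source data is small. A stripped-down sketch of that polling behaviour, ignoring the timeout and reschedule handling a real sensor adds:

    import time

    def wait_for_status(get_status, expected_statuses, poke_interval=5):
        # Poll until the reported status is one we expect; sleep between pokes.
        while get_status() not in expected_statuses:
            time.sleep(poke_interval)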
+    create_second_transfer_job_from_aws = CloudDataTransferServiceCreateJobOperator(
+        task_id="create_transfer_job_s3_to_gcs_2", body=aws_to_gcs_transfer_body_2
+    )
+
+    wait_for_operation_to_start_2 = CloudDataTransferServiceJobStatusSensor(
+        task_id="wait_for_operation_to_start_2",
+        job_name="{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs_2')['name']}}",
+        project_id=GCP_PROJECT_ID,
+        expected_statuses={GcpTransferOperationStatus.IN_PROGRESS},
+        poke_interval=WAIT_FOR_OPERATION_POKE_INTERVAL,
+    )
+
    # [START howto_operator_gcp_transfer_cancel_operation]
    cancel_operation = CloudDataTransferServiceCancelOperationOperator(
        task_id="cancel_operation",
        operation_name="{{task_instance.xcom_pull("
-        "'wait_for_second_operation_to_start', key='sensed_operations')[0]['name']}}",
+        "'wait_for_operation_to_start_2', key='sensed_operations')[0]['name']}}",
    )
    # [END howto_operator_gcp_transfer_cancel_operation]

    # [START howto_operator_gcp_transfer_delete_job]
-    delete_transfer_from_aws_job = CloudDataTransferServiceDeleteJobOperator(
-        task_id="delete_transfer_from_aws_job",
-        job_name="{{task_instance.xcom_pull('create_transfer_job_from_aws')['name']}}",
+    delete_transfer_job_s3_to_gcs = CloudDataTransferServiceDeleteJobOperator(
+        task_id="delete_transfer_job_s3_to_gcs",
+        job_name="{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs')['name']}}",
        project_id=GCP_PROJECT_ID,
+        trigger_rule=TriggerRule.ALL_DONE,
    )
    # [END howto_operator_gcp_transfer_delete_job]

-    chain(
-        create_transfer_job_from_aws,
-        wait_for_operation_to_start,
-        pause_operation,
-        list_operations,
-        get_operation,
-        resume_operation,
-        wait_for_operation_to_end,
-        cancel_operation,
-        delete_transfer_from_aws_job,
+    delete_transfer_job_s3_to_gcs_2 = CloudDataTransferServiceDeleteJobOperator(
+        task_id="delete_transfer_job_s3_to_gcs_2",
+        job_name="{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs_2')['name']}}",
+        project_id=GCP_PROJECT_ID,
+        trigger_rule=TriggerRule.ALL_DONE,
+    )
+
+    delete_bucket_s3 = S3DeleteBucketOperator(
+        task_id="delete_bucket_s3",
+        bucket_name=BUCKET_SOURCE_AWS,
+        force_delete=True,
+        trigger_rule=TriggerRule.ALL_DONE,
+    )
+
+    delete_bucket_gcs = GCSDeleteBucketOperator(
+        task_id="delete_bucket_gcs",
+        bucket_name=BUCKET_TARGET_GCS,
+        trigger_rule=TriggerRule.ALL_DONE,
+    )
+
+    (
+        # TEST SETUP
+        [create_bucket_s3 >> upload_file_to_s3, create_bucket_gcs]
+        # TEST BODY
+        >> create_transfer_job_s3_to_gcs
+        >> wait_for_operation_to_start
+        >> pause_operation
+        >> list_operations
+        >> get_operation
+        >> resume_operation
+        >> wait_for_operation_to_end
+        >> create_second_transfer_job_from_aws
+        >> wait_for_operation_to_start_2
+        >> cancel_operation
+        # TEST TEARDOWN
+        >> [
+            delete_transfer_job_s3_to_gcs,
+            delete_transfer_job_s3_to_gcs_2,
+            delete_bucket_gcs,
+            delete_bucket_s3,
+        ]
    )
+
+    from tests.system.utils.watcher import watcher
+
+    # This test needs watcher in order to properly mark success/failure
+    # when "tearDown" task with trigger rule is part of the DAG
+    list(dag.tasks) >> watcher()
+
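Because the teardown tasks run with `TriggerRule.ALL_DONE` and sit at the end of the DAG, a run could still finish green even when a test-body task fails; wiring every task into `watcher()` adds a task that fails the run whenever anything upstream fails. A rough sketch of what such a watcher task can look like, assuming (this is not taken from the diff) a one-failed-trigger task that simply raises:

    from airflow.decorators import task
    from airflow.exceptions import AirflowException
    from airflow.utils.trigger_rule import TriggerRule

    @task(trigger_rule=TriggerRule.ONE_FAILED, retries=0)
    def watcher():
        # Fail the DAG run if any task feeding into the watcher has failed.
        raise AirflowException("Failing task because one or more upstream tasks failed.")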
+
+from tests.system.utils import get_test_run  # noqa: E402
+
+# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
+test_run = get_test_run(dag)
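With `test_run` exposed at module level, pytest can collect and execute the whole example as a system test; an illustrative invocation (the module's real path is not part of this diff) might be:

    # Path placeholder only -- see tests/system/README.md#run_via_pytest for the exact setup.
    #   pytest <path/to/this_example_dag_file>.py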