
Commit 843a3b8

Fix system test example_cloud_storage_transfer_service_aws (#33429)
1 parent c93a354 commit 843a3b8

File tree

2 files changed (+128 lines, −58 lines)

docs/apache-airflow-providers-google/operators/cloud/cloud_storage_transfer_service.rst

Lines changed: 9 additions & 9 deletions
@@ -67,12 +67,12 @@ Using the operator
     :start-after: [START howto_operator_gcp_transfer_create_job_body_gcp]
     :end-before: [END howto_operator_gcp_transfer_create_job_body_gcp]

-.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py
+.. exampleinclude:: /../../tests/system/providers/google/cloud/storage_transfer/example_cloud_storage_transfer_service_aws.py
     :language: python
     :start-after: [START howto_operator_gcp_transfer_create_job_body_aws]
     :end-before: [END howto_operator_gcp_transfer_create_job_body_aws]

-.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py
+.. exampleinclude:: /../../tests/system/providers/google/cloud/storage_transfer/example_cloud_storage_transfer_service_aws.py
     :language: python
     :dedent: 4
     :start-after: [START howto_operator_gcp_transfer_create_job]

@@ -107,7 +107,7 @@ For parameter definition, take a look at
 Using the operator
 """"""""""""""""""

-.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py
+.. exampleinclude:: /../../tests/system/providers/google/cloud/storage_transfer/example_cloud_storage_transfer_service_aws.py
     :language: python
     :dedent: 4
     :start-after: [START howto_operator_gcp_transfer_delete_job]

@@ -181,7 +181,7 @@ For parameter definition, take a look at
 Using the operator
 """"""""""""""""""

-.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py
+.. exampleinclude:: /../../tests/system/providers/google/cloud/storage_transfer/example_cloud_storage_transfer_service_aws.py
     :language: python
     :dedent: 4
     :start-after: [START howto_operator_gcp_transfer_cancel_operation]

@@ -217,7 +217,7 @@ For parameter definition, take a look at
 Using the operator
 """"""""""""""""""

-.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py
+.. exampleinclude:: /../../tests/system/providers/google/cloud/storage_transfer/example_cloud_storage_transfer_service_aws.py
     :language: python
     :dedent: 4
     :start-after: [START howto_operator_gcp_transfer_get_operation]

@@ -252,7 +252,7 @@ For parameter definition, take a look at
 Using the operator
 """"""""""""""""""

-.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py
+.. exampleinclude:: /../../tests/system/providers/google/cloud/storage_transfer/example_cloud_storage_transfer_service_aws.py
     :language: python
     :dedent: 4
     :start-after: [START howto_operator_gcp_transfer_list_operations]

@@ -286,7 +286,7 @@ For parameter definition, take a look at
 Using the operator
 """"""""""""""""""

-.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py
+.. exampleinclude:: /../../tests/system/providers/google/cloud/storage_transfer/example_cloud_storage_transfer_service_aws.py
     :language: python
     :dedent: 4
     :start-after: [START howto_operator_gcp_transfer_pause_operation]

@@ -320,7 +320,7 @@ For parameter definition, take a look at
 Using the operator
 """"""""""""""""""

-.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py
+.. exampleinclude:: /../../tests/system/providers/google/cloud/storage_transfer/example_cloud_storage_transfer_service_aws.py
     :language: python
     :dedent: 4
     :start-after: [START howto_operator_gcp_transfer_resume_operation]

@@ -355,7 +355,7 @@ For parameter definition, take a look at
 Using the operator
 """"""""""""""""""

-.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py
+.. exampleinclude:: /../../tests/system/providers/google/cloud/storage_transfer/example_cloud_storage_transfer_service_aws.py
     :language: python
     :dedent: 4
     :start-after: [START howto_operator_gcp_transfer_wait_operation]
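All nine doc edits are the same one-line substitution: each exampleinclude directive now points at the system-test copy of the DAG. The directive pulls out whatever sits between a pair of [START ...]/[END ...] marker comments in the referenced file, so only the path had to change; the markers themselves travel with the code. As a sketch of the convention, the create-job snippet the docs render corresponds to this marked region of the new file (copied from the diff below):

    # [START howto_operator_gcp_transfer_create_job]
    create_transfer_job_s3_to_gcs = CloudDataTransferServiceCreateJobOperator(
        task_id="create_transfer_job_s3_to_gcs", body=aws_to_gcs_transfer_body
    )
    # [END howto_operator_gcp_transfer_create_job]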

airflow/providers/google/cloud/example_dags/example_cloud_storage_transfer_service_aws.py renamed to tests/system/providers/google/cloud/storage_transfer/example_cloud_storage_transfer_service_aws.py

Lines changed: 119 additions & 49 deletions
@@ -16,29 +16,18 @@
 # specific language governing permissions and limitations
 # under the License.
 """
-Example Airflow DAG that demonstrates interactions with Google Cloud Transfer. This DAG relies on
-the following OS environment variables
-
-Note that you need to provide a large enough set of data so that operations do not execute too quickly.
-Otherwise, DAG will fail.
-
-* GCP_PROJECT_ID - Google Cloud Project to use for the Google Cloud Transfer Service.
-* GCP_DESCRIPTION - Description of transfer job
-* GCP_TRANSFER_SOURCE_AWS_BUCKET - Amazon Web Services Storage bucket from which files are copied.
-* GCP_TRANSFER_SECOND_TARGET_BUCKET - Google Cloud Storage bucket to which files are copied
-* WAIT_FOR_OPERATION_POKE_INTERVAL - interval of what to check the status of the operation
-A smaller value than the default value accelerates the system test and ensures its correct execution with
-smaller quantities of files in the source bucket
-Look at documentation of :class:`~airflow.operators.sensors.BaseSensorOperator` for more information
-
+Example Airflow DAG that demonstrates interactions with Google Cloud Transfer.
 """
 from __future__ import annotations

 import os
 from datetime import datetime, timedelta

+from pydantic.main import deepcopy
+
 from airflow import models
-from airflow.models.baseoperator import chain
+from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, S3DeleteBucketOperator
+from airflow.providers.amazon.aws.transfers.gcs_to_s3 import GCSToS3Operator
 from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
     ALREADY_EXISTING_IN_SINK,
     AWS_S3_DATA_SOURCE,
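One import in this hunk deserves a flag: deepcopy is pulled from pydantic.main, which only resolves because pydantic v1's main module happens to re-import the standard-library helper; with pydantic v2 (or no pydantic) installed, the import breaks. A hedged sketch of the conventional spelling — not what this commit ships:

    # deepcopy lives in the stdlib; the pydantic.main import above works only
    # incidentally under pydantic v1 and is best read as an auto-import slip.
    from copy import deepcopy

    aws_to_gcs_transfer_body_2 = deepcopy(aws_to_gcs_transfer_body)  # as used later in the DAG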
@@ -68,21 +57,26 @@
     CloudDataTransferServicePauseOperationOperator,
     CloudDataTransferServiceResumeOperationOperator,
 )
+from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
 from airflow.providers.google.cloud.sensors.cloud_storage_transfer_service import (
     CloudDataTransferServiceJobStatusSensor,
 )
+from airflow.utils.trigger_rule import TriggerRule

-GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")
-GCP_DESCRIPTION = os.environ.get("GCP_DESCRIPTION", "description")
-GCP_TRANSFER_TARGET_BUCKET = os.environ.get("GCP_TRANSFER_TARGET_BUCKET")
-WAIT_FOR_OPERATION_POKE_INTERVAL = int(os.environ.get("WAIT_FOR_OPERATION_POKE_INTERVAL", 5))
+ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
+GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT")

-GCP_TRANSFER_SOURCE_AWS_BUCKET = os.environ.get("GCP_TRANSFER_SOURCE_AWS_BUCKET")
-GCP_TRANSFER_FIRST_TARGET_BUCKET = os.environ.get(
-    "GCP_TRANSFER_FIRST_TARGET_BUCKET", "gcp-transfer-first-target"
-)
+DAG_ID = "example_gcp_transfer_aws"
+
+EXAMPLE_BUCKET = "airflow-system-tests-resources"
+EXAMPLE_FILE = "storage-transfer/big_file.dat"
+BUCKET_SOURCE_AWS = f"bucket-aws-{DAG_ID}-{ENV_ID}".replace("_", "-")
+BUCKET_TARGET_GCS = f"bucket-gcs-{DAG_ID}-{ENV_ID}".replace("_", "-")
+WAIT_FOR_OPERATION_POKE_INTERVAL = int(os.environ.get("WAIT_FOR_OPERATION_POKE_INTERVAL", 5))

-GCP_TRANSFER_JOB_NAME = os.environ.get("GCP_TRANSFER_JOB_NAME", "transferJobs/sampleJob")
+GCP_DESCRIPTION = "description"
+GCP_TRANSFER_JOB_NAME = f"transferJobs/sampleJob-{DAG_ID}-{ENV_ID}".replace("-", "_")
+GCP_TRANSFER_JOB_2_NAME = f"transferJobs/sampleJob2-{DAG_ID}-{ENV_ID}".replace("-", "_")

 # [START howto_operator_gcp_transfer_create_job_body_aws]
 aws_to_gcs_transfer_body = {
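Resource names are now derived from DAG_ID plus the per-run ENV_ID instead of hand-maintained environment variables, so concurrent test runs cannot collide. The .replace() calls normalize the separators: S3 bucket names disallow underscores (hence hyphens for buckets), while the transfer-job names standardize on underscores. A quick illustration, using a made-up ENV_ID:

    # ENV_ID "abc123" is a made-up value for this sketch.
    DAG_ID = "example_gcp_transfer_aws"
    ENV_ID = "abc123"

    # Bucket names: underscores from DAG_ID become hyphens.
    assert f"bucket-aws-{DAG_ID}-{ENV_ID}".replace("_", "-") == "bucket-aws-example-gcp-transfer-aws-abc123"

    # Job names: hyphens become underscores instead.
    assert (
        f"transferJobs/sampleJob-{DAG_ID}-{ENV_ID}".replace("-", "_")
        == "transferJobs/sampleJob_example_gcp_transfer_aws_abc123"
    )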
@@ -93,33 +87,53 @@
     SCHEDULE: {
         SCHEDULE_START_DATE: datetime(2015, 1, 1).date(),
         SCHEDULE_END_DATE: datetime(2030, 1, 1).date(),
-        START_TIME_OF_DAY: (datetime.utcnow() + timedelta(minutes=2)).time(),
+        START_TIME_OF_DAY: (datetime.utcnow() + timedelta(minutes=1)).time(),
     },
     TRANSFER_SPEC: {
-        AWS_S3_DATA_SOURCE: {BUCKET_NAME: GCP_TRANSFER_SOURCE_AWS_BUCKET},
-        GCS_DATA_SINK: {BUCKET_NAME: GCP_TRANSFER_FIRST_TARGET_BUCKET},
+        AWS_S3_DATA_SOURCE: {BUCKET_NAME: BUCKET_SOURCE_AWS},
+        GCS_DATA_SINK: {BUCKET_NAME: BUCKET_TARGET_GCS},
         TRANSFER_OPTIONS: {ALREADY_EXISTING_IN_SINK: True},
     },
 }
 # [END howto_operator_gcp_transfer_create_job_body_aws]

+aws_to_gcs_transfer_body_2 = deepcopy(aws_to_gcs_transfer_body)
+aws_to_gcs_transfer_body_2[JOB_NAME] = GCP_TRANSFER_JOB_2_NAME

 with models.DAG(
-    "example_gcp_transfer_aws",
+    dag_id=DAG_ID,
     start_date=datetime(2021, 1, 1),
     catchup=False,
-    tags=["example"],
+    tags=["example", "aws", "gcs", "transfer"],
 ) as dag:
+    create_bucket_s3 = S3CreateBucketOperator(
+        task_id="create_bucket_s3", bucket_name=BUCKET_SOURCE_AWS, region_name="us-east-1"
+    )
+
+    upload_file_to_s3 = GCSToS3Operator(
+        task_id="upload_file_to_s3",
+        gcp_user_project=GCP_PROJECT_ID,
+        bucket=EXAMPLE_BUCKET,
+        prefix=EXAMPLE_FILE,
+        dest_s3_key=f"s3://{BUCKET_SOURCE_AWS}",
+        replace=True,
+    )
+    #
+    create_bucket_gcs = GCSCreateBucketOperator(
+        task_id="create_bucket_gcs",
+        bucket_name=BUCKET_TARGET_GCS,
+        project_id=GCP_PROJECT_ID,
+    )

     # [START howto_operator_gcp_transfer_create_job]
-    create_transfer_job_from_aws = CloudDataTransferServiceCreateJobOperator(
-        task_id="create_transfer_job_from_aws", body=aws_to_gcs_transfer_body
+    create_transfer_job_s3_to_gcs = CloudDataTransferServiceCreateJobOperator(
+        task_id="create_transfer_job_s3_to_gcs", body=aws_to_gcs_transfer_body
     )
     # [END howto_operator_gcp_transfer_create_job]

     wait_for_operation_to_start = CloudDataTransferServiceJobStatusSensor(
         task_id="wait_for_operation_to_start",
-        job_name="{{task_instance.xcom_pull('create_transfer_job_from_aws')['name']}}",
+        job_name="{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs')['name']}}",
         project_id=GCP_PROJECT_ID,
         expected_statuses={GcpTransferOperationStatus.IN_PROGRESS},
         poke_interval=WAIT_FOR_OPERATION_POKE_INTERVAL,
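Each sensor locates the job it watches via XCom: CloudDataTransferServiceCreateJobOperator returns the TransferJob resource it created, Airflow stores that return value as the task's XCom, and the Jinja-templated job_name pulls the name field out at runtime. A rough sketch of what the template resolves to — the payload values are illustrative, not taken from a real run:

    # Illustrative XCom payload pushed by create_transfer_job_s3_to_gcs:
    transfer_job = {
        "name": "transferJobs/sampleJob_example_gcp_transfer_aws_abc123",
        "status": "ENABLED",
        "projectId": "my-gcp-project",
    }

    # "{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs')['name']}}"
    # then renders to the string the sensor polls:
    job_name = transfer_job["name"]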
@@ -138,7 +152,7 @@
         task_id="list_operations",
         request_filter={
             FILTER_PROJECT_ID: GCP_PROJECT_ID,
-            FILTER_JOB_NAMES: ["{{task_instance.xcom_pull('create_transfer_job_from_aws')['name']}}"],
+            FILTER_JOB_NAMES: ["{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs')['name']}}"],
         },
     )
     # [END howto_operator_gcp_transfer_list_operations]
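The FILTER_PROJECT_ID and FILTER_JOB_NAMES constants come from the provider hook imported at the top of the file; assuming they map to the API's project_id and job_names filter keys (an assumption, not shown in this diff), the rendered filter restricts the operations listing to the job created earlier and looks roughly like:

    # Sketch of the rendered request_filter; key names assume the hook
    # constants resolve to "project_id"/"job_names"; values are illustrative.
    request_filter = {
        "project_id": "my-gcp-project",
        "job_names": ["transferJobs/sampleJob_example_gcp_transfer_aws_abc123"],
    }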
@@ -158,37 +172,93 @@
     # [START howto_operator_gcp_transfer_wait_operation]
     wait_for_operation_to_end = CloudDataTransferServiceJobStatusSensor(
         task_id="wait_for_operation_to_end",
-        job_name="{{task_instance.xcom_pull('create_transfer_job_from_aws')['name']}}",
+        job_name="{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs')['name']}}",
         project_id=GCP_PROJECT_ID,
         expected_statuses={GcpTransferOperationStatus.SUCCESS},
         poke_interval=WAIT_FOR_OPERATION_POKE_INTERVAL,
     )
     # [END howto_operator_gcp_transfer_wait_operation]

+    create_second_transfer_job_from_aws = CloudDataTransferServiceCreateJobOperator(
+        task_id="create_transfer_job_s3_to_gcs_2", body=aws_to_gcs_transfer_body_2
+    )
+
+    wait_for_operation_to_start_2 = CloudDataTransferServiceJobStatusSensor(
+        task_id="wait_for_operation_to_start_2",
+        job_name="{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs_2')['name']}}",
+        project_id=GCP_PROJECT_ID,
+        expected_statuses={GcpTransferOperationStatus.IN_PROGRESS},
+        poke_interval=WAIT_FOR_OPERATION_POKE_INTERVAL,
+    )
+
     # [START howto_operator_gcp_transfer_cancel_operation]
     cancel_operation = CloudDataTransferServiceCancelOperationOperator(
         task_id="cancel_operation",
         operation_name="{{task_instance.xcom_pull("
-        "'wait_for_second_operation_to_start', key='sensed_operations')[0]['name']}}",
+        "'wait_for_operation_to_start_2', key='sensed_operations')[0]['name']}}",
     )
     # [END howto_operator_gcp_transfer_cancel_operation]

     # [START howto_operator_gcp_transfer_delete_job]
-    delete_transfer_from_aws_job = CloudDataTransferServiceDeleteJobOperator(
-        task_id="delete_transfer_from_aws_job",
-        job_name="{{task_instance.xcom_pull('create_transfer_job_from_aws')['name']}}",
+    delete_transfer_job_s3_to_gcs = CloudDataTransferServiceDeleteJobOperator(
+        task_id="delete_transfer_job_s3_to_gcs",
+        job_name="{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs')['name']}}",
         project_id=GCP_PROJECT_ID,
+        trigger_rule=TriggerRule.ALL_DONE,
     )
     # [END howto_operator_gcp_transfer_delete_job]

-    chain(
-        create_transfer_job_from_aws,
-        wait_for_operation_to_start,
-        pause_operation,
-        list_operations,
-        get_operation,
-        resume_operation,
-        wait_for_operation_to_end,
-        cancel_operation,
-        delete_transfer_from_aws_job,
+    delete_transfer_job_s3_to_gcs_2 = CloudDataTransferServiceDeleteJobOperator(
+        task_id="delete_transfer_job_s3_to_gcs_2",
+        job_name="{{task_instance.xcom_pull('create_transfer_job_s3_to_gcs_2')['name']}}",
+        project_id=GCP_PROJECT_ID,
+        trigger_rule=TriggerRule.ALL_DONE,
+    )
+
+    delete_bucket_s3 = S3DeleteBucketOperator(
+        task_id="delete_bucket_s3",
+        bucket_name=BUCKET_SOURCE_AWS,
+        force_delete=True,
+        trigger_rule=TriggerRule.ALL_DONE,
+    )
+
+    delete_bucket_gcs = GCSDeleteBucketOperator(
+        task_id="delete_bucket_gcs",
+        bucket_name=BUCKET_TARGET_GCS,
+        trigger_rule=TriggerRule.ALL_DONE,
+    )
+
+    (
+        # TEST SETUP
+        [create_bucket_s3 >> upload_file_to_s3, create_bucket_gcs]
+        # TEST BODY
+        >> create_transfer_job_s3_to_gcs
+        >> wait_for_operation_to_start
+        >> pause_operation
+        >> list_operations
+        >> get_operation
+        >> resume_operation
+        >> wait_for_operation_to_end
+        >> create_second_transfer_job_from_aws
+        >> wait_for_operation_to_start_2
+        >> cancel_operation
+        # TEST TEARDOWN
+        >> [
+            delete_transfer_job_s3_to_gcs,
+            delete_transfer_job_s3_to_gcs_2,
+            delete_bucket_gcs,
+            delete_bucket_s3,
+        ]
     )
+
+    from tests.system.utils.watcher import watcher
+
+    # This test needs watcher in order to properly mark success/failure
+    # when "tearDown" task with trigger rule is part of the DAG
+    list(dag.tasks) >> watcher()
+
+
+from tests.system.utils import get_test_run  # noqa: E402
+
+# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
+test_run = get_test_run(dag)
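The watcher pattern at the end is what lets the teardown tasks with trigger_rule=ALL_DONE coexist with correct test reporting: because teardown runs even after a failure, the DAG run would otherwise be marked successful. Wiring every task into a watcher that fails whenever any upstream task fails restores the right verdict. A minimal sketch of such a watcher, assuming the shape used by Airflow's system-test utilities:

    from airflow.decorators import task
    from airflow.exceptions import AirflowException
    from airflow.utils.trigger_rule import TriggerRule

    @task(trigger_rule=TriggerRule.ONE_FAILED, retries=0)
    def watcher():
        """Fail the DAG run if any task upstream of the watcher failed."""
        raise AirflowException("Failing task because one or more upstream tasks failed.")

With test_run = get_test_run(dag) exposed at module level, pytest can collect and execute the whole DAG as a single test, as described in tests/system/README.md#run_via_pytest.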
