|
16 | 16 | # KIND, either express or implied. See the License for the
|
17 | 17 | # specific language governing permissions and limitations
|
18 | 18 | # under the License.
|
19 |
| -""" |
20 |
| -This module contains a Google Cloud Storage operator. |
21 |
| -""" |
22 |
| -import tempfile |
23 |
| -from typing import Optional |
| 19 | +"""This module is deprecated. Please use `airflow.providers.google.suite.operators.gcs_to_gdrive_operator`.""" |
24 | 20 |
|
25 |
| -from airflow.exceptions import AirflowException |
26 |
| -from airflow.gcp.hooks.gcs import GCSHook |
27 |
| -from airflow.models import BaseOperator |
28 |
| -from airflow.providers.google.suite.hooks.drive import GoogleDriveHook |
29 |
| -from airflow.utils.decorators import apply_defaults |
| 21 | +import warnings |
30 | 22 |
|
31 |
| -WILDCARD = "*" |
| 23 | +# pylint: disable=unused-import |
| 24 | +from airflow.providers.google.suite.operators.gcs_to_gdrive_operator import GCSToGoogleDriveOperator # noqa |
32 | 25 |
|
33 |
| - |
34 |
| -class GCSToGoogleDriveOperator(BaseOperator): |
35 |
| - """ |
36 |
| - Copies objects from a Google Cloud Storage service to Google Drive service, with renaming
37 |
| - if requested. |
38 |
| -
|
39 |
| - Using this operator requires the following OAuth 2.0 scope: |
40 |
| -
|
41 |
| - .. code-block:: none |
42 |
| -
|
43 |
| - https://www.googleapis.com/auth/drive |
44 |
| -
|
45 |
| - :param source_bucket: The source Google Cloud Storage bucket where the object is. (templated) |
46 |
| - :type source_bucket: str |
47 |
| - :param source_object: The source name of the object to copy in the Google Cloud
48 |
| - Storage bucket. (templated)
49 |
| - You can use only one wildcard for objects (filenames) within your bucket. The wildcard can appear |
50 |
| - inside the object name or at the end of the object name. Appending a wildcard to the bucket name |
51 |
| - is unsupported. |
52 |
| - :type source_object: str |
53 |
| - :param destination_object: The destination name of the object in the destination Google Drive |
54 |
| - service. (templated) |
55 |
| - If a wildcard is supplied in the source_object argument, this is the prefix that will be prepended |
56 |
| - to the final destination objects' paths. |
57 |
| - Note that the source path's part before the wildcard will be removed; |
58 |
| - if it needs to be retained it should be appended to destination_object. |
59 |
| - For example, with prefix ``foo/*`` and destination_object ``blah/``, the file ``foo/baz`` will be |
60 |
| - copied to ``blah/baz``; to retain the prefix write the destination_object as e.g. ``blah/foo``, in |
61 |
| - which case the copied file will be named ``blah/foo/baz``. |
62 |
| - :type destination_object: str |
63 |
| - :param move_object: When move_object is True, the object is moved instead of copied to the new location.
64 |
| - This is the equivalent of a mv command as opposed to a cp command. |
65 |
| - :type move_object: bool |
66 |
| - :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud Platform. |
67 |
| - :type gcp_conn_id: str |
68 |
| - :param delegate_to: The account to impersonate, if any. |
69 |
| - For this to work, the service account making the request must have domain-wide delegation enabled. |
70 |
| - :type delegate_to: str |
71 |
| - """ |
72 |
| - |
73 |
| - template_fields = ("source_bucket", "source_object", "destination_object") |
74 |
| - ui_color = "#f0eee4" |
75 |
| - |
76 |
| - @apply_defaults |
77 |
| - def __init__( |
78 |
| - self, |
79 |
| - source_bucket: str, |
80 |
| - source_object: str, |
81 |
| - destination_object: Optional[str] = None, |
82 |
| - move_object: bool = False, |
83 |
| - gcp_conn_id: str = "google_cloud_default", |
84 |
| - delegate_to: Optional[str] = None, |
85 |
| - *args, |
86 |
| - **kwargs |
87 |
| - ): |
88 |
| - super().__init__(*args, **kwargs) |
89 |
| - |
90 |
| - self.source_bucket = source_bucket |
91 |
| - self.source_object = source_object |
92 |
| - self.destination_object = destination_object |
93 |
| - self.move_object = move_object |
94 |
| - self.gcp_conn_id = gcp_conn_id |
95 |
| - self.delegate_to = delegate_to |
96 |
| - self.gcs_hook = None # type: Optional[GCSHook] |
97 |
| - self.gdrive_hook = None # type: Optional[GoogleDriveHook] |
98 |
| - |
99 |
| - def execute(self, context): |
100 |
| - |
101 |
| - self.gcs_hook = GCSHook( |
102 |
| - google_cloud_storage_conn_id=self.gcp_conn_id, delegate_to=self.delegate_to |
103 |
| - ) |
104 |
| - self.gdrive_hook = GoogleDriveHook(gcp_conn_id=self.gcp_conn_id, delegate_to=self.delegate_to) |
105 |
| - |
106 |
| - if WILDCARD in self.source_object: |
107 |
| - total_wildcards = self.source_object.count(WILDCARD) |
108 |
| - if total_wildcards > 1: |
109 |
| - error_msg = ( |
110 |
| - "Only one wildcard '*' is allowed in source_object parameter. " |
111 |
| - "Found {} in {}.".format(total_wildcards, self.source_object) |
112 |
| - ) |
113 |
| - |
114 |
| - raise AirflowException(error_msg) |
115 |
| - |
116 |
| - prefix, delimiter = self.source_object.split(WILDCARD, 1) |
117 |
| - objects = self.gcs_hook.list(self.source_bucket, prefix=prefix, delimiter=delimiter) |
118 |
| - |
119 |
| - for source_object in objects: |
120 |
| - if self.destination_object is None: |
121 |
| - destination_object = source_object |
122 |
| - else: |
123 |
| - destination_object = source_object.replace(prefix, self.destination_object, 1) |
124 |
| - |
125 |
| - self._copy_single_object(source_object=source_object, destination_object=destination_object) |
126 |
| - else: |
127 |
| - self._copy_single_object( |
128 |
| - source_object=self.source_object, destination_object=self.destination_object |
129 |
| - ) |
130 |
| - |
131 |
| - def _copy_single_object(self, source_object, destination_object): |
132 |
| - self.log.info( |
133 |
| - "Executing copy of gs://%s/%s to gdrive://%s", |
134 |
| - self.source_bucket, |
135 |
| - source_object, |
136 |
| - destination_object, |
137 |
| - ) |
138 |
| - |
139 |
| - with tempfile.NamedTemporaryFile() as file: |
140 |
| - filename = file.name |
141 |
| - self.gcs_hook.download( |
142 |
| - bucket_name=self.source_bucket, object_name=source_object, filename=filename |
143 |
| - ) |
144 |
| - self.gdrive_hook.upload_file(local_location=filename, remote_location=destination_object) |
145 |
| - |
146 |
| - if self.move_object: |
147 |
| - self.gcs_hook.delete(self.source_bucket, source_object) |
| 26 | +warnings.warn( |
| 27 | + "This module is deprecated. " |
| 28 | + "Please use `airflow.providers.google.suite.operators.gcs_to_gdrive_operator`.", |
| 29 | + DeprecationWarning, stacklevel=2 |
| 30 | +) |
0 commit comments