Skip to content

Commit f9e1847

Browse files
authored
Add autodetect arg in BQCreateExternalTable Operator (#22710)
* Add autodetect parameter
* Update docstring
* Update google provider documentation
1 parent 215993b commit f9e1847

File tree

3 files changed

+12
-0
lines changed

3 files changed

+12
-0
lines changed

airflow/providers/google/cloud/operators/bigquery.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -964,6 +964,9 @@ class BigQueryCreateExternalTableOperator(BaseOperator):
964964
:param schema_object: If set, a GCS object path pointing to a .json file that
965965
contains the schema for the table. (templated)
966966
:param source_format: File format of the data.
967+
:param autodetect: Try to detect schema and format options automatically.
968+
The schema_fields and schema_object options will be honored when specified explicitly.
969+
https://cloud.google.com/bigquery/docs/schema-detect#schema_auto-detection_for_external_data_sources
967970
:param compression: [Optional] The compression type of the data source.
968971
Possible values include GZIP and NONE.
969972
The default value is NONE.
@@ -1028,6 +1031,7 @@ def __init__(
10281031
schema_fields: Optional[List] = None,
10291032
schema_object: Optional[str] = None,
10301033
source_format: Optional[str] = None,
1034+
autodetect: bool = False,
10311035
compression: Optional[str] = None,
10321036
skip_leading_rows: Optional[int] = None,
10331037
field_delimiter: Optional[str] = None,
@@ -1057,6 +1061,7 @@ def __init__(
10571061
skip_leading_rows,
10581062
field_delimiter,
10591063
max_bad_records,
1064+
autodetect,
10601065
quote_character,
10611066
allow_quoted_newlines,
10621067
allow_jagged_rows,
@@ -1116,6 +1121,7 @@ def __init__(
11161121
self.bigquery_conn_id = bigquery_conn_id
11171122
self.google_cloud_storage_conn_id = google_cloud_storage_conn_id
11181123
self.delegate_to = delegate_to
1124+
self.autodetect = autodetect
11191125

11201126
self.src_fmt_configs = src_fmt_configs or {}
11211127
self.labels = labels
@@ -1153,6 +1159,7 @@ def execute(self, context: 'Context') -> None:
11531159
schema_fields=schema_fields,
11541160
source_uris=source_uris,
11551161
source_format=self.source_format,
1162+
autodetect=self.autodetect,
11561163
compression=self.compression,
11571164
skip_leading_rows=self.skip_leading_rows,
11581165
field_delimiter=self.field_delimiter,

docs/apache-airflow-providers-google/operators/cloud/bigquery.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,9 @@ Or you may point the operator to a Google Cloud Storage object name where the sc
193193
:start-after: [START howto_operator_bigquery_create_table_schema_json]
194194
:end-before: [END howto_operator_bigquery_create_table_schema_json]
195195

196+
To use BigQuery `schema auto-detection <https://cloud.google.com/bigquery/docs/schema-detect#schema_auto-detection_for_external_data_sources>`__,
197+
set the ``autodetect`` flag instead of providing explicit schema information.
198+
196199
.. _howto/operator:BigQueryGetDataOperator:
197200

198201
Fetch data from table

tests/providers/google/cloud/operators/test_bigquery.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ def test_execute(self, mock_hook):
197197
bucket=TEST_GCS_BUCKET,
198198
source_objects=TEST_GCS_DATA,
199199
source_format=TEST_SOURCE_FORMAT,
200+
autodetect=True,
200201
)
201202

202203
operator.execute(None)
@@ -205,6 +206,7 @@ def test_execute(self, mock_hook):
205206
schema_fields=[],
206207
source_uris=[f'gs://{TEST_GCS_BUCKET}/{source_object}' for source_object in TEST_GCS_DATA],
207208
source_format=TEST_SOURCE_FORMAT,
209+
autodetect=True,
208210
compression='NONE',
209211
skip_leading_rows=0,
210212
field_delimiter=',',

0 commit comments

Comments
 (0)