Skip to content

Commit 3fa51f9

Browse files
author
Tobiasz Kędzierski
authored
Add check for duplicates in provider.yaml files (#12578)
1 parent 6d0dcd2 commit 3fa51f9

File tree

3 files changed

+56
-6
lines changed

3 files changed

+56
-6
lines changed

airflow/providers/amazon/provider.yaml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,6 @@ operators:
167167
- airflow.providers.amazon.aws.operators.step_function_start_execution
168168

169169
sensors:
170-
- integration-name: Amazon Athena
171-
python-modules:
172-
- airflow.providers.amazon.aws.sensors.athena
173170
- integration-name: Amazon Athena
174171
python-modules:
175172
- airflow.providers.amazon.aws.sensors.athena

airflow/providers/google/provider.yaml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -398,9 +398,6 @@ sensors:
398398
- integration-name: Google Campaign Manager
399399
python-modules:
400400
- airflow.providers.google.marketing_platform.sensors.campaign_manager
401-
- integration-name: Google Dataflow
402-
python-modules:
403-
- airflow.providers.google.cloud.sensors.dataflow
404401
- integration-name: Google Display&Video 360
405402
python-modules:
406403
- airflow.providers.google.marketing_platform.sensors.display_video

scripts/ci/pre_commit/pre_commit_check_provider_yaml_files.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,30 @@ def check_completeness_of_list_of_hooks_sensors_hooks(yaml_files: Dict[str, Dict
155155
sys.exit(1)
156156

157157

158+
def check_duplicates_in_integrations_names_of_hooks_sensors_operators(yaml_files: Dict[str, Dict]):
    """Fail when an integration name is repeated within a provider's resource section.

    For every parsed provider file and each of the ``sensors``, ``operators`` and
    ``hooks`` sections, the ``integration-name`` of every entry is collected. Any
    name occurring more than once in the same section is reported exactly once.

    :param yaml_files: mapping of provider.yaml file path to its parsed content.
    :raises SystemExit: with exit code 1 when at least one duplicate was found.
    """
    # Local import keeps this block self-contained without touching file-level imports.
    from collections import Counter

    print("Checking for duplicates in list of {sensors, hooks, operators}")
    errors = []
    for (yaml_file_path, provider_data), resource_type in product(
        yaml_files.items(), ["sensors", "operators", "hooks"]
    ):
        # ``or []`` also covers a section key that is present but empty (parsed as None).
        resource_data = provider_data.get(resource_type) or []
        name_counts = Counter(r.get("integration-name", "") for r in resource_data)
        # One message per duplicated name (not one per occurrence), in first-seen order.
        errors.extend(
            f"Duplicated content of '{resource_type}/integration-name/{integration}' "
            f"in file: {yaml_file_path}"
            for integration, occurrences in name_counts.items()
            if occurrences > 1
        )

    if errors:
        print(f"Found {len(errors)} errors")
        for error in errors:
            print(error)
            print()
        sys.exit(1)
180+
181+
158182
def check_completeness_of_list_of_transfers(yaml_files: Dict[str, Dict]):
159183
print("Checking completeness of list of transfers")
160184
errors = []
@@ -185,6 +209,35 @@ def check_completeness_of_list_of_transfers(yaml_files: Dict[str, Dict]):
185209
sys.exit(1)
186210

187211

212+
def check_duplicates_in_list_of_transfers(yaml_files: Dict[str, Dict]):
    """Fail when a (source, target) integration pair is repeated in a ``transfers`` section.

    For every parsed provider file, each ``transfers`` entry is reduced to the pair
    (``source-integration-name``, ``target-integration-name``). Any pair occurring
    more than once in the same file is reported exactly once.

    :param yaml_files: mapping of provider.yaml file path to its parsed content.
    :raises SystemExit: with exit code 1 when at least one duplicate was found.
    """
    # Local import keeps this block self-contained without touching file-level imports.
    from collections import Counter

    print("Checking for duplicates in list of transfers")
    errors = []
    resource_type = "transfers"
    for yaml_file_path, provider_data in yaml_files.items():
        # ``or []`` also covers a section key that is present but empty (parsed as None).
        resource_data = provider_data.get(resource_type) or []

        pair_counts = Counter(
            (r.get("source-integration-name", ""), r.get("target-integration-name", ""))
            for r in resource_data
        )
        # One message per duplicated pair (not one per occurrence), in first-seen order.
        errors.extend(
            f"Duplicated content of \n"
            f" '{resource_type}/source-integration-name/{source_name}' "
            f" '{resource_type}/target-integration-name/{target_name}' "
            f"in file: {yaml_file_path}"
            for (source_name, target_name), occurrences in pair_counts.items()
            if occurrences > 1
        )

    if errors:
        print(f"Found {len(errors)} errors")
        for error in errors:
            print(error)
            print()
        sys.exit(1)
239+
240+
188241
def check_invalid_integration(yaml_files: Dict[str, Dict]):
189242
print("Detect unregistered integrations")
190243
errors = []
@@ -278,7 +331,10 @@ def check_doc_files(yaml_files: Dict[str, Dict]):
278331
check_integration_duplicates(all_parsed_yaml_files)
279332

280333
check_completeness_of_list_of_hooks_sensors_hooks(all_parsed_yaml_files)
334+
check_duplicates_in_integrations_names_of_hooks_sensors_operators(all_parsed_yaml_files)
335+
281336
check_completeness_of_list_of_transfers(all_parsed_yaml_files)
337+
check_duplicates_in_list_of_transfers(all_parsed_yaml_files)
282338

283339
if all_files_loaded:
284340
# Only check those if all provider files are loaded

0 commit comments

Comments
 (0)