Skip to content

Commit ca35bd7

Browse files
authored
By default PIP will install all packages in .local folder (#14125)
In order to optimize the Docker image, we use the ~/.local folder copied from the build image (this gives huge optimizations regarding the Docker image size). So far we instructed the users to add the --user flag manually when installing any packages when they extend the images; however, this has proven to be problematic, as users rarely read the whole documentation and simply try what they know. This PR attempts to fix it. The `PIP_USER` variable is set to `true` in the final image, which means that installation by default will use the ~/.local folder as the target. This can be disabled by unsetting the variable or setting it to `false`. Also, since pylint version 2.7.0 has been released, this fixes a few pylint issues so that we can update to the latest constraints.
1 parent beed530 commit ca35bd7

File tree

11 files changed

+32
-13
lines changed

11 files changed

+32
-13
lines changed

β€ŽDockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,9 @@ LABEL org.apache.airflow.distro="debian" \
523523
org.opencontainers.image.title="Production Airflow Image" \
524524
org.opencontainers.image.description="Installed Apache Airflow"
525525

526+
# By default PIP will install everything in ~/.local
527+
ARG PIP_USER="true"
528+
ENV PIP_USER=${PIP_USER}
526529

527530
ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"]
528531
CMD ["--help"]

β€Žairflow/kubernetes/refresh_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def _load_from_exec_plugin(self):
6868
return True
6969
except Exception as e: # pylint: disable=W0703
7070
logging.error(str(e))
71+
return None
7172

7273
def refresh_api_key(self, client_configuration):
7374
"""Refresh API key if expired"""

β€Žairflow/models/dagbag.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ def _load_modules_from_zip(self, filepath, safe_mode):
330330
if not might_contain_dag(zip_info.filename, safe_mode, current_zip_file):
331331
# todo: create ignore list
332332
# Don't want to spam user with skip messages
333-
if not self.has_logged or True:
333+
if not self.has_logged:
334334
self.has_logged = True
335335
self.log.info(
336336
"File %s:%s assumed to contain no DAGs. Skipping.", filepath, zip_info.filename

β€Žairflow/providers/amazon/aws/log/cloudwatch_task_handler.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,15 @@ def hook(self):
5656
from airflow.providers.amazon.aws.hooks.logs import AwsLogsHook
5757

5858
return AwsLogsHook(aws_conn_id=remote_conn_id, region_name=self.region_name)
59-
except Exception: # pylint: disable=broad-except
59+
except Exception as e: # pylint: disable=broad-except
6060
self.log.error(
6161
'Could not create an AwsLogsHook with connection id "%s". '
6262
'Please make sure that airflow[aws] is installed and '
63-
'the Cloudwatch logs connection exists.',
63+
'the Cloudwatch logs connection exists. Exception: "%s"',
6464
remote_conn_id,
65+
e,
6566
)
67+
return None
6668

6769
def _render_filename(self, ti, try_number):
6870
# Replace unsupported log group name characters

β€Žairflow/providers/amazon/aws/log/s3_task_handler.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,15 @@ def hook(self):
4747
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
4848

4949
return S3Hook(remote_conn_id)
50-
except Exception: # pylint: disable=broad-except
50+
except Exception as e: # pylint: disable=broad-except
5151
self.log.exception(
5252
'Could not create an S3Hook with connection id "%s". '
5353
'Please make sure that airflow[aws] is installed and '
54-
'the S3 connection exists.',
54+
'the S3 connection exists. Exception : "%s"',
5555
remote_conn_id,
56+
e,
5657
)
58+
return None
5759

5860
def set_context(self, ti):
5961
super().set_context(ti)

β€Žairflow/providers/google/cloud/operators/functions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,7 @@ def execute(self, context):
397397
status = e.resp.status
398398
if status == 404:
399399
self.log.info('The function does not exist in this project')
400+
return None
400401
else:
401402
self.log.error('An error occurred. Exiting.')
402403
raise e

β€Žairflow/providers/microsoft/azure/log/wasb_task_handler.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,15 @@ def hook(self):
5959
from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
6060

6161
return WasbHook(remote_conn_id)
62-
except AzureHttpError:
62+
except AzureHttpError as e:
6363
self.log.error(
6464
'Could not create an WasbHook with connection id "%s". '
6565
'Please make sure that airflow[azure] is installed and '
66-
'the Wasb connection exists.',
66+
'the Wasb connection exists. Exception "%s"',
6767
remote_conn_id,
68+
e,
6869
)
70+
return None
6971

7072
def set_context(self, ti) -> None:
7173
super().set_context(ti)
@@ -136,8 +138,9 @@ def wasb_log_exists(self, remote_log_location: str) -> bool:
136138
"""
137139
try:
138140
return self.hook.check_for_blob(self.wasb_container, remote_log_location)
139-
except Exception: # pylint: disable=broad-except
140-
pass
141+
# pylint: disable=broad-except
142+
except Exception as e:
143+
self.log.debug('Exception when trying to check remote location: "%s"', e)
141144
return False
142145

143146
def wasb_read(self, remote_log_location: str, return_error: bool = False):
@@ -153,12 +156,13 @@ def wasb_read(self, remote_log_location: str, return_error: bool = False):
153156
"""
154157
try:
155158
return self.hook.read_file(self.wasb_container, remote_log_location)
156-
except AzureHttpError:
159+
except AzureHttpError as e:
157160
msg = f'Could not read logs from {remote_log_location}'
158-
self.log.exception(msg)
161+
self.log.exception("Message: '%s', exception '%s'", msg, e)
159162
# return error if needed
160163
if return_error:
161164
return msg
165+
return ''
162166

163167
def wasb_write(self, log: str, remote_log_location: str, append: bool = True) -> None:
164168
"""

β€Žairflow/serialization/serialized_objects.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ def _deserialize(cls, encoded_var: Any) -> Any: # pylint: disable=too-many-retu
290290
elif type_ == DAT.SET:
291291
return {cls._deserialize(v) for v in var}
292292
elif type_ == DAT.TUPLE:
293+
# pylint: disable=consider-using-generator
293294
return tuple([cls._deserialize(v) for v in var])
294295
else:
295296
raise TypeError(f'Invalid type {type_!s} in deserialization.')

β€Žairflow/stats.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ class AllowListValidator:
243243

244244
def __init__(self, allow_list=None):
245245
if allow_list:
246+
# pylint: disable=consider-using-generator
246247
self.allow_list = tuple([item.strip().lower() for item in allow_list.split(',')])
247248
else:
248249
self.allow_list = None

β€Žairflow/www/views.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3648,8 +3648,9 @@ def action_clear(self, task_instances, session=None):
36483648
flash(f"{len(task_instances)} task instances have been cleared")
36493649
self.update_redirect()
36503650
return redirect(self.get_redirect())
3651-
except Exception: # noqa pylint: disable=broad-except
3652-
flash('Failed to clear task instances', 'error')
3651+
except Exception as e: # noqa pylint: disable=broad-except
3652+
flash(f'Failed to clear task instances: "{e}"', 'error')
3653+
return None
36533654

36543655
@provide_session
36553656
def set_task_instance_state(self, tis, target_state, session=None):

0 commit comments

Comments
 (0)