diff options
author | Martin Krizek <martin.krizek@gmail.com> | 2024-10-03 15:36:42 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-03 15:36:42 +0200 |
commit | f593eb42a3aee30c9bc040eee439dda51a84e390 (patch) | |
tree | 45f28f7ea466c3d59c845911996a486cde96445b | |
parent | Use sentinel everywhere (#84041) (diff) | |
download | ansible-f593eb42a3aee30c9bc040eee439dda51a84e390.tar.xz ansible-f593eb42a3aee30c9bc040eee439dda51a84e390.zip |
get_url: properly parse filename in content-disposition (#83748)
Since we don't really care about the type we don't have to
query for it and just retrieve the filename value.
Unfortunately we cannot use module_utils.urls.get_response_filename
as we don't have the response object, so just utilize
email.message.Message to parse the filename
instead of manually doing the work ourselves.
Fixes: #83690
-rw-r--r-- | changelogs/fragments/83690-get_url-content-disposition-filename.yml | 2 | ||||
-rw-r--r-- | lib/ansible/module_utils/urls.py | 15 | ||||
-rw-r--r-- | lib/ansible/modules/get_url.py | 26 | ||||
-rw-r--r-- | test/integration/targets/get_url/tasks/main.yml | 20 |
4 files changed, 41 insertions, 22 deletions
diff --git a/changelogs/fragments/83690-get_url-content-disposition-filename.yml b/changelogs/fragments/83690-get_url-content-disposition-filename.yml new file mode 100644 index 0000000000..47f9734c35 --- /dev/null +++ b/changelogs/fragments/83690-get_url-content-disposition-filename.yml @@ -0,0 +1,2 @@ +bugfixes: + - get_url - fix honoring ``filename`` from the ``content-disposition`` header even when the type is ``inline`` (https://github.com/ansible/ansible/issues/83690) diff --git a/lib/ansible/module_utils/urls.py b/lib/ansible/module_utils/urls.py index c4c8e3ab7d..b3047f0164 100644 --- a/lib/ansible/module_utils/urls.py +++ b/lib/ansible/module_utils/urls.py @@ -340,13 +340,16 @@ def extract_pem_certs(data): def get_response_filename(response): - url = response.geturl() - path = urlparse(url)[2] - filename = os.path.basename(path.rstrip('/')) or None - if filename: - filename = unquote(filename) + if filename := response.headers.get_param('filename', header='content-disposition'): + filename = os.path.basename(filename) + else: + url = response.geturl() + path = urlparse(url)[2] + filename = os.path.basename(path.rstrip('/')) or None + if filename: + filename = unquote(filename) - return response.headers.get_param('filename', header='content-disposition') or filename + return filename def parse_content_type(response): diff --git a/lib/ansible/modules/get_url.py b/lib/ansible/modules/get_url.py index 959998c959..965e5f6196 100644 --- a/lib/ansible/modules/get_url.py +++ b/lib/ansible/modules/get_url.py @@ -367,6 +367,7 @@ url: sample: https://www.ansible.com/ ''' +import email.message import os import re import shutil @@ -439,23 +440,16 @@ def url_get(module, url, dest, use_proxy, last_mod_time, force, timeout=10, head def extract_filename_from_headers(headers): + """Extracts a filename from the given dict of HTTP headers. + + Returns the filename if successful, else None. """ - Extracts a filename from the given dict of HTTP headers. - - Looks for the content-disposition header and applies a regex. - Returns the filename if successful, else None.""" - cont_disp_regex = 'attachment; ?filename="?([^"]+)' - res = None - - if 'content-disposition' in headers: - cont_disp = headers['content-disposition'] - match = re.match(cont_disp_regex, cont_disp) - if match: - res = match.group(1) - # Try preventing any funny business. - res = os.path.basename(res) - - return res + msg = email.message.Message() + msg['content-disposition'] = headers.get('content-disposition', '') + if filename := msg.get_param('filename', header='content-disposition'): + # Avoid directory traversal + filename = os.path.basename(filename) + return filename def is_url(checksum): diff --git a/test/integration/targets/get_url/tasks/main.yml b/test/integration/targets/get_url/tasks/main.yml index 2f50b4366c..66bd129368 100644 --- a/test/integration/targets/get_url/tasks/main.yml +++ b/test/integration/targets/get_url/tasks/main.yml @@ -719,3 +719,23 @@ - result is not changed - '"did not match" in result.msg' - stat_result_checksum_verify.stat.exists + +- name: Test downloading to dir with content-disposition attachment + get_url: + url: 'https://{{ httpbin_host }}/response-headers?Content-Disposition=attachment%3B%20filename%3D%22filename.json%22' + dest: "{{ remote_tmp_dir }}" + register: get_dir_filename + +- assert: + that: + - get_dir_filename.dest == remote_tmp_dir ~ "/filename.json" + +- name: Test downloading to dir with content-disposition inline + get_url: + url: 'https://{{ httpbin_host }}/response-headers?Content-Disposition=inline%3B%20filename%3D%22filename.json%22' + dest: "{{ remote_tmp_dir }}" + register: get_dir_filename + +- assert: + that: + - get_dir_filename.dest == remote_tmp_dir ~ "/filename.json" |