summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Krizek <martin.krizek@gmail.com>2024-10-03 15:36:42 +0200
committerGitHub <noreply@github.com>2024-10-03 15:36:42 +0200
commitf593eb42a3aee30c9bc040eee439dda51a84e390 (patch)
tree45f28f7ea466c3d59c845911996a486cde96445b
parentUse sentinel everywhere (#84041) (diff)
downloadansible-f593eb42a3aee30c9bc040eee439dda51a84e390.tar.xz
ansible-f593eb42a3aee30c9bc040eee439dda51a84e390.zip
get_url: properly parse filename in content-disposition (#83748)
Since we don't really care about the type we don't have to query for it and just retrieve the filename value. Unfortunately we cannot use module_utils.urls.get_response_filename as we don't have the response object, so just utilize email.message.Message to parse the filename instead of manually doing the work ourselves. Fixes: #83690
-rw-r--r--changelogs/fragments/83690-get_url-content-disposition-filename.yml2
-rw-r--r--lib/ansible/module_utils/urls.py15
-rw-r--r--lib/ansible/modules/get_url.py26
-rw-r--r--test/integration/targets/get_url/tasks/main.yml20
4 files changed, 41 insertions, 22 deletions
diff --git a/changelogs/fragments/83690-get_url-content-disposition-filename.yml b/changelogs/fragments/83690-get_url-content-disposition-filename.yml
new file mode 100644
index 0000000000..47f9734c35
--- /dev/null
+++ b/changelogs/fragments/83690-get_url-content-disposition-filename.yml
@@ -0,0 +1,2 @@
+bugfixes:
+ - get_url - fix honoring ``filename`` from the ``content-disposition`` header even when the type is ``inline`` (https://github.com/ansible/ansible/issues/83690)
diff --git a/lib/ansible/module_utils/urls.py b/lib/ansible/module_utils/urls.py
index c4c8e3ab7d..b3047f0164 100644
--- a/lib/ansible/module_utils/urls.py
+++ b/lib/ansible/module_utils/urls.py
@@ -340,13 +340,16 @@ def extract_pem_certs(data):
def get_response_filename(response):
- url = response.geturl()
- path = urlparse(url)[2]
- filename = os.path.basename(path.rstrip('/')) or None
- if filename:
- filename = unquote(filename)
+ if filename := response.headers.get_param('filename', header='content-disposition'):
+ filename = os.path.basename(filename)
+ else:
+ url = response.geturl()
+ path = urlparse(url)[2]
+ filename = os.path.basename(path.rstrip('/')) or None
+ if filename:
+ filename = unquote(filename)
- return response.headers.get_param('filename', header='content-disposition') or filename
+ return filename
def parse_content_type(response):
diff --git a/lib/ansible/modules/get_url.py b/lib/ansible/modules/get_url.py
index 959998c959..965e5f6196 100644
--- a/lib/ansible/modules/get_url.py
+++ b/lib/ansible/modules/get_url.py
@@ -367,6 +367,7 @@ url:
sample: https://www.ansible.com/
'''
+import email.message
import os
import re
import shutil
@@ -439,23 +440,16 @@ def url_get(module, url, dest, use_proxy, last_mod_time, force, timeout=10, head
def extract_filename_from_headers(headers):
+ """Extracts a filename from the given dict of HTTP headers.
+
+ Returns the filename if successful, else None.
"""
- Extracts a filename from the given dict of HTTP headers.
-
- Looks for the content-disposition header and applies a regex.
- Returns the filename if successful, else None."""
- cont_disp_regex = 'attachment; ?filename="?([^"]+)'
- res = None
-
- if 'content-disposition' in headers:
- cont_disp = headers['content-disposition']
- match = re.match(cont_disp_regex, cont_disp)
- if match:
- res = match.group(1)
- # Try preventing any funny business.
- res = os.path.basename(res)
-
- return res
+ msg = email.message.Message()
+ msg['content-disposition'] = headers.get('content-disposition', '')
+ if filename := msg.get_param('filename', header='content-disposition'):
+ # Avoid directory traversal
+ filename = os.path.basename(filename)
+ return filename
def is_url(checksum):
diff --git a/test/integration/targets/get_url/tasks/main.yml b/test/integration/targets/get_url/tasks/main.yml
index 2f50b4366c..66bd129368 100644
--- a/test/integration/targets/get_url/tasks/main.yml
+++ b/test/integration/targets/get_url/tasks/main.yml
@@ -719,3 +719,23 @@
- result is not changed
- '"did not match" in result.msg'
- stat_result_checksum_verify.stat.exists
+
+- name: Test downloading to dir with content-disposition attachment
+ get_url:
+ url: 'https://{{ httpbin_host }}/response-headers?Content-Disposition=attachment%3B%20filename%3D%22filename.json%22'
+ dest: "{{ remote_tmp_dir }}"
+ register: get_dir_filename
+
+- assert:
+ that:
+ - get_dir_filename.dest == remote_tmp_dir ~ "/filename.json"
+
+- name: Test downloading to dir with content-disposition inline
+ get_url:
+ url: 'https://{{ httpbin_host }}/response-headers?Content-Disposition=inline%3B%20filename%3D%22filename.json%22'
+ dest: "{{ remote_tmp_dir }}"
+ register: get_dir_filename
+
+- assert:
+ that:
+ - get_dir_filename.dest == remote_tmp_dir ~ "/filename.json"