Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Make URL parsing in mimetypes.guess_type() more reliable.
  • Loading branch information
serhiy-storchaka committed Mar 25, 2024
commit caa728db554a91ca13739c1b4a0cdd52671243a7
7 changes: 6 additions & 1 deletion Lib/mimetypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,12 @@ def guess_type(self, url, strict=True):
"""
url = os.fspath(url)
p = urllib.parse.urlparse(url)
scheme, url = p.scheme, p.path
if p.scheme and len(p.scheme) > 1:
scheme = p.scheme
url = p.path
else:
scheme = None
url = os.path.splitdrive(url)[1]
if scheme == 'data':
# syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
Expand Down
32 changes: 25 additions & 7 deletions Lib/test/test_mimetypes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import io
import mimetypes
import os
import pathlib
import sys
import unittest.mock
Expand Down Expand Up @@ -109,13 +110,26 @@ def test_filename_with_url_delimiters(self):
# compared to when interpreted as filename because of the semicolon.
eq = self.assertEqual
gzip_expected = ('application/x-tar', 'gzip')
eq(self.db.guess_type(";1.tar.gz"), gzip_expected)
eq(self.db.guess_type("?1.tar.gz"), gzip_expected)
eq(self.db.guess_type("#1.tar.gz"), gzip_expected)
eq(self.db.guess_type("#1#.tar.gz"), gzip_expected)
eq(self.db.guess_type(";1#.tar.gz"), gzip_expected)
eq(self.db.guess_type(";&1=123;?.tar.gz"), gzip_expected)
eq(self.db.guess_type("?k1=v1&k2=v2.tar.gz"), gzip_expected)
for name in (
';1.tar.gz',
'?1.tar.gz',
'#1.tar.gz',
'#1#.tar.gz',
';1#.tar.gz',
';&1=123;?.tar.gz',
'?k1=v1&k2=v2.tar.gz',
):
for prefix in ('', '/', '\\',
'c:', 'c:/', 'c:\\', 'c:/d/', 'c:\\d\\',
'//share/server/', '\\\\share\\server\\'):
path = prefix + name
with self.subTest(path=path):
eq(self.db.guess_type(path), gzip_expected)
expected = (None, None) if os.name == 'nt' else gzip_expected
for prefix in ('//', '\\\\', '//share/', '\\\\share\\'):
path = prefix + name
with self.subTest(path=path):
eq(self.db.guess_type(path), expected)
eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)

def test_url(self):
Expand All @@ -125,6 +139,10 @@ def test_url(self):
result = self.db.guess_type('http://example.com/host.html')
msg = 'Should be text/html'
self.assertSequenceEqual(result, ('text/html', None), msg)
result = self.db.guess_type('http://example.com/host.html#x.tar')
self.assertSequenceEqual(result, ('text/html', None))
result = self.db.guess_type('http://example.com/host.html?q=x.tar')
self.assertSequenceEqual(result, ('text/html', None))

def test_guess_all_types(self):
# First try strict. Use a set here for testing the results because if
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Make :func:`mimetypes.guess_type` properly parse URLs with only a host
name, URLs containing a fragment or query, and filenames with only a UNC
sharepoint on Windows.
Based on patch by Dong-hee Na.