Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
gh-100001: Omit control characters in http.server stderr logs.
  • Loading branch information
gpshead committed Dec 5, 2022
commit dd1de549aa19210f729bd045035784026fff4d3d
11 changes: 10 additions & 1 deletion Lib/http/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
import html
import http.client
import io
import itertools
import mimetypes
import os
import posixpath
Expand Down Expand Up @@ -562,6 +563,10 @@ def log_error(self, format, *args):

self.log_message(format, *args)

# Translation table mapping every code point in range(0x20) and
# range(0x7f, 0xa0) to '!', for use with str.translate().
# https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes
__control_char_table = str.maketrans(dict.fromkeys(
    itertools.chain(range(0x20), range(0x7f, 0xa0)), '!'))

def log_message(self, format, *args):
"""Log an arbitrary message.

Expand All @@ -577,12 +582,16 @@ def log_message(self, format, *args):
The client ip and current date/time are prefixed to
every message.

Unicode control characters are replaced with ! before
writing the output to stderr.

"""

message = format % args
sys.stderr.write("%s - - [%s] %s\n" %
(self.address_string(),
self.log_date_time_string(),
format%args))
message.translate(self.__control_char_table)))

def version_string(self):
"""Return the server software version string."""
Expand Down
21 changes: 20 additions & 1 deletion Lib/test/test_httpservers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import datetime
import threading
from unittest import mock
from io import BytesIO
from io import BytesIO, StringIO

import unittest
from test import support
Expand Down Expand Up @@ -990,6 +990,25 @@ def verify_http_server_response(self, response):
match = self.HTTPResponseMatch.search(response)
self.assertIsNotNone(match)

def test_unprintable_not_logged(self):
    # The socketless handler subclass used by these tests overrides
    # log_message, so invoke the base-class implementation explicitly.
    self.handler.client_address = ('127.0.0.1', 1337)
    base_log_message = BaseHTTPRequestHandler.log_message
    with mock.patch.object(sys, 'stderr', StringIO()) as captured:
        base_log_message(self.handler, '/foo')
        base_log_message(self.handler, '/\033bar\000\033')
        base_log_message(self.handler, '/spam %s.', 'a')
        base_log_message(self.handler, '/spam %s.',
                         '\033\x7f\x9f\xa0beans')
    output = captured.getvalue()
    # Raw control characters must never reach the log output.
    for forbidden in ('\033', '\000'):
        self.assertNotIn(forbidden, output)
    logged = output.splitlines()
    # One expected fragment per logged line, in call order; \xa0 lies
    # outside the replaced ranges and passes through untouched.
    expected_fragments = (
        '/foo',
        '/!bar!!',
        '/spam a.',
        '/spam !!!\xa0beans.',
    )
    for index, fragment in enumerate(expected_fragments):
        self.assertIn(fragment, logged[index])

def test_http_1_1(self):
result = self.send_typical_request(b'GET / HTTP/1.1\r\n\r\n')
self.verify_http_server_response(result[0])
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
``python -m http.server`` no longer allows terminal control characters sent
within a garbage request to be printed to the stderr server log.

This is done by changing the :mod:`http.server`
:class:`BaseHTTPRequestHandler` ``.log_message`` method to replace each
control character with a ``'!'`` before printing.