Coverage for mcpgateway / utils / log_sanitizer.py: 100%
13 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-09 03:05 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-09 03:05 +0000
1# -*- coding: utf-8 -*-
2"""Location: ./mcpgateway/utils/log_sanitizer.py
3Copyright 2025
4SPDX-License-Identifier: Apache-2.0
6Log Sanitization Utility.
8This module provides utilities to sanitize untrusted input before logging to prevent
9log injection attacks. Control characters like newlines (\n, \r) can be used to inject
10fabricated log entries when logging unauthenticated user input.
12Security Context:
13 Log injection occurs when an attacker includes control characters (especially newlines)
14 in query parameters, headers, or other user-controlled input that gets logged. When
15 URL-decoded by the ASGI framework, these characters are passed to Python's logging
16 module which does not sanitize them, allowing injection of fake log lines.
18 Example attack:
19 GET /oauth/callback?error=foo&error_description=bar%0ACRITICAL:root:SECURITY+BREACH
21 This produces two log lines:
22 WARNING:oauth:OAuth error: bar
23 CRITICAL:root:SECURITY BREACH
25 The second line is entirely fabricated by the attacker.
27Mitigation:
28 This utility strips or replaces control characters before logging. Structured logging
29 (JSON format) also mitigates this by encapsulating the full message as a single field.
31Examples:
32 >>> from mcpgateway.utils.log_sanitizer import sanitize_for_log
33 >>> sanitize_for_log("normal text")
34 'normal text'
35 >>> sanitize_for_log("text with\\nnewline")
36 'text with newline'
37 >>> sanitize_for_log("text with\\r\\nCRLF")
38 'text with  CRLF'
39 >>> sanitize_for_log("tab\\there")
40 'tab here'
41 >>> sanitize_for_log(None)
42 'None'
43 >>> sanitize_for_log(123)
44 '123'
45"""
47# Standard
48import re
49from typing import Any, Optional
# Regex pattern to match control characters that could be used for log injection.
# Covers the C0 range (0x00-0x1f: \n, \r, \t, \v, \f, NUL, ESC, ...), DEL (0x7f)
# and the C1 range (0x80-0x9f). Space (0x20) is outside the class and is preserved.
CONTROL_CHARS_PATTERN = re.compile(r"[\x00-\x1f\x7f-\x9f]")


def sanitize_for_log(value: Any, replacement: str = " ") -> str:
    """
    Sanitize a value for safe logging by replacing control characters.

    The input is converted with ``str()`` and every character matched by
    ``CONTROL_CHARS_PATTERN`` (newlines, carriage returns, tabs and the other
    C0/C1 control characters) is replaced with ``replacement``.

    Args:
        value: The value to sanitize. Can be any type; will be converted to string.
        replacement: The string substituted for each control character. Defaults
            to a space. Use empty string '' to remove control characters entirely.
            It is inserted literally: backslashes and ``\\g<...>`` sequences are
            NOT interpreted as regex replacement-template escapes.

    Returns:
        A sanitized string safe for logging, with control characters replaced.

    Security Notes:
        - Always use this function when logging unauthenticated user input
        - Particularly important for query parameters, headers, and form data
        - Does not protect against other injection types (SQL, XSS, etc.)
        - Structured logging (JSON) provides additional protection
        - The replacement itself is not sanitized; do not pass control characters in it

    Examples:
        >>> sanitize_for_log("error: bad scope\\nCRITICAL:root:FAKE LOG")
        'error: bad scope CRITICAL:root:FAKE LOG'
        >>> sanitize_for_log("path/to/file\\x00null")
        'path/to/file null'
        >>> sanitize_for_log("normal text")
        'normal text'
        >>> sanitize_for_log(None)
        'None'
        >>> sanitize_for_log({"key": "value"})
        "{'key': 'value'}"
        >>> sanitize_for_log("a\\nb", replacement="\\\\n")
        'a\\\\nb'
    """
    # Convert to string first (handles None, numbers, objects, etc.)
    text = str(value)

    # A callable is used instead of passing `replacement` directly: a string
    # argument is treated by `re` as a template, so a replacement such as the
    # two characters backslash + "n" would be expanded into a real newline
    # (re-introducing the injection) and a lone backslash would raise re.error.
    return CONTROL_CHARS_PATTERN.sub(lambda _match: replacement, text)
def sanitize_dict_for_log(data: dict[str, Any], replacement: str = " ") -> dict[str, str]:
    """
    Sanitize all keys and values in a dictionary for safe logging.

    This is useful when logging multiple related values, such as query parameters
    or form data. Each value is sanitized individually with ``sanitize_for_log``.
    String keys are sanitized the same way, because parameter names in query
    strings and forms are just as attacker-controlled as their values.

    Args:
        data: Dictionary with string keys and any values
        replacement: The string to replace control characters with

    Returns:
        A new dictionary with all values sanitized as strings and all string keys
        sanitized. Keys that are not strings are kept unchanged. If two keys become
        equal after sanitization, the entry that appears later in ``data`` wins.

    Examples:
        >>> sanitize_dict_for_log({"error": "foo", "desc": "bar\\nFAKE"})
        {'error': 'foo', 'desc': 'bar FAKE'}
        >>> sanitize_dict_for_log({"count": 42, "name": "test\\ttab"})
        {'count': '42', 'name': 'test tab'}
        >>> sanitize_dict_for_log({"key\\nFAKE": "value"})
        {'key FAKE': 'value'}
    """
    sanitized: dict[str, str] = {}
    for key, value in data.items():
        # Only string keys are rewritten so that any non-string key keeps its type.
        safe_key = sanitize_for_log(key, replacement) if isinstance(key, str) else key
        sanitized[safe_key] = sanitize_for_log(value, replacement)
    return sanitized
def sanitize_optional(value: Optional[Any], replacement: str = " ") -> Optional[str]:
    """
    Sanitize a value for logging while letting ``None`` pass through untouched.

    Use this instead of ``sanitize_for_log`` when a missing value should stay
    ``None`` rather than become the string "None".

    Args:
        value: The value to sanitize, or None
        replacement: The string to replace control characters with

    Returns:
        None when value is None, otherwise the result of ``sanitize_for_log``

    Examples:
        >>> sanitize_optional("text\\nwith newline")
        'text with newline'
        >>> sanitize_optional(None)
        >>> sanitize_optional(None) is None
        True
    """
    return None if value is None else sanitize_for_log(value, replacement)