Coverage for mcpgateway / utils / log_sanitizer.py: 100%

13 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-09 03:05 +0000

1# -*- coding: utf-8 -*- 

2"""Location: ./mcpgateway/utils/log_sanitizer.py 

3Copyright 2025 

4SPDX-License-Identifier: Apache-2.0 

5 

6Log Sanitization Utility. 

7 

8This module provides utilities to sanitize untrusted input before logging to prevent 

9log injection attacks. Control characters like newlines (\n, \r) can be used to inject 

10fabricated log entries when logging unauthenticated user input. 

11 

12Security Context: 

13 Log injection occurs when an attacker includes control characters (especially newlines) 

14 in query parameters, headers, or other user-controlled input that gets logged. When 

15 URL-decoded by the ASGI framework, these characters are passed to Python's logging 

16 module which does not sanitize them, allowing injection of fake log lines. 

17 

18 Example attack: 

19 GET /oauth/callback?error=foo&error_description=bar%0ACRITICAL:root:SECURITY+BREACH 

20 

21 This produces two log lines: 

22 WARNING:oauth:OAuth error: bar 

23 CRITICAL:root:SECURITY BREACH 

24 

25 The second line is entirely fabricated by the attacker. 

26 

27Mitigation: 

28 This utility strips or replaces control characters before logging. Structured logging 

29 (JSON format) also mitigates this by encapsulating the full message as a single field. 

30 

31Examples: 

32 >>> from mcpgateway.utils.log_sanitizer import sanitize_for_log 

33 >>> sanitize_for_log("normal text") 

34 'normal text' 

35 >>> sanitize_for_log("text with\\nnewline") 

36 'text with newline' 

37 >>> sanitize_for_log("text with\\r\\nCRLF") 

38 'text with  CRLF' 

39 >>> sanitize_for_log("tab\\there") 

40 'tab here' 

41 >>> sanitize_for_log(None) 

42 'None' 

43 >>> sanitize_for_log(123) 

44 '123' 

45""" 

46 

47# Standard 

48import re 

49from typing import Any, Optional 

50 

# Characters considered unsafe in log output: the C0 control range (U+0000-U+001F,
# which covers \n, \r, \t, \v and \f), DEL (U+007F) and the C1 control range
# (U+0080-U+009F). The ordinary space (U+0020) lies outside these ranges and is kept.
CONTROL_CHARS_PATTERN = re.compile(r"[\x00-\x1f\x7f-\x9f]")


def sanitize_for_log(value: Any, replacement: str = " ") -> str:
    """
    Return a log-safe string form of ``value`` with control characters replaced.

    The value is converted with ``str()`` and every character matched by
    ``CONTROL_CHARS_PATTERN`` (newlines, carriage returns, tabs and the other
    C0/C1 control characters) is replaced by ``replacement``. Each matched
    character is replaced individually, so a CR+LF pair yields two replacements.

    Args:
        value: The value to sanitize. Can be any type; it is converted to a string.
        replacement: The text substituted for each control character. Defaults to a
            single space; pass an empty string to drop control characters entirely.
            The text is inserted literally: backslashes and group references in it
            are not interpreted.

    Returns:
        The string form of ``value`` with each control character replaced.

    Security Notes:
        - Use this function when logging unauthenticated user input
        - Particularly relevant for query parameters, headers, and form data
        - Does not protect against other injection types (SQL, XSS, etc.)

    Examples:
        >>> sanitize_for_log("error: bad scope\\nCRITICAL:root:FAKE LOG")
        'error: bad scope CRITICAL:root:FAKE LOG'
        >>> sanitize_for_log("path/to/file\\x00null")
        'path/to/file null'
        >>> sanitize_for_log("normal text")
        'normal text'
        >>> sanitize_for_log("a\\nb", replacement="_")
        'a_b'
        >>> sanitize_for_log(None)
        'None'
        >>> sanitize_for_log({"key": "value"})
        "{'key': 'value'}"
    """
    # str() copes with None, numbers and arbitrary objects.
    text = str(value)

    # Pass a callable rather than the string itself: a string replacement is
    # treated by ``re`` as a template, so a backslash sequence in it could expand
    # to a real newline, re-insert the matched character, or raise ``re.error``.
    return CONTROL_CHARS_PATTERN.sub(lambda _match: replacement, text)

98 

99 

def sanitize_dict_for_log(data: dict[str, Any], replacement: str = " ") -> dict[str, str]:
    """
    Build a copy of ``data`` whose values are sanitized for logging.

    Handy for logging a group of related values (query parameters, form data).
    Every value is passed through ``sanitize_for_log`` on its own; keys are
    carried over unchanged and the input dictionary is not modified.

    Args:
        data: Dictionary with string keys and values of any type
        replacement: The string to replace control characters with

    Returns:
        A new dictionary mapping each original key to its sanitized string value

    Examples:
        >>> sanitize_dict_for_log({"error": "foo", "desc": "bar\\nFAKE"})
        {'error': 'foo', 'desc': 'bar FAKE'}
        >>> sanitize_dict_for_log({"count": 42, "name": "test\\ttab"})
        {'count': '42', 'name': 'test tab'}
    """
    cleaned: dict[str, str] = {}
    for name, raw in data.items():
        cleaned[name] = sanitize_for_log(raw, replacement)
    return cleaned

121 

122 

def sanitize_optional(value: Optional[Any], replacement: str = " ") -> Optional[str]:
    """
    Sanitize a value for logging while letting ``None`` pass through untouched.

    Use this instead of ``sanitize_for_log`` when a missing value should stay
    ``None`` rather than become the string "None".

    Args:
        value: The value to sanitize, or None
        replacement: The string to replace control characters with

    Returns:
        None when ``value`` is None; otherwise the sanitized string

    Examples:
        >>> sanitize_optional("text\\nwith newline")
        'text with newline'
        >>> sanitize_optional(None)
        >>> sanitize_optional(None) is None
        True
    """
    return None if value is None else sanitize_for_log(value, replacement)