Coverage for mcpgateway / services / metrics.py: 98%

50 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-02-11 07:10 +0000

1# -*- coding: utf-8 -*- 

2""" 

3Location: ./mcpgateway/services/metrics.py 

4Copyright 2025 

5SPDX-License-Identifier: Apache-2.0 

6 

7MCP Gateway Metrics Service. 

8 

9This module provides comprehensive Prometheus metrics instrumentation for the MCP Gateway. 

10It configures and exposes HTTP metrics including request counts, latencies, response sizes, 

11and custom application metrics. 

12 

13The service automatically instruments FastAPI applications with standard HTTP metrics 

14and provides configurable exclusion patterns for endpoints that should not be monitored. 

15Metrics are exposed at the `/metrics/prometheus` endpoint in Prometheus format. 

16 

17Supported Metrics: 

18- http_requests_total: Counter for total HTTP requests by method, endpoint, and status 

19- http_request_duration_seconds: Histogram of request processing times 

20- http_request_size_bytes: Histogram of incoming request payload sizes 

21- http_response_size_bytes: Histogram of outgoing response payload sizes 

22- app_info: Gauge with custom static labels for application metadata 

23 

24Environment Variables: 

25- ENABLE_METRICS: Enable/disable metrics collection (default: "true") 

26- METRICS_EXCLUDED_HANDLERS: Comma-separated regex patterns for excluded endpoints 

27- METRICS_CUSTOM_LABELS: Custom labels for app_info gauge (format: "key1=value1,key2=value2") 

28 

29Usage: 

30 from mcpgateway.services.metrics import setup_metrics 

31 

32 app = FastAPI() 

33 setup_metrics(app) # Automatically instruments the app 

34 

35 # Metrics available at: GET /metrics/prometheus 

36 

37Functions: 

38- setup_metrics: Configure Prometheus instrumentation for FastAPI app 

39""" 

40 

41# Standard 

42import os 

43import re 

44 

45# Third-Party 

46from fastapi import Response, status 

47from prometheus_client import Counter, Gauge, REGISTRY 

48from prometheus_fastapi_instrumentator import Instrumentator 

49 

50# First-Party 

51from mcpgateway.config import settings 

52 

# Global Metrics
# Module-level counters; services and plugins import these and increment them.

# Incremented when a tool invocation times out; one series per tool name.
tool_timeout_counter = Counter(
    name="tool_timeout_total",
    documentation="Total number of tool invocation timeouts",
    labelnames=["tool_name"],
)

# Incremented each time a circuit breaker opens; one series per tool name.
circuit_breaker_open_counter = Counter(
    name="circuit_breaker_open_total",
    documentation="Total number of times circuit breaker opened",
    labelnames=["tool_name"],
)

66 

67 

def _detect_db_engine(database_url):
    """Map a lower-cased database URL to a coarse engine name for metric labels.

    Args:
        database_url: Lower-cased database URL (e.g. ``postgresql+psycopg2://...``).

    Returns:
        str: One of "mariadb", "postgresql", "sqlite", "mongodb" or "unknown".
    """
    if database_url.startswith("mysql+pymysql://") or "mariadb" in database_url:
        return "mariadb"

    scheme, separator, _ = database_url.partition("://")
    if not separator:
        return "unknown"

    # URLs may carry a driver suffix ("dialect+driver://"); only the dialect
    # part identifies the engine.
    dialect = scheme.partition("+")[0]
    if dialect in ("postgresql", "postgres"):
        return "postgresql"
    if dialect in ("sqlite", "mongodb"):
        return dialect
    return "unknown"


def _parse_custom_labels(raw):
    """Parse a ``key1=value1,key2=value2`` string into a label dictionary.

    Items without "=" or with an empty key are skipped. Only the first "=" of
    an item separates key from value, so values may themselves contain "=".
    Surrounding whitespace is stripped from keys and values.

    Args:
        raw: Raw label string, possibly empty.

    Returns:
        dict: Mapping of label name to label value.
    """
    labels = {}
    for item in raw.split(","):
        key, separator, value = item.partition("=")
        key = key.strip()
        if separator and key:
            labels[key] = value.strip()
    return labels


def setup_metrics(app):
    """
    Configure Prometheus metrics instrumentation for a FastAPI application.

    When metrics are enabled this registers the ``app_info`` and ``database_info``
    gauges, two HTTP connection pool gauges, instruments the app with
    ``prometheus_fastapi_instrumentator`` and exposes ``/metrics/prometheus``.
    When disabled it registers a ``/metrics/prometheus`` route that answers
    with HTTP 503 instead.

    Args:
        app: FastAPI application instance to instrument

    Configuration Used:
        ENABLE_METRICS (environment variable): "true" (case-insensitive) enables
            metrics, any other value disables them (default: "true")
        METRICS_CUSTOM_LABELS (environment variable): Custom labels in
            "key1=value1,key2=value2" format for the app_info gauge metric.
            An "engine" label is always set from the detected database engine
            and overrides a custom label with the same name.
        settings.METRICS_EXCLUDED_HANDLERS: Comma-separated regex patterns for
            endpoints to exclude from metrics collection
        settings.database_url: Used to derive the database engine and URL scheme labels

    Side Effects:
        - Registers Prometheus metrics collectors with the global registry
        - Adds middleware to the FastAPI app for request instrumentation
        - Exposes /metrics/prometheus endpoint for Prometheus scraping
        - Sets ``app.state.update_http_pool_metrics`` (enabled case only)
        - Prints status messages to stdout

    Example:
        >>> from fastapi import FastAPI
        >>> from mcpgateway.services.metrics import setup_metrics
        >>> app = FastAPI()
        >>> # setup_metrics(app) # Configures Prometheus metrics
        >>> # Metrics available at GET /metrics/prometheus
    """
    enable_metrics = os.getenv("ENABLE_METRICS", "true").lower() == "true"

    if enable_metrics:
        # Detect database engine from the configured database URL
        database_url = settings.database_url.lower()
        db_engine = _detect_db_engine(database_url)

        # Custom labels gauge with automatic database engine detection
        custom_labels = _parse_custom_labels(os.getenv("METRICS_CUSTOM_LABELS", ""))

        # Always include database engine in metrics; this guarantees the label
        # set is never empty, so the gauge is created unconditionally.
        custom_labels["engine"] = db_engine

        app_info_gauge = Gauge(
            "app_info",
            "Static labels for the application",
            labelnames=list(custom_labels.keys()),
            registry=REGISTRY,
        )
        app_info_gauge.labels(**custom_labels).set(1)

        excluded = [pattern.strip() for pattern in (settings.METRICS_EXCLUDED_HANDLERS or "").split(",") if pattern.strip()]

        # Add database metrics gauge
        db_info_gauge = Gauge(
            "database_info",
            "Database engine information",
            labelnames=["engine", "url_scheme"],
            registry=REGISTRY,
        )

        # Extract URL scheme for additional context
        url_scheme = database_url.split("://", maxsplit=1)[0] if "://" in database_url else "unknown"
        db_info_gauge.labels(engine=db_engine, url_scheme=url_scheme).set(1)

        # Add HTTP connection pool metrics with lazy initialization
        # These gauges are updated from app lifespan after SharedHttpClient is ready
        http_pool_max_connections = Gauge(
            "http_pool_max_connections",
            "Maximum allowed HTTP connections in the pool",
            registry=REGISTRY,
        )
        http_pool_max_keepalive = Gauge(
            "http_pool_max_keepalive_connections",
            "Maximum idle keepalive connections to retain",
            registry=REGISTRY,
        )

        def update_http_pool_metrics():
            """Update HTTP connection pool metrics from SharedHttpClient stats."""
            try:
                # First-Party
                from mcpgateway.services.http_client_service import SharedHttpClient  # pylint: disable=import-outside-toplevel

                # Only update if client is initialized
                if SharedHttpClient._instance and SharedHttpClient._instance._initialized:  # pylint: disable=protected-access
                    stats = SharedHttpClient._instance.get_pool_stats()  # pylint: disable=protected-access
                    http_pool_max_connections.set(stats.get("max_connections", 0))
                    http_pool_max_keepalive.set(stats.get("max_keepalive", 0))
                    # Note: httpx doesn't expose current connection count, only limits
            except Exception:  # nosec B110
                # Deliberately best-effort: a metrics refresh must never break
                # the caller, so any failure here is ignored.
                pass

        # Attach the update function to the app state so it can be called
        # from the application lifespan after SharedHttpClient is initialized
        app.state.update_http_pool_metrics = update_http_pool_metrics

        # Create instrumentator instance
        instrumentator = Instrumentator(
            should_group_status_codes=False,
            should_ignore_untemplated=True,
            excluded_handlers=[re.compile(p) for p in excluded],
        )

        # Instrument FastAPI app
        instrumentator.instrument(app)

        # Expose Prometheus metrics at /metrics/prometheus and include
        # the endpoint in the OpenAPI schema so it appears in Swagger UI.
        instrumentator.expose(app, endpoint="/metrics/prometheus", include_in_schema=True, should_gzip=True)

        print("✅ Metrics instrumentation enabled")
    else:
        print("⚠️ Metrics instrumentation disabled")

        @app.get("/metrics/prometheus")
        async def metrics_disabled():
            """Returns metrics response when metrics collection is disabled.

            Returns:
                Response: HTTP 503 response indicating metrics are disabled.
            """
            return Response(content='{"error": "Metrics collection is disabled"}', media_type="application/json", status_code=status.HTTP_503_SERVICE_UNAVAILABLE)