Fenix: Micrometer Metrics for CloudWatch
Problem
In a microservices architecture, it is crucial to monitor the performance and reliability of both internal and external HTTP requests and API calls. This involves measuring the latency and counting the number of requests based on their status codes. Without proper monitoring, it becomes challenging to identify performance bottlenecks and reliability issues.
Solution
To address this problem, we can use two classes: HttpMetricsRequestInterceptor and LMSApiMetricsFilter. These classes help in capturing and publishing metrics for HTTP requests and API calls, respectively.
HttpMetricsRequestInterceptor
Purpose:
Captures and publishes metrics for HTTP requests made by the client, focusing on third-party (3P) requests.
How It Works:
Latency Measurement: Uses Timer to measure the time taken for each HTTP request.
Request Counting: Uses Counter to count the number of requests based on their status codes (success, bad request, auth failure, internal failure).
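Host Identification: Differentiates between internal and third-party requests by checking the request host. Any host whose name contains "fenix" is treated as internal and passed through without metrics; only the remaining (third-party) requests are measured.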
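Code (note: the listing that follows is the LMSApiMetricsFilter class, which is described in the "LMSApiMetricsFilter" section further down; the HttpMetricsRequestInterceptor listing appears under that section):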
/**
 * Servlet filter that publishes latency and outcome metrics for API requests handled by this
 * application.
 *
 * <p>After the rest of the filter chain has run, the request URI is resolved through
 * {@code LMSApiConfigUtility.getApiMetricConfigMapping}. If a mapping exists and has metrics
 * enabled, the filter records:
 * <ul>
 *   <li>a timer named {@code fenix.internal.api} or {@code fenix.external.api}, tagged with
 *       {@code apiName} and {@code httpStatusCode};</li>
 *   <li>one counter increment on {@code <namespace>.requests.success},
 *       {@code <namespace>.requests.badRequest} or {@code <namespace>.requests.internalFailure},
 *       tagged with {@code apiName}.</li>
 * </ul>
 */
public class LMSApiMetricsFilter extends OncePerRequestFilter {

    private static final String INTERNAL_NAMESPACE = "fenix.internal.api";
    private static final String EXTERNAL_NAMESPACE = "fenix.external.api";
    private static final String UNKNOWN_API_NAME = "unknown";

    private final MeterRegistry meterRegistry;

    /**
     * @param meterRegistry registry that every timer and counter is registered with
     */
    public LMSApiMetricsFilter(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
    }

    /**
     * Runs the remaining filter chain and then records metrics for the request, whether the chain
     * completed normally or threw.
     *
     * @throws ServletException propagated unchanged from the filter chain
     * @throws IOException propagated unchanged from the filter chain
     */
    @Override
    protected void doFilterInternal(HttpServletRequest request, HttpServletResponse response, FilterChain filterChain)
            throws ServletException, IOException {
        // nanoTime is monotonic; currentTimeMillis can jump when the wall clock is adjusted,
        // which would produce negative or inflated latencies.
        long startNanos = System.nanoTime();
        boolean completedNormally = false;
        try {
            filterChain.doFilter(request, response);
            completedNormally = true;
        } finally {
            recordMetrics(request, response, System.nanoTime() - startNanos, completedNormally);
        }
    }

    /** Publishes the timer sample and the outcome counter for one request, if metrics are enabled for it. */
    private void recordMetrics(HttpServletRequest request, HttpServletResponse response, long elapsedNanos,
            boolean completedNormally) {
        LMSApiMetricConfigEnum apiMetricConfigMapping =
                LMSApiConfigUtility.getApiMetricConfigMapping(request.getRequestURI());
        if (apiMetricConfigMapping == null || !apiMetricConfigMapping.getIsMetricEnabled()) {
            return;
        }

        String apiName = apiMetricConfigMapping.getApiName();
        if (apiName == null) {
            apiName = UNKNOWN_API_NAME;
        }

        int statusCode = response.getStatus();
        // When the chain threw, the response status has typically not been switched to an error
        // yet (it is usually still the default 200), so reading it as-is would count the failed
        // request as a success. Report it as a server error instead.
        if (!completedNormally && statusCode < HttpServletResponse.SC_BAD_REQUEST) {
            statusCode = HttpServletResponse.SC_INTERNAL_SERVER_ERROR;
        }

        String namespace = apiMetricConfigMapping.getIsInternalApi() ? INTERNAL_NAMESPACE : EXTERNAL_NAMESPACE;

        Timer.builder(namespace)
                .tag("apiName", apiName)
                .tag("httpStatusCode", String.valueOf(statusCode))
                .register(meterRegistry)
                .record(elapsedNanos, java.util.concurrent.TimeUnit.NANOSECONDS);

        Counter.builder(namespace + outcomeSuffix(statusCode))
                .tag("apiName", apiName)
                .register(meterRegistry)
                .increment();
    }

    /**
     * Maps an HTTP status code to the counter-name suffix: 2xx/3xx are success, 400 and 404 are
     * bad requests, and everything else (including 401/403) is an internal failure.
     */
    private static String outcomeSuffix(int statusCode) {
        if (statusCode >= 200 && statusCode < 400) {
            return ".requests.success";
        }
        if (statusCode == 400 || statusCode == 404) {
            return ".requests.badRequest";
        }
        return ".requests.internalFailure";
    }
}
LMSApiMetricsFilter
Purpose:
Captures and publishes metrics for API requests handled by the server, focusing on both internal and external APIs.
How It Works:
Latency Measurement: Uses Timer to measure the time taken for each API request.
Request Counting: Uses Counter to count the number of API requests based on their status codes (success, bad request, internal failure).
API Identification: Uses LMSApiConfigUtility to determine if the API is internal or external and whether metrics are enabled for it.
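Code (note: the listing that follows is the HttpMetricsRequestInterceptor class, which is described in the "HttpMetricsRequestInterceptor" section above; the LMSApiMetricsFilter listing appears under that section):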
/**
 * HTTP client interceptor that publishes latency and outcome metrics for outbound requests to
 * third-party hosts.
 *
 * <p>Requests whose host contains {@code "fenix"} are passed through untouched. For every other
 * request the interceptor records a {@code fenix.3p.requests} timer and increments one of the
 * {@code fenix.3p.requests.success}, {@code .badRequest}, {@code .authFailure} or
 * {@code .internalFailure} counters. All meters are tagged with {@code host} and {@code method}.
 */
public class HttpMetricsRequestInterceptor implements Interceptor {

    private static final String METRIC_PREFIX = "fenix.3p.requests";

    private final MeterRegistry meterRegistry;

    /**
     * @param meterRegistry registry that every timer and counter is registered with
     */
    public HttpMetricsRequestInterceptor(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
    }

    /**
     * Proceeds with the request and records its latency and outcome.
     *
     * <p>If the call throws (for example on a timeout or connection failure), the latency is still
     * recorded and the request is counted under {@code internalFailure} before the exception
     * propagates; otherwise the slowest and most important failures would never reach the metrics.
     *
     * @throws IOException propagated unchanged from {@code chain.proceed}
     */
    @NotNull
    @Override
    public Response intercept(@NotNull Chain chain) throws IOException {
        Request request = chain.request();
        String host = request.url().host();
        if (host.contains("fenix")) {
            // Internal call: not measured here.
            return chain.proceed(request);
        }

        // The request tag is used as the "method" tag when present; otherwise the encoded path.
        // NOTE(review): falling back to the raw path can create one time series per distinct
        // path (e.g. paths embedding ids) - confirm that callers always set a tag.
        String methodName =
                Optional.ofNullable(request.tag()).orElse(request.url().encodedPath()).toString();

        Timer.Sample sample = Timer.start(meterRegistry);
        // Assume failure until a response has actually been received.
        String outcome = "internalFailure";
        try {
            Response response = chain.proceed(request);
            outcome = outcomeFor(response.code());
            return response;
        } finally {
            sample.stop(Timer.builder(METRIC_PREFIX)
                    .tag("method", methodName)
                    .tag("host", host)
                    .register(meterRegistry));
            Counter.builder(METRIC_PREFIX + "." + outcome)
                    .tag("host", host)
                    .tag("method", methodName)
                    .register(meterRegistry)
                    .increment();
        }
    }

    /**
     * Maps an HTTP status code to the counter-name suffix: 2xx/3xx are success, 400/404 are bad
     * requests, 401/403 are auth failures, and everything else is an internal failure.
     */
    private static String outcomeFor(int statusCode) {
        if (statusCode >= 200 && statusCode < 400) {
            return "success";
        }
        if (statusCode == 400 || statusCode == 404) {
            return "badRequest";
        }
        if (statusCode == 401 || statusCode == 403) {
            return "authFailure";
        }
        return "internalFailure";
    }
}
Approach 2: How AOP Works in This Case
Custom Annotation: A custom annotation @TrackMetrics is created to mark methods that need metrics collection.
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * Marks a method whose invocations should have metrics collected.
 *
 * <p>The annotation is retained at runtime and may only be placed on methods.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.METHOD})
public @interface TrackMetrics {

    /** Base metric name supplied by the annotated method; required, no default. */
    String value();
}
Aspect: An aspect MetricsAspect is defined to intercept methods annotated with @TrackMetrics.
/**
 * Aspect that records latency and outcome metrics around every method annotated with
 * {@code @TrackMetrics}.
 *
 * <p>For an annotation value {@code name} it publishes a timer {@code name.latency} tagged with
 * {@code status}, and increments a counter {@code name.<status>}. The status is derived from the
 * returned object's status code, or is {@code failure} when the method throws an exception.
 */
@Aspect
@Component
public class MetricsAspect {

    @Autowired
    private MeterRegistry meterRegistry;

    /**
     * Times the intercepted method and publishes its metrics exactly once.
     *
     * @param joinPoint the intercepted invocation
     * @param trackMetrics the annotation instance; its value is the base metric name
     * @return whatever the intercepted method returned
     * @throws Throwable whatever the intercepted method threw, rethrown unchanged
     */
    @Around("@annotation(trackMetrics)")
    public Object trackMetrics(ProceedingJoinPoint joinPoint, TrackMetrics trackMetrics) throws Throwable {
        Timer.Sample sample = Timer.start(meterRegistry);
        String metricName = trackMetrics.value();

        Object result;
        String statusTag;
        try {
            result = joinPoint.proceed();
            statusTag = getStatusTag(getStatusCode(result));
        } catch (Exception e) {
            publish(sample, metricName, "failure");
            throw e;
        }
        // Published outside the try block so that an exception raised while publishing cannot
        // lead to the same sample being stopped a second time in the catch block.
        publish(sample, metricName, statusTag);
        return result;
    }

    /** Stops the timer sample on {@code <metricName>.latency} and increments {@code <metricName>.<statusTag>}. */
    private void publish(Timer.Sample sample, String metricName, String statusTag) {
        sample.stop(Timer.builder(metricName + ".latency")
                .tag("status", statusTag)
                .register(meterRegistry));
        Counter.builder(metricName + "." + statusTag)
                .register(meterRegistry)
                .increment();
    }

    /**
     * Extracts the status code from a result. Only {@code CashFreeTransferResponse} is understood;
     * any other result (including {@code null}) is treated as 200.
     */
    private int getStatusCode(Object result) {
        if (result instanceof CashFreeTransferResponse) {
            return ((CashFreeTransferResponse) result).getStatusCode(); // Assuming getStatusCode() method exists
        }
        return HttpStatus.OK.value();
    }

    /**
     * Maps a status code to a metric status tag.
     *
     * <p>401 and 403 keep their dedicated tags. Every other 4xx code (400, 404, 429, ...) is
     * reported as {@code badRequest} and every code of 500 or above as
     * {@code internalServerError}, so that error responses are never counted as {@code success}.
     * Codes below 400 are {@code success}.
     */
    private String getStatusTag(int statusCode) {
        switch (statusCode) {
            case 401:
                return "unauthorized";
            case 403:
                return "forbidden";
            default:
                if (statusCode >= 500) {
                    return "internalServerError";
                }
                if (statusCode >= 400) {
                    return "badRequest";
                }
                return "success";
        }
    }
}
Method Interception: The aspect intercepts the execution of each annotated method, starts a timer, and then proceeds with the method execution. For example, the following client methods are annotated with @TrackMetrics:
/**
 * Posts the given transfer request to {@code cashFreePayoutUrl} through {@code httpUtility} and
 * returns the response deserialized as {@code CashFreeTransferResponse}.
 *
 * <p>Annotated with {@code @TrackMetrics("cashfree.transfer")}.
 *
 * @param request the transfer request sent as the call payload
 * @return the response produced by {@code httpUtility.post}
 */
@TrackMetrics("cashfree.transfer")
public CashFreeTransferResponse transferToBeneficiary(CashFreeTransferRequest request) {
    CashFreeTransferResponse transferResponse = httpUtility.post(
            "cashFreePayout",
            cashFreePayoutUrl,
            getCashFreeRequestHeaders(),
            request,
            CashFreeTransferResponse.class,
            // presumably invoked for non-successful responses - confirm against httpUtility
            this::handleCashFreeUnSuccessfulResponse);
    return transferResponse;
}
/**
 * Fetches a transfer through {@code httpUtility.get}, using the URL formed by appending
 * {@code CASHFREE_STATUS_URI_PART} and the transfer id to {@code cashFreePayoutUrl}.
 *
 * <p>Annotated with {@code @TrackMetrics("cashfree.status")}.
 *
 * @param cashFreeTransferId id appended verbatim to the status URL
 * @return the response produced by {@code httpUtility.get}
 */
@TrackMetrics("cashfree.status")
public CashFreeTransferResponse getTransferStatus(String cashFreeTransferId) {
    return httpUtility.get(
            "cashFreePayout",
            cashFreePayoutUrl + CASHFREE_STATUS_URI_PART + cashFreeTransferId,
            getCashFreeRequestHeaders(),
            CashFreeTransferResponse.class,
            // presumably invoked for non-successful responses - confirm against httpUtility
            this::handleCashFreeUnSuccessfulResponse);
}
Metrics Collection: After the method execution, the aspect stops the timer and increments the appropriate counters based on the HTTP status code.