From 5b560d77eccd921fdb3a083353470647775a9f60 Mon Sep 17 00:00:00 2001 From: Vivek Kumar Date: Thu, 22 Feb 2024 20:27:18 +0530 Subject: [PATCH] add telemetry endpoint --- cmd/adapter/adapter.go | 12 ------- pkg/client/metrics/metrics.go | 64 +++++++---------------------------- 2 files changed, 13 insertions(+), 63 deletions(-) diff --git a/cmd/adapter/adapter.go b/cmd/adapter/adapter.go index 55961eaf..dd2a799b 100644 --- a/cmd/adapter/adapter.go +++ b/cmd/adapter/adapter.go @@ -271,23 +271,11 @@ func (cmd *PrometheusAdapter) addResourceMetricsAPI(promClient prom.Client, stop return err } - config, err := cmd.Config() - if err != nil { - return err - } - config.GenericConfig.EnableMetrics = false - server, err := cmd.Server() if err != nil { return err } - metricsHandler, err := mprom.MetricsHandler() - if err != nil { - return err - } - server.GenericAPIServer.Handler.NonGoRestfulMux.HandleFunc("/metrics", metricsHandler) - if err := api.Install(provider, podInformer.Lister(), informer.Core().V1().Nodes().Lister(), server.GenericAPIServer, nil); err != nil { return err } diff --git a/pkg/client/metrics/metrics.go b/pkg/client/metrics/metrics.go index 8067872a..fd7660da 100644 --- a/pkg/client/metrics/metrics.go +++ b/pkg/client/metrics/metrics.go @@ -18,16 +18,11 @@ package metrics import ( "context" - "net/http" "net/url" "time" "github.com/prometheus/client_golang/prometheus" - apimetrics "k8s.io/apiserver/pkg/endpoints/metrics" - "k8s.io/component-base/metrics" - "k8s.io/component-base/metrics/legacyregistry" - "sigs.k8s.io/prometheus-adapter/pkg/client" ) @@ -35,47 +30,18 @@ var ( // queryLatency is the total latency of any query going through the // various endpoints (query, range-query, series). It includes some deserialization // overhead and HTTP overhead. - queryLatency = metrics.NewHistogramVec( - &metrics.HistogramOpts{ - Namespace: "prometheus_adapter", - Subsystem: "prometheus_client", - Name: "request_duration_seconds", - Help: "Prometheus client query latency in seconds. Broken down by target prometheus endpoint and target server", - Buckets: prometheus.DefBuckets, + queryLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "cmgateway_prometheus_query_latency_seconds", + Help: "Prometheus client query latency in seconds. Broken down by target prometheus endpoint and target server", + Buckets: prometheus.ExponentialBuckets(0.0001, 2, 10), }, - []string{"path", "server"}, + []string{"endpoint", "server"}, ) - - // define a counter for API errors for various ErrorTypes - apiErrorCount = metrics.NewCounterVec( - &metrics.CounterOpts{ - Namespace: "prometheus_adapter", - Subsystem: "prometheus_client", - Name: "api_errors_total", - Help: "Total number of API errors", - }, - []string{"error_code", "path", "server"}, - ) ) -func MetricsHandler() (http.HandlerFunc, error) { - registry := metrics.NewKubeRegistry() - - errRegisterQueryLatency := registry.Register(queryLatency) - if errRegisterQueryLatency != nil { - return nil, errRegisterQueryLatency - } - - errRegisterAPIErrorCount := registry.Register(apiErrorCount) - if errRegisterAPIErrorCount != nil { - return nil, errRegisterAPIErrorCount - } - - apimetrics.Register() - return func(w http.ResponseWriter, req *http.Request) { - legacyregistry.Handler().ServeHTTP(w, req) - metrics.HandlerFor(registry, metrics.HandlerOpts{}).ServeHTTP(w, req) - }, nil +func init() { + prometheus.MustRegister(queryLatency) } // instrumentedClient is a client.GenericAPIClient which instruments calls to Do, @@ -92,16 +58,12 @@ func (c *instrumentedGenericClient) Do(ctx context.Context, verb, endpoint strin endTime := time.Now() // skip calls where we don't make the actual request if err != nil { - if apiErr, wasAPIErr := err.(*client.Error); wasAPIErr { - // Measure API errors - apiErrorCount.With(prometheus.Labels{"error_code": string(apiErr.Type), "path": endpoint, "server": c.serverName}).Inc() - } else { - // Increment a generic error code counter - apiErrorCount.With(prometheus.Labels{"error_code": "generic", "path": endpoint, "server": c.serverName}).Inc() - } - return + if _, wasAPIErr := err.(*client.Error); !wasAPIErr { + // TODO: measure API errors by code? + return + } } - queryLatency.With(prometheus.Labels{"path": endpoint, "server": c.serverName}).Observe(endTime.Sub(startTime).Seconds()) + queryLatency.With(prometheus.Labels{"endpoint": endpoint, "server": c.serverName}).Observe(endTime.Sub(startTime).Seconds()) }() var resp client.APIResponse