add telemetry endpoint

This commit is contained in:
Vivek Kumar 2024-02-22 20:27:18 +05:30
parent 57b07c911b
commit 5b560d77ec
No known key found for this signature in database
GPG key ID: 81605FDFBA4EF440
2 changed files with 13 additions and 63 deletions

View file

@@ -271,23 +271,11 @@ func (cmd *PrometheusAdapter) addResourceMetricsAPI(promClient prom.Client, stop
return err
}
config, err := cmd.Config()
if err != nil {
return err
}
config.GenericConfig.EnableMetrics = false
server, err := cmd.Server()
if err != nil {
return err
}
metricsHandler, err := mprom.MetricsHandler()
if err != nil {
return err
}
server.GenericAPIServer.Handler.NonGoRestfulMux.HandleFunc("/metrics", metricsHandler)
if err := api.Install(provider, podInformer.Lister(), informer.Core().V1().Nodes().Lister(), server.GenericAPIServer, nil); err != nil {
return err
}

View file

@@ -18,16 +18,11 @@ package metrics
import (
"context"
"net/http"
"net/url"
"time"
"github.com/prometheus/client_golang/prometheus"
apimetrics "k8s.io/apiserver/pkg/endpoints/metrics"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"sigs.k8s.io/prometheus-adapter/pkg/client"
)
@@ -35,47 +30,18 @@ var (
// queryLatency is the total latency of any query going through the
// various endpoints (query, range-query, series). It includes some deserialization
// overhead and HTTP overhead.
queryLatency = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Namespace: "prometheus_adapter",
Subsystem: "prometheus_client",
Name: "request_duration_seconds",
Help: "Prometheus client query latency in seconds. Broken down by target prometheus endpoint and target server",
Buckets: prometheus.DefBuckets,
queryLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "cmgateway_prometheus_query_latency_seconds",
Help: "Prometheus client query latency in seconds. Broken down by target prometheus endpoint and target server",
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 10),
},
[]string{"path", "server"},
[]string{"endpoint", "server"},
)
// define a counter for API errors for various ErrorTypes
apiErrorCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Namespace: "prometheus_adapter",
Subsystem: "prometheus_client",
Name: "api_errors_total",
Help: "Total number of API errors",
},
[]string{"error_code", "path", "server"},
)
)
func MetricsHandler() (http.HandlerFunc, error) {
registry := metrics.NewKubeRegistry()
errRegisterQueryLatency := registry.Register(queryLatency)
if errRegisterQueryLatency != nil {
return nil, errRegisterQueryLatency
}
errRegisterAPIErrorCount := registry.Register(apiErrorCount)
if errRegisterAPIErrorCount != nil {
return nil, errRegisterAPIErrorCount
}
apimetrics.Register()
return func(w http.ResponseWriter, req *http.Request) {
legacyregistry.Handler().ServeHTTP(w, req)
metrics.HandlerFor(registry, metrics.HandlerOpts{}).ServeHTTP(w, req)
}, nil
func init() {
prometheus.MustRegister(queryLatency)
}
// instrumentedClient is a client.GenericAPIClient which instruments calls to Do,
@@ -92,16 +58,12 @@ func (c *instrumentedGenericClient) Do(ctx context.Context, verb, endpoint strin
endTime := time.Now()
// skip calls where we don't make the actual request
if err != nil {
if apiErr, wasAPIErr := err.(*client.Error); wasAPIErr {
// Measure API errors
apiErrorCount.With(prometheus.Labels{"error_code": string(apiErr.Type), "path": endpoint, "server": c.serverName}).Inc()
} else {
// Increment a generic error code counter
apiErrorCount.With(prometheus.Labels{"error_code": "generic", "path": endpoint, "server": c.serverName}).Inc()
}
return
if _, wasAPIErr := err.(*client.Error); !wasAPIErr {
// TODO: measure API errors by code?
return
}
}
queryLatency.With(prometheus.Labels{"path": endpoint, "server": c.serverName}).Observe(endTime.Sub(startTime).Seconds())
queryLatency.With(prometheus.Labels{"endpoint": endpoint, "server": c.serverName}).Observe(endTime.Sub(startTime).Seconds())
}()
var resp client.APIResponse