Update custom-metrics-apiserver and metrics-server

This commit is contained in:
Johannes Würbach 2020-09-27 22:14:53 +02:00
parent 4c673534f2
commit b480e45a67
No known key found for this signature in database
GPG key ID: 74DB0F4D956CCCE3
915 changed files with 63694 additions and 106514 deletions

View file

@ -27,8 +27,7 @@ import (
"sync"
"time"
"github.com/emicklei/go-restful"
restful "github.com/emicklei/go-restful"
"k8s.io/apimachinery/pkg/apis/meta/v1/validation"
"k8s.io/apimachinery/pkg/types"
utilsets "k8s.io/apimachinery/pkg/util/sets"
@ -48,6 +47,8 @@ type resettableCollector interface {
const (
APIServerComponent string = "apiserver"
OtherContentType string = "other"
OtherRequestMethod string = "other"
)
/*
@ -64,23 +65,14 @@ var (
requestCounter = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Name: "apiserver_request_total",
Help: "Counter of apiserver requests broken out for each verb, dry run value, group, version, resource, scope, component, client, and HTTP response contentType and code.",
Help: "Counter of apiserver requests broken out for each verb, dry run value, group, version, resource, scope, component, and HTTP response contentType and code.",
StabilityLevel: compbasemetrics.ALPHA,
},
// The label_name contentType doesn't follow the label_name convention defined here:
// https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/instrumentation.md
// But changing it would break backwards compatibility. Future label_names
// should be all lowercase and separated by underscores.
[]string{"verb", "dry_run", "group", "version", "resource", "subresource", "scope", "component", "client", "contentType", "code"},
)
deprecatedRequestCounter = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Name: "apiserver_request_count",
Help: "Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.",
StabilityLevel: compbasemetrics.ALPHA,
DeprecatedVersion: "1.14.0",
},
[]string{"verb", "group", "version", "resource", "subresource", "scope", "component", "client", "contentType", "code"},
[]string{"verb", "dry_run", "group", "version", "resource", "subresource", "scope", "component", "contentType", "code"},
)
longRunningRequestGauge = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
@ -103,29 +95,6 @@ var (
},
[]string{"verb", "dry_run", "group", "version", "resource", "subresource", "scope", "component"},
)
deprecatedRequestLatencies = compbasemetrics.NewHistogramVec(
&compbasemetrics.HistogramOpts{
Name: "apiserver_request_latencies",
Help: "Response latency distribution in microseconds for each verb, group, version, resource, subresource, scope and component.",
// Use buckets ranging from 125 ms to 8 seconds.
Buckets: compbasemetrics.ExponentialBuckets(125000, 2.0, 7),
StabilityLevel: compbasemetrics.ALPHA,
DeprecatedVersion: "1.14.0",
},
[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
)
deprecatedRequestLatenciesSummary = compbasemetrics.NewSummaryVec(
&compbasemetrics.SummaryOpts{
Name: "apiserver_request_latencies_summary",
Help: "Response latency summary in microseconds for each verb, group, version, resource, subresource, scope and component.",
// Make the sliding window of 5h.
// TODO: The value for this should be based on our SLI definition (medium term).
MaxAge: 5 * time.Hour,
StabilityLevel: compbasemetrics.ALPHA,
DeprecatedVersion: "1.14.0",
},
[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
)
responseSizes = compbasemetrics.NewHistogramVec(
&compbasemetrics.HistogramOpts{
Name: "apiserver_response_sizes",
@ -145,15 +114,6 @@ var (
},
[]string{"requestKind"},
)
DeprecatedDroppedRequests = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Name: "apiserver_dropped_requests",
Help: "Number of requests dropped with 'Try again later' response",
StabilityLevel: compbasemetrics.ALPHA,
DeprecatedVersion: "1.14.0",
},
[]string{"requestKind"},
)
// RegisteredWatchers is a number of currently registered watchers splitted by resource.
RegisteredWatchers = compbasemetrics.NewGaugeVec(
&compbasemetrics.GaugeOpts{
@ -203,20 +163,47 @@ var (
metrics = []resettableCollector{
requestCounter,
deprecatedRequestCounter,
longRunningRequestGauge,
requestLatencies,
deprecatedRequestLatencies,
deprecatedRequestLatenciesSummary,
responseSizes,
DroppedRequests,
DeprecatedDroppedRequests,
RegisteredWatchers,
WatchEvents,
WatchEventsSizes,
currentInflightRequests,
requestTerminationsTotal,
}
// these are the known (e.g. whitelisted/known) content types which we will report for
// request metrics. Any other RFC compliant content types will be aggregated under 'unknown'
knownMetricContentTypes = utilsets.NewString(
"application/apply-patch+yaml",
"application/json",
"application/json-patch+json",
"application/merge-patch+json",
"application/strategic-merge-patch+json",
"application/vnd.kubernetes.protobuf",
"application/vnd.kubernetes.protobuf;stream=watch",
"application/yaml",
"text/plain",
"text/plain;charset=utf-8")
// these are the valid request methods which we report in our metrics. Any other request methods
// will be aggregated under 'unknown'
validRequestMethods = utilsets.NewString(
"APPLY",
"CONNECT",
"CREATE",
"DELETE",
"DELETECOLLECTION",
"GET",
"LIST",
"PATCH",
"POST",
"PROXY",
"PUT",
"UPDATE",
"WATCH",
"WATCHLIST")
)
const (
@ -264,6 +251,10 @@ func RecordRequestTermination(req *http.Request, requestInfo *request.RequestInf
// translated to RequestInfo).
// However, we need to tweak it e.g. to differentiate GET from LIST.
verb := canonicalVerb(strings.ToUpper(req.Method), scope)
// set verbs to a bounded set of known and expected verbs
if !validRequestMethods.Has(verb) {
verb = OtherRequestMethod
}
if requestInfo.IsResourceRequest {
requestTerminationsTotal.WithLabelValues(cleanVerb(verb, req), requestInfo.APIGroup, requestInfo.APIVersion, requestInfo.Resource, requestInfo.Subresource, scope, component, codeToString(code)).Inc()
} else {
@ -300,15 +291,10 @@ func RecordLongRunning(req *http.Request, requestInfo *request.RequestInfo, comp
func MonitorRequest(req *http.Request, verb, group, version, resource, subresource, scope, component, contentType string, httpCode, respSize int, elapsed time.Duration) {
reportedVerb := cleanVerb(verb, req)
dryRun := cleanDryRun(req.URL)
// blank out client string here, in order to avoid cardinality issues
client := ""
elapsedMicroseconds := float64(elapsed / time.Microsecond)
elapsedSeconds := elapsed.Seconds()
requestCounter.WithLabelValues(reportedVerb, dryRun, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc()
deprecatedRequestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc()
cleanContentType := cleanContentType(contentType)
requestCounter.WithLabelValues(reportedVerb, dryRun, group, version, resource, subresource, scope, component, cleanContentType, codeToString(httpCode)).Inc()
requestLatencies.WithLabelValues(reportedVerb, dryRun, group, version, resource, subresource, scope, component).Observe(elapsedSeconds)
deprecatedRequestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
deprecatedRequestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
// We are only interested in response sizes of read requests.
if verb == "GET" || verb == "LIST" {
responseSizes.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(float64(respSize))
@ -362,6 +348,19 @@ func InstrumentHandlerFunc(verb, group, version, resource, subresource, scope, c
}
}
// cleanContentType binds the contentType (for metrics related purposes) to a
// bounded set of known/expected content-types.
func cleanContentType(contentType string) string {
normalizedContentType := strings.ToLower(contentType)
if strings.HasSuffix(contentType, " stream=watch") || strings.HasSuffix(contentType, " charset=utf-8") {
normalizedContentType = strings.ReplaceAll(contentType, " ", "")
}
if knownMetricContentTypes.Has(normalizedContentType) {
return normalizedContentType
}
return OtherContentType
}
// CleanScope returns the scope of the request.
func CleanScope(requestInfo *request.RequestInfo) string {
if requestInfo.Namespace != "" {
@ -406,7 +405,10 @@ func cleanVerb(verb string, request *http.Request) string {
if verb == "PATCH" && request.Header.Get("Content-Type") == string(types.ApplyPatchType) && utilfeature.DefaultFeatureGate.Enabled(features.ServerSideApply) {
reportedVerb = "APPLY"
}
return reportedVerb
if validRequestMethods.Has(reportedVerb) {
return reportedVerb
}
return OtherRequestMethod
}
func cleanDryRun(u *url.URL) string {
@ -425,19 +427,6 @@ func cleanDryRun(u *url.URL) string {
return strings.Join(utilsets.NewString(dryRun...).List(), ",")
}
func cleanUserAgent(ua string) string {
// We collapse all "web browser"-type user agents into one "browser" to reduce metric cardinality.
if strings.HasPrefix(ua, "Mozilla/") {
return "Browser"
}
// If an old "kubectl.exe" has passed us its full path, we discard the path portion.
if kubectlExeRegexp.MatchString(ua) {
// avoid an allocation
ua = kubectlExeRegexp.ReplaceAllString(ua, "$1")
}
return ua
}
// ResponseWriterDelegator interface wraps http.ResponseWriter to additionally record content-length, status-code, etc.
type ResponseWriterDelegator struct {
http.ResponseWriter