mirror of
https://github.com/kubernetes-sigs/prometheus-adapter.git
synced 2026-04-06 01:38:10 +00:00
Adding support to dynamically discover metrics configs across all namespaces in the cluster
This commit is contained in:
parent
17cef511b1
commit
538dee837e
3 changed files with 134 additions and 26 deletions
|
|
@ -19,6 +19,7 @@ package main
|
||||||
import (
|
import (
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"crypto/x509"
|
"crypto/x509"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
|
@ -82,6 +83,10 @@ type PrometheusAdapter struct {
|
||||||
MetricsMaxAge time.Duration
|
MetricsMaxAge time.Duration
|
||||||
// DisableHTTP2 indicates that http2 should not be enabled.
|
// DisableHTTP2 indicates that http2 should not be enabled.
|
||||||
DisableHTTP2 bool
|
DisableHTTP2 bool
|
||||||
|
// Load Dynamic Adapter Configurations
|
||||||
|
EnableMetricsConfigsDiscovery bool
|
||||||
|
// ConfigMap labels to select on
|
||||||
|
MetricsConfigsLabels string
|
||||||
metricsConfig *adaptercfg.MetricsDiscoveryConfig
|
metricsConfig *adaptercfg.MetricsDiscoveryConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -157,6 +162,10 @@ func (cmd *PrometheusAdapter) addFlags() {
|
||||||
"period for which to query the set of available metrics from Prometheus")
|
"period for which to query the set of available metrics from Prometheus")
|
||||||
cmd.Flags().BoolVar(&cmd.DisableHTTP2, "disable-http2", cmd.DisableHTTP2,
|
cmd.Flags().BoolVar(&cmd.DisableHTTP2, "disable-http2", cmd.DisableHTTP2,
|
||||||
"Disable HTTP/2 support")
|
"Disable HTTP/2 support")
|
||||||
|
cmd.Flags().BoolVar(&cmd.EnableMetricsConfigsDiscovery, "enable-metrics-configs-disovery", cmd.EnableMetricsConfigsDiscovery,
|
||||||
|
"Load metrics configuration dynamically by querying the cluster for configmaps")
|
||||||
|
cmd.Flags().StringVar(&cmd.MetricsConfigsLabels, "metrics-configs-labels", cmd.MetricsConfigsLabels,
|
||||||
|
"Labels to query on while filtering ConfigMap objects when dynamically discovering metrics configuration")
|
||||||
|
|
||||||
// Add logging flags
|
// Add logging flags
|
||||||
logs.AddFlags(cmd.Flags())
|
logs.AddFlags(cmd.Flags())
|
||||||
|
|
@ -165,7 +174,16 @@ func (cmd *PrometheusAdapter) addFlags() {
|
||||||
func (cmd *PrometheusAdapter) loadConfig() error {
|
func (cmd *PrometheusAdapter) loadConfig() error {
|
||||||
// load metrics discovery configuration
|
// load metrics discovery configuration
|
||||||
if cmd.AdapterConfigFile == "" {
|
if cmd.AdapterConfigFile == "" {
|
||||||
return fmt.Errorf("no metrics discovery configuration file specified (make sure to use --config)")
|
if !cmd.EnableMetricsConfigsDiscovery {
|
||||||
|
return fmt.Errorf("loading dynamic config is turned off, and no metrics discovery configuration file specified (make sure to use --config)")
|
||||||
|
}
|
||||||
|
// Assign empty metrics config to prevent nilptr exceptions
|
||||||
|
cmd.metricsConfig = &adaptercfg.MetricsDiscoveryConfig{
|
||||||
|
Rules: []adaptercfg.DiscoveryRule{},
|
||||||
|
ExternalRules: []adaptercfg.DiscoveryRule{},
|
||||||
|
ResourceRules: &adaptercfg.ResourceRules{},
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
metricsConfig, err := adaptercfg.FromFile(cmd.AdapterConfigFile)
|
metricsConfig, err := adaptercfg.FromFile(cmd.AdapterConfigFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@ -178,7 +196,7 @@ func (cmd *PrometheusAdapter) loadConfig() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cmd *PrometheusAdapter) makeProvider(promClient prom.Client, stopCh <-chan struct{}) (provider.CustomMetricsProvider, error) {
|
func (cmd *PrometheusAdapter) makeProvider(promClient prom.Client, stopCh <-chan struct{}) (provider.CustomMetricsProvider, error) {
|
||||||
if len(cmd.metricsConfig.Rules) == 0 {
|
if len(cmd.metricsConfig.Rules) == 0 && !cmd.EnableMetricsConfigsDiscovery {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -203,7 +221,7 @@ func (cmd *PrometheusAdapter) makeProvider(promClient prom.Client, stopCh <-chan
|
||||||
}
|
}
|
||||||
|
|
||||||
// construct the provider and start it
|
// construct the provider and start it
|
||||||
cmProvider, runner := cmprov.NewPrometheusProvider(mapper, dynClient, promClient, namers, cmd.MetricsRelistInterval, cmd.MetricsMaxAge)
|
cmProvider, runner := cmprov.NewPrometheusProvider(mapper, dynClient, promClient, namers, cmd.MetricsRelistInterval, cmd.MetricsMaxAge, cmd.EnableMetricsConfigsDiscovery, cmd.MetricsConfigsLabels)
|
||||||
runner.RunUntil(stopCh)
|
runner.RunUntil(stopCh)
|
||||||
|
|
||||||
return cmProvider, nil
|
return cmProvider, nil
|
||||||
|
|
@ -340,6 +358,11 @@ func main() {
|
||||||
klog.Fatalf("unable to load metrics discovery config: %v", err)
|
klog.Fatalf("unable to load metrics discovery config: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// verify the dynamic metrics loading properties
|
||||||
|
if cmd.EnableMetricsConfigsDiscovery && cmd.MetricsConfigsLabels == "" {
|
||||||
|
klog.Fatal("", errors.New("Did not specify --metrics-configs-labels to provide a list of labels to select on but metrics configs discovery is turned on"))
|
||||||
|
}
|
||||||
|
|
||||||
// stop channel closed on SIGTERM and SIGINT
|
// stop channel closed on SIGTERM and SIGINT
|
||||||
stopCh := genericapiserver.SetupSignalHandler()
|
stopCh := genericapiserver.SetupSignalHandler()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,11 @@ package provider
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
corev1 "k8s.io/api/core/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
"math"
|
"math"
|
||||||
|
adaptercfg "sigs.k8s.io/prometheus-adapter/pkg/config"
|
||||||
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
pmodel "github.com/prometheus/common/model"
|
pmodel "github.com/prometheus/common/model"
|
||||||
|
|
@ -51,31 +55,44 @@ type Runnable interface {
|
||||||
RunUntil(stopChan <-chan struct{})
|
RunUntil(stopChan <-chan struct{})
|
||||||
}
|
}
|
||||||
|
|
||||||
type prometheusProvider struct {
|
type kubeClientAndMapper struct {
|
||||||
mapper apimeta.RESTMapper
|
|
||||||
kubeClient dynamic.Interface
|
kubeClient dynamic.Interface
|
||||||
|
mapper apimeta.RESTMapper
|
||||||
|
}
|
||||||
|
|
||||||
|
type prometheusProvider struct {
|
||||||
promClient prom.Client
|
promClient prom.Client
|
||||||
|
|
||||||
|
kubeClientAndMapper
|
||||||
SeriesRegistry
|
SeriesRegistry
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewPrometheusProvider(mapper apimeta.RESTMapper, kubeClient dynamic.Interface, promClient prom.Client, namers []naming.MetricNamer, updateInterval time.Duration, maxAge time.Duration) (provider.CustomMetricsProvider, Runnable) {
|
func NewPrometheusProvider(mapper apimeta.RESTMapper, kubeClient dynamic.Interface, promClient prom.Client, namers []naming.MetricNamer, updateInterval time.Duration, maxAge time.Duration, enableMetricsConfigsDiscovery bool, metricsConfigsLabels string) (provider.CustomMetricsProvider, Runnable) {
|
||||||
lister := &cachingMetricsLister{
|
lister := &cachingMetricsLister{
|
||||||
updateInterval: updateInterval,
|
updateInterval: updateInterval,
|
||||||
maxAge: maxAge,
|
maxAge: maxAge,
|
||||||
promClient: promClient,
|
promClient: promClient,
|
||||||
namers: namers,
|
namers: namers,
|
||||||
|
enableMetricsConfigsDiscovery: enableMetricsConfigsDiscovery,
|
||||||
|
discoveredNamers: atomic.Pointer[[]naming.MetricNamer]{},
|
||||||
|
metricsConfigsLabels: metricsConfigsLabels,
|
||||||
|
|
||||||
SeriesRegistry: &basicSeriesRegistry{
|
SeriesRegistry: &basicSeriesRegistry{
|
||||||
mapper: mapper,
|
mapper: mapper,
|
||||||
},
|
},
|
||||||
|
kubeClientAndMapper: kubeClientAndMapper{
|
||||||
|
kubeClient: kubeClient,
|
||||||
|
mapper: mapper,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
return &prometheusProvider{
|
return &prometheusProvider{
|
||||||
mapper: mapper,
|
|
||||||
kubeClient: kubeClient,
|
|
||||||
promClient: promClient,
|
promClient: promClient,
|
||||||
|
|
||||||
|
kubeClientAndMapper: kubeClientAndMapper{
|
||||||
|
kubeClient: kubeClient,
|
||||||
|
mapper: mapper,
|
||||||
|
},
|
||||||
SeriesRegistry: lister,
|
SeriesRegistry: lister,
|
||||||
}, lister
|
}, lister
|
||||||
}
|
}
|
||||||
|
|
@ -212,11 +229,15 @@ func (p *prometheusProvider) GetMetricBySelector(ctx context.Context, namespace
|
||||||
|
|
||||||
type cachingMetricsLister struct {
|
type cachingMetricsLister struct {
|
||||||
SeriesRegistry
|
SeriesRegistry
|
||||||
|
kubeClientAndMapper
|
||||||
|
|
||||||
promClient prom.Client
|
promClient prom.Client
|
||||||
updateInterval time.Duration
|
updateInterval time.Duration
|
||||||
maxAge time.Duration
|
maxAge time.Duration
|
||||||
namers []naming.MetricNamer
|
namers []naming.MetricNamer
|
||||||
|
enableMetricsConfigsDiscovery bool
|
||||||
|
discoveredNamers atomic.Pointer[[]naming.MetricNamer]
|
||||||
|
metricsConfigsLabels string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *cachingMetricsLister) Run() {
|
func (l *cachingMetricsLister) Run() {
|
||||||
|
|
@ -239,15 +260,24 @@ type selectorSeries struct {
|
||||||
func (l *cachingMetricsLister) updateMetrics() error {
|
func (l *cachingMetricsLister) updateMetrics() error {
|
||||||
startTime := pmodel.Now().Add(-1 * l.maxAge)
|
startTime := pmodel.Now().Add(-1 * l.maxAge)
|
||||||
|
|
||||||
|
var allNamers []naming.MetricNamer
|
||||||
|
|
||||||
|
if l.enableMetricsConfigsDiscovery {
|
||||||
|
l.discoverMetricsConfigs()
|
||||||
|
allNamers = append(l.namers, *l.discoveredNamers.Load()...)
|
||||||
|
} else {
|
||||||
|
allNamers = l.namers
|
||||||
|
}
|
||||||
|
|
||||||
// don't do duplicate queries when it's just the matchers that change
|
// don't do duplicate queries when it's just the matchers that change
|
||||||
seriesCacheByQuery := make(map[prom.Selector][]prom.Series)
|
seriesCacheByQuery := make(map[prom.Selector][]prom.Series)
|
||||||
|
|
||||||
// these can take a while on large clusters, so launch in parallel
|
// these can take a while on large clusters, so launch in parallel
|
||||||
// and don't duplicate
|
// and don't duplicate
|
||||||
selectors := make(map[prom.Selector]struct{})
|
selectors := make(map[prom.Selector]struct{})
|
||||||
selectorSeriesChan := make(chan selectorSeries, len(l.namers))
|
selectorSeriesChan := make(chan selectorSeries, len(allNamers))
|
||||||
errs := make(chan error, len(l.namers))
|
errs := make(chan error, len(allNamers))
|
||||||
for _, namer := range l.namers {
|
for _, namer := range allNamers {
|
||||||
sel := namer.Selector()
|
sel := namer.Selector()
|
||||||
if _, ok := selectors[sel]; ok {
|
if _, ok := selectors[sel]; ok {
|
||||||
errs <- nil
|
errs <- nil
|
||||||
|
|
@ -270,7 +300,7 @@ func (l *cachingMetricsLister) updateMetrics() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// iterate through, blocking until we've got all results
|
// iterate through, blocking until we've got all results
|
||||||
for range l.namers {
|
for range allNamers {
|
||||||
if err := <-errs; err != nil {
|
if err := <-errs; err != nil {
|
||||||
return fmt.Errorf("unable to update list of all metrics: %v", err)
|
return fmt.Errorf("unable to update list of all metrics: %v", err)
|
||||||
}
|
}
|
||||||
|
|
@ -280,8 +310,8 @@ func (l *cachingMetricsLister) updateMetrics() error {
|
||||||
}
|
}
|
||||||
close(errs)
|
close(errs)
|
||||||
|
|
||||||
newSeries := make([][]prom.Series, len(l.namers))
|
newSeries := make([][]prom.Series, len(allNamers))
|
||||||
for i, namer := range l.namers {
|
for i, namer := range allNamers {
|
||||||
series, cached := seriesCacheByQuery[namer.Selector()]
|
series, cached := seriesCacheByQuery[namer.Selector()]
|
||||||
if !cached {
|
if !cached {
|
||||||
return fmt.Errorf("unable to update list of all metrics: no metrics retrieved for query %q", namer.Selector())
|
return fmt.Errorf("unable to update list of all metrics: no metrics retrieved for query %q", namer.Selector())
|
||||||
|
|
@ -291,5 +321,60 @@ func (l *cachingMetricsLister) updateMetrics() error {
|
||||||
|
|
||||||
klog.V(10).Infof("Set available metric list from Prometheus to: %v", newSeries)
|
klog.V(10).Infof("Set available metric list from Prometheus to: %v", newSeries)
|
||||||
|
|
||||||
return l.SetSeries(newSeries, l.namers)
|
return l.SetSeries(newSeries, allNamers)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *cachingMetricsLister) discoverMetricsConfigs() {
|
||||||
|
configmaps, err := l.kubeClient.Resource(corev1.SchemeGroupVersion.WithResource("configmaps")).Namespace("").List(context.TODO(), metav1.ListOptions{
|
||||||
|
LabelSelector: l.metricsConfigsLabels,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
klog.V(5).ErrorS(err, "Could not obtain configmaps from apiserver with label: ", "label", l.metricsConfigsLabels)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var discoveredNamers []naming.MetricNamer
|
||||||
|
var errs []error
|
||||||
|
|
||||||
|
for _, cm := range configmaps.Items {
|
||||||
|
var configmap corev1.ConfigMap
|
||||||
|
err := runtime.DefaultUnstructuredConverter.FromUnstructured(cm.UnstructuredContent(), &configmap)
|
||||||
|
if err != nil {
|
||||||
|
klog.V(5).ErrorS(err, "Could not convert unstructured ConfigMap to structured representation.")
|
||||||
|
}
|
||||||
|
|
||||||
|
if configmap.Data == nil {
|
||||||
|
klog.V(5).ErrorS(err, "ConfigMap does not have any data in it for name="+configmap.ObjectMeta.Name)
|
||||||
|
errs = append(errs, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
c, ok := configmap.Data["config.yaml"]
|
||||||
|
if !ok {
|
||||||
|
klog.V(5).ErrorS(err, "ConfigMap does not have the adapter YAML config under 'config.yaml' for="+configmap.ObjectMeta.Name)
|
||||||
|
errs = append(errs, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
metricsConfig, err := adaptercfg.FromYAML([]byte(c))
|
||||||
|
if err != nil {
|
||||||
|
klog.V(5).ErrorS(err, "Could not unmarshal metrics config for name="+configmap.ObjectMeta.Name)
|
||||||
|
errs = append(errs, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
namers, err := naming.NamersFromConfig(metricsConfig.Rules, l.mapper)
|
||||||
|
if err != nil {
|
||||||
|
klog.V(5).ErrorS(err, "Could not create a metric namer from given config for name="+configmap.ObjectMeta.Name)
|
||||||
|
errs = append(errs, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
discoveredNamers = append(discoveredNamers, namers...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(errs) == 0 {
|
||||||
|
klog.V(5).Infof("Found %d namers, replacing the old namers with the new ones.", len(discoveredNamers))
|
||||||
|
l.discoveredNamers.Store(&discoveredNamers)
|
||||||
|
} else {
|
||||||
|
klog.V(5).Infof("Found errors while creating namers from config -- not updating the existing list of dynamically discovered namers.")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ func setupPrometheusProvider() (provider.CustomMetricsProvider, *fakeprom.FakePr
|
||||||
namers, err := naming.NamersFromConfig(cfg.Rules, restMapper())
|
namers, err := naming.NamersFromConfig(cfg.Rules, restMapper())
|
||||||
Expect(err).NotTo(HaveOccurred())
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
|
||||||
prov, _ := NewPrometheusProvider(restMapper(), fakeKubeClient, fakeProm, namers, fakeProviderUpdateInterval, fakeProviderStartDuration)
|
prov, _ := NewPrometheusProvider(restMapper(), fakeKubeClient, fakeProm, namers, fakeProviderUpdateInterval, fakeProviderStartDuration, false, "")
|
||||||
|
|
||||||
containerSel := prom.MatchSeries("", prom.NameMatches("^container_.*"), prom.LabelNeq("container", "POD"), prom.LabelNeq("namespace", ""), prom.LabelNeq("pod", ""))
|
containerSel := prom.MatchSeries("", prom.NameMatches("^container_.*"), prom.LabelNeq("container", "POD"), prom.LabelNeq("namespace", ""), prom.LabelNeq("pod", ""))
|
||||||
namespacedSel := prom.MatchSeries("", prom.LabelNeq("namespace", ""), prom.NameNotMatches("^container_.*"))
|
namespacedSel := prom.MatchSeries("", prom.LabelNeq("namespace", ""), prom.NameNotMatches("^container_.*"))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue