mirror of
https://github.com/kubernetes-sigs/prometheus-adapter.git
synced 2026-04-07 02:07:58 +00:00
One last shot at correcting things.
This commit is contained in:
parent
b3e1323a1c
commit
20af7fe865
7 changed files with 524 additions and 19 deletions
|
|
@ -24,7 +24,6 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
apimeta "k8s.io/apimachinery/pkg/api/meta"
|
|
||||||
"k8s.io/client-go/discovery"
|
"k8s.io/client-go/discovery"
|
||||||
"k8s.io/client-go/dynamic"
|
"k8s.io/client-go/dynamic"
|
||||||
"k8s.io/client-go/rest"
|
"k8s.io/client-go/rest"
|
||||||
|
|
@ -32,6 +31,7 @@ import (
|
||||||
|
|
||||||
prom "github.com/directxman12/k8s-prometheus-adapter/pkg/client"
|
prom "github.com/directxman12/k8s-prometheus-adapter/pkg/client"
|
||||||
mprom "github.com/directxman12/k8s-prometheus-adapter/pkg/client/metrics"
|
mprom "github.com/directxman12/k8s-prometheus-adapter/pkg/client/metrics"
|
||||||
|
adaptercfg "github.com/directxman12/k8s-prometheus-adapter/pkg/config"
|
||||||
cmprov "github.com/directxman12/k8s-prometheus-adapter/pkg/custom-provider"
|
cmprov "github.com/directxman12/k8s-prometheus-adapter/pkg/custom-provider"
|
||||||
"github.com/kubernetes-incubator/custom-metrics-apiserver/pkg/cmd/server"
|
"github.com/kubernetes-incubator/custom-metrics-apiserver/pkg/cmd/server"
|
||||||
"github.com/kubernetes-incubator/custom-metrics-apiserver/pkg/dynamicmapper"
|
"github.com/kubernetes-incubator/custom-metrics-apiserver/pkg/dynamicmapper"
|
||||||
|
|
@ -43,9 +43,7 @@ func NewCommandStartPrometheusAdapterServer(out, errOut io.Writer, stopCh <-chan
|
||||||
o := PrometheusAdapterServerOptions{
|
o := PrometheusAdapterServerOptions{
|
||||||
CustomMetricsAdapterServerOptions: baseOpts,
|
CustomMetricsAdapterServerOptions: baseOpts,
|
||||||
MetricsRelistInterval: 10 * time.Minute,
|
MetricsRelistInterval: 10 * time.Minute,
|
||||||
RateInterval: 5 * time.Minute,
|
|
||||||
PrometheusURL: "https://localhost",
|
PrometheusURL: "https://localhost",
|
||||||
DiscoveryInterval: 10 * time.Minute,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd := &cobra.Command{
|
cmd := &cobra.Command{
|
||||||
|
|
@ -76,19 +74,20 @@ func NewCommandStartPrometheusAdapterServer(out, errOut io.Writer, stopCh <-chan
|
||||||
"any described objets")
|
"any described objets")
|
||||||
flags.DurationVar(&o.MetricsRelistInterval, "metrics-relist-interval", o.MetricsRelistInterval, ""+
|
flags.DurationVar(&o.MetricsRelistInterval, "metrics-relist-interval", o.MetricsRelistInterval, ""+
|
||||||
"interval at which to re-list the set of all available metrics from Prometheus")
|
"interval at which to re-list the set of all available metrics from Prometheus")
|
||||||
flags.DurationVar(&o.RateInterval, "rate-interval", o.RateInterval, ""+
|
|
||||||
"period of time used to calculate rate metrics from cumulative metrics")
|
|
||||||
flags.DurationVar(&o.DiscoveryInterval, "discovery-interval", o.DiscoveryInterval, ""+
|
flags.DurationVar(&o.DiscoveryInterval, "discovery-interval", o.DiscoveryInterval, ""+
|
||||||
"interval at which to refresh API discovery information")
|
"interval at which to refresh API discovery information")
|
||||||
flags.StringVar(&o.PrometheusURL, "prometheus-url", o.PrometheusURL,
|
flags.StringVar(&o.PrometheusURL, "prometheus-url", o.PrometheusURL,
|
||||||
"URL for connecting to Prometheus. Query parameters are used to configure the connection")
|
"URL for connecting to Prometheus.")
|
||||||
flags.BoolVar(&o.PrometheusAuthInCluster, "prometheus-auth-incluster", o.PrometheusAuthInCluster,
|
flags.BoolVar(&o.PrometheusAuthInCluster, "prometheus-auth-incluster", o.PrometheusAuthInCluster,
|
||||||
"use auth details from the in-cluster kubeconfig when connecting to prometheus.")
|
"use auth details from the in-cluster kubeconfig when connecting to prometheus.")
|
||||||
flags.StringVar(&o.PrometheusAuthConf, "prometheus-auth-config", o.PrometheusAuthConf,
|
flags.StringVar(&o.PrometheusAuthConf, "prometheus-auth-config", o.PrometheusAuthConf,
|
||||||
"kubeconfig file used to configure auth when connecting to Prometheus.")
|
"kubeconfig file used to configure auth when connecting to Prometheus.")
|
||||||
flags.StringVar(&o.LabelPrefix, "label-prefix", o.LabelPrefix,
|
flags.StringVar(&o.AdapterConfigFile, "config", o.AdapterConfigFile,
|
||||||
"Prefix to expect on labels referring to pod resources. For example, if the prefix is "+
|
"Configuration file containing details of how to transform between Prometheus metrics "+
|
||||||
"'kube_', any series with the 'kube_pod' label would be considered a pod metric")
|
"and custom metrics API resources")
|
||||||
|
|
||||||
|
cmd.MarkFlagRequired("config")
|
||||||
|
|
||||||
return cmd
|
return cmd
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -128,6 +127,15 @@ func makeHTTPClient(inClusterAuth bool, kubeConfigPath string) (*http.Client, er
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o PrometheusAdapterServerOptions) RunCustomMetricsAdapterServer(stopCh <-chan struct{}) error {
|
func (o PrometheusAdapterServerOptions) RunCustomMetricsAdapterServer(stopCh <-chan struct{}) error {
|
||||||
|
if o.AdapterConfigFile == "" {
|
||||||
|
return fmt.Errorf("no discovery configuration file specified")
|
||||||
|
}
|
||||||
|
|
||||||
|
metricsConfig, err := adaptercfg.FromFile(o.AdapterConfigFile)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("unable to load metrics discovery configuration: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
config, err := o.Config()
|
config, err := o.Config()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
@ -153,12 +161,12 @@ func (o PrometheusAdapterServerOptions) RunCustomMetricsAdapterServer(stopCh <-c
|
||||||
return fmt.Errorf("unable to construct discovery client for dynamic client: %v", err)
|
return fmt.Errorf("unable to construct discovery client for dynamic client: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
dynamicMapper, err := dynamicmapper.NewRESTMapper(discoveryClient, apimeta.InterfacesForUnstructured, o.DiscoveryInterval)
|
dynamicMapper, err := dynamicmapper.NewRESTMapper(discoveryClient, o.DiscoveryInterval)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to construct dynamic discovery mapper: %v", err)
|
return fmt.Errorf("unable to construct dynamic discovery mapper: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
clientPool := dynamic.NewClientPool(clientConfig, dynamicMapper, dynamic.LegacyAPIPathResolverFunc)
|
dynamicClient, err := dynamic.NewForConfig(clientConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to construct lister client to initialize provider: %v", err)
|
return fmt.Errorf("unable to construct lister client to initialize provider: %v", err)
|
||||||
}
|
}
|
||||||
|
|
@ -176,9 +184,15 @@ func (o PrometheusAdapterServerOptions) RunCustomMetricsAdapterServer(stopCh <-c
|
||||||
instrumentedGenericPromClient := mprom.InstrumentGenericAPIClient(genericPromClient, baseURL.String())
|
instrumentedGenericPromClient := mprom.InstrumentGenericAPIClient(genericPromClient, baseURL.String())
|
||||||
promClient := prom.NewClientForAPI(instrumentedGenericPromClient)
|
promClient := prom.NewClientForAPI(instrumentedGenericPromClient)
|
||||||
|
|
||||||
cmProvider := cmprov.NewCustomPrometheusProvider(dynamicMapper, clientPool, promClient, o.LabelPrefix, o.MetricsRelistInterval, o.RateInterval, stopCh)
|
namers, err := cmprov.NamersFromConfig(metricsConfig, dynamicMapper)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("unable to construct naming scheme from metrics rules: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
server, err := config.Complete().New("prometheus-custom-metrics-adapter", cmProvider)
|
cmProvider, runner := cmprov.NewCustomPrometheusProvider(dynamicMapper, dynamicClient, promClient, namers, o.MetricsRelistInterval)
|
||||||
|
runner.RunUntil(stopCh)
|
||||||
|
|
||||||
|
server, err := config.Complete().New("prometheus-custom-metrics-adapter", cmProvider, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
@ -192,8 +206,6 @@ type PrometheusAdapterServerOptions struct {
|
||||||
RemoteKubeConfigFile string
|
RemoteKubeConfigFile string
|
||||||
// MetricsRelistInterval is the interval at which to relist the set of available metrics
|
// MetricsRelistInterval is the interval at which to relist the set of available metrics
|
||||||
MetricsRelistInterval time.Duration
|
MetricsRelistInterval time.Duration
|
||||||
// RateInterval is the period of time used to calculate rate metrics
|
|
||||||
RateInterval time.Duration
|
|
||||||
// DiscoveryInterval is the interval at which discovery information is refreshed
|
// DiscoveryInterval is the interval at which discovery information is refreshed
|
||||||
DiscoveryInterval time.Duration
|
DiscoveryInterval time.Duration
|
||||||
// PrometheusURL is the URL describing how to connect to Prometheus. Query parameters configure connection options.
|
// PrometheusURL is the URL describing how to connect to Prometheus. Query parameters configure connection options.
|
||||||
|
|
@ -202,7 +214,6 @@ type PrometheusAdapterServerOptions struct {
|
||||||
PrometheusAuthInCluster bool
|
PrometheusAuthInCluster bool
|
||||||
// PrometheusAuthConf is the kubeconfig file that contains auth details used to connect to Prometheus
|
// PrometheusAuthConf is the kubeconfig file that contains auth details used to connect to Prometheus
|
||||||
PrometheusAuthConf string
|
PrometheusAuthConf string
|
||||||
// LabelPrefix is the prefix to expect on labels for Kubernetes resources
|
// AdapterConfigFile points to the file containing the metrics discovery configuration.
|
||||||
// (e.g. if the prefix is "kube_", we'd expect a "kube_pod" label for pod metrics).
|
AdapterConfigFile string
|
||||||
LabelPrefix string
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
44
cmd/config-gen/main.go
Normal file
44
cmd/config-gen/main.go
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/spf13/cobra"
|
||||||
|
yaml "gopkg.in/yaml.v2"
|
||||||
|
|
||||||
|
"github.com/directxman12/k8s-prometheus-adapter/cmd/config-gen/utils"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
var labelPrefix string
|
||||||
|
var rateInterval time.Duration
|
||||||
|
|
||||||
|
cmd := &cobra.Command{
|
||||||
|
Short: "Generate a config matching the legacy discovery rules",
|
||||||
|
Long: `Generate a config that produces the same functionality
|
||||||
|
as the legacy discovery rules. This includes discovering metrics and associating
|
||||||
|
resources according to the Kubernetes instrumention conventions and the cAdvisor
|
||||||
|
conventions, and auto-converting cumulative metrics into rate metrics.`,
|
||||||
|
RunE: func(c *cobra.Command, args []string) error {
|
||||||
|
cfg := utils.DefaultConfig(rateInterval, labelPrefix)
|
||||||
|
enc := yaml.NewEncoder(os.Stdout)
|
||||||
|
if err := enc.Encode(cfg); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return enc.Close()
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd.Flags().StringVar(&labelPrefix, "label-prefix", "",
|
||||||
|
"Prefix to expect on labels referring to pod resources. For example, if the prefix is "+
|
||||||
|
"'kube_', any series with the 'kube_pod' label would be considered a pod metric")
|
||||||
|
cmd.Flags().DurationVar(&rateInterval, "rate-interval", 5*time.Minute,
|
||||||
|
"Period of time used to calculate rate metrics from cumulative metrics")
|
||||||
|
|
||||||
|
if err := cmd.Execute(); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Unable to generate config: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
93
cmd/config-gen/utils/default.go
Normal file
93
cmd/config-gen/utils/default.go
Normal file
|
|
@ -0,0 +1,93 @@
|
||||||
|
package utils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
prom "github.com/directxman12/k8s-prometheus-adapter/pkg/client"
|
||||||
|
. "github.com/directxman12/k8s-prometheus-adapter/pkg/config"
|
||||||
|
pmodel "github.com/prometheus/common/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DefaultConfig returns a configuration equivalent to the former
|
||||||
|
// pre-advanced-config settings. This means that "normal" series labels
|
||||||
|
// will be of the form `<prefix><<.Resource>>`, cadvisor series will be
|
||||||
|
// of the form `container_`, and have the label `pod_name`. Any series ending
|
||||||
|
// in total will be treated as a rate metric.
|
||||||
|
func DefaultConfig(rateInterval time.Duration, labelPrefix string) *MetricsDiscoveryConfig {
|
||||||
|
return &MetricsDiscoveryConfig{
|
||||||
|
Rules: []DiscoveryRule{
|
||||||
|
// container seconds rate metrics
|
||||||
|
{
|
||||||
|
SeriesQuery: string(prom.MatchSeries("", prom.NameMatches("^container_.*"), prom.LabelNeq("container_name", "POD"), prom.LabelNeq("namespace", ""), prom.LabelNeq("pod_name", ""))),
|
||||||
|
Resources: ResourceMapping{
|
||||||
|
Overrides: map[string]GroupResource{
|
||||||
|
"namespace": {Resource: "namespace"},
|
||||||
|
"pod_name": {Resource: "pod"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Name: NameMapping{Matches: "^container_(.*)_seconds_total$"},
|
||||||
|
MetricsQuery: fmt.Sprintf(`sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[%s])) by (<<.GroupBy>>)`, pmodel.Duration(rateInterval).String()),
|
||||||
|
},
|
||||||
|
|
||||||
|
// container rate metrics
|
||||||
|
{
|
||||||
|
SeriesQuery: string(prom.MatchSeries("", prom.NameMatches("^container_.*"), prom.LabelNeq("container_name", "POD"), prom.LabelNeq("namespace", ""), prom.LabelNeq("pod_name", ""))),
|
||||||
|
SeriesFilters: []RegexFilter{{IsNot: "^container_.*_seconds_total$"}},
|
||||||
|
Resources: ResourceMapping{
|
||||||
|
Overrides: map[string]GroupResource{
|
||||||
|
"namespace": {Resource: "namespace"},
|
||||||
|
"pod_name": {Resource: "pod"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Name: NameMapping{Matches: "^container_(.*)_total$"},
|
||||||
|
MetricsQuery: fmt.Sprintf(`sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[%s])) by (<<.GroupBy>>)`, pmodel.Duration(rateInterval).String()),
|
||||||
|
},
|
||||||
|
|
||||||
|
// container non-cumulative metrics
|
||||||
|
{
|
||||||
|
SeriesQuery: string(prom.MatchSeries("", prom.NameMatches("^container_.*"), prom.LabelNeq("container_name", "POD"), prom.LabelNeq("namespace", ""), prom.LabelNeq("pod_name", ""))),
|
||||||
|
SeriesFilters: []RegexFilter{{IsNot: "^container_.*_total$"}},
|
||||||
|
Resources: ResourceMapping{
|
||||||
|
Overrides: map[string]GroupResource{
|
||||||
|
"namespace": {Resource: "namespace"},
|
||||||
|
"pod_name": {Resource: "pod"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Name: NameMapping{Matches: "^container_(.*)$"},
|
||||||
|
MetricsQuery: `sum(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}) by (<<.GroupBy>>)`,
|
||||||
|
},
|
||||||
|
|
||||||
|
// normal non-cumulative metrics
|
||||||
|
{
|
||||||
|
SeriesQuery: string(prom.MatchSeries("", prom.LabelNeq(fmt.Sprintf("%snamespace", labelPrefix), ""), prom.NameNotMatches("^container_.*"))),
|
||||||
|
SeriesFilters: []RegexFilter{{IsNot: ".*_total$"}},
|
||||||
|
Resources: ResourceMapping{
|
||||||
|
Template: fmt.Sprintf("%s<<.Resource>>", labelPrefix),
|
||||||
|
},
|
||||||
|
MetricsQuery: "sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)",
|
||||||
|
},
|
||||||
|
|
||||||
|
// normal rate metrics
|
||||||
|
{
|
||||||
|
SeriesQuery: string(prom.MatchSeries("", prom.LabelNeq(fmt.Sprintf("%snamespace", labelPrefix), ""), prom.NameNotMatches("^container_.*"))),
|
||||||
|
SeriesFilters: []RegexFilter{{IsNot: ".*_seconds_total"}},
|
||||||
|
Name: NameMapping{Matches: "^(.*)_total$"},
|
||||||
|
Resources: ResourceMapping{
|
||||||
|
Template: fmt.Sprintf("%s<<.Resource>>", labelPrefix),
|
||||||
|
},
|
||||||
|
MetricsQuery: fmt.Sprintf("sum(rate(<<.Series>>{<<.LabelMatchers>>}[%s])) by (<<.GroupBy>>)", pmodel.Duration(rateInterval).String()),
|
||||||
|
},
|
||||||
|
|
||||||
|
// seconds rate metrics
|
||||||
|
{
|
||||||
|
SeriesQuery: string(prom.MatchSeries("", prom.LabelNeq(fmt.Sprintf("%snamespace", labelPrefix), ""), prom.NameNotMatches("^container_.*"))),
|
||||||
|
Name: NameMapping{Matches: "^(.*)_seconds_total$"},
|
||||||
|
Resources: ResourceMapping{
|
||||||
|
Template: fmt.Sprintf("%s<<.Resource>>", labelPrefix),
|
||||||
|
},
|
||||||
|
MetricsQuery: fmt.Sprintf("sum(rate(<<.Series>>{<<.LabelMatchers>>}[%s])) by (<<.GroupBy>>)", pmodel.Duration(rateInterval).String()),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -28,15 +28,21 @@ spec:
|
||||||
- --logtostderr=true
|
- --logtostderr=true
|
||||||
- --prometheus-url=http://prometheus.prom.svc:9090/
|
- --prometheus-url=http://prometheus.prom.svc:9090/
|
||||||
- --metrics-relist-interval=30s
|
- --metrics-relist-interval=30s
|
||||||
- --rate-interval=5m
|
|
||||||
- --v=10
|
- --v=10
|
||||||
|
- --config=/default-config.yaml
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 6443
|
- containerPort: 6443
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- mountPath: /var/run/serving-cert
|
- mountPath: /var/run/serving-cert
|
||||||
name: volume-serving-cert
|
name: volume-serving-cert
|
||||||
readOnly: true
|
readOnly: true
|
||||||
|
- mountPath: /etc/adapter/
|
||||||
|
name: config
|
||||||
|
readOnly: true
|
||||||
volumes:
|
volumes:
|
||||||
- name: volume-serving-cert
|
- name: volume-serving-cert
|
||||||
secret:
|
secret:
|
||||||
secretName: cm-adapter-serving-certs
|
secretName: cm-adapter-serving-certs
|
||||||
|
- name: config
|
||||||
|
configMap:
|
||||||
|
name: adapter-config
|
||||||
|
|
|
||||||
74
deploy/manifests/custom-metrics-config-map.yaml
Normal file
74
deploy/manifests/custom-metrics-config-map.yaml
Normal file
|
|
@ -0,0 +1,74 @@
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: adapter-config
|
||||||
|
namespace: custom-metrics
|
||||||
|
data:
|
||||||
|
config.yaml: |
|
||||||
|
rules:
|
||||||
|
- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}'
|
||||||
|
seriesFilters: []
|
||||||
|
resources:
|
||||||
|
overrides:
|
||||||
|
namespace:
|
||||||
|
resource: namespace
|
||||||
|
pod_name:
|
||||||
|
resource: pod
|
||||||
|
name:
|
||||||
|
matches: ^container_(.*)_seconds_total$
|
||||||
|
as: ""
|
||||||
|
metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[5m]))
|
||||||
|
by (<<.GroupBy>>)
|
||||||
|
- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}'
|
||||||
|
seriesFilters:
|
||||||
|
- isNot: ^container_.*_seconds_total$
|
||||||
|
resources:
|
||||||
|
overrides:
|
||||||
|
namespace:
|
||||||
|
resource: namespace
|
||||||
|
pod_name:
|
||||||
|
resource: pod
|
||||||
|
name:
|
||||||
|
matches: ^container_(.*)_total$
|
||||||
|
as: ""
|
||||||
|
metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[5m]))
|
||||||
|
by (<<.GroupBy>>)
|
||||||
|
- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}'
|
||||||
|
seriesFilters:
|
||||||
|
- isNot: ^container_.*_total$
|
||||||
|
resources:
|
||||||
|
overrides:
|
||||||
|
namespace:
|
||||||
|
resource: namespace
|
||||||
|
pod_name:
|
||||||
|
resource: pod
|
||||||
|
name:
|
||||||
|
matches: ^container_(.*)$
|
||||||
|
as: ""
|
||||||
|
metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}) by (<<.GroupBy>>)
|
||||||
|
- seriesQuery: '{namespace!="",__name__!~"^container_.*"}'
|
||||||
|
seriesFilters:
|
||||||
|
- isNot: .*_total$
|
||||||
|
resources:
|
||||||
|
template: <<.Resource>>
|
||||||
|
name:
|
||||||
|
matches: ""
|
||||||
|
as: ""
|
||||||
|
metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
|
||||||
|
- seriesQuery: '{namespace!="",__name__!~"^container_.*"}'
|
||||||
|
seriesFilters:
|
||||||
|
- isNot: .*_seconds_total
|
||||||
|
resources:
|
||||||
|
template: <<.Resource>>
|
||||||
|
name:
|
||||||
|
matches: ^(.*)_total$
|
||||||
|
as: ""
|
||||||
|
metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>)
|
||||||
|
- seriesQuery: '{namespace!="",__name__!~"^container_.*"}'
|
||||||
|
seriesFilters: []
|
||||||
|
resources:
|
||||||
|
template: <<.Resource>>
|
||||||
|
name:
|
||||||
|
matches: ^(.*)_seconds_total$
|
||||||
|
as: ""
|
||||||
|
metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>)
|
||||||
208
docs/config.md
Normal file
208
docs/config.md
Normal file
|
|
@ -0,0 +1,208 @@
|
||||||
|
Metrics Discovery and Presentation Configuration
|
||||||
|
================================================
|
||||||
|
|
||||||
|
The adapter determines which metrics to expose, and how to expose them,
|
||||||
|
through a set of "discovery" rules. Each rule is executed independently
|
||||||
|
(so make sure that your rules are mutually exclusive), and specifies each
|
||||||
|
of the steps the adapter needs to take to expose a metric in the API.
|
||||||
|
|
||||||
|
Each rule can be broken down into roughly four parts:
|
||||||
|
|
||||||
|
- *Discovery*, which specifies how the adapter should find all Prometheus
|
||||||
|
metrics for this rule.
|
||||||
|
|
||||||
|
- *Association*, which specifies how the adapter should determine which
|
||||||
|
Kubernetes resources a particular metric is associated with.
|
||||||
|
|
||||||
|
- *Naming*, which specifies how the adapter should expose the metric in
|
||||||
|
the custom metrics API.
|
||||||
|
|
||||||
|
- *Querying*, which specifies how a request for a particular metric on one
|
||||||
|
or more Kubernetes objects should be turned into a query to Prometheus.
|
||||||
|
|
||||||
|
A more comprehensive configuration file can be found in
|
||||||
|
[sample-config.yaml](sample-config.yaml), but a basic config with one rule
|
||||||
|
might look like:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
rules:
|
||||||
|
# this rule matches cumulative cAdvisor metrics measured in seconds
|
||||||
|
- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}'
|
||||||
|
resources:
|
||||||
|
# skip specifying generic resource<->label mappings, and just
|
||||||
|
# attach only pod and namespace resources by mapping label names to group-resources
|
||||||
|
overrides:
|
||||||
|
namespace: {resource: "namespace"}
|
||||||
|
pod_name: {resource: "pod"}
|
||||||
|
# specify that the `container_` prefix and the `_seconds_total` suffix should be removed.
|
||||||
|
# this also introduces an implicit filter on metric family names
|
||||||
|
name:
|
||||||
|
# we use the value of the capture group implicitly as the API name
|
||||||
|
# we could also explicitly write `as: "$1"`
|
||||||
|
matches: "^container_(.*)_seconds_total$"
|
||||||
|
# specify how to construct a query to fetch samples for a given series
|
||||||
|
# This is a Go template where the `.Series` and `.LabelMatchers` string values
|
||||||
|
# are available, and the delimiters are `<<` and `>>` to avoid conflicts with
|
||||||
|
# the prometheus query language
|
||||||
|
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[2m])) by (<<.GroupBy>>)'
|
||||||
|
```
|
||||||
|
|
||||||
|
Discovery
|
||||||
|
---------
|
||||||
|
|
||||||
|
Discovery governs the process of finding the metrics that you want to
|
||||||
|
expose in the custom metrics API. There are two fields that factor into
|
||||||
|
discovery: `seriesQuery` and `seriesFilters`.
|
||||||
|
|
||||||
|
`seriesQuery` specifies a Prometheus series query (as passed to the
|
||||||
|
`/api/v1/series` endpoint in Prometheus) to use to find some set of
|
||||||
|
Prometheus series. The adapter will strip the label values from this
|
||||||
|
series, and then use the resulting metric-name-label-names combinations
|
||||||
|
later on.
|
||||||
|
|
||||||
|
In many cases, `seriesQuery` will be sufficient to narrow down the list of
|
||||||
|
Prometheus series. However, sometimes (especially if two rules might
|
||||||
|
otherwise overlap), it's useful to do additional filtering on metric
|
||||||
|
names. In this case, `seriesFilters` can be used. After the list of
|
||||||
|
series is returned from `seriesQuery`, each series has its metric name
|
||||||
|
filtered through any specified filters.
|
||||||
|
|
||||||
|
Filters may be either:
|
||||||
|
|
||||||
|
- `is: <regex>`, which matches any series whose name matches the specified
|
||||||
|
regex.
|
||||||
|
|
||||||
|
- `isNot: <regex>`, which matches any series whose name does not match the
|
||||||
|
specified regex.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# match all cAdvisor metrics that aren't measured in seconds
|
||||||
|
seriesQuery: '{__name__=~"^container_.*_total",container_name!="POD",namespace!="",pod_name!=""}'
|
||||||
|
seriesFilters:
|
||||||
|
- isNot: "^container_.*_seconds_total"
|
||||||
|
```
|
||||||
|
|
||||||
|
Association
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Association governs the process of figuring out which Kubernetes resources
|
||||||
|
a particular metric could be attached to. The `resources` field controls
|
||||||
|
this process.
|
||||||
|
|
||||||
|
There are two ways to associate resources with a particular metric. In
|
||||||
|
both cases, the value of the label becomes the name of the particular
|
||||||
|
object.
|
||||||
|
|
||||||
|
One way is to specify that any label name that matches some particular
|
||||||
|
pattern refers to some group-resource based on the label name. This can
|
||||||
|
be done using the `template` field. The pattern is specified as a Go
|
||||||
|
template, with the `Group` and `Resource` fields representing group and
|
||||||
|
resource. You don't necessarily have to use the `Group` field (in which
|
||||||
|
case the group is guessed by the system). For instance:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# any label `kube_<group>_<resource>` becomes <group>.<resource> in Kubernetes
|
||||||
|
resources:
|
||||||
|
template: "kube_<<.Group>>_<<.Resource>>"
|
||||||
|
```
|
||||||
|
|
||||||
|
The other way is to specify that some particular label represents some
|
||||||
|
particular Kubernetes resource. This can be done using the `overrides`
|
||||||
|
field. Each override maps a Prometheus label to a Kubernetes
|
||||||
|
group-resource. For instance:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# the microservice label corresponds to the apps.deployment resource
|
||||||
|
resources:
|
||||||
|
overrides:
|
||||||
|
microservice: {group: "apps", resource: "deployment"}
|
||||||
|
```
|
||||||
|
|
||||||
|
These two can be combined, so you can specify both a template and some
|
||||||
|
individual overrides.
|
||||||
|
|
||||||
|
Naming
|
||||||
|
------
|
||||||
|
|
||||||
|
Naming governs the process of converting a Prometheus metric name into
|
||||||
|
a metric in the custom metrics API, and vice versa. It's controlled by
|
||||||
|
the `name` field.
|
||||||
|
|
||||||
|
Naming is controlled by specifying a pattern to extract an API name from
|
||||||
|
a Prometheus name, and potentially a transformation on that extracted
|
||||||
|
value.
|
||||||
|
|
||||||
|
The pattern is specified in the `matches` field, and is just a regular
|
||||||
|
expression. If not specified, it defaults to `.*`.
|
||||||
|
|
||||||
|
The transformation is specified by the `as` field. You can use any
|
||||||
|
capture groups defined in the `matches` field. If the `matches` field
|
||||||
|
doesn't contain capture groups, the `as` field defaults to `$0`. If it
|
||||||
|
contains a single capture group, the `as` field defaults to `$1`.
|
||||||
|
Otherwise, it's an error not to specify the `as` field.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# match any name <name>_total and turn it into <name>_per_second
|
||||||
|
# e.g. http_requests_total becomes http_requests_per_second
|
||||||
|
name:
|
||||||
|
matches: "^(.*)_total$"
|
||||||
|
as: "${1}_per_second"
|
||||||
|
```
|
||||||
|
|
||||||
|
Querying
|
||||||
|
--------
|
||||||
|
|
||||||
|
Querying governs the process of actually fetching values for a particular
|
||||||
|
metric. It's controlled by the `metricsQuery` field.
|
||||||
|
|
||||||
|
The `metricsQuery` field is a Go template that gets turned into
|
||||||
|
a Prometheus query, using input from a particular call to the custom
|
||||||
|
metrics API. A given call to the custom metrics API is distilled down to
|
||||||
|
a metric name, a group-resource, and one or more objects of that
|
||||||
|
group-resource. These get turned into the following fields in the
|
||||||
|
template:
|
||||||
|
|
||||||
|
- `Series`: the metric name
|
||||||
|
- `LabelMatchers`: a comma-separated list of label matchers matching the
|
||||||
|
given objects. Currently, this is the label for the particular
|
||||||
|
group-resource, plus the label for namespace, if the group-resource is
|
||||||
|
namespaced.
|
||||||
|
- `GroupBy`: a comma-separated list of labels to group by. Currently,
|
||||||
|
this contains the group-resource label used in `LabelMatchers`.
|
||||||
|
|
||||||
|
For instance, suppose we had a series `http_requests_total` (exposed as
|
||||||
|
`http_requests_per_second` in the API) with labels `service`, `pod`,
|
||||||
|
`ingress`, `namespace`, and `verb`. The first four correspond to
|
||||||
|
Kubernetes resources. Then, if someone requested the metric
|
||||||
|
`pods/http_requests_per_second` for the pods `pod1` and `pod2` in the
|
||||||
|
`somens` namespace, we'd have:
|
||||||
|
|
||||||
|
- `Series`: `"http_requests_total"`
|
||||||
|
- `LabelMatchers`: `pod=~"pod1|pod2",namespace="somens"`
|
||||||
|
- `GroupBy`: `pod`
|
||||||
|
|
||||||
|
Additionally, there are two advanced fields that are "raw" forms of other
|
||||||
|
fields:
|
||||||
|
|
||||||
|
- `LabelValuesByName`: a map mapping the labels and values from the
|
||||||
|
`LabelMatchers` field. The values are pre-joined by `|`
|
||||||
|
(for use with the `=~` matcher in Prometheus).
|
||||||
|
- `GroupBySlice`: the slice form of `GroupBy`.
|
||||||
|
|
||||||
|
In general, you'll probably want to use the `Series`, `LabelMatchers`, and
|
||||||
|
`GroupBy` fields. The other two are for advanced usage.
|
||||||
|
|
||||||
|
The query is expected to return one value for each object requested. The
|
||||||
|
adapter will use the labels on the returned series to associate a given
|
||||||
|
series back to its corresponding object.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# convert cumulative cAdvisor metrics into rates calculated over 2 minutes
|
||||||
|
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[2m])) by (<<.GroupBy>>)'
|
||||||
|
```
|
||||||
69
docs/sample-config.yaml
Normal file
69
docs/sample-config.yaml
Normal file
|
|
@ -0,0 +1,69 @@
|
||||||
|
rules:
|
||||||
|
# Each rule represents some naming and discovery logic.
|
||||||
|
# Each rule is executed independently of the others, so
|
||||||
|
# take care to avoid overlap. As an optimization, rules
|
||||||
|
# with the same `seriesQuery` but different
|
||||||
|
# `name` or `seriesFilters` will use only one query to
|
||||||
|
# Prometheus for discovery.
|
||||||
|
|
||||||
|
# some of these rules are taken from the "default" configuration, which
|
||||||
|
# can be found in cmd/config-gen/utils/default.go
|
||||||
|
|
||||||
|
# this rule matches cumulative cAdvisor metrics measured in seconds
|
||||||
|
- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}'
|
||||||
|
resources:
|
||||||
|
# skip specifying generic resource<->label mappings, and just
|
||||||
|
# attach only pod and namespace resources by mapping label names to group-resources
|
||||||
|
overrides:
|
||||||
|
namespace: {resource: "namespace"},
|
||||||
|
pod_name: {resource: "pod"},
|
||||||
|
# specify that the `container_` prefix and the `_seconds_total` suffix should be removed.
|
||||||
|
# this also introduces an implicit filter on metric family names
|
||||||
|
name:
|
||||||
|
# we use the value of the capture group implicitly as the API name
|
||||||
|
# we could also explicitly write `as: "$1"`
|
||||||
|
matches: "^container_(.*)_seconds_total$"
|
||||||
|
# specify how to construct a query to fetch samples for a given series
|
||||||
|
# This is a Go template where the `.Series` and `.LabelMatchers` string values
|
||||||
|
# are available, and the delimiters are `<<` and `>>` to avoid conflicts with
|
||||||
|
# the prometheus query language
|
||||||
|
metricsQuery: "sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[2m])) by (<<.GroupBy>>)"
|
||||||
|
|
||||||
|
# this rule matches cumulative cAdvisor metrics not measured in seconds
|
||||||
|
- seriesQuery: '{__name__=~"^container_.*_total",container_name!="POD",namespace!="",pod_name!=""}'
|
||||||
|
resources:
|
||||||
|
overrides:
|
||||||
|
namespace: {resource: "namespace"},
|
||||||
|
pod_name: {resource: "pod"},
|
||||||
|
seriesFilters:
|
||||||
|
# since this is a superset of the query above, we introduce an additional filter here
|
||||||
|
- isNot: "^container_.*_seconds_total$"
|
||||||
|
name: {matches: "^container_(.*)_total$"}
|
||||||
|
metricsQuery: "sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[2m])) by (<<.GroupBy>>)"
|
||||||
|
|
||||||
|
# this rule matches cumulative non-cAdvisor metrics
|
||||||
|
- seriesQuery: '{namespace!="",__name__!="^container_.*"}'
|
||||||
|
name: {matches: "^(.*)_total$"}
|
||||||
|
resources:
|
||||||
|
# specify a generic mapping between resources and labels. This
|
||||||
|
# is a template, like the `metricsQuery` template, except with the `.Group`
|
||||||
|
# and `.Resource` strings available. It will also be used to match labels,
|
||||||
|
# so avoid using template functions which truncate the group or resource.
|
||||||
|
# Group will be converted to a form acceptable for use as a label automatically.
|
||||||
|
template: "<<.Resource>>"
|
||||||
|
# if we wanted to, we could also specify overrides here
|
||||||
|
metricsQuery: "sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[2m])) by (<<.GroupBy>>)"
|
||||||
|
|
||||||
|
# this rule matches only a single metric, explicitly naming it something else
|
||||||
|
# Its series query *must* return only a single metric family
|
||||||
|
- seriesQuery: 'cheddar{sharp="true"}'
|
||||||
|
# this metric will appear as "cheesy_goodness" in the custom metrics API
|
||||||
|
name: {as: "cheesy_goodness"}
|
||||||
|
resources:
|
||||||
|
overrides:
|
||||||
|
# this should still resolve in our cluster
|
||||||
|
brand: {group: "cheese.io", resource: "brand"}
|
||||||
|
metricQuery: 'count(cheddar{sharp="true"})'
|
||||||
|
|
||||||
|
# TODO: should we be able to map to a constant instance of a resource
|
||||||
|
# (e.g. `resources: {constant: [{resource: "namespace", name: "kube-system"}]}`)?
|
||||||
Loading…
Add table
Add a link
Reference in a new issue