From 20af7fe8652900e095f12e708bdb52b31f1e5864 Mon Sep 17 00:00:00 2001 From: Tony Compton Date: Wed, 27 Jun 2018 17:39:05 -0400 Subject: [PATCH] One last shot at correcting things. --- cmd/adapter/app/start.go | 47 ++-- cmd/config-gen/main.go | 44 ++++ cmd/config-gen/utils/default.go | 93 ++++++++ .../custom-metrics-apiserver-deployment.yaml | 8 +- .../manifests/custom-metrics-config-map.yaml | 74 +++++++ docs/config.md | 208 ++++++++++++++++++ docs/sample-config.yaml | 69 ++++++ 7 files changed, 524 insertions(+), 19 deletions(-) create mode 100644 cmd/config-gen/main.go create mode 100644 cmd/config-gen/utils/default.go create mode 100644 deploy/manifests/custom-metrics-config-map.yaml create mode 100644 docs/config.md create mode 100644 docs/sample-config.yaml diff --git a/cmd/adapter/app/start.go b/cmd/adapter/app/start.go index 2c68e482..fa1b0b0d 100644 --- a/cmd/adapter/app/start.go +++ b/cmd/adapter/app/start.go @@ -24,7 +24,6 @@ import ( "time" "github.com/spf13/cobra" - apimeta "k8s.io/apimachinery/pkg/api/meta" "k8s.io/client-go/discovery" "k8s.io/client-go/dynamic" "k8s.io/client-go/rest" @@ -32,6 +31,7 @@ import ( prom "github.com/directxman12/k8s-prometheus-adapter/pkg/client" mprom "github.com/directxman12/k8s-prometheus-adapter/pkg/client/metrics" + adaptercfg "github.com/directxman12/k8s-prometheus-adapter/pkg/config" cmprov "github.com/directxman12/k8s-prometheus-adapter/pkg/custom-provider" "github.com/kubernetes-incubator/custom-metrics-apiserver/pkg/cmd/server" "github.com/kubernetes-incubator/custom-metrics-apiserver/pkg/dynamicmapper" @@ -43,9 +43,7 @@ func NewCommandStartPrometheusAdapterServer(out, errOut io.Writer, stopCh <-chan o := PrometheusAdapterServerOptions{ CustomMetricsAdapterServerOptions: baseOpts, MetricsRelistInterval: 10 * time.Minute, - RateInterval: 5 * time.Minute, PrometheusURL: "https://localhost", - DiscoveryInterval: 10 * time.Minute, } cmd := &cobra.Command{ @@ -76,19 +74,20 @@ func 
NewCommandStartPrometheusAdapterServer(out, errOut io.Writer, stopCh <-chan "any described objets") flags.DurationVar(&o.MetricsRelistInterval, "metrics-relist-interval", o.MetricsRelistInterval, ""+ "interval at which to re-list the set of all available metrics from Prometheus") - flags.DurationVar(&o.RateInterval, "rate-interval", o.RateInterval, ""+ - "period of time used to calculate rate metrics from cumulative metrics") flags.DurationVar(&o.DiscoveryInterval, "discovery-interval", o.DiscoveryInterval, ""+ "interval at which to refresh API discovery information") flags.StringVar(&o.PrometheusURL, "prometheus-url", o.PrometheusURL, - "URL for connecting to Prometheus. Query parameters are used to configure the connection") + "URL for connecting to Prometheus.") flags.BoolVar(&o.PrometheusAuthInCluster, "prometheus-auth-incluster", o.PrometheusAuthInCluster, "use auth details from the in-cluster kubeconfig when connecting to prometheus.") flags.StringVar(&o.PrometheusAuthConf, "prometheus-auth-config", o.PrometheusAuthConf, "kubeconfig file used to configure auth when connecting to Prometheus.") - flags.StringVar(&o.LabelPrefix, "label-prefix", o.LabelPrefix, - "Prefix to expect on labels referring to pod resources. 
For example, if the prefix is "+ - "'kube_', any series with the 'kube_pod' label would be considered a pod metric") + flags.StringVar(&o.AdapterConfigFile, "config", o.AdapterConfigFile, + "Configuration file containing details of how to transform between Prometheus metrics "+ + "and custom metrics API resources") + + cmd.MarkFlagRequired("config") + return cmd } @@ -128,6 +127,15 @@ func makeHTTPClient(inClusterAuth bool, kubeConfigPath string) (*http.Client, er } func (o PrometheusAdapterServerOptions) RunCustomMetricsAdapterServer(stopCh <-chan struct{}) error { + if o.AdapterConfigFile == "" { + return fmt.Errorf("no discovery configuration file specified") + } + + metricsConfig, err := adaptercfg.FromFile(o.AdapterConfigFile) + if err != nil { + return fmt.Errorf("unable to load metrics discovery configuration: %v", err) + } + config, err := o.Config() if err != nil { return err @@ -153,12 +161,12 @@ func (o PrometheusAdapterServerOptions) RunCustomMetricsAdapterServer(stopCh <-c return fmt.Errorf("unable to construct discovery client for dynamic client: %v", err) } - dynamicMapper, err := dynamicmapper.NewRESTMapper(discoveryClient, apimeta.InterfacesForUnstructured, o.DiscoveryInterval) + dynamicMapper, err := dynamicmapper.NewRESTMapper(discoveryClient, o.DiscoveryInterval) if err != nil { return fmt.Errorf("unable to construct dynamic discovery mapper: %v", err) } - clientPool := dynamic.NewClientPool(clientConfig, dynamicMapper, dynamic.LegacyAPIPathResolverFunc) + dynamicClient, err := dynamic.NewForConfig(clientConfig) if err != nil { return fmt.Errorf("unable to construct lister client to initialize provider: %v", err) } @@ -176,9 +184,15 @@ func (o PrometheusAdapterServerOptions) RunCustomMetricsAdapterServer(stopCh <-c instrumentedGenericPromClient := mprom.InstrumentGenericAPIClient(genericPromClient, baseURL.String()) promClient := prom.NewClientForAPI(instrumentedGenericPromClient) - cmProvider := cmprov.NewCustomPrometheusProvider(dynamicMapper, 
clientPool, promClient, o.LabelPrefix, o.MetricsRelistInterval, o.RateInterval, stopCh) + namers, err := cmprov.NamersFromConfig(metricsConfig, dynamicMapper) + if err != nil { + return fmt.Errorf("unable to construct naming scheme from metrics rules: %v", err) + } - server, err := config.Complete().New("prometheus-custom-metrics-adapter", cmProvider) + cmProvider, runner := cmprov.NewCustomPrometheusProvider(dynamicMapper, dynamicClient, promClient, namers, o.MetricsRelistInterval) + runner.RunUntil(stopCh) + + server, err := config.Complete().New("prometheus-custom-metrics-adapter", cmProvider, nil) if err != nil { return err } @@ -192,8 +206,6 @@ type PrometheusAdapterServerOptions struct { RemoteKubeConfigFile string // MetricsRelistInterval is the interval at which to relist the set of available metrics MetricsRelistInterval time.Duration - // RateInterval is the period of time used to calculate rate metrics - RateInterval time.Duration // DiscoveryInterval is the interval at which discovery information is refreshed DiscoveryInterval time.Duration // PrometheusURL is the URL describing how to connect to Prometheus. Query parameters configure connection options. @@ -202,7 +214,6 @@ type PrometheusAdapterServerOptions struct { PrometheusAuthInCluster bool // PrometheusAuthConf is the kubeconfig file that contains auth details used to connect to Prometheus PrometheusAuthConf string - // LabelPrefix is the prefix to expect on labels for Kubernetes resources - // (e.g. if the prefix is "kube_", we'd expect a "kube_pod" label for pod metrics). - LabelPrefix string + // AdapterConfigFile points to the file containing the metrics discovery configuration. 
+ AdapterConfigFile string } diff --git a/cmd/config-gen/main.go b/cmd/config-gen/main.go new file mode 100644 index 00000000..b3d8f7e1 --- /dev/null +++ b/cmd/config-gen/main.go @@ -0,0 +1,44 @@ +package main + +import ( + "fmt" + "os" + "time" + + "github.com/spf13/cobra" + yaml "gopkg.in/yaml.v2" + + "github.com/directxman12/k8s-prometheus-adapter/cmd/config-gen/utils" +) + +func main() { + var labelPrefix string + var rateInterval time.Duration + + cmd := &cobra.Command{ + Short: "Generate a config matching the legacy discovery rules", + Long: `Generate a config that produces the same functionality +as the legacy discovery rules. This includes discovering metrics and associating +resources according to the Kubernetes instrumention conventions and the cAdvisor +conventions, and auto-converting cumulative metrics into rate metrics.`, + RunE: func(c *cobra.Command, args []string) error { + cfg := utils.DefaultConfig(rateInterval, labelPrefix) + enc := yaml.NewEncoder(os.Stdout) + if err := enc.Encode(cfg); err != nil { + return err + } + return enc.Close() + }, + } + + cmd.Flags().StringVar(&labelPrefix, "label-prefix", "", + "Prefix to expect on labels referring to pod resources. For example, if the prefix is "+ + "'kube_', any series with the 'kube_pod' label would be considered a pod metric") + cmd.Flags().DurationVar(&rateInterval, "rate-interval", 5*time.Minute, + "Period of time used to calculate rate metrics from cumulative metrics") + + if err := cmd.Execute(); err != nil { + fmt.Fprintf(os.Stderr, "Unable to generate config: %v\n", err) + os.Exit(1) + } +} diff --git a/cmd/config-gen/utils/default.go b/cmd/config-gen/utils/default.go new file mode 100644 index 00000000..d8873d96 --- /dev/null +++ b/cmd/config-gen/utils/default.go @@ -0,0 +1,93 @@ +package utils + +import ( + "fmt" + "time" + + prom "github.com/directxman12/k8s-prometheus-adapter/pkg/client" + . 
"github.com/directxman12/k8s-prometheus-adapter/pkg/config" + pmodel "github.com/prometheus/common/model" +) + +// DefaultConfig returns a configuration equivalent to the former +// pre-advanced-config settings. This means that "normal" series labels +// will be of the form `<<.Resource>>`, cadvisor series will be +// of the form `container_`, and have the label `pod_name`. Any series ending +// in total will be treated as a rate metric. +func DefaultConfig(rateInterval time.Duration, labelPrefix string) *MetricsDiscoveryConfig { + return &MetricsDiscoveryConfig{ + Rules: []DiscoveryRule{ + // container seconds rate metrics + { + SeriesQuery: string(prom.MatchSeries("", prom.NameMatches("^container_.*"), prom.LabelNeq("container_name", "POD"), prom.LabelNeq("namespace", ""), prom.LabelNeq("pod_name", ""))), + Resources: ResourceMapping{ + Overrides: map[string]GroupResource{ + "namespace": {Resource: "namespace"}, + "pod_name": {Resource: "pod"}, + }, + }, + Name: NameMapping{Matches: "^container_(.*)_seconds_total$"}, + MetricsQuery: fmt.Sprintf(`sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[%s])) by (<<.GroupBy>>)`, pmodel.Duration(rateInterval).String()), + }, + + // container rate metrics + { + SeriesQuery: string(prom.MatchSeries("", prom.NameMatches("^container_.*"), prom.LabelNeq("container_name", "POD"), prom.LabelNeq("namespace", ""), prom.LabelNeq("pod_name", ""))), + SeriesFilters: []RegexFilter{{IsNot: "^container_.*_seconds_total$"}}, + Resources: ResourceMapping{ + Overrides: map[string]GroupResource{ + "namespace": {Resource: "namespace"}, + "pod_name": {Resource: "pod"}, + }, + }, + Name: NameMapping{Matches: "^container_(.*)_total$"}, + MetricsQuery: fmt.Sprintf(`sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[%s])) by (<<.GroupBy>>)`, pmodel.Duration(rateInterval).String()), + }, + + // container non-cumulative metrics + { + SeriesQuery: string(prom.MatchSeries("", prom.NameMatches("^container_.*"), 
prom.LabelNeq("container_name", "POD"), prom.LabelNeq("namespace", ""), prom.LabelNeq("pod_name", ""))), + SeriesFilters: []RegexFilter{{IsNot: "^container_.*_total$"}}, + Resources: ResourceMapping{ + Overrides: map[string]GroupResource{ + "namespace": {Resource: "namespace"}, + "pod_name": {Resource: "pod"}, + }, + }, + Name: NameMapping{Matches: "^container_(.*)$"}, + MetricsQuery: `sum(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}) by (<<.GroupBy>>)`, + }, + + // normal non-cumulative metrics + { + SeriesQuery: string(prom.MatchSeries("", prom.LabelNeq(fmt.Sprintf("%snamespace", labelPrefix), ""), prom.NameNotMatches("^container_.*"))), + SeriesFilters: []RegexFilter{{IsNot: ".*_total$"}}, + Resources: ResourceMapping{ + Template: fmt.Sprintf("%s<<.Resource>>", labelPrefix), + }, + MetricsQuery: "sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)", + }, + + // normal rate metrics + { + SeriesQuery: string(prom.MatchSeries("", prom.LabelNeq(fmt.Sprintf("%snamespace", labelPrefix), ""), prom.NameNotMatches("^container_.*"))), + SeriesFilters: []RegexFilter{{IsNot: ".*_seconds_total"}}, + Name: NameMapping{Matches: "^(.*)_total$"}, + Resources: ResourceMapping{ + Template: fmt.Sprintf("%s<<.Resource>>", labelPrefix), + }, + MetricsQuery: fmt.Sprintf("sum(rate(<<.Series>>{<<.LabelMatchers>>}[%s])) by (<<.GroupBy>>)", pmodel.Duration(rateInterval).String()), + }, + + // seconds rate metrics + { + SeriesQuery: string(prom.MatchSeries("", prom.LabelNeq(fmt.Sprintf("%snamespace", labelPrefix), ""), prom.NameNotMatches("^container_.*"))), + Name: NameMapping{Matches: "^(.*)_seconds_total$"}, + Resources: ResourceMapping{ + Template: fmt.Sprintf("%s<<.Resource>>", labelPrefix), + }, + MetricsQuery: fmt.Sprintf("sum(rate(<<.Series>>{<<.LabelMatchers>>}[%s])) by (<<.GroupBy>>)", pmodel.Duration(rateInterval).String()), + }, + }, + } +} diff --git a/deploy/manifests/custom-metrics-apiserver-deployment.yaml 
b/deploy/manifests/custom-metrics-apiserver-deployment.yaml index 848d4ec3..5ca0e55f 100644 --- a/deploy/manifests/custom-metrics-apiserver-deployment.yaml +++ b/deploy/manifests/custom-metrics-apiserver-deployment.yaml @@ -28,15 +28,21 @@ spec: - --logtostderr=true - --prometheus-url=http://prometheus.prom.svc:9090/ - --metrics-relist-interval=30s - - --rate-interval=5m - --v=10 + - --config=/default-config.yaml ports: - containerPort: 6443 volumeMounts: - mountPath: /var/run/serving-cert name: volume-serving-cert readOnly: true + - mountPath: /etc/adapter/ + name: config + readOnly: true volumes: - name: volume-serving-cert secret: secretName: cm-adapter-serving-certs + - name: config + configMap: + name: adapter-config diff --git a/deploy/manifests/custom-metrics-config-map.yaml b/deploy/manifests/custom-metrics-config-map.yaml new file mode 100644 index 00000000..04ee0c67 --- /dev/null +++ b/deploy/manifests/custom-metrics-config-map.yaml @@ -0,0 +1,74 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: adapter-config + namespace: custom-metrics +data: + config.yaml: | + rules: + - seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}' + seriesFilters: [] + resources: + overrides: + namespace: + resource: namespace + pod_name: + resource: pod + name: + matches: ^container_(.*)_seconds_total$ + as: "" + metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[5m])) + by (<<.GroupBy>>) + - seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}' + seriesFilters: + - isNot: ^container_.*_seconds_total$ + resources: + overrides: + namespace: + resource: namespace + pod_name: + resource: pod + name: + matches: ^container_(.*)_total$ + as: "" + metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[5m])) + by (<<.GroupBy>>) + - seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}' + seriesFilters: + - isNot: 
^container_.*_total$ + resources: + overrides: + namespace: + resource: namespace + pod_name: + resource: pod + name: + matches: ^container_(.*)$ + as: "" + metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}) by (<<.GroupBy>>) + - seriesQuery: '{namespace!="",__name__!~"^container_.*"}' + seriesFilters: + - isNot: .*_total$ + resources: + template: <<.Resource>> + name: + matches: "" + as: "" + metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>) + - seriesQuery: '{namespace!="",__name__!~"^container_.*"}' + seriesFilters: + - isNot: .*_seconds_total + resources: + template: <<.Resource>> + name: + matches: ^(.*)_total$ + as: "" + metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>) + - seriesQuery: '{namespace!="",__name__!~"^container_.*"}' + seriesFilters: [] + resources: + template: <<.Resource>> + name: + matches: ^(.*)_seconds_total$ + as: "" + metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[5m])) by (<<.GroupBy>>) diff --git a/docs/config.md b/docs/config.md new file mode 100644 index 00000000..43d5dee2 --- /dev/null +++ b/docs/config.md @@ -0,0 +1,208 @@ +Metrics Discovery and Presentation Configuration +================================================ + +The adapter determines which metrics to expose, and how to expose them, +through a set of "discovery" rules. Each rule is executed independently +(so make sure that your rules are mutually exclusive), and specifies each +of the steps the adapter needs to take to expose a metric in the API. + +Each rule can be broken down into roughly four parts: + +- *Discovery*, which specifies how the adapter should find all Prometheus + metrics for this rule. + +- *Association*, which specifies how the adapter should determine which + Kubernetes resources a particular metric is associated with. + +- *Naming*, which specifies how the adapter should expose the metric in + the custom metrics API. 
+ +- *Querying*, which specifies how a request for a particular metric on one + or more Kubernetes objects should be turned into a query to Prometheus. + +A more comprehensive configuration file can be found in +[sample-config.yaml](sample-config.yaml), but a basic config with one rule +might look like: + +```yaml +rules: +# this rule matches cumulative cAdvisor metrics measured in seconds +- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}' + resources: + # skip specifying generic resource<->label mappings, and just + # attach only pod and namespace resources by mapping label names to group-resources + overrides: + namespace: {resource: "namespace"} + pod_name: {resource: "pod"} + # specify that the `container_` prefix and the `_seconds_total` suffix should be removed. + # this also introduces an implicit filter on metric family names + name: + # we use the value of the capture group implicitly as the API name + # we could also explicitly write `as: "$1"` + matches: "^container_(.*)_seconds_total$" + # specify how to construct a query to fetch samples for a given series + # This is a Go template where the `.Series`, `.LabelMatchers`, and `.GroupBy` string values + # are available, and the delimiters are `<<` and `>>` to avoid conflicts with + # the prometheus query language + metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[2m])) by (<<.GroupBy>>)' +``` + +Discovery +--------- + +Discovery governs the process of finding the metrics that you want to +expose in the custom metrics API. There are two fields that factor into +discovery: `seriesQuery` and `seriesFilters`. + +`seriesQuery` specifies the Prometheus series query (as passed to the +`/api/v1/series` endpoint in Prometheus) to use to find some set of +Prometheus series. The adapter will strip the label values from these +series, and then use the resulting metric-name-label-names combinations +later on.
+ +In many cases, `seriesQuery` will be sufficient to narrow down the list of +Prometheus series. However, sometimes (especially if two rules might +otherwise overlap), it's useful to do additional filtering on metric +names. In this case, `seriesFilters` can be used. After the list of +series is returned from `seriesQuery`, each series has its metric name +filtered through any specified filters. + +Filters may be either: + +- `is: <regex>`, which matches any series whose name matches the specified + regex. + +- `isNot: <regex>`, which matches any series whose name does not match the + specified regex. + +For example: + +```yaml +# match all cumulative cAdvisor metrics that aren't measured in seconds +seriesQuery: '{__name__=~"^container_.*_total",container_name!="POD",namespace!="",pod_name!=""}' +seriesFilters: + - isNot: "^container_.*_seconds_total" +``` + +Association +----------- + +Association governs the process of figuring out which Kubernetes resources +a particular metric could be attached to. The `resources` field controls +this process. + +There are two ways to associate resources with a particular metric. In +both cases, the value of the label becomes the name of the particular +object. + +One way is to specify that any label name that matches some particular +pattern refers to some group-resource based on the label name. This can +be done using the `template` field. The pattern is specified as a Go +template, with the `Group` and `Resource` fields representing group and +resource. You don't necessarily have to use the `Group` field (in which +case the group is guessed by the system). For instance: + +```yaml +# any label `kube_<group>_<resource>` becomes <group>.<resource> in Kubernetes +resources: + template: "kube_<<.Group>>_<<.Resource>>" +``` + +The other way is to specify that some particular label represents some +particular Kubernetes resource. This can be done using the `overrides` +field. Each override maps a Prometheus label to a Kubernetes +group-resource.
For instance: + +```yaml +# the microservice label corresponds to the apps.deployment resource +resources: + overrides: + microservice: {group: "apps", resource: "deployment"} +``` + +These two can be combined, so you can specify both a template and some +individual overrides. + +Naming +------ + +Naming governs the process of converting a Prometheus metric name into +a metric in the custom metrics API, and vice versa. It's controlled by +the `name` field. + +Naming is controlled by specifying a pattern to extract an API name from +a Prometheus name, and potentially a transformation on that extracted +value. + +The pattern is specified in the `matches` field, and is just a regular +expression. If not specified, it defaults to `.*`. + +The transformation is specified by the `as` field. You can use any +capture groups defined in the `matches` field. If the `matches` field +doesn't contain capture groups, the `as` field defaults to `$0`. If it +contains a single capture group, the `as` field defaults to `$1`. +Otherwise, it's an error not to specify the `as` field. + +For example: + +```yaml +# turn any name <name>_total into <name>_per_second +# e.g. http_requests_total becomes http_requests_per_second +name: + matches: "^(.*)_total$" + as: "${1}_per_second" +``` + +Querying +-------- + +Querying governs the process of actually fetching values for a particular +metric. It's controlled by the `metricsQuery` field. + +The `metricsQuery` field is a Go template that gets turned into +a Prometheus query, using input from a particular call to the custom +metrics API. A given call to the custom metrics API is distilled down to +a metric name, a group-resource, and one or more objects of that +group-resource. These get turned into the following fields in the +template: + +- `Series`: the metric name +- `LabelMatchers`: a comma-separated list of label matchers matching the + given objects.
Currently, this is the label for the particular + group-resource, plus the label for namespace, if the group-resource is + namespaced. +- `GroupBy`: a comma-separated list of labels to group by. Currently, + this contains the group-resource label used in `LabelMatchers`. + +For instance, suppose we had a series `http_requests_total` (exposed as +`http_requests_per_second` in the API) with labels `service`, `pod`, +`ingress`, `namespace`, and `verb`. The first four correspond to +Kubernetes resources. Then, if someone requested the metric +`pods/http_requests_per_second` for the pods `pod1` and `pod2` in the +`somens` namespace, we'd have: + +- `Series`: `"http_requests_total"` +- `LabelMatchers`: `pod=~"pod1|pod2",namespace="somens"` +- `GroupBy`: `pod` + +Additionally, there are two advanced fields that are "raw" forms of other +fields: + +- `LabelValuesByName`: a map mapping the labels and values from the + `LabelMatchers` field. The values are pre-joined by `|` + (for use with the `=~` matcher in Prometheus). +- `GroupBySlice`: the slice form of `GroupBy`. + +In general, you'll probably want to use the `Series`, `LabelMatchers`, and +`GroupBy` fields. The other two are for advanced usage. + +The query is expected to return one value for each object requested. The +adapter will use the labels on the returned series to associate a given +series back to its corresponding object. + +For example: + +```yaml +# convert cumulative cAdvisor metrics into rates calculated over 2 minutes +metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[2m])) by (<<.GroupBy>>)' +``` diff --git a/docs/sample-config.yaml b/docs/sample-config.yaml new file mode 100644 index 00000000..3aa5be6d --- /dev/null +++ b/docs/sample-config.yaml @@ -0,0 +1,69 @@ +rules: +# Each rule represents some naming and discovery logic. +# Each rule is executed independently of the others, so +# take care to avoid overlap.
As an optimization, rules +# with the same `seriesQuery` but different +# `name` or `seriesFilters` will use only one query to +# Prometheus for discovery. + +# some of these rules are taken from the "default" configuration, which +# can be found in cmd/config-gen/utils/default.go + +# this rule matches cumulative cAdvisor metrics measured in seconds +- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}' + resources: + # skip specifying generic resource<->label mappings, and just + # attach only pod and namespace resources by mapping label names to group-resources + overrides: + namespace: {resource: "namespace"} + pod_name: {resource: "pod"} + # specify that the `container_` prefix and the `_seconds_total` suffix should be removed. + # this also introduces an implicit filter on metric family names + name: + # we use the value of the capture group implicitly as the API name + # we could also explicitly write `as: "$1"` + matches: "^container_(.*)_seconds_total$" + # specify how to construct a query to fetch samples for a given series + # This is a Go template where the `.Series`, `.LabelMatchers`, and `.GroupBy` string values + # are available, and the delimiters are `<<` and `>>` to avoid conflicts with + # the prometheus query language + metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[2m])) by (<<.GroupBy>>)' + +# this rule matches cumulative cAdvisor metrics not measured in seconds +- seriesQuery: '{__name__=~"^container_.*_total",container_name!="POD",namespace!="",pod_name!=""}' + resources: + overrides: + namespace: {resource: "namespace"} + pod_name: {resource: "pod"} + seriesFilters: + # since this is a superset of the query above, we introduce an additional filter here + - isNot: "^container_.*_seconds_total$" + name: {matches: "^container_(.*)_total$"} + metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[2m])) by (<<.GroupBy>>)' + +# this rule matches cumulative non-cAdvisor metrics +-
seriesQuery: '{namespace!="",__name__!~"^container_.*"}' + name: {matches: "^(.*)_total$"} + resources: + # specify a generic mapping between resources and labels. This + # is a template, like the `metricsQuery` template, except with the `.Group` + # and `.Resource` strings available. It will also be used to match labels, + # so avoid using template functions which truncate the group or resource. + # Group will be converted to a form acceptable for use as a label automatically. + template: "<<.Resource>>" + # if we wanted to, we could also specify overrides here + metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[2m])) by (<<.GroupBy>>)' + +# this rule matches only a single metric, explicitly naming it something else +# Its series query *must* return only a single metric family +- seriesQuery: 'cheddar{sharp="true"}' + # this metric will appear as "cheesy_goodness" in the custom metrics API + name: {as: "cheesy_goodness"} + resources: + overrides: + # this should still resolve in our cluster + brand: {group: "cheese.io", resource: "brand"} + metricsQuery: 'count(cheddar{sharp="true"})' + +# TODO: should we be able to map to a constant instance of a resource +# (e.g. `resources: {constant: [{resource: "namespace", name: "kube-system"}]}`)?