Catch this up to upstream/master.

Tony Compton 2018-06-27 17:31:43 -04:00
parent 0af14dc93d
commit b3e1323a1c
11 changed files with 1442 additions and 520 deletions

@@ -22,59 +22,58 @@ import (
 	"time"

 	"github.com/golang/glog"
 	"github.com/kubernetes-incubator/custom-metrics-apiserver/pkg/provider"
 	pmodel "github.com/prometheus/common/model"
 	apierr "k8s.io/apimachinery/pkg/api/errors"
 	apimeta "k8s.io/apimachinery/pkg/api/meta"
 	"k8s.io/apimachinery/pkg/api/resource"
-	metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/dynamic"
 	"k8s.io/metrics/pkg/apis/custom_metrics"

 	prom "github.com/directxman12/k8s-prometheus-adapter/pkg/client"
 )

+// Runnable represents something that can be run until told to stop.
+type Runnable interface {
+	// Run runs the runnable forever.
+	Run()
+	// RunUntil runs the runnable until the given channel is closed.
+	RunUntil(stopChan <-chan struct{})
+}
+
 type customPrometheusProvider struct {
 	mapper     apimeta.RESTMapper
-	kubeClient dynamic.ClientPool
+	kubeClient dynamic.Interface
 	promClient prom.Client

 	SeriesRegistry
-
-	rateInterval time.Duration
 }

-func NewCustomPrometheusProvider(mapper apimeta.RESTMapper, kubeClient dynamic.ClientPool, promClient prom.Client, labelPrefix string, updateInterval time.Duration, rateInterval time.Duration, stopChan <-chan struct{}) provider.CustomMetricsProvider {
+func NewCustomPrometheusProvider(mapper apimeta.RESTMapper, kubeClient dynamic.Interface, promClient prom.Client, namers []MetricNamer, updateInterval time.Duration) (provider.CustomMetricsProvider, Runnable) {
 	lister := &cachingMetricsLister{
 		updateInterval: updateInterval,
 		promClient:     promClient,
+		namers:         namers,
 		SeriesRegistry: &basicSeriesRegistry{
-			namer: metricNamer{
-				// TODO: populate the overrides list
-				overrides:   nil,
-				mapper:      mapper,
-				labelPrefix: labelPrefix,
-			},
+			mapper: mapper,
 		},
 	}

-	lister.RunUntil(stopChan)
-
 	return &customPrometheusProvider{
 		mapper:         mapper,
 		kubeClient:     kubeClient,
 		promClient:     promClient,
 		SeriesRegistry: lister,
-
-		rateInterval: rateInterval,
-	}
+	}, lister
 }

 func (p *customPrometheusProvider) metricFor(value pmodel.SampleValue, groupResource schema.GroupResource, namespace string, name string, metricName string) (*custom_metrics.MetricValue, error) {
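The constructor no longer takes a rate interval, label prefix, or stop channel; series naming moves into the MetricNamer list, and instead of starting the lister itself it now returns it as a Runnable so the caller controls startup. A minimal sketch of the new wiring (mapper, kubeClient, promClient, namers, and stopChan are assumed to have been built by the caller):

	provider, runner := NewCustomPrometheusProvider(mapper, kubeClient, promClient, namers, time.Minute)
	// begin refreshing the set of known series in the background
	runner.RunUntil(stopChan)
	// provider can now be registered with the custom-metrics API server
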
@@ -91,8 +90,8 @@ func (p *customPrometheusProvider) metricFor(value pmodel.SampleValue, groupReso
 			Namespace: namespace,
 		},
 		MetricName: metricName,
-		Timestamp:  metaV1.Time{time.Now()},
-		Value:      *resource.NewMilliQuantity(int64(value*1000.0), resource.DecimalSI).ToDec(),
+		Timestamp:  metav1.Time{time.Now()},
+		Value:      *resource.NewMilliQuantity(int64(value*1000.0), resource.DecimalSI),
 	}, nil
 }
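Values are still converted by scaling the raw Prometheus sample to milli-units; only the trailing .ToDec() call is dropped. A worked example of the conversion (the sample value is invented):

	var value pmodel.SampleValue = 0.0415
	q := *resource.NewMilliQuantity(int64(value*1000.0), resource.DecimalSI)
	// q.String() == "41m": three decimal places survive, anything finer is truncated
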
@@ -131,29 +130,13 @@ func (p *customPrometheusProvider) metricsFor(valueSet pmodel.Vector, info provi
 }

 func (p *customPrometheusProvider) buildQuery(info provider.CustomMetricInfo, namespace string, names ...string) (pmodel.Vector, error) {
-	kind, baseQuery, groupBy, found := p.QueryForMetric(info, namespace, names...)
+	query, found := p.QueryForMetric(info, namespace, names...)
 	if !found {
 		return nil, provider.NewMetricNotFoundError(info.GroupResource, info.Metric)
 	}

-	fullQuery := baseQuery
-	switch kind {
-	case CounterSeries:
-		fullQuery = prom.Selector(fmt.Sprintf("rate(%s[%s])", baseQuery, pmodel.Duration(p.rateInterval).String()))
-	case SecondsCounterSeries:
-		// TODO: futher modify for seconds?
-		fullQuery = prom.Selector(prom.Selector(fmt.Sprintf("rate(%s[%s])", baseQuery, pmodel.Duration(p.rateInterval).String())))
-	}
-
-	// NB: too small of a rate interval will return no results...
-
-	// sum over all other dimensions of this query (e.g. if we select on route, sum across all pods,
-	// but if we select on pods, sum across all routes), and split by the dimension of our resource
-	// TODO: return/populate the by list in SeriesForMetric
-	fullQuery = prom.Selector(fmt.Sprintf("sum(%s) by (%s)", fullQuery, groupBy))
-
-	// TODO: use an actual context
-	queryResults, err := p.promClient.Query(context.Background(), pmodel.Now(), fullQuery)
+	queryResults, err := p.promClient.Query(context.TODO(), pmodel.Now(), query)
 	if err != nil {
 		glog.Errorf("unable to fetch metrics from prometheus: %v", err)
 		// don't leak implementation details to the user
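All of the removed PromQL assembly now happens when the namers build the query, so buildQuery simply executes whatever QueryForMetric hands back. For reference, the shape the deleted CounterSeries branch used to produce (metric and label names here are hypothetical):

	rateInterval := 5 * time.Minute
	baseQuery := `http_requests_total{kubernetes_namespace="prod"}`
	groupBy := "kubernetes_pod_name"
	fullQuery := fmt.Sprintf("sum(rate(%s[%s])) by (%s)", baseQuery, pmodel.Duration(rateInterval).String(), groupBy)
	// fullQuery: sum(rate(http_requests_total{kubernetes_namespace="prod"}[5m])) by (kubernetes_pod_name)
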
@@ -197,23 +180,24 @@ func (p *customPrometheusProvider) getSingle(info provider.CustomMetricInfo, nam
 }

 func (p *customPrometheusProvider) getMultiple(info provider.CustomMetricInfo, namespace string, selector labels.Selector) (*custom_metrics.MetricValueList, error) {
 	// construct a client to list the names of objects matching the label selector
-	client, err := p.kubeClient.ClientForGroupVersionResource(info.GroupResource.WithVersion(""))
+	fullResources, err := p.mapper.ResourcesFor(info.GroupResource.WithVersion(""))
+	if err == nil && len(fullResources) == 0 {
+		err = fmt.Errorf("no fully versioned resources known for group-resource %v", info.GroupResource)
+	}
 	if err != nil {
-		glog.Errorf("unable to construct dynamic client to list matching resource names: %v", err)
+		glog.Errorf("unable to find preferred version to list matching resource names: %v", err)
 		// don't leak implementation details to the user
 		return nil, apierr.NewInternalError(fmt.Errorf("unable to list matching resources"))
 	}

-	// we can construct a this APIResource ourself, since the dynamic client only uses Name and Namespaced
-	apiRes := &metaV1.APIResource{
-		Name:       info.GroupResource.Resource,
-		Namespaced: info.Namespaced,
+	var client dynamic.ResourceInterface
+	if namespace != "" {
+		client = p.kubeClient.Resource(fullResources[0]).Namespace(namespace)
+	} else {
+		client = p.kubeClient.Resource(fullResources[0])
 	}

 	// actually list the objects matching the label selector
-	matchingObjectsRaw, err := client.Resource(apiRes, namespace).
-		List(metaV1.ListOptions{LabelSelector: selector.String()})
+	matchingObjectsRaw, err := client.List(metav1.ListOptions{LabelSelector: selector.String()})
 	if err != nil {
 		glog.Errorf("unable to list matching resource names: %v", err)
 		// don't leak implementation details to the user
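The dynamic client here is the client-go API of this vintage, where List takes only metav1.ListOptions (no context argument) and returns an *unstructured.UnstructuredList. A hypothetical standalone use of the same calls:

	gvr := schema.GroupVersionResource{Version: "v1", Resource: "pods"}
	objs, err := kubeClient.Resource(gvr).Namespace("prod").List(metav1.ListOptions{LabelSelector: "app=web"})
	if err != nil {
		// handle the error
	}
	for _, obj := range objs.Items {
		fmt.Println(obj.GetName()) // object names feed the metrics query, as in getMultiple above
	}
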
@@ -278,3 +262,86 @@ func (p *customPrometheusProvider) GetNamespacedMetricBySelector(groupResource s
 	}
 	return p.getMultiple(info, namespace, selector)
 }
+
+type cachingMetricsLister struct {
+	SeriesRegistry
+
+	promClient     prom.Client
+	updateInterval time.Duration
+	namers         []MetricNamer
+}
+
+func (l *cachingMetricsLister) Run() {
+	l.RunUntil(wait.NeverStop)
+}
+
+func (l *cachingMetricsLister) RunUntil(stopChan <-chan struct{}) {
+	go wait.Until(func() {
+		if err := l.updateMetrics(); err != nil {
+			utilruntime.HandleError(err)
+		}
+	}, l.updateInterval, stopChan)
+}
+
+type selectorSeries struct {
+	selector prom.Selector
+	series   []prom.Series
+}
+
+func (l *cachingMetricsLister) updateMetrics() error {
+	startTime := pmodel.Now().Add(-1 * l.updateInterval)
+
+	// don't do duplicate queries when it's just the matchers that change
+	seriesCacheByQuery := make(map[prom.Selector][]prom.Series)
+
+	// these can take a while on large clusters, so launch in parallel
+	// and don't duplicate
+	selectors := make(map[prom.Selector]struct{})
+	selectorSeriesChan := make(chan selectorSeries, len(l.namers))
+	errs := make(chan error, len(l.namers))
+	for _, namer := range l.namers {
+		sel := namer.Selector()
+		if _, ok := selectors[sel]; ok {
+			errs <- nil
+			selectorSeriesChan <- selectorSeries{}
+			continue
+		}
+		selectors[sel] = struct{}{}
+		go func() {
+			series, err := l.promClient.Series(context.TODO(), pmodel.Interval{startTime, 0}, sel)
+			if err != nil {
+				errs <- fmt.Errorf("unable to fetch metrics for query %q: %v", sel, err)
+				return
+			}
+			errs <- nil
+			selectorSeriesChan <- selectorSeries{
+				selector: sel,
+				series:   series,
+			}
+		}()
+	}
+
+	// iterate through, blocking until we've got all results
+	for range l.namers {
+		if err := <-errs; err != nil {
+			return fmt.Errorf("unable to update list of all metrics: %v", err)
+		}
+		if ss := <-selectorSeriesChan; ss.series != nil {
+			seriesCacheByQuery[ss.selector] = ss.series
+		}
+	}
+	close(errs)
+
+	newSeries := make([][]prom.Series, len(l.namers))
+	for i, namer := range l.namers {
+		series, cached := seriesCacheByQuery[namer.Selector()]
+		if !cached {
+			return fmt.Errorf("unable to update list of all metrics: no metrics retrieved for query %q", namer.Selector())
+		}
+		newSeries[i] = namer.FilterSeries(series)
+	}
+
+	glog.V(10).Infof("Set available metric list from Prometheus to: %v", newSeries)
+
+	return l.SetSeries(newSeries, l.namers)
+}