Catch this up to upstream/master.

Tony Compton 2018-06-27 17:31:43 -04:00
parent 0af14dc93d
commit b3e1323a1c
11 changed files with 1442 additions and 520 deletions

@@ -22,59 +22,58 @@ import (
 	"time"

 	"github.com/golang/glog"
 	"github.com/kubernetes-incubator/custom-metrics-apiserver/pkg/provider"
 	pmodel "github.com/prometheus/common/model"
 	apierr "k8s.io/apimachinery/pkg/api/errors"
 	apimeta "k8s.io/apimachinery/pkg/api/meta"
 	"k8s.io/apimachinery/pkg/api/resource"
-	metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/dynamic"
 	"k8s.io/metrics/pkg/apis/custom_metrics"

 	prom "github.com/directxman12/k8s-prometheus-adapter/pkg/client"
 )

+// Runnable represents something that can be run until told to stop.
+type Runnable interface {
+	// Run runs the runnable forever.
+	Run()
+	// RunUntil runs the runnable until the given channel is closed.
+	RunUntil(stopChan <-chan struct{})
+}
+
 type customPrometheusProvider struct {
 	mapper     apimeta.RESTMapper
-	kubeClient dynamic.ClientPool
+	kubeClient dynamic.Interface
 	promClient prom.Client

 	SeriesRegistry
-
-	rateInterval time.Duration
 }

-func NewCustomPrometheusProvider(mapper apimeta.RESTMapper, kubeClient dynamic.ClientPool, promClient prom.Client, labelPrefix string, updateInterval time.Duration, rateInterval time.Duration, stopChan <-chan struct{}) provider.CustomMetricsProvider {
+func NewCustomPrometheusProvider(mapper apimeta.RESTMapper, kubeClient dynamic.Interface, promClient prom.Client, namers []MetricNamer, updateInterval time.Duration) (provider.CustomMetricsProvider, Runnable) {
 	lister := &cachingMetricsLister{
 		updateInterval: updateInterval,
 		promClient:     promClient,
+		namers:         namers,
 		SeriesRegistry: &basicSeriesRegistry{
-			namer: metricNamer{
-				// TODO: populate the overrides list
-				overrides:   nil,
-				mapper:      mapper,
-				labelPrefix: labelPrefix,
-			},
+			mapper: mapper,
 		},
 	}

-	lister.RunUntil(stopChan)
-
 	return &customPrometheusProvider{
 		mapper:         mapper,
 		kubeClient:     kubeClient,
 		promClient:     promClient,
 		SeriesRegistry: lister,
-
-		rateInterval: rateInterval,
-	}
+	}, lister
 }

 func (p *customPrometheusProvider) metricFor(value pmodel.SampleValue, groupResource schema.GroupResource, namespace string, name string, metricName string) (*custom_metrics.MetricValue, error) {
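The constructor no longer takes a rate interval, label prefix, or stop channel; series naming moves into the MetricNamer list, and instead of starting the lister itself it now returns it as a Runnable so the caller controls startup. A minimal sketch of the new wiring (mapper, kubeClient, promClient, namers, and stopChan are assumed to have been built by the caller):

	provider, runner := NewCustomPrometheusProvider(mapper, kubeClient, promClient, namers, time.Minute)
	// begin refreshing the set of known series in the background
	runner.RunUntil(stopChan)
	// provider can now be registered with the custom-metrics API server
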
@@ -91,8 +90,8 @@ func (p *customPrometheusProvider) metricFor(value pmodel.SampleValue, groupReso
 			Namespace: namespace,
 		},
 		MetricName: metricName,
-		Timestamp:  metaV1.Time{time.Now()},
-		Value:      *resource.NewMilliQuantity(int64(value*1000.0), resource.DecimalSI).ToDec(),
+		Timestamp:  metav1.Time{time.Now()},
+		Value:      *resource.NewMilliQuantity(int64(value*1000.0), resource.DecimalSI),
 	}, nil
 }
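Values are still converted by scaling the raw Prometheus sample to milli-units; only the trailing .ToDec() call is dropped. A worked example of the conversion (the sample value is invented):

	var value pmodel.SampleValue = 0.0415
	q := *resource.NewMilliQuantity(int64(value*1000.0), resource.DecimalSI)
	// q.String() == "41m": three decimal places survive, anything finer is truncated
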
@@ -131,29 +130,13 @@ func (p *customPrometheusProvider) metricsFor(valueSet pmodel.Vector, info provi
 }

 func (p *customPrometheusProvider) buildQuery(info provider.CustomMetricInfo, namespace string, names ...string) (pmodel.Vector, error) {
-	kind, baseQuery, groupBy, found := p.QueryForMetric(info, namespace, names...)
+	query, found := p.QueryForMetric(info, namespace, names...)
 	if !found {
 		return nil, provider.NewMetricNotFoundError(info.GroupResource, info.Metric)
 	}

-	fullQuery := baseQuery
-	switch kind {
-	case CounterSeries:
-		fullQuery = prom.Selector(fmt.Sprintf("rate(%s[%s])", baseQuery, pmodel.Duration(p.rateInterval).String()))
-	case SecondsCounterSeries:
-		// TODO: futher modify for seconds?
-		fullQuery = prom.Selector(prom.Selector(fmt.Sprintf("rate(%s[%s])", baseQuery, pmodel.Duration(p.rateInterval).String())))
-	}
-
-	// NB: too small of a rate interval will return no results...
-
-	// sum over all other dimensions of this query (e.g. if we select on route, sum across all pods,
-	// but if we select on pods, sum across all routes), and split by the dimension of our resource
-	// TODO: return/populate the by list in SeriesForMetric
-	fullQuery = prom.Selector(fmt.Sprintf("sum(%s) by (%s)", fullQuery, groupBy))
-
-	// TODO: use an actual context
-	queryResults, err := p.promClient.Query(context.Background(), pmodel.Now(), fullQuery)
+	queryResults, err := p.promClient.Query(context.TODO(), pmodel.Now(), query)
 	if err != nil {
 		glog.Errorf("unable to fetch metrics from prometheus: %v", err)
 		// don't leak implementation details to the user
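All of the removed PromQL assembly now happens when the namers build the query, so buildQuery simply executes whatever QueryForMetric hands back. For reference, the shape the deleted CounterSeries branch used to produce (metric and label names here are hypothetical):

	rateInterval := 5 * time.Minute
	baseQuery := `http_requests_total{kubernetes_namespace="prod"}`
	groupBy := "kubernetes_pod_name"
	fullQuery := fmt.Sprintf("sum(rate(%s[%s])) by (%s)", baseQuery, pmodel.Duration(rateInterval).String(), groupBy)
	// fullQuery: sum(rate(http_requests_total{kubernetes_namespace="prod"}[5m])) by (kubernetes_pod_name)
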
@@ -197,23 +180,24 @@ func (p *customPrometheusProvider) getSingle(info provider.CustomMetricInfo, nam
 }

 func (p *customPrometheusProvider) getMultiple(info provider.CustomMetricInfo, namespace string, selector labels.Selector) (*custom_metrics.MetricValueList, error) {
 	// construct a client to list the names of objects matching the label selector
-	client, err := p.kubeClient.ClientForGroupVersionResource(info.GroupResource.WithVersion(""))
+	fullResources, err := p.mapper.ResourcesFor(info.GroupResource.WithVersion(""))
+	if err == nil && len(fullResources) == 0 {
+		err = fmt.Errorf("no fully versioned resources known for group-resource %v", info.GroupResource)
+	}
 	if err != nil {
-		glog.Errorf("unable to construct dynamic client to list matching resource names: %v", err)
+		glog.Errorf("unable to find preferred version to list matching resource names: %v", err)
 		// don't leak implementation details to the user
 		return nil, apierr.NewInternalError(fmt.Errorf("unable to list matching resources"))
 	}

-	// we can construct a this APIResource ourself, since the dynamic client only uses Name and Namespaced
-	apiRes := &metaV1.APIResource{
-		Name:       info.GroupResource.Resource,
-		Namespaced: info.Namespaced,
+	var client dynamic.ResourceInterface
+	if namespace != "" {
+		client = p.kubeClient.Resource(fullResources[0]).Namespace(namespace)
+	} else {
+		client = p.kubeClient.Resource(fullResources[0])
 	}

 	// actually list the objects matching the label selector
-	matchingObjectsRaw, err := client.Resource(apiRes, namespace).
-		List(metaV1.ListOptions{LabelSelector: selector.String()})
+	matchingObjectsRaw, err := client.List(metav1.ListOptions{LabelSelector: selector.String()})
 	if err != nil {
 		glog.Errorf("unable to list matching resource names: %v", err)
 		// don't leak implementation details to the user
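The dynamic client here is the client-go API of this vintage, where List takes only metav1.ListOptions (no context argument) and returns an *unstructured.UnstructuredList. A hypothetical standalone use of the same calls:

	gvr := schema.GroupVersionResource{Version: "v1", Resource: "pods"}
	objs, err := kubeClient.Resource(gvr).Namespace("prod").List(metav1.ListOptions{LabelSelector: "app=web"})
	if err != nil {
		// handle the error
	}
	for _, obj := range objs.Items {
		fmt.Println(obj.GetName()) // object names feed the metrics query, as in getMultiple above
	}
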
@@ -278,3 +262,86 @@ func (p *customPrometheusProvider) GetNamespacedMetricBySelector(groupResource s
 	}
 	return p.getMultiple(info, namespace, selector)
 }
+
+type cachingMetricsLister struct {
+	SeriesRegistry
+
+	promClient     prom.Client
+	updateInterval time.Duration
+	namers         []MetricNamer
+}
+
+func (l *cachingMetricsLister) Run() {
+	l.RunUntil(wait.NeverStop)
+}
+
+func (l *cachingMetricsLister) RunUntil(stopChan <-chan struct{}) {
+	go wait.Until(func() {
+		if err := l.updateMetrics(); err != nil {
+			utilruntime.HandleError(err)
+		}
+	}, l.updateInterval, stopChan)
+}
+
+type selectorSeries struct {
+	selector prom.Selector
+	series   []prom.Series
+}
+
+func (l *cachingMetricsLister) updateMetrics() error {
+	startTime := pmodel.Now().Add(-1 * l.updateInterval)
+
+	// don't do duplicate queries when it's just the matchers that change
+	seriesCacheByQuery := make(map[prom.Selector][]prom.Series)
+
+	// these can take a while on large clusters, so launch in parallel
+	// and don't duplicate
+	selectors := make(map[prom.Selector]struct{})
+	selectorSeriesChan := make(chan selectorSeries, len(l.namers))
+	errs := make(chan error, len(l.namers))
+	for _, namer := range l.namers {
+		sel := namer.Selector()
+		if _, ok := selectors[sel]; ok {
+			errs <- nil
+			selectorSeriesChan <- selectorSeries{}
+			continue
+		}
+		selectors[sel] = struct{}{}
+		go func() {
+			series, err := l.promClient.Series(context.TODO(), pmodel.Interval{startTime, 0}, sel)
+			if err != nil {
+				errs <- fmt.Errorf("unable to fetch metrics for query %q: %v", sel, err)
+				return
+			}
+			errs <- nil
+			selectorSeriesChan <- selectorSeries{
+				selector: sel,
+				series:   series,
+			}
+		}()
+	}
+
+	// iterate through, blocking until we've got all results
+	for range l.namers {
+		if err := <-errs; err != nil {
+			return fmt.Errorf("unable to update list of all metrics: %v", err)
+		}
+		if ss := <-selectorSeriesChan; ss.series != nil {
+			seriesCacheByQuery[ss.selector] = ss.series
+		}
+	}
+	close(errs)
+
+	newSeries := make([][]prom.Series, len(l.namers))
+	for i, namer := range l.namers {
+		series, cached := seriesCacheByQuery[namer.Selector()]
+		if !cached {
+			return fmt.Errorf("unable to update list of all metrics: no metrics retrieved for query %q", namer.Selector())
+		}
+		newSeries[i] = namer.FilterSeries(series)
+	}
+
+	glog.V(10).Infof("Set available metric list from Prometheus to: %v", newSeries)
+
+	return l.SetSeries(newSeries, l.namers)
+}