Merge pull request #431 from dgrisonnet/neg-resource-metrics

Prevent prometheus-adapter from returning negative resource metrics
This commit is contained in:
Damien Grisonnet 2021-07-16 16:41:09 +02:00 committed by GitHub
commit 3fde77674e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 80 additions and 5 deletions

View file

@ -19,6 +19,7 @@ package resourceprovider
import (
"context"
"fmt"
"math"
"sync"
"time"
@ -390,13 +391,17 @@ func (p *resourceProvider) runQuery(now pmodel.Time, queryInfo resourceQuery, re
// associate the results back to each given pod or node
res := make(queryResults, len(*rawRes.Vector))
for _, val := range *rawRes.Vector {
if val == nil {
// skip empty values
for _, sample := range *rawRes.Vector {
// skip empty samples
if sample == nil {
continue
}
resKey := string(val.Metric[resourceLbl])
res[resKey] = append(res[resKey], val)
// replace NaN and negative values by zero
if math.IsNaN(float64(sample.Value)) || sample.Value < 0 {
sample.Value = 0
}
resKey := string(sample.Metric[resourceLbl])
res[resKey] = append(res[resKey], sample)
}
return res, nil

View file

@ -17,6 +17,7 @@ limitations under the License.
package resourceprovider
import (
"math"
"time"
corev1 "k8s.io/api/core/v1"
@ -206,6 +207,47 @@ var _ = Describe("Resource Metrics Provider", func() {
Expect(times[0]).To(Equal(api.TimeInfo{Timestamp: pmodel.Time(10).Time(), Window: 1 * time.Minute}))
})
// Spec: pod metrics whose samples are NaN or negative must come back as zero.
It("should return metrics of value zero when pod metrics have NaN or negative values", func() {
	// Selectors used as keys for the canned Prometheus responses below.
	cpuSelector := mustBuild(cpuQueries.contQuery.Build("", podResource, "some-ns", []string{cpuQueries.containerLabel}, labels.Everything(), "pod1", "pod3"))
	memSelector := mustBuild(memQueries.contQuery.Build("", podResource, "some-ns", []string{cpuQueries.containerLabel}, labels.Everything(), "pod1", "pod3"))

	// Every container except pod3/cont2 (CPU) and pod1/cont1 (memory) reports
	// an invalid value: either negative or NaN.
	fakeProm.QueryResults = map[prom.Selector]prom.QueryResult{
		cpuSelector: buildQueryRes("container_cpu_usage_seconds_total",
			buildPodSample("some-ns", "pod1", "cont1", -1100.0, 10),
			buildPodSample("some-ns", "pod1", "cont2", math.NaN(), 20),
			buildPodSample("some-ns", "pod3", "cont1", -1300.0, 10),
			buildPodSample("some-ns", "pod3", "cont2", 1310.0, 20),
		),
		memSelector: buildQueryRes("container_memory_working_set_bytes",
			buildPodSample("some-ns", "pod1", "cont1", 3100.0, 11),
			buildPodSample("some-ns", "pod1", "cont2", -3110.0, 21),
			buildPodSample("some-ns", "pod3", "cont1", math.NaN(), 11),
			buildPodSample("some-ns", "pod3", "cont2", -3310.0, 21),
		),
	}

	By("querying for metrics for some pods")
	firstPod := types.NamespacedName{Namespace: "some-ns", Name: "pod1"}
	secondPod := types.NamespacedName{Namespace: "some-ns", Name: "pod3"}
	times, metricVals, err := prov.GetPodMetrics(firstPod, secondPod)
	Expect(err).NotTo(HaveOccurred())

	By("verifying that the reported times for each are the earliest times for each pod")
	wantTimes := []api.TimeInfo{
		{Timestamp: pmodel.Time(10).Time(), Window: 1 * time.Minute},
		{Timestamp: pmodel.Time(10).Time(), Window: 1 * time.Minute},
	}
	Expect(times).To(Equal(wantTimes))

	By("verifying that NaN and negative values were replaced by zero")
	Expect(metricVals).To(HaveLen(2))

	// Expected usage for pod1: only cont1's memory sample was valid.
	wantFirstPodCont1 := metrics.ContainerMetrics{Name: "cont1", Usage: buildResList(0, 3100.0)}
	wantFirstPodCont2 := metrics.ContainerMetrics{Name: "cont2", Usage: buildResList(0, 0)}
	Expect(metricVals[0]).To(ConsistOf(wantFirstPodCont1, wantFirstPodCont2))

	// Expected usage for pod3: only cont2's CPU sample was valid.
	wantSecondPodCont1 := metrics.ContainerMetrics{Name: "cont1", Usage: buildResList(0, 0)}
	wantSecondPodCont2 := metrics.ContainerMetrics{Name: "cont2", Usage: buildResList(1310.0, 0)}
	Expect(metricVals[1]).To(ConsistOf(wantSecondPodCont1, wantSecondPodCont2))
})
It("should be able to list metrics for nodes", func() {
fakeProm.QueryResults = map[prom.Selector]prom.QueryResult{
mustBuild(cpuQueries.nodeQuery.Build("", nodeResource, "", nil, labels.Everything(), "node1", "node2")): buildQueryRes("container_cpu_usage_seconds_total",
@ -265,4 +307,32 @@ var _ = Describe("Resource Metrics Provider", func() {
{},
}))
})
// Spec: node metrics whose samples are NaN or negative must come back as zero.
It("should return metrics of value zero when node metrics have NaN or negative values", func() {
	// Canned Prometheus responses: node1 has a negative CPU sample and node2
	// has a NaN memory sample; the other two samples are valid.
	fakeProm.QueryResults = map[prom.Selector]prom.QueryResult{
		mustBuild(cpuQueries.nodeQuery.Build("", nodeResource, "", nil, labels.Everything(), "node1", "node2")): buildQueryRes("container_cpu_usage_seconds_total",
			buildNodeSample("node1", -1100.0, 10),
			buildNodeSample("node2", 1200.0, 14),
		),
		mustBuild(memQueries.nodeQuery.Build("", nodeResource, "", nil, labels.Everything(), "node1", "node2")): buildQueryRes("container_memory_working_set_bytes",
			buildNodeSample("node1", 2100.0, 11),
			buildNodeSample("node2", math.NaN(), 12),
		),
	}

	By("querying for metrics for some nodes")
	times, metricVals, err := prov.GetNodeMetrics("node1", "node2")
	Expect(err).NotTo(HaveOccurred())

	// The step message names nodes (not pods), since this spec exercises GetNodeMetrics.
	By("verifying that the reported times for each are the earliest times for each node")
	Expect(times).To(Equal([]api.TimeInfo{
		{Timestamp: pmodel.Time(10).Time(), Window: 1 * time.Minute},
		{Timestamp: pmodel.Time(12).Time(), Window: 1 * time.Minute},
	}))

	By("verifying that NaN and negative values were replaced by zero")
	// The invalid samples (node1 CPU, node2 memory) are expected to be zeroed;
	// the valid ones pass through unchanged.
	Expect(metricVals).To(Equal([]corev1.ResourceList{
		buildResList(0, 2100.0),
		buildResList(1200.0, 0),
	}))
})
})