mirror of
https://github.com/kubernetes-sigs/prometheus-adapter.git
synced 2026-04-05 17:27:51 +00:00
pkg/resourceprovider: guard from negative metrics
When serving the resource metrics API, prometheus-adapter may return negative values for pods/nodes memory and CPU usage. This happens because Prometheus sees counter resets which results in Prometheus interpolating data incorrectly to avoid the counter value going down. To prevent that, we need to add some guards in prometheus-adapter to replace the negative value by zero whenever it detects one. Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>
This commit is contained in:
parent
93450fc29f
commit
0b3ac78d19
2 changed files with 80 additions and 5 deletions
|
|
@ -19,6 +19,7 @@ package resourceprovider
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
|
|
@ -390,13 +391,17 @@ func (p *resourceProvider) runQuery(now pmodel.Time, queryInfo resourceQuery, re
|
|||
|
||||
// associate the results back to each given pod or node
|
||||
res := make(queryResults, len(*rawRes.Vector))
|
||||
for _, val := range *rawRes.Vector {
|
||||
if val == nil {
|
||||
// skip empty values
|
||||
for _, sample := range *rawRes.Vector {
|
||||
// skip empty samples
|
||||
if sample == nil {
|
||||
continue
|
||||
}
|
||||
resKey := string(val.Metric[resourceLbl])
|
||||
res[resKey] = append(res[resKey], val)
|
||||
// replace NaN and negative values by zero
|
||||
if math.IsNaN(float64(sample.Value)) || sample.Value < 0 {
|
||||
sample.Value = 0
|
||||
}
|
||||
resKey := string(sample.Metric[resourceLbl])
|
||||
res[resKey] = append(res[resKey], sample)
|
||||
}
|
||||
|
||||
return res, nil
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||
package resourceprovider
|
||||
|
||||
import (
|
||||
"math"
|
||||
"time"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
|
|
@ -206,6 +207,47 @@ var _ = Describe("Resource Metrics Provider", func() {
|
|||
Expect(times[0]).To(Equal(api.TimeInfo{Timestamp: pmodel.Time(10).Time(), Window: 1 * time.Minute}))
|
||||
})
|
||||
|
||||
It("should return metrics of value zero when pod metrics have NaN or negative values", func() {
|
||||
fakeProm.QueryResults = map[prom.Selector]prom.QueryResult{
|
||||
mustBuild(cpuQueries.contQuery.Build("", podResource, "some-ns", []string{cpuQueries.containerLabel}, labels.Everything(), "pod1", "pod3")): buildQueryRes("container_cpu_usage_seconds_total",
|
||||
buildPodSample("some-ns", "pod1", "cont1", -1100.0, 10),
|
||||
buildPodSample("some-ns", "pod1", "cont2", math.NaN(), 20),
|
||||
buildPodSample("some-ns", "pod3", "cont1", -1300.0, 10),
|
||||
buildPodSample("some-ns", "pod3", "cont2", 1310.0, 20),
|
||||
),
|
||||
mustBuild(memQueries.contQuery.Build("", podResource, "some-ns", []string{cpuQueries.containerLabel}, labels.Everything(), "pod1", "pod3")): buildQueryRes("container_memory_working_set_bytes",
|
||||
buildPodSample("some-ns", "pod1", "cont1", 3100.0, 11),
|
||||
buildPodSample("some-ns", "pod1", "cont2", -3110.0, 21),
|
||||
buildPodSample("some-ns", "pod3", "cont1", math.NaN(), 11),
|
||||
buildPodSample("some-ns", "pod3", "cont2", -3310.0, 21),
|
||||
),
|
||||
}
|
||||
|
||||
By("querying for metrics for some pods")
|
||||
times, metricVals, err := prov.GetPodMetrics(
|
||||
types.NamespacedName{Namespace: "some-ns", Name: "pod1"},
|
||||
types.NamespacedName{Namespace: "some-ns", Name: "pod3"},
|
||||
)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
By("verifying that the reported times for each are the earliest times for each pod")
|
||||
Expect(times).To(Equal([]api.TimeInfo{
|
||||
{Timestamp: pmodel.Time(10).Time(), Window: 1 * time.Minute},
|
||||
{Timestamp: pmodel.Time(10).Time(), Window: 1 * time.Minute},
|
||||
}))
|
||||
|
||||
By("verifying that NaN and negative values were replaced by zero")
|
||||
Expect(metricVals).To(HaveLen(2))
|
||||
Expect(metricVals[0]).To(ConsistOf(
|
||||
metrics.ContainerMetrics{Name: "cont1", Usage: buildResList(0, 3100.0)},
|
||||
metrics.ContainerMetrics{Name: "cont2", Usage: buildResList(0, 0)},
|
||||
))
|
||||
Expect(metricVals[1]).To(ConsistOf(
|
||||
metrics.ContainerMetrics{Name: "cont1", Usage: buildResList(0, 0)},
|
||||
metrics.ContainerMetrics{Name: "cont2", Usage: buildResList(1310.0, 0)},
|
||||
))
|
||||
})
|
||||
|
||||
It("should be able to list metrics for nodes", func() {
|
||||
fakeProm.QueryResults = map[prom.Selector]prom.QueryResult{
|
||||
mustBuild(cpuQueries.nodeQuery.Build("", nodeResource, "", nil, labels.Everything(), "node1", "node2")): buildQueryRes("container_cpu_usage_seconds_total",
|
||||
|
|
@ -265,4 +307,32 @@ var _ = Describe("Resource Metrics Provider", func() {
|
|||
{},
|
||||
}))
|
||||
})
|
||||
|
||||
It("should return metrics of value zero when node metrics have NaN or negative values", func() {
|
||||
fakeProm.QueryResults = map[prom.Selector]prom.QueryResult{
|
||||
mustBuild(cpuQueries.nodeQuery.Build("", nodeResource, "", nil, labels.Everything(), "node1", "node2")): buildQueryRes("container_cpu_usage_seconds_total",
|
||||
buildNodeSample("node1", -1100.0, 10),
|
||||
buildNodeSample("node2", 1200.0, 14),
|
||||
),
|
||||
mustBuild(memQueries.nodeQuery.Build("", nodeResource, "", nil, labels.Everything(), "node1", "node2")): buildQueryRes("container_memory_working_set_bytes",
|
||||
buildNodeSample("node1", 2100.0, 11),
|
||||
buildNodeSample("node2", math.NaN(), 12),
|
||||
),
|
||||
}
|
||||
By("querying for metrics for some nodes")
|
||||
times, metricVals, err := prov.GetNodeMetrics("node1", "node2")
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
By("verifying that the reported times for each are the earliest times for each pod")
|
||||
Expect(times).To(Equal([]api.TimeInfo{
|
||||
{Timestamp: pmodel.Time(10).Time(), Window: 1 * time.Minute},
|
||||
{Timestamp: pmodel.Time(12).Time(), Window: 1 * time.Minute},
|
||||
}))
|
||||
|
||||
By("verifying that NaN and negative values were replaced by zero")
|
||||
Expect(metricVals).To(Equal([]corev1.ResourceList{
|
||||
buildResList(0, 2100.0),
|
||||
buildResList(1200.0, 0),
|
||||
}))
|
||||
})
|
||||
})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue