pkg/resourceprovider: guard from negative metrics

When serving the resource metrics API, prometheus-adapter may return
negative values for pods/nodes memory and CPU usage. This happens
because Prometheus sees counter resets, which results in Prometheus
interpolating data incorrectly to avoid the counter value going down.
To prevent that, we need to add guards in prometheus-adapter that
replace any negative (or NaN) value with zero whenever one is detected.

Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>
This commit is contained in:
Damien Grisonnet 2021-07-12 15:35:36 +02:00
parent 93450fc29f
commit 0b3ac78d19
2 changed files with 80 additions and 5 deletions

View file

@ -19,6 +19,7 @@ package resourceprovider
import (
"context"
"fmt"
"math"
"sync"
"time"
@ -390,13 +391,17 @@ func (p *resourceProvider) runQuery(now pmodel.Time, queryInfo resourceQuery, re
// associate the results back to each given pod or node
res := make(queryResults, len(*rawRes.Vector))
for _, val := range *rawRes.Vector {
if val == nil {
// skip empty values
for _, sample := range *rawRes.Vector {
// skip empty samples
if sample == nil {
continue
}
resKey := string(val.Metric[resourceLbl])
res[resKey] = append(res[resKey], val)
// replace NaN and negative values by zero
if math.IsNaN(float64(sample.Value)) || sample.Value < 0 {
sample.Value = 0
}
resKey := string(sample.Metric[resourceLbl])
res[resKey] = append(res[resKey], sample)
}
return res, nil