diff --git a/pkg/resourceprovider/provider.go b/pkg/resourceprovider/provider.go index 13d8c787..483377fa 100644 --- a/pkg/resourceprovider/provider.go +++ b/pkg/resourceprovider/provider.go @@ -19,6 +19,7 @@ package resourceprovider import ( "context" "fmt" + "math" "sync" "time" @@ -390,13 +391,17 @@ func (p *resourceProvider) runQuery(now pmodel.Time, queryInfo resourceQuery, re // associate the results back to each given pod or node res := make(queryResults, len(*rawRes.Vector)) - for _, val := range *rawRes.Vector { - if val == nil { - // skip empty values + for _, sample := range *rawRes.Vector { + // skip empty samples + if sample == nil { continue } - resKey := string(val.Metric[resourceLbl]) - res[resKey] = append(res[resKey], val) + // replace NaN and negative values by zero + if math.IsNaN(float64(sample.Value)) || sample.Value < 0 { + sample.Value = 0 + } + resKey := string(sample.Metric[resourceLbl]) + res[resKey] = append(res[resKey], sample) } return res, nil diff --git a/pkg/resourceprovider/provider_test.go b/pkg/resourceprovider/provider_test.go index cd005409..f3fe8d7b 100644 --- a/pkg/resourceprovider/provider_test.go +++ b/pkg/resourceprovider/provider_test.go @@ -17,6 +17,7 @@ limitations under the License. package resourceprovider import ( + "math" "time" corev1 "k8s.io/api/core/v1" @@ -206,6 +207,47 @@ var _ = Describe("Resource Metrics Provider", func() { Expect(times[0]).To(Equal(api.TimeInfo{Timestamp: pmodel.Time(10).Time(), Window: 1 * time.Minute})) }) + It("should return metrics of value zero when pod metrics have NaN or negative values", func() { + fakeProm.QueryResults = map[prom.Selector]prom.QueryResult{ + mustBuild(cpuQueries.contQuery.Build("", podResource, "some-ns", []string{cpuQueries.containerLabel}, labels.Everything(), "pod1", "pod3")): buildQueryRes("container_cpu_usage_seconds_total", + buildPodSample("some-ns", "pod1", "cont1", -1100.0, 10), + buildPodSample("some-ns", "pod1", "cont2", math.NaN(), 20), + buildPodSample("some-ns", "pod3", "cont1", -1300.0, 10), + buildPodSample("some-ns", "pod3", "cont2", 1310.0, 20), + ), + mustBuild(memQueries.contQuery.Build("", podResource, "some-ns", []string{cpuQueries.containerLabel}, labels.Everything(), "pod1", "pod3")): buildQueryRes("container_memory_working_set_bytes", + buildPodSample("some-ns", "pod1", "cont1", 3100.0, 11), + buildPodSample("some-ns", "pod1", "cont2", -3110.0, 21), + buildPodSample("some-ns", "pod3", "cont1", math.NaN(), 11), + buildPodSample("some-ns", "pod3", "cont2", -3310.0, 21), + ), + } + + By("querying for metrics for some pods") + times, metricVals, err := prov.GetPodMetrics( + types.NamespacedName{Namespace: "some-ns", Name: "pod1"}, + types.NamespacedName{Namespace: "some-ns", Name: "pod3"}, + ) + Expect(err).NotTo(HaveOccurred()) + + By("verifying that the reported times for each are the earliest times for each pod") + Expect(times).To(Equal([]api.TimeInfo{ + {Timestamp: pmodel.Time(10).Time(), Window: 1 * time.Minute}, + {Timestamp: pmodel.Time(10).Time(), Window: 1 * time.Minute}, + })) + + By("verifying that NaN and negative values were replaced by zero") + Expect(metricVals).To(HaveLen(2)) + Expect(metricVals[0]).To(ConsistOf( + metrics.ContainerMetrics{Name: "cont1", Usage: buildResList(0, 3100.0)}, + metrics.ContainerMetrics{Name: "cont2", Usage: buildResList(0, 0)}, + )) + Expect(metricVals[1]).To(ConsistOf( + metrics.ContainerMetrics{Name: "cont1", Usage: buildResList(0, 0)}, + metrics.ContainerMetrics{Name: "cont2", Usage: buildResList(1310.0, 0)}, + )) + }) + It("should be able to list metrics for nodes", func() { fakeProm.QueryResults = map[prom.Selector]prom.QueryResult{ mustBuild(cpuQueries.nodeQuery.Build("", nodeResource, "", nil, labels.Everything(), "node1", "node2")): buildQueryRes("container_cpu_usage_seconds_total", @@ -265,4 +307,32 @@ var _ = Describe("Resource Metrics Provider", func() { {}, })) }) + + It("should return metrics of value zero when node metrics have NaN or negative values", func() { + fakeProm.QueryResults = map[prom.Selector]prom.QueryResult{ + mustBuild(cpuQueries.nodeQuery.Build("", nodeResource, "", nil, labels.Everything(), "node1", "node2")): buildQueryRes("container_cpu_usage_seconds_total", + buildNodeSample("node1", -1100.0, 10), + buildNodeSample("node2", 1200.0, 14), + ), + mustBuild(memQueries.nodeQuery.Build("", nodeResource, "", nil, labels.Everything(), "node1", "node2")): buildQueryRes("container_memory_working_set_bytes", + buildNodeSample("node1", 2100.0, 11), + buildNodeSample("node2", math.NaN(), 12), + ), + } + By("querying for metrics for some nodes") + times, metricVals, err := prov.GetNodeMetrics("node1", "node2") + Expect(err).NotTo(HaveOccurred()) + + By("verifying that the reported times for each are the earliest times for each pod") + Expect(times).To(Equal([]api.TimeInfo{ + {Timestamp: pmodel.Time(10).Time(), Window: 1 * time.Minute}, + {Timestamp: pmodel.Time(12).Time(), Window: 1 * time.Minute}, + })) + + By("verifying that NaN and negative values were replaced by zero") + Expect(metricVals).To(Equal([]corev1.ResourceList{ + buildResList(0, 2100.0), + buildResList(1200.0, 0), + })) + }) })