Skip to content

Commit

Permalink
fix: Fix karpenter_nodeclaims_drifted metric for v0.32.x (#880)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis authored Dec 18, 2023
1 parent c79a02d commit 8675e14
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pkg/controllers/nodeclaim/disruption/drift.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func (d *Drift) Reconcile(ctx context.Context, nodePool *v1beta1.NodePool, nodeC
Reason: string(driftedReason),
})
if !hasDriftedCondition {
logging.FromContext(ctx).Debugf("marking drifted")
logging.FromContext(ctx).With("reason", string(driftedReason)).Debugf("marking drifted")
nodeclaimutil.DisruptedCounter(nodeClaim, metrics.DriftReason).Inc()
nodeclaimutil.DriftedCounter(nodeClaim, string(driftedReason)).Inc()
}
Expand Down
53 changes: 53 additions & 0 deletions pkg/controllers/nodeclaim/disruption/nodeclaim_drift_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,59 @@ var _ = Describe("NodeClaim/Drift", func() {
// NodeClaims are required to be launched before they can be evaluated for drift
nodeClaim.StatusConditions().MarkTrue(v1beta1.Launched)
})
// Metrics specs: each one reconciles a NodeClaim that ends up with the Drifted
// condition set and then asserts on the counter sample that was recorded.
Context("Metrics", func() {
	It("should fire a karpenter_nodeclaims_drifted metric when drifted", func() {
		// The value assigned to cp.Drifted is the one this spec expects back in the "type" label.
		cp.Drifted = "CloudProviderDrifted"
		ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
		ExpectReconcileSucceeded(ctx, nodeClaimDisruptionController, client.ObjectKeyFromObject(nodeClaim))

		// Re-read the NodeClaim so the assertions see the persisted status.
		nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
		driftedCondition := nodeClaim.StatusConditions().GetCondition(v1beta1.Drifted)
		Expect(driftedCondition.IsTrue()).To(BeTrue())

		wantLabels := map[string]string{
			"type":     "CloudProviderDrifted",
			"nodepool": nodePool.Name,
		}
		driftedMetric, ok := FindMetricWithLabelValues("karpenter_nodeclaims_drifted", wantLabels)
		Expect(ok).To(BeTrue())
		Expect(driftedMetric.GetCounter().GetValue()).To(BeNumerically("==", 1))
	})
	It("should pass-through the correct drifted type value through the karpenter_nodeclaims_drifted metric", func() {
		cp.Drifted = "drifted"
		// Replace the NodePool requirements; the spec expects the resulting drift
		// reason to be RequirementsDrifted rather than the cp.Drifted value above.
		instanceTypeMustNotExist := v1.NodeSelectorRequirement{
			Key:      v1.LabelInstanceTypeStable,
			Operator: v1.NodeSelectorOpDoesNotExist,
		}
		nodePool.Spec.Template.Spec.Requirements = []v1.NodeSelectorRequirement{instanceTypeMustNotExist}
		ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
		ExpectReconcileSucceeded(ctx, nodeClaimDisruptionController, client.ObjectKeyFromObject(nodeClaim))

		nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
		Expect(nodeClaim.StatusConditions().GetCondition(v1beta1.Drifted).IsTrue()).To(BeTrue())
		driftedReason := nodeClaim.StatusConditions().GetCondition(v1beta1.Drifted).Reason
		Expect(driftedReason).To(Equal(string(disruption.RequirementsDrifted)))

		// The metric's "type" label must carry the condition reason.
		wantLabels := map[string]string{
			"type":     "RequirementsDrifted",
			"nodepool": nodePool.Name,
		}
		driftedMetric, ok := FindMetricWithLabelValues("karpenter_nodeclaims_drifted", wantLabels)
		Expect(ok).To(BeTrue())
		Expect(driftedMetric.GetCounter().GetValue()).To(BeNumerically("==", 1))
	})
	It("should fire a karpenter_nodeclaims_disrupted metric when drifted", func() {
		cp.Drifted = "drifted"
		ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
		ExpectReconcileSucceeded(ctx, nodeClaimDisruptionController, client.ObjectKeyFromObject(nodeClaim))

		nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
		driftedCondition := nodeClaim.StatusConditions().GetCondition(v1beta1.Drifted)
		Expect(driftedCondition.IsTrue()).To(BeTrue())

		// The disrupted counter is looked up with the fixed "drift" type label.
		wantLabels := map[string]string{
			"type":     "drift",
			"nodepool": nodePool.Name,
		}
		disruptedMetric, ok := FindMetricWithLabelValues("karpenter_nodeclaims_disrupted", wantLabels)
		Expect(ok).To(BeTrue())
		Expect(disruptedMetric.GetCounter().GetValue()).To(BeNumerically("==", 1))
	})
})
It("should detect drift", func() {
cp.Drifted = "drifted"
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
Expand Down
19 changes: 18 additions & 1 deletion pkg/controllers/nodeclaim/disruption/nodeclaim_emptiness_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,24 @@ var _ = Describe("NodeClaim/Emptiness", func() {
},
})
})

// Metrics spec: reconciles an initialized NodeClaim, expects the Empty condition
// to be set, and asserts on the disrupted counter recorded for it.
Context("Metrics", func() {
	It("should fire a karpenter_nodeclaims_disrupted metric when empty", func() {
		ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node)
		ExpectMakeNodeClaimsInitialized(ctx, env.Client, nodeClaim)

		ExpectReconcileSucceeded(ctx, nodeClaimDisruptionController, client.ObjectKeyFromObject(nodeClaim))

		// Re-read the NodeClaim so the assertions see the persisted status.
		nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
		emptyCondition := nodeClaim.StatusConditions().GetCondition(v1beta1.Empty)
		Expect(emptyCondition.IsTrue()).To(BeTrue())

		// The disrupted counter is looked up with the fixed "emptiness" type label.
		wantLabels := map[string]string{
			"type":     "emptiness",
			"nodepool": nodePool.Name,
		}
		disruptedMetric, ok := FindMetricWithLabelValues("karpenter_nodeclaims_disrupted", wantLabels)
		Expect(ok).To(BeTrue())
		Expect(disruptedMetric.GetCounter().GetValue()).To(BeNumerically("==", 1))
	})
})
It("should mark NodeClaims as empty", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node)
ExpectMakeNodeClaimsInitialized(ctx, env.Client, nodeClaim)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,26 @@ var _ = Describe("NodeClaim/Expiration", func() {
},
})
})

// Metrics spec: sets a 30s ExpireAfter, advances the fake clock by 60s, and
// asserts on the disrupted counter once the Expired condition is set.
Context("Metrics", func() {
	It("should fire a karpenter_nodeclaims_disrupted metric when expired", func() {
		nodePool.Spec.Disruption.ExpireAfter.Duration = lo.ToPtr(30 * time.Second)
		ExpectApplied(ctx, env.Client, nodePool, nodeClaim)

		// Advance the clock past the 30s ExpireAfter so the NodeClaim counts as expired.
		fakeClock.Step(time.Minute)
		ExpectReconcileSucceeded(ctx, nodeClaimDisruptionController, client.ObjectKeyFromObject(nodeClaim))

		// Re-read the NodeClaim so the assertions see the persisted status.
		nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
		expiredCondition := nodeClaim.StatusConditions().GetCondition(v1beta1.Expired)
		Expect(expiredCondition.IsTrue()).To(BeTrue())

		// The disrupted counter is looked up with the fixed "expiration" type label.
		wantLabels := map[string]string{
			"type":     "expiration",
			"nodepool": nodePool.Name,
		}
		disruptedMetric, ok := FindMetricWithLabelValues("karpenter_nodeclaims_disrupted", wantLabels)
		Expect(ok).To(BeTrue())
		Expect(disruptedMetric.GetCounter().GetValue()).To(BeNumerically("==", 1))
	})
})
It("should remove the status condition from the NodeClaims when expiration is disabled", func() {
nodePool.Spec.Disruption.ExpireAfter.Duration = nil
nodeClaim.StatusConditions().MarkTrue(v1beta1.Expired)
Expand Down
2 changes: 1 addition & 1 deletion pkg/utils/nodeclaim/nodeclaim.go
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ func DriftedCounter(nodeClaim *v1beta1.NodeClaim, driftType string) prometheus.C
metrics.ProvisionerLabel: nodeClaim.Labels[v1alpha5.ProvisionerNameLabelKey],
})
}
return metrics.NodeClaimsDisruptedCounter.With(prometheus.Labels{
return metrics.NodeClaimsDriftedCounter.With(prometheus.Labels{
metrics.TypeLabel: driftType,
metrics.NodePoolLabel: nodeClaim.Labels[v1beta1.NodePoolLabelKey],
})
Expand Down

0 comments on commit 8675e14

Please sign in to comment.