Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ import (
"github.com/openshift-kni/numaresources-operator/internal/api/features"
"github.com/openshift-kni/numaresources-operator/internal/controller"
intkloglevel "github.com/openshift-kni/numaresources-operator/internal/kloglevel"
"github.com/openshift-kni/numaresources-operator/internal/platforminfo"
"github.com/openshift-kni/numaresources-operator/pkg/hash"
"github.com/openshift-kni/numaresources-operator/pkg/images"
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
Expand Down Expand Up @@ -330,10 +331,7 @@ func main() {
SchedulerManifests: schedMf,
Namespace: namespace,
AutodetectReplicas: info.NodeCount,
PlatformInfo: controller.PlatformInfo{
Platform: clusterPlatform,
Version: clusterPlatformVersion,
},
PlatformInfo: platforminfo.New(clusterPlatform, clusterPlatformVersion),
}).SetupWithManager(mgr); err != nil {
klog.ErrorS(err, "unable to create controller", "controller", "NUMAResourcesScheduler")
os.Exit(1)
Expand Down
35 changes: 10 additions & 25 deletions internal/controller/numaresourcesscheduler_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import (

nropv1 "github.com/openshift-kni/numaresources-operator/api/v1"
"github.com/openshift-kni/numaresources-operator/internal/api/annotations"
"github.com/openshift-kni/numaresources-operator/internal/platforminfo"
"github.com/openshift-kni/numaresources-operator/internal/relatedobjects"
"github.com/openshift-kni/numaresources-operator/pkg/apply"
"github.com/openshift-kni/numaresources-operator/pkg/hash"
Expand All @@ -56,25 +57,14 @@ import (
"github.com/openshift-kni/numaresources-operator/pkg/status"
)

const (
// ActivePodsResourcesSupportSince defines the OCP version which started to support the fixed kubelet
// in which the PodResourcesAPI lists the active pods by default
activePodsResourcesSupportSince = "4.19.999"
)

type PlatformInfo struct {
Platform platform.Platform
Version platform.Version
}

// NUMAResourcesSchedulerReconciler reconciles a NUMAResourcesScheduler object
type NUMAResourcesSchedulerReconciler struct {
client.Client
Scheme *runtime.Scheme
SchedulerManifests schedmanifests.Manifests
Namespace string
AutodetectReplicas int
PlatformInfo PlatformInfo
PlatformInfo platforminfo.PlatformInfo
}

//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles,verbs=*
Expand Down Expand Up @@ -271,26 +261,21 @@ func (r *NUMAResourcesSchedulerReconciler) syncNUMASchedulerResources(ctx contex
return schedStatus, nil
}

func platformNormalize(spec *nropv1.NUMAResourcesSchedulerSpec, platInfo PlatformInfo) {
func platformNormalize(spec *nropv1.NUMAResourcesSchedulerSpec, platInfo platforminfo.PlatformInfo) {
if platInfo.Platform != platform.OpenShift && platInfo.Platform != platform.HyperShift {
return
}

parsedVersion, _ := platform.ParseVersion(activePodsResourcesSupportSince)
ok, err := platInfo.Version.AtLeast(parsedVersion)
if err != nil {
klog.Infof("failed to compare version %v with %v, err %v", parsedVersion, platInfo.Version, err)
if spec.SchedulerInformer != nil {
// assume user-provided value. Nothing to do.
klog.V(4).InfoS("SchedulerInformer explicit value", "Platform", platInfo.Platform, "PlatformVersion", platInfo.Version.String(), "SchedulerInformer", *spec.SchedulerInformer)
return
}

if !ok {
if !platInfo.Properties.PodResourcesListFilterActivePods {
// keep shared default for backward compatibility. TODO: review/switch default in 4.21
return
}

if spec.SchedulerInformer == nil {
spec.SchedulerInformer = ptr.To(nropv1.SchedulerInformerShared)
klog.V(4).InfoS("SchedulerInformer default is overridden", "Platform", platInfo.Platform, "PlatformVersion", platInfo.Version.String(), "SchedulerInformer", &spec.SchedulerInformer)
}
spec.SchedulerInformer = ptr.To(nropv1.SchedulerInformerShared)
klog.V(4).InfoS("SchedulerInformer default is overridden", "Platform", platInfo.Platform, "PlatformVersion", platInfo.Version.String(), "SchedulerInformer", *spec.SchedulerInformer)
}

func buildDedicatedInformerCondition(instance nropv1.NUMAResourcesScheduler, normalized nropv1.NUMAResourcesSchedulerSpec) metav1.Condition {
Expand Down
94 changes: 24 additions & 70 deletions internal/controller/numaresourcesscheduler_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import (
nropv1 "github.com/openshift-kni/numaresources-operator/api/v1"
"github.com/openshift-kni/numaresources-operator/internal/api/annotations"
testobjs "github.com/openshift-kni/numaresources-operator/internal/objects"
"github.com/openshift-kni/numaresources-operator/internal/platforminfo"
"github.com/openshift-kni/numaresources-operator/pkg/hash"
nrosched "github.com/openshift-kni/numaresources-operator/pkg/numaresourcesscheduler"
schedmanifests "github.com/openshift-kni/numaresources-operator/pkg/numaresourcesscheduler/manifests/sched"
Expand Down Expand Up @@ -767,9 +768,12 @@ var _ = ginkgo.Describe("Test NUMAResourcesScheduler Reconcile", func() {
numOfMasters := 3

ginkgo.When("kubelet fix is enabled", func() {
fixedVersion, _ := platform.ParseVersion(activePodsResourcesSupportSince)
fixedVersion, _ := platform.ParseVersion("4.19.10")
unfixedVersion, _ := platform.ParseVersion("4.19.0") // can't (and we must not even if we can) rewrite history
futureFixedVersionZstream, _ := platform.ParseVersion("4.19.20") // we must never regress
futureFixedVersion, _ := platform.ParseVersion("4.21.0") // we must never regress

ginkgo.DescribeTable("should configure by default the informerMode to the expected when field is not set", func(reconcilerPlatInfo PlatformInfo, expectedInformer string) {
ginkgo.DescribeTable("should configure by default the informerMode to the expected when field is not set", func(reconcilerPlatInfo platforminfo.PlatformInfo, expectedInformer string) {
var err error
nrs = testobjs.NewNUMAResourcesScheduler("numaresourcesscheduler", "some/url:latest", testSchedulerName, 11*time.Second)
initObjects := []runtime.Object{nrs}
Expand All @@ -796,17 +800,18 @@ var _ = ginkgo.Describe("Test NUMAResourcesScheduler Reconcile", func() {
gomega.Expect(c).ToNot(gomega.BeNil())
gomega.Expect(c.Status).To(gomega.Equal(expectedDedicatedActiveStatus))
},
ginkgo.Entry("with fixed Openshift the default informer is Shared", PlatformInfo{
Platform: platform.OpenShift,
Version: fixedVersion,
}, depmanifests.CacheInformerShared),
ginkgo.Entry("with fixed Hypershift the default informer is Shared", PlatformInfo{
Platform: platform.HyperShift,
Version: fixedVersion,
}, depmanifests.CacheInformerShared),
ginkgo.Entry("with unknown platform the default informer is Dedicated (unchanged)", PlatformInfo{}, depmanifests.CacheInformerDedicated))

ginkgo.DescribeTable("should preserve informerMode value if set", func(reconcilerPlatInfo PlatformInfo) {
ginkgo.Entry("with fixed Openshift the default informer is Shared", platforminfo.New(platform.OpenShift, fixedVersion), depmanifests.CacheInformerShared),
ginkgo.Entry("with fixed Hypershift the default informer is Shared", platforminfo.New(platform.HyperShift, fixedVersion), depmanifests.CacheInformerShared),
ginkgo.Entry("with unfixed platform the default informer is Dedicated (unchanged)", platforminfo.New(platform.OpenShift, unfixedVersion), depmanifests.CacheInformerDedicated),
ginkgo.Entry("with unfixed platform the default informer is Dedicated (unchanged)", platforminfo.New(platform.HyperShift, unfixedVersion), depmanifests.CacheInformerDedicated),
ginkgo.Entry("with fixed Openshift the default informer is Shared", platforminfo.New(platform.OpenShift, futureFixedVersion), depmanifests.CacheInformerShared),
ginkgo.Entry("with fixed Hypershift the default informer is Shared", platforminfo.New(platform.HyperShift, futureFixedVersion), depmanifests.CacheInformerShared),
ginkgo.Entry("with fixed Openshift the default informer is Shared", platforminfo.New(platform.OpenShift, futureFixedVersionZstream), depmanifests.CacheInformerShared),
ginkgo.Entry("with fixed Hypershift the default informer is Shared", platforminfo.New(platform.HyperShift, futureFixedVersionZstream), depmanifests.CacheInformerShared),
ginkgo.Entry("with unknown platform the default informer is Dedicated (unchanged)", platforminfo.PlatformInfo{}, depmanifests.CacheInformerDedicated),
)

ginkgo.DescribeTable("should preserve informerMode value if set", func(reconcilerPlatInfo platforminfo.PlatformInfo) {
var err error
nrs = testobjs.NewNUMAResourcesScheduler("numaresourcesscheduler", "some/url:latest", testSchedulerName, 11*time.Second)
infMode := nropv1.SchedulerInformerDedicated
Expand All @@ -823,17 +828,17 @@ var _ = ginkgo.Describe("Test NUMAResourcesScheduler Reconcile", func() {
gomega.Expect(err).ToNot(gomega.HaveOccurred())
expectCacheParams(reconciler.Client, depmanifests.CacheResyncAutodetect, depmanifests.CacheResyncOnlyExclusiveResources, string(infMode))
},
ginkgo.Entry("with Openshift", PlatformInfo{
ginkgo.Entry("with Openshift", platforminfo.PlatformInfo{
Platform: platform.OpenShift,
Version: fixedVersion,
}),
ginkgo.Entry("with Hypershift", PlatformInfo{
ginkgo.Entry("with Hypershift", platforminfo.PlatformInfo{
Platform: platform.HyperShift,
Version: fixedVersion,
}),
ginkgo.Entry("with unknown platform", PlatformInfo{}))
ginkgo.Entry("with unknown platform", platforminfo.PlatformInfo{}))

ginkgo.DescribeTable("should allow to update the informerMode to be Dedicated after an overridden default", func(reconcilerPlatInfo PlatformInfo) {
ginkgo.DescribeTable("should allow to update the informerMode to be Dedicated after an overridden default", func(reconcilerPlatInfo platforminfo.PlatformInfo) {
var err error
nrs = testobjs.NewNUMAResourcesScheduler("numaresourcesscheduler", "some/url:latest", testSchedulerName, 11*time.Second)
initObjects := []runtime.Object{nrs}
Expand Down Expand Up @@ -868,69 +873,18 @@ var _ = ginkgo.Describe("Test NUMAResourcesScheduler Reconcile", func() {

expectCacheParams(reconciler.Client, depmanifests.CacheResyncAutodetect, depmanifests.CacheResyncOnlyExclusiveResources, string(informerMode))
},
ginkgo.Entry("with Openshift", PlatformInfo{
ginkgo.Entry("with Openshift", platforminfo.PlatformInfo{
Platform: platform.OpenShift,
Version: fixedVersion,
}),
ginkgo.Entry("with Hypershift", PlatformInfo{
ginkgo.Entry("with Hypershift", platforminfo.PlatformInfo{
Platform: platform.HyperShift,
Version: fixedVersion,
}))
})
})
})

var _ = ginkgo.Describe("Test scheduler spec PreNormalize", func() {
ginkgo.When("Spec.SchedulerInformer is not set by the user", func() {
ginkgo.It("should override default informer to Shared if kubelet is fixed - first supported zstream version", func() {
v, _ := platform.ParseVersion(activePodsResourcesSupportSince)
spec := nropv1.NUMAResourcesSchedulerSpec{}
platformNormalize(&spec, PlatformInfo{Platform: platform.OpenShift, Version: v})
gomega.Expect(*spec.SchedulerInformer).To(gomega.Equal(nropv1.SchedulerInformerShared))
})

ginkgo.It("should override default informer to Shared if kubelet is fixed - version is greater than first supported (zstream)", func() {
v, _ := platform.ParseVersion("4.19.1000")
spec := nropv1.NUMAResourcesSchedulerSpec{}
platformNormalize(&spec, PlatformInfo{Platform: platform.OpenShift, Version: v})
gomega.Expect(*spec.SchedulerInformer).To(gomega.Equal(nropv1.SchedulerInformerShared))
})

ginkgo.It("should override default informer to Shared if kubelet is fixed - version is greater than first supported (ystream)", func() {
v, _ := platform.ParseVersion("4.20.0")
spec := nropv1.NUMAResourcesSchedulerSpec{}
platformNormalize(&spec, PlatformInfo{Platform: platform.OpenShift, Version: v})
gomega.Expect(*spec.SchedulerInformer).To(gomega.Equal(nropv1.SchedulerInformerShared))
})

ginkgo.It("should not override default informer if kubelet is not fixed - version is less than first supported (zstream)", func() {
// this is only for testing purposes as there is plan to backport the fix to older minor versions
// will need to remove this test if the fix is supported starting the first zstream of the release
v, _ := platform.ParseVersion("4.19.0")
spec := nropv1.NUMAResourcesSchedulerSpec{}
platformNormalize(&spec, PlatformInfo{Platform: platform.OpenShift, Version: v})
gomega.Expect(spec.SchedulerInformer).To(gomega.BeNil())
})

ginkgo.It("should not override default informer if kubelet is not fixed - version is less than first supported (ystream)", func() {
v, _ := platform.ParseVersion("4.13.0")
spec := nropv1.NUMAResourcesSchedulerSpec{}
platformNormalize(&spec, PlatformInfo{Platform: platform.OpenShift, Version: v})
gomega.Expect(spec.SchedulerInformer).To(gomega.BeNil())
})
})
ginkgo.When("Spec.SchedulerInformer is set by the user", func() {
ginkgo.It("should preserve informer value set by the user even if kubelet is fixed", func() {
v, _ := platform.ParseVersion(activePodsResourcesSupportSince)
spec := nropv1.NUMAResourcesSchedulerSpec{
SchedulerInformer: ptr.To(nropv1.SchedulerInformerDedicated),
}
platformNormalize(&spec, PlatformInfo{Platform: platform.OpenShift, Version: v})
gomega.Expect(*spec.SchedulerInformer).To(gomega.Equal(nropv1.SchedulerInformerDedicated))
})
})
})

func HaveTheSameResourceRequirements(expectedRR corev1.ResourceRequirements) types.GomegaMatcher {
return gcustom.MakeMatcher(func(actual *appsv1.Deployment) (bool, error) {
cntName := schedupdate.MainContainerName // shortcut
Expand Down
46 changes: 46 additions & 0 deletions internal/platforminfo/platforminfo.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package platforminfo

import (
"github.com/k8stopologyawareschedwg/deployer/pkg/deployer/platform"
)

// PlatformProperties represents the platform capabilities we have to infer and which are not explicitly
// advertised from the platform using standard APIs.
type PlatformProperties struct {
PodResourcesListFilterActivePods bool
}

type PlatformInfo struct {
Platform platform.Platform
Version platform.Version
Properties PlatformProperties
}

func New(plat platform.Platform, ver platform.Version) PlatformInfo {
info := PlatformInfo{
Platform: plat,
Version: ver,
}
discoverProperties(&info)
return info
}

func discoverProperties(info *PlatformInfo) {
info.Properties.PodResourcesListFilterActivePods = isVersionEnoughForPodresourcesListFilterActivePods(info.Platform, info.Version)
}
85 changes: 85 additions & 0 deletions internal/platforminfo/platforminfo_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright 2025 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package platforminfo

import (
"reflect"
"testing"

"github.com/k8stopologyawareschedwg/deployer/pkg/deployer/platform"
)

func TestNewWithDiscover(t *testing.T) {
testcases := []struct {
description string
plat platform.Platform
ver platform.Version
expected PlatformProperties
}{
{
description: "empty",
},
{
description: "last major unfixed",
plat: platform.OpenShift,
ver: mustParseVersion("4.19.0"),
expected: PlatformProperties{
PodResourcesListFilterActivePods: false,
},
},
{
description: "first major fixed",
plat: platform.OpenShift,
ver: mustParseVersion("4.20.0"), // at time of writing
expected: PlatformProperties{
PodResourcesListFilterActivePods: true,
},
},
{
description: "next Z-stream fixed, must never regress", // at time of writing
plat: platform.OpenShift,
ver: mustParseVersion("4.20.1"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be 4.19.11 or so

expected: PlatformProperties{
PodResourcesListFilterActivePods: true,
},
},
{
description: "next major fixed, must never regress", // at time of writing
plat: platform.OpenShift,
ver: mustParseVersion("4.21.0"),
expected: PlatformProperties{
PodResourcesListFilterActivePods: true,
},
},
}
for _, tc := range testcases {
t.Run(tc.description, func(t *testing.T) {
got := New(tc.plat, tc.ver)
if !reflect.DeepEqual(got.Properties, tc.expected) {
t.Errorf("expected %#v got %#v", tc.expected, got.Properties)
}
})
}
}

func mustParseVersion(ver string) platform.Version {
v, err := platform.ParseVersion(ver)
if err != nil {
panic(err)
}
return v
}
Loading