@@ -18,6 +18,7 @@ package ray
1818import (
1919 "context"
2020 "fmt"
21+ "log"
2122 "reflect"
2223 "time"
2324
@@ -46,6 +47,7 @@ const (
4647var _ = Context ("Inside the default namespace" , func () {
4748 ctx := context .TODO ()
4849 var workerPods corev1.PodList
50+ var headPods corev1.PodList
4951 enableInTreeAutoscaling := true
5052
5153 myRayCluster := & rayiov1alpha1.RayCluster {
@@ -62,7 +64,6 @@ var _ = Context("Inside the default namespace", func() {
6264 "port" : "6379" ,
6365 "object-manager-port" : "12345" ,
6466 "node-manager-port" : "12346" ,
65- "object-store-memory" : "100000000" ,
6667 "num-cpus" : "1" ,
6768 },
6869 Template : corev1.PodTemplateSpec {
@@ -126,7 +127,8 @@ var _ = Context("Inside the default namespace", func() {
126127 },
127128 }
128129
129- filterLabels := client.MatchingLabels {common .RayClusterLabelKey : myRayCluster .Name , common .RayNodeGroupLabelKey : "small-group" }
130+ headFilterLabels := client.MatchingLabels {common .RayClusterLabelKey : myRayCluster .Name , common .RayNodeGroupLabelKey : "headgroup" }
131+ workerFilterLabels := client.MatchingLabels {common .RayClusterLabelKey : myRayCluster .Name , common .RayNodeGroupLabelKey : "small-group" }
130132
131133 Describe ("When creating a raycluster" , func () {
132134 It ("should create a raycluster object" , func () {
@@ -150,17 +152,15 @@ var _ = Context("Inside the default namespace", func() {
150152
151153 It ("should create 3 workers" , func () {
152154 Eventually (
153- listResourceFunc (ctx , & workerPods , filterLabels , & client.ListOptions {Namespace : "default" }),
155+ listResourceFunc (ctx , & workerPods , workerFilterLabels , & client.ListOptions {Namespace : "default" }),
154156 time .Second * 15 , time .Millisecond * 500 ).Should (Equal (3 ), fmt .Sprintf ("workerGroup %v" , workerPods .Items ))
155157 if len (workerPods .Items ) > 0 {
156158 Expect (workerPods .Items [0 ].Status .Phase ).Should (Or (Equal (corev1 .PodRunning ), Equal (corev1 .PodPending )))
157159 }
158160 })
159161
160162 It ("should create a head pod resource" , func () {
161- var headPods corev1.PodList
162- filterLabels := client.MatchingLabels {common .RayClusterLabelKey : myRayCluster .Name , common .RayNodeGroupLabelKey : "headgroup" }
163- err := k8sClient .List (ctx , & headPods , filterLabels , & client.ListOptions {Namespace : "default" }, client .InNamespace (myRayCluster .Namespace ))
163+ err := k8sClient .List (ctx , & headPods , headFilterLabels , & client.ListOptions {Namespace : "default" }, client .InNamespace (myRayCluster .Namespace ))
164164 Expect (err ).NotTo (HaveOccurred (), "failed list head pods" )
165165 Expect (len (headPods .Items )).Should (BeNumerically ("==" , 1 ), "My head pod list= %v" , headPods .Items )
166166
@@ -190,9 +190,42 @@ var _ = Context("Inside the default namespace", func() {
190190 time .Second * 15 , time .Millisecond * 500 ).Should (BeNil (), "autoscaler RoleBinding = %v" , rbName )
191191 })
192192
193+ It ("should be able to update all Pods to Running" , func () {
194+ // We need to manually update Pod statuses otherwise they'll always be Pending.
195+ // envtest doesn't create a full K8s cluster. It's only the control plane.
196+ // There's no container runtime or any other K8s controllers.
197+ // So Pods are created, but no controller updates them from Pending to Running.
198+ // See https://book.kubebuilder.io/reference/envtest.html
199+ for _ , headPod := range headPods .Items {
200+ headPod .Status .Phase = corev1 .PodRunning
201+ Expect (k8sClient .Status ().Update (ctx , & headPod )).Should (BeNil ())
202+ }
203+ err := k8sClient .List (ctx , & headPods , headFilterLabels , & client.ListOptions {Namespace : "default" })
204+ Expect (err ).ShouldNot (HaveOccurred (), "failed to list head Pods" )
205+ for _ , headPod := range headPods .Items {
206+ Expect (headPod .Status .Phase ).Should (Equal (corev1 .PodRunning ))
207+ }
208+
209+ for _ , workerPod := range workerPods .Items {
210+ workerPod .Status .Phase = corev1 .PodRunning
211+ Expect (k8sClient .Status ().Update (ctx , & workerPod )).Should (BeNil ())
212+ }
213+ err = k8sClient .List (ctx , & workerPods , workerFilterLabels , & client.ListOptions {Namespace : "default" })
214+ Expect (err ).ShouldNot (HaveOccurred (), "failed to list worker Pods" )
215+ for _ , workerPod := range workerPods .Items {
216+ Expect (workerPod .Status .Phase ).Should (Equal (corev1 .PodRunning ))
217+ }
218+ })
219+
220+ It ("cluster's .status.state should be updated to 'ready' shortly after all Pods are Running" , func () {
221+ Eventually (
222+ getClusterState (ctx , "default" , myRayCluster .Name ),
223+ time .Second * (common .RAYCLUSTER_DEFAULT_REQUEUE_SECONDS + 5 ), time .Millisecond * 500 ).Should (Equal (rayiov1alpha1 .Ready ))
224+ })
225+
193226 It ("should re-create a deleted worker" , func () {
194227 Eventually (
195- listResourceFunc (ctx , & workerPods , filterLabels , & client.ListOptions {Namespace : "default" }),
228+ listResourceFunc (ctx , & workerPods , workerFilterLabels , & client.ListOptions {Namespace : "default" }),
196229 time .Second * 15 , time .Millisecond * 500 ).Should (Equal (3 ), fmt .Sprintf ("workerGroup %v" , workerPods .Items ))
197230
198231 pod := workerPods .Items [0 ]
@@ -203,7 +236,7 @@ var _ = Context("Inside the default namespace", func() {
203236
205238 // at least 3 pods should be in non-failed phase
205238 Eventually (
206- listResourceFunc (ctx , & workerPods , filterLabels , & client.ListOptions {Namespace : "default" }),
239+ listResourceFunc (ctx , & workerPods , workerFilterLabels , & client.ListOptions {Namespace : "default" }),
207240 time .Second * 15 , time .Millisecond * 500 ).Should (Equal (3 ), fmt .Sprintf ("workerGroup %v" , workerPods .Items ))
208241 })
209242
@@ -228,7 +261,7 @@ var _ = Context("Inside the default namespace", func() {
228261 It ("should have only 2 running worker" , func () {
229262 // retry listing pods, given that last update may not immediately happen.
230263 Eventually (
231- listResourceFunc (ctx , & workerPods , filterLabels , & client.ListOptions {Namespace : "default" }),
264+ listResourceFunc (ctx , & workerPods , workerFilterLabels , & client.ListOptions {Namespace : "default" }),
232265 time .Second * 15 , time .Millisecond * 500 ).Should (Equal (2 ), fmt .Sprintf ("workerGroup %v" , workerPods .Items ))
233266 })
234267
@@ -250,7 +283,7 @@ var _ = Context("Inside the default namespace", func() {
250283 It ("should have only 1 running worker" , func () {
251284 // retry listing pods, given that last update may not immediately happen.
252285 Eventually (
253- listResourceFunc (ctx , & workerPods , filterLabels , & client.ListOptions {Namespace : "default" }),
286+ listResourceFunc (ctx , & workerPods , workerFilterLabels , & client.ListOptions {Namespace : "default" }),
254287 time .Second * 15 , time .Millisecond * 500 ).Should (Equal (1 ), fmt .Sprintf ("workerGroup %v" , workerPods .Items ))
255288 })
256289
@@ -275,14 +308,14 @@ var _ = Context("Inside the default namespace", func() {
275308 It ("should scale to maxReplicas (4) workers" , func () {
276309 // retry listing pods, given that last update may not immediately happen.
277310 Eventually (
278- listResourceFunc (ctx , & workerPods , filterLabels , & client.ListOptions {Namespace : "default" }),
311+ listResourceFunc (ctx , & workerPods , workerFilterLabels , & client.ListOptions {Namespace : "default" }),
279312 time .Second * 15 , time .Millisecond * 500 ).Should (Equal (4 ), fmt .Sprintf ("workerGroup %v" , workerPods .Items ))
280313 })
281314
282315 It ("should countinue to have only maxReplicas (4) workers" , func () {
283316 // check that pod count stays at 4 for two seconds.
284317 Consistently (
285- listResourceFunc (ctx , & workerPods , filterLabels , & client.ListOptions {Namespace : "default" }),
318+ listResourceFunc (ctx , & workerPods , workerFilterLabels , & client.ListOptions {Namespace : "default" }),
286319 time .Second * 2 , time .Millisecond * 200 ).Should (Equal (4 ), fmt .Sprintf ("workerGroup %v" , workerPods .Items ))
287320 })
288321 })
@@ -330,3 +363,13 @@ func retryOnOldRevision(attempts int, sleep time.Duration, f func() error) error
330363 }
331364 return fmt .Errorf ("after %d attempts, last error: %s" , attempts , err )
332365}
366+
367+ func getClusterState (ctx context.Context , namespace string , clusterName string ) func () rayiov1alpha1.ClusterState {
368+ return func () rayiov1alpha1.ClusterState {
369+ var cluster rayiov1alpha1.RayCluster
370+ if err := k8sClient .Get (ctx , client.ObjectKey {Namespace : namespace , Name : clusterName }, & cluster ); err != nil {
371+ log .Fatal (err )
372+ }
373+ return cluster .Status .State
374+ }
375+ }
0 commit comments