@@ -42,8 +42,19 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
4242 glog .V (3 ).Infof ("Enter Allocate ..." )
4343 defer glog .V (3 ).Infof ("Leaving Allocate ..." )
4444
45- queues := util .NewPriorityQueue (ssn .QueueOrderFn )
46- jobsMap := map [api.QueueID ]* util.PriorityQueue {}
45+ // the allocation of a pod goes through several stages:
46+ // 1. pick a namespace named N (using ssn.NamespaceOrderFn)
47+ // 2. pick a queue named Q from N (using ssn.QueueOrderFn)
48+ // 3. pick a job named J from Q (using ssn.JobOrderFn)
49+ // 4. pick a task T from J (using ssn.TaskOrderFn)
50+ // 5. use predicateFn to filter out nodes that T cannot be allocated on.
51+ // 6. use ssn.NodeOrderFn to judge the best node and assign it to T
52+
53+ namespaces := util .NewPriorityQueue (ssn .NamespaceOrderFn )
54+
55+ // jobsMap is map[api.NamespaceName]map[api.QueueID]PriorityQueue(*api.JobInfo)
56+ // used to find job with highest priority in given queue and namespace
57+ jobsMap := map [api.NamespaceName ]map [api.QueueID ]* util.PriorityQueue {}
4758
4859 for _ , job := range ssn .Jobs {
4960 if job .PodGroup .Status .Phase == api .PodGroupPending {
@@ -54,23 +65,32 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
5465 continue
5566 }
5667
57- if queue , found := ssn .Queues [job .Queue ]; found {
58- queues .Push (queue )
59- } else {
68+ if _ , found := ssn .Queues [job .Queue ]; ! found {
6069 glog .Warningf ("Skip adding Job <%s/%s> because its queue %s is not found" ,
6170 job .Namespace , job .Name , job .Queue )
6271 continue
6372 }
6473
65- if _ , found := jobsMap [job .Queue ]; ! found {
66- jobsMap [job .Queue ] = util .NewPriorityQueue (ssn .JobOrderFn )
74+ namespace := api .NamespaceName (job .Namespace )
75+ queueMap , found := jobsMap [namespace ]
76+ if ! found {
77+ namespaces .Push (namespace )
78+
79+ queueMap = make (map [api.QueueID ]* util.PriorityQueue )
80+ jobsMap [namespace ] = queueMap
81+ }
82+
83+ jobs , found := queueMap [job .Queue ]
84+ if ! found {
85+ jobs = util .NewPriorityQueue (ssn .JobOrderFn )
86+ queueMap [job .Queue ] = jobs
6787 }
6888
6989 glog .V (4 ).Infof ("Added Job <%s/%s> into Queue <%s>" , job .Namespace , job .Name , job .Queue )
70- jobsMap [ job . Queue ] .Push (job )
90+ jobs .Push (job )
7191 }
7292
73- glog .V (3 ).Infof ("Try to allocate resource to %d Queues " , len (jobsMap ))
93+ glog .V (3 ).Infof ("Try to allocate resource to %d Namespaces " , len (jobsMap ))
7494
7595 pendingTasks := map [api.JobID ]* util.PriorityQueue {}
7696
@@ -91,21 +111,47 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
91111 return ssn .PredicateFn (task , node )
92112 }
93113
114+ // To pick a <namespace, queue> tuple for a job, we pick the namespace first,
115+ // because we expect the number of queues to be smaller than the number of namespaces in most cases.
116+ // This also helps keep resource usage balanced among namespaces.
94117 for {
95- if queues .Empty () {
118+ if namespaces .Empty () {
96119 break
97120 }
98121
99- queue := queues .Pop ().(* api.QueueInfo )
100- if ssn .Overused (queue ) {
101- glog .V (3 ).Infof ("Queue <%s> is overused, ignore it." , queue .Name )
102- continue
122+ // pick namespace from namespaces PriorityQueue
123+ namespace := namespaces .Pop ().(api.NamespaceName )
124+
125+ queueInNamespace := jobsMap [namespace ]
126+
127+ // pick queue for given namespace
128+ //
129+ // This block uses an algorithm with O(n) time complexity.
130+ // A PriorityQueue cannot be used here,
131+ // because allocating a job changes the priority of a queue across all namespaces,
132+ // and PriorityQueue has no way to update the priority of a specific queue.
133+ var queue * api.QueueInfo
134+ for queueId := range queueInNamespace {
135+ currentQueue := ssn .Queues [queueId ]
136+ if ssn .Overused (currentQueue ) {
137+ glog .V (3 ).Infof ("Namespace <%s> Queue <%s> is overused, ignore it." , namespace , currentQueue .Name )
138+ delete (queueInNamespace , queueId )
139+ continue
140+ }
141+
142+ if queue == nil || ssn .QueueOrderFn (currentQueue , queue ) {
143+ queue = currentQueue
144+ }
103145 }
104146
105- jobs , found := jobsMap [queue .UID ]
147+ if queue == nil {
148+ glog .V (3 ).Infof ("Namespace <%s> have no queue, skip it" , namespace )
149+ continue
150+ }
106151
107- glog .V (3 ).Infof ("Try to allocate resource to Jobs in Queue <%v>" , queue .Name )
152+ glog .V (3 ).Infof ("Try to allocate resource to Jobs in Namespace <%s> Queue <%v>" , namespace , queue .Name )
108153
154+ jobs , found := queueInNamespace [queue .UID ]
109155 if ! found || jobs .Empty () {
110156 glog .V (4 ).Infof ("Can not find jobs for queue %s." , queue .Name )
111157 continue
@@ -193,8 +239,9 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
193239 } else {
194240 stmt .Discard ()
195241 }
196- // Added Queue back until no job in Queue.
197- queues .Push (queue )
242+
243+ // Add the Namespace back so it is revisited until it has no jobs left.
244+ namespaces .Push (namespace )
198245 }
199246}
200247
0 commit comments