@@ -42,9 +42,20 @@ func (alloc *allocateAction) Initialize() {}
4242func (alloc * allocateAction ) Execute (ssn * framework.Session ) {
4343 glog .V (3 ).Infof ("Enter Allocate ..." )
4444 defer glog .V (3 ).Infof ("Leaving Allocate ..." )
45- // further
46- queues := util .NewPriorityQueue (ssn .QueueOrderFn )
47- jobsMap := map [api.QueueID ]* util.PriorityQueue {}
45+
46+ // the allocation of a pod goes through the following stages
47+ // 1. pick a namespace named N (using ssn.NamespaceOrderFn)
48+ // 2. pick a queue named Q from N (using ssn.QueueOrderFn)
49+ // 3. pick a job named J from Q (using ssn.JobOrderFn)
50+ // 4. pick a task T from J (using ssn.TaskOrderFn)
51+ // 5. use predicateFn to filter out nodes that T cannot be allocated on.
52+ // 6. use ssn.NodeOrderFn to pick the best node and assign it to T
53+
54+ namespaces := util .NewPriorityQueue (ssn .NamespaceOrderFn )
55+
56+ // jobsMap is map[api.NamespaceName]map[api.QueueID]PriorityQueue(*api.JobInfo)
57+ // used to find job with highest priority in given queue and namespace
58+ jobsMap := map [api.NamespaceName ]map [api.QueueID ]* util.PriorityQueue {}
4859
4960 for _ , job := range ssn .Jobs {
5061 if job .PodGroup .Status .Phase == scheduling .PodGroupPending {
@@ -55,23 +66,32 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
5566 continue
5667 }
5768
58- if queue , found := ssn .Queues [job .Queue ]; found {
59- queues .Push (queue )
60- } else {
69+ if _ , found := ssn .Queues [job .Queue ]; ! found {
6170 glog .Warningf ("Skip adding Job <%s/%s> because its queue %s is not found" ,
6271 job .Namespace , job .Name , job .Queue )
6372 continue
6473 }
6574
66- if _ , found := jobsMap [job .Queue ]; ! found {
67- jobsMap [job .Queue ] = util .NewPriorityQueue (ssn .JobOrderFn )
75+ namespace := api .NamespaceName (job .Namespace )
76+ queueMap , found := jobsMap [namespace ]
77+ if ! found {
78+ namespaces .Push (namespace )
79+
80+ queueMap = make (map [api.QueueID ]* util.PriorityQueue )
81+ jobsMap [namespace ] = queueMap
82+ }
83+
84+ jobs , found := queueMap [job .Queue ]
85+ if ! found {
86+ jobs = util .NewPriorityQueue (ssn .JobOrderFn )
87+ queueMap [job .Queue ] = jobs
6888 }
6989
7090 glog .V (4 ).Infof ("Added Job <%s/%s> into Queue <%s>" , job .Namespace , job .Name , job .Queue )
71- jobsMap [ job . Queue ] .Push (job )
91+ jobs .Push (job )
7292 }
7393
74- glog .V (3 ).Infof ("Try to allocate resource to %d Queues " , len (jobsMap ))
94+ glog .V (3 ).Infof ("Try to allocate resource to %d Namespaces " , len (jobsMap ))
7595
7696 pendingTasks := map [api.JobID ]* util.PriorityQueue {}
7797
@@ -92,21 +112,47 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
92112 return ssn .PredicateFn (task , node )
93113 }
94114
115+ // To pick the <namespace, queue> tuple for a job, we pick the namespace first,
116+ // because we believe the number of queues is smaller than the number of namespaces in most cases.
117+ // This also keeps the resource usage balanced among namespaces.
95118 for {
96- if queues .Empty () {
119+ if namespaces .Empty () {
97120 break
98121 }
99122
100- queue := queues .Pop ().(* api.QueueInfo )
101- if ssn .Overused (queue ) {
102- glog .V (3 ).Infof ("Queue <%s> is overused, ignore it." , queue .Name )
103- continue
123+ // pick namespace from namespaces PriorityQueue
124+ namespace := namespaces .Pop ().(api.NamespaceName )
125+
126+ queueInNamespace := jobsMap [namespace ]
127+
128+ // pick queue for given namespace
129+ //
130+ // This block uses an algorithm with O(n) time complexity.
131+ // A PriorityQueue cannot be used here,
132+ // because allocating a job changes the priority of queues across all namespaces,
133+ // and the PriorityQueue has no way to update the priority of a specific queue.
134+ var queue * api.QueueInfo
135+ for queueId := range queueInNamespace {
136+ currentQueue := ssn .Queues [queueId ]
137+ if ssn .Overused (currentQueue ) {
138+ glog .V (3 ).Infof ("Namespace <%s> Queue <%s> is overused, ignore it." , namespace , currentQueue .Name )
139+ delete (queueInNamespace , queueId )
140+ continue
141+ }
142+
143+ if queue == nil || ssn .QueueOrderFn (currentQueue , queue ) {
144+ queue = currentQueue
145+ }
104146 }
105147
106- jobs , found := jobsMap [queue .UID ]
148+ if queue == nil {
149+ glog .V (3 ).Infof ("Namespace <%s> have no queue, skip it" , namespace )
150+ continue
151+ }
107152
108- glog .V (3 ).Infof ("Try to allocate resource to Jobs in Queue <%v>" , queue .Name )
153+ glog .V (3 ).Infof ("Try to allocate resource to Jobs in Namespace <%s> Queue <%v>" , namespace , queue .Name )
109154
155+ jobs , found := queueInNamespace [queue .UID ]
110156 if ! found || jobs .Empty () {
111157 glog .V (4 ).Infof ("Can not find jobs for queue %s." , queue .Name )
112158 continue
@@ -194,8 +240,9 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
194240 } else {
195241 stmt .Discard ()
196242 }
197- // Added Queue back until no job in Queue.
198- queues .Push (queue )
243+
244+ // Push the Namespace back so it is revisited until no job is left in it.
245+ namespaces .Push (namespace )
199246 }
200247}
201248
0 commit comments