File tree Expand file tree Collapse file tree
---
# docs/samples/kubecon-2019-china
# ReplicaSet "nginx-1": nginx pods placed by the Volcano scheduler.
# Each pod requests and is limited to one full CPU (1000m).
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx-1
  labels:
    app: nginx-1
spec:
  # Modify replicas according to your case.
  replicas: 8
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: nginx-1
  template:
    metadata:
      labels:
        app: nginx-1
    spec:
      # Hand pod placement to Volcano instead of the default scheduler.
      schedulerName: volcano
      containers:
        - name: nginx-1
          # No tag is given, so the image reference resolves to nginx:latest.
          image: nginx
          resources:
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
---
# ReplicaSet "nginx-2": nginx pods placed by the Volcano scheduler.
# Each pod requests and is limited to one full CPU (1000m).
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx-2
  labels:
    app: nginx-2
spec:
  # Modify replicas according to your case.
  replicas: 8
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: nginx-2
  template:
    metadata:
      labels:
        app: nginx-2
    spec:
      # Hand pod placement to Volcano instead of the default scheduler.
      schedulerName: volcano
      containers:
        - name: nginx-2
          # No tag is given, so the image reference resolves to nginx:latest.
          image: nginx
          resources:
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
---
# ReplicaSet "nginx": eight nginx pods, each requesting one full CPU.
# No schedulerName is set, so the cluster's default scheduler places the pods.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  # Modify replicas according to your case.
  replicas: 8
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          # No tag is given, so the image reference resolves to nginx:latest.
          image: nginx
          resources:
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
---
# Volcano Job "lm-mpi-job": one MPI master plus three MPI workers.
# The master launches mpi_hello_world on the workers over ssh.
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: lm-mpi-job
  labels:
    # Set the job type according to your workload.
    "volcano.sh/job-type": "MPI"
spec:
  # Minimum number of pods the job needs (no more than the total replicas
  # across all tasks; here 1 master + 3 workers).
  minAvailable: 4
  schedulerName: volcano
  plugins:
    # Provides password-less ssh authentication between the pods.
    ssh: []
    # Provides the network information the job needs: hosts files,
    # headless service, etc.
    svc: []
  # If any pod is evicted, restart the whole job.
  policies:
    - event: PodEvicted
      action: RestartJob
  tasks:
    - replicas: 1
      name: mpimaster
      # When mpiexec finishes, treat the whole MPI job as complete.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          # Volcano places its generated information under /etc/volcano.
          containers:
            - command:
                - /bin/sh
                - -c
                # Build a comma-separated worker host list, start sshd in the
                # background, then run the MPI program with 3 processes
                # (matching the mpiworker replica count below).
                - |
                  MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
                  mpiexec --allow-run-as-root --host ${MPI_HOST} -np 3 mpi_hello_world;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              name: mpimaster
              ports:
                # ssh port used by mpiexec to reach the pods.
                - containerPort: 22
                  name: mpijob-port
              workingDir: /home
              resources:
                requests:
                  cpu: "500m"
                limits:
                  cpu: "500m"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
    - replicas: 3
      name: mpiworker
      template:
        spec:
          containers:
            - command:
                - /bin/sh
                - -c
                # Run sshd in the foreground (-D) so the container stays alive
                # and accepts connections from the master.
                - |
                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              name: mpiworker
              ports:
                - containerPort: 22
                  name: mpijob-port
              workingDir: /home
              resources:
                requests:
                  cpu: "1000m"
                limits:
                  cpu: "1000m"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
---
# ReplicaSet "nginx": six nginx pods, each requesting one full CPU.
# No schedulerName is set, so the cluster's default scheduler places the pods.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  # Modify replicas according to your case.
  replicas: 6
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          # No tag is given, so the image reference resolves to nginx:latest.
          image: nginx
          resources:
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
---
# Volcano Job "lm-horovod-job": one master plus three workers running a
# Horovod/TensorFlow MNIST script through mpiexec.
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: lm-horovod-job
  labels:
    "volcano.sh/job-type": "Horovod"
spec:
  # All four pods (1 master + 3 workers) are required for the job to run.
  minAvailable: 4
  schedulerName: volcano
  plugins:
    # ssh and svc plugins, enabled with no extra arguments.
    ssh: []
    svc: []
  # If any pod is evicted, restart the whole job.
  policies:
    - event: PodEvicted
      action: RestartJob
  tasks:
    - replicas: 1
      name: master
      # Completion of the master task completes the whole job.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          containers:
            - command:
                - /bin/sh
                - -c
                # Build a comma-separated worker host list, start sshd in the
                # background, then launch the training script with 3 processes
                # (matching the worker replica count below).
                - |
                  WORKER_HOST=`cat /etc/volcano/worker.host | tr "\n" ","`;
                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
                  mpiexec --allow-run-as-root --host ${WORKER_HOST} -np 3 python tensorflow_mnist_lm.py;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/horovod-tf-mnist:0.5
              name: master
              ports:
                # ssh port used by mpiexec to reach the pods.
                - containerPort: 22
                  name: job-port
              resources:
                requests:
                  cpu: "500m"
                  memory: "1024Mi"
                limits:
                  cpu: "500m"
                  memory: "1024Mi"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
    - replicas: 3
      name: worker
      template:
        spec:
          containers:
            - command:
                - /bin/sh
                - -c
                # Run sshd in the foreground (-D) so the container stays alive
                # and accepts connections from the master.
                - |
                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/horovod-tf-mnist:0.5
              name: worker
              ports:
                - containerPort: 22
                  name: job-port
              resources:
                requests:
                  cpu: "1000m"
                  memory: "2048Mi"
                limits:
                  cpu: "1000m"
                  memory: "2048Mi"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
---
# Volcano Job "lm-mpi-job": one MPI master plus two MPI workers.
# The master launches mpi_hello_world on the workers over ssh.
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: lm-mpi-job
  labels:
    # Set the job type according to your workload.
    "volcano.sh/job-type": "MPI"
spec:
  # Minimum number of pods the job needs (no more than the total replicas
  # across all tasks; here 1 master + 2 workers).
  minAvailable: 3
  schedulerName: volcano
  plugins:
    # Provides password-less ssh authentication between the pods.
    ssh: []
    # Provides the network information the job needs: hosts files,
    # headless service, etc.
    svc: []
  # If any pod is evicted, restart the whole job.
  policies:
    - event: PodEvicted
      action: RestartJob
  tasks:
    - replicas: 1
      name: mpimaster
      # When mpiexec finishes, treat the whole MPI job as complete.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          # Volcano places its generated information under /etc/volcano.
          containers:
            - command:
                - /bin/sh
                - -c
                # Build a comma-separated worker host list, start sshd in the
                # background, then run the MPI program with 2 processes
                # (matching the mpiworker replica count below).
                - |
                  MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
                  mpiexec --allow-run-as-root --host ${MPI_HOST} -np 2 mpi_hello_world;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              name: mpimaster
              ports:
                # ssh port used by mpiexec to reach the pods.
                - containerPort: 22
                  name: mpijob-port
              workingDir: /home
              resources:
                requests:
                  cpu: "500m"
                  memory: "1024Mi"
                limits:
                  cpu: "500m"
                  memory: "1024Mi"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
    - replicas: 2
      name: mpiworker
      template:
        spec:
          containers:
            - command:
                - /bin/sh
                - -c
                # Run sshd in the foreground (-D) so the container stays alive
                # and accepts connections from the master.
                - |
                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              name: mpiworker
              ports:
                - containerPort: 22
                  name: mpijob-port
              workingDir: /home
              resources:
                requests:
                  cpu: "1024m"
                  memory: "2048Mi"
                limits:
                  cpu: "1024m"
                  memory: "2048Mi"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
---
# ReplicaSet "nginx": eight nginx pods placed by the Volcano scheduler and
# pinned to a single node through a hostname nodeSelector.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  replicas: 8
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      schedulerName: volcano
      # The Volcano scheduler has already passed the conformance tests.
      # Pods may only land on the node whose hostname label equals this value;
      # change it to a node name that exists in your cluster.
      nodeSelector:
        "kubernetes.io/hostname": "192.168.23.24"
      containers:
        - name: nginx
          # No tag is given, so the image reference resolves to nginx:latest.
          image: nginx
          resources:
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
You can’t perform that action at this time.
0 commit comments