Skip to content

Commit ab86c13

Browse files
Merge pull request #259 from k82cn/kubecon-2019-china-demo
Kubecon 2019 china demo
2 parents 02bef05 + 4d8751b commit ab86c13

17 files changed

Lines changed: 748 additions & 0 deletions

File tree

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
---
# ReplicaSet "nginx-1": a set of identical nginx pods that are placed by the
# Volcano scheduler (see schedulerName below).
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx-1
  labels:
    app: nginx-1
spec:
  # Modify replicas according to your case.
  replicas: 8
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: nginx-1
  template:
    metadata:
      labels:
        app: nginx-1
    spec:
      # Hand these pods to the Volcano scheduler instead of the default one.
      schedulerName: volcano
      containers:
        - name: nginx-1
          image: nginx
          # CPU request and limit are equal: one full core per pod.
          resources:
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
---
# ReplicaSet "nginx-2": a set of identical nginx pods that are placed by the
# Volcano scheduler (see schedulerName below).
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx-2
  labels:
    app: nginx-2
spec:
  # Modify replicas according to your case.
  replicas: 8
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: nginx-2
  template:
    metadata:
      labels:
        app: nginx-2
    spec:
      # Hand these pods to the Volcano scheduler instead of the default one.
      schedulerName: volcano
      containers:
        - name: nginx-2
          image: nginx
          # CPU request and limit are equal: one full core per pod.
          resources:
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
---
# ReplicaSet "nginx": a set of identical nginx pods. No schedulerName is set,
# so the cluster's default scheduler places them.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  # Modify replicas according to your case.
  replicas: 8
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          image: nginx
          # CPU request and limit are equal: one full core per pod.
          resources:
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
---
# Volcano Job "lm-mpi-job": one MPI master task that launches mpiexec and
# three MPI worker tasks that only run sshd.
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: lm-mpi-job
  labels:
    # Set the job type according to your workload.
    "volcano.sh/job-type": "MPI"
spec:
  # Minimum number of pods the job needs; it must not exceed the total
  # replica count (here 1 master + 3 workers = 4).
  minAvailable: 4
  schedulerName: volcano
  plugins:
    # Provides password-less SSH authentication between the pods.
    ssh: []
    # Provides the network information the job needs: hosts files,
    # headless service, etc.
    svc: []
  # If a pod is evicted (killed), restart the whole job.
  policies:
    - event: PodEvicted
      action: RestartJob
  tasks:
    - replicas: 1
      name: mpimaster
      # When mpiexec finishes, treat the whole MPI job as complete.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          # Volcano places its generated information under /etc/volcano.
          containers:
            - command:
                - /bin/sh
                - -c
                # Build a comma-separated host list from the worker host
                # file, start sshd in the background, then run the MPI
                # program with 3 processes (one per worker replica).
                - |
                  MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
                  mpiexec --allow-run-as-root --host ${MPI_HOST} -np 3 mpi_hello_world;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              name: mpimaster
              ports:
                # SSH port.
                - containerPort: 22
                  name: mpijob-port
              workingDir: /home
              resources:
                requests:
                  cpu: "500m"
                limits:
                  cpu: "500m"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
    - replicas: 3
      name: mpiworker
      template:
        spec:
          containers:
            - command:
                - /bin/sh
                - -c
                # Run sshd in the foreground (-D) so the container stays up.
                - |
                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              name: mpiworker
              ports:
                # SSH port.
                - containerPort: 22
                  name: mpijob-port
              workingDir: /home
              resources:
                requests:
                  cpu: "1000m"
                limits:
                  cpu: "1000m"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
---
# ReplicaSet "nginx": a set of identical nginx pods. No schedulerName is set,
# so the cluster's default scheduler places them.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  # Modify replicas according to your case.
  replicas: 6
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          image: nginx
          # CPU request and limit are equal: one full core per pod.
          resources:
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
---
# Volcano Job "lm-horovod-job": one master task that launches the training
# script through mpiexec and three worker tasks that only run sshd.
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: lm-horovod-job
  labels:
    "volcano.sh/job-type": Horovod
spec:
  # Equals the total replica count below (1 master + 3 workers = 4).
  minAvailable: 4
  schedulerName: volcano
  plugins:
    ssh: []
    svc: []
  # If a pod is evicted (killed), restart the whole job.
  policies:
    - event: PodEvicted
      action: RestartJob
  tasks:
    - replicas: 1
      name: master
      # When the master task completes, mark the whole job as complete.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          containers:
            - command:
                - /bin/sh
                - -c
                # Build a comma-separated host list from the worker host
                # file, start sshd in the background, then run the training
                # script with 3 processes (one per worker replica).
                - |
                  WORKER_HOST=`cat /etc/volcano/worker.host | tr "\n" ","`;
                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
                  mpiexec --allow-run-as-root --host ${WORKER_HOST} -np 3 python tensorflow_mnist_lm.py;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/horovod-tf-mnist:0.5
              name: master
              ports:
                # SSH port.
                - containerPort: 22
                  name: job-port
              resources:
                requests:
                  cpu: "500m"
                  memory: "1024Mi"
                limits:
                  cpu: "500m"
                  memory: "1024Mi"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
    - replicas: 3
      name: worker
      template:
        spec:
          containers:
            - command:
                - /bin/sh
                - -c
                # Run sshd in the foreground (-D) so the container stays up.
                - |
                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/horovod-tf-mnist:0.5
              name: worker
              ports:
                # SSH port.
                - containerPort: 22
                  name: job-port
              resources:
                requests:
                  cpu: "1000m"
                  memory: "2048Mi"
                limits:
                  cpu: "1000m"
                  memory: "2048Mi"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
---
# Volcano Job "lm-mpi-job": one MPI master task that launches mpiexec and
# two MPI worker tasks that only run sshd.
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: lm-mpi-job
  labels:
    # Set the job type according to your workload.
    "volcano.sh/job-type": "MPI"
spec:
  # Minimum number of pods the job needs; it must not exceed the total
  # replica count (here 1 master + 2 workers = 3).
  minAvailable: 3
  schedulerName: volcano
  plugins:
    # Provides password-less SSH authentication between the pods.
    ssh: []
    # Provides the network information the job needs: hosts files,
    # headless service, etc.
    svc: []
  # If a pod is evicted (killed), restart the whole job.
  policies:
    - event: PodEvicted
      action: RestartJob
  tasks:
    - replicas: 1
      name: mpimaster
      # When mpiexec finishes, treat the whole MPI job as complete.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          # Volcano places its generated information under /etc/volcano.
          containers:
            - command:
                - /bin/sh
                - -c
                # Build a comma-separated host list from the worker host
                # file, start sshd in the background, then run the MPI
                # program with 2 processes (one per worker replica).
                - |
                  MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
                  mpiexec --allow-run-as-root --host ${MPI_HOST} -np 2 mpi_hello_world;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              name: mpimaster
              ports:
                # SSH port.
                - containerPort: 22
                  name: mpijob-port
              workingDir: /home
              resources:
                requests:
                  cpu: "500m"
                  memory: "1024Mi"
                limits:
                  cpu: "500m"
                  memory: "1024Mi"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
    - replicas: 2
      name: mpiworker
      template:
        spec:
          containers:
            - command:
                - /bin/sh
                - -c
                # Run sshd in the foreground (-D) so the container stays up.
                - |
                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              name: mpiworker
              ports:
                # SSH port.
                - containerPort: 22
                  name: mpijob-port
              workingDir: /home
              resources:
                # NOTE(review): "1024m" is 1.024 CPU; possibly intended to be
                # "1000m" (one core) - confirm before changing.
                requests:
                  cpu: "1024m"
                  memory: "2048Mi"
                limits:
                  cpu: "1024m"
                  memory: "2048Mi"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
---
# ReplicaSet "nginx": a set of identical nginx pods that are placed by the
# Volcano scheduler and pinned to a single node through a nodeSelector.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  replicas: 8
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      schedulerName: volcano
      # The Volcano scheduler has already passed the conformance tests.
      # NOTE(review): the hostname below is hard-coded; pods only schedule
      # onto a node carrying this exact label value - adjust for your cluster.
      nodeSelector:
        "kubernetes.io/hostname": "192.168.23.24"
      containers:
        - name: nginx
          image: nginx
          # CPU request and limit are equal: one full core per pod.
          resources:
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"

0 commit comments

Comments
 (0)