Skip to content

Commit 0b66712

Browse files
committed
feat: volume replication
Add the capability to replicate a ZFS disk to another zone (proxmox node). Signed-off-by: Serge Logvinov <[email protected]>
1 parent e3a25c2 commit 0b66712

File tree

10 files changed

+269
-13
lines changed

10 files changed

+269
-13
lines changed

docker-compose.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ services:
5252
- "--csi-address=unix:///csi/csi.sock"
5353
- "--leader-election=false"
5454
- "--kubeconfig=/etc/kubernetes/kubeconfig"
55-
- "--feature-gates=VolumeAttributesClass=true"
55+
# - "--feature-gates=VolumeAttributesClass=true"
5656
volumes:
5757
- type: volume
5858
source: socket-dir
@@ -69,7 +69,7 @@ services:
6969
- "--csi-address=unix:///csi/csi.sock"
7070
- "--leader-election=false"
7171
- "--default-fstype=ext4"
72-
- "--feature-gates=VolumeAttributesClass=true"
72+
# - "--feature-gates=VolumeAttributesClass=true"
7373
- "--enable-capacity"
7474
- "--capacity-ownerref-level=-1"
7575
- "--capacity-poll-interval=2m"

docs/options.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ parameters:
3838
diskIOPS: "4000"
3939
diskMBps: "1000"
4040

41+
## Optional: Zone replication
42+
replicate: "true"
43+
replicateSchedule: "*/15"
44+
replicateZones: "pve-1,pve-3"
45+
4146
# Optional: This field allows you to specify additional mount options to be applied when the volume is mounted on the node
4247
mountOptions:
4348
# Common for ssd
@@ -77,6 +82,10 @@ parameters:
7782
7883
## Optional: Backup disk with VM
7984
backup: "true"
85+
86+
## Optional: Zone replication
87+
replicateSchedule: "*/30"
88+
replicateZones: "rnd-1,rnd-2"
8089
```
8190

8291
## Parameters:
@@ -105,6 +114,10 @@ metadata:
105114

106115
* `backup` - set to `true` if you want to back up the disk together with the VM. Dangerous option! Do not use it unless you fully understand how to use it in the recovery process.
107116

117+
* `replicate` - set to `true` to replicate the disk to another zone (Proxmox node). Requires ZFS storage (Proxmox storage type `zfspool`) and a non-empty `replicateZones`.
118+
* `replicateSchedule` - replication schedule [in systemd calendar format](https://pve.proxmox.com/pve-docs/pve-admin-guide.html#pvesr_schedule_time_format) (default: `*/15`)
119+
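* `replicateZones` - comma-separated list of zones (Proxmox nodes) the disk may be replicated to, e.g. `pve-1,pve-3`. Required when `replicate` is `true`. The disk is replicated to the first zone in the list that differs from the zone where the volume is created.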
120+
108121
## AllowVolumeExpansion
109122

110123
Allow you to resize (expand) the PVC in future.

docs/proxmox-zfs.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
allowVolumeExpansion: true
2+
apiVersion: storage.k8s.io/v1
3+
kind: StorageClass
4+
metadata:
5+
name: proxmox-zfs
6+
parameters:
7+
csi.storage.k8s.io/fstype: xfs
8+
#
9+
storage: zfs
10+
#
11+
replicate: "true"
12+
replicateSchedule: "*/15"
13+
replicateZones: "pve-1,pve-2"
14+
provisioner: csi.proxmox.sinextra.dev
15+
reclaimPolicy: Delete
16+
volumeBindingMode: WaitForFirstConsumer

pkg/csi/controller.go

Lines changed: 83 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,10 @@ import (
3030
"google.golang.org/grpc/codes"
3131
"google.golang.org/grpc/status"
3232

33-
proxmox "github.com/sergelogvinov/proxmox-cloud-controller-manager/pkg/cluster"
33+
cluster "github.com/sergelogvinov/proxmox-cloud-controller-manager/pkg/cluster"
3434
"github.com/sergelogvinov/proxmox-csi-plugin/pkg/helpers/ptr"
3535
"github.com/sergelogvinov/proxmox-csi-plugin/pkg/metrics"
36+
"github.com/sergelogvinov/proxmox-csi-plugin/pkg/proxmox"
3637
"github.com/sergelogvinov/proxmox-csi-plugin/pkg/tools"
3738
volume "github.com/sergelogvinov/proxmox-csi-plugin/pkg/volume"
3839

@@ -60,22 +61,22 @@ var controllerCaps = []csi.ControllerServiceCapability_RPC_Type{
6061

6162
// ControllerService is the controller service for the CSI driver
6263
type ControllerService struct {
63-
Cluster *proxmox.Cluster
64+
Cluster *cluster.Cluster
6465
Kclient clientkubernetes.Interface
65-
Provider proxmox.Provider
66+
Provider cluster.Provider
6667
volumeLocks sync.Mutex
6768

6869
csi.UnimplementedControllerServer
6970
}
7071

7172
// NewControllerService returns a new controller service
7273
func NewControllerService(kclient *clientkubernetes.Clientset, cloudConfig string) (*ControllerService, error) {
73-
cfg, err := proxmox.ReadCloudConfigFromFile(cloudConfig)
74+
cfg, err := cluster.ReadCloudConfigFromFile(cloudConfig)
7475
if err != nil {
7576
return nil, fmt.Errorf("failed to read config: %v", err)
7677
}
7778

78-
cluster, err := proxmox.NewCluster(&cfg, nil)
79+
cluster, err := cluster.NewCluster(&cfg, nil)
7980
if err != nil {
8081
return nil, fmt.Errorf("failed to create proxmox cluster client: %v", err)
8182
}
@@ -110,7 +111,7 @@ func (d *ControllerService) CreateVolume(_ context.Context, request *csi.CreateV
110111

111112
klog.V(5).InfoS("CreateVolume: parameters", "parameters", params)
112113

113-
_, err := ExtractAndDefaultParameters(params)
114+
paramsSC, err := ExtractAndDefaultParameters(params)
114115
if err != nil {
115116
return nil, status.Error(codes.InvalidArgument, err.Error())
116117
}
@@ -192,6 +193,33 @@ func (d *ControllerService) CreateVolume(_ context.Context, request *csi.CreateV
192193
vmr.SetNode(zone)
193194
vmr.SetVmType("qemu")
194195

196+
if paramsSC.Replicate != nil && *paramsSC.Replicate {
197+
if storageConfig["type"].(string) != "zfspool" { //nolint:errcheck
198+
return nil, status.Error(codes.Internal, "error: storage type is not zfs in replication mode")
199+
}
200+
201+
vmr, err = cl.GetVmRefByName(pvc)
202+
if err != nil {
203+
id, err := cl.GetNextID(vmID + 1)
204+
if err != nil {
205+
klog.ErrorS(err, "CreateVolume: failed to get next id", "cluster", region)
206+
207+
return nil, status.Error(codes.Internal, err.Error())
208+
}
209+
210+
vmr = pxapi.NewVmRef(id)
211+
vmr.SetNode(zone)
212+
vmr.SetVmType("qemu")
213+
214+
mc := metrics.NewMetricContext("CreateVm")
215+
if err := proxmox.CreateQemuVM(cl, vmr, pvc); mc.ObserveRequest(err) != nil {
216+
klog.ErrorS(err, "CreateVolume: failed to create vm", "cluster", region)
217+
218+
return nil, status.Error(codes.Internal, err.Error())
219+
}
220+
}
221+
}
222+
195223
vol := volume.NewVolume(region, zone, params[StorageIDKey], fmt.Sprintf("vm-%d-%s", vmr.VmId(), pvc))
196224
if storageConfig["path"] != nil && storageConfig["path"].(string) != "" { //nolint:errcheck
197225
vol = volume.NewVolume(region, zone, params[StorageIDKey], fmt.Sprintf("%d/vm-%d-%s.raw", vmr.VmId(), vmr.VmId(), pvc))
@@ -218,6 +246,35 @@ func (d *ControllerService) CreateVolume(_ context.Context, request *csi.CreateV
218246
return nil, status.Error(codes.AlreadyExists, "volume already exists with same name and different capacity")
219247
}
220248

249+
if paramsSC.Replicate != nil && *paramsSC.Replicate {
250+
_, err := attachVolume(cl, vmr, vol.Storage(), vol.Disk(), paramsSC.ToMap())
251+
if err != nil {
252+
klog.ErrorS(err, "CreateVolume: failed to attach volume", "cluster", region, "volumeID", vol.VolumeID(), "vmID", vmr.VmId())
253+
254+
return nil, status.Error(codes.Internal, err.Error())
255+
}
256+
257+
if paramsSC.ReplicateZones != "" {
258+
var replicaZone string
259+
260+
for _, z := range strings.Split(paramsSC.ReplicateZones, ",") {
261+
if z != zone {
262+
replicaZone = z
263+
264+
break
265+
}
266+
}
267+
268+
if replicaZone != "" {
269+
if err := proxmox.SetQemuVMReplication(cl, vmr, replicaZone, paramsSC.ReplicateSchedule); err != nil {
270+
klog.ErrorS(err, "CreateVolume: failed to set replication", "cluster", region, "zone", replicaZone, "volumeID", vol.VolumeID(), "vmID", vmr.VmId())
271+
272+
return nil, status.Error(codes.Internal, err.Error())
273+
}
274+
}
275+
}
276+
}
277+
221278
volID := vol.VolumeID()
222279
if storageConfig["shared"] != nil && int(storageConfig["shared"].(float64)) == 1 { //nolint:errcheck
223280
volID = vol.VolumeSharedID()
@@ -279,6 +336,25 @@ func (d *ControllerService) DeleteVolume(_ context.Context, request *csi.DeleteV
279336
return nil, status.Error(codes.Internal, err.Error())
280337
}
281338

339+
if vmr.VmId() != vmID {
340+
config, err := cl.GetVmConfig(vmr)
341+
if err != nil {
342+
klog.ErrorS(err, "DeleteVolume: failed to get vm config", "cluster", vol.Cluster(), "volumeName", vol.Disk())
343+
}
344+
345+
if config != nil {
346+
vmName := config["name"].(string) //nolint:errcheck
347+
if vmName != "" && strings.HasSuffix(vol.Disk(), vmName) {
348+
mc := metrics.NewMetricContext("deleteVm")
349+
if err := proxmox.DeleteQemuVM(cl, vmr); mc.ObserveRequest(err) != nil {
350+
klog.ErrorS(err, "DeleteVolume: failed to delete vm", "cluster", vol.Cluster(), "volumeName", vol.Disk())
351+
352+
return nil, status.Error(codes.Internal, fmt.Sprintf("failed to delete volume: %s", vol.Disk()))
353+
}
354+
}
355+
}
356+
}
357+
282358
mc := metrics.NewMetricContext("deleteVolume")
283359
if _, err := cl.DeleteVolume(vmr, vol.Storage(), vol.Disk()); mc.ObserveRequest(err) != nil {
284360
klog.ErrorS(err, "DeleteVolume: failed to delete volume", "cluster", vol.Cluster(), "volumeName", vol.Disk())
@@ -729,7 +805,7 @@ func (d *ControllerService) getVMRefbyNodeID(ctx context.Context, cl *pxapi.Clie
729805
return nil, status.Error(codes.InvalidArgument, err.Error())
730806
}
731807

732-
if d.Provider == proxmox.ProviderCapmox {
808+
if d.Provider == cluster.ProviderCapmox {
733809
vmr, _, err = d.Cluster.FindVMByUUID(node.Status.NodeInfo.SystemUUID)
734810
if err != nil {
735811
return nil, status.Error(codes.Internal, err.Error())

pkg/csi/driver.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ const (
2121
// DriverName is the name of the CSI driver
2222
DriverName = "csi.proxmox.sinextra.dev"
2323
// DriverVersion is the version of the CSI driver
24-
DriverVersion = "0.4.0"
24+
DriverVersion = "0.5.0"
2525
// DriverSpecVersion CSI spec version
26-
DriverSpecVersion = "1.9.0"
26+
DriverSpecVersion = "1.11.0"
2727

2828
// MaxVolumesPerNode is the maximum number of volumes that can be attached to a node
2929
MaxVolumesPerNode = 24

pkg/csi/parameters.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,17 @@ type StorageParameters struct {
5858
IopsWrite *int `json:"iops_wr,omitempty"`
5959
ReadSpeedMbps *int `json:"mbps_rd,omitempty"`
6060
WriteSpeedMbps *int `json:"mbps_wr,omitempty"`
61-
Replicate *bool `json:"replicate,omitempty"`
6261
SSD *bool `json:"ssd,omitempty" cfg:"ssd"`
6362
ReadOnly *bool `json:"ro,omitempty"`
6463

6564
Iops *int `cfg:"diskIOPS"`
6665
SpeedMbps *int `cfg:"diskMBps"`
6766
BlockSize *int `cfg:"blockSize"`
6867
InodeSize *int `cfg:"inodeSize"`
68+
69+
Replicate *bool `json:"replicate,omitempty" cfg:"replicate"`
70+
ReplicateSchedule string `cfg:"replicateSchedule"`
71+
ReplicateZones string `cfg:"replicateZones"`
6972
}
7073

7174
// ModifyVolumeParameters contains parameters to modify a volume
@@ -80,6 +83,9 @@ type ModifyVolumeParameters struct {
8083

8184
Iops *int `cfg:"diskIOPS"`
8285
SpeedMbps *int `cfg:"diskMBps"`
86+
87+
ReplicateSchedule string `cfg:"replicateSchedule"`
88+
ReplicateZones string `cfg:"replicateZones"`
8389
}
8490

8591
// ExtractAndDefaultParameters extracts storage parameters from a map and sets default values.
@@ -122,6 +128,20 @@ func ExtractAndDefaultParameters(parameters map[string]string) (StorageParameter
122128

123129
f.Set(reflect.ValueOf(ptr.Ptr(i)))
124130
}
131+
} else {
132+
switch f.Kind() { //nolint:exhaustive
133+
case reflect.String:
134+
f.Set(reflect.ValueOf(v))
135+
case reflect.Bool:
136+
f.Set(reflect.ValueOf(v == "true"))
137+
case reflect.Int:
138+
i, err := strconv.Atoi(v)
139+
if err != nil {
140+
return p, fmt.Errorf("parameters %s must be a number", fieldName)
141+
}
142+
143+
f.Set(reflect.ValueOf(i))
144+
}
125145
}
126146
}
127147
}
@@ -154,6 +174,12 @@ func ExtractAndDefaultParameters(parameters map[string]string) (StorageParameter
154174
p.WriteSpeedMbps = ptr.Ptr(*p.SpeedMbps)
155175
}
156176

177+
if p.Replicate != nil && *p.Replicate {
178+
if p.ReplicateZones == "" {
179+
return p, fmt.Errorf("parameters %s must be provided", "replicateZones")
180+
}
181+
}
182+
157183
return p, nil
158184
}
159185

pkg/csi/parameters_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,20 @@ func Test_ExtractAndDefaultParameters(t *testing.T) {
9494
IopsWrite: ptr.Ptr(100),
9595
},
9696
},
97+
{
98+
msg: "replication disk",
99+
params: map[string]string{
100+
csi.StorageIDKey: "local-lvm",
101+
"replicate": "true",
102+
"replicateZones": "zone1,zone2",
103+
},
104+
storage: csi.StorageParameters{
105+
Backup: ptr.Ptr(false),
106+
IOThread: true,
107+
Replicate: ptr.Ptr(true),
108+
ReplicateZones: "zone1,zone2",
109+
},
110+
},
97111
}
98112

99113
for _, testCase := range tests {
@@ -140,6 +154,21 @@ func Test_ToMap(t *testing.T) {
140154
"iops_wr": "100",
141155
},
142156
},
157+
{
158+
msg: "Params with replication",
159+
storage: csi.StorageParameters{
160+
Cache: "directsync",
161+
IOThread: true,
162+
Replicate: ptr.Ptr(true),
163+
ReplicateZones: "zone1,zone2",
164+
ReplicateSchedule: "*/30",
165+
},
166+
params: map[string]string{
167+
"cache": "directsync",
168+
"iothread": "1",
169+
"replicate": "1",
170+
},
171+
},
143172
}
144173

145174
for _, testCase := range tests {

pkg/proxmox/doc.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/*
2+
Copyright 2023 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
// Package proxmox implements helpers for working with Proxmox VMs.
18+
package proxmox

0 commit comments

Comments
 (0)