Skip to content

Commit 535a92e

Browse files
authored
Merge pull request #70 from stefanprodan/append-headers
Allow headers to be appended to HTTP requests
2 parents 25fbe7e + 3411a6a commit 535a92e

File tree

10 files changed

+271
-38
lines changed

10 files changed

+271
-38
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22

33
All notable changes to this project are documented in this file.
44

5+
## Unreleased
6+
7+
#### Features
8+
9+
- Allow headers to be appended to HTTP requests [#70](https://github.com/stefanprodan/flagger/pull/70)
10+
511
## 0.7.0 (2019-02-28)
612

713
Adds support for custom metric checks, HTTP timeouts and HTTP retries

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,11 @@ spec:
106106
# HTTP rewrite (optional)
107107
rewrite:
108108
uri: /
109-
# timeout for HTTP requests (optional)
110-
timeout: 5s
111-
# retry policy when a HTTP request fails (optional)
112-
retries:
113-
attempts: 3
109+
# Envoy timeout and retry policy (optional)
110+
appendHeaders:
111+
x-envoy-upstream-rq-timeout-ms: "15000"
112+
x-envoy-max-retries: "10"
113+
x-envoy-retry-on: "gateway-error,connect-failure,refused-stream"
114114
# promote the canary without analysing it (default false)
115115
skipAnalysis: false
116116
# define the canary analysis timing and KPIs

artifacts/canaries/canary.yaml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,19 @@ spec:
2626
# Istio virtual service host names (optional)
2727
hosts:
2828
- app.istio.weavedx.com
29-
# Istio virtual service HTTP match conditions (optional)
29+
# HTTP match conditions (optional)
3030
match:
3131
- uri:
3232
prefix: /
33-
# Istio virtual service HTTP rewrite (optional)
33+
# HTTP rewrite (optional)
3434
rewrite:
3535
uri: /
36-
# for emergency cases when you want to ship changes
37-
# in production without analysing the canary
36+
# Envoy timeout and retry policy (optional)
37+
appendHeaders:
38+
x-envoy-upstream-rq-timeout-ms: "15000"
39+
x-envoy-max-retries: "10"
40+
x-envoy-retry-on: "gateway-error,connect-failure,refused-stream"
41+
# promote the canary without analysing it (default false)
3842
skipAnalysis: false
3943
canaryAnalysis:
4044
# schedule interval (default 60s)

docs/gitbook/SUMMARY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@
1717
## Tutorials
1818

1919
* [Canaries with Helm charts and GitOps](tutorials/canary-helm-gitops.md)
20+
* [Zero downtime deployments](tutorials/zero-downtime-deployments.md)

docs/gitbook/how-it-works.md

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,11 @@ spec:
4646
# HTTP rewrite (optional)
4747
rewrite:
4848
uri: /
49-
# timeout for HTTP requests (optional)
50-
timeout: 5s
51-
# retry policy when a HTTP request fails (optional)
52-
retries:
53-
attempts: 3
54-
perTryTimeout: 3s
49+
# Envoy timeout and retry policy (optional)
50+
appendHeaders:
51+
x-envoy-upstream-rq-timeout-ms: "15000"
52+
x-envoy-max-retries: "10"
53+
x-envoy-retry-on: "gateway-error,connect-failure,refused-stream"
5554
# promote the canary without analysing it (default false)
5655
skipAnalysis: false
5756
# define the canary analysis timing and KPIs
@@ -138,8 +137,11 @@ metadata:
138137
# HTTP rewrite (optional)
139138
rewrite:
140139
uri: /
141-
# timeout for HTTP requests (optional)
142-
timeout: 5s
140+
# Envoy timeout and retry policy (optional)
141+
appendHeaders:
142+
x-envoy-upstream-rq-timeout-ms: "15000"
143+
x-envoy-max-retries: "10"
144+
x-envoy-retry-on: "gateway-error,connect-failure,refused-stream"
143145
# retry policy when a HTTP request fails (optional)
144146
retries:
145147
attempts: 3
@@ -174,10 +176,10 @@ spec:
174176
prefix: /
175177
rewrite:
176178
uri: /
177-
timeout: 5s
178-
retries:
179-
attempts: 3
180-
perTryTimeout: 3s
179+
appendHeaders:
180+
x-envoy-upstream-rq-timeout-ms: "15000"
181+
x-envoy-max-retries: "10"
182+
x-envoy-retry-on: "gateway-error,connect-failure,refused-stream"
181183
route:
182184
- destination:
183185
host: frontend-primary
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
# Zero downtime deployments
2+
3+
This is a list of things you should consider when dealing with a high traffic production environment if you want to
4+
minimise the impact of rolling updates and downscaling.
5+
6+
### Deployment strategy
7+
8+
Limit the number of unavailable pods during a rolling update:
9+
10+
```yaml
11+
apiVersion: apps/v1
12+
kind: Deployment
13+
spec:
14+
progressDeadlineSeconds: 120
15+
strategy:
16+
type: RollingUpdate
17+
rollingUpdate:
18+
maxUnavailable: 0
19+
```
20+
21+
The default progress deadline for a deployment is ten minutes.
22+
You should consider adjusting this value to make the deployment process fail faster.
23+
24+
### Liveness health check
25+
26+
Your application should expose a HTTP endpoint that Kubernetes can call to determine if
27+
your app transitioned to a broken state from which it can't recover and needs to be restarted.
28+
29+
```yaml
30+
livenessProbe:
31+
exec:
32+
command:
33+
- wget
34+
- --quiet
35+
- --tries=1
36+
- --timeout=4
37+
- --spider
38+
- http://localhost:8080/healthz
39+
timeoutSeconds: 5
40+
initialDelaySeconds: 5
41+
```
42+
43+
If you've enabled mTLS, you'll have to use `exec` for liveness and readiness checks since
44+
kubelet is not part of the service mesh and doesn't have access to the TLS cert.
45+
46+
### Readiness health check
47+
48+
Your application should expose a HTTP endpoint that Kubernetes can call to determine if
49+
your app is ready to receive traffic.
50+
51+
```yaml
52+
readinessProbe:
53+
exec:
54+
command:
55+
- wget
56+
- --quiet
57+
- --tries=1
58+
- --timeout=4
59+
- --spider
60+
- http://localhost:8080/readyz
61+
timeoutSeconds: 5
62+
initialDelaySeconds: 5
63+
periodSeconds: 5
64+
```
65+
66+
If your app depends on external services, you should check if those services are available before allowing Kubernetes
67+
to route traffic to an app instance. Keep in mind that the Envoy sidecar can have a slower startup than your app.
68+
This means that on application start you should retry any external connection for at least a couple of seconds.
69+
70+
### Graceful shutdown
71+
72+
Before a pod gets terminated, Kubernetes sends a `SIGTERM` signal to every container and waits for a period of
73+
time (30s by default) for all containers to exit gracefully. If your app doesn't handle the `SIGTERM` signal or if it
74+
doesn't exit within the grace period, Kubernetes will kill the container and any inflight requests that your app is
75+
processing will fail.
76+
77+
```yaml
78+
apiVersion: apps/v1
79+
kind: Deployment
80+
spec:
81+
template:
82+
spec:
83+
terminationGracePeriodSeconds: 60
84+
containers:
85+
- name: app
86+
lifecycle:
87+
preStop:
88+
exec:
89+
command:
90+
- sleep
91+
- "10"
92+
```
93+
94+
Your app container should have a `preStop` hook that delays the container shutdown.
95+
This will allow the service mesh to drain the traffic and remove this pod from all other Envoy sidecars before your app
96+
becomes unavailable.
97+
98+
### Delay Envoy shutdown
99+
100+
Even if your app reacts to `SIGTERM` and tries to complete the inflight requests before shutdown, that
101+
doesn't mean that the response will make it back to the caller. If the Envoy sidecar shuts down before your app, then
102+
the caller will receive a 503 error.
103+
104+
To mitigate this issue you can add a `preStop` hook to the Istio proxy and wait for the main app to exit before Envoy exits.
105+
106+
```bash
107+
#!/bin/bash
108+
set -e
109+
if ! pidof envoy &>/dev/null; then
110+
exit 0
111+
fi
112+
113+
if ! pidof pilot-agent &>/dev/null; then
114+
exit 0
115+
fi
116+
117+
while [ $(netstat -plunt | grep tcp | grep -v envoy | wc -l | xargs) -ne 0 ]; do
118+
sleep 1;
119+
done
120+
121+
exit 0
122+
```
123+
124+
You'll have to build your own Envoy docker image with the above script and
125+
modify the Istio injection webhook with the `preStop` directive.
126+
127+
Thanks to Stono for his excellent [tips](https://github.com/istio/istio/issues/12183) on minimising 503s.
128+
129+
### Resource requests and limits
130+
131+
Setting CPU and memory requests/limits for all workloads is a mandatory step if you're running a production system.
132+
Without limits your nodes could run out of memory or become unresponsive due to CPU exhaustion.
133+
Without CPU and memory requests,
134+
the Kubernetes scheduler will not be able to make decisions about which nodes to place pods on.
135+
136+
```yaml
137+
apiVersion: apps/v1
138+
kind: Deployment
139+
spec:
140+
template:
141+
spec:
142+
containers:
143+
- name: app
144+
resources:
145+
limits:
146+
cpu: 1000m
147+
memory: 1Gi
148+
requests:
149+
cpu: 100m
150+
memory: 128Mi
151+
```
152+
153+
Note that without resource requests the horizontal pod autoscaler can't determine when to scale your app.
154+
155+
### Autoscaling
156+
157+
A production environment should be able to handle traffic bursts without impacting the quality of service.
158+
This can be achieved with Kubernetes autoscaling capabilities.
159+
Autoscaling in Kubernetes has two dimensions: the Cluster Autoscaler that deals with node scaling operations and
160+
the Horizontal Pod Autoscaler that automatically scales the number of pods in a deployment.
161+
162+
```yaml
163+
apiVersion: autoscaling/v2beta1
164+
kind: HorizontalPodAutoscaler
165+
spec:
166+
scaleTargetRef:
167+
apiVersion: apps/v1
168+
kind: Deployment
169+
name: app
170+
minReplicas: 2
171+
maxReplicas: 4
172+
metrics:
173+
- type: Resource
174+
resource:
175+
name: cpu
176+
targetAverageValue: 900m
177+
- type: Resource
178+
resource:
179+
name: memory
180+
targetAverageValue: 768Mi
181+
```
182+
183+
The above HPA ensures your app will be scaled up before the pods reach the CPU or memory limits.
184+
185+
### Ingress retries
186+
187+
To minimise the impact of downscaling operations you can make use of Envoy retry capabilities.
188+
189+
```yaml
190+
apiVersion: flagger.app/v1alpha3
191+
kind: Canary
192+
spec:
193+
service:
194+
port: 9898
195+
gateways:
196+
- public-gateway.istio-system.svc.cluster.local
197+
hosts:
198+
- app.example.com
199+
appendHeaders:
200+
x-envoy-upstream-rq-timeout-ms: "15000"
201+
x-envoy-max-retries: "10"
202+
x-envoy-retry-on: "gateway-error,connect-failure,refused-stream"
203+
```
204+
205+
When the HPA scales down your app, your users could run into 503 errors.
206+
The above configuration will make Envoy retry the HTTP requests that failed due to gateway errors.

pkg/apis/flagger/v1alpha3/types.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,14 @@ type CanaryStatus struct {
109109
// CanaryService is used to create ClusterIP services
110110
// and Istio Virtual Service
111111
type CanaryService struct {
112-
Port int32 `json:"port"`
113-
Gateways []string `json:"gateways"`
114-
Hosts []string `json:"hosts"`
115-
Match []istiov1alpha3.HTTPMatchRequest `json:"match,omitempty"`
116-
Rewrite *istiov1alpha3.HTTPRewrite `json:"rewrite,omitempty"`
117-
Timeout string `json:"timeout,omitempty"`
118-
Retries *istiov1alpha3.HTTPRetry `json:"retries,omitempty"`
112+
Port int32 `json:"port"`
113+
Gateways []string `json:"gateways"`
114+
Hosts []string `json:"hosts"`
115+
Match []istiov1alpha3.HTTPMatchRequest `json:"match,omitempty"`
116+
Rewrite *istiov1alpha3.HTTPRewrite `json:"rewrite,omitempty"`
117+
Timeout string `json:"timeout,omitempty"`
118+
Retries *istiov1alpha3.HTTPRetry `json:"retries,omitempty"`
119+
AppendHeaders map[string]string `json:"appendHeaders,omitempty"`
119120
}
120121

121122
// CanaryAnalysis is used to describe how the analysis should be done

pkg/apis/flagger/v1alpha3/zz_generated.deepcopy.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/controller/router.go

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -203,11 +203,12 @@ func (c *CanaryRouter) syncVirtualService(cd *flaggerv1.Canary) error {
203203
Gateways: gateways,
204204
Http: []istiov1alpha3.HTTPRoute{
205205
{
206-
Match: cd.Spec.Service.Match,
207-
Rewrite: cd.Spec.Service.Rewrite,
208-
Timeout: cd.Spec.Service.Timeout,
209-
Retries: cd.Spec.Service.Retries,
210-
Route: route,
206+
Match: cd.Spec.Service.Match,
207+
Rewrite: cd.Spec.Service.Rewrite,
208+
Timeout: cd.Spec.Service.Timeout,
209+
Retries: cd.Spec.Service.Retries,
210+
AppendHeaders: cd.Spec.Service.AppendHeaders,
211+
Route: route,
211212
},
212213
},
213214
}
@@ -319,11 +320,12 @@ func (c *CanaryRouter) SetRoutes(
319320
vsCopy := vs.DeepCopy()
320321
vsCopy.Spec.Http = []istiov1alpha3.HTTPRoute{
321322
{
322-
Match: cd.Spec.Service.Match,
323-
Rewrite: cd.Spec.Service.Rewrite,
324-
Timeout: cd.Spec.Service.Timeout,
325-
Retries: cd.Spec.Service.Retries,
326-
Route: []istiov1alpha3.DestinationWeight{primary, canary},
323+
Match: cd.Spec.Service.Match,
324+
Rewrite: cd.Spec.Service.Rewrite,
325+
Timeout: cd.Spec.Service.Timeout,
326+
Retries: cd.Spec.Service.Retries,
327+
AppendHeaders: cd.Spec.Service.AppendHeaders,
328+
Route: []istiov1alpha3.DestinationWeight{primary, canary},
327329
},
328330
}
329331

test/e2e-tests.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ spec:
3333
progressDeadlineSeconds: 60
3434
service:
3535
port: 9898
36+
appendHeaders:
37+
x-envoy-upstream-rq-timeout-ms: "15000"
38+
x-envoy-max-retries: "10"
39+
x-envoy-retry-on: "gateway-error,connect-failure,refused-stream"
3640
canaryAnalysis:
3741
interval: 15s
3842
threshold: 15

0 commit comments

Comments
 (0)