From f148cf0421579babe2086304232292b0d562e600 Mon Sep 17 00:00:00 2001 From: Chaer Date: Thu, 18 Apr 2024 17:40:52 +0800 Subject: [PATCH] enhance: enable the init container to modify the iptables nat OUTPUT chain to redirect outbound traffic --- artifacts/scripts/proxy-init.sh | 84 ++++++++++++++++----------------- pkg/apis/ctrlmesh/types.go | 12 +++-- pkg/cmd/proxy/main.go | 53 ++++++++++++++++++++- pkg/webhook/pod/injector.go | 20 ++++---- 4 files changed, 109 insertions(+), 60 deletions(-) diff --git a/artifacts/scripts/proxy-init.sh b/artifacts/scripts/proxy-init.sh index 20eda87..8734a7a 100755 --- a/artifacts/scripts/proxy-init.sh +++ b/artifacts/scripts/proxy-init.sh @@ -14,27 +14,27 @@ if [ ! -f "${SA_DIR}/token" ]; then fi # Remove the old chains, to generate new configs. -iptables -t nat -D PREROUTING -p tcp -j ctrlmesh_PROXY_INBOUND 2>/dev/null -iptables -t mangle -D PREROUTING -p tcp -j ctrlmesh_PROXY_INBOUND 2>/dev/null -iptables -t nat -D OUTPUT -p tcp -j ctrlmesh_PROXY_OUTPUT 2>/dev/null +iptables -t nat -D PREROUTING -p tcp -j CTRLMESH_INBOUND 2>/dev/null +iptables -t mangle -D PREROUTING -p tcp -j CTRLMESH_INBOUND 2>/dev/null +iptables -t nat -D OUTPUT -p tcp -j CTRLMESH_OUTPUT 2>/dev/null # Flush and delete the ctrlmesh chains. 
-iptables -t nat -F ctrlmesh_PROXY_OUTPUT 2>/dev/null -iptables -t nat -X ctrlmesh_PROXY_OUTPUT 2>/dev/null -iptables -t nat -F ctrlmesh_PROXY_INBOUND 2>/dev/null -iptables -t nat -X ctrlmesh_PROXY_INBOUND 2>/dev/null -iptables -t mangle -F ctrlmesh_PROXY_INBOUND 2>/dev/null -iptables -t mangle -X ctrlmesh_PROXY_INBOUND 2>/dev/null -iptables -t mangle -F ctrlmesh_PROXY_DIVERT 2>/dev/null -iptables -t mangle -X ctrlmesh_PROXY_DIVERT 2>/dev/null -iptables -t mangle -F ctrlmesh_PROXY_TPROXY 2>/dev/null -iptables -t mangle -X ctrlmesh_PROXY_TPROXY 2>/dev/null +iptables -t nat -F CTRLMESH_OUTPUT 2>/dev/null +iptables -t nat -X CTRLMESH_OUTPUT 2>/dev/null +iptables -t nat -F CTRLMESH_INBOUND 2>/dev/null +iptables -t nat -X CTRLMESH_INBOUND 2>/dev/null +iptables -t mangle -F CTRLMESH_INBOUND 2>/dev/null +iptables -t mangle -X CTRLMESH_INBOUND 2>/dev/null +iptables -t mangle -F CTRLMESH_DIVERT 2>/dev/null +iptables -t mangle -X CTRLMESH_DIVERT 2>/dev/null +iptables -t mangle -F CTRLMESH_TPROXY 2>/dev/null +iptables -t mangle -X CTRLMESH_TPROXY 2>/dev/null # Must be last, the others refer to it -iptables -t nat -F ctrlmesh_PROXY_REDIRECT 2>/dev/null -iptables -t nat -X ctrlmesh_PROXY_REDIRECT 2>/dev/null -iptables -t nat -F ctrlmesh_PROXY_IN_REDIRECT 2>/dev/null -iptables -t nat -X ctrlmesh_PROXY_IN_REDIRECT 2>/dev/null +iptables -t nat -F CTRLMESH_REDIRECT 2>/dev/null +iptables -t nat -X CTRLMESH_REDIRECT 2>/dev/null +iptables -t nat -F CTRLMESH_IN_REDIRECT 2>/dev/null +iptables -t nat -X CTRLMESH_IN_REDIRECT 2>/dev/null if [ "${1:-}" = "clean" ]; then echo "Only cleaning, no new rules added" @@ -70,13 +70,13 @@ set -o pipefail set -x # echo on # Create a new chain for redirecting outbound traffic to the apiserver port. -# In both chains, '-j RETURN' bypasses Proxy and '-j ctrlmesh_PROXY_REDIRECT' redirects to Proxy. 
-iptables -t nat -N ctrlmesh_PROXY_REDIRECT -iptables -t nat -A ctrlmesh_PROXY_REDIRECT -p tcp -j REDIRECT --to-port "${PROXY_APISERVER_PORT}" +# In both chains, '-j RETURN' bypasses Proxy and '-j CTRLMESH_REDIRECT' redirects to Proxy. +iptables -t nat -N CTRLMESH_REDIRECT +iptables -t nat -A CTRLMESH_REDIRECT -p tcp -j REDIRECT --to-port "${PROXY_APISERVER_PORT}" # Use this chain also for redirecting inbound traffic to the webhook port when not using TPROXY. -iptables -t nat -N ctrlmesh_PROXY_IN_REDIRECT -iptables -t nat -A ctrlmesh_PROXY_IN_REDIRECT -p tcp -j REDIRECT --to-port "${PROXY_WEBHOOK_PORT}" +iptables -t nat -N CTRLMESH_IN_REDIRECT +iptables -t nat -A CTRLMESH_IN_REDIRECT -p tcp -j REDIRECT --to-port "${PROXY_WEBHOOK_PORT}" # Handling of inbound ports. Traffic will be redirected to Proxy, which will process and forward # to the local webhook. If not set, no inbound port will be intercepted by the iptables. @@ -85,14 +85,14 @@ if [ -n "${INBOUND_WEBHOOK_PORT}" ]; then # When using TPROXY, create a new chain for routing all inbound traffic to # Proxy. Any packet entering this chain gets marked with the ${INBOUND_TPROXY_MARK} mark, # so that they get routed to the loopback interface in order to get redirected to Proxy. - # In the ctrlmesh_PROXY_INBOUND chain, '-j ctrlmesh_PROXY_DIVERT' reroutes to the loopback + # In the CTRLMESH_INBOUND chain, '-j CTRLMESH_DIVERT' reroutes to the loopback # interface. # Mark all inbound packets. - iptables -t mangle -N ctrlmesh_PROXY_DIVERT - iptables -t mangle -A ctrlmesh_PROXY_DIVERT -j MARK --set-mark "${INBOUND_TPROXY_MARK}" - iptables -t mangle -A ctrlmesh_PROXY_DIVERT -j ACCEPT + iptables -t mangle -N CTRLMESH_DIVERT + iptables -t mangle -A CTRLMESH_DIVERT -j MARK --set-mark "${INBOUND_TPROXY_MARK}" + iptables -t mangle -A CTRLMESH_DIVERT -j ACCEPT - # Route all packets marked in chain ctrlmesh_PROXY_DIVERT using routing table ${INBOUND_TPROXY_ROUTE_TABLE}. 
+ # Route all packets marked in chain CTRLMESH_DIVERT using routing table ${INBOUND_TPROXY_ROUTE_TABLE}. ip -f inet rule add fwmark "${INBOUND_TPROXY_MARK}" lookup "${INBOUND_TPROXY_ROUTE_TABLE}" # In routing table ${INBOUND_TPROXY_ROUTE_TABLE}, create a single default rule to route all traffic to # the loopback interface. @@ -100,41 +100,41 @@ if [ -n "${INBOUND_WEBHOOK_PORT}" ]; then # Create a new chain for redirecting inbound traffic to the common Envoy # port. - # In the ctrlmesh_PROXY_INBOUND chain, '-j RETURN' bypasses Envoy and - # '-j ctrlmesh_PROXY_TPROXY' redirects to Envoy. - iptables -t mangle -N ctrlmesh_PROXY_TPROXY - iptables -t mangle -A ctrlmesh_PROXY_TPROXY ! -d 127.0.0.1/32 -p tcp -j TPROXY --tproxy-mark "${INBOUND_TPROXY_MARK}"/0xffffffff --on-port "${PROXY_PORT}" + # In the CTRLMESH_INBOUND chain, '-j RETURN' bypasses Envoy and + # '-j CTRLMESH_TPROXY' redirects to Envoy. + iptables -t mangle -N CTRLMESH_TPROXY + iptables -t mangle -A CTRLMESH_TPROXY ! -d 127.0.0.1/32 -p tcp -j TPROXY --tproxy-mark "${INBOUND_TPROXY_MARK}"/0xffffffff --on-port "${PROXY_PORT}" table=mangle else table=nat fi - iptables -t "${table}" -N ctrlmesh_PROXY_INBOUND - iptables -t "${table}" -A PREROUTING -p tcp -j ctrlmesh_PROXY_INBOUND + iptables -t "${table}" -N CTRLMESH_INBOUND + iptables -t "${table}" -A PREROUTING -p tcp -j CTRLMESH_INBOUND if [ "${INBOUND_INTERCEPTION_MODE}" = "TPROXY" ]; then - iptables -t mangle -A ctrlmesh_PROXY_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -m socket -j ctrlmesh_PROXY_DIVERT || echo "No socket match support" - iptables -t mangle -A ctrlmesh_PROXY_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -m socket -j ctrlmesh_PROXY_DIVERT || echo "No socket match support" - iptables -t mangle -A ctrlmesh_PROXY_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -j ctrlmesh_PROXY_TPROXY + iptables -t mangle -A CTRLMESH_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -m socket -j CTRLMESH_DIVERT || echo "No socket match support" + 
iptables -t mangle -A CTRLMESH_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -m socket -j CTRLMESH_DIVERT || echo "No socket match support" + iptables -t mangle -A CTRLMESH_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -j CTRLMESH_TPROXY else - iptables -t nat -A ctrlmesh_PROXY_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -j ctrlmesh_PROXY_IN_REDIRECT + iptables -t nat -A CTRLMESH_INBOUND -p tcp --dport "${INBOUND_WEBHOOK_PORT}" -j CTRLMESH_IN_REDIRECT fi fi # Create a new chain for selectively redirecting outbound packets to Proxy. -iptables -t nat -N ctrlmesh_PROXY_OUTPUT +iptables -t nat -N CTRLMESH_OUTPUT -# Jump to the ctrlmesh_PROXY_OUTPUT chain from OUTPUT chain for all tcp traffic. -iptables -t nat -A OUTPUT -p tcp -j ctrlmesh_PROXY_OUTPUT +# Jump to the CTRLMESH_OUTPUT chain from OUTPUT chain for all tcp traffic. +iptables -t nat -A OUTPUT -p tcp -j CTRLMESH_OUTPUT for uid in ${PROXY_UID}; do # Avoid infinite loops. Don't redirect Proxy traffic directly back to # Proxy for non-loopback traffic. - iptables -t nat -A ctrlmesh_PROXY_OUTPUT -m owner --uid-owner "${uid}" -j RETURN + iptables -t nat -A CTRLMESH_OUTPUT -m owner --uid-owner "${uid}" -j RETURN done # Redirect all apiserver outbound traffic to Proxy. 
-iptables -t nat -A ctrlmesh_PROXY_OUTPUT -d "${KUBERNETES_SERVICE_HOST}" -j ctrlmesh_PROXY_REDIRECT +iptables -t nat -A CTRLMESH_OUTPUT -d "${KUBERNETES_SERVICE_HOST}" -j CTRLMESH_REDIRECT # Generate certs mount -o remount,rw "${SA_DIR}" diff --git a/pkg/apis/ctrlmesh/types.go b/pkg/apis/ctrlmesh/types.go index f9375d0..3979819 100644 --- a/pkg/apis/ctrlmesh/types.go +++ b/pkg/apis/ctrlmesh/types.go @@ -31,11 +31,13 @@ const ( // Labels const ( - CtrlmeshControlPrefix = "ctrlmesh.kusionstack.io/" - CtrlmeshIgnoreWebhookLabel = "ctrlmesh.kusionstack.io/ignore-webhook" - CtrlmeshIgnoreValidateLabel = "ctrlmesh.kusionstack.io/ignore-validate" - CtrlmeshDefaultReplicasLabel = "ctrlmesh.kusionstack.io/default-replicas" - CtrlmeshEnableProxyLabel = "ctrlmesh.kusionstack.io/enable-proxy" + CtrlmeshControlPrefix = "ctrlmesh.kusionstack.io/" + CtrlmeshIgnoreWebhookLabel = "ctrlmesh.kusionstack.io/ignore-webhook" + CtrlmeshIgnoreValidateLabel = "ctrlmesh.kusionstack.io/ignore-validate" + CtrlmeshDefaultReplicasLabel = "ctrlmesh.kusionstack.io/default-replicas" + CtrlmeshEnableProxyLabel = "ctrlmesh.kusionstack.io/enable-proxy" + CtrlmeshEnableIptableMode = "ctrlmesh.kusionstack.io/enable-iptables" + CtrlmeshAutoShardingRootLabel = "ctrlmesh.kusionstack.io/auto-sharding-root" CtrlmeshInRollingLabel = "ctrlmesh.kusionstack.io/rolling" CtrlmeshDisableFakeKubeconfigArgLabel = "ctrlmesh.kusionstack.io/disable-fake-kubeconfig-arg" diff --git a/pkg/cmd/proxy/main.go b/pkg/cmd/proxy/main.go index f91ee91..77daaa9 100644 --- a/pkg/cmd/proxy/main.go +++ b/pkg/cmd/proxy/main.go @@ -35,7 +35,6 @@ import ( "github.com/KusionStack/controller-mesh/pkg/apis/ctrlmesh/constants" "github.com/KusionStack/controller-mesh/pkg/client" - proxyapiserver "github.com/KusionStack/controller-mesh/pkg/proxy/apiserver" proxycache "github.com/KusionStack/controller-mesh/pkg/proxy/cache" "github.com/KusionStack/controller-mesh/pkg/proxy/circuitbreaker" @@ -56,6 +55,8 @@ var ( webhookCertDir = 
flag.String(constants.ProxyWebhookCertDirFlag, "", "The directory where the webhook certs generated or mounted.") proxyIptablePort = flag.Int(constants.ProxyIptablesFlag, constants.ProxyIptablesPort, "port that http-tproxy listens on") + + enableIpTable = os.Getenv(constants.EnvIPTable) == "true" ) func main() { @@ -66,7 +67,17 @@ func main() { klog.Fatalf("Environment %s=%s %s=%s not exist.", constants.EnvPodNamespace, os.Getenv(constants.EnvPodNamespace), constants.EnvPodName, os.Getenv(constants.EnvPodName)) } - cfg := ctrl.GetConfigOrDie() + var cfg *rest.Config + + if enableIpTable { + var err error + cfg, err = getRestConfig() + if err != nil { + klog.Fatalf("Failed to get rest config: %v", err) + } + } else { + cfg = ctrl.GetConfigOrDie() + } cfg.UserAgent = "ctrlmesh" if err := client.NewRegistry(cfg); err != nil { klog.Fatalf("Failed to new client registry: %v", err) @@ -165,3 +176,41 @@ func serveHTTP(ctx context.Context, readyHandler *healthz.Handler) { klog.Fatalf("Serve HTTP shutting down on :%d: %v", *metricsHealthPort, err) } } + +func getRestConfig() (*rest.Config, error) { + const ( + tokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token" + //rootCAFile = "/var/run/secrets/kubernetes.io/serviceaccount/..data/ca.crt" + ) + host, port := os.Getenv("KUBERNETES_SERVICE_HOST"), os.Getenv("KUBERNETES_SERVICE_PORT") + if len(host) == 0 || len(port) == 0 { + return nil, rest.ErrNotInCluster + } + + token, err := os.ReadFile(tokenFile) + if err != nil { + return nil, err + } + + tlsClientConfig := rest.TLSClientConfig{Insecure: true} + + //if _, err := certutil.NewPool(rootCAFile); err != nil { + // klog.Errorf("Expected to load root CA config from %s, but got err: %v", rootCAFile, err) + //} else { + // tlsClientConfig.CAFile = rootCAFile + //} + + cfg := &rest.Config{ + // TODO: switch to using cluster DNS. 
+ Host: "https://" + net.JoinHostPort(host, port), + TLSClientConfig: tlsClientConfig, + BearerToken: string(token), + BearerTokenFile: tokenFile, + + Burst: 3000, + QPS: 2000.0, + } + klog.V(3).Infof("Starting with rest config: %v", utils.DumpJSON(cfg)) + + return cfg, nil +} diff --git a/pkg/webhook/pod/injector.go b/pkg/webhook/pod/injector.go index db5c7bb..50de8f7 100644 --- a/pkg/webhook/pod/injector.go +++ b/pkg/webhook/pod/injector.go @@ -131,7 +131,7 @@ func (h *MutatingHandler) injectByShardingConfig(ctx context.Context, pod *v1.Po if *proxyImage == "" { return fmt.Errorf("the images for ControllerMesh init or proxy container have not set in args") } - + enableIpTable := pod.Labels[ctrlmesh.CtrlmeshEnableIptableMode] == "true" imagePullPolicy := v1.PullAlways if *proxyImagePullPolicy == string(v1.PullIfNotPresent) { imagePullPolicy = v1.PullIfNotPresent @@ -177,6 +177,13 @@ func (h *MutatingHandler) injectByShardingConfig(ctx context.Context, pod *v1.Po }, } + if enableIpTable { + proxyContainer.Env = append(proxyContainer.Env, v1.EnvVar{ + Name: constants.EnvIPTable, + Value: "true", + }) + } + if val, ok := pod.Annotations[ctrlmesh.CtrlmeshProxyContainerResourceAnno]; ok { req := &v1.ResourceRequirements{} if err := json.Unmarshal([]byte(val), req); err != nil { @@ -213,15 +220,6 @@ func (h *MutatingHandler) injectByShardingConfig(ctx context.Context, pod *v1.Po proxyContainer.Env = append(proxyContainer.Env, apiserverHostPortEnvs...) } - ipTableEnvs := getEnv(pod, constants.EnvIPTable) - enableIpTable := false - if len(ipTableEnvs) > 0 { - initContainer.Env = append(initContainer.Env, ipTableEnvs...) - //proxyContainer.Env = append(proxyContainer.Env, ipTableEnvs...) 
- if ipTableEnvs[0].Value == "true" { - enableIpTable = true - } - } if !enableIpTable { if err := h.applyFakeConfigMap(pod); err != nil { return err @@ -271,7 +269,7 @@ func (h *MutatingHandler) injectByShardingConfig(ctx context.Context, pod *v1.Po proxyContainer.VolumeMounts = append(proxyContainer.VolumeMounts, certVolumeMounts[0]) } } - if *initImage != "" { + if enableIpTable && *initImage != "" { pod.Spec.InitContainers = append([]v1.Container{*initContainer}, pod.Spec.InitContainers...) } if pod.Labels == nil {