From 9187735efaa88882bf8c687e57a59a7525d2f674 Mon Sep 17 00:00:00 2001
From: David Young
Date: Thu, 19 Oct 2023 11:22:37 +1300
Subject: [PATCH] Add post on cilium 1.14

Signed-off-by: David Young
---
 docs/blog/.authors.yml                        |   9 +-
 .../cilium-1.14-breaks-toservice-policies.md  | 121 ++++++++++++++++++
 .../persistence/rook-ceph/cluster.md          |   1 -
 .../persistence/rook-ceph/operator.md         |   1 -
 mkdocs-insiders.yml                           |  18 ---
 mkdocs.yml                                    |  31 ++++-
 6 files changed, 154 insertions(+), 27 deletions(-)
 create mode 100644 docs/blog/posts/notes/cilium-1.14-breaks-toservice-policies.md

diff --git a/docs/blog/.authors.yml b/docs/blog/.authors.yml
index bb0cd88..7019d3f 100644
--- a/docs/blog/.authors.yml
+++ b/docs/blog/.authors.yml
@@ -1,4 +1,5 @@
-funkypenguin:
-  name: David Young
-  description: Geek Chef
-  avatar: https://github.com/funkypenguin.png
+authors:
+  funkypenguin:
+    name: David Young
+    description: Geek Chef
+    avatar: https://github.com/funkypenguin.png
diff --git a/docs/blog/posts/notes/cilium-1.14-breaks-toservice-policies.md b/docs/blog/posts/notes/cilium-1.14-breaks-toservice-policies.md
new file mode 100644
index 0000000..99675f7
--- /dev/null
+++ b/docs/blog/posts/notes/cilium-1.14-breaks-toservice-policies.md
@@ -0,0 +1,121 @@
---
title: How Cilium 1.14 solved a security issue by breaking toService-toPort policies
date: 2023-10-19
tags:
  - cilium
  - kubernetes
categories:
  - note
description: How to rewrite your CiliumNetworkPolicies to be secure and 1.14-compatible
---
I've been working with a client on upgrading a Cilium v1.13 instance to v1.14... and, as usual, chaos ensued. Here's what you need to know before upgrading to Cilium v1.14...

## What happened?

!!! summary "Background"
    We use CiliumNetworkPolicies selectively, locking down some namespaces to permitted ingress/egress only, and allowing others free rein (*we also use [Istio for namespace isolation](https://www.funkypenguin.co.nz/blog/istio-namespace-isolation-tricks/)*).

The first clue was that things broke: pods with istio-proxy sidecars weren't able to talk to istiod, and consequently pods were crashlooping all over the place. The second clue was this line in cilium's output:

```
level=warning msg="Unable to add CiliumNetworkPolicy"
ciliumNetworkPolicyName=kube-cluster-namespace-defaults error="Invalid
CiliumNetworkPolicy spec:
Combining ToServices and ToPorts is not supported yet"
k8sApiVersion=cilium.io/v2 k8sNamespace=rainloop subsys=k8s-watcher
```

I didn't think too much of it initially, because (a) I wasn't changing policies, and (b) everything was working under 1.13. I reasoned that something which wasn't supported "yet" sounded like a new feature (*which I obviously wasn't using*), and didn't seem likely to affect the configuration / policies I'd already been running on previous versions.

Ha.

Eventually I conceded that this error was the most likely cause of my issues, so I searched for the string in the [cilium/cilium repo](https://github.com/cilium/cilium/). Sure enough, I found a [recent commit](https://github.com/cilium/cilium/commit/7959bf5b3ca1428481391b6ee001aff931b2753e) indicating a change in supported functionality.

## What was the impact?

Fortunately, this client's environment is quite mature, and all changes are deployed on multiple CI clusters (automated and manual) before being deployed into prod. So the CI clusters were a mess, but prod was unaffected (*which is why we test all updates in CI!*).
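
If you want to know whether your own clusters are affected before you upgrade, it's worth scanning your existing policies for the offending combination first. Here's a rough, illustrative sketch of how that might look - it's my own helper rather than anything from the Cilium project, and it assumes you have the official `kubernetes` Python client and a working kubeconfig:

```python
# Illustrative helper: list CiliumNetworkPolicies whose egress rules combine
# toServices with toPorts - the combination which Cilium 1.14 rejects.
# Assumes the official `kubernetes` Python client and a working kubeconfig.
from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() inside a pod
api = client.CustomObjectsApi()

cnps = api.list_cluster_custom_object(
    group="cilium.io", version="v2", plural="ciliumnetworkpolicies"
)

for cnp in cnps.get("items", []):
    name = f"{cnp['metadata'].get('namespace', '?')}/{cnp['metadata']['name']}"
    # A CNP carries its rules either in `spec`, or in a list under `specs`
    specs = cnp.get("specs") or ([cnp["spec"]] if "spec" in cnp else [])
    for spec in specs:
        for rule in spec.get("egress") or []:
            if "toServices" in rule and "toPorts" in rule:
                print(f"{name}: combines toServices and toPorts")
```

The same idea could be extended to CiliumClusterwideNetworkPolicies, if you use them.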

## Why did it happen?

Why was a previously-working function marked as "not yet supported"?

It turns out that what **actually** happened is that it was previously possible to create an egress policy matching a Kubernetes service in a particular namespace, restricted to certain ports. For example, this policy "worked"[^1] in Cilium 1.13:

```yaml title="CiliumNetworkPolicy working in Cilium v1.13"
apiVersion: "cilium.io/v2"
kind: CiliumNetworkPolicy
metadata:
  name: "allow-minio-restore-egress-cluster-vault"
  namespace: minio
spec:
  endpointSelector:
    matchLabels:
      io.cilium.k8s.policy.serviceaccount: minio-restore
  egress:
  - toServices:
    - k8sService:
        serviceName: cluster-vault
        namespace: vault
    toPorts:
    - ports:
      - port: '8200'
        protocol: TCP
```

But as described in [this issue](https://github.com/cilium/cilium/issues/20067), what the above policy _actually_ does (*because `k8sService` only works on services without a selector :facepalm:*) is permit **any** egress on TCP port 8200 :scream:

Cilium's solution was to stop accepting the toServices/toPorts combination altogether: it's unsupported in Cilium 1.14, so any policy using it now fails to load (*no more unlimited egress!*).

## How was it fixed?

In my case, this meant a bulk update of 40-50 policies, but it turns out that a "supported" fix was relatively simple. The `toEndpoints` egress selector can achieve the same result. The gotcha is that you need to match on your target service's labels, as well as the Cilium-specific `k8s:io.kubernetes.pod.namespace` label, which indicates the namespace in which the target pods can be found.

!!! note "What about targeting services in the same namespace?"
    It seems that unless the `k8s:io.kubernetes.pod.namespace` label is present in the policy, the policy will only apply to pods in the namespace in which it is found. This is a subtle change in behaviour which could easily result in confusion - i.e., you'd assume that omitting the `k8s:io.kubernetes.pod.namespace` label would result in matching endpoints across the **entire** cluster (*and why would you do that?*)

So I changed this:

```yaml
  - toServices:
    - k8sService:
        serviceName: cluster-vault
        namespace: vault
```

To this:

```yaml
  - toEndpoints:
    - matchLabels:
        app.kubernetes.io/name: vault
        k8s:io.kubernetes.pod.namespace: vault
```

Here's the entirety of my new policy:

```yaml title="CiliumNetworkPolicy updated to work in Cilium v1.14"
apiVersion: "cilium.io/v2"
kind: CiliumNetworkPolicy
metadata:
  name: "allow-minio-restore-egress-cluster-vault"
  namespace: minio
spec:
  endpointSelector:
    matchLabels:
      io.cilium.k8s.policy.serviceaccount: minio-restore
  egress:
  - toEndpoints:
    - matchLabels:
        app.kubernetes.io/name: vault
        k8s:io.kubernetes.pod.namespace: vault
    toPorts:
    - ports:
      - port: '8200'
        protocol: TCP
```

[^1]: "Worked" in that it permitted egress to **any** host on the specified ports! :scream:

--8<-- "blog-footer.md"
\ No newline at end of file
diff --git a/docs/kubernetes/persistence/rook-ceph/cluster.md b/docs/kubernetes/persistence/rook-ceph/cluster.md
index 5c7828b..6248156 100644
--- a/docs/kubernetes/persistence/rook-ceph/cluster.md
+++ b/docs/kubernetes/persistence/rook-ceph/cluster.md
@@ -56,7 +56,6 @@ spec:
   sourceRef:
     kind: GitRepository
     name: flux-system
-  validation: server
 ```

1. Note that we use the `spec.dependsOn` to ensure that this Kustomization is only applied **after** the rook-ceph operator is deployed and operational.
This ensures that the necessary CRDs are in place, and avoids a dry-run error on the reconciliation.
diff --git a/docs/kubernetes/persistence/rook-ceph/operator.md b/docs/kubernetes/persistence/rook-ceph/operator.md
index fa9a429..0589a91 100644
--- a/docs/kubernetes/persistence/rook-ceph/operator.md
+++ b/docs/kubernetes/persistence/rook-ceph/operator.md
@@ -66,7 +66,6 @@ spec:
   sourceRef:
     kind: GitRepository
     name: flux-system
-  validation: server
  healthChecks:
    - apiVersion: apiextensions.k8s.io/v1
      kind: CustomResourceDefinition
diff --git a/mkdocs-insiders.yml b/mkdocs-insiders.yml
index 81f61c5..8a80cf0 100644
--- a/mkdocs-insiders.yml
+++ b/mkdocs-insiders.yml
@@ -3,24 +3,6 @@ INHERIT: mkdocs.yml
 plugins:
   extra-sass:
-  blog:
-    # post_excerpt: required
-    post_url_format: "{date}/{file}" # this allows us to change the post title in future without breaking URLs
-  tags:
-    tags_file: blog/tags.md
-  rss:
-    match_path: blog/posts/.*
-    date_from_meta:
-      as_creation: date
-    categories:
-      - categories
-      - tags
-    abstract_chars_count: 200
-    image: https://geek-cookbook.funkypenguin.co.nz/images/site-logo.png
-    url_parameters:
-      utm_source: "geek-cookbook-blog"
-      utm_medium: "RSS"
-      utm_campaign: "feed-syndication"
   social:
     cards_layout_options:
       font_family: Ubuntu
diff --git a/mkdocs.yml b/mkdocs.yml
index b01fa1e..60cc63a 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -28,6 +28,25 @@ plugins:
     include_dir: _includes
   meta-descriptions: # If not provided, auto-generate a description (https://pypi.org/project/mkdocs-meta-descriptions-plugin/)
   meta:
+  blog:
+    # post_excerpt: required
+    post_url_format: "{date}/{file}" # this allows us to change the post title in future without breaking URLs
+  tags:
+    tags_file: blog/tags.md
+  rss:
+    match_path: blog/posts/.*
+    date_from_meta:
+      as_creation: date
+    categories:
+      - categories
+      - tags
+    abstract_chars_count: 200
+    image: https://geek-cookbook.funkypenguin.co.nz/images/site-logo.png
+    url_parameters:
+      utm_source: "geek-cookbook-blog"
+      utm_medium: "RSS"
+      utm_campaign: "feed-syndication"
+
 #theme_dir: mkdocs-material

 nav:
@@ -92,6 +111,7 @@ nav:
   - InstaPy: recipes/instapy.md
   - Invidious: recipes/invidious.md
   - Jellyfin: recipes/jellyfin.md
+  - Joplin Server: recipes/joplin-server.md
   - Keycloak:
     - recipes/keycloak/index.md
     - OIDC Provider: recipes/keycloak/setup-oidc-provider.md
@@ -200,9 +220,14 @@ nav:
     - kubernetes/oidc/index.md
     - Authentik: kubernetes/oidc/authentik.md
     # - Keycloak: kubernetes/oidc/authentik.md
-  # - Backup:
-  #   - kubernetes/backup/index.md
-  #   - kubernetes/wip.md
+  - Backup:
+    - kubernetes/backup/index.md
+    - CSI Snapshots:
+      - kubernetes/backup/csi-snapshots/index.md
+      - Snapshot Validation Webhook: kubernetes/backup/csi-snapshots/snapshot-validation-webhook.md
+      - Snapshot Controller: kubernetes/backup/csi-snapshots/snapshot-controller.md
+    - Velero: kubernetes/backup/velero.md
+
   # - Logging:
   #   - fluent-bit, graylog, etc.