diff --git a/README.md b/README.md index d991afcf69fa..6f6f2b17c02d 100644 --- a/README.md +++ b/README.md @@ -18,3 +18,4 @@ This repository provides kubernetes yaml deployments and markdown examples for N * [SRIOV examples](./examples/sriov) * [Memory examples](./examples/memory) * [Heal examples](./examples/heal) + * [Scalability tests](./examples/scalability) diff --git a/examples/basic/README.md b/examples/basic/README.md index 0b95af979bfb..0e89f1e9b43d 100644 --- a/examples/basic/README.md +++ b/examples/basic/README.md @@ -58,7 +58,7 @@ kubectl apply -k . ## Cleanup -To free resouces follow the next command: +To free resources follow the next command: ```bash kubectl delete mutatingwebhookconfiguration --all diff --git a/examples/scalability/.gitignore b/examples/scalability/.gitignore new file mode 100644 index 000000000000..17c8c3b25e85 --- /dev/null +++ b/examples/scalability/.gitignore @@ -0,0 +1,7 @@ +kustomization.yaml +netsvcs.yaml +nse.yaml +set_params.sh +result_data-*/ +nsm_setup/logs*/ +port_forwarder_out.log diff --git a/examples/scalability/README.md b/examples/scalability/README.md new file mode 100644 index 000000000000..63b61288a74c --- /dev/null +++ b/examples/scalability/README.md @@ -0,0 +1,34 @@ +# Scalability tests + +This folder contains scalability tests. + +These tests can technically be run manually, like all other tests, +however they are meant to be run only automatically, +for better precision of measurements. + +These tests require you to write a file with test params before each test run. +This file must be placed in the [cases](./cases) folder (near other .sh files), +and must have the following structure: +```bash +#!/bin/bash +TEST_NS_COUNT=1 +TEST_NSE_COUNT=1 +TEST_NSC_COUNT=1 +TEST_REMOTE_CASE=false +``` + +Note, that you need [styx](https://github.com/go-pluto/styx) installed +for statistics gathering to work. 
+ +## Requires + +- [Prometheus](./prometheus) +- [Basic NSM setup](./nsm_setup) +- [Gnuplot deployment](./gnuplot) + +## Includes + +- [Single start without heal](./cases/SingleStart) +- [Single start with unsuccessful heal](./cases/DryHeal) +- [Clients restart](./cases/ClientsRestart) +- [Endpoints restart: successful heal](./cases/Heal) diff --git a/examples/scalability/cadvisor/README.md b/examples/scalability/cadvisor/README.md new file mode 100644 index 000000000000..4eed5fced479 --- /dev/null +++ b/examples/scalability/cadvisor/README.md @@ -0,0 +1,21 @@ +# cAdvisor + +Contains setup for cAdvisor. + +## Run + +Deploy cAdvisor: +```bash +kubectl apply -k . +``` + +Wait for application ready: +```bash +kubectl -n cadvisor --timeout=1m wait pod --for=condition=ready -l app=cadvisor +``` + +## Cleanup + +```bash +kubectl delete -k . +``` diff --git a/examples/scalability/cadvisor/cluster-role-binding.yaml b/examples/scalability/cadvisor/cluster-role-binding.yaml new file mode 100644 index 000000000000..2b58ca87eb0a --- /dev/null +++ b/examples/scalability/cadvisor/cluster-role-binding.yaml @@ -0,0 +1,13 @@ +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: cadvisor +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cadvisor +subjects: + - kind: ServiceAccount + name: cadvisor + namespace: cadvisor diff --git a/examples/scalability/cadvisor/cluster-role.yaml b/examples/scalability/cadvisor/cluster-role.yaml new file mode 100644 index 000000000000..14889fa73e6a --- /dev/null +++ b/examples/scalability/cadvisor/cluster-role.yaml @@ -0,0 +1,14 @@ +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: cadvisor +rules: + - apiGroups: + - policy + resources: + - podsecuritypolicies + verbs: + - use + resourceNames: + - cadvisor diff --git a/examples/scalability/cadvisor/daemonset.yaml b/examples/scalability/cadvisor/daemonset.yaml new file mode 100644 index 
000000000000..663ff5fc37c5 --- /dev/null +++ b/examples/scalability/cadvisor/daemonset.yaml @@ -0,0 +1,86 @@ +--- +# for Kubernetes versions before 1.9.0 use apps/v1beta2 +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: cadvisor + namespace: cadvisor + annotations: + seccomp.security.alpha.kubernetes.io/pod: 'docker/default' +spec: + selector: + matchLabels: + name: cadvisor + template: + metadata: + labels: + name: cadvisor + spec: + serviceAccountName: cadvisor + containers: + - name: cadvisor + image: gcr.io/cadvisor/cadvisor:v0.39.0 + imagePullPolicy: IfNotPresent + args: + - --port=9089 + - --housekeeping_interval=1s + - --max_housekeeping_interval=2s + - --event_storage_event_limit=default=0 + - --event_storage_age_limit=default=0 + - --disable_metrics=accelerator,cpu_topology,disk,diskIO,memory_numa,network,tcp,udp,advtcp,sched,process,percpu,hugetlb,referenced_memory,resctrl,cpuset + - --docker_only + - --store_container_labels=false + - --whitelisted_container_labels=io.kubernetes.container.name,io.kubernetes.pod.name,io.kubernetes.pod.namespace + ports: + - name: http + hostPort: 9089 + containerPort: 9089 + protocol: TCP + volumeMounts: + - name: rootfs + mountPath: /rootfs + readOnly: true + - name: var-run + mountPath: /var/run + readOnly: true + - name: sys + mountPath: /sys + readOnly: true + - name: docker + mountPath: /var/lib/docker + readOnly: true + - name: disk + mountPath: /dev/disk + readOnly: true + resources: + requests: + memory: 100Mi + cpu: 200m + limits: + memory: 400Mi + cpu: 1000m + readinessProbe: + exec: + command: + - wget + - localhost:9089/healthz + - -O + - /dev/null + automountServiceAccountToken: false + terminationGracePeriodSeconds: 30 + volumes: + - name: rootfs + hostPath: + path: / + - name: var-run + hostPath: + path: /var/run + - name: sys + hostPath: + path: /sys + - name: docker + hostPath: + path: /var/lib/docker + - name: disk + hostPath: + path: /dev/disk diff --git 
a/examples/scalability/cadvisor/kustomization.yaml b/examples/scalability/cadvisor/kustomization.yaml new file mode 100644 index 000000000000..2d129ee9e9bb --- /dev/null +++ b/examples/scalability/cadvisor/kustomization.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: cadvisor + +commonLabels: + app: cadvisor + +resources: + - cluster-role.yaml + - cluster-role-binding.yaml + - daemonset.yaml + - namespace.yaml + - pod-security-policy.yaml + - service-account.yaml + - service.yaml diff --git a/examples/scalability/cadvisor/namespace.yaml b/examples/scalability/cadvisor/namespace.yaml new file mode 100644 index 000000000000..b310c9e6e104 --- /dev/null +++ b/examples/scalability/cadvisor/namespace.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: cadvisor diff --git a/examples/scalability/cadvisor/pod-security-policy.yaml b/examples/scalability/cadvisor/pod-security-policy.yaml new file mode 100644 index 000000000000..f6482b9d8401 --- /dev/null +++ b/examples/scalability/cadvisor/pod-security-policy.yaml @@ -0,0 +1,22 @@ +--- +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: cadvisor +spec: + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + runAsUser: + rule: RunAsAny + fsGroup: + rule: RunAsAny + volumes: + - '*' + allowedHostPaths: + - pathPrefix: "/" + - pathPrefix: "/var/run" + - pathPrefix: "/sys" + - pathPrefix: "/var/lib/docker" + - pathPrefix: "/dev/disk" diff --git a/examples/scalability/cadvisor/service-account.yaml b/examples/scalability/cadvisor/service-account.yaml new file mode 100644 index 000000000000..82fddce572c0 --- /dev/null +++ b/examples/scalability/cadvisor/service-account.yaml @@ -0,0 +1,6 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cadvisor + namespace: cadvisor diff --git a/examples/scalability/cadvisor/service.yaml b/examples/scalability/cadvisor/service.yaml new file mode 100644 index 
000000000000..000a974197c3 --- /dev/null +++ b/examples/scalability/cadvisor/service.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: cadvisor +spec: + selector: + app: cadvisor + ports: + - name: cadvisor + protocol: TCP + port: 9089 + targetPort: 9089 diff --git a/examples/scalability/cases/ClientsRestart/README.md b/examples/scalability/cases/ClientsRestart/README.md new file mode 100644 index 000000000000..acc2c7750d69 --- /dev/null +++ b/examples/scalability/cases/ClientsRestart/README.md @@ -0,0 +1,240 @@ +# Scalability clients restart test + +This test has the following scenario: +1. Deploy endpoints +2. Deploy clients +3. Delete clients +4. Deploy new clients +5. Delete everything +6. Gather statistics + +## Run + +Save test time, for drawing plots: +```bash +TEST_TIME_START=$(date -Iseconds) +``` + +Create test namespace: +```bash +NAMESPACE=($(kubectl create -f ../../namespace.yaml)[0]) +NAMESPACE=${NAMESPACE:10} +``` + +Register namespace in `spire` server: +```bash +kubectl exec -n spire spire-server-0 -- \ +/opt/spire/bin/spire-server entry create \ +-spiffeID spiffe://example.org/ns/${NAMESPACE}/sa/default \ +-parentID spiffe://example.org/ns/spire/sa/spire-agent \ +-selector k8s:ns:${NAMESPACE} \ +-selector k8s:sa:default +``` + +Create helper functions: +```bash +. ../define_helper_functions.sh +``` + +Set test parameters: +```bash +readParams .. 
+``` + +Select nodes to deploy NSC and NSE: +```bash +NODES=($(kubectl get nodes -o go-template='{{range .items}}{{ if not .spec.taints }}{{ .metadata.name }} {{end}}{{end}}')) +NSE_NODE=${NODES[0]} +if [[ "${TEST_REMOTE_CASE}" == "true" ]]; then + NSC_NODE=${NODES[1]} +else + NSC_NODE=${NODES[0]} +fi +echo NSE_NODE ${NSE_NODE}, NSC_NODE ${NSC_NODE} +``` + +Deploy network services: +```bash +generate_netsvc ${TEST_NS_COUNT} +``` +```bash +kubectl apply -f netsvcs.yaml +``` + +Deploy endpoints: +```bash +create_endpoint_patches ${TEST_NSE_COUNT} ${NSE_NODE} endpoints 0 +``` +```bash +kubectl apply -k ./endpoints +``` +```bash +timeout -v --kill-after=10s 3m kubectl -n ${NAMESPACE} wait pod --timeout=3m -l app=nse-kernel --for=condition=ready +``` + +Make sure that all endpoints have finished registration: +```bash +checkEndpointsStart ${NAMESPACE} endpoints +``` +```bash +EVENT_LIST="${EVENT_LIST} ENDPOINTS_0_STARTED" +EVENT_TIME_ENDPOINTS_0_STARTED="$(date -Iseconds)" +EVENT_TEXT_ENDPOINTS_0_STARTED="All endpoints started" +``` + +Deploy clients: +```bash +create_client_patches ${TEST_NSC_COUNT} ${NSC_NODE} clients-0 +``` +```bash +kubectl apply -k ./clients-0 +``` +```bash +timeout -v --kill-after=10s 3m kubectl -n ${NAMESPACE} wait pod --timeout=3m -l app=nsc-kernel --for=condition=ready +``` + +```bash +checkClientsSvid ${NAMESPACE} +``` +```bash +EVENT_LIST="${EVENT_LIST} CLIENTS_GOT_SVID" +EVENT_TIME_CLIENTS_GOT_SVID="$(date -Iseconds)" +EVENT_TEXT_CLIENTS_GOT_SVID="All clients-0 obtained svid" +``` + +```bash +checkConnectionsCount ${NAMESPACE} "10.0" ${TEST_NS_COUNT} +``` +```bash +EVENT_LIST="${EVENT_LIST} CONNECTIONS_0_READY" +EVENT_TIME_CONNECTIONS_0_READY="$(date -Iseconds)" +EVENT_TEXT_CONNECTIONS_0_READY="Conns 0 ready" +``` +```bash +sleep 15 +``` + +Delete first batch of clients: +```bash +EVENT_LIST="${EVENT_LIST} DELETE_CLIENTS_0" +EVENT_TIME_DELETE_CLIENTS_0="$(date -Iseconds)" +EVENT_TEXT_DELETE_CLIENTS_0="Delete clients-0..." 
+``` +```bash +kubectl -n ${NAMESPACE} delete -k ./clients-0 --cascade=foreground +``` +```bash +EVENT_LIST="${EVENT_LIST} CLIENTS_DELETED_0" +EVENT_TIME_CLIENTS_DELETED_0="$(date -Iseconds)" +EVENT_TEXT_CLIENTS_DELETED_0="Clients-0 deleted" +``` + +Create second batch of clients: +```bash +create_client_patches ${TEST_NSC_COUNT} ${NSC_NODE} clients-1 +``` +```bash +kubectl apply -k ./clients-1 +``` +```bash +timeout -v --kill-after=10s 3m kubectl -n ${NAMESPACE} wait pod --timeout=3m -l app=nsc-kernel --for=condition=ready +``` + +```bash +checkClientsSvid ${NAMESPACE} +``` +```bash +EVENT_LIST="${EVENT_LIST} CLIENTS_1_GOT_SVID" +EVENT_TIME_CLIENTS_1_GOT_SVID="$(date -Iseconds)" +EVENT_TEXT_CLIENTS_1_GOT_SVID="All clients-1 obtained svid" +``` + +```bash +checkConnectionsCount ${NAMESPACE} "10.0" ${TEST_NS_COUNT} +``` +```bash +EVENT_LIST="${EVENT_LIST} CONNECTIONS_1_READY" +EVENT_TIME_CONNECTIONS_1_READY="$(date -Iseconds)" +EVENT_TEXT_CONNECTIONS_1_READY="Conns 1 ready" +``` +```bash +sleep 15 +``` + +```bash +EVENT_LIST="${EVENT_LIST} DELETE_CLIENTS_1" +EVENT_TIME_DELETE_CLIENTS_1="$(date -Iseconds)" +EVENT_TEXT_DELETE_CLIENTS_1="Delete clients-1..." +``` +```bash +kubectl delete -k ./clients-1 --cascade=foreground +``` +```bash +EVENT_LIST="${EVENT_LIST} CLIENTS_DELETED_1" +EVENT_TIME_CLIENTS_DELETED_1="$(date -Iseconds)" +EVENT_TEXT_CLIENTS_DELETED_1="Clients-1 deleted" +``` +```bash +sleep 15 +``` + +Delete everything: +```bash +EVENT_LIST="${EVENT_LIST} DELETE_NAMESPACE" +EVENT_TIME_DELETE_NAMESPACE="$(date -Iseconds)" +EVENT_TEXT_DELETE_NAMESPACE="Delete namespace..." 
+``` +```bash +kubectl delete ns ${NAMESPACE} +``` +```bash +EVENT_LIST="${EVENT_LIST} NAMESPACE_DELETED" +EVENT_TIME_NAMESPACE_DELETED="$(date -Iseconds)" +EVENT_TEXT_NAMESPACE_DELETED="Namespace deleted" +``` + +## Cleanup + +Save possible test fail event: +```bash +if [[ "${EVENT_TIME_NAMESPACE_DELETED}" == "" ]]; then + EVENT_LIST="${EVENT_LIST} TEST_FAIL" + EVENT_TIME_TEST_FAIL="$(date -Iseconds)" + EVENT_TEXT_TEST_FAIL="Fail" +fi +``` + +Delete resources: +```bash +kubectl delete ns ${NAMESPACE} --ignore-not-found +``` +```bash +kubectl delete -f netsvcs.yaml --ignore-not-found +``` + +Wait few seconds to capture performance after test end: +```bash +sleep 15 +``` + +Mark test end: +```bash +TEST_TIME_END="$(date -Iseconds)" +``` + +Save statistics: +```bash +RESULT_DIR="./${RESULTS_PARENT_DIR}/results-$(date --date="${TEST_TIME_START}" -u +%FT%H-%M-%S%z)-netsvc=${TEST_NS_COUNT}-nse=${TEST_NSE_COUNT}-nsc=${TEST_NSC_COUNT}" +PARAM_ANNOTATION="client restart case, ${TEST_NS_COUNT} service(s), ${TEST_NSE_COUNT} NSE(s), ${TEST_NSC_COUNT} NSC(s)" +if [[ "${TEST_REMOTE_CASE}" == "true" ]]; then + PARAM_ANNOTATION="${PARAM_ANNOTATION}, remote case" + RESULT_DIR="${RESULT_DIR}-remote" +else + PARAM_ANNOTATION="${PARAM_ANNOTATION}, local case" + RESULT_DIR="${RESULT_DIR}-local" +fi +PARAM_ANNOTATION="${PARAM_ANNOTATION}, run at ${TEST_TIME_START}" +``` +```bash +. ../save_metrics.sh +``` diff --git a/examples/scalability/cases/DryHeal/README.md b/examples/scalability/cases/DryHeal/README.md new file mode 100644 index 000000000000..c943a194b234 --- /dev/null +++ b/examples/scalability/cases/DryHeal/README.md @@ -0,0 +1,204 @@ +# Scalability dry heal test + +This test has the following scenario: +1. Deploy endpoints +2. Deploy clients +3. Delete endpoints +4. Wait few seconds to capture load during healing +5. Delete clients +6. 
Gather statistics + +## Run + +Save test time, for drawing plots: +```bash +TEST_TIME_START=$(date -Iseconds) +``` + +Create test namespace: +```bash +NAMESPACE=($(kubectl create -f ../../namespace.yaml)[0]) +NAMESPACE=${NAMESPACE:10} +``` + +Register namespace in `spire` server: +```bash +kubectl exec -n spire spire-server-0 -- \ +/opt/spire/bin/spire-server entry create \ +-spiffeID spiffe://example.org/ns/${NAMESPACE}/sa/default \ +-parentID spiffe://example.org/ns/spire/sa/spire-agent \ +-selector k8s:ns:${NAMESPACE} \ +-selector k8s:sa:default +``` + +Create helper functions: +```bash +. ../define_helper_functions.sh +``` + +Set test parameters: +```bash +readParams .. +``` + +Select nodes to deploy NSC and NSE: +```bash +NODES=($(kubectl get nodes -o go-template='{{range .items}}{{ if not .spec.taints }}{{ .metadata.name }} {{end}}{{end}}')) +NSE_NODE=${NODES[0]} +if [[ "${TEST_REMOTE_CASE}" == "true" ]]; then + NSC_NODE=${NODES[1]} +else + NSC_NODE=${NODES[0]} +fi +echo NSE_NODE ${NSE_NODE}, NSC_NODE ${NSC_NODE} +``` + +Deploy network services: +```bash +generate_netsvc ${TEST_NS_COUNT} +``` +```bash +kubectl apply -f netsvcs.yaml +``` + +Deploy endpoints: +```bash +create_endpoint_patches ${TEST_NSE_COUNT} ${NSE_NODE} endpoints 0 +``` +```bash +kubectl apply -k ./endpoints +``` +```bash +timeout -v --kill-after=10s 3m kubectl -n ${NAMESPACE} wait pod --timeout=3m -l app=nse-kernel --for=condition=ready +``` + +Make sure that all endpoints have finished registration: +```bash +checkEndpointsStart ${NAMESPACE} endpoints +``` +```bash +EVENT_LIST="${EVENT_LIST} ENDPOINTS_0_STARTED" +EVENT_TIME_ENDPOINTS_0_STARTED="$(date -Iseconds)" +EVENT_TEXT_ENDPOINTS_0_STARTED="All endpoints started" +``` + +Deploy clients: +```bash +create_client_patches ${TEST_NSC_COUNT} ${NSC_NODE} clients +``` +```bash +kubectl apply -k ./clients +``` +```bash +timeout -v --kill-after=10s 3m kubectl -n ${NAMESPACE} wait pod --timeout=3m -l app=nsc-kernel --for=condition=ready +``` + 
+```bash +checkClientsSvid ${NAMESPACE} +``` +```bash +EVENT_LIST="${EVENT_LIST} CLIENTS_GOT_SVID" +EVENT_TIME_CLIENTS_GOT_SVID="$(date -Iseconds)" +EVENT_TEXT_CLIENTS_GOT_SVID="All clients obtained svid" +``` + +```bash +checkConnectionsCount ${NAMESPACE} "10.0" ${TEST_NS_COUNT} +``` +```bash +EVENT_LIST="${EVENT_LIST} CONNECTIONS_READY" +EVENT_TIME_CONNECTIONS_READY="$(date -Iseconds)" +EVENT_TEXT_CONNECTIONS_READY="Connections established" +``` +```bash +sleep 15 +``` + +Run test scenario actions: +```bash +EVENT_LIST="${EVENT_LIST} DELETE_ENDPOINTS" +EVENT_TIME_DELETE_ENDPOINTS="$(date -Iseconds)" +EVENT_TEXT_DELETE_ENDPOINTS="Delete endpoints..." +``` +```bash +kubectl -n ${NAMESPACE} delete -k ./endpoints --cascade=foreground +``` +```bash +EVENT_LIST="${EVENT_LIST} ENDPOINTS_DELETED" +EVENT_TIME_ENDPOINTS_DELETED="$(date -Iseconds)" +EVENT_TEXT_ENDPOINTS_DELETED="Endpoints deleted" +``` + +Make sure clients don't have any open connections: +```bash +checkConnectionsCount ${NAMESPACE} "10.0" 0 +``` +```bash +EVENT_LIST="${EVENT_LIST} CLIENT_ROUTES_DELETED" +EVENT_TIME_CLIENT_ROUTES_DELETED="$(date -Iseconds)" +EVENT_TEXT_CLIENT_ROUTES_DELETED="Client routes deleted" +``` +```bash +sleep 15 +``` + +Delete everything: +```bash +EVENT_LIST="${EVENT_LIST} DELETE_NAMESPACE" +EVENT_TIME_DELETE_NAMESPACE="$(date -Iseconds)" +EVENT_TEXT_DELETE_NAMESPACE="Delete namespace..." 
+``` +```bash +kubectl delete ns ${NAMESPACE} +``` +```bash +EVENT_LIST="${EVENT_LIST} NAMESPACE_DELETED" +EVENT_TIME_NAMESPACE_DELETED="$(date -Iseconds)" +EVENT_TEXT_NAMESPACE_DELETED="Namespace deleted" +``` + +## Cleanup + +Save possible test fail event: +```bash +if [[ "${EVENT_TIME_NAMESPACE_DELETED}" == "" ]]; then + EVENT_LIST="${EVENT_LIST} TEST_FAIL" + EVENT_TIME_TEST_FAIL="$(date -Iseconds)" + EVENT_TEXT_TEST_FAIL="Fail" +fi +``` + +Delete resources: +```bash +kubectl delete ns ${NAMESPACE} --ignore-not-found +``` +```bash +kubectl delete -f netsvcs.yaml --ignore-not-found +``` + +Wait few seconds to capture performance after test end: +```bash +sleep 15 +``` + +Mark test end: +```bash +TEST_TIME_END="$(date -Iseconds)" +``` + +Save statistics: +```bash +RESULT_DIR="./${RESULTS_PARENT_DIR}/results-$(date --date="${TEST_TIME_START}" -u +%FT%H-%M-%S%z)-netsvc=${TEST_NS_COUNT}-nse=${TEST_NSE_COUNT}-nsc=${TEST_NSC_COUNT}" +PARAM_ANNOTATION="dry heal case, ${TEST_NS_COUNT} service(s), ${TEST_NSE_COUNT} NSE(s), ${TEST_NSC_COUNT} NSC(s)" +if [[ "${TEST_REMOTE_CASE}" == "true" ]]; then + PARAM_ANNOTATION="${PARAM_ANNOTATION}, remote case" + RESULT_DIR="${RESULT_DIR}-remote" +else + PARAM_ANNOTATION="${PARAM_ANNOTATION}, local case" + RESULT_DIR="${RESULT_DIR}-local" +fi +PARAM_ANNOTATION="${PARAM_ANNOTATION}, run at ${TEST_TIME_START}" +``` +```bash +. ../save_metrics.sh +``` diff --git a/examples/scalability/cases/Heal/README.md b/examples/scalability/cases/Heal/README.md new file mode 100644 index 000000000000..9f6b82e784fc --- /dev/null +++ b/examples/scalability/cases/Heal/README.md @@ -0,0 +1,235 @@ +# Scalability heal test + +This test has the following scenario: +1. Deploy endpoints +2. Deploy clients +3. Deploy new endpoints +4. Delete old endpoints +5. Wait for all connections to heal +6. Delete clients +7. Delete endpoints +8. 
Gather statistics + +## Run + +Save test time, for drawing plots: +```bash +TEST_TIME_START=$(date -Iseconds) +``` + +Create test namespace: +```bash +NAMESPACE=($(kubectl create -f ../../namespace.yaml)[0]) +NAMESPACE=${NAMESPACE:10} +``` + +Register namespace in `spire` server: +```bash +kubectl exec -n spire spire-server-0 -- \ +/opt/spire/bin/spire-server entry create \ +-spiffeID spiffe://example.org/ns/${NAMESPACE}/sa/default \ +-parentID spiffe://example.org/ns/spire/sa/spire-agent \ +-selector k8s:ns:${NAMESPACE} \ +-selector k8s:sa:default +``` + +Create helper functions: +```bash +. ../define_helper_functions.sh +``` + +Set test parameters: +```bash +readParams .. +``` + +Select nodes to deploy NSC and NSE: +```bash +NODES=($(kubectl get nodes -o go-template='{{range .items}}{{ if not .spec.taints }}{{ .metadata.name }} {{end}}{{end}}')) +NSE_NODE=${NODES[0]} +if [[ "${TEST_REMOTE_CASE}" == "true" ]]; then + NSC_NODE=${NODES[1]} +else + NSC_NODE=${NODES[0]} +fi +echo NSE_NODE ${NSE_NODE}, NSC_NODE ${NSC_NODE} +``` + +Deploy network services: +```bash +generate_netsvc ${TEST_NS_COUNT} +``` +```bash +kubectl apply -f netsvcs.yaml +``` + +Deploy endpoints: +```bash +create_endpoint_patches ${TEST_NSE_COUNT} ${NSE_NODE} endpoints-0 0 +``` +```bash +kubectl apply -k ./endpoints-0 +``` +```bash +timeout -v --kill-after=10s 3m kubectl -n ${NAMESPACE} wait pod --timeout=3m -l app=nse-kernel --for=condition=ready +``` + +Make sure that all endpoints have finished registration: +```bash +checkEndpointsStart ${NAMESPACE} endpoints-0 +``` +```bash +EVENT_LIST="${EVENT_LIST} ENDPOINTS_0_STARTED" +EVENT_TIME_ENDPOINTS_0_STARTED="$(date -Iseconds)" +EVENT_TEXT_ENDPOINTS_0_STARTED="All endpoints-0 started" +``` + +Deploy clients: +```bash +create_client_patches ${TEST_NSC_COUNT} ${NSC_NODE} clients +``` +```bash +kubectl apply -k ./clients +``` +```bash +timeout -v --kill-after=10s 3m kubectl -n ${NAMESPACE} wait pod --timeout=3m -l app=nsc-kernel --for=condition=ready 
+``` + +```bash +checkClientsSvid ${NAMESPACE} +``` +```bash +EVENT_LIST="${EVENT_LIST} CLIENTS_GOT_SVID" +EVENT_TIME_CLIENTS_GOT_SVID="$(date -Iseconds)" +EVENT_TEXT_CLIENTS_GOT_SVID="All clients obtained svid" +``` + +```bash +checkConnectionsCount ${NAMESPACE} "10.0" ${TEST_NS_COUNT} +``` +```bash +EVENT_LIST="${EVENT_LIST} CONNECTIONS_READY" +EVENT_TIME_CONNECTIONS_READY="$(date -Iseconds)" +EVENT_TEXT_CONNECTIONS_READY="Connections established" +``` +```bash +sleep 15 +``` + +Deploy second batch of endpoints: +```bash +create_endpoint_patches ${TEST_NSE_COUNT} ${NSE_NODE} endpoints-1 1 +``` +```bash +EVENT_LIST="${EVENT_LIST} RECREATE_ENDPOINTS" +EVENT_TIME_RECREATE_ENDPOINTS="$(date -Iseconds)" +EVENT_TEXT_RECREATE_ENDPOINTS="Create endpoints-1" +``` +```bash +kubectl apply -k ./endpoints-1 +``` +```bash +timeout -v --kill-after=10s 3m kubectl -n ${NAMESPACE} wait pod --timeout=3m -l app=nse-kernel --for=condition=ready +``` + +Make sure that all endpoints have finished registration: +```bash +checkEndpointsStart ${NAMESPACE} endpoints-1 +``` +```bash +EVENT_LIST="${EVENT_LIST} ENDPOINTS_1_STARTED" +EVENT_TIME_ENDPOINTS_1_STARTED="$(date -Iseconds)" +EVENT_TEXT_ENDPOINTS_1_STARTED="All endpoints-1 started" +``` + +Delete first batch of endpoints: +```bash +EVENT_LIST="${EVENT_LIST} DELETE_ENDPOINTS" +EVENT_TIME_DELETE_ENDPOINTS="$(date -Iseconds)" +EVENT_TEXT_DELETE_ENDPOINTS="Delete endpoints-0..." 
+``` +```bash +kubectl -n ${NAMESPACE} delete -k ./endpoints-0 --cascade=foreground +``` +```bash +EVENT_LIST="${EVENT_LIST} ENDPOINTS_DELETED" +EVENT_TIME_ENDPOINTS_DELETED="$(date -Iseconds)" +EVENT_TEXT_ENDPOINTS_DELETED="Endpoints-0 deleted" +``` + +Wait for all connections to heal: +```bash +checkConnectionsCount ${NAMESPACE} "10.1" ${TEST_NS_COUNT} +``` +```bash +EVENT_LIST="${EVENT_LIST} HEAL_FINISHED" +EVENT_TIME_HEAL_FINISHED="$(date -Iseconds)" +EVENT_TEXT_HEAL_FINISHED="Heal finished" +``` +```bash +sleep 15 +``` + +Delete everything: +```bash +EVENT_LIST="${EVENT_LIST} DELETE_NAMESPACE" +EVENT_TIME_DELETE_NAMESPACE="$(date -Iseconds)" +EVENT_TEXT_DELETE_NAMESPACE="Delete namespace..." +``` +```bash +kubectl -n ${NAMESPACE} delete -k ./clients --cascade=foreground +``` +```bash +kubectl delete ns ${NAMESPACE} +``` +```bash +EVENT_LIST="${EVENT_LIST} NAMESPACE_DELETED" +EVENT_TIME_NAMESPACE_DELETED="$(date -Iseconds)" +EVENT_TEXT_NAMESPACE_DELETED="Namespace deleted" +``` + +## Cleanup + +Save possible test fail event: +```bash +if [[ "${EVENT_TIME_NAMESPACE_DELETED}" == "" ]]; then + EVENT_LIST="${EVENT_LIST} TEST_FAIL" + EVENT_TIME_TEST_FAIL="$(date -Iseconds)" + EVENT_TEXT_TEST_FAIL="Fail" +fi +``` + +Delete resources: +```bash +kubectl delete ns ${NAMESPACE} --ignore-not-found +``` +```bash +kubectl delete -f netsvcs.yaml --ignore-not-found +``` + +Wait few seconds to capture performance after test end: +```bash +sleep 15 +``` + +Mark test end: +```bash +TEST_TIME_END="$(date -Iseconds)" +``` + +Save statistics: +```bash +RESULT_DIR="./${RESULTS_PARENT_DIR}/results-$(date --date="${TEST_TIME_START}" -u +%FT%H-%M-%S%z)-netsvc=${TEST_NS_COUNT}-nse=${TEST_NSE_COUNT}-nsc=${TEST_NSC_COUNT}" +PARAM_ANNOTATION="heal case, ${TEST_NS_COUNT} service(s), ${TEST_NSE_COUNT} NSE(s), ${TEST_NSC_COUNT} NSC(s)" +if [[ "${TEST_REMOTE_CASE}" == "true" ]]; then + PARAM_ANNOTATION="${PARAM_ANNOTATION}, remote case" + RESULT_DIR="${RESULT_DIR}-remote" +else + 
PARAM_ANNOTATION="${PARAM_ANNOTATION}, local case" + RESULT_DIR="${RESULT_DIR}-local" +fi +PARAM_ANNOTATION="${PARAM_ANNOTATION}, run at ${TEST_TIME_START}" +``` +```bash +. ../save_metrics.sh +``` diff --git a/examples/scalability/cases/SingleStart/README.md b/examples/scalability/cases/SingleStart/README.md new file mode 100644 index 000000000000..5d22af1c4903 --- /dev/null +++ b/examples/scalability/cases/SingleStart/README.md @@ -0,0 +1,193 @@ +# Scalability single start test + +This test has the following scenario: +1. Deploy endpoints +2. Deploy clients +3. Delete clients +4. Delete endpoints +5. Gather statistics + +## Run + +Save test time, for drawing plots: +```bash +TEST_TIME_START=$(date -Iseconds) +``` + +Create test namespace: +```bash +NAMESPACE=($(kubectl create -f ../../namespace.yaml)[0]) +NAMESPACE=${NAMESPACE:10} +``` + +Register namespace in `spire` server: +```bash +kubectl exec -n spire spire-server-0 -- \ +/opt/spire/bin/spire-server entry create \ +-spiffeID spiffe://example.org/ns/${NAMESPACE}/sa/default \ +-parentID spiffe://example.org/ns/spire/sa/spire-agent \ +-selector k8s:ns:${NAMESPACE} \ +-selector k8s:sa:default +``` + +Create helper functions: +```bash +. ../define_helper_functions.sh +``` + +Set test parameters: +```bash +readParams .. 
+``` + +Select nodes to deploy NSC and NSE: +```bash +NODES=($(kubectl get nodes -o go-template='{{range .items}}{{ if not .spec.taints }}{{ .metadata.name }} {{end}}{{end}}')) +NSE_NODE=${NODES[0]} +if [[ "${TEST_REMOTE_CASE}" == "true" ]]; then + NSC_NODE=${NODES[1]} +else + NSC_NODE=${NODES[0]} +fi +echo NSE_NODE ${NSE_NODE}, NSC_NODE ${NSC_NODE} +``` + +Deploy network services: +```bash +generate_netsvc ${TEST_NS_COUNT} +``` +```bash +kubectl apply -f netsvcs.yaml +``` + +Deploy endpoints: +```bash +create_endpoint_patches ${TEST_NSE_COUNT} ${NSE_NODE} endpoints 0 +``` +```bash +kubectl apply -k ./endpoints +``` +```bash +timeout -v --kill-after=10s 3m kubectl -n ${NAMESPACE} wait pod --timeout=3m -l app=nse-kernel --for=condition=ready +``` + +Make sure that all endpoints have finished registration: +```bash +checkEndpointsStart ${NAMESPACE} endpoints +``` +```bash +EVENT_LIST="${EVENT_LIST} ENDPOINTS_0_STARTED" +EVENT_TIME_ENDPOINTS_0_STARTED="$(date -Iseconds)" +EVENT_TEXT_ENDPOINTS_0_STARTED="All endpoints started" +``` + +Deploy clients: +```bash +create_client_patches ${TEST_NSC_COUNT} ${NSC_NODE} clients +``` +```bash +kubectl apply -k ./clients +``` +```bash +timeout -v --kill-after=10s 3m kubectl -n ${NAMESPACE} wait pod --timeout=3m -l app=nsc-kernel --for=condition=ready +``` + +```bash +checkClientsSvid ${NAMESPACE} +``` +```bash +EVENT_LIST="${EVENT_LIST} CLIENTS_GOT_SVID" +EVENT_TIME_CLIENTS_GOT_SVID="$(date -Iseconds)" +EVENT_TEXT_CLIENTS_GOT_SVID="All clients obtained svid" +``` + +```bash +checkConnectionsCount ${NAMESPACE} "10.0" ${TEST_NS_COUNT} +``` +```bash +EVENT_LIST="${EVENT_LIST} CONNECTIONS_READY" +EVENT_TIME_CONNECTIONS_READY="$(date -Iseconds)" +EVENT_TEXT_CONNECTIONS_READY="Connections established" +``` +```bash +sleep 15 +``` + +Run test scenario actions: +```bash +EVENT_LIST="${EVENT_LIST} DELETE_CLIENTS" +EVENT_TIME_DELETE_CLIENTS="$(date -Iseconds)" +EVENT_TEXT_DELETE_CLIENTS="Delete clients..." 
+``` +```bash +kubectl -n ${NAMESPACE} delete -k ./clients --cascade=foreground +``` +```bash +EVENT_LIST="${EVENT_LIST} CLIENTS_DELETED" +EVENT_TIME_CLIENTS_DELETED="$(date -Iseconds)" +EVENT_TEXT_CLIENTS_DELETED="Clients deleted" +``` +```bash +sleep 15 +``` + +Delete everything: +```bash +EVENT_LIST="${EVENT_LIST} DELETE_NAMESPACE" +EVENT_TIME_DELETE_NAMESPACE="$(date -Iseconds)" +EVENT_TEXT_DELETE_NAMESPACE="Delete namespace..." +``` +```bash +kubectl delete ns ${NAMESPACE} +``` +```bash +EVENT_LIST="${EVENT_LIST} NAMESPACE_DELETED" +EVENT_TIME_NAMESPACE_DELETED="$(date -Iseconds)" +EVENT_TEXT_NAMESPACE_DELETED="Namespace deleted" +``` + +## Cleanup + +Save possible test fail event: +```bash +if [[ "${EVENT_TIME_NAMESPACE_DELETED}" == "" ]]; then + EVENT_LIST="${EVENT_LIST} TEST_FAIL" + EVENT_TIME_TEST_FAIL="$(date -Iseconds)" + EVENT_TEXT_TEST_FAIL="Fail" +fi +``` + +Delete resources: +```bash +kubectl delete ns ${NAMESPACE} --ignore-not-found +``` +```bash +kubectl delete -f netsvcs.yaml --ignore-not-found +``` + +Wait few seconds to capture performance after test end: +```bash +sleep 15 +``` + +Mark test end: +```bash +TEST_TIME_END="$(date -Iseconds)" +``` + +Save statistics: +```bash +RESULT_DIR="./${RESULTS_PARENT_DIR}/results-$(date --date="${TEST_TIME_START}" -u +%FT%H-%M-%S%z)-netsvc=${TEST_NS_COUNT}-nse=${TEST_NSE_COUNT}-nsc=${TEST_NSC_COUNT}" +PARAM_ANNOTATION="single start case, ${TEST_NS_COUNT} service(s), ${TEST_NSE_COUNT} NSE(s), ${TEST_NSC_COUNT} NSC(s)" +if [[ "${TEST_REMOTE_CASE}" == "true" ]]; then + PARAM_ANNOTATION="${PARAM_ANNOTATION}, remote case" + RESULT_DIR="${RESULT_DIR}-remote" +else + PARAM_ANNOTATION="${PARAM_ANNOTATION}, local case" + RESULT_DIR="${RESULT_DIR}-local" +fi +PARAM_ANNOTATION="${PARAM_ANNOTATION}, run at ${TEST_TIME_START}" +``` +```bash +. 
../save_metrics.sh +``` diff --git a/examples/scalability/cases/define_helper_functions.sh b/examples/scalability/cases/define_helper_functions.sh new file mode 100644 index 000000000000..d63e4d023f50 --- /dev/null +++ b/examples/scalability/cases/define_helper_functions.sh @@ -0,0 +1,328 @@ +#!/bin/bash + +function readParams() { + local params_folder=$1 + + if [[ -f "${params_folder}/set_params.sh" ]]; then + . "${params_folder}/set_params.sh" + fi + + if [[ "${TEST_NS_COUNT}" == "" ]]; then + TEST_NS_COUNT=1 + fi + + if [[ "${TEST_NSE_COUNT}" == "" ]]; then + TEST_NSE_COUNT=1 + fi + + if [[ "${TEST_NSC_COUNT}" == "" ]]; then + TEST_NSC_COUNT=1 + fi + + if [[ "${TEST_REMOTE_CASE}" == "" ]]; then + TEST_REMOTE_CASE=false + fi +} + +function generate_netsvc() { + local ns_count=$1 + + local ns_list= + local ns_url_list= + cat /dev/null > netsvcs.yaml + for (( i = 0; i < ns_count; i++ )) + do + ns=scalability-local-ns-$i + nsIfName=nsm-$i + cat >> netsvcs.yaml < "./${batch_name}/kustomization.yaml" <"./${batch_name}/patch-nse.yaml" + cat /dev/null >"./${batch_name}/nse.yaml" + for ((i = 0; i < nse_count; i++)); do + sed "s/name: nse-kernel/name: nse-kernel-$i/g" ../../../../apps/nse-kernel/nse.yaml >>"./${batch_name}/nse.yaml" + local cidr_prefix=10.${ip_interfix}.$i.0/24 + cat >>"./${batch_name}/patch-nse.yaml" < "./${batch_name}/kustomization.yaml" <"./${batch_name}/patch-nsc.yaml" < "${result_dir}/plot.gp" <> "${result_dir}/plot.gp" <> "${result_dir}/plot.gp" < "${RESULT_DIR}/${name}.csv" || return 2 + + sed -E -i "${name_replacement}" "${RESULT_DIR}/${name}.csv" + + local gnuplot_pod + gnuplot_pod=$(kubectl -n gnuplot get pod --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' -l app=gnuplot) + <"${RESULT_DIR}/${name}.csv" kubectl -n gnuplot exec "${gnuplot_pod}" -i -- cp /dev/stdin data.csv || return 3 + sed 's/set title ""/set title "'"${title}"'"/' "${RESULT_DIR}/plot.gp" | kubectl -n gnuplot exec "${gnuplot_pod}" -i -- gnuplot || return 4 + 
kubectl -n gnuplot exec "${gnuplot_pod}" -- cat result.png >"${RESULT_DIR}/${name}.png" || return 5 + + echo "${name} saved successfully" +} diff --git a/examples/scalability/cases/save_metrics.sh b/examples/scalability/cases/save_metrics.sh new file mode 100644 index 000000000000..759f88ac8bcd --- /dev/null +++ b/examples/scalability/cases/save_metrics.sh @@ -0,0 +1,100 @@ +#!/bin/bash + +createGnuplotFile "${RESULT_DIR}" "${EVENT_LIST}" "EVENT_TEXT" "EVENT_TIME" "${TEST_TIME_START}" "${TEST_TIME_END}" + +CONT_REPLACE='s/\{container_label_io_kubernetes_container_name="([^"]*)",kubernetes_io_hostname="([^"]*)"}/\1 on \2/g' +POD_REPLACE='s/\{kubernetes_io_hostname="([^"]*)",pod="([^"]*)"}/\2 on \1/g' + +saveData nsm-cpu "NSM CPU Usage in cores, ${PARAM_ANNOTATION}" "${CONT_REPLACE}" " +sum by (container_label_io_kubernetes_container_name, kubernetes_io_hostname) ( + rate(container_cpu_usage_seconds_total{ + container_label_io_kubernetes_pod_namespace=~'nsm-system|spire', + container_label_io_kubernetes_container_name!='', + container_label_io_kubernetes_container_name!='POD', + id!~'/docker.*', + kubernetes_io_hostname=~'${NSC_NODE}|${NSE_NODE}' + }[5s]) +)" || return $((10 + $?)) + +saveData nsm-mem "NSM Memory Usage in megabytes, ${PARAM_ANNOTATION}" "${CONT_REPLACE}" " +sum by (container_label_io_kubernetes_container_name, kubernetes_io_hostname) ( + container_memory_working_set_bytes{ + container_label_io_kubernetes_pod_namespace=~'nsm-system|spire', + container_label_io_kubernetes_container_name!='', + container_label_io_kubernetes_container_name!='POD', + id!~'/docker.*', + kubernetes_io_hostname=~'${NSC_NODE}|${NSE_NODE}' + } +) / 1024 / 1024" || return $((20 + $?)) + +saveData test-cpu "Test CPU Usage in cores, ${PARAM_ANNOTATION}" "${POD_REPLACE}" " + sum by (pod, kubernetes_io_hostname) ( + label_replace( + rate(container_cpu_usage_seconds_total{ + container_label_io_kubernetes_pod_namespace='${NAMESPACE}', + 
container_label_io_kubernetes_container_name!='', + container_label_io_kubernetes_container_name!='POD', + id!~'/docker.*', + kubernetes_io_hostname=~'${NSC_NODE}|${NSE_NODE}' + }[5s]), + 'uid', '\$1', 'id', '.*/pod(.*)/.*' + ) * on(uid) group_left(pod) kube_pod_info +)" || return $((30 + $?)) + +saveData test-mem "Test Memory Usage in megabytes, ${PARAM_ANNOTATION}" "${POD_REPLACE}" " +sum by (pod, kubernetes_io_hostname) ( + label_replace( + container_memory_working_set_bytes{ + container_label_io_kubernetes_pod_namespace='${NAMESPACE}', + container_label_io_kubernetes_container_name!='', + container_label_io_kubernetes_container_name!='POD', + id!~'/docker.*', + kubernetes_io_hostname=~'${NSC_NODE}|${NSE_NODE}' + }, + 'uid', '\$1', 'id', '.*/pod(.*)/.*' + ) * on(uid) group_left(pod) kube_pod_info +) / 1024 / 1024" || return $((40 + $?)) + +saveData test-cpu-average "Averaged Test CPU Usage in cores, ${PARAM_ANNOTATION}" "${CONT_REPLACE}" " +avg by (container_label_io_kubernetes_container_name, kubernetes_io_hostname) ( + rate(container_cpu_usage_seconds_total{ + container_label_io_kubernetes_pod_namespace='${NAMESPACE}', + container_label_io_kubernetes_container_name!='', + container_label_io_kubernetes_container_name!='POD', + id!~'/docker.*', + kubernetes_io_hostname=~'${NSC_NODE}|${NSE_NODE}' + }[5s]) +)" || return $((50 + $?)) + +saveData test-mem-average "Averaged Test Memory Usage in megabytes, ${PARAM_ANNOTATION}" "${CONT_REPLACE}" " +avg by (container_label_io_kubernetes_container_name, kubernetes_io_hostname) ( + container_memory_working_set_bytes{ + container_label_io_kubernetes_pod_namespace='${NAMESPACE}', + container_label_io_kubernetes_container_name!='', + container_label_io_kubernetes_container_name!='POD', + id!~'/docker.*', + kubernetes_io_hostname=~'${NSC_NODE}|${NSE_NODE}' + } +) / 1024 / 1024" || return $((60 + $?)) + +saveData test-cpu-sum "Summarized Test CPU Usage in cores, ${PARAM_ANNOTATION}" "${CONT_REPLACE}" " +sum by 
(container_label_io_kubernetes_container_name, kubernetes_io_hostname) ( + rate(container_cpu_usage_seconds_total{ + container_label_io_kubernetes_pod_namespace='${NAMESPACE}', + container_label_io_kubernetes_container_name!='', + container_label_io_kubernetes_container_name!='POD', + id!~'/docker.*', + kubernetes_io_hostname=~'${NSC_NODE}|${NSE_NODE}' + }[5s]) +)" || return $((70 + $?)) + +saveData test-mem-sum "Summarized Test Memory Usage in megabytes, ${PARAM_ANNOTATION}" "${CONT_REPLACE}" " +sum by (container_label_io_kubernetes_container_name, kubernetes_io_hostname) ( + container_memory_working_set_bytes{ + container_label_io_kubernetes_pod_namespace='${NAMESPACE}', + container_label_io_kubernetes_container_name!='', + container_label_io_kubernetes_container_name!='POD', + id!~'/docker.*', + kubernetes_io_hostname=~'${NSC_NODE}|${NSE_NODE}' + } +) / 1024 / 1024" || return $((80 + $?)) diff --git a/examples/scalability/gnuplot/README.md b/examples/scalability/gnuplot/README.md new file mode 100644 index 000000000000..38b4f7108464 --- /dev/null +++ b/examples/scalability/gnuplot/README.md @@ -0,0 +1,21 @@ +# Gnuplot + +Contains setup for gnuplot. + +## Run + +Deploy gnuplot: +```bash +kubectl apply -k . +``` + +Wait till gnuplot pod is up and running: +```bash +kubectl -n gnuplot --timeout=1m wait pod --for=condition=ready -l app=gnuplot +``` + +## Cleanup + +```bash +kubectl delete -k . 
+``` diff --git a/examples/scalability/gnuplot/deployment.yaml b/examples/scalability/gnuplot/deployment.yaml new file mode 100644 index 000000000000..8fc5224c83a6 --- /dev/null +++ b/examples/scalability/gnuplot/deployment.yaml @@ -0,0 +1,26 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gnuplot + labels: + app: gnuplot +spec: + replicas: 1 + selector: + matchLabels: + app: gnuplot + template: + metadata: + labels: + app: gnuplot + spec: + containers: + - name: gnuplot + image: remuslazar/gnuplot@sha256:f8a3604fdd0e1881ef25ddf5bec9e5390813270d8faad3c5da6916fad2e17fb0 + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + - |- + trap : TERM INT; sleep 9999999999d & wait diff --git a/examples/scalability/gnuplot/kustomization.yaml b/examples/scalability/gnuplot/kustomization.yaml new file mode 100644 index 000000000000..9d1dcca54f48 --- /dev/null +++ b/examples/scalability/gnuplot/kustomization.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: gnuplot + +resources: + - ./namespace.yaml + - ./deployment.yaml diff --git a/examples/scalability/gnuplot/namespace.yaml b/examples/scalability/gnuplot/namespace.yaml new file mode 100644 index 000000000000..1884284a6f96 --- /dev/null +++ b/examples/scalability/gnuplot/namespace.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: gnuplot diff --git a/examples/scalability/kube_state_metrics/README.md b/examples/scalability/kube_state_metrics/README.md new file mode 100644 index 000000000000..7aa9d88bf176 --- /dev/null +++ b/examples/scalability/kube_state_metrics/README.md @@ -0,0 +1,21 @@ +# kube-state-metrics + +Contains setup for kube-state-metrics. + +## Run + +Deploy kube-state-metrics: +```bash +kubectl apply -k . +``` + +Wait for application ready: +```bash +kubectl -n kube-system --timeout=1m wait pod --for=condition=ready -l app=kube-state-metrics +``` + +## Cleanup + +```bash +kubectl delete -k . 
+``` diff --git a/examples/scalability/kube_state_metrics/cluster-role-binding.yaml b/examples/scalability/kube_state_metrics/cluster-role-binding.yaml new file mode 100644 index 000000000000..d13b521f1eda --- /dev/null +++ b/examples/scalability/kube_state_metrics/cluster-role-binding.yaml @@ -0,0 +1,16 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.1.0 + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: + - kind: ServiceAccount + name: kube-state-metrics + namespace: kube-system diff --git a/examples/scalability/kube_state_metrics/cluster-role.yaml b/examples/scalability/kube_state_metrics/cluster-role.yaml new file mode 100644 index 000000000000..c072018726f2 --- /dev/null +++ b/examples/scalability/kube_state_metrics/cluster-role.yaml @@ -0,0 +1,109 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.1.0 + name: kube-state-metrics +rules: + - apiGroups: + - "" + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: + - list + - watch + - apiGroups: + - apps + resources: + - statefulsets + - daemonsets + - deployments + - replicasets + verbs: + - list + - watch + - apiGroups: + - batch + resources: + - cronjobs + - jobs + verbs: + - list + - watch + - apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - list + - watch + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create + - apiGroups: + - policy + resources: + - 
poddisruptionbudgets + verbs: + - list + - watch + - apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests + verbs: + - list + - watch + - apiGroups: + - storage.k8s.io + resources: + - storageclasses + - volumeattachments + verbs: + - list + - watch + - apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + - validatingwebhookconfigurations + verbs: + - list + - watch + - apiGroups: + - networking.k8s.io + resources: + - networkpolicies + - ingresses + verbs: + - list + - watch + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - list + - watch diff --git a/examples/scalability/kube_state_metrics/deployment.yaml b/examples/scalability/kube_state_metrics/deployment.yaml new file mode 100644 index 000000000000..d95bb5679b6b --- /dev/null +++ b/examples/scalability/kube_state_metrics/deployment.yaml @@ -0,0 +1,46 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.1.0 + name: kube-state-metrics + namespace: kube-system +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + template: + metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.1.0 + spec: + containers: + - name: kube-state-metrics + image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.1.0 + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 8081 + name: telemetry + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: / + port: 8081 + initialDelaySeconds: 5 + timeoutSeconds: 5 + securityContext: + runAsUser: 65534 + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: kube-state-metrics diff --git a/examples/scalability/kube_state_metrics/kustomization.yaml 
b/examples/scalability/kube_state_metrics/kustomization.yaml new file mode 100644 index 000000000000..82eb7443da69 --- /dev/null +++ b/examples/scalability/kube_state_metrics/kustomization.yaml @@ -0,0 +1,15 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: kube-system + +commonLabels: + app: kube-state-metrics + +resources: + - ./cluster-role.yaml + - ./cluster-role-binding.yaml + - ./deployment.yaml + - ./service.yaml + - ./service-account.yaml diff --git a/examples/scalability/kube_state_metrics/service-account.yaml b/examples/scalability/kube_state_metrics/service-account.yaml new file mode 100644 index 000000000000..8286d21bd7c2 --- /dev/null +++ b/examples/scalability/kube_state_metrics/service-account.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.1.0 + name: kube-state-metrics + namespace: kube-system diff --git a/examples/scalability/kube_state_metrics/service.yaml b/examples/scalability/kube_state_metrics/service.yaml new file mode 100644 index 000000000000..77db51c0ae8b --- /dev/null +++ b/examples/scalability/kube_state_metrics/service.yaml @@ -0,0 +1,20 @@ +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.1.0 + name: kube-state-metrics + namespace: kube-system +spec: + clusterIP: None + ports: + - name: http-metrics + port: 8080 + targetPort: http-metrics + - name: telemetry + port: 8081 + targetPort: telemetry + selector: + app.kubernetes.io/name: kube-state-metrics diff --git a/examples/scalability/namespace.yaml b/examples/scalability/namespace.yaml new file mode 100644 index 000000000000..7919ccab9bec --- /dev/null +++ b/examples/scalability/namespace.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + generateName: ns- diff --git a/examples/scalability/nsm_setup/README.md 
b/examples/scalability/nsm_setup/README.md new file mode 100644 index 000000000000..bf8c5ed1ae01 --- /dev/null +++ b/examples/scalability/nsm_setup/README.md @@ -0,0 +1,102 @@ +# NSM setup + +Contains NSM setup for scalability tests. Identical to basic setup, except without limits. + +## Requires + +- [spire](../../spire) + +## Run + +1. Register `nsm-system` namespace in spire: + +```bash +kubectl exec -n spire spire-server-0 -- \
+/opt/spire/bin/spire-server entry create \
+-spiffeID spiffe://example.org/ns/nsm-system/sa/default \
+-parentID spiffe://example.org/ns/spire/sa/spire-agent \
+-selector k8s:ns:nsm-system \
+-selector k8s:sa:default
+``` + +2. Register `registry-k8s-sa` in spire: + +```bash +kubectl exec -n spire spire-server-0 -- \
+/opt/spire/bin/spire-server entry create \
+-spiffeID spiffe://example.org/ns/nsm-system/sa/registry-k8s-sa \
+-parentID spiffe://example.org/ns/spire/sa/spire-agent \
+-selector k8s:ns:nsm-system \
+-selector k8s:sa:registry-k8s-sa
+``` + +3. Choose node for NSM components: +```bash +NODE=($(kubectl get nodes -o go-template='{{range .items}}{{ if not .spec.taints }}{{ .metadata.name }} {{end}}{{end}}')[0]) +``` + +4. Create patches for registry and webhook: +```bash +cat > patch-registry-k8s.yaml < patch-admission-webhook.yaml <port_forwarder_out.log 2>&1 & +``` + +Make sure the proxy is working, and we can access Prometheus through it: +```bash +curl "http://localhost:9090/-/healthy" --silent --show-error +``` + +## Cleanup + +Kill proxy to prometheus: +```bash +PORT_FORWARDER_JOB=$(jobs | grep "prometheus port-forward" | cut -d] -f1 | cut -c 2-) +if [[ "${PORT_FORWARDER_JOB}" != "" ]]; then + kill %${PORT_FORWARDER_JOB} + cat port_forwarder_out.log + rm port_forwarder_out.log +fi +``` + +Remove Prometheus: +```bash +kubectl delete -k . 
+``` diff --git a/examples/scalability/prometheus/clusterRole.yaml b/examples/scalability/prometheus/clusterRole.yaml new file mode 100644 index 000000000000..a66eeafde342 --- /dev/null +++ b/examples/scalability/prometheus/clusterRole.yaml @@ -0,0 +1,43 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus +rules: + - apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + verbs: + - get + - list + - watch + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - watch + - nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus +subjects: + - kind: ServiceAccount + name: default + namespace: monitoring diff --git a/examples/scalability/prometheus/config-map.yaml b/examples/scalability/prometheus/config-map.yaml new file mode 100644 index 000000000000..ce5772ef3481 --- /dev/null +++ b/examples/scalability/prometheus/config-map.yaml @@ -0,0 +1,89 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-server-conf + labels: + name: prometheus-server-conf +data: + prometheus.rules: |- + groups: + - name: devopscube demo alert + rules: + - alert: High Pod Memory + expr: sum(container_memory_usage_bytes) > 1 + for: 1m + labels: + severity: slack + annotations: + summary: High Memory Usage + prometheus.yml: |- + global: + scrape_interval: 1s + evaluation_interval: 15s + rule_files: + - /etc/prometheus/prometheus.rules + alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: + - "alertmanager.monitoring.svc:9093" + + scrape_configs: + - job_name: 'node-exporter' + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - source_labels: [__meta_kubernetes_endpoints_name] + regex: 'node-exporter' + action: keep + + - job_name: 
'kube-state-metrics' + static_configs: + - targets: ['kube-state-metrics.kube-system.svc.cluster.local:8080'] + + - job_name: 'kubernetes-pods' + + kubernetes_sd_configs: + - role: pod + + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: kubernetes_pod_name + + - job_name: 'kubernetes-cadvisor' + + scheme: http + + kubernetes_sd_configs: + - role: node + + metrics_path: /metrics + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__address__] + action: replace + regex: ([^:]+):.* + replacement: $1:9089 + target_label: __address__ diff --git a/examples/scalability/prometheus/deployment.yaml b/examples/scalability/prometheus/deployment.yaml new file mode 100644 index 000000000000..b4a011467322 --- /dev/null +++ b/examples/scalability/prometheus/deployment.yaml @@ -0,0 +1,46 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus-deployment + labels: + app: prometheus-server +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus-server + template: + metadata: + labels: + app: prometheus-server + spec: + containers: + - name: prometheus + image: prom/prometheus:v2.28.1 + imagePullPolicy: IfNotPresent + args: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus/ + - --query.lookback-delta=5s + ports: + - containerPort: 9090 
+ volumeMounts: + - name: prometheus-config-volume + mountPath: /etc/prometheus/ + - name: prometheus-storage-volume + mountPath: /prometheus/ + readinessProbe: + exec: + command: + - wget + - localhost:9090/-/healthy + - -O + - /dev/null + volumes: + - name: prometheus-config-volume + configMap: + defaultMode: 420 + name: prometheus-server-conf + - name: prometheus-storage-volume + emptyDir: {} diff --git a/examples/scalability/prometheus/kustomization.yaml b/examples/scalability/prometheus/kustomization.yaml new file mode 100644 index 000000000000..ff8ad7d0efe6 --- /dev/null +++ b/examples/scalability/prometheus/kustomization.yaml @@ -0,0 +1,11 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: prometheus + +resources: + - ./namespace.yaml + - ./clusterRole.yaml + - ./config-map.yaml + - ./deployment.yaml diff --git a/examples/scalability/prometheus/namespace.yaml b/examples/scalability/prometheus/namespace.yaml new file mode 100644 index 000000000000..ea8144c2a2ee --- /dev/null +++ b/examples/scalability/prometheus/namespace.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: prometheus