From 3c6797a3838967aa875ef6dbb2e18a9a5a0dbf04 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Mon, 28 Oct 2024 15:53:15 -0700 Subject: [PATCH 01/15] accidentally committed, safe to delete --- .../config/core-pool-sysctl.yaml.bak | 43 ------------------- 1 file changed, 43 deletions(-) delete mode 100644 vendor/google/gke/node-pool/config/core-pool-sysctl.yaml.bak diff --git a/vendor/google/gke/node-pool/config/core-pool-sysctl.yaml.bak b/vendor/google/gke/node-pool/config/core-pool-sysctl.yaml.bak deleted file mode 100644 index 1e22a09b9..000000000 --- a/vendor/google/gke/node-pool/config/core-pool-sysctl.yaml.bak +++ /dev/null @@ -1,43 +0,0 @@ -kubeletConfig: - cpuManagerPolicy: static -# kubeletExtraConfig: -# allowedUnsafeSysctls: 'net.core.*,net.ipv4.*' -linuxConfig: - sysctl: - # tune the ipv4 settings to not cause nginx to use all of the tcp memory - # addresses: https://jira-secure.berkeley.edu/browse/DH-3 - # - # following this process: - # https://cloud.google.com/kubernetes-engine/docs/how-to/node-system-config - # - # man page: - # https://man7.org/linux/man-pages/man7/tcp.7.html - # - # figures below are measured in units of system page size (4096B), - # and gleaned from the following articles: - # https://cromwell-intl.com/open-source/performance-tuning/tcp.html - # https://www.ibm.com/docs/en/linux-on-systems?topic=tuning-tcpip-ipv4-settings - # https://www.ibm.com/docs/en/linux-on-systems?topic=tuning-network-stack-settings - # - # net.ipv4.tcp_mem seems to be automagically generated from the supplied tcp_rmem - # and tcp_wmem settings. i believe? - # - # here be dragons. - # - # original values (as of 2023-19-04): - # net.core.netdev_max_backlog=1000 - # net.core.rmem_max=212992 - # net.core.wmem_max=212992 - # net.ipv4.tcp_rmem=4096 87380 6291456 - # net.ipv4.tcp_wmem=4096 16384 4194304 - # - # https://fasterdata.es.net/host-tuning/linux/#toc-anchor-2 - net.core.netdev_max_backlog: '30000' - net.ipv4.tcp_max_syn_backlog: '8192' - net.core.rmem_default: - net.core.rmem_max: '67108864' - net.core.wmem_max: '67108864' - net.ipv4.tcp_rmem: '4096 87380 33554432' - net.ipv4.tcp_wmem: '4096 87380 33554432' - # http://simonhf.wordpress.com/2010/10/01/node-js-versus-sxe-hello-world-complexity-speed-and-memory-usage/ - net.core.somaxconn: '65535' From de1e3efeab81d4e9e5ed56f42d467b653df003df Mon Sep 17 00:00:00 2001 From: shane knapp Date: Mon, 28 Oct 2024 15:58:24 -0700 Subject: [PATCH 02/15] end of file fixer --- .../google/gke/node-pool/config/user-pool-sysctl.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 vendor/google/gke/node-pool/config/user-pool-sysctl.yaml diff --git a/vendor/google/gke/node-pool/config/user-pool-sysctl.yaml b/vendor/google/gke/node-pool/config/user-pool-sysctl.yaml new file mode 100644 index 000000000..48a476106 --- /dev/null +++ b/vendor/google/gke/node-pool/config/user-pool-sysctl.yaml @@ -0,0 +1,11 @@ +kubeletConfig: + cpuManagerPolicy: static +linuxConfig: + sysctl: + # tune the systcl settings of user pools to allow for more ephemeral ports + # https://jira-secure.berkeley.edu/browse/DH-377 + # https://github.com/jupyterhub/configurable-http-proxy/issues/557 + # + # original values: + # net.ipv4.ip_local_port_range = 32768 60999 + net.ipv4.ip_local_port_range=10000 65000 From f495b4a978105f7f9be2fee60667dc453687ec72 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Mon, 28 Oct 2024 15:59:48 -0700 Subject: [PATCH 03/15] update docs --- docs/tasks/new-hub.qmd | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/tasks/new-hub.qmd b/docs/tasks/new-hub.qmd index a59f5e0ae..ac4755416 100644 --- a/docs/tasks/new-hub.qmd +++ b/docs/tasks/new-hub.qmd @@ -103,6 +103,7 @@ gcloud container node-pools create "user--" \ --scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" \ --no-enable-autoupgrade --enable-autorepair \ --max-surge-upgrade 1 --max-unavailable-upgrade 0 --max-pods-per-node "110" + --system-config-from-file=vendor/google/gke/node-pool/config/user-pool-sysctl.yaml ``` ### Creating a new filestore instance From c43e78cd4f3a16b1afcf76002085d3324343610c Mon Sep 17 00:00:00 2001 From: shane knapp Date: Mon, 28 Oct 2024 16:04:44 -0700 Subject: [PATCH 04/15] derp, we're not modifying the user pool --- .../google/gke/node-pool/config/user-pool-sysctl.yaml | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 vendor/google/gke/node-pool/config/user-pool-sysctl.yaml diff --git a/vendor/google/gke/node-pool/config/user-pool-sysctl.yaml b/vendor/google/gke/node-pool/config/user-pool-sysctl.yaml deleted file mode 100644 index 48a476106..000000000 --- a/vendor/google/gke/node-pool/config/user-pool-sysctl.yaml +++ /dev/null @@ -1,11 +0,0 @@ -kubeletConfig: - cpuManagerPolicy: static -linuxConfig: - sysctl: - # tune the systcl settings of user pools to allow for more ephemeral ports - # https://jira-secure.berkeley.edu/browse/DH-377 - # https://github.com/jupyterhub/configurable-http-proxy/issues/557 - # - # original values: - # net.ipv4.ip_local_port_range = 32768 60999 - net.ipv4.ip_local_port_range=10000 65000 From b3ae2db22934865bf1ff599b8b4c45c30814d590 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Mon, 28 Oct 2024 16:05:24 -0700 Subject: [PATCH 05/15] derp, we're not modifying the user pool --- .../node-pool/config/core-pool-sysctl.yaml | 96 ++++++++++--------- 1 file changed, 52 insertions(+), 44 deletions(-) diff --git a/vendor/google/gke/node-pool/config/core-pool-sysctl.yaml b/vendor/google/gke/node-pool/config/core-pool-sysctl.yaml index 793ae82e3..ce62ca14c 100644 --- a/vendor/google/gke/node-pool/config/core-pool-sysctl.yaml +++ b/vendor/google/gke/node-pool/config/core-pool-sysctl.yaml @@ -4,48 +4,56 @@ kubeletConfig: # allowedUnsafeSysctls: 'net.core.*,net.ipv4.*' linuxConfig: sysctl: - # tune the ipv4 settings to not cause nginx to use all of the tcp memory - # addresses: https://jira-secure.berkeley.edu/browse/DH-3 - # - # following this process: - # https://cloud.google.com/kubernetes-engine/docs/how-to/node-system-config - # - # man page: - # https://man7.org/linux/man-pages/man7/tcp.7.html - # - # figures below are measured in units of system page size (4096B), - # and gleaned from the following articles: - # https://cromwell-intl.com/open-source/performance-tuning/tcp.html - # https://www.ibm.com/docs/en/linux-on-systems?topic=tuning-tcpip-ipv4-settings - # https://www.ibm.com/docs/en/linux-on-systems?topic=tuning-network-stack-settings - # - # net.ipv4.tcp_mem seems to be automagically generated from the supplied tcp_rmem - # and tcp_wmem settings. i believe? - # - # here be dragons. - # - # original values (as of 2023-04-19): - # net.core.netdev_max_backlog=1000 - # net.core.rmem_max=212992 - # net.core.wmem_max=212992 - # net.ipv4.tcp_rmem=4096 87380 6291456 - # net.ipv4.tcp_wmem=4096 16384 4194304 - # - # changes and additional tweaks (2024-04-11): - # net.ipv4.tcp_max_syn_backlog=4096 - # net.core.rmem_max=3276800 - # net.core.wmem_max=3276800 - # net.ipv4.tcp_rmem=4096 87380 16777216 - # net.ipv4.tcp_wmem=4096 87380 16777216 - # net.core.somaxconn=1024 - # - # https://fasterdata.es.net/host-tuning/linux/#toc-anchor-2 - net.core.netdev_max_backlog: '30000' - net.core.somaxconn: '4096' - # net.ipv4.tcp_max_syn_backlog: '8192' + # tune the ipv4 settings to not cause nginx to use all of the tcp memory + # addresses: https://jira-secure.berkeley.edu/browse/DH-3 + # + # following this process: + # https://cloud.google.com/kubernetes-engine/docs/how-to/node-system-config + # + # man page: + # https://man7.org/linux/man-pages/man7/tcp.7.html + # + # figures below are measured in units of system page size (4096B), + # and gleaned from the following articles: + # https://cromwell-intl.com/open-source/performance-tuning/tcp.html + # https://www.ibm.com/docs/en/linux-on-systems?topic=tuning-tcpip-ipv4-settings + # https://www.ibm.com/docs/en/linux-on-systems?topic=tuning-network-stack-settings + # + # net.ipv4.tcp_mem seems to be automagically generated from the supplied tcp_rmem + # and tcp_wmem settings. i believe? + # + # here be dragons. + # + # original values (as of 2023-04-19): + # net.core.netdev_max_backlog=1000 + # net.core.rmem_max=212992 + # net.core.wmem_max=212992 + # net.ipv4.tcp_rmem=4096 87380 6291456 + # net.ipv4.tcp_wmem=4096 16384 4194304 + # + # changes and additional tweaks (2024-04-11): + # net.ipv4.tcp_max_syn_backlog=4096 + # net.core.rmem_max=3276800 + # net.core.wmem_max=3276800 + # net.ipv4.tcp_rmem=4096 87380 16777216 + # net.ipv4.tcp_wmem=4096 87380 16777216 + # net.core.somaxconn=1024 + # + # https://fasterdata.es.net/host-tuning/linux/#toc-anchor-2 + net.core.netdev_max_backlog: '30000' + net.core.somaxconn: '4096' + # net.ipv4.tcp_max_syn_backlog: '8192' - # these values are in bytes - net.core.rmem_max: '67108864' - net.core.wmem_max: '67108864' - net.ipv4.tcp_rmem: '4096 87380 33554432' - net.ipv4.tcp_wmem: '4096 87380 33554432' + # these values are in bytes + net.core.rmem_max: '67108864' + net.core.wmem_max: '67108864' + net.ipv4.tcp_rmem: '4096 87380 33554432' + net.ipv4.tcp_wmem: '4096 87380 33554432' + + # the chp was also running out of ehpemeral ports: + # https://jira-secure.berkeley.edu/browse/DH-377 + # https://github.com/jupyterhub/configurable-http-proxy/issues/557 + # + # original values: + # net.ipv4.ip_local_port_range = 32768 60999 + net.ipv4.ip_local_port_range: '10000 65000' From 68b4dcb8f4289b39edd596edb608d275ce9e725f Mon Sep 17 00:00:00 2001 From: shane knapp Date: Mon, 28 Oct 2024 16:13:45 -0700 Subject: [PATCH 06/15] undo doc change --- docs/tasks/new-hub.qmd | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/tasks/new-hub.qmd b/docs/tasks/new-hub.qmd index ac4755416..a59f5e0ae 100644 --- a/docs/tasks/new-hub.qmd +++ b/docs/tasks/new-hub.qmd @@ -103,7 +103,6 @@ gcloud container node-pools create "user--" \ --scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" \ --no-enable-autoupgrade --enable-autorepair \ --max-surge-upgrade 1 --max-unavailable-upgrade 0 --max-pods-per-node "110" - --system-config-from-file=vendor/google/gke/node-pool/config/user-pool-sysctl.yaml ``` ### Creating a new filestore instance From 1bf7c6809fd3b3c90fc31bcb7f49d84e6a5902cd Mon Sep 17 00:00:00 2001 From: shane knapp Date: Tue, 29 Oct 2024 11:25:42 -0700 Subject: [PATCH 07/15] revert addition of ephemeral port settings --- vendor/google/gke/node-pool/config/core-pool-sysctl.yaml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/vendor/google/gke/node-pool/config/core-pool-sysctl.yaml b/vendor/google/gke/node-pool/config/core-pool-sysctl.yaml index ce62ca14c..c81b79680 100644 --- a/vendor/google/gke/node-pool/config/core-pool-sysctl.yaml +++ b/vendor/google/gke/node-pool/config/core-pool-sysctl.yaml @@ -49,11 +49,3 @@ linuxConfig: net.core.wmem_max: '67108864' net.ipv4.tcp_rmem: '4096 87380 33554432' net.ipv4.tcp_wmem: '4096 87380 33554432' - - # the chp was also running out of ehpemeral ports: - # https://jira-secure.berkeley.edu/browse/DH-377 - # https://github.com/jupyterhub/configurable-http-proxy/issues/557 - # - # original values: - # net.ipv4.ip_local_port_range = 32768 60999 - net.ipv4.ip_local_port_range: '10000 65000' From fc0c73f7cc997fbc7bf52c470c9f53f3c544b1fc Mon Sep 17 00:00:00 2001 From: shane knapp Date: Tue, 29 Oct 2024 11:28:34 -0700 Subject: [PATCH 08/15] add .bak to gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 1bdc6b279..93baf27cd 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,6 @@ hub/charts **/__pycache__ **/*.pyc +# random stuff **/.DS_Store +**/*.bak From 76b825bc7ffe84d960797831ef189ebb9e490505 Mon Sep 17 00:00:00 2001 From: "Image Builder Bot[tm]" Date: Wed, 30 Oct 2024 17:09:34 +0000 Subject: [PATCH 09/15] update a11y image tag to 710a5e6cc90c --- deployments/a11y/hubploy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployments/a11y/hubploy.yaml b/deployments/a11y/hubploy.yaml index 0c9d33c63..bfe84d961 100644 --- a/deployments/a11y/hubploy.yaml +++ b/deployments/a11y/hubploy.yaml @@ -1,6 +1,6 @@ images: images: - - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/a11y-user-image:9cb7ae26ffbe + - name: us-central1-docker.pkg.dev/ucb-datahub-2018/user-images/a11y-user-image:710a5e6cc90c cluster: provider: gcloud From 7998db5cf45fed47e45fab54b613ac0fc5283a76 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 30 Oct 2024 11:18:19 -0700 Subject: [PATCH 10/15] set net.ipv4.ip_local_port_range to 10000 65000 --- hub/values.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hub/values.yaml b/hub/values.yaml index 1883d69db..8eab2cc64 100644 --- a/hub/values.yaml +++ b/hub/values.yaml @@ -39,6 +39,16 @@ jupyterhub: # https://github.com/Jimbly/http-proxy-node16/commit/56283e33edfc7aad8c2605dd493da8a196b4371d # https://github.com/consideRatio/configurable-http-proxy/commits/main/ # https://jira-secure.berkeley.edu/browse/DH-382 timeouts break stuff + # + # bump the default ip_local_port_range from "32768 60999" to "10000 65000" + # https://z2jh.jupyter.org/en/latest/resources/reference.html#proxy-chp-extrapodspec + # https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec + # + extraPodSpec: + securityContext: + sysctls: + name: "net.ipv4.ip_local_port_range" + value: "10000 65000" image: tag: 4.6.2 # extraCommandLineFlags: From e5caaf63e41c9d1e861b0e19c8c7874dc754198e Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 30 Oct 2024 11:18:42 -0700 Subject: [PATCH 11/15] trailing spaces --- hub/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hub/values.yaml b/hub/values.yaml index 8eab2cc64..1d9e1c622 100644 --- a/hub/values.yaml +++ b/hub/values.yaml @@ -43,10 +43,10 @@ jupyterhub: # bump the default ip_local_port_range from "32768 60999" to "10000 65000" # https://z2jh.jupyter.org/en/latest/resources/reference.html#proxy-chp-extrapodspec # https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec - # + # extraPodSpec: securityContext: - sysctls: + sysctls: name: "net.ipv4.ip_local_port_range" value: "10000 65000" image: From 9a54d65969fd3108b4bfdcd86c1a42515a079c71 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 30 Oct 2024 11:43:39 -0700 Subject: [PATCH 12/15] forcing a rebuild i hope --- hub/values.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hub/values.yaml b/hub/values.yaml index 1d9e1c622..3c0edda2b 100644 --- a/hub/values.yaml +++ b/hub/values.yaml @@ -45,10 +45,11 @@ jupyterhub: # https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec # extraPodSpec: + # why u fail? securityContext: sysctls: name: "net.ipv4.ip_local_port_range" - value: "10000 65000" + value: "10000 65001" image: tag: 4.6.2 # extraCommandLineFlags: From 9b451d161364379f322cc60a15415767f5acf05f Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 30 Oct 2024 11:44:21 -0700 Subject: [PATCH 13/15] forcing a rebuild i hope part deux --- hub/values.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hub/values.yaml b/hub/values.yaml index 3c0edda2b..ebcac0077 100644 --- a/hub/values.yaml +++ b/hub/values.yaml @@ -44,12 +44,12 @@ jupyterhub: # https://z2jh.jupyter.org/en/latest/resources/reference.html#proxy-chp-extrapodspec # https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec # - extraPodSpec: + #extraPodSpec: # why u fail? - securityContext: - sysctls: - name: "net.ipv4.ip_local_port_range" - value: "10000 65001" + # securityContext: + # sysctls: + # name: "net.ipv4.ip_local_port_range" + # value: "10000 65001" image: tag: 4.6.2 # extraCommandLineFlags: From 3e417283d0af9107e0bfd3b8b9264d8b5de82bd2 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 30 Oct 2024 11:45:03 -0700 Subject: [PATCH 14/15] wtf --- hub/Chart.yaml | 2 +- node-placeholder/Chart.yaml | 2 +- node-placeholder/values.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hub/Chart.yaml b/hub/Chart.yaml index 5559675a6..bd41991e2 100644 --- a/hub/Chart.yaml +++ b/hub/Chart.yaml @@ -2,4 +2,4 @@ apiVersion: v1 appVersion: '1.0' description: Deployment Chart for JupyterHub name: hub -version: 20240731-224556.git.8607.hf7abb041 +version: 20240731-224556.git.8734.h9b451d16 diff --git a/node-placeholder/Chart.yaml b/node-placeholder/Chart.yaml index 912579940..79c4385d5 100644 --- a/node-placeholder/Chart.yaml +++ b/node-placeholder/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 20240731-224556.git.8610.hedc17750 +version: 20240731-224556.git.8652.h618d172c # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/node-placeholder/values.yaml b/node-placeholder/values.yaml index 6dada1924..0d3589c06 100644 --- a/node-placeholder/values.yaml +++ b/node-placeholder/values.yaml @@ -4,7 +4,7 @@ image: repository: us-central1-docker.pkg.dev/ucb-datahub-2018/core/node-placeholder-scaler pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. - tag: "20240731-224556.git.8610.hedc17750" + tag: "20240731-224556.git.8652.h618d172c" imagePullSecrets: [] nameOverride: "" From 299c086e47ef718eb8fc59107a609e293a015c98 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 30 Oct 2024 11:51:28 -0700 Subject: [PATCH 15/15] chartpressed and fixed --- hub/Chart.yaml | 2 +- hub/values.yaml | 11 +++++------ node-placeholder/Chart.yaml | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/hub/Chart.yaml b/hub/Chart.yaml index bd41991e2..2dfb4abd2 100644 --- a/hub/Chart.yaml +++ b/hub/Chart.yaml @@ -2,4 +2,4 @@ apiVersion: v1 appVersion: '1.0' description: Deployment Chart for JupyterHub name: hub -version: 20240731-224556.git.8734.h9b451d16 +version: 20240731-224556.git.8735.h3e417283 diff --git a/hub/values.yaml b/hub/values.yaml index ebcac0077..fd5ae1a74 100644 --- a/hub/values.yaml +++ b/hub/values.yaml @@ -44,12 +44,11 @@ jupyterhub: # https://z2jh.jupyter.org/en/latest/resources/reference.html#proxy-chp-extrapodspec # https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec # - #extraPodSpec: - # why u fail? - # securityContext: - # sysctls: - # name: "net.ipv4.ip_local_port_range" - # value: "10000 65001" + extraPodSpec: + securityContext: + sysctls: + - name: net.ipv4.ip_local_port_range + value: "10000 65000" image: tag: 4.6.2 # extraCommandLineFlags: diff --git a/node-placeholder/Chart.yaml b/node-placeholder/Chart.yaml index 79c4385d5..e9e55d215 100644 --- a/node-placeholder/Chart.yaml +++ b/node-placeholder/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 20240731-224556.git.8652.h618d172c +version: 20240731-224556.git.8735.h3e417283 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to