Merge branch 'release/0.1.7'

ml-tooling · Oct 22, 2019 · b3a169a · b3a169a
2 parents 9ea90a2 + 48d9999
commit b3a169a
Show file tree

Hide file tree

Showing 12 changed files with 207 additions and 98 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -92,7 +92,7 @@ RUN \
 COPY resources/mlhubspawner /mlhubspawner
 
 RUN \
-   pip install --no-cache dockerspawner && \
+   pip install --no-cache git+https://github.com/jupyterhub/dockerspawner@d1f27e2855d2cefbdb25b29cc069b9ca69d564e3 && \
    pip install --no-cache git+https://github.com/ml-tooling/nativeauthenticator@983b203069ca797ff5c595f985075c11ae17656c && \
    pip install --no-cache git+https://github.com/ryanlovett/imagespawner && \
    pip install --no-cache /mlhubspawner && \
@@ -119,6 +119,9 @@ RUN PYCURL_SSL_LIBRARY=openssl pip3 install --no-cache-dir \
          # Cleanup
          clean-layer.sh
 
+RUN pip3 install oauthenticator psutil
+RUN apt-get update && apt-get install -y pcregrep && clean-layer.sh
+
 ### END INCUBATION ZONE ###
 
 ### CONFIGURATION ###
@@ -150,7 +153,8 @@ ENV \
    START_SSH=true \
    START_JHUB=true \
    START_CHP=false \
-   EXECUTION_MODE="local"
+   EXECUTION_MODE="local" \
+   HUB_NAME="mlhub"
 
 ### END CONFIGURATION ###
 
@@ -206,3 +210,4 @@ CMD ["/bin/bash", "/resources/docker-entrypoint.sh"]
 
 # The port on which nginx listens and checks whether it's http(s) or ssh traffic
 EXPOSE 8080
+
diff --git a/README.md b/README.md
@@ -49,14 +49,13 @@ Most parts will be identical to the configuration of Jupyterhub 1.0.0. One of th
 ```bash
 docker run \
     -p 8080 \
-    --name mlhub \
     -v /var/run/docker.sock:/var/run/docker.sock \
     -v jupyterhub_data:/data \
     mltooling/ml-hub:latest
 ```
 
 To persist the hub data, such as started workspaces and created users, mount a directory to `/data`.
-A name (`--name`) must be set for the mlhub container, since we let the workspace container connect to the hub not via its docker id but its docker name. This way, the workspaces can still connect to the hub in case it was deleted and re-created (for example when the hub was updated).
+Any name passed via `--name` will be overridden by the environment variable `HUB_NAME`, because the hub renames its own container to that value at startup.
 
 For Kubernetes deployment, we forked and modified [zero-to-jupyterhub-k8s](https://github.com/jupyterhub/zero-to-jupyterhub-k8s) which you can find [here](https://github.com/ml-tooling/zero-to-mlhub-k8s).
 
@@ -74,6 +73,14 @@ Here are the additional environment variables for the hub:
         <th>Description</th>
         <th>Default</th>
     </tr>
+    <tr>
+        <td>HUB_NAME</td>
+        <td>In Docker-local mode, the hub container is (re-)named to the value of this environment variable. All resources created by the hub take this name into account, so multiple hub instances can run side by side without naming conflicts.
+        Furthermore, the workspace containers connect to the hub via its Docker name rather than its Docker id. This way, the workspaces can still connect to the hub after it has been deleted and re-created (for example, when the hub was updated).
+        The value must be DNS-compliant (letters, digits, and hyphens only; not digits only; no leading or trailing hyphen) and between 1 and 5 characters long.
+        </td>
+        <td>mlhub</td>
+    </tr>
     <tr>
         <td>SSL_ENABLED</td>
         <td>Enable SSL. If you don't provide an ssl certificate as described in <a href="https://github.com/ml-tooling/ml-hub#enable-sslhttps">Section "Enable SSL/HTTPS"</a>, certificates will be generated automatically. As this auto-generated certificate is not signed, you have to trust it in the browser. Without ssl enabled, ssh access won't work as the container uses a single port and has to tell https and ssh traffic apart.</td>

diff --git a/resources/docker-entrypoint.sh b/resources/docker-entrypoint.sh
@@ -14,25 +14,16 @@ if [ "$execution_mode" == "k8s" ]; then
   # Preserve Kubernetes-specific environment variables for sshd process
   echo "export KUBERNETES_SERVICE_HOST=$KUBERNETES_SERVICE_HOST" >> $SSHD_ENVIRONMENT_VARIABLES
   echo "export KUBERNETES_SERVICE_PORT=$KUBERNETES_SERVICE_PORT" >> $SSHD_ENVIRONMENT_VARIABLES
-fi
-
-# It is possible to override the default sshd target with this command,
-# e.g. if it runs in a different container
-if [ ! -z "${SSHD_TARGET}" ]; then
-  sed -i "s/127.0.0.1:22/${SSHD_TARGET}/g" /etc/nginx/nginx.conf
+else
+  if ! echo $HUB_NAME | pcregrep "^(?![0-9]+$)(?!-)[a-zA-Z0-9-]{1,5}(?<!-)$" > /dev/null; then
+    echo "Container name for ml-hub is either too long or not DNS-compatible. Make sure that a DNS-compatible name (--env HUB_NAME) with 1 to 5 characters is provided for the ml-hub container."
+    exit 1
+  fi
 fi
 
 # create / copy certificates
 $_RESOURCES_PATH/scripts/setup_certs.sh
 
-if [ "${START_NGINX}" == true ]; then
-  # Configure and start nginx
-  # TODO: restart nginx
-  # TODO: make dependent on Kubernetes mode
-
-  python $_RESOURCES_PATH/scripts/run_nginx.py
-fi
-
 function start_ssh {
     echo "Start SSH Daemon service"
     # Run ssh-bastion image entrypoint
@@ -62,6 +53,19 @@ if [ "${START_CHP}" == true ]; then
   start_http_proxy
 fi
 
+if [ "${START_NGINX}" == true ]; then
+  # The default sshd target can be overridden via the SSHD_TARGET environment variable,
+  # e.g. if sshd runs in a different container
+  if [ ! -z "${SSHD_TARGET}" ]; then
+    sed -i "s/127.0.0.1:22/${SSHD_TARGET}/g" /etc/nginx/nginx.conf
+  fi
+  # Configure and start nginx
+  # TODO: restart nginx
+  # TODO: make dependent on Kubernetes mode
+
+  python $_RESOURCES_PATH/scripts/run_nginx.py
+fi
+
 # Copied from: https://docs.docker.com/config/containers/multi-service_container/
 # Naive check runs checks once a minute to see if either of the processes exited.
 # This illustrates part of the heavy lifting you need to do if you want to run

diff --git a/resources/jupyterhub-mod/template-home.html b/resources/jupyterhub-mod/template-home.html
@@ -17,6 +17,15 @@
       {% if not default_server.active %}Start{% endif %}
       My Server
       </a>
+      {% if default_server.is_update_available is defined %}
+      {% if default_server.is_update_available() %}
+      <a id="start" role="button" class="btn btn-lg btn-primary" 
+        onclick="fetch('{{ base_url }}api/users/{{ user.name }}/server', {'method': 'DELETE'})
+                  .then(() => { fetch('{{ base_url }}api/users/{{ user.name }}/server', {'method': 'POST', 'body': JSON.stringify({'update': true})}); window.setTimeout(() => location.reload(), 1500);})">
+      Update Workspace
+      </a>
+      {% endif %}
+      {% endif %}
     </div>
   </div>
   {% if allow_named_servers %}

diff --git a/resources/jupyterhub_config.py b/resources/jupyterhub_config.py
@@ -3,9 +3,45 @@
 """
 
 import os
+import socket
+
+from mlhubspawner import utils
+from subprocess import call
 
 c = get_config()
 
+# Override the JupyterHub `normalize_username` function to remove problematic characters from the username, independent of the authenticator in use.
+# E.g. if the username is "lastname, firstname" and the comma and whitespace are not removed, the browser encodes them, which can break routing in our nginx proxy,
+# especially for the tools part.
+# Anyone running the hub can override this behavior in the same way, via a mounted `jupyterhub_user_config.py` (Docker-local) or via `hub.extraConfig` (Kubernetes).
+from jupyterhub.auth import Authenticator
+original_normalize_username = Authenticator.normalize_username
+def custom_normalize_username(self, username):
+    username = original_normalize_username(self, username)
+    for forbidden_username_char in [" ", ",", ";", "."]:
+        username = username.replace(forbidden_username_char, "")
+    return username
+Authenticator.normalize_username = custom_normalize_username
+
+### Helper Functions ###
+
+def get_or_init(config: object, config_type: type) -> object:
+    if not isinstance(config, config_type):
+        return config_type()
+    return config
+
+def combine_config_dicts(*configs) -> dict:
+    combined_config = {}
+    for config in configs:
+        if not isinstance(config, dict):
+            config = {}
+        combined_config.update(config)
+    return combined_config
+
+### END HELPER FUNCTIONS ###
+
+ENV_HUB_NAME = os.environ['HUB_NAME']
+
 # User containers will access hub by container name on the Docker network
 c.JupyterHub.hub_ip = '0.0.0.0' #'research-hub'
 c.JupyterHub.port = 8000
@@ -34,16 +70,16 @@
 # --- Spawner-specific ----
 c.JupyterHub.spawner_class = 'mlhubspawner.MLHubDockerSpawner' # override in your config if you want to have a different spawner. If it is or inherits from DockerSpawner, the c.DockerSpawner config can have an effect.
 
-c.Spawner.image = "mltooling/ml-workspace:0.8.6"
-c.Spawner.workspace_images = [c.Spawner.image, "mltooling/ml-workspace-gpu:0.8.6", "mltooling/ml-workspace-r:0.8.6"]
+c.Spawner.image = "mltooling/ml-workspace:0.8.7"
+c.Spawner.workspace_images = [c.Spawner.image, "mltooling/ml-workspace-gpu:0.8.7", "mltooling/ml-workspace-r:0.8.7", "mltooling/ml-workspace-spark:0.8.7"]
 c.Spawner.notebook_dir = '/workspace'
 
 # Connect containers to this Docker network
 c.Spawner.use_internal_ip = True
 c.Spawner.extra_host_config = { 'shm_size': '256m' }
 
 c.Spawner.prefix = 'ws' 
-c.Spawner.name_template = c.Spawner.prefix + '-{username}-hub{servername}' # override in your config when you want to have a different name schema. Also consider changing c.Authenticator.username_pattern and check the environment variables to permit ssh connection
+c.Spawner.name_template = c.Spawner.prefix + '-{username}-' + ENV_HUB_NAME + '{servername}' # override in your config when you want to have a different name schema. Also consider changing c.Authenticator.username_pattern and check the environment variables to permit ssh connection
 
 # Don't remove containers once they are stopped - persist state
 c.Spawner.remove_containers = False
@@ -81,14 +117,23 @@
     from z2jh import set_config_if_not_none
     set_config_if_not_none(c.KubeSpawner, 'workspace_images', 'singleuser.workspaceImages')
 
-    if not isinstance(c.KubeSpawner.environment, dict):
-        c.KubeSpawner.environment = {}
+    c.KubeSpawner.environment = get_or_init(c.KubeSpawner.environment, dict)
+    # if not isinstance(c.KubeSpawner.environment, dict):
+    #     c.KubeSpawner.environment = {}
     c.KubeSpawner.environment.update(default_env)
+else:
+    client_kwargs = {**get_or_init(c.DockerSpawner.client_kwargs, dict), **get_or_init(c.MLHubDockerSpawner.client_kwargs, dict)}
+    tls_config = {**get_or_init(c.DockerSpawner.tls_config, dict), **get_or_init(c.MLHubDockerSpawner.tls_config, dict)}
+
+    docker_client = utils.init_docker_client(client_kwargs, tls_config)
+    docker_client.containers.list(filters={"id": socket.gethostname()})[0].rename(ENV_HUB_NAME)
+    c.MLHubDockerSpawner.hub_name = ENV_HUB_NAME
 
 # Add nativeauthenticator-specific templates
 if c.JupyterHub.authenticator_class == NATIVE_AUTHENTICATOR_CLASS:
     import nativeauthenticator
     # if template_paths is not set yet in user_config, it is of type traitlets.config.loader.LazyConfigValue; in other words, it was not initialized yet
-    if not isinstance(c.JupyterHub.template_paths, list):
-        c.JupyterHub.template_paths = []
+    c.JupyterHub.template_paths = get_or_init(c.JupyterHub.template_paths, list)
+    # if not isinstance(c.JupyterHub.template_paths, list):
+    #     c.JupyterHub.template_paths = []
     c.JupyterHub.template_paths.append("{}/templates/".format(os.path.dirname(nativeauthenticator.__file__)))
diff --git a/resources/mlhubspawner/mlhubspawner/mlhubkubernetesspawner.py b/resources/mlhubspawner/mlhubspawner/mlhubkubernetesspawner.py
@@ -30,7 +30,7 @@ class MLHubKubernetesSpawner(KubeSpawner):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
-        self.hub_name = os.getenv("MLHUB_NAME", "mlhub")
+        self.hub_name = os.getenv("HUB_NAME", "mlhub")
         self.default_label = {"origin": self.hub_name}   
 
     @default('options_form')
@@ -79,7 +79,7 @@ def start(self):
             self.cpu_limit = float(self.user_options.get('cpu_limit'))
 
         if self.user_options.get('mem_limit'):
-            memory = str(self.user_options.get('mem_limit'))
+            memory = str(self.user_options.get('mem_limit')) + "G"
             self.mem_limit = memory.upper().replace("GB", "G").replace("KB", "K").replace("MB", "M").replace("TB", "T")
 
         #if self.user_options.get('is_mount_volume') == 'on':