Skip to content

Commit

Permalink
Add node_name to API responses (#13, PR #27)
Browse files Browse the repository at this point in the history
  • Loading branch information
vlerkin authored Apr 10, 2024
1 parent 1e0fda4 commit e6107be
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 18 deletions.
9 changes: 9 additions & 0 deletions kubernetes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ spec:
- containerPort: 6800
name: http
protocol: TCP
env:
- name: MY_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: MY_DEPLOYMENT_NAME
valueFrom:
fieldRef:
fieldPath: metadata.labels['app.kubernetes.io/name']
readinessProbe:
failureThreshold: 3
httpGet:
Expand Down
14 changes: 12 additions & 2 deletions scrapyd_k8s/api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import uuid

from flask import Flask, request
from flask import Flask, request, Response, jsonify
from flask_basicauth import BasicAuth
from markupsafe import escape
from natsort import natsort_keygen, ns
Expand All @@ -14,6 +14,7 @@
launcher = (config.launcher_cls())(config)
scrapyd_config = config.scrapyd()


@app.get("/")
def home():
return "<html><body><h1>scrapyd-k8s</h1></body></html>"
Expand Down Expand Up @@ -102,6 +103,15 @@ def api_listjobs():
# TODO perhaps remove state from jobs
return { 'status': 'ok', 'pending': pending, 'running': running, 'finished': finished }

# middleware that adds "node_name" to each response if it is a JSON
@app.after_request
def after_request(response: Response):
if response.is_json:
data = response.json
data["node_name"] = config.scrapyd().get("node_name", launcher.get_node_name())
response.data = jsonify(data).data
return response

def error(msg, status=200):
return { 'status': 'error', 'message': msg }, status

Expand All @@ -124,4 +134,4 @@ def run():
enable_authentication(app, config_username, config_password)

# run server
app.run(host=host, port=port)
app.run(host=host, port=port)
6 changes: 6 additions & 0 deletions scrapyd_k8s/launcher/docker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import re
import os
import socket

import docker
from ..utils import native_stringify_dict

Expand All @@ -18,6 +21,9 @@ class Docker:
def __init__(self, config):
self._docker = docker.from_env()

def get_node_name(self):
return socket.gethostname()

def listjobs(self, project_id=None):
label = self.LABEL_PROJECT + ('=%s'%(project_id) if project_id else '')
jobs = self._docker.containers.list(all=True, filters={ 'label': label })
Expand Down
7 changes: 7 additions & 0 deletions scrapyd_k8s/launcher/k8s.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

import kubernetes
import kubernetes.stream
from signal import Signals
Expand All @@ -23,6 +25,11 @@ def __init__(self, config):
self._k8s = kubernetes.client.CoreV1Api()
self._k8s_batch = kubernetes.client.BatchV1Api()

def get_node_name(self):
deployment = os.getenv('MY_DEPLOYMENT_NAME', 'default')
namespace = os.getenv('MY_NAMESPACE')
return ".".join([n for n in [namespace, deployment] if n])

def listjobs(self, project=None):
label = self.LABEL_PROJECT + ('=%s'%(project) if project else '')
jobs = self._k8s_batch.list_namespaced_job(namespace=self._namespace, label_selector=label)
Expand Down
39 changes: 23 additions & 16 deletions test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,36 +15,37 @@
MAX_WAIT = int(os.getenv('TEST_MAX_WAIT', '6'))
STATIC_SLEEP = float(os.getenv('TEST_STATIC_SLEEP', '2'))


def test_root_ok():
response = requests.get(BASE_URL)
assert response.status_code == 200
assert response.headers['Content-Type'] == 'text/html; charset=utf-8'
assert 'scrapyd-k8s' in response.text
assert '</html>' in response.text


def test_healthz_ok():
response = requests.get(BASE_URL + '/healthz')
assert response.status_code == 200


def test_daemonstatus_ok():
response = requests.get(BASE_URL + '/daemonstatus.json')
assert_response_ok(response)
# TODO assert response.json() == { 'status': 'ok', ... }


def test_listprojects_ok():
response = requests.get(BASE_URL + '/listprojects.json')
assert_response_ok(response)
assert response.json() == { 'status': 'ok', 'projects': AVAIL_PROJECTS }

json = response.json()
assert json['projects'] == AVAIL_PROJECTS
assert 'node_name' in json

def test_listversions_ok():
response = requests.get(BASE_URL + '/listversions.json?project=' + RUN_PROJECT)
assert_response_ok(response)
assert response.json() == { 'status': 'ok', 'versions': AVAIL_VERSIONS }

json = response.json()
assert json['versions'] == AVAIL_VERSIONS
assert 'node_name' in json

def test_listversions_project_missing():
response = requests.get(BASE_URL + '/listversions.json')
Expand All @@ -54,16 +55,21 @@ def test_listversions_project_not_found():
response = requests.get(BASE_URL + '/listversions.json?project=' + 'nonexistant')
assert_response_error(response, 404)


def test_listspiders_ok():
response = requests.get(BASE_URL + '/listspiders.json?project=' + RUN_PROJECT + '&_version=' + RUN_VERSION)
assert_response_ok(response)
assert response.json() == { 'status': 'ok', 'spiders': AVAIL_SPIDERS }

json = response.json()
assert json['spiders'] == AVAIL_SPIDERS
assert 'node_name' in json

def test_listspiders_ok_without_version():
response = requests.get(BASE_URL + '/listspiders.json?project=' + RUN_PROJECT)
assert_response_ok(response)
assert response.json() == { 'status': 'ok', 'spiders': AVAIL_SPIDERS }

json = response.json()
assert json['spiders'] == AVAIL_SPIDERS
assert 'node_name' in json

def test_listspiders_project_missing():
response = requests.get(BASE_URL + '/listspiders.json')
Expand All @@ -77,7 +83,6 @@ def test_listspiders_version_not_found():
response = requests.get(BASE_URL + '/listspiders.json?project=' + RUN_PROJECT + '&_version=' + 'nonexistant')
assert_response_error(response, 404)


def test_schedule_project_missing():
response = requests.post(BASE_URL + '/schedule.json', data={})
assert_response_error(response, 400)
Expand All @@ -93,7 +98,6 @@ def test_schedule_spider_missing():
# scheduling a non-existing spider will try to start it, so no error
# scheduling a non-existing project version will try to start it, so no error


def test_cancel_project_missing():
response = requests.post(BASE_URL + '/cancel.json', data={})
assert_response_error(response, 400)
Expand All @@ -105,8 +109,6 @@ def test_cancel_jobid_missing():
assert_response_error(response, 400)

# TODO test cancel with invalid signal (currently returns server error, could be improved)


def test_scenario_regular_ok():
scenario_regular({
'project': RUN_PROJECT, '_version': RUN_VERSION, 'spider': RUN_SPIDER,
Expand All @@ -120,7 +122,6 @@ def test_scenario_regular_ok_without_version():
})

# TODO test_scenario_cancel_scheduled_ok (needs a way to make sure a job is not running yet)

def test_scenario_cancel_running_finished_ok():
assert_listjobs()
# schedule a new job and wait until it is running
Expand All @@ -138,16 +139,22 @@ def test_scenario_cancel_running_finished_ok():
'project': RUN_PROJECT, 'job': jobid, 'signal': 'KILL'
})
assert_response_ok(response)
assert response.json() == { 'status': 'ok', 'prevstate': 'running' }

json = response.json()
assert json['prevstate'] == 'running'
assert 'node_name' in json

# wait until the job has stopped
listjobs_wait(jobid, 'finished')
jobinfo = assert_listjobs(finished=jobid)
assert jobinfo == { 'id': jobid, 'project': RUN_PROJECT, 'spider': RUN_SPIDER, 'state': 'finished' }
# then cancel it again, though nothing would happen
response = requests.post(BASE_URL + '/cancel.json', data={ 'project': RUN_PROJECT, 'job': jobid })
assert_response_ok(response)
assert response.json() == { 'status': 'ok', 'prevstate': 'finished' }

json = response.json()
assert json['prevstate'] == 'finished'
assert 'node_name' in json

def scenario_regular(schedule_args):
assert_listjobs()
Expand Down

0 comments on commit e6107be

Please sign in to comment.