Skip to content

Commit

Permalink
Persistent logs, with uploading to object storage (#28, PR #31)
Browse files Browse the repository at this point in the history
  • Loading branch information
vlerkin authored Oct 31, 2024
1 parent 01e473d commit 7975a70
Show file tree
Hide file tree
Showing 12 changed files with 590 additions and 8 deletions.
35 changes: 33 additions & 2 deletions kubernetes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ spec:
mountPath: /opt/app/scrapyd_k8s.conf
readOnly: true
subPath: scrapyd_k8s.conf
#- name: joblogs
# mountPath: /data
# Enable if your spider repository needs a pull secret
# - name: scrapyd-k8s-pull-secret
# mountPath: /opt/app/.docker
Expand All @@ -59,6 +61,9 @@ spec:
- configMap:
name: scrapyd-k8s-config
name: scrapyd-k8s-config
#- name: joblogs
# persistentVolumeClaim:
# claimName: pv-claim
# Enable if your spider repository needs a pull secret
# - secret:
# secretName: pull-secret
Expand All @@ -80,7 +85,7 @@ data:
repository = scrapyd_k8s.repository.Remote
launcher = scrapyd_k8s.launcher.K8s
namespace = default
# This is an example spider that should work out of the box.
Expand All @@ -106,6 +111,29 @@ metadata:
app.kubernetes.io/name: spider-example
stringData:
FOO_API_KEY: "1234567890abcdef"
#---
#apiVersion: v1
#kind: PersistentVolume
#metadata:
# name: pv-volume
#spec:
# capacity:
# storage: 5Gi
# accessModes:
# - ReadWriteOnce
# hostPath:
# path: "/mnt/data"
#---
#apiVersion: v1
#kind: PersistentVolumeClaim
#metadata:
# name: pv-claim
#spec:
# accessModes:
# - ReadWriteOnce
# resources:
# requests:
# storage: 5Gi
---
apiVersion: v1
kind: ConfigMap
Expand Down Expand Up @@ -144,10 +172,13 @@ metadata:
rules:
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["pods/exec"]
verbs: ["get"]
- apiGroups: [""]
resources: ["pods/log"]
verbs: ["get"]
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["get", "list", "create", "delete"]
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ kubernetes>=27.2.0 # introduction of suspend in jobspec
flask>=2.0.0
natsort>=8.0.0
Flask-BasicAuth>=0.2.0
MarkupSafe>=2.1.5
apache-libcloud>=3.8.0
15 changes: 15 additions & 0 deletions scrapyd_k8s.sample-k8s.conf
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,18 @@ requests_cpu = 80m
requests_memory = 0.12G
limits_cpu = 0.5
limits_memory = 0.2G

#[joblogs]
# Choose storage provider
#storage_provider = s3
#container_name = scrapyd-k8s-example-bucket

# Number of unique log lines to check (must be at least 2)
#num_lines_to_check = 2

#[joblogs.storage.s3]
# Set your S3 access key via the S3_KEY environment variable, or enter it below
#key = ${S3_KEY}
# Set your S3 secret key via the S3_SECRET environment variable, or enter it below
#secret = ${S3_SECRET}
#region = eu-north-1
19 changes: 17 additions & 2 deletions scrapyd_k8s/__main__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
from .api import run
import logging
import sys
from .api import run, config
from .joblogs import joblogs_init

run()
def setup_logging():
    """Configure root logging to emit INFO-and-above records on stdout."""
    # Timestamp, logger name, level and message on every record.
    log_format = '%(asctime)s %(name)s [%(levelname)s]: %(message)s'
    stdout_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(
        level=logging.INFO,
        format=log_format,
        handlers=[stdout_handler],
    )

# Script entry point. The order matters: logging is configured first so that
# the job-log initialisation and the API server can both emit log records.
if __name__ == "__main__":
setup_logging()
# Start job log handling (if configured) before the server starts.
joblogs_init(config)
# Hand control to the API's run().
run()
10 changes: 8 additions & 2 deletions scrapyd_k8s/api.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
import uuid
import logging

from flask import Flask, request, Response, jsonify
from flask_basicauth import BasicAuth
from markupsafe import escape
from natsort import natsort_keygen, ns

from .config import Config
Expand All @@ -13,7 +13,7 @@
repository = (config.repository_cls())(config)
launcher = (config.launcher_cls())(config)
scrapyd_config = config.scrapyd()

logger = logging.getLogger(__name__)

@app.get("/")
def home():
Expand Down Expand Up @@ -155,5 +155,11 @@ def run():
if config_username is not None and config_password is not None:
enable_authentication(app, config_username, config_password)

if config.joblogs() is not None:
launcher.enable_joblogs(config)
logger.info("Job logs handling enabled.")
else:
logger.debug("Job logs handling not enabled; 'joblogs' configuration section is missing.")

# run server
app.run(host=host, port=port)
14 changes: 14 additions & 0 deletions scrapyd_k8s/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,27 @@ def launcher_cls(self):
pkg, cls = repo.rsplit('.', 1)
return getattr(import_module(pkg), cls)

def joblogs(self):
    """Return the ``joblogs`` configuration section, or ``None`` if it is absent."""
    if not self._config.has_section('joblogs'):
        return None
    return self._config['joblogs']

def joblogs_storage(self, provider):
    """Return the ``joblogs.storage.<provider>`` section, or ``None`` if it is absent."""
    # Build the section name once and reuse it for the check and the lookup.
    section_name = 'joblogs.storage.%s' % provider
    if self._config.has_section(section_name):
        return self._config[section_name]
    return None

def listprojects(self):
    """Return the collection of projects stored on this configuration object."""
    known_projects = self._projects
    return known_projects

def project(self, project):
    """Return a ``ProjectConfig`` for *project*, or ``None`` if the project is unknown."""
    if project not in self._projects:
        return None
    # Each project's settings live in the 'project.<id>' section.
    section = self._config['project.' + project]
    return ProjectConfig(self._config, project, section)

def namespace(self):
    """Return the ``namespace`` value from the scrapyd settings, falling back to ``'default'``."""
    scrapyd_settings = self.scrapyd()
    return scrapyd_settings.get('namespace', 'default')

class ProjectConfig:
def __init__(self, config, projectid, projectconfig):
self._id = projectid
Expand Down
25 changes: 25 additions & 0 deletions scrapyd_k8s/joblogs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import logging
from scrapyd_k8s.joblogs.log_handler_k8s import KubernetesJobLogHandler

logger = logging.getLogger(__name__)

def joblogs_init(config):
    """
    Start the Kubernetes job log handler when a storage provider is configured.

    Parameters
    ----------
    config : Config
        Configuration object containing settings for job logs and storage.

    Returns
    -------
    None
    """
    section = config.joblogs()
    # Without a joblogs section, or without a storage provider in it,
    # only a warning is logged and no handler is started.
    if not section or section.get('storage_provider') is None:
        logger.warning("No storage provider configured; job logs will not be uploaded.")
        return

    handler = KubernetesJobLogHandler(config)
    handler.start()
    logger.info("Job logs handler started.")
Loading

0 comments on commit 7975a70

Please sign in to comment.