From c1afc1641a2b32a7278beaa4c1a49e8e5c98817f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Blanc?= Date: Fri, 3 Nov 2023 17:29:45 +0100 Subject: [PATCH] Initial commit --- .github/workflows/lint.yml | 57 ++++++++++ .github/workflows/publish.yml | 46 ++++++++ .gitignore | 4 + .pre-commit-config.yaml | 51 +++++++++ CONTRIBUTING.md | 19 ++++ Dockerfile | 11 ++ LICENSE | 21 ++++ README.md | 59 ++++++++++ requirements/development.txt | 9 ++ requirements/requirements.txt | 7 ++ schemas/qwc-postgres-fts-service.json | 101 +++++++++++++++++ search_service.py | 149 ++++++++++++++++++++++++++ server.py | 75 +++++++++++++ server.wsgi | 13 +++ setup.cfg | 26 +++++ test.py | 5 + 16 files changed, 653 insertions(+) create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/publish.yml create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 CONTRIBUTING.md create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 README.md create mode 100644 requirements/development.txt create mode 100644 requirements/requirements.txt create mode 100644 schemas/qwc-postgres-fts-service.json create mode 100644 search_service.py create mode 100644 server.py create mode 100644 server.wsgi create mode 100644 setup.cfg create mode 100644 test.py diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..6a7b1bc --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,57 @@ +name: "🐍 Lint" + +# Controls when the action will run. 
Triggers the workflow on push or pull request +# events but only for the main branch +on: + push: + branches: + - main + paths: + - "**.py" + - ".github/workflows/linter.yml" + pull_request: + branches: + - main + paths: + - "**.py" + +env: + PYTHON_VERSION: "3.10" + +jobs: + lint: + name: 🐍 Lint + runs-on: ubuntu-latest + + steps: + - name: Get source code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: "pip" + cache-dependency-path: "requirements/*.txt" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip setuptools wheel + python -m pip install -r requirements/requirements.txt + python -m pip install -r requirements/development.txt + + - name: Format with black + uses: psf/black@stable + with: + options: "--check --verbose" + version: "23.10.0" + + - name: Run isort + uses: isort/isort-action@v1 + + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..c850eee --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,46 @@ +name: 🚀 Publish Docker image + +on: + push: + branches: + - main + tags: + - "v*" + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push-image: + name: 🚀 Build and push Docker image + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + + steps: + - name: Get source code + uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . 
+ push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..943ef2f --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.venv/ +__pycache__/ +.flaskenv +.env diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..c10cc1e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,51 @@ +exclude: ".venv|__pycache__" + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-added-large-files + args: ["--maxkb=500"] + - id: check-case-conflict + - id: check-json + - id: check-merge-conflict + - id: check-yaml + - id: detect-private-key + - id: end-of-file-fixer + - id: fix-byte-order-marker + - id: fix-encoding-pragma + args: [--remove] + - id: pretty-format-json + args: ["--autofix", "--no-sort-keys"] + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.3 + hooks: + - id: ruff + args: ["--fix-only"] + + - repo: https://github.com/psf/black + rev: 23.10.0 + hooks: + - id: black + args: ["--target-version=py310"] + + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + args: ["--profile", "black", "--filter-files"] + + - repo: https://github.com/pycqa/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + language: python + types: + - python + args: + [ + "--config=setup.cfg", + ] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..d71d63f --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,19 @@ +# Contributing Guidelines + +First off, thanks for considering to contribute to this project! + +These are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request. + +## Git hooks + +We use git hooks through [pre-commit](https://pre-commit.com/) to enforce and automatically check some "rules". 
Please install them (`pre-commit install`) before pushing any commit. + +See the relevant configuration file: `.pre-commit-config.yaml`. + +## Code Style + +Make sure your code *roughly* follows [PEP-8](https://www.python.org/dev/peps/pep-0008/) and keeps things consistent with the rest of the code: + +- formatting: [black](https://black.readthedocs.io/) is used to automatically format the code without debate. +- sorted imports: [isort](https://pycqa.github.io/isort/) is used to sort imports +- static analysis: [flake8](https://flake8.pycqa.org/en/latest/) is used to catch some dizziness and keep the source code healthy. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..3ff765b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM sourcepole/qwc-uwsgi-base:alpine-v2023.10.26 + +ADD . /srv/qwc_service + +RUN \ + apk add --no-cache --update --virtual runtime-deps postgresql-libs && \ + apk add --no-cache --update --virtual build-deps git postgresql-dev g++ python3-dev && \ + pip3 install --no-cache-dir -r /srv/qwc_service/requirements/requirements.txt && \ + apk del build-deps + +ENV SERVICE_MOUNTPOINT=/api/v1/postgresfts diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f2d2314 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Benoît Blanc + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..534e2c7 --- /dev/null +++ b/README.md @@ -0,0 +1,59 @@ +QWC Postgres Fulltext Search Service +==================================== + +Setup +----- + +Configuration +------------- + +Usage +----- + +Set the `CONFIG_PATH` environment variable to the path containing the service config and permission files when starting this service (default: `config`). + +Base URL: + + http://localhost:5050/ + +Service API: + + http://localhost:5050/api/ + +Development +----------- + +Create a virtual environment: + + virtualenv --python=/usr/bin/python3 --system-site-packages .venv + +Without system packages: + + virtualenv --python=/usr/bin/python3 .venv + +Activate virtual environment: + + source .venv/bin/activate + +Install requirements: + + pip install -r requirements/requirements.txt + +Set the `CONFIG_PATH` environment variable to the path containing the service config and permission files when starting this service (default: `config`). 
+ + export CONFIG_PATH=../qwc-docker/volumes/config + +Configure environment: + + echo FLASK_ENV=development >.flaskenv + +Start local service: + + python server.py + +Testing +------- + +Run all tests: + + python test.py diff --git a/requirements/development.txt b/requirements/development.txt new file mode 100644 index 0000000..53b0602 --- /dev/null +++ b/requirements/development.txt @@ -0,0 +1,9 @@ +# Development dependencies +# ------------------------ + +black +flake8>=4,<7 +flake8-builtins>=1.5,<3 +flake8-isort>=6,<7 +isort>=5.11,<6 +pre-commit>=3,<4 diff --git a/requirements/requirements.txt b/requirements/requirements.txt new file mode 100644 index 0000000..c61dee2 --- /dev/null +++ b/requirements/requirements.txt @@ -0,0 +1,7 @@ +Flask==2.3.2 +flask-restx==1.1.0 +Flask-JWT-Extended==4.4.4 +SQLAlchemy==1.4.48 +psycopg2==2.9.6 +requests==2.31.0 +qwc-services-core==1.3.19 diff --git a/schemas/qwc-postgres-fts-service.json b/schemas/qwc-postgres-fts-service.json new file mode 100644 index 0000000..cff6baf --- /dev/null +++ b/schemas/qwc-postgres-fts-service.json @@ -0,0 +1,101 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://raw.githubusercontent.com/qwc-services/qwc-postgres-fts-service/master/schemas/qwc-postgres-fts-service.json", + "title": "QWC Postgres FTS Service", + "type": "object", + "properties": { + "$schema": { + "title": "JSON Schema", + "description": "Reference to JSON schema of this config", + "type": "string", + "format": "uri", + "default": "https://raw.githubusercontent.com/qwc-services/qwc-postgres-fts-service/master/schemas/qwc-postgres-fts-service.json" + }, + "service": { + "title": "Service name", + "type": "string", + "const": "postgresFTSSearch" + }, + "config": { + "title": "Config options", + "type": "object", + "properties": { + "search_result_limit": { + "description": "Result count limit per search", + "type": "integer", + "default": 50 + } + } + }, + "resources": { + "title": "Resources", + "type": 
"object", + "properties": { + "documents": { + "title": "Search documents", + "type": "array", + "items": { + "title": "Document", + "type": "object", + "properties": { + "name": { + "description": "Document name", + "type": "string" + }, + "text_search_config": { + "description": "Text search configuration used for search", + "type": "string", + "default": "english" + }, + "db_url": { + "description": "DB connection for result query", + "type": "string" + }, + "schema": { + "description": "Schema to query for search", + "type": "string" + }, + "table": { + "description": "Table to query for search", + "type": "string" + }, + "primary_key": { + "description": "Primary key of the table to query", + "type": "string" + }, + "columns": { + "description": "Attributes to query for search and display as result", + "type": "array", + "items": { + "type": "string" + } + }, + "geometry_column": { + "description": "Geometry column in search result table", + "type": "string", + "default": "geom" + } + }, + "required": [ + "name", + "db_url", + "schema", + "table", + "primary_key", + "display_name", + "columns" + ] + } + } + }, + "required": [ + "documents" + ] + } + }, + "required": [ + "service", + "config", + "resources" + ] +} diff --git a/search_service.py b/search_service.py new file mode 100644 index 0000000..0bab5b0 --- /dev/null +++ b/search_service.py @@ -0,0 +1,149 @@ +import json +from collections import OrderedDict +from datetime import date +from decimal import Decimal +from uuid import UUID + +from qwc_services_core.database import DatabaseEngine +from sqlalchemy.sql import text as sql_text + + +class PostgresFTSClient: + """PostgresFTSClient class""" + + def __init__(self, tenant, logger, config): + """Constructor + + :param str tenant: Tenant ID + :param Logger logger: Application logger + """ + self.logger = logger + self.tenant = tenant + self.config = config + self.default_search_limit = self.config.get("search_result_limit", 50) + self.resources = 
self.load_resources() + self.db_engine = DatabaseEngine() + + def search(self, searchtext): + results = OrderedDict() + for key, document in self.resources.get("documents", []).items(): + self.logger.info(f"Search for document {document['name']}") + columns = (", ").join( + self.escape_column_names( + [document["primary_key"]] + document["columns"] + ) + ) + columns += """, + ST_AsGeoJSON("{geom}") AS json_geom, + ST_Extent("{geom}") AS bbox + """.format( + geom=document.get("geometry_column", "geom") + ) + sql = sql_text( + ( + """ + SELECT {columns} + FROM "{schema}"."{table}" + WHERE ts @@ to_tsquery('{text_search_config}', '{search_string}') + GROUP BY "{primary_key}" + ORDER BY ts_rank(ts, to_tsquery('{text_search_config}', '{search_string}')) DESC + LIMIT {search_result_limit}; + """ + ).format( + columns=columns, + schema=document["schema"], + table=document["table"], + primary_key=document["primary_key"], + text_search_config=document.get("text_search_config", "english"), + search_string=searchtext, + search_result_limit=self.default_search_limit, + ) + ) + self.logger.debug(f"SQL Query : {sql}") + conn = self.db_engine.db_engine(document["db_url"]).connect() + results[key] = [] + try: + result = conn.execute(sql) + for row in result: + row_result = self._feature_from_query(row, document["primary_key"]) + results[key].append(row_result) + self.logger.debug(f"SQL Result for document {key} : {results[key]}") + except Exception as e: + self.logger.error(f"Error for document {key} on query {sql}: {e}") + return results + + def _feature_from_query(self, row, primary_key): + """Build GeoJSON Feature from query result row. 
+ + :param obj row: Row result from query + """ + result = OrderedDict() + for attr in row._mapping: + value = row[attr] + if attr == "json_geom": + geom = json.loads(value) + elif attr == "bbox": + bbox = self.parse_box2d(value) + elif attr == primary_key: + # Ensure UUID primary key is JSON serializable + pk = str(value) + elif isinstance(value, date): + result[attr] = value.isoformat() + elif isinstance(value, Decimal): + result[attr] = float(value) + elif isinstance(value, UUID): + result[attr] = str(value) + else: + result[attr] = value + + return { + "type": "Feature", + "id": pk, + "geometry": geom, + "bbox": bbox, + "properties": result, + } + + def escape_column_names(self, columns): + """Return escaped column names by converting them to + quoted identifiers. + + :param list(str) columns: Column names + """ + return ['"%s"' % column for column in columns] + + def load_resources(self): + """Load service resources from config. + + :param RuntimeConfig config: Config handler + """ + # collect service resources + documents = {} + for document in self.config.resources().get("documents", []): + documents[document["name"]] = document + + return {"documents": documents} + + def parse_box2d(self, box2d): + """Parse Box2D string and return bounding box + as [,,,]. + + :param str box2d: Box2D string + """ + bbox = None + + if box2d is None: + # bounding box is empty + return None + + # extract coords from Box2D string + # e.g. "BOX(950598.12 6003950.34,950758.567 6004010.8)" + # truncate brackets and split into coord pairs + parts = box2d[4:-1].split(",") + if len(parts) == 2: + # split coords, e.g. 
"950598.12 6003950.34" + minx, miny = parts[0].split(" ") + maxx, maxy = parts[1].split(" ") + bbox = [float(minx), float(miny), float(maxx), float(maxy)] + + return bbox diff --git a/server.py b/server.py new file mode 100644 index 0000000..49457c9 --- /dev/null +++ b/server.py @@ -0,0 +1,75 @@ +import logging + +from flask import Flask, jsonify, request +from flask_restx import Api, Resource +from qwc_services_core.auth import auth_manager, optional_auth +from qwc_services_core.runtime_config import RuntimeConfig +from qwc_services_core.tenant_handler import TenantHandler + +from search_service import PostgresFTSClient + +# Flask application +app = Flask(__name__) +api = Api( + app, + version="1.0", + title="Postgres Fulltext Search API", + description="API for QWC Postgres Fulltext Search service", + default_label="Postgres Fulltext Search operations", + doc="/api/", +) + +auth = auth_manager(app, api) + +# create tenant handler +tenant_handler = TenantHandler(app.logger) + +logging.getLogger().setLevel(logging.DEBUG if app.debug else logging.INFO) + + +def search_handler(): + """Get or create a SearchService instance for a tenant.""" + tenant = tenant_handler.tenant() + handler = tenant_handler.handler("postgresFTSSearch", "postgresfts", tenant) + if handler is None: + config_handler = RuntimeConfig("postgresFTSSearch", app.logger) + config = config_handler.tenant_config(tenant) + handler = tenant_handler.register_handler( + "postgresfts", tenant, PostgresFTSClient(tenant, app.logger, config) + ) + return handler + + +@api.route("/search/", "/") +class SearchResult(Resource): + @api.doc("search") + @api.param("searchtext", "Search string") + @optional_auth + def get(self): + """Search for searchtext and return the results""" + searchtext = request.args.get("searchtext") + if not searchtext: + return {"error": "Missing search string"} + + handler = search_handler() + result = handler.search(searchtext) + + return result + + +@app.route("/ready", methods=["GET"]) 
+def ready(): + """readiness probe endpoint""" + return jsonify({"status": "OK"}) + + +@app.route("/healthz", methods=["GET"]) +def healthz(): + """liveness probe endpoint""" + return jsonify({"status": "OK"}) + + +# local webserver +if __name__ == "__main__": + print("Starting Postgres Fulltext Search service...") + app.run(host="localhost", port=5050, debug=True) diff --git a/server.wsgi b/server.wsgi new file mode 100644 index 0000000..a05c298 --- /dev/null +++ b/server.wsgi @@ -0,0 +1,13 @@ +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.realpath(__file__))) + + +def application(environ, start_response): + for key in environ: + if isinstance(environ[key], str): + os.environ[key] = environ[key] + from server import app + + return app(environ, start_response) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..7e542be --- /dev/null +++ b/setup.cfg @@ -0,0 +1,26 @@ +# -- Code quality ------------------------------------ + +[flake8] +count = True +exclude = + # No need to traverse our git directory + .git, + # There's no value in checking cache directories + __pycache__, + # This contains local virtual environments + .venv*, +max-complexity = 15 +max-doc-length = 130 +max-line-length = 100 +output-file = dev_flake8_report.txt +statistics = True +tee = True + +[isort] +ensure_newline_before_comments = True +force_grid_wrap = 0 +include_trailing_comma = True +line_length = 88 +multi_line_output = 3 +profile = black +use_parentheses = True diff --git a/test.py b/test.py new file mode 100644 index 0000000..693664e --- /dev/null +++ b/test.py @@ -0,0 +1,5 @@ +import unittest + +if __name__ == "__main__": + # run all imported test cases + unittest.main()