From 65afc3002d9fbe7e946d42302194ceaa4ff0797b Mon Sep 17 00:00:00 2001 From: danielpdwalker Date: Sat, 24 Jun 2023 09:36:45 +0100 Subject: [PATCH] Initial tap template commit --- .github/dependabot.yml | 26 +++++++ .github/workflows/test.yml | 30 ++++++++ .gitignore | 136 +++++++++++++++++++++++++++++++++++++ .pre-commit-config.yaml | 38 +++++++++++ .secrets/.gitignore | 10 +++ README.md | 131 +++++++++++++++++++++++++++++++++++ meltano.yml | 30 ++++++++ output/.gitignore | 4 ++ pyproject.toml | 58 ++++++++++++++++ tap_themeparks/__init__.py | 1 + tap_themeparks/client.py | 128 ++++++++++++++++++++++++++++++++++ tap_themeparks/streams.py | 65 ++++++++++++++++++ tap_themeparks/tap.py | 58 ++++++++++++++++ tests/__init__.py | 1 + tests/conftest.py | 3 + tests/test_core.py | 22 ++++++ tox.ini | 19 ++++++ 17 files changed, 760 insertions(+) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/test.yml create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 .secrets/.gitignore create mode 100644 README.md create mode 100644 meltano.yml create mode 100644 output/.gitignore create mode 100644 pyproject.toml create mode 100644 tap_themeparks/__init__.py create mode 100644 tap_themeparks/client.py create mode 100644 tap_themeparks/streams.py create mode 100644 tap_themeparks/tap.py create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_core.py create mode 100644 tox.ini diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..933e6b1 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,26 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. 
+# Please see the documentation for all configuration options: +# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: pip + directory: "/" + schedule: + interval: "daily" + commit-message: + prefix: "chore(deps): " + prefix-development: "chore(deps-dev): " + - package-ecosystem: pip + directory: "/.github/workflows" + schedule: + interval: daily + commit-message: + prefix: "ci: " + - package-ecosystem: github-actions + directory: "/" + schedule: + interval: "weekly" + commit-message: + prefix: "ci: " diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..1bc45d2 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,30 @@ +### A CI workflow template that runs linting and python testing +### TODO: Modify as needed or as desired. + +name: Test tap-themeparks + +on: [push] + +jobs: + pytest: + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + strategy: + matrix: + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install Poetry + run: | + pip install poetry + - name: Install dependencies + run: | + poetry install + - name: Test with pytest + run: | + poetry run pytest diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..475019c --- /dev/null +++ b/.gitignore @@ -0,0 +1,136 @@ +# Secrets and internal config files +**/.secrets/* + +# Ignore meltano internal cache and sqlite systemdb + +.meltano/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ 
+*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..24251f4 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,38 @@ +ci: + autofix_prs: true + autoupdate_schedule: weekly + autoupdate_commit_msg: 'chore: pre-commit autoupdate' + +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-json + - id: check-toml + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + +- repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.23.0 + hooks: + - id: check-dependabot + - id: check-github-workflows + +- repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.269 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + +- repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.3.0 + hooks: + - id: mypy + additional_dependencies: + - types-requests diff --git a/.secrets/.gitignore b/.secrets/.gitignore new file mode 100644 index 0000000..33c6acd --- /dev/null +++ b/.secrets/.gitignore @@ -0,0 +1,10 @@ +# IMPORTANT! This folder is hidden from git - if you need to store config files or other secrets, +# make sure those are never staged for commit into your git repo. You can store them here or in another +# secure location. +# +# Note: This may be redundant with the root-level .gitignore, and is provided +# as an extra safeguard. If the `.secrets` folder is not needed, you may delete it +# from the project. 
+ +* +!.gitignore diff --git a/README.md b/README.md new file mode 100644 index 0000000..50b7949 --- /dev/null +++ b/README.md @@ -0,0 +1,131 @@ +# tap-themeparks + +`tap-themeparks` is a Singer tap for themeparks. + +Built with the [Meltano Tap SDK](https://sdk.meltano.com) for Singer Taps. + + + +## Configuration + +### Accepted Config Options + + + +A full list of supported settings and capabilities for this +tap is available by running: + +```bash +tap-themeparks --about +``` + +### Configure using environment variables + +This Singer tap will automatically import any environment variables within the working directory's +`.env` if the `--config=ENV` is provided, such that config values will be considered if a matching +environment variable is set either in the terminal context or in the `.env` file. + +### Source Authentication and Authorization + + + +## Usage + +You can easily run `tap-themeparks` by itself or in a pipeline using [Meltano](https://meltano.com/). + +### Executing the Tap Directly + +```bash +tap-themeparks --version +tap-themeparks --help +tap-themeparks --config CONFIG --discover > ./catalog.json +``` + +## Developer Resources + +Follow these instructions to contribute to this project. + +### Initialize your Development Environment + +```bash +pipx install poetry +poetry install +``` + +### Create and Run Tests + +Create tests within the `tests` subfolder and + then run: + +```bash +poetry run pytest +``` + +You can also test the `tap-themeparks` CLI interface directly using `poetry run`: + +```bash +poetry run tap-themeparks --help +``` + +### Testing with [Meltano](https://www.meltano.com) + +_**Note:** This tap will work in any Singer environment and does not require Meltano. 
+Examples here are for convenience and to streamline end-to-end orchestration scenarios._ + + + +Next, install Meltano (if you haven't already) and any needed plugins: + +```bash +# Install meltano +pipx install meltano +# Initialize meltano within this directory +cd tap-themeparks +meltano install +``` + +Now you can test and orchestrate using Meltano: + +```bash +# Test invocation: +meltano invoke tap-themeparks --version +# OR run a test `elt` pipeline: +meltano elt tap-themeparks target-jsonl +``` + +### SDK Dev Guide + +See the [dev guide](https://sdk.meltano.com/en/latest/dev_guide.html) for more instructions on how to use the SDK to +develop your own taps and targets. diff --git a/meltano.yml b/meltano.yml new file mode 100644 index 0000000..165d55f --- /dev/null +++ b/meltano.yml @@ -0,0 +1,30 @@ +version: 1 +send_anonymous_usage_stats: true +project_id: "tap-themeparks" +default_environment: test +environments: +- name: test +plugins: + extractors: + - name: "tap-themeparks" + namespace: "tap_themeparks" + pip_url: -e . + capabilities: + - state + - catalog + - discover + - about + - stream-maps + config: + start_date: '2010-01-01T00:00:00Z' + settings: + # TODO: To configure using Meltano, declare settings and their types here: + - name: username + - name: password + kind: password + - name: start_date + value: '2010-01-01T00:00:00Z' + loaders: + - name: target-jsonl + variant: andyh1203 + pip_url: target-jsonl diff --git a/output/.gitignore b/output/.gitignore new file mode 100644 index 0000000..80ff9d2 --- /dev/null +++ b/output/.gitignore @@ -0,0 +1,4 @@ +# This directory is used as a target by target-jsonl, so ignore all files + +* +!.gitignore diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..faa95a5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,58 @@ +[tool.poetry] +name = "dw-tap-themeparks" +version = "0.0.1" +description = "`tap-themeparks` is a Singer tap for themeparks, built with the Meltano Singer SDK." 
+readme = "README.md" +authors = ["Daniel Walker"] +keywords = [ + "ELT", + "themeparks", +] +license = "Apache-2.0" +packages = [ + { include = "tap_themeparks" }, +] + +[tool.poetry.dependencies] +python = "<3.12,>=3.7.1" +singer-sdk = { version="^0.28.0" } +fs-s3fs = { version = "^1.1.1", optional = true } +requests = "^2.31.0" + +[tool.poetry.group.dev.dependencies] +pytest = "^7.2.1" +singer-sdk = { version="^0.28.0", extras = ["testing"] } + +[tool.poetry.extras] +s3 = ["fs-s3fs"] + +[tool.mypy] +python_version = "3.9" +warn_unused_configs = true + +[tool.ruff] +ignore = [ + "ANN101", # missing-type-self + "ANN102", # missing-type-cls +] +select = ["ALL"] +src = ["tap_themeparks"] +target-version = "py37" + + +[tool.ruff.flake8-annotations] +allow-star-arg-any = true + +[tool.ruff.isort] +known-first-party = ["tap_themeparks"] + +[tool.ruff.pydocstyle] +convention = "google" + +[build-system] +requires = ["poetry-core>=1.0.8"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry.scripts] +# CLI declaration +tap-themeparks = 'tap_themeparks.tap:Tapthemeparks.cli' diff --git a/tap_themeparks/__init__.py b/tap_themeparks/__init__.py new file mode 100644 index 0000000..bb1eeff --- /dev/null +++ b/tap_themeparks/__init__.py @@ -0,0 +1 @@ +"""Tap for themeparks.""" diff --git a/tap_themeparks/client.py b/tap_themeparks/client.py new file mode 100644 index 0000000..995e9af --- /dev/null +++ b/tap_themeparks/client.py @@ -0,0 +1,128 @@ +"""REST client handling, including themeparksStream base class.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any, Callable, Iterable + +import requests +from singer_sdk.helpers.jsonpath import extract_jsonpath +from singer_sdk.pagination import BaseAPIPaginator # noqa: TCH002 +from singer_sdk.streams import RESTStream + +_Auth = Callable[[requests.PreparedRequest], requests.PreparedRequest] +SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") + + +class 
themeparksStream(RESTStream): + """themeparks stream class.""" + + @property + def url_base(self) -> str: + """Return the API URL root, configurable via tap settings.""" + # TODO: hardcode a value here, or retrieve it from self.config + return "https://api.mysample.com" + + records_jsonpath = "$[*]" # Or override `parse_response`. + + # Set this value or override `get_new_paginator`. + next_page_token_jsonpath = "$.next_page" # noqa: S105 + + @property + def http_headers(self) -> dict: + """Return the http headers needed. + + Returns: + A dictionary of HTTP headers. + """ + headers = {} + if "user_agent" in self.config: + headers["User-Agent"] = self.config.get("user_agent") + # If not using an authenticator, you may also provide inline auth headers: + # headers["Private-Token"] = self.config.get("auth_token") # noqa: ERA001 + return headers + + def get_new_paginator(self) -> BaseAPIPaginator: + """Create a new pagination helper instance. + + If the source API can make use of the `next_page_token_jsonpath` + attribute, or it contains a `X-Next-Page` header in the response + then you can remove this method. + + If you need custom pagination that uses page numbers, "next" links, or + other approaches, please read the guide: https://sdk.meltano.com/en/v0.25.0/guides/pagination-classes.html. + + Returns: + A pagination helper instance. + """ + return super().get_new_paginator() + + def get_url_params( + self, + context: dict | None, # noqa: ARG002 + next_page_token: Any | None, + ) -> dict[str, Any]: + """Return a dictionary of values to be used in URL parameterization. + + Args: + context: The stream context. + next_page_token: The next page index or value. + + Returns: + A dictionary of URL query parameters. 
+ """ + params: dict = {} + if next_page_token: + params["page"] = next_page_token + if self.replication_key: + params["sort"] = "asc" + params["order_by"] = self.replication_key + return params + + def prepare_request_payload( + self, + context: dict | None, # noqa: ARG002 + next_page_token: Any | None, # noqa: ARG002 + ) -> dict | None: + """Prepare the data payload for the REST API request. + + By default, no payload will be sent (return None). + + Args: + context: The stream context. + next_page_token: The next page index or value. + + Returns: + A dictionary with the JSON body for a POST requests. + """ + # TODO: Delete this method if no payload is required. (Most REST APIs.) + return None + + def parse_response(self, response: requests.Response) -> Iterable[dict]: + """Parse the response and return an iterator of result records. + + Args: + response: The HTTP ``requests.Response`` object. + + Yields: + Each record from the source. + """ + # TODO: Parse response body and return a set of records. + yield from extract_jsonpath(self.records_jsonpath, input=response.json()) + + def post_process( + self, + row: dict, + context: dict | None = None, # noqa: ARG002 + ) -> dict | None: + """As needed, append or transform raw data to match expected structure. + + Args: + row: An individual record from the stream. + context: The stream context. + + Returns: + The updated record dictionary, or ``None`` to skip the record. + """ + # TODO: Delete this method if not needed. 
+ return row diff --git a/tap_themeparks/streams.py b/tap_themeparks/streams.py new file mode 100644 index 0000000..de73465 --- /dev/null +++ b/tap_themeparks/streams.py @@ -0,0 +1,65 @@ +"""Stream type classes for tap-themeparks.""" + +from __future__ import annotations + +from pathlib import Path + +from singer_sdk import typing as th # JSON Schema typing helpers + +from tap_themeparks.client import themeparksStream + +# TODO: Delete this if not using JSON files for schema definition +SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") +# TODO: - Override `UsersStream` and `GroupsStream` with your own stream definition. +# - Copy-paste as many times as needed to create multiple stream types. + + +class UsersStream(themeparksStream): + """Define custom stream.""" + + name = "users" + path = "/users" + primary_keys = ["id"] + replication_key = None + # Optionally, you may also use `schema_filepath` in place of `schema`: + # schema_filepath = SCHEMAS_DIR / "users.json" # noqa: ERA001 + schema = th.PropertiesList( + th.Property("name", th.StringType), + th.Property( + "id", + th.StringType, + description="The user's system ID", + ), + th.Property( + "age", + th.IntegerType, + description="The user's age in years", + ), + th.Property( + "email", + th.StringType, + description="The user's email address", + ), + th.Property("street", th.StringType), + th.Property("city", th.StringType), + th.Property( + "state", + th.StringType, + description="State name in ISO 3166-2 format", + ), + th.Property("zip", th.StringType), + ).to_dict() + + +class GroupsStream(themeparksStream): + """Define custom stream.""" + + name = "groups" + path = "/groups" + primary_keys = ["id"] + replication_key = "modified" + schema = th.PropertiesList( + th.Property("name", th.StringType), + th.Property("id", th.StringType), + th.Property("modified", th.DateTimeType), + ).to_dict() diff --git a/tap_themeparks/tap.py b/tap_themeparks/tap.py new file mode 100644 index 0000000..bdc054c --- 
/dev/null +++ b/tap_themeparks/tap.py @@ -0,0 +1,58 @@ +"""themeparks tap class.""" + +from __future__ import annotations + +from singer_sdk import Tap +from singer_sdk import typing as th # JSON schema typing helpers + +# TODO: Import your custom stream types here: +from tap_themeparks import streams + + +class Tapthemeparks(Tap): + """themeparks tap class.""" + + name = "tap-themeparks" + + # TODO: Update this section with the actual config values you expect: + config_jsonschema = th.PropertiesList( + th.Property( + "auth_token", + th.StringType, + required=True, + secret=True, # Flag config as protected. + description="The token to authenticate against the API service", + ), + th.Property( + "project_ids", + th.ArrayType(th.StringType), + required=True, + description="Project IDs to replicate", + ), + th.Property( + "start_date", + th.DateTimeType, + description="The earliest record date to sync", + ), + th.Property( + "api_url", + th.StringType, + default="https://api.mysample.com", + description="The url for the API service", + ), + ).to_dict() + + def discover_streams(self) -> list[streams.themeparksStream]: + """Return a list of discovered streams. + + Returns: + A list of discovered streams. 
+ """ + return [ + streams.GroupsStream(self), + streams.UsersStream(self), + ] + + +if __name__ == "__main__": + Tapthemeparks.cli() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..fc537d3 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Test suite for tap-themeparks.""" diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..6bb3ec2 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,3 @@ +"""Test Configuration.""" + +pytest_plugins = ("singer_sdk.testing.pytest_plugin",) diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 0000000..0b22cc6 --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,22 @@ +"""Tests standard tap features using the built-in SDK tests library.""" + +import datetime + +from singer_sdk.testing import get_tap_test_class + +from tap_themeparks.tap import Tapthemeparks + +SAMPLE_CONFIG = { + "start_date": datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d"), + # TODO: Initialize minimal tap config +} + + +# Run standard built-in tap tests from the SDK: +TestTapthemeparks = get_tap_test_class( + tap_class=Tapthemeparks, + config=SAMPLE_CONFIG, +) + + +# TODO: Create additional tests as appropriate for your tap. diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..70b9e4a --- /dev/null +++ b/tox.ini @@ -0,0 +1,19 @@ +# This file can be used to customize tox tests as well as other test frameworks like flake8 and mypy + +[tox] +envlist = py37, py38, py39, py310, py311 +isolated_build = true + +[testenv] +allowlist_externals = poetry +commands = + poetry install -v + poetry run pytest + +[testenv:pytest] +# Run the python tests. +# To execute, run `tox -e pytest` +envlist = py37, py38, py39, py310, py311 +commands = + poetry install -v + poetry run pytest