diff --git a/docker/Dockerfile.dev.ui b/docker/Dockerfile.dev.ui index ab7709e3c..e49705cfe 100644 --- a/docker/Dockerfile.dev.ui +++ b/docker/Dockerfile.dev.ui @@ -16,7 +16,7 @@ COPY keep-ui/package*.json /app/ COPY ./keep-ui/ /app # Install dependencies in /app -RUN npm install --legacy-peer-deps +RUN npm install # Ensure port 3000 is accessible to our system EXPOSE 3000 diff --git a/docker/Dockerfile.ui b/docker/Dockerfile.ui index 57acb52a2..91e23410f 100644 --- a/docker/Dockerfile.ui +++ b/docker/Dockerfile.ui @@ -10,7 +10,7 @@ WORKDIR /app # Install dependencies based on the preferred package manager COPY package.json package-lock.json ./ -RUN npm ci --legacy-peer-deps +RUN npm ci # Rebuild the source code only when needed diff --git a/docs/api-ref/tenant/is-onboarded.mdx b/docs/api-ref/tenant/is-onboarded.mdx deleted file mode 100644 index aeea46a93..000000000 --- a/docs/api-ref/tenant/is-onboarded.mdx +++ /dev/null @@ -1,3 +0,0 @@ ---- -openapi: get /tenant/onboarded ---- diff --git a/docs/api-ref/tenant/save-github-installation-id.mdx b/docs/api-ref/tenant/save-github-installation-id.mdx deleted file mode 100644 index 12737090b..000000000 --- a/docs/api-ref/tenant/save-github-installation-id.mdx +++ /dev/null @@ -1,3 +0,0 @@ ---- -openapi: post /tenant/github ---- diff --git a/docs/development/authentication.mdx b/docs/deployment/authentication.mdx similarity index 100% rename from docs/development/authentication.mdx rename to docs/deployment/authentication.mdx diff --git a/docs/deployment/docker.mdx b/docs/deployment/docker.mdx new file mode 100644 index 000000000..5b77e205c --- /dev/null +++ b/docs/deployment/docker.mdx @@ -0,0 +1,31 @@ +--- +title: "Docker" +sidebarTitle: "Docker" +--- + +### Spin up Keep with docker-compose latest images +The easiest way to start keep is with docker-compose: +```shell +curl https://raw.githubusercontent.com/keephq/keep/main/start.sh | sh +``` + +```bash start.sh +#!/bin/bash +# Keep install script for docker compose 
+ +echo "Creating state directory." +mkdir -p state +test -e state +echo "Changing directory ownership to non-privileged user." +chown -R 999:999 state || echo "Unable to change directory ownership, changing permissions instead." && chmod -R 0777 state +which curl &> /dev/null || echo "curl not installed" +curl https://raw.githubusercontent.com/keephq/keep/main/docker-compose.yml --output docker-compose.yml +curl https://raw.githubusercontent.com/keephq/keep/main/docker-compose.common.yml --output docker-compose.common.yml + +docker compose up -d +``` + +The docker-compose.yml contains 3 services: +- [keep-backend](https://console.cloud.google.com/artifacts/docker/keephq/us-central1/keep/keep-api?project=keephq) - a fastapi service that serves as the API server. +- [keep-frontend](https://console.cloud.google.com/artifacts/docker/keephq/us-central1/keep/keep-ui?project=keephq) - a nextjs app that serves as Keep UI interface. +- [keep-websocket-server](https://docs.soketi.app/getting-started/installation/docker) - Soketi (a pusher compatible websocket server) for real time alerting. diff --git a/docs/deployment/kubernetes.mdx b/docs/deployment/kubernetes.mdx new file mode 100644 index 000000000..714a20563 --- /dev/null +++ b/docs/deployment/kubernetes.mdx @@ -0,0 +1,23 @@ +--- +title: "Kubernetes" +sidebarTitle: "Kubernetes" +--- + +Keep can be installed via Helm Chart. 
+ +First, clone Keep: +``` +git clone https://github.com/keephq/keep.git && cd keep +``` + +Next, install using: +``` +helm install -f chart/keep/values.yaml keep chart/keep/ +``` + +Notice for it to work locally, you'll need this port forwarding: +``` +kubectl port-forward svc/keep-frontend 3000:3000 +``` + +To learn more about Keep's helm chart, see https://github.com/keephq/keep/blob/main/chart/keep/README.md diff --git a/docs/deployment/openshift.mdx b/docs/deployment/openshift.mdx new file mode 100644 index 000000000..f53c223b1 --- /dev/null +++ b/docs/deployment/openshift.mdx @@ -0,0 +1,15 @@ +--- +title: "Openshift" +sidebarTitle: "Openshift" +--- + +Keep's Helm Chart also supports Openshift installation. + +Simply follow the Kubernetes set-up guide, but make sure to modify the following lines under frontend(/backend).route in the values.yaml file as follows: +``` +enabled: true +host: +path: # should be / for default +tls: +wildcardPolicy: +``` diff --git a/docs/deployment/secret-manager.mdx b/docs/deployment/secret-manager.mdx new file mode 100644 index 000000000..4de17c68a --- /dev/null +++ b/docs/deployment/secret-manager.mdx @@ -0,0 +1,84 @@ +--- +title: "Secret Manager" +sidebarTitle: "Secret Manager" +--- + +## Overview + +Secret Manager selection is crucial for securing your application. Different modes can be set up depending on the deployment type. Our system supports four primary secret manager types. + +## Secret Manager Factory + +The `SecretManagerFactory` is a utility class used to create instances of different types of secret managers. It leverages the Factory design pattern to abstract the creation logic based on the type of secret manager required. The factory supports creating instances of File, GCP, Kubernetes, and Vault Secret Managers. 
+ +The `SECRET_MANAGER_TYPE` environment variable plays a crucial role in the SecretManagerFactory for determining the default type of secret manager to be instantiated when no specific type is provided in the method call. + +**Functionality**: + +**Default Secret Manager**: If the `SECRET_MANAGER_TYPE` environment variable is set, its value dictates the default type of secret manager that the factory will create. +The value of this variable should correspond to one of the types defined in SecretManagerTypes enum (`FILE`, `GCP`, `K8S`, `VAULT`). + +**Example Configuration**: + +Setting `SECRET_MANAGER_TYPE=GCP` in the environment will make the factory create instances of GcpSecretManager by default. +If `SECRET_MANAGER_TYPE` is not set or is set to `FILE`, the factory defaults to creating instances of FileSecretManager. +This environment variable provides flexibility and ease of configuration, allowing different secret managers to be used in different environments or scenarios without code changes. + +## File Secret Manager + +The `FileSecretManager` is a concrete implementation of the BaseSecretManager for managing secrets stored in the file system. It uses a specified directory (defaulting to ./) to read, write, and delete secret files. + +Configuration: + +Set the environment variable `SECRET_MANAGER_DIRECTORY` to specify the directory where secrets are stored. If not set, defaults to the current directory (./). + +Usage: + +- Secrets are stored as files in the specified directory. +- Reading a secret involves fetching content from a file. +- Writing a secret creates or updates a file with the given content. +- Deleting a secret removes the corresponding file. + +## Kubernetes Secret Manager + +The `KubernetesSecretManager` interfaces with Kubernetes' native secrets system. It manages secrets within a specified Kubernetes namespace and is designed to operate within a Kubernetes cluster. 
+ +Configuration: + +Set `K8S_NAMESPACE` environment variable to specify the Kubernetes namespace. Defaults to default if not set. Assumes Kubernetes configurations (like service account tokens) are properly set up when running within a cluster. + +Usage: + +- Secrets are stored as Kubernetes Secret objects. +- Provides functionalities to create, retrieve, and delete Kubernetes secrets. +- Handles base64 encoding and decoding as required by Kubernetes. + +## GCP Secret Manager + +The `GcpSecretManager` utilizes Google Cloud's Secret Manager service for secret management. It requires setting up with Google Cloud credentials and a project ID. + +Configuration: + +Ensure the environment variable `GOOGLE_CLOUD_PROJECT` is set with your Google Cloud project ID. + +Usage: + +- Secrets are managed using Google Cloud's Secret Manager. +- Supports operations to create, access, and delete secrets in the cloud. +- Integrates with OpenTelemetry for tracing secret management operations. + +## Hashicorp Vault Secret Manager + +The `VaultSecretManager` is tailored for Hashicorp Vault, a tool for managing sensitive data. It supports token-based authentication as well as Kubernetes-based authentication for Vault. + +Configuration: + +- Set `HASHICORP_VAULT_ADDR` to the Vault server address. Defaults to http://localhost:8200. +- Use `HASHICORP_VAULT_TOKEN` for token-based authentication. +- Set `HASHICORP_VAULT_USE_K8S` to True and provide `HASHICORP_VAULT_K8S_ROLE` for Kubernetes-based authentication. + +Usage: + +- Manages secrets in a Hashicorp Vault server. +- Provides methods to write, read, and delete secrets from Vault. +- Supports different Vault authentication methods including static tokens and Kubernetes service account tokens. 
diff --git a/docs/development/adding-a-new-provider.mdx b/docs/development/adding-a-new-provider.mdx deleted file mode 100644 index 8f9f8f982..000000000 --- a/docs/development/adding-a-new-provider.mdx +++ /dev/null @@ -1,170 +0,0 @@ ---- -title: "Adding a new Provider" -sidebarTitle: "Adding a New Provider" ---- -Under contstruction - -### Basics - -- BaseProvider is the base class every provider needs to inherit from -- BaseProvider exposes 4 important functions: - - `query(self, **kwargs: dict)` which is used to query the provider in steps - - `notify(self, **kwargs: dict)` which is used to notify via the provider in actions - - `dispose(self)` which is used to dispose the provider after usage (e.g. close the connection to the DB) - - `validate_config(self)` which is used to validate the configuration passed to the Provider -- And 4 functions that are not required: - - `get_alerts(self)` which is used to fetch configured alerts (**not the currently active alerts**) - - `deploy_alert(self, alert: dict, alert_id: Optional[str]` which is used to deploy an alert to the provider - - `get_alert_schema(self)` which is used to describe the provider's API schema of how to deploy alert - - `get_logs(self, limit)` which is used to fetch logs from the provider (currently used by the AI layer to generate more accurate results) -- Providers must be located in the providers directory -- Provider directory must start with the provider's unique identifier followed by underscore+provider (e.g. `slack_provider`) -- Provider file name must start with the provider's unique identifier followed by underscore+provider+.py (e.g. `slack_provider.py`) - -### ProviderConfig - -```python -@dataclass -class ProviderConfig: - """ - Provider configuration model. - - Args: - description (Optional[str]): The description of the provider. - authentication (dict): The configuration for the provider. 
- """ - - authentication: dict - description: Optional[str] = None - - def __post_init__(self): - if not self.authentication: - return - for key, value in self.authentication.items(): - if ( - isinstance(value, str) - and value.startswith("{{") - and value.endswith("}}") - ): - self.authentication[key] = chevron.render(value, {"env": os.environ}) -``` - -### BaseProvider - -```python -class BaseProvider(metaclass=abc.ABCMeta): - def __init__(self, provider_id: str, config: ProviderConfig): - """ - Initialize a provider. - - Args: - provider_id (str): The provider id. - **kwargs: Provider configuration loaded from the provider yaml file. - """ - # Initalize logger for every provider - self.logger = logging.getLogger(self.__class__.__name__) - self.id = provider_id - self.config = config - self.validate_config() - self.logger.debug( - "Base provider initalized", extra={"provider": self.__class__.__name__} - ) - - @property - def provider_id(self) -> str: - """ - Get the provider id. - - Returns: - str: The provider id. - """ - return self.id - - @abc.abstractmethod - def dispose(self): - """ - Dispose of the provider. - """ - raise NotImplementedError("dispose() method not implemented") - - @abc.abstractmethod - def validate_config(): - """ - Validate provider configuration. - """ - raise NotImplementedError("validate_config() method not implemented") - - def notify(self, **kwargs): - """ - Output alert message. - - Args: - **kwargs (dict): The provider context (with statement) - """ - raise NotImplementedError("notify() method not implemented") - - def query(self, **kwargs: dict): - """ - Query the provider using the given query - - Args: - kwargs (dict): The provider context (with statement) - - Raises: - NotImplementedError: _description_ - """ - raise NotImplementedError("query() method not implemented") - - def get_alerts(self, alert_id: Optional[str] = None): - """ - Get alerts from the provider. 
- - Args: - alert_id (Optional[str], optional): If given, gets a specific alert by id. Defaults to None. - """ - # todo: we'd want to have a common alert model for all providers (also for consistent output from GPT) - raise NotImplementedError("get_alerts() method not implemented") - - def deploy_alert(self, alert: dict, alert_id: Optional[str] = None): - """ - Deploy an alert to the provider. - - Args: - alert (dict): The alert to deploy. - alert_id (Optional[str], optional): If given, deploys a specific alert by id. Defaults to None. - """ - raise NotImplementedError("deploy_alert() method not implemented") - - @staticmethod - def get_alert_schema() -> dict: - """ - Get the alert schema description for the provider. - e.g. How to define an alert for the provider that can be pushed via the API. - - Returns: - str: The alert format description. - """ - raise NotImplementedError( - "get_alert_format_description() method not implemented" - ) - - def get_logs(self, limit: int = 5) -> list: - """ - Get logs from the provider. - - Args: - limit (int): The number of logs to get. - """ - raise NotImplementedError("get_logs() method not implemented") - - def expose(self): - """Expose parameters that were calculated during query time. - - Each provider can expose parameters that were calculated during query time. - E.g. parameters that were supplied by the user and were rendered by the provider. - - A concrete example is the "_from" and "to" of the Datadog Provider which are calculated during execution. 
- """ - # TODO - implement dynamically using decorators and - return {} -``` diff --git a/docs/development/external-url.mdx b/docs/development/external-url.mdx index 9eaf2f479..70528f532 100644 --- a/docs/development/external-url.mdx +++ b/docs/development/external-url.mdx @@ -1,6 +1,6 @@ --- -title: "Keep with External URL" -sidebarTitle: "Keep with External URL" +title: "Keep with Internet URL" +sidebarTitle: "Keep with Internet URL" --- ## Introduction diff --git a/docs/development/getting-started.mdx b/docs/development/getting-started.mdx index 31ab3d0d0..8db52799f 100644 --- a/docs/development/getting-started.mdx +++ b/docs/development/getting-started.mdx @@ -3,19 +3,6 @@ title: "Getting started" sidebarTitle: "Getting started" --- -## Docker-compose - -### Spin up Keep with docker-compose latest images -The easiest way to start keep is is with docker-compose: -```shell -curl https://raw.githubusercontent.com/keephq/keep/main/start.sh | sh -``` - -The docker-compose.yml contains two services: -- keep-backend - a fastapi service that as the API server. -- keep-frontend - a nextjs app that serves as Keep UI interface. -- keep-websocket-server - Soketi (a pusher compatible websocket server). - ### Docker-compose dev images You can use `docker-compose.dev.yaml` to start Keep in a development mode. @@ -29,41 +16,6 @@ Next, run docker-compose -f docker-compose.dev.yaml - up ``` - -## Kubernetes -Keep can be installed via Helm Chart. - -First, clone Keep: -``` -git clone https://github.com/keephq/keep.git && cd keep -``` - -Next, install using: -``` -helm install -f chart/keep/values.yaml keep chart/keep/ -``` - -Notice for it to work locally, you'll need this port forwarding: -``` -kubectl port-forward svc/keep-frontend 3000:3000 -``` - -To learn more about Keep's helm chart, see https://github.com/keephq/keep/blob/main/chart/keep/README.md - - -## Openshift -Keep's Helm Chart also supports Openshift installation. 
- -Simply follow the Kubernetes set-up guide, but make sure to modify the following lines under frontend(/backend).route in the values.yaml file as follows: -``` -enabled: true -host: -path: # should be / for default -tls: -wildcardPolicy: -``` - - ## VSCode You can run Keep from your VSCode (after cloning the repo) by adding this configurations to your `launch.json`: diff --git a/docs/mint.json b/docs/mint.json index 0f21a508d..75e2bca54 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -28,11 +28,28 @@ "pages": [ "overview/introduction", "overview/keyconcepts", - "overview/alertseverityandstatus", "overview/usecases", "overview/ruleengine", "overview/examples", - "overview/alternatives" + "overview/comparison" + ] + }, + { + "group": "Development", + "pages": [ + "development/getting-started", + "development/authentication", + "development/external-url" + ] + }, + { + "group": "Deployment", + "pages": [ + "deployment/authentication", + "deployment/secret-manager", + "deployment/docker", + "deployment/kubernetes", + "deployment/openshift" ] }, { @@ -41,6 +58,7 @@ "platform/overview", "platform/providers", "platform/alerts", + "platform/alertseverityandstatus", "platform/workflows", "platform/workflowbuilder", "platform/settings" @@ -51,8 +69,9 @@ "pages": [ "providers/overview", "providers/fingerprints", + "providers/adding-a-new-provider", { - "group": "Supported providers", + "group": "Supported Providers", "pages": [ "providers/documentation/aks-provider", "providers/documentation/axiom-provider", @@ -172,13 +191,6 @@ "group": "Healthcheck", "pages": ["api-ref/healthcheck/healthcheck"] }, - { - "group": "Tenant", - "pages": [ - "api-ref/tenant/is-onboarded", - "api-ref/tenant/save-github-installation-id" - ] - }, { "group": "Alerts", "pages": ["api-ref/alerts/get-alerts", "api-ref/alerts/receive-event"] @@ -248,15 +260,6 @@ ] } ] - }, - { - "group": "Development", - "pages": [ - "development/getting-started", - "development/authentication", - 
"development/external-url", - "development/adding-a-new-provider" - ] } ], "footerSocials": { diff --git a/docs/overview/alternatives.mdx b/docs/overview/alternatives.mdx deleted file mode 100644 index 563cb79f5..000000000 --- a/docs/overview/alternatives.mdx +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: "Comparison to other tools" ---- - -Sometimes, it's easier to understand a tool's capabilities by comparing it to other tools in the same ecosystem. In this section, we'll discuss how Keep interacts with and compares to other tools in the ecosystem. - -## Keep vs Alerta -> alerta monitoring system is a tool used to consolidate and de-duplicate alerts from multiple sources for quick ‘at-a-glance’ visualisation. With just one system you can monitor alerts from many other monitoring tools on a single screen -[(alerta docs)](https://docs.alerta.io/). - - - -| | Keep | Alternative | -|----------|-----------------------|----------------------| -| Aggregation and correlation | ✅ | ✅ | -| Integrations | ✅ (both manually and automatically with Webhooks Integration) | ✅ (manually) | -| Alerts enrichment | ✅ | ❌ -| Open source | ✅ | ✅ | -| Workflow automation | ✅ | ❌ | -| Managed version | ✅ | ❌ | -| CI/CD integration | 🚧 | ❌ | -| Noise reduction | 🚧 | ❌ | - - -## Keep vs Grafana - -> Using Grafana Alerting, you create queries and expressions from multiple data sources — no matter where your data is stored — giving you the flexibility to combine your data and alert on your metrics and logs in new and unique ways. You can then create, manage, and take action on your alerts from a single, consolidated view, and improve your team’s ability to identify and resolve issues quickly. -[(grafana docs)](https://grafana.com/docs/grafana/latest/alerting/). 
- -| | Keep | Alternative | -|----------|-----------------------|----------------------| -| Aggregation and correlation of alerts | ✅ | ❌ | -| Integrations | ✅ (both manually and automatically with Webhooks Integration) | ✅ (manually) | -| Alerts enrichment | ✅ | ❌ -| Open source | ✅ | ✅ | -| Workflow automation | ✅ | ❌| -| Managed version | ✅ | ✅ | -| CI/CD integration | 🚧 | ⚠️ has terraform integration | -| Noise reduction | 🚧 | ❌ | - - -## Keep vs observability tools (Datadog, New Relic, etc) -Most existing observability tools, such as Datadog and New Relic, have implemented alerting mechanisms. However, they generally have two main limitations: -- They are data centric - which means your data needs to be in the tool in order to be able to alert on it. -- In many observability tools, alerting features are often not as robust or flexible as other functionalities. While setting up basic alerts is usually straightforward, these tools often fall short when it comes to build a mature alerting culture. - - -> If you are not suffering from the problems above or the [problems Keep solves](/overview/introduction#what-problem-does-keep-solve) and satisfied with your alerting, you probably don't need Keep - -Keep mitigates these concerns by being agnostic to where the data is, so you can alert on whatever you want, and treats alerts as first-class citizen which let you create a great alerting culture. - - - -## Keep vs incident management tools (PagerDuty, OpsGenie, etc) -Most incident management tools offer features like alert aggregation and workflow automation. However, their core value is the incident management itself, which Keep aims to prevent. Keep focuses only on the alert lifecycle. 
diff --git a/docs/overview/comparison.mdx b/docs/overview/comparison.mdx new file mode 100644 index 000000000..f12085a6f --- /dev/null +++ b/docs/overview/comparison.mdx @@ -0,0 +1,41 @@ +--- +title: "Comparison" +--- + +Sometimes, it's easier to understand a tool's capabilities by comparing it to other tools in the same ecosystem. In this section, we'll discuss how Keep interacts with and compares to other tools in the ecosystem. + +## Keep vs Alerta + +> alerta monitoring system is a tool used to consolidate and de-duplicate alerts from multiple sources for quick ‘at-a-glance’ visualisation. With just one system you can monitor alerts from many other monitoring tools on a single screen +> [(alerta docs)](https://docs.alerta.io/). + +| | Keep | Alternative | +| --------------------------- | -------------------------------------------------------------- | ------------- | +| Aggregation and correlation | ✅ | ✅ | +| Integrations | ✅ (Both manually and automatically with Webhooks Integration) | ✅ (Manually) | +| Alerts enrichment | ✅ | ❌ | +| Open source | ✅ | ✅ | +| Workflow automation | ✅ | ❌ | +| Managed version | ✅ | ❌ | +| CI/CD integration | ✅ | ❌ | +| Noise reduction | ✅ | ❌ | + +## Keep vs Grafana + +> Using Grafana Alerting, you create queries and expressions from multiple data sources — no matter where your data is stored — giving you the flexibility to combine your data and alert on your metrics and logs in new and unique ways. You can then create, manage, and take action on your alerts from a single, consolidated view, and improve your team’s ability to identify and resolve issues quickly. +> [(Grafana docs)](https://grafana.com/docs/grafana/latest/alerting/). 
+ +| | Keep | Alternative | +| ------------------------------------- | -------------------------------------------------------------- | ---------------------------- | +| Aggregation and correlation of alerts | ✅ | ❌ | +| Integrations | ✅ (Both manually and automatically with Webhooks Integration) | ✅ (Manually) | +| Alerts enrichment | ✅ | ❌ | +| Open source | ✅ | ✅ | +| Workflow automation | ✅ | ❌ | +| Managed version | ✅ | ✅ | +| CI/CD integration | ✅ | ⚠️ has terraform integration | +| Noise reduction | ✅ | ❌ | + +## Keep vs incident management tools (PagerDuty, OpsGenie, etc) + +Most incident management tools offer features like alert aggregation and workflow automation. However, their core value is the incident management itself, which Keep aims to prevent. Keep focuses only on the alert lifecycle. diff --git a/docs/overview/introduction.mdx b/docs/overview/introduction.mdx index 96cea438f..951be111c 100644 --- a/docs/overview/introduction.mdx +++ b/docs/overview/introduction.mdx @@ -17,8 +17,8 @@ Alerts usually categorized into three different groups: ## What problem does Keep solve? Keep helps with every step of the alert lifecycle: 1. Creation - Keep offers a framework for creating, debugging, and testing alerts through code that scales with your teams. -2. Maintenance - Keep integrates with your existing tools, allowing you to manage all of your alerts within a single interface. -3. Noise reduction - By integrating with tools that trigger alerts, Keep can deduplicate and correlate alerts to reduce noise in your organization. +2. Maintenance - Keep integrates with your tools, allowing you to manage all of your alerts within a single interface. +3. Noise reduction - By integrating with monitoring tools, Keep can deduplicate and correlate alerts to reduce noise in your organization. 4. Automation - [Keep Workflows](/workflows) enable automated alert enrichment and response. ## How does Keep integrate with the alerts? 
diff --git a/docs/overview/alertseverityandstatus.mdx b/docs/platform/alertseverityandstatus.mdx similarity index 100% rename from docs/overview/alertseverityandstatus.mdx rename to docs/platform/alertseverityandstatus.mdx diff --git a/docs/providers/adding-a-new-provider.mdx b/docs/providers/adding-a-new-provider.mdx index 3c92a88c3..2fccc2dc2 100644 --- a/docs/providers/adding-a-new-provider.mdx +++ b/docs/providers/adding-a-new-provider.mdx @@ -1,10 +1,8 @@ --- -title: "➕ New Provider" +title: "Adding a new Provider" sidebarTitle: "Adding a New Provider" --- - -This is an outdated documentation and will soon be updated. - +Under construction ### Basics @@ -23,6 +21,30 @@ sidebarTitle: "Adding a New Provider" - Provider directory must start with the provider's unique identifier followed by underscore+provider (e.g. `slack_provider`) - Provider file name must start with the provider's unique identifier followed by underscore+provider+.py (e.g. `slack_provider.py`) +### ProviderScope +```python +@dataclass +class ProviderScope: + """ + Provider scope model. + + Args: + name (str): The name of the scope. + description (Optional[str]): The description of the scope. + mandatory (bool): Whether the scope is mandatory. + mandatory_for_webhook (bool): Whether the scope is mandatory for webhook auto installation. + documentation_url (Optional[str]): The documentation url of the scope. + alias (Optional[str]): Another alias of the scope. + """ + + name: str + description: Optional[str] = None + mandatory: bool = False + mandatory_for_webhook: bool = False + documentation_url: Optional[str] = None + alias: Optional[str] = None +``` + ### ProviderConfig ```python @@ -36,7 +58,8 @@ class ProviderConfig: authentication (dict): The configuration for the provider. 
""" - authentication: dict + authentication: Optional[dict] + name: Optional[str] = None description: Optional[str] = None def __post_init__(self): @@ -54,8 +77,27 @@ class ProviderConfig: ### BaseProvider ```python +""" +Base class for all providers. +""" class BaseProvider(metaclass=abc.ABCMeta): - def __init__(self, provider_id: str, config: ProviderConfig): + OAUTH2_URL = None + PROVIDER_SCOPES: list[ProviderScope] = [] + PROVIDER_METHODS: list[ProviderMethod] = [] + FINGERPRINT_FIELDS: list[str] = [] + PROVIDER_TAGS: list[ + Literal["alert", "ticketing", "messaging", "data", "queue"] + ] = [] + + def __init__( + self, + context_manager: ContextManager, + provider_id: str, + config: ProviderConfig, + webhooke_template: Optional[str] = None, + webhook_description: Optional[str] = None, + provider_description: Optional[str] = None, + ): """ Initialize a provider. @@ -63,24 +105,36 @@ class BaseProvider(metaclass=abc.ABCMeta): provider_id (str): The provider id. **kwargs: Provider configuration loaded from the provider yaml file. """ - # Initalize logger for every provider - self.logger = logging.getLogger(self.__class__.__name__) - self.id = provider_id + self.provider_id = provider_id + self.config = config + self.webhooke_template = webhooke_template + self.webhook_description = webhook_description + self.provider_description = provider_description + self.context_manager = context_manager + self.logger = context_manager.get_logger() self.validate_config() self.logger.debug( "Base provider initalized", extra={"provider": self.__class__.__name__} ) + self.provider_type = self._extract_type() + self.results = [] + # tb: we can have this overriden by customer configuration, when initializing the provider + self.fingerprint_fields = self.FINGERPRINT_FIELDS - @property - def provider_id(self) -> str: + def _extract_type(self): """ - Get the provider id. + Extract the provider type from the provider class name. Returns: - str: The provider id. 
+ str: The provider type. """ - return self.id + name = self.__class__.__name__ + name_without_provider = name.replace("Provider", "") + name_with_spaces = ( + re.sub("([A-Z])", r" \1", name_without_provider).lower().strip() + ) + return name_with_spaces.replace(" ", ".") @abc.abstractmethod def dispose(self): @@ -96,16 +150,106 @@ class BaseProvider(metaclass=abc.ABCMeta): """ raise NotImplementedError("validate_config() method not implemented") + def validate_scopes(self) -> dict[str, bool | str]: + """ + Validate provider scopes. + + Returns: + dict: where key is the scope name and value is whether the scope is valid (True boolean) or string with error message. + """ + return {} + def notify(self, **kwargs): """ Output alert message. + Args: + **kwargs (dict): The provider context (with statement) + """ + # trigger the provider + results = self._notify(**kwargs) + self.results.append(results) + # if the alert should be enriched, enrich it + enrich_alert = kwargs.get("enrich_alert", []) + if not enrich_alert or not results: + return results if results else None + + self._enrich_alert(enrich_alert, results) + return results + + def _enrich_alert(self, enrichments, results): + """ + Enrich alert with provider specific data. + + """ + self.logger.debug("Extracting the fingerprint from the alert") + if "fingerprint" in results: + fingerprint = results["fingerprint"] + elif self.context_manager.foreach_context.get("value", {}): + # TODO: if it's zipped, we need to extract the fingerprint from the zip (i.e. 
multiple foreach) + fingerprint = self.context_manager.foreach_context.get("value", {}).get( + "fingerprint" + ) + # else, if we are in an event context, use the event fingerprint + elif self.context_manager.event_context: + # TODO: map all casses event_context is dict and update them to the DTO + # and remove this if statement + if isinstance(self.context_manager.event_context, dict): + fingerprint = self.context_manager.event_context.get("fingerprint") + # Alert DTO + else: + fingerprint = self.context_manager.event_context.fingerprint + else: + fingerprint = None + + if not fingerprint: + self.logger.error( + "No fingerprint found for alert enrichment", + extra={"provider": self.provider_id}, + ) + raise Exception("No fingerprint found for alert enrichment") + self.logger.debug("Fingerprint extracted", extra={"fingerprint": fingerprint}) + + _enrichments = {} + # enrich only the requested fields + for enrichment in enrichments: + try: + if enrichment["value"].startswith("results."): + val = enrichment["value"].replace("results.", "") + parts = val.split(".") + r = copy.copy(results) + for part in parts: + r = r[part] + _enrichments[enrichment["key"]] = r + else: + _enrichments[enrichment["key"]] = enrichment["value"] + except Exception: + self.logger.error( + f"Failed to enrich alert - enrichment: {enrichment}", + extra={"fingerprint": fingerprint, "provider": self.provider_id}, + ) + continue + self.logger.info("Enriching alert", extra={"fingerprint": fingerprint}) + try: + enrich_alert(self.context_manager.tenant_id, fingerprint, _enrichments) + except Exception as e: + self.logger.error( + "Failed to enrich alert in db", + extra={"fingerprint": fingerprint, "provider": self.provider_id}, + ) + raise e + self.logger.info("Alert enriched", extra={"fingerprint": fingerprint}) + + def _notify(self, **kwargs): + """ + Output alert message. 
+ Args: **kwargs (dict): The provider context (with statement) """ raise NotImplementedError("notify() method not implemented") - def query(self, **kwargs: dict): + def _query(self, **kwargs: dict): """ Query the provider using the given query @@ -117,9 +261,60 @@ class BaseProvider(metaclass=abc.ABCMeta): """ raise NotImplementedError("query() method not implemented") - def get_alerts(self, alert_id: Optional[str] = None): + def query(self, **kwargs: dict): + # just run the query + results = self._query(**kwargs) + # now add the type of the results to the global context + if results and isinstance(results, list): + self.context_manager.dependencies.add(results[0].__class__) + elif results: + self.context_manager.dependencies.add(results.__class__) + + enrich_alert = kwargs.get("enrich_alert", []) + if enrich_alert: + self._enrich_alert(enrich_alert, results) + # and return the results + return results + + @staticmethod + def _format_alert(event: dict) -> AlertDto | list[AlertDto]: + raise NotImplementedError("format_alert() method not implemented") + + @classmethod + def format_alert(cls, event: dict) -> AlertDto | list[AlertDto]: + logger = logging.getLogger(__name__) + logger.debug("Formatting alert") + formatted_alert = cls._format_alert(event) + logger.debug("Alert formatted") + return formatted_alert + + @staticmethod + def get_alert_fingerprint(alert: AlertDto, fingerprint_fields: list = []) -> str: """ - Get alerts from the provider. + Get the fingerprint of an alert. + + Args: + event (AlertDto): The alert to get the fingerprint of. + fingerprint_fields (list, optional): The fields we calculate the fingerprint upon. Defaults to []. + + Returns: + str: hexdigest of the fingerprint or the event.name if no fingerprint_fields were given. 
+ """ + if not fingerprint_fields: + return alert.name + fingerprint = hashlib.sha256() + event_dict = alert.dict() + for fingerprint_field in fingerprint_fields: + fingerprint_field_value = event_dict.get(fingerprint_field, None) + if isinstance(fingerprint_field_value, (list, dict)): + fingerprint_field_value = json.dumps(fingerprint_field_value) + if fingerprint_field_value: + fingerprint.update(str(fingerprint_field_value).encode()) + return fingerprint.hexdigest() + + def get_alerts_configuration(self, alert_id: Optional[str] = None): + """ + Get configuration of alerts from the provider. Args: alert_id (Optional[str], optional): If given, gets a specific alert by id. Defaults to None. @@ -137,6 +332,91 @@ class BaseProvider(metaclass=abc.ABCMeta): """ raise NotImplementedError("deploy_alert() method not implemented") + def _get_alerts(self) -> list[AlertDto]: + """ + Get alerts from the provider. + """ + raise NotImplementedError("get_alerts() method not implemented") + + def get_alerts(self) -> list[AlertDto]: + """ + Get alerts from the provider. + """ + with tracer.start_as_current_span(f"{self.__class__.__name__}-get_alerts"): + alerts = self._get_alerts() + # enrich alerts with provider id + for alert in alerts: + alert.providerId = self.provider_id + return alerts + + def get_alerts_by_fingerprint(self, tenant_id: str) -> dict[str, list[AlertDto]]: + """ + Get alerts from the provider grouped by fingerprint, sorted by lastReceived. + + Returns: + dict[str, list[AlertDto]]: A dict of alerts grouped by fingerprint, sorted by lastReceived. 
+ """ + alerts = self.get_alerts() + + if not alerts: + return {} + + # get alerts, group by fingerprint and sort them by lastReceived + with tracer.start_as_current_span(f"{self.__class__.__name__}-get_last_alerts"): + get_attr = operator.attrgetter("fingerprint") + grouped_alerts = { + fingerprint: list(alerts) + for fingerprint, alerts in itertools.groupby( + sorted( + alerts, + key=get_attr, + ), + get_attr, + ) + } + + # enrich alerts + with tracer.start_as_current_span(f"{self.__class__.__name__}-enrich_alerts"): + pulled_alerts_enrichments = get_enrichments( + tenant_id=tenant_id, + fingerprints=grouped_alerts.keys(), + ) + for alert_enrichment in pulled_alerts_enrichments: + if alert_enrichment: + alerts_to_enrich = grouped_alerts.get( + alert_enrichment.alert_fingerprint + ) + for alert_to_enrich in alerts_to_enrich: + parse_and_enrich_deleted_and_assignees( + alert_to_enrich, alert_enrichment.enrichments + ) + for enrichment in alert_enrichment.enrichments: + # set the enrichment + setattr( + alert_to_enrich, + enrichment, + alert_enrichment.enrichments[enrichment], + ) + + return grouped_alerts + + def setup_webhook( + self, tenant_id: str, keep_api_url: str, api_key: str, setup_alerts: bool = True + ): + """ + Setup a webhook for the provider. + + Args: + tenant_id (str): _description_ + keep_api_url (str): _description_ + api_key (str): _description_ + setup_alerts (bool, optional): _description_. Defaults to True. + + Raises: + NotImplementedError: _description_ + """ + raise NotImplementedError("setup_webhook() method not implemented") + @staticmethod def get_alert_schema() -> dict: """ @@ -150,6 +430,37 @@ class BaseProvider(metaclass=abc.ABCMeta): "get_alert_format_description() method not implemented" ) + @staticmethod + def oauth2_logic(**payload) -> dict: + """ + Logic for oauth2 authentication. + + For example, in Slack oauth2, we need to get the code from the payload and exchange it for a token. 
+ + return: dict: The secrets to be saved as the provider configuration. (e.g. the Slack access token) + """ + raise NotImplementedError("oauth2_logic() method not implemented") + + @staticmethod + def parse_event_raw_body(raw_body: bytes) -> bytes: + """ + Parse the raw body of an event and create an ingestable dict from it. + + For instance, in parseable, the "event" is just a string + > b'Alert: Server side error triggered on teststream1\nMessage: server reporting status as 500\nFailing Condition: status column equal to abcd, 2 times' + and we want to return an object + > b"{'alert': 'Server side error triggered on teststream1', 'message': 'server reporting status as 500', 'failing_condition': 'status column equal to abcd, 2 times'}" + + If this method is not implemented for a provider, just return the raw body. + + Args: + raw_body (bytes): The raw body of the incoming event (/event endpoint in alerts.py) + + Returns: + dict: Ingestable event + """ + return raw_body + def get_logs(self, limit: int = 5) -> list: """ Get logs from the provider. @@ -169,4 +480,96 @@ class BaseProvider(metaclass=abc.ABCMeta): """ # TODO - implement dynamically using decorators and return {} + + def start_consume(self): + """Get the consumer for the provider. + + should be implemented by the provider if it has a consumer. + + for an example, see Kafka Provider + + Returns: + Consumer: The consumer for the provider. + """ + return + + def status(self) -> bool: + """Return the status of the provider. + + Returns: + bool: The status of the provider. + """ + return { + "status": "should be implemented by the provider if it has a consumer", + "error": "", + } + + @property + def is_consumer(self) -> bool: + """Return consumer if the inherited class has a start_consume method. + + Returns: + bool: _description_ + """ + return self.start_consume.__qualname__ != "BaseProvider.start_consume" + + def _push_alert(self, alert: dict): + """ + Push an alert to the provider. 
+ + Args: + alert (dict): The alert to push. + """ + # if this is not a dict, try to convert it to a dict + if not isinstance(alert, dict): + try: + alert_data = json.loads(alert) + except Exception: + alert_data = alert_data + else: + alert_data = alert + + # if this is still not a dict, we can't push it + if not isinstance(alert_data, dict): + self.logger.warning( + "We currently support only alert represented as a dict, dismissing alert", + extra={"alert": alert}, + ) + return + # now try to build the alert model + # we will have a lot of default values here to support all providers and all cases, the + # way to fine tune those would be to use the provider specific model or enforce that the event from the queue will be casted into the fields + alert_model = AlertDto( + id=alert_data.get("id", str(uuid.uuid4())), + name=alert_data.get("name", "alert-from-event-queue"), + status=alert_data.get("status", AlertStatus.FIRING), + lastReceived=alert_data.get("lastReceived", datetime.datetime.now()), + environment=alert_data.get("environment", "alert-from-event-queue"), + isDuplicate=alert_data.get("isDuplicate", False), + duplicateReason=alert_data.get("duplicateReason", None), + service=alert_data.get("service", "alert-from-event-queue"), + source=alert_data.get("source", [self.provider_type]), + message=alert_data.get("message", "alert-from-event-queue"), + description=alert_data.get("description", "alert-from-event-queue"), + severity=alert_data.get("severity", AlertSeverity.INFO), + pushed=alert_data.get("pushed", False), + event_id=alert_data.get("event_id", str(uuid.uuid4())), + url=alert_data.get("url", None), + fingerprint=alert_data.get("fingerprint", None), + ) + # push the alert to the provider + url = f'{os.environ["KEEP_API_URL"]}/alerts/event' + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + "X-API-KEY": self.context_manager.api_key, + } + response = requests.post(url, json=alert_model.dict(), headers=headers) + try: + 
response.raise_for_status() + self.logger.info("Alert pushed successfully") + except Exception: + self.logger.error( + f"Failed to push alert to {self.provider_id}: {response.content}" + ) ``` diff --git a/examples/workflows/signl4-alerting-workflow.yaml b/examples/workflows/signl4-alerting-workflow.yaml new file mode 100644 index 000000000..7923b3129 --- /dev/null +++ b/examples/workflows/signl4-alerting-workflow.yaml @@ -0,0 +1,18 @@ +id: signl4-alerting-workflow +description: handle alerts +triggers: +- filters: + - key: source + value: r".*" + type: alert +owners: [] +services: [] +steps: [] +actions: +- name: signl4-action + provider: + config: '{{ providers.SIGNL4 Alerting }}' + type: signl4 + with: + message: Test. + title: Keep Alert diff --git a/keep-ui/app/alerts/alert-actions.tsx b/keep-ui/app/alerts/alert-actions.tsx index d6ca4e316..ec26b7ca4 100644 --- a/keep-ui/app/alerts/alert-actions.tsx +++ b/keep-ui/app/alerts/alert-actions.tsx @@ -4,6 +4,10 @@ import { getSession } from "next-auth/react"; import { getApiURL } from "utils/apiUrl"; import { AlertDto } from "./models"; import { useAlerts } from "utils/hooks/useAlerts"; +import { PlusIcon } from "@radix-ui/react-icons"; +import { toast } from "react-toastify"; +import { usePresets } from "utils/hooks/usePresets"; +import { usePathname, useRouter } from "next/navigation"; interface Props { selectedRowIds: string[]; @@ -11,8 +15,18 @@ interface Props { } export default function AlertActions({ selectedRowIds, alerts }: Props) { + const pathname = usePathname(); + const router = useRouter(); const { useAllAlerts } = useAlerts(); - const { mutate } = useAllAlerts(); + const { mutate } = useAllAlerts({ revalidateOnFocus: false }); + const { useAllPresets } = usePresets(); + const { mutate: presetsMutator } = useAllPresets({ + revalidateOnFocus: false, + }); + + const selectedAlerts = alerts.filter((_alert, index) => + selectedRowIds.includes(index.toString()) + ); const onDelete = async () => { const 
confirmed = confirm( @@ -23,10 +37,6 @@ export default function AlertActions({ selectedRowIds, alerts }: Props) { const session = await getSession(); const apiUrl = getApiURL(); - const selectedAlerts = alerts.filter((_alert, index) => - selectedRowIds.includes(index.toString()) - ); - for await (const alert of selectedAlerts) { const { fingerprint } = alert; @@ -51,6 +61,36 @@ export default function AlertActions({ selectedRowIds, alerts }: Props) { } }; + async function addOrUpdatePreset() { + const presetName = prompt("Enter new preset name"); + if (presetName) { + const distinctAlertNames = Array.from( + new Set(selectedAlerts.map((alert) => alert.name)) + ); + const options = distinctAlertNames.map((name) => { + return { value: `name=${name}`, label: `name=${name}` }; + }); + const session = await getSession(); + const apiUrl = getApiURL(); + const response = await fetch(`${apiUrl}/preset`, { + method: "POST", + headers: { + Authorization: `Bearer ${session?.accessToken}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ name: presetName, options: options }), + }); + if (response.ok) { + toast(`Preset ${presetName} created!`, { + position: "top-left", + type: "success", + }); + presetsMutator(); + router.replace(`${pathname}?selectedPreset=${presetName}`); + } + } + } + return (
+
); } diff --git a/keep-ui/app/alerts/alert-assign-ticket-modal.tsx b/keep-ui/app/alerts/alert-assign-ticket-modal.tsx new file mode 100644 index 000000000..be2107132 --- /dev/null +++ b/keep-ui/app/alerts/alert-assign-ticket-modal.tsx @@ -0,0 +1,199 @@ +import React from 'react'; +import Select, { components } from 'react-select'; +import { Dialog } from '@headlessui/react'; +import { Button, TextInput } from '@tremor/react'; +import { PlusIcon } from '@heroicons/react/20/solid' +import { useForm, Controller, SubmitHandler } from 'react-hook-form'; +import { Providers } from "./../providers/providers"; +import { useSession } from "next-auth/react"; +import { getApiURL } from 'utils/apiUrl'; + +interface AlertAssignTicketModalProps { + isOpen: boolean; + onClose: () => void; + ticketingProviders: Providers; // Replace 'ProviderType' with the actual type of ticketingProviders + alertFingerprint: string; // Replace 'string' with the actual type of alertFingerprint +} + +interface OptionType { + value: string; + label: string; + id: string; + type: string; + icon?: string; + isAddProvider?: boolean; +} + +interface FormData { + provider: { + id: string; + value: string; + type: string; + }; + ticket_url: string; +} + +const AlertAssignTicketModal = ({ isOpen, onClose, ticketingProviders, alertFingerprint }: AlertAssignTicketModalProps) => { + const { handleSubmit, control, formState: { errors } } = useForm(); + // get the token + const { data: session } = useSession(); + + const onSubmit: SubmitHandler = async (data) => { + try { + // build the formData + const requestData = { + enrichments: { + ticket_type: data.provider.type, + ticket_url: data.ticket_url, + ticket_provider_id: data.provider.value, + }, + fingerprint: alertFingerprint, + }; + + + const response = await fetch(`${getApiURL()}/alerts/enrich`, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${session?.accessToken}`, + }, + body: JSON.stringify(requestData), 
+ }); + + if (response.ok) { + // Handle success + console.log("Ticket assigned successfully"); + onClose(); + } else { + // Handle error + console.error("Failed to assign ticket"); + } + } catch (error) { + // Handle unexpected error + console.error("An unexpected error occurred"); + } + }; + + const providerOptions: OptionType[] = ticketingProviders.map((provider) => ({ + id: provider.id, + value: provider.id, + label: provider.details.name || '', + type: provider.type, + })); + + const customOptions: OptionType[] = [ + ...providerOptions, + { + value: 'add_provider', + label: 'Add another ticketing provider', + icon: 'plus', + isAddProvider: true, + id: 'add_provider', + type: '', + }, + ]; + + const handleOnChange = (option: any) => { + if (option.value === 'add_provider') { + window.open('/providers?labels=ticketing', '_blank'); + } + }; + + + const Option = (props: any) => { + // Check if the option is 'add_provider' + const isAddProvider = props.data.isAddProvider; + + return ( + +
+ {isAddProvider ? ( + + ) : ( + props.data.type && + )} + {props.data.label} +
+
+ ); + }; + + const SingleValue = (props: any) => { + const { children, data } = props; + + return ( + +
+ {data.isAddProvider ? ( + + ) : ( + data.type && + )} + {children} +
+
+ ); + }; + + return ( + +
+ +
+ Assign Ticket + {ticketingProviders.length > 0 ? ( +
+
+ + ( +