
Commit

Merge branch 'master' into feature/cus-3546-get-connection-object-pagination-metrics
sgomezvillamor authored Jan 13, 2025
2 parents 5faf0ab + 457f96e commit 507a4f5
Showing 22 changed files with 905 additions and 712 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/build-and-test.yml
@@ -118,10 +118,12 @@ jobs:
run: |
echo "BACKEND_FILES=`find ./build/coverage-reports/ -type f | grep -E '(metadata-models|entity-registry|datahuyb-graphql-core|metadata-io|metadata-jobs|metadata-utils|metadata-service|medata-dao-impl|metadata-operation|li-utils|metadata-integration|metadata-events|metadata-auth|ingestion-scheduler|notifications|datahub-upgrade)' | xargs | sed 's/ /,/g'`" >> $GITHUB_ENV
echo "FRONTEND_FILES=`find ./build/coverage-reports/ -type f | grep -E '(datahub-frontend|datahub-web-react).*\.(xml|json)$' | xargs | sed 's/ /,/g'`" >> $GITHUB_ENV
- name: Generate tz artifact name
run: echo "NAME_TZ=$(echo ${{ matrix.timezone }} | tr '/' '-')" >> $GITHUB_ENV
- uses: actions/upload-artifact@v4
if: always()
with:
name: Test Results (build) - ${{ matrix.command}}-${{ matrix.timezone }}
name: Test Results (build) - ${{ matrix.command}}-${{ env.NAME_TZ }}
path: |
**/build/reports/tests/test/**
**/build/test-results/test/**
1 change: 1 addition & 0 deletions build.gradle
@@ -379,6 +379,7 @@ configure(subprojects.findAll {! it.name.startsWith('spark-lineage')}) {

resolutionStrategy.force externalDependency.antlr4Runtime
resolutionStrategy.force externalDependency.antlr4
resolutionStrategy.force 'org.apache.mina:mina-core:2.2.4'
}
}

4 changes: 0 additions & 4 deletions docker/datahub-upgrade/Dockerfile
@@ -34,16 +34,12 @@ ARG MAVEN_CENTRAL_REPO_URL
RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi

ENV JMX_VERSION=0.18.0
ENV JETTY_VERSION=11.0.21

# Upgrade Alpine and base packages
# PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762
RUN apk --no-cache --update-cache --available upgrade \
&& apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat snappy \
&& apk --no-cache add openjdk17-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \
&& curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-runner/${JETTY_VERSION}/jetty-runner-${JETTY_VERSION}.jar --output jetty-runner.jar \
&& curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-jmx/${JETTY_VERSION}/jetty-jmx-${JETTY_VERSION}.jar --output jetty-jmx.jar \
&& curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-util/${JETTY_VERSION}/jetty-util-${JETTY_VERSION}.jar --output jetty-util.jar \
&& wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \
&& wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \
&& cp /usr/lib/jvm/java-17-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks
24 changes: 20 additions & 4 deletions docs-website/README.md
@@ -130,7 +130,6 @@ The purpose of this section is to provide developers & technical users with conc

This section aims to provide plain-language feature overviews for both technical and non-technical readers alike.


## Docs Generation Features

**Includes all markdown files**
@@ -145,16 +144,33 @@ You can suppress this check by adding the path to the file in a comment in `side

Use an "inline" directive to include code snippets from other files. The `show_path_as_comment` option will include the path to the file as a comment at the top of the snippet.

```python
{{ inline /metadata-ingestion/examples/library/data_quality_mcpw_rest.py show_path_as_comment }}
```
```python
{{ inline /metadata-ingestion/examples/library/data_quality_mcpw_rest.py show_path_as_comment }}
```
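
With `show_path_as_comment`, the rendered snippet begins with the file path as a comment, roughly like this (an illustrative sketch; the body is the contents of the referenced file):

```python
# /metadata-ingestion/examples/library/data_quality_mcpw_rest.py
# ... contents of the referenced example file are inlined here ...
```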

**Command Output**

Use the `{{ command-output cmd }}` directive to run subprocesses and inject the outputs into the final markdown.

{{ command-output python -c 'print("Hello world")' }}

This also works for multi-line scripts.

{{ command-output
source metadata-ingestion/venv/bin/activate
python -m <something>
}}

Regardless of the location of the markdown file, the subcommands will be executed with the working directory set to the repo root.

Only the stdout of the subprocess will be injected. The stderr, if any, will be included as a comment in the markdown.
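
A rough Python sketch of the behavior described above (hypothetical, for illustration only; the real implementation is the TypeScript `markdown_process_command_output` function in `docs-website/generateDocsDir.ts`, shown later in this diff):

```python
import subprocess
from pathlib import Path

# Assumption for this sketch: the script lives one directory below the repo root.
REPO_ROOT = Path(__file__).resolve().parent.parent


def run_command_output_directive(command: str) -> str:
    """Run a {{ command-output ... }} command and return the text to inject."""
    # Commands always run from the repo root, regardless of the markdown file's location.
    result = subprocess.run(
        command, shell=True, cwd=REPO_ROOT, capture_output=True, text=True
    )
    output = result.stdout.strip()
    if result.returncode != 0 or result.stderr:
        # stderr (or a failure) is surfaced as a markdown comment, not as visible output.
        output += f"\n<!-- Error: {result.stderr.strip()} -->"
    return output


# Example: this would inject "Hello world" into the generated markdown.
print(run_command_output_directive('echo "Hello world"'))
```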

## Docs site generation process

This process is orchestrated by a combination of Gradle and Yarn tasks. The main entrypoint is the `docs-website:yarnGenerate` task, which in turn eventually runs `yarn run generate`.

Steps:

1. Generate the combined GraphQL schema using the `docs-website:generateGraphQLSchema` Gradle task. This generates `./graphql/combined.graphql`.
2. Generate docs for ingestion sources using the `:metadata-ingestion:docGen` gradle task.
3. Generate docs for our metadata model using the `:metadata-ingestion:modelDocGen` gradle task.
37 changes: 37 additions & 0 deletions docs-website/generateDocsDir.ts
@@ -439,6 +439,42 @@ function markdown_process_inline_directives(
contents.content = new_content;
}

function markdown_process_command_output(
contents: matter.GrayMatterFile<string>,
filepath: string
): void {
const new_content = contents.content.replace(
/^{{\s*command-output\s*([\s\S]*?)\s*}}$/gm,
(_, command: string) => {
try {
// Change to repo root directory before executing command
const repoRoot = path.resolve(__dirname, "..");

console.log(`Executing command: ${command}`);

// Execute the command and capture output
const output = execSync(command, {
cwd: repoRoot,
encoding: "utf8",
stdio: ["pipe", "pipe", "pipe"],
});

// Return the command output
return output.trim();
} catch (error: any) {
// If there's an error, include it as a comment
const errorMessage = error.stderr
? error.stderr.toString()
: error.message;
return `${
error.stdout ? error.stdout.toString().trim() : ""
}\n<!-- Error: ${errorMessage.trim()} -->`;
}
}
);
contents.content = new_content;
}

function markdown_sanitize_and_linkify(content: string): string {
// MDX escaping
content = content.replace(/</g, "&lt;");
@@ -602,6 +638,7 @@ function copy_python_wheels(): void {
markdown_rewrite_urls(contents, filepath);
markdown_enable_specials(contents, filepath);
markdown_process_inline_directives(contents, filepath);
markdown_process_command_output(contents, filepath);
//copy_platform_logos();
// console.log(contents);

36 changes: 36 additions & 0 deletions docs/managed-datahub/subscription-and-notification.md
@@ -17,9 +17,30 @@ Email will work out of box. For installing the DataHub Slack App, see:
This feature is especially useful in helping you stay on top of any upstream changes that could impact the assets you or your stakeholders rely on. It eliminates the need for you and your team to manually check for upstream changes, or for upstream stakeholders to identify and notify impacted users.
As a user, you can subscribe to and receive notifications about changes such as deprecations, schema changes, changes in ownership, assertions, or incidents. You'll always be in the know about potential data quality issues so you can proactively manage your data resources.


## Platform Admin Notifications

DataHub provides three levels of notifications:

- **Platform-level**
- **Group-level** (described in other sections)
- **User-level** (described in other sections)

**Setting Platform-Level Notifications:**
This requires appropriate permissions. Go to `Settings` > `Notifications` (under the `Platform` section, not `My Notifications`).

**Platform-level Notifications:**
Platform-level notifications apply to all assets within DataHub.
Example: If "An owner is added or removed from a data asset" is ticked, the designated Slack channel or email will receive notifications for any such changes across all assets.

**Our Recommendations:**

Notifying on tag changes for every asset in the platform would be noisy, so we recommend using platform-level notifications only where appropriate. For example, route ingestion-failure notifications to a central Slack channel or email; this will help you proactively ensure your DataHub metadata stays fresh.

## Prerequisites

Once you have [configured Slack within your DataHub instance](slack/saas-slack-setup.md), you will be able to subscribe to any Entity in DataHub and begin receiving notifications via DM.

To begin receiving personal notifications, go to Settings > "My Notifications". From here, toggle on Slack Notifications and input your Slack Member ID.

If you want to create and manage group-level Subscriptions for your team, you will need [the following privileges](../../docs/authorization/roles.md#role-privileges):
@@ -162,6 +183,21 @@ You can unsubscribe from any asset to stop receiving notifications about it. On
What if I want to be notified about different changes?
</summary>
To modify your subscription, use the dropdown menu next to the Subscribe button to change which updates you want to be notified about.
</details>
<details>
<summary>
I want to configure multiple channels. How many Slack channels or emails can I configure to get notified?
</summary>
At the platform level, you can configure one email and one Slack channel.

At the user and group levels, you can configure one default email and Slack channel, and override that email/channel when you subscribe to a specific asset.

To configure multiple channels, first ensure you have the appropriate privileges. Then:

1. Create a DataHub group for each channel you want notifications for.
2. Add yourself as a member of each group.
3. When you visit an asset and go to subscribe, you'll see the option "Manage Group Subscriptions".

</details>

## Reference
@@ -0,0 +1,16 @@
### Configuration Notes

1. See the [Microsoft Grant user access to a Report Server doc](https://docs.microsoft.com/en-us/sql/reporting-services/security/grant-user-access-to-a-report-server?view=sql-server-ver16) for granting access to the Report Server.
2. Use the user credentials from the previous step in your YAML recipe file.

### Concept mapping

| Power BI Report Server | DataHub     |
| ---------------------- | ----------- |
| `Paginated Report` | `Dashboard` |
| `Power BI Report` | `Dashboard` |
| `Mobile Report` | `Dashboard` |
| `Linked Report` | `Dashboard` |
| `Dataset, Datasource` | `N/A` |

This file was deleted.

38 changes: 25 additions & 13 deletions metadata-ingestion/scripts/avro_codegen.py
@@ -346,7 +346,7 @@ def write_urn_classes(key_aspects: List[dict], urn_dir: Path) -> None:
code = """
# This file contains classes corresponding to entity URNs.
from typing import ClassVar, List, Optional, Type, TYPE_CHECKING
from typing import ClassVar, List, Optional, Type, TYPE_CHECKING, Union
import functools
from deprecated.sphinx import deprecated as _sphinx_deprecated
@@ -547,10 +547,31 @@ def generate_urn_class(entity_type: str, key_aspect: dict) -> str:
assert fields[0]["type"] == ["null", "string"]
fields[0]["type"] = "string"

field_urn_type_classes = {}
for field in fields:
# Figure out if urn types are valid for each field.
field_urn_type_class = None
if field_name(field) == "platform":
field_urn_type_class = "DataPlatformUrn"
elif field.get("Urn"):
if len(field.get("entityTypes", [])) == 1:
field_entity_type = field["entityTypes"][0]
field_urn_type_class = f"{capitalize_entity_name(field_entity_type)}Urn"
else:
field_urn_type_class = "Urn"

field_urn_type_classes[field_name(field)] = field_urn_type_class

_init_arg_parts: List[str] = []
for field in fields:
field_urn_type_class = field_urn_type_classes[field_name(field)]

default = '"PROD"' if field_name(field) == "env" else None
_arg_part = f"{field_name(field)}: {field_type(field)}"

type_hint = field_type(field)
if field_urn_type_class:
type_hint = f'Union["{field_urn_type_class}", str]'
_arg_part = f"{field_name(field)}: {type_hint}"
if default:
_arg_part += f" = {default}"
_init_arg_parts.append(_arg_part)
@@ -579,16 +600,7 @@ def generate_urn_class(entity_type: str, key_aspect: dict) -> str:
init_validation += f'if not {field_name(field)}:\n raise InvalidUrnError("{class_name} {field_name(field)} cannot be empty")\n'

# Generalized mechanism for validating embedded urns.
field_urn_type_class = None
if field_name(field) == "platform":
field_urn_type_class = "DataPlatformUrn"
elif field.get("Urn"):
if len(field.get("entityTypes", [])) == 1:
field_entity_type = field["entityTypes"][0]
field_urn_type_class = f"{capitalize_entity_name(field_entity_type)}Urn"
else:
field_urn_type_class = "Urn"

field_urn_type_class = field_urn_type_classes[field_name(field)]
if field_urn_type_class:
init_validation += f"{field_name(field)} = str({field_name(field)})\n"
init_validation += (
@@ -608,7 +620,7 @@ def generate_urn_class(entity_type: str, key_aspect: dict) -> str:
init_coercion += " platform_name = DataPlatformUrn.from_string(platform_name).platform_name\n"

if field_name(field) == "platform":
init_coercion += "platform = DataPlatformUrn(platform).urn()\n"
init_coercion += "platform = platform.urn() if isinstance(platform, DataPlatformUrn) else DataPlatformUrn(platform).urn()\n"
elif field_urn_type_class is None:
# For all non-urns, run the value through the UrnEncoder.
init_coercion += (
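
The net effect of the `avro_codegen.py` changes above is that urn-typed constructor arguments on the generated URN classes accept either a typed Urn instance or a plain string. A minimal usage sketch, assuming the generated `DatasetUrn` and `DataPlatformUrn` classes are importable from `datahub.metadata.urns`:

```python
from datahub.metadata.urns import DataPlatformUrn, DatasetUrn

# Previous behavior, still supported: pass the platform as a plain string.
urn_from_str = DatasetUrn(platform="snowflake", name="db.schema.table", env="PROD")

# Enabled by the new Union["DataPlatformUrn", str] type hint: pass a DataPlatformUrn instance.
urn_from_obj = DatasetUrn(platform=DataPlatformUrn("snowflake"), name="db.schema.table")

# Both forms are coerced to the same platform urn internally.
assert urn_from_str.urn() == urn_from_obj.urn()
```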