Skip to content

Commit

Permalink
Merge pull request #306 from acryldata/oss-merge
Browse files Browse the repository at this point in the history
merge upstream 2
  • Loading branch information
hsheth2 authored Jan 17, 2025
2 parents 9d71e50 + 9d04e2a commit 9d34988
Show file tree
Hide file tree
Showing 44 changed files with 766 additions and 227 deletions.
10 changes: 8 additions & 2 deletions .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
branches:
- "**"
paths:
- ".github/workflows/documentation.yml"
- "metadata-ingestion/**"
- "metadata-models/**"
- "docs/**"
Expand All @@ -13,6 +14,7 @@ on:
branches:
- master
paths:
- ".github/workflows/documentation.yml"
- "metadata-ingestion/**"
- "metadata-models/**"
- "docs/**"
Expand Down Expand Up @@ -56,9 +58,13 @@ jobs:
./gradlew --info docs-website:build
- name: Deploy
if: github.event_name == 'push'
uses: peaceiris/actions-gh-pages@v3
if: github.event_name == 'push' && github.repository == 'acryldata/datahub'
uses: peaceiris/actions-gh-pages@v4
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./docs-website/build
cname: datahubproject.io
# The gh-pages branch stores the built docs site. We don't need to preserve
# the full history of the .html files, since they're generated from our
# source files. Doing so significantly reduces the size of the repo's .git dir.
force_orphan: true
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ metadata-service/plugin/src/test/resources/sample-plugins/**
smoke-test/rollback-reports
coverage*.xml
.vercel
.envrc

# A long series of binary directories we should ignore
datahub-frontend/bin/main/
Expand Down
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ project.ext.externalDependency = [
'junitJupiterEngine': "org.junit.jupiter:junit-jupiter-engine:$junitJupiterVersion",
// avro-serde includes dependencies for `kafka-avro-serializer` `kafka-schema-registry-client` and `avro`
'kafkaAvroSerde': "io.confluent:kafka-streams-avro-serde:$kafkaVersion",
'kafkaAvroSerializer': 'io.confluent:kafka-avro-serializer:5.1.4',
'kafkaAvroSerializer': "io.confluent:kafka-avro-serializer:$kafkaVersion",
'kafkaClients': "org.apache.kafka:kafka-clients:$kafkaVersion-ccs",
'snappy': 'org.xerial.snappy:snappy-java:1.1.10.5',
'logbackClassic': "ch.qos.logback:logback-classic:$logbackClassic",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,10 @@ public static <T> T restrictEntity(@Nonnull Object entity, Class<T> clazz) {
try {
Object[] args =
allFields.stream()
// New versions of graphql.codegen generate serialVersionUID
// We need to filter serialVersionUID out because serialVersionUID is
// never part of the entity type constructor
.filter(field -> !field.getName().contains("serialVersionUID"))
.map(
field -> {
// properties are often not required but only because
Expand Down
7 changes: 7 additions & 0 deletions datahub-web-react/src/app/context/CustomUserContext.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/**
* Custom User Context State - This is a custom user context state and can be overriden in specific fork of DataHub.
* The below type can be customized with specific object properties as well if needed.
*/
export type CustomUserContextState = Record<string, any>;

export const DEFAULT_CUSTOM_STATE: CustomUserContextState = {};
3 changes: 3 additions & 0 deletions datahub-web-react/src/app/context/userContext.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import React from 'react';
import { CorpUser, PlatformPrivileges } from '../../types.generated';
import { CustomUserContextState, DEFAULT_CUSTOM_STATE } from './CustomUserContext';

/**
* Local State is persisted to local storage.
Expand All @@ -22,6 +23,7 @@ export type State = {
loadedPersonalDefaultViewUrn: boolean;
hasSetDefaultView: boolean;
};
customState?: CustomUserContextState;
};

/**
Expand Down Expand Up @@ -51,6 +53,7 @@ export const DEFAULT_STATE: State = {
loadedPersonalDefaultViewUrn: false,
hasSetDefaultView: false,
},
customState: DEFAULT_CUSTOM_STATE,
};

export const DEFAULT_CONTEXT = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ function EntityName(props: Props) {
setIsEditing(false);
return;
}
setUpdatedName(name);
updateName({ variables: { input: { name, urn } } })
.then(() => {
setUpdatedName(name);
setIsEditing(false);
message.success({ content: 'Name Updated', duration: 2 });
refetch();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,26 @@ export const EditOwnersModal = ({
const renderSearchResult = (entity: Entity) => {
const avatarUrl =
(entity.type === EntityType.CorpUser && (entity as CorpUser).editableProperties?.pictureLink) || undefined;
const corpUserDepartmentName =
(entity.type === EntityType.CorpUser && (entity as CorpUser).properties?.departmentName) || '';
const corpUserId = (entity.type === EntityType.CorpUser && (entity as CorpUser).username) || '';
const corpUserTitle = (entity.type === EntityType.CorpUser && (entity as CorpUser).properties?.title) || '';
const displayName = entityRegistry.getDisplayName(entity.type, entity);

return (
<Select.Option value={entity.urn} key={entity.urn}>
<OwnerLabel name={displayName} avatarUrl={avatarUrl} type={entity.type} />
<Select.Option
key={entity.urn}
value={entity.urn}
label={<OwnerLabel name={displayName} avatarUrl={avatarUrl} type={entity.type} />}
>
<OwnerLabel
name={displayName}
avatarUrl={avatarUrl}
type={entity.type}
corpUserId={corpUserId}
corpUserTitle={corpUserTitle}
corpUserDepartmentName={corpUserDepartmentName}
/>
</Select.Option>
);
};
Expand Down Expand Up @@ -381,6 +397,7 @@ export const EditOwnersModal = ({
value: owner.value.ownerUrn,
label: owner.label,
}))}
optionLabelProp="label"
>
{ownerSearchOptions}
</SelectInput>
Expand Down
12 changes: 10 additions & 2 deletions datahub-web-react/src/app/shared/OwnerLabel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,22 @@ type Props = {
name: string;
avatarUrl: string | undefined;
type: EntityType;
corpUserId?: string;
corpUserTitle?: string;
corpUserDepartmentName?: string;
};

export const OwnerLabel = ({ name, avatarUrl, type }: Props) => {
export const OwnerLabel = ({ name, avatarUrl, type, corpUserId, corpUserTitle, corpUserDepartmentName }: Props) => {
const subHeader = [corpUserId, corpUserTitle, corpUserDepartmentName].filter(Boolean).join(' - ');

return (
<OwnerContainerWrapper>
<OwnerContentWrapper>
<CustomAvatar size={24} name={name} photoUrl={avatarUrl} isGroup={type === EntityType.CorpGroup} />
<div>{name}</div>
<div>
<div>{name}</div>
{subHeader && <div style={{ color: 'gray' }}>{subHeader}</div>}
</div>
</OwnerContentWrapper>
</OwnerContainerWrapper>
);
Expand Down
4 changes: 4 additions & 0 deletions datahub-web-react/src/graphql/search.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,8 @@ fragment searchResultsWithoutSchemaField on Entity {
lastName
fullName
email
departmentName
title
}
info {
active
Expand All @@ -442,6 +444,8 @@ fragment searchResultsWithoutSchemaField on Entity {
lastName
fullName
email
departmentName
title
}
editableProperties {
displayName
Expand Down
2 changes: 1 addition & 1 deletion docs-website/docusaurus.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ module.exports = {
announcementBar: {
id: "announcement-3",
content:
'<div style="display: flex; justify-content: center; align-items: center;width: 100%;"><!--img src="/img/acryl-logo-white-mark.svg" / --><!--div style="font-size: .8rem; font-weight: 600; background-color: white; color: #111; padding: 0px 8px; border-radius: 4px; margin-right:12px;">NEW</div--><p>Watch Metadata & AI Summit sessions on-demand.</p><a href="https://www.youtube.com/@DataHubProject/videos" target="_blank" class="button">Watch Now<span> →</span></a></div>',
'<div style="display: flex; justify-content: center; align-items: center;width: 100%;"><!--img src="/img/acryl-logo-white-mark.svg" / --><!--div style="font-size: .8rem; font-weight: 600; background-color: white; color: #111; padding: 0px 8px; border-radius: 4px; margin-right:12px;">NEW</div--><p>Learn about DataHub 1.0 launching at our 5th birthday party!</p><a href="https://lu.ma/0j5jcocn" target="_blank" class="button">Register<span> →</span></a></div>',
backgroundColor: "#111",
textColor: "#ffffff",
isCloseable: false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const solutionsDropdownContent = {
title: "DataHub Core",
description: "Get started with the Open Source platform.",
iconImage: "/img/solutions/icon-dropdown-core.png",
href: "/",
href: "/docs/quickstart",
},
{
title: "Cloud vs Core",
Expand Down
8 changes: 4 additions & 4 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,17 +115,17 @@ datahub ingest -c ./examples/recipes/example_to_datahub_rest.dhub.yaml --dry-run
datahub ingest -c ./examples/recipes/example_to_datahub_rest.dhub.yaml -n
```

#### ingest --list-source-runs
#### ingest list-source-runs

The `--list-source-runs` option of the `ingest` command lists the previous runs, displaying their run ID, source name,
The `list-source-runs` option of the `ingest` command lists the previous runs, displaying their run ID, source name,
start time, status, and source URN. This command allows you to filter results using the --urn option for URN-based
filtering or the --source option to filter by source name (partial or complete matches are supported).

```shell
# List all ingestion runs
datahub ingest --list-source-runs
datahub ingest list-source-runs
# Filter runs by a source name containing "demo"
datahub ingest --list-source-runs --source "demo"
datahub ingest list-source-runs --source "demo"
```

#### ingest --preview
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package com.linkedin.common.urn;

import com.linkedin.data.template.Custom;
import com.linkedin.data.template.DirectCoercer;
import com.linkedin.data.template.TemplateOutputCastException;
import java.net.URISyntaxException;

public final class DataPlatformInstanceUrn extends Urn {

public static final String ENTITY_TYPE = "dataPlatformInstance";

private final DataPlatformUrn _platform;
private final String _instanceId;

public DataPlatformInstanceUrn(DataPlatformUrn platform, String instanceId) {
super(ENTITY_TYPE, TupleKey.create(platform, instanceId));
this._platform = platform;
this._instanceId = instanceId;
}

public DataPlatformUrn getPlatformEntity() {
return _platform;
}

public String getInstance() {
return _instanceId;
}

public static DataPlatformInstanceUrn createFromString(String rawUrn) throws URISyntaxException {
return createFromUrn(Urn.createFromString(rawUrn));
}

public static DataPlatformInstanceUrn createFromUrn(Urn urn) throws URISyntaxException {
if (!"li".equals(urn.getNamespace())) {
throw new URISyntaxException(urn.toString(), "Urn namespace type should be 'li'.");
} else if (!ENTITY_TYPE.equals(urn.getEntityType())) {
throw new URISyntaxException(
urn.toString(), "Urn entity type should be 'dataPlatformInstance'.");
} else {
TupleKey key = urn.getEntityKey();
if (key.size() != 2) {
throw new URISyntaxException(urn.toString(), "Invalid number of keys.");
} else {
try {
return new DataPlatformInstanceUrn(
(DataPlatformUrn) key.getAs(0, DataPlatformUrn.class),
(String) key.getAs(1, String.class));
} catch (Exception e) {
throw new URISyntaxException(urn.toString(), "Invalid URN Parameter: '" + e.getMessage());
}
}
}
}

public static DataPlatformInstanceUrn deserialize(String rawUrn) throws URISyntaxException {
return createFromString(rawUrn);
}

static {
Custom.initializeCustomClass(DataPlatformUrn.class);
Custom.initializeCustomClass(DataPlatformInstanceUrn.class);
Custom.registerCoercer(
new DirectCoercer<DataPlatformInstanceUrn>() {
public Object coerceInput(DataPlatformInstanceUrn object) throws ClassCastException {
return object.toString();
}

public DataPlatformInstanceUrn coerceOutput(Object object)
throws TemplateOutputCastException {
try {
return DataPlatformInstanceUrn.createFromString((String) object);
} catch (URISyntaxException e) {
throw new TemplateOutputCastException("Invalid URN syntax: " + e.getMessage(), e);
}
}
},
DataPlatformInstanceUrn.class);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
namespace com.linkedin.common

/**
* Standardized dataset identifier.
*/
@java.class = "com.linkedin.common.urn.DataPlatformInstanceUrn"
@validate.`com.linkedin.common.validator.TypedUrnValidator` = {
"accessible" : true,
"owningTeam" : "urn:li:internalTeam:datahub",
"entityType" : "dataPlatformInstance",
"constructable" : true,
"namespace" : "li",
"name" : "DataPlatformInstance",
"doc" : "Standardized data platform instance identifier.",
"owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ],
"fields" : [ {
"type" : "com.linkedin.common.urn.DataPlatformUrn",
"name" : "platform",
"doc" : "Standardized platform urn."
}, {
"name" : "instance",
"doc" : "Instance of the data platform (e.g. db instance)",
"type" : "string",
} ],
"maxLength" : 100
}
typeref DataPlatformInstanceUrn = string
13 changes: 3 additions & 10 deletions metadata-ingestion/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -106,25 +106,18 @@ task modelDocUpload(type: Exec, dependsOn: [modelDocGen]) {


task lint(type: Exec, dependsOn: installDev) {
/*
The find/sed combo below is a temporary work-around for the following mypy issue with airflow 2.2.0:
"venv/lib/python3.8/site-packages/airflow/_vendor/connexion/spec.py:169: error: invalid syntax".
*/
commandLine 'bash', '-c',
"find ${venv_name}/lib -path *airflow/_vendor/connexion/spec.py -exec sed -i.bak -e '169,169s/ # type: List\\[str\\]//g' {} \\; && " +
"source ${venv_name}/bin/activate && set -x && " +
"black --check --diff src/ tests/ examples/ && " +
"isort --check --diff src/ tests/ examples/ && " +
"flake8 --count --statistics src/ tests/ examples/ && " +
"ruff check src/ tests/ examples/ && " +
"mypy --show-traceback --show-error-codes src/ tests/ examples/"
}

task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"black src/ tests/ examples/ && " +
"isort src/ tests/ examples/ && " +
"flake8 src/ tests/ examples/ && " +
"mypy --show-traceback --show-error-codes src/ tests/ examples/"
"ruff check --fix src/ tests/ examples/"
}

def pytest_default_env = "PYTHONDEVMODE=1"
Expand Down
10 changes: 7 additions & 3 deletions metadata-ingestion/developing.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ cd metadata-ingestion-modules/gx-plugin
source venv/bin/activate
datahub version # should print "DataHub CLI version: unavailable (installed in develop mode)"
```

### (Optional) Set up your Python environment for developing on Dagster Plugin

From the repository root:
Expand All @@ -99,6 +100,7 @@ cd metadata-ingestion-modules/dagster-plugin
source venv/bin/activate
datahub version # should print "DataHub CLI version: unavailable (installed in develop mode)"
```

### Common setup issues

Common issues (click to expand):
Expand Down Expand Up @@ -175,19 +177,21 @@ The architecture of this metadata ingestion framework is heavily inspired by [Ap

## Code style

We use black, isort, flake8, and mypy to ensure consistent code style and quality.
We use black, ruff, and mypy to ensure consistent code style and quality.

```shell
# Assumes: pip install -e '.[dev]' and venv is activated
black src/ tests/
isort src/ tests/
flake8 src/ tests/
ruff check src/ tests/
mypy src/ tests/
```

or you can run from root of the repository

```shell
./gradlew :metadata-ingestion:lint

# This will auto-fix some linting issues.
./gradlew :metadata-ingestion:lintFix
```

Expand Down
Loading

0 comments on commit 9d34988

Please sign in to comment.