Skip to content

Commit

Permalink
Merge branch 'master' into cus3379-tableau-ingestion-node-limit-exceeded
Browse files Browse the repository at this point in the history
  • Loading branch information
sid-acryl authored Dec 10, 2024
2 parents 53a205c + 0a2ac70 commit 6069326
Show file tree
Hide file tree
Showing 47 changed files with 1,420 additions and 650 deletions.
16 changes: 4 additions & 12 deletions .github/workflows/airflow-plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,21 @@ jobs:
include:
# Note: this should be kept in sync with tox.ini.
- python-version: "3.8"
extra_pip_requirements: "apache-airflow~=2.1.4"
extra_pip_extras: plugin-v1
- python-version: "3.8"
extra_pip_requirements: "apache-airflow~=2.2.4"
extra_pip_extras: plugin-v1
extra_pip_requirements: "apache-airflow~=2.3.4"
extra_pip_extras: test-airflow23
- python-version: "3.10"
extra_pip_requirements: "apache-airflow~=2.4.3"
extra_pip_extras: plugin-v2,test-airflow24
extra_pip_extras: test-airflow24
- python-version: "3.10"
extra_pip_requirements: "apache-airflow~=2.6.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.10.txt"
extra_pip_extras: plugin-v2
- python-version: "3.10"
extra_pip_requirements: "apache-airflow~=2.7.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt"
extra_pip_extras: plugin-v2
- python-version: "3.10"
extra_pip_requirements: "apache-airflow~=2.8.1 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.1/constraints-3.10.txt"
extra_pip_extras: plugin-v2
- python-version: "3.11"
extra_pip_requirements: "apache-airflow~=2.9.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.11.txt"
extra_pip_extras: plugin-v2
- python-version: "3.11"
extra_pip_requirements: "apache-airflow~=2.10.2 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.10.2/constraints-3.11.txt"
extra_pip_extras: plugin-v2
extra_pip_requirements: "apache-airflow~=2.10.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.10.3/constraints-3.11.txt"
fail-fast: false
steps:
- name: Set up JDK 17
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import { useGetSearchResultsLazyQuery } from '../../../../../../../graphql/searc
import { Container, Entity, EntityType } from '../../../../../../../types.generated';
import { useEnterKeyListener } from '../../../../../../shared/useEnterKeyListener';
import { useEntityRegistry } from '../../../../../../useEntityRegistry';
import { getParentEntities } from '../../../../../../search/filters/utils';
import ParentEntities from '../../../../../../search/filters/ParentEntities';

type Props = {
onCloseModal: () => void;
Expand All @@ -26,14 +28,18 @@ const StyleTag = styled(Tag)`
align-items: center;
`;

const PreviewImage = styled.img`
export const PreviewImage = styled.img`
max-height: 18px;
width: auto;
object-fit: contain;
background-color: transparent;
margin-right: 4px;
`;

// Width-limited wrapper (max 400px) placed around the parent-entity list shown with a container.
export const ParentWrapper = styled.div`
max-width: 400px;
`;

export const ContainerSelectModal = ({ onCloseModal, defaultValues, onOkOverride, titleOverride }: Props) => {
const [containerSearch, { data: platforSearchData }] = useGetSearchResultsLazyQuery();
const entityRegistry = useEntityRegistry();
Expand Down Expand Up @@ -65,10 +71,16 @@ export const ContainerSelectModal = ({ onCloseModal, defaultValues, onOkOverride
// Renders a search result in the select dropdown.
const renderSearchResult = (entity: Container) => {
const displayName = entityRegistry.getDisplayName(EntityType.Container, entity);
const parentEntities: Entity[] = getParentEntities(entity as Entity) || [];

const truncatedDisplayName = displayName.length > 25 ? `${displayName.slice(0, 25)}...` : displayName;
return (
<Tooltip title={displayName}>
{parentEntities.length > 0 && (
<ParentWrapper>
<ParentEntities parentEntities={parentEntities} />
</ParentWrapper>
)}
<PreviewImage src={entity.platform?.properties?.logoUrl || undefined} alt={entity.properties?.name} />
<span>{truncatedDisplayName}</span>
</Tooltip>
Expand Down
11 changes: 10 additions & 1 deletion datahub-web-react/src/app/search/SearchFilterLabel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ import CustomAvatar from '../shared/avatar/CustomAvatar';
import { IconStyleType } from '../entity/Entity';
import { formatNumber } from '../shared/formatNumber';
import useGetBrowseV2LabelOverride from './filters/useGetBrowseV2LabelOverride';
import { getParentEntities } from './filters/utils';
import { ParentWrapper } from '../entity/shared/containers/profile/sidebar/Container/ContainerSelectModal';
import ParentEntities from './filters/ParentEntities';

type Props = {
field: string;
Expand Down Expand Up @@ -157,11 +160,17 @@ export const SearchFilterLabel = ({ field, value, entity, count, hideCount }: Pr
if (entity?.type === EntityType.Container) {
const container = entity as Container;
const displayName = entityRegistry.getDisplayName(EntityType.Container, container);
const parentEntities: Entity[] = getParentEntities(container as Entity) || [];
const truncatedDisplayName = displayName.length > 25 ? `${displayName.slice(0, 25)}...` : displayName;
return (
<Tooltip title={displayName}>
{!!container.platform?.properties?.logoUrl && (
<PreviewImage src={container.platform?.properties?.logoUrl} alt={container.properties?.name} />
<>
<ParentWrapper style={{ width: '200px' }}>
<ParentEntities parentEntities={parentEntities} />
</ParentWrapper>
<PreviewImage src={container.platform?.properties?.logoUrl} alt={container.properties?.name} />
</>
)}
<span>
{truncatedDisplayName}
Expand Down
16 changes: 15 additions & 1 deletion datahub-web-react/src/app/search/SimpleSearchFilters.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@ import { FacetFilterInput, FacetMetadata } from '../../types.generated';
import { FilterScenarioType } from './filters/render/types';
import { useFilterRendererRegistry } from './filters/render/useFilterRenderer';
import { SimpleSearchFilter } from './SimpleSearchFilter';
import { ENTITY_FILTER_NAME, ENTITY_INDEX_FILTER_NAME, LEGACY_ENTITY_FILTER_NAME } from './utils/constants';
import {
DEGREE_FILTER_NAME,
ENTITY_FILTER_NAME,
ENTITY_INDEX_FILTER_NAME,
LEGACY_ENTITY_FILTER_NAME,
} from './utils/constants';

const TOP_FILTERS = ['degree', ENTITY_FILTER_NAME, 'platform', 'tags', 'glossaryTerms', 'domains', 'owners'];

Expand Down Expand Up @@ -43,6 +48,15 @@ export const SimpleSearchFilters = ({ facets, selectedFilters, onFilterSelect, l
: filter,
)
.filter((filter) => filter.field !== field || !(filter.values?.length === 0));

// Do not let user unselect all degree filters
if (field === DEGREE_FILTER_NAME && !selected) {
const hasDegreeFilter = newFilters.find((filter) => filter.field === DEGREE_FILTER_NAME);
if (!hasDegreeFilter) {
return;
}
}

setCachedProps({ ...cachedProps, selectedFilters: newFilters });
onFilterSelect(newFilters);
};
Expand Down
3 changes: 2 additions & 1 deletion datahub-web-react/src/app/search/filters/FilterOption.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { generateColor } from '../../entity/shared/components/styled/StyledTag';
import { ANTD_GRAY } from '../../entity/shared/constants';
import { useEntityRegistry } from '../../useEntityRegistry';
import {
CONTAINER_FILTER_NAME,
ENTITY_SUB_TYPE_FILTER_NAME,
MAX_COUNT_VAL,
PLATFORM_FILTER_NAME,
Expand Down Expand Up @@ -125,7 +126,7 @@ export default function FilterOption({
const { field, value, count, entity } = filterOption;
const entityRegistry = useEntityRegistry();
const { icon, label } = getFilterIconAndLabel(field, value, entityRegistry, entity || null, 14);
const shouldShowIcon = field === PLATFORM_FILTER_NAME && icon !== null;
const shouldShowIcon = (field === PLATFORM_FILTER_NAME || field === CONTAINER_FILTER_NAME) && icon !== null;
const shouldShowTagColor = field === TAGS_FILTER_NAME && entity?.type === EntityType.Tag;
const isSubTypeFilter = field === TYPE_NAMES_FILTER_NAME;
const parentEntities: Entity[] = getParentEntities(entity as Entity) || [];
Expand Down
14 changes: 14 additions & 0 deletions datahub-web-react/src/app/search/filters/utils.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import {
FacetFilterInput,
FacetMetadata,
GlossaryTerm,
Container,
} from '../../../types.generated';
import { IconStyleType } from '../../entity/Entity';
import {
Expand Down Expand Up @@ -186,6 +187,15 @@ export function getFilterIconAndLabel(
entityRegistry.getIcon(EntityType.DataPlatform, size || 12, IconStyleType.ACCENT, ANTD_GRAY[9])
);
label = filterEntity ? entityRegistry.getDisplayName(EntityType.DataPlatform, filterEntity) : filterValue;
} else if (filterField === CONTAINER_FILTER_NAME) {
// Scenario where the filter entity exists and filterField is container
const logoUrl = (filterEntity as Container)?.platform?.properties?.logoUrl;
icon = logoUrl ? (
<PlatformIcon src={logoUrl} size={size} />
) : (
entityRegistry.getIcon(EntityType.DataPlatform, size || 12, IconStyleType.ACCENT, ANTD_GRAY[9])
);
label = entityRegistry.getDisplayName(EntityType.Container, filterEntity);
} else if (filterField === BROWSE_PATH_V2_FILTER_NAME) {
icon = <FolderFilled size={size} color="black" />;
label = getLastBrowseEntryFromFilterValue(filterValue);
Expand All @@ -196,6 +206,7 @@ export function getFilterIconAndLabel(
filterEntity,
size,
);

icon = newIcon;
label = newLabel;
} else {
Expand Down Expand Up @@ -344,6 +355,9 @@ export function getParentEntities(entity: Entity): Entity[] | null {
if (entity.type === EntityType.Domain) {
return (entity as Domain).parentDomains?.domains || [];
}
if (entity.type === EntityType.Container) {
return (entity as Container).parentContainers?.containers || [];
}
return null;
}

Expand Down
1 change: 1 addition & 0 deletions datahub-web-react/src/graphql/fragments.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,7 @@ fragment entityContainer on Container {

fragment parentContainerFields on Container {
urn
type
properties {
name
}
Expand Down
3 changes: 3 additions & 0 deletions datahub-web-react/src/graphql/search.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,9 @@ fragment facetFields on FacetMetadata {
properties {
name
}
parentContainers {
...parentContainersFields
}
}
... on CorpUser {
username
Expand Down
11 changes: 10 additions & 1 deletion docs/how/updating-datahub.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe

- #11701: The Fivetran `sources_to_database` field is deprecated in favor of setting directly within `sources_to_platform_instance.<key>.database`.
- #11742: For PowerBi ingestion, `use_powerbi_email` is now enabled by default when extracting ownership information.
- #12056: The DataHub Airflow plugin no longer supports Airflow 2.1 and Airflow 2.2.
- #12056: The DataHub Airflow plugin now defaults to the v2 plugin implementation.

### Breaking Changes

Expand All @@ -46,7 +48,14 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
- #11619 - schema field/column paths can no longer be duplicated within the schema
- #11570 - The `DatahubClientConfig`'s server field no longer defaults to `http://localhost:8080`. Be sure to explicitly set this.
- #11570 - If a `datahub_api` is explicitly passed to a stateful ingestion config provider, it will be used. We previously ignored it if the pipeline context also had a graph object.
- #11518 - DataHub Garbage Collection: Various entities that are soft-deleted (after 10d) or are timeseries *entities* (dataprocess, execution requests) will be removed automatically using logic in the `datahub-gc` ingestion source.
- #11518 - DataHub Garbage Collection: Various entities that are soft-deleted
(after 10d) or are timeseries *entities* (dataprocess, execution requests)
will be removed automatically using logic in the `datahub-gc` ingestion
source.
- #12067 - The default behavior of `DataJobPatchBuilder` in the Python SDK has
changed: it no longer fills out the `created` and `lastModified` audit stamps
for input and output dataset edges. This should not have any user-observable
impact (time-based lineage visualization will continue to work based on observed time), but it could break assumptions previously made by clients.

### Potential Downtime

Expand Down
24 changes: 13 additions & 11 deletions docs/lineage/airflow.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ The DataHub Airflow plugin supports:
- Task run information, including task successes and failures.
- Manual lineage annotations using `inlets` and `outlets` on Airflow operators.

There's two actively supported implementations of the plugin, with different Airflow version support.
There are two implementations of the plugin, with different Airflow version support.

| Approach | Airflow Version | Notes |
| --------- | --------------- | --------------------------------------------------------------------------- |
| Plugin v2 | 2.3.4+ | Recommended. Requires Python 3.8+ |
| Plugin v1 | 2.1 - 2.8 | No automatic lineage extraction; may not extract lineage if the task fails. |
| Approach | Airflow Versions | Notes |
| --------- | ---------------- | --------------------------------------------------------------------------------------- |
| Plugin v2 | 2.3.4+ | Recommended. Requires Python 3.8+ |
| Plugin v1 | 2.3 - 2.8 | Deprecated. No automatic lineage extraction; may not extract lineage if the task fails. |

If you're using Airflow older than 2.1, it's possible to use the v1 plugin with older versions of `acryl-datahub-airflow-plugin`. See the [compatibility section](#compatibility) for more details.
If you're using Airflow older than 2.3, it's possible to use the v1 plugin with older versions of `acryl-datahub-airflow-plugin`. See the [compatibility section](#compatibility) for more details.

<!-- TODO: Update the local Airflow guide and link to it here. -->
<!-- If you are looking to run Airflow and DataHub using docker locally, follow the guide [here](../../docker/airflow/local_airflow.md). -->
Expand All @@ -29,7 +29,7 @@ If you're using Airflow older than 2.1, it's possible to use the v1 plugin with

### Installation

The v2 plugin requires Airflow 2.3+ and Python 3.8+. If you don't meet these requirements, use the v1 plugin instead.
The v2 plugin requires Airflow 2.3+ and Python 3.8+. If you don't meet these requirements, see the [compatibility section](#compatibility) for other options.

```shell
pip install 'acryl-datahub-airflow-plugin[plugin-v2]'
Expand Down Expand Up @@ -84,9 +84,10 @@ enabled = True # default

### Installation

The v1 plugin requires Airflow 2.1 - 2.8 and Python 3.8+. If you're on older versions, it's still possible to use an older version of the plugin. See the [compatibility section](#compatibility) for more details.
The v1 plugin requires Airflow 2.3 - 2.8 and Python 3.8+. If you're on older versions, it's still possible to use an older version of the plugin. See the [compatibility section](#compatibility) for more details.

If you're using Airflow 2.3+, we recommend using the v2 plugin instead. If you need to use the v1 plugin with Airflow 2.3+, you must also set the environment variable `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN=true`.
Note that the v1 plugin has fewer features than the v2 plugin and is not actively maintained.
Since DataHub v0.15.0, the v2 plugin has been the default. If you need to use the v1 plugin with `acryl-datahub-airflow-plugin` v0.15.0+, you must also set the environment variable `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN=true`.

```shell
pip install 'acryl-datahub-airflow-plugin[plugin-v1]'
Expand Down Expand Up @@ -340,11 +341,12 @@ The solution is to upgrade `acryl-datahub-airflow-plugin>=0.12.0.4` or upgrade `

## Compatibility

We no longer officially support Airflow <2.1. However, you can use older versions of `acryl-datahub-airflow-plugin` with older versions of Airflow.
Both of these options support Python 3.7+.
We no longer officially support Airflow <2.3. However, you can use older versions of `acryl-datahub-airflow-plugin` with older versions of Airflow.
The first two options support Python 3.7+, and the last option supports Python 3.8+.

- Airflow 1.10.x, use DataHub plugin v1 with acryl-datahub-airflow-plugin <= 0.9.1.0.
- Airflow 2.0.x, use DataHub plugin v1 with acryl-datahub-airflow-plugin <= 0.11.0.1.
- Airflow 2.2.x, use DataHub plugin v1 with acryl-datahub-airflow-plugin <= 0.14.1.5.

DataHub also previously supported an Airflow [lineage backend](https://airflow.apache.org/docs/apache-airflow/2.2.0/lineage.html#lineage-backend) implementation. While the implementation is still in our codebase, it is deprecated and will be removed in a future release.
Note that the lineage backend did not support automatic lineage extraction, did not capture task failures, and did not work in AWS MWAA.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@
public interface AspectsBatch {
Collection<? extends BatchItem> getItems();

Collection<? extends BatchItem> getInitialItems();

RetrieverContext getRetrieverContext();

/**
* Returns MCP items. Could be patch, upsert, etc.
* Returns MCP items. Could be one of patch, upsert, etc.
*
* @return batch items
*/
Expand Down Expand Up @@ -160,13 +162,24 @@ static Stream<MCLItem> applyMCLSideEffects(
}

default boolean containsDuplicateAspects() {
return getItems().stream()
.map(i -> String.format("%s_%s", i.getClass().getName(), i.hashCode()))
return getInitialItems().stream()
.map(i -> String.format("%s_%s", i.getClass().getSimpleName(), i.hashCode()))
.distinct()
.count()
!= getItems().size();
}

/**
 * Finds the groups of initial batch items that share the same duplicate key.
 *
 * <p>Every item returned by {@code getInitialItems()} is keyed by its simple class name and its
 * {@code hashCode()}, formatted as {@code "<SimpleName>_<hashCode>"}. Only keys that map to more
 * than one item are kept in the result.
 *
 * @return map from duplicate key to the items sharing that key; empty when no key repeats
 */
default Map<String, List<? extends BatchItem>> duplicateAspects() {
return getInitialItems().stream()
.collect(
Collectors.groupingBy(
i -> String.format("%s_%s", i.getClass().getSimpleName(), i.hashCode())))
.entrySet()
.stream()
// Keep only keys seen more than once, i.e. the actual duplicates.
.filter(entry -> entry.getValue() != null && entry.getValue().size() > 1)
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
}

default Map<String, Set<String>> getUrnAspectsMap() {
return getItems().stream()
.map(aspect -> Pair.of(aspect.getUrn().toString(), aspect.getAspectName()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,11 @@ public interface BatchItem extends ReadItem {
*/
@Nonnull
ChangeType getChangeType();

/**
* Determines if this item is a duplicate of another item in terms of the operation it represents
* to the database. Each implementation can define what constitutes a duplicate based on its
* specific fields which are persisted.
*
* @param other the batch item to compare this item against
* @return whether this item is considered a database duplicate of {@code other}
*/
boolean isDatabaseDuplicateOf(BatchItem other);
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import com.linkedin.metadata.models.registry.config.LoadStatus;
import com.linkedin.util.Pair;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.nio.file.Files;
Expand Down Expand Up @@ -204,8 +203,8 @@ private void loadOneRegistry(
loadResultBuilder.plugins(entityRegistry.getPluginFactory().getPluginLoadResult());

log.info("Loaded registry {} successfully", entityRegistry);
} catch (RuntimeException | EntityRegistryException | IOException e) {
log.debug("{}: Failed to load registry {} with {}", this, registryName, e.getMessage());
} catch (Exception | EntityRegistryException e) {
log.error("{}: Failed to load registry {} with {}", this, registryName, e.getMessage(), e);
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
e.printStackTrace(pw);
Expand Down
Loading

0 comments on commit 6069326

Please sign in to comment.