Skip to content

Commit

Permalink
feat(ingest/dbt): dbt model performance (datahub-project#9992)
Browse files: browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Mar 27, 2024
1 parent ef0048e commit f0bdc24
Show file tree
Hide file tree
Showing 35 changed files with 19,911 additions and 236 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ public com.linkedin.datahub.graphql.generated.DataProcessRunEvent apply(
if (runEvent.hasResult()) {
result.setResult(DataProcessInstanceRunResultMapper.map(context, runEvent.getResult()));
}
if (runEvent.hasDurationMillis()) {
result.setDurationMillis(runEvent.getDurationMillis());
}

return result;
}
Expand Down
5 changes: 5 additions & 0 deletions datahub-graphql-core/src/main/resources/entity.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -6471,6 +6471,11 @@ type DataProcessRunEvent implements TimeSeriesAspect {
The timestamp associated with the run event in milliseconds
"""
timestampMillis: Long!

"""
The duration of the run in milliseconds
"""
durationMillis: Long
}

"""
Expand Down
3 changes: 2 additions & 1 deletion datahub-web-react/.eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ module.exports = {
'plugin:vitest/recommended',
'prettier',
],
plugins: ['@typescript-eslint'],
plugins: ['@typescript-eslint', 'react-refresh'],
parserOptions: {
ecmaVersion: 2020, // Allows for the parsing of modern ECMAScript features
sourceType: 'module', // Allows for the use of imports
Expand Down Expand Up @@ -48,6 +48,7 @@ module.exports = {
],
'vitest/prefer-to-be': 'off',
'@typescript-eslint/no-use-before-define': ['error', { functions: false, classes: false }],
'react-refresh/only-export-components': ['warn', { 'allowConstantExport': true }],
},
settings: {
react: {
Expand Down
1 change: 1 addition & 0 deletions datahub-web-react/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
"eslint-plugin-jsx-a11y": "^6.5.1",
"eslint-plugin-react": "^7.28.0",
"eslint-plugin-react-hooks": "^4.3.0",
"eslint-plugin-react-refresh": "^0.4.6",
"eslint-plugin-vitest": "^0.3.17",
"jsdom": "^22.1.0",
"less": "^4.2.0",
Expand Down
3 changes: 1 addition & 2 deletions datahub-web-react/src/Mocks.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -625,8 +625,7 @@ export const dataset3 = {
health: [],
assertions: null,
status: null,
readRuns: null,
writeRuns: null,
runs: null,
testResults: null,
siblings: null,
statsSummary: null,
Expand Down
11 changes: 4 additions & 7 deletions datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -178,18 +178,15 @@ export class DatasetEntity implements Entity<Dataset> {
},
},
{
name: 'Operations',
name: 'Runs',
// TODO: Rename this to DatasetRunsTab.
component: OperationsTab,
display: {
visible: (_, dataset: GetDatasetQuery) => {
return (
(dataset?.dataset?.readRuns?.total || 0) + (dataset?.dataset?.writeRuns?.total || 0) > 0
);
return (dataset?.dataset?.runs?.total || 0) > 0;
},
enabled: (_, dataset: GetDatasetQuery) => {
return (
(dataset?.dataset?.readRuns?.total || 0) + (dataset?.dataset?.writeRuns?.total || 0) > 0
);
return (dataset?.dataset?.runs?.total || 0) > 0;
},
},
},
Expand Down
119 changes: 82 additions & 37 deletions datahub-web-react/src/app/entity/dataset/profile/OperationsTab.tsx
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import { DeliveredProcedureOutlined } from '@ant-design/icons';
import { Button, Pagination, Table, Tooltip, Typography } from 'antd';
import ButtonGroup from 'antd/lib/button/button-group';
import { Pagination, Table, Tooltip, Typography } from 'antd';
import React, { useState } from 'react';
import styled from 'styled-components';

import { useGetDatasetRunsQuery } from '../../../../graphql/dataset.generated';
import { GetDatasetRunsQuery, useGetDatasetRunsQuery } from '../../../../graphql/dataset.generated';
import {
DataProcessInstanceRunResultType,
DataProcessRunStatus,
EntityType,
RelationshipDirection,
} from '../../../../types.generated';
import {
Expand All @@ -20,6 +20,8 @@ import { ANTD_GRAY } from '../../shared/constants';
import { useEntityData } from '../../shared/EntityContext';
import LoadingSvg from '../../../../images/datahub-logo-color-loading_pendulum.svg?react';
import { scrollToTop } from '../../../shared/searchUtils';
import { formatDuration } from '../../../shared/formatDuration';
import { notEmpty } from '../../shared/utils';

const ExternalUrlLink = styled.a`
font-size: 16px;
Expand All @@ -32,10 +34,6 @@ const PaginationControlContainer = styled.div`
text-align: center;
`;

const ReadWriteButtonGroup = styled(ButtonGroup)`
padding: 12px;
`;

const LoadingText = styled.div`
margin-top: 18px;
font-size: 12px;
Expand Down Expand Up @@ -67,6 +65,12 @@ const columns = [
<Tooltip title={new Date(Number(value)).toUTCString()}>{new Date(Number(value)).toLocaleString()}</Tooltip>
),
},
{
title: 'Duration',
dataIndex: 'duration',
key: 'duration',
render: (durationMs: number) => formatDuration(durationMs),
},
{
title: 'Run ID',
dataIndex: 'name',
Expand Down Expand Up @@ -129,14 +133,59 @@ const columns = [
const PAGE_SIZE = 20;

export const OperationsTab = () => {
const { urn } = useEntityData();
const { urn, entityData } = useEntityData();
const [page, setPage] = useState(1);
const [direction, setDirection] = useState(RelationshipDirection.Incoming);

const { loading, data } = useGetDatasetRunsQuery({
variables: { urn, start: (page - 1) * PAGE_SIZE, count: PAGE_SIZE, direction },
// Fetch data across all siblings.
const allUrns = [urn, ...(entityData?.siblings?.siblings || []).map((sibling) => sibling?.urn).filter(notEmpty)];
const loadings: boolean[] = [];
const datas: GetDatasetRunsQuery[] = [];
allUrns.forEach((entityUrn) => {
// Because there's a consistent number and order of the urns,
// this usage of a hook within a loop should be safe.
// eslint-disable-next-line react-hooks/rules-of-hooks
const { loading, data } = useGetDatasetRunsQuery({
variables: {
urn: entityUrn,
start: (page - 1) * PAGE_SIZE,
count: PAGE_SIZE,
direction: RelationshipDirection.Outgoing,
},
});
loadings.push(loading);
if (data) {
datas.push(data);
}
});
const runs = data && data?.dataset?.runs?.runs;

const loading = loadings.some((loadingEntry) => loadingEntry);

// Merge the runs data from all entities.
// If there's more than one entity contributing to the data, then we can't do pagination.
let canPaginate = true;
let dataRuns: NonNullable<GetDatasetRunsQuery['dataset']>['runs'] | undefined;
if (datas.length > 0) {
let numWithRuns = 0;
for (let i = 0; i < datas.length; i++) {
if (datas[i]?.dataset?.runs?.total) {
numWithRuns++;
}

if (dataRuns && dataRuns.runs) {
dataRuns.runs.push(...(datas[i]?.dataset?.runs?.runs || []));
dataRuns.total = (dataRuns.total ?? 0) + (datas[i]?.dataset?.runs?.total ?? 0);
} else {
dataRuns = JSON.parse(JSON.stringify(datas[i]?.dataset?.runs));
}
}

if (numWithRuns > 1) {
canPaginate = false;
}
}

// This also sorts the runs data across all entities.
const runs = dataRuns?.runs?.sort((a, b) => (b?.created?.time ?? 0) - (a?.created?.time ?? 0));

const tableData = runs
?.filter((run) => run)
Expand All @@ -145,33 +194,27 @@ export const OperationsTab = () => {
name: run?.name,
status: run?.state?.[0]?.status,
resultType: run?.state?.[0]?.result?.resultType,
duration: run?.state?.[0]?.durationMillis,
inputs: run?.inputs?.relationships.map((relationship) => relationship.entity),
outputs: run?.outputs?.relationships.map((relationship) => relationship.entity),
externalUrl: run?.externalUrl,
parentTemplate: run?.parentTemplate?.relationships?.[0]?.entity,
}));

// If the table contains jobs, we need to show the job-related columns. Otherwise we can simplify the table.
const containsJobs = tableData?.some((run) => run.parentTemplate?.type !== EntityType.Dataset);
const simplifiedColumns = containsJobs
? columns
: columns.filter((column) => !['name', 'inputs', 'outputs'].includes(column.key));

const onChangePage = (newPage: number) => {
scrollToTop();
setPage(newPage);
};

// TODO: Much of this file is duplicated from RunsTab.tsx. We should refactor this to share code.
return (
<>
<ReadWriteButtonGroup>
<Button
type={direction === RelationshipDirection.Incoming ? 'primary' : 'default'}
onClick={() => setDirection(RelationshipDirection.Incoming)}
>
Reads
</Button>
<Button
type={direction === RelationshipDirection.Outgoing ? 'primary' : 'default'}
onClick={() => setDirection(RelationshipDirection.Outgoing)}
>
Writes
</Button>
</ReadWriteButtonGroup>
{loading && (
<LoadingContainer>
<LoadingSvg height={80} width={80} />
Expand All @@ -180,17 +223,19 @@ export const OperationsTab = () => {
)}
{!loading && (
<>
<Table dataSource={tableData} columns={columns} pagination={false} />
<PaginationControlContainer>
<Pagination
current={page}
pageSize={PAGE_SIZE}
total={data?.dataset?.runs?.total || 0}
showLessItems
onChange={onChangePage}
showSizeChanger={false}
/>
</PaginationControlContainer>
<Table dataSource={tableData} columns={simplifiedColumns} pagination={false} />
{canPaginate && (
<PaginationControlContainer>
<Pagination
current={page}
pageSize={PAGE_SIZE}
total={dataRuns?.total || 0}
showLessItems
onChange={onChangePage}
showSizeChanger={false}
/>
</PaginationControlContainer>
)}
</>
)}
</>
Expand Down
4 changes: 3 additions & 1 deletion datahub-web-react/src/app/entity/shared/siblingUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ function cleanHelper(obj, visited) {
if ((v && typeof v === 'object' && !Object.keys(v).length) || v === null || v === undefined || v === '') {
if (Array.isArray(object)) {
object.splice(Number(k), 1);
} else {
} else if (Object.getOwnPropertyDescriptor(object, k)?.configurable) {
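// TODO(hsheth2): Work out why the "configurable" check above is needed.
// Without it, `delete` raised errors in dev mode (but not in prod mode).
// NOTE(review): deleting a non-configurable property throws a TypeError in strict mode,
// so the objects seen in dev mode presumably carry non-configurable properties — confirm.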
delete object[k];
}
}
Expand Down
1 change: 1 addition & 0 deletions datahub-web-react/src/graphql/dataProcess.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ fragment runResults on DataProcessInstanceResult {
nativeResultType
}
timestampMillis
durationMillis
}
inputs: relationships(input: { types: ["Consumes"], direction: OUTGOING, start: 0, count: 20 }) {
...runRelationshipResults
Expand Down
7 changes: 1 addition & 6 deletions datahub-web-react/src/graphql/dataset.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,7 @@ fragment nonSiblingDatasetFields on Dataset {
status {
removed
}
readRuns: runs(start: 0, count: 20, direction: INCOMING) {
count
start
total
}
writeRuns: runs(start: 0, count: 20, direction: OUTGOING) {
runs: runs(start: 0, count: 20, direction: OUTGOING) {
count
start
total
Expand Down
6 changes: 6 additions & 0 deletions datahub-web-react/src/setupTests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ global.matchMedia =

window.location = { ...window.location, replace: () => {} };

// Silences `Error: Not implemented: window.computedStyle(elt, pseudoElt)` during tests.
// The replacement forwards only the element to the original implementation, so any
// pseudo-element argument supplied by a caller is dropped before the call is made.
// Background: https://github.com/vitest-dev/vitest/issues/2061
// and https://github.com/NickColley/jest-axe/issues/147#issuecomment-758804533
const getComputedStyle = window.getComputedStyle;
window.getComputedStyle = (elt) => {
    return getComputedStyle(elt);
};

vi.mock('js-cookie', () => ({
default: {
get: () => 'urn:li:corpuser:2',
Expand Down
1 change: 1 addition & 0 deletions datahub-web-react/vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export default defineConfig(({ mode }) => {
};

return {
appType: 'spa',
plugins: [
react(),
svgr(),
Expand Down
5 changes: 5 additions & 0 deletions datahub-web-react/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5799,6 +5799,11 @@ eslint-plugin-react-hooks@^4.3.0:
resolved "https://registry.yarnpkg.com/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-4.6.0.tgz#4c3e697ad95b77e93f8646aaa1630c1ba607edd3"
integrity sha512-oFc7Itz9Qxh2x4gNHStv3BqJq54ExXmfC+a1NjAta66IAN87Wu0R/QArgIS9qKzX3dXKPI9H5crl9QchNMY9+g==

eslint-plugin-react-refresh@^0.4.6:
version "0.4.6"
resolved "https://registry.yarnpkg.com/eslint-plugin-react-refresh/-/eslint-plugin-react-refresh-0.4.6.tgz#e8e8accab681861baed00c5c12da70267db0936f"
integrity sha512-NjGXdm7zgcKRkKMua34qVO9doI7VOxZ6ancSvBELJSSoX97jyndXcSoa8XBh69JoB31dNz3EEzlMcizZl7LaMA==

eslint-plugin-react@^7.28.0:
version "7.32.2"
resolved "https://registry.yarnpkg.com/eslint-plugin-react/-/eslint-plugin-react-7.32.2.tgz#e71f21c7c265ebce01bcbc9d0955170c55571f10"
Expand Down
Loading

0 comments on commit f0bdc24

Please sign in to comment.