Skip to content

Commit

Permalink
HCK-9462: add support for CLUSTER BY (#250)
Browse files Browse the repository at this point in the history
* HCK-9462: enable cluster by clause if using is not defined

* HCK-9462: add altering for CLUSTER BY option

* HCK-9462: fix typo

* HCK-9462: add limit for clustering keys

* HCK-9462: update grammar to parse CLUSTER BY clause

* HCK-9462: fix condition for sorted by clause

* HCK-9462: remove LOCATION clause duplicate
  • Loading branch information
serhii-filonenko authored Jan 17, 2025
1 parent da3592a commit f7a47c1
Show file tree
Hide file tree
Showing 13 changed files with 7,366 additions and 7,122 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ const {
wrapInSingleQuotes,
isSupportUnityCatalog,
isSupportNotNullConstraints,
checkLiquidClusteringPropertyChanged,
} = require('../../../utils/general');
const { getTableStatement } = require('../../../helpers/tableHelper');
const { AlterScriptDto } = require('../../types/AlterScriptDto');
const { getModifiedTablePropertiesScriptDtos } = require('./modifyPropertiesHelper');
const { getModifyCheckConstraintsScriptDtos } = require('./checkConstraintsHelper');
const { getModifyUnityEntityTagsScriptDtos } = require('./alterUnityTagsHelper');
const { getPropertiesNamesByGUIDs } = require('./primaryKeyHelper');

const tableProperties = [
'compositeClusteringKey',
Expand Down Expand Up @@ -138,6 +140,30 @@ const getModifyLocationScriptDto =
return undefined;
};

/**
 * Builds the ALTER script DTO that re-declares a table's clustering keys.
 *
 * @param {{ ddlProvider: Object }} deps provider exposing `setTableClustering`
 * @return {({ collection, dbVersion }: { collection: Object, dbVersion: string }) => (Object | undefined)}
 *   a function returning the DTO produced by `AlterScriptDto.getInstance`, or `undefined`
 *   when the new clustering keys are not an array or are equal to the old ones
 * */
const getModifyClusteringScriptDto =
	({ ddlProvider }) =>
	({ collection, dbVersion }) => {
		const compMod = _.get(collection, 'role.compMod', {});
		const { old: previousKeys, new: currentKeys } = _.get(compMod, 'compositeClusteringKey', {});

		// Only act when there is a new key list and it differs from the previous one.
		const hasClusteringChange = Array.isArray(currentKeys) && !_.isEqual(previousKeys, currentKeys);
		if (!hasClusteringChange) {
			return undefined;
		}

		const columnNames = getPropertiesNamesByGUIDs(
			collection,
			currentKeys.map(key => key.keyId),
		);
		// When no column name is resolved, the clustering text falls back to NONE.
		const clustering = columnNames.length ? `(${columnNames.join(', ')})` : 'NONE';
		const fullTableName = generateFullEntityName({ entity: collection, dbVersion });

		return AlterScriptDto.getInstance(
			[ddlProvider.setTableClustering({ clustering, fullTableName })],
			true,
			false,
		);
	};

/**
* @return {({collection, dbVersion }: {collection: Object, dbVersion: string }) => {
* type: 'modify' | 'new',
Expand All @@ -160,6 +186,10 @@ const getModifyCollectionScriptDtos =
entityData: collection,
name: fullCollectionName,
});
const checkLiquidClusteringScriptDtos = getModifyClusteringScriptDto({ ddlProvider })({
collection,
dbVersion,
});

return {
type: 'modify',
Expand All @@ -170,6 +200,7 @@ const getModifyCollectionScriptDtos =
AlterScriptDto.getInstance([serDeProperties], true, false),
modifyLocationScriptDto,
...unityEntityTagsDtos,
checkLiquidClusteringScriptDtos,
].filter(Boolean),
};
};
Expand All @@ -182,7 +213,8 @@ const getModifyCollectionScriptDtos =
* */
const generateModifyCollectionScript = app => (collection, definitions, ddlProvider, dbVersion) => {
const compMod = _.get(collection, 'role.compMod', {});
const shouldDropAndRecreate = getIsChangeProperties(compMod, tableProperties);
const shouldDropAndRecreate =
!checkLiquidClusteringPropertyChanged(compMod) && getIsChangeProperties(compMod, tableProperties);

if (shouldDropAndRecreate) {
return getDropAndRecreateCollectionScriptDtos(app, ddlProvider)(collection, definitions, dbVersion);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const getPropertyNameByGuid = (collection, guid) => {
/**
* @return {(collection: Object, guids: string[]) => Array<Object>}
* */
const getPropertiesNamesByGuids = (collection, guids) => {
const getPropertiesNamesByGUIDs = (collection, guids) => {
	// Resolve each GUID through getPropertyNameByGuid and keep only truthy results.
	return guids.flatMap(guid => {
		const propertyName = getPropertyNameByGuid(collection, guid);
		return propertyName ? [propertyName] : [];
	});
};

Expand Down Expand Up @@ -58,7 +58,7 @@ const getAddCompositePkScripts =
.map(newPk => {
const compositePrimaryKey = newPk.compositePrimaryKey || [];
const guidsOfColumnsInPk = compositePrimaryKey.map(compositePkEntry => compositePkEntry.keyId);
const columnNamesForDDL = getPropertiesNamesByGuids(collection, guidsOfColumnsInPk);
const columnNamesForDDL = getPropertiesNamesByGUIDs(collection, guidsOfColumnsInPk);
if (!columnNamesForDDL.length) {
return undefined;
}
Expand Down Expand Up @@ -199,4 +199,5 @@ const getModifyPkConstraintsScripts =

module.exports = {
getModifyPkConstraintsScripts,
getPropertiesNamesByGUIDs,
};
13 changes: 13 additions & 0 deletions forward_engineering/ddlProvider/ddlProvider.js
Original file line number Diff line number Diff line change
Expand Up @@ -472,5 +472,18 @@ module.exports = app => {
unsetColumnTags({ tableName, columnName, tags }) {
return assignTemplates(templates.unsetColumnTags, { tableName, columnName, tags });
},

/**
* @param {string} fullTableName
* @param {string} location
* @return {string}
* */
setTableClustering({ fullTableName, clustering }) {
const templatesConfig = {
name: fullTableName,
clustering,
};
return assignTemplates(templates.setTableClustering, templatesConfig);
},
};
};
2 changes: 2 additions & 0 deletions forward_engineering/ddlProvider/ddlTemplates.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,6 @@ module.exports = {
setColumnTags: 'ALTER TABLE ${tableName} ALTER COLUMN ${columnName}\nSET TAGS (${tags});',

unsetColumnTags: 'ALTER TABLE ${tableName} ALTER COLUMN ${columnName}\nUNSET TAGS (${tags});',

setTableClustering: 'ALTER TABLE ${name} CLUSTER BY ${clustering};',
};
33 changes: 24 additions & 9 deletions forward_engineering/helpers/tableHelper.js
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ const getCreateStatement = ({
foreignKeyStatement,
comment,
partitionedByKeys,
clusteredKeys,
sortedKeys,
numBuckets,
rowFormatStatement,
storedAsStatement,
location,
Expand Down Expand Up @@ -158,15 +161,18 @@ const getCreateUsingStatement = ({
rowFormatStatement,
`ROW FORMAT ${rowFormatStatement}`,
)(storedAsStatement, storedAsStatement)(partitionedByKeys, `PARTITIONED BY (${partitionedByKeys})`)(
clusteredKeys,
`CLUSTERED BY (${clusteredKeys})`,
)(sortedKeys && clusteredKeys, `SORTED BY (${sortedKeys})`)(
numBuckets && clusteredKeys,
`INTO ${numBuckets} BUCKETS`,
)(location, `LOCATION '${location}'`)(comment, `COMMENT '${encodeStringLiteral(comment)}'`)(
checkTablePropertiesDefined(tableProperties),
`TBLPROPERTIES (${getTablePropertiesClause(tableProperties)})`,
)(tableOptions, `OPTIONS ${tableOptions}`)(selectStatement, `AS ${selectStatement}`)(true, ';')();
!numBuckets && clusteredKeys,
`CLUSTER BY (${clusteredKeys})`,
)(numBuckets && clusteredKeys, `CLUSTERED BY (${clusteredKeys})`)(
numBuckets && sortedKeys && clusteredKeys,
`SORTED BY (${sortedKeys})`,
)(numBuckets && clusteredKeys, `INTO ${numBuckets} BUCKETS`)(location, `LOCATION '${location}'`)(
comment,
`COMMENT '${encodeStringLiteral(comment)}'`,
)(checkTablePropertiesDefined(tableProperties), `TBLPROPERTIES (${getTablePropertiesClause(tableProperties)})`)(
tableOptions,
`OPTIONS ${tableOptions}`,
)(selectStatement, `AS ${selectStatement}`)(true, ';')();
};

const getCreateHiveStatement = ({
Expand All @@ -177,6 +183,9 @@ const getCreateHiveStatement = ({
foreignKeyStatement,
comment,
partitionedByKeys,
clusteredKeys,
sortedKeys,
numBuckets,
rowFormatStatement,
storedAsStatement,
location,
Expand All @@ -196,6 +205,12 @@ const getCreateHiveStatement = ({
)(isAddBrackets, ')')(comment, `COMMENT '${encodeStringLiteral(comment)}'`)(
partitionedByKeys,
`PARTITIONED BY (${partitionedByKeys})`,
)(!numBuckets && clusteredKeys, `CLUSTER BY (${clusteredKeys})`)(
numBuckets && clusteredKeys,
`CLUSTERED BY (${clusteredKeys})`,
)(numBuckets && sortedKeys && clusteredKeys, `SORTED BY (${sortedKeys})`)(
numBuckets && clusteredKeys,
`INTO ${numBuckets} BUCKETS`,
)(rowFormatStatement, `ROW FORMAT ${rowFormatStatement}`)(storedAsStatement, storedAsStatement)(
location,
`LOCATION '${location}'`,
Expand Down
5 changes: 5 additions & 0 deletions forward_engineering/utils/general.js
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,10 @@ const checkFieldPropertiesChanged = (compMod, propertiesToCheck) => {
return propertiesToCheck.some(prop => compMod?.oldField[prop] !== compMod?.newField[prop]);
};

/**
 * Tells whether the clustering keys changed while no new bucket count is set.
 *
 * @param {Object} [compMod] entity-level modification data; may be nullish
 * @return {*} `false` when `compMod.numBuckets.new` is truthy, otherwise whatever
 *   `compareProperties` returns for `compMod.compositeClusteringKey` (or `{}` when absent)
 * */
const checkLiquidClusteringPropertyChanged = compMod => {
	// Both reads are null-safe so a missing compMod cannot raise a TypeError.
	return !compMod?.numBuckets?.new && compareProperties(compMod?.compositeClusteringKey || {});
};

module.exports = {
buildStatement,
getName,
Expand Down Expand Up @@ -337,5 +341,6 @@ module.exports = {
isSupportUnityCatalog,
isSupportNotNullConstraints,
checkFieldPropertiesChanged,
checkLiquidClusteringPropertyChanged,
generateFullEntityNameFromBucketAndTableNames,
};
5 changes: 4 additions & 1 deletion properties_pane/entity_level/entityLevelConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,10 @@ making sure that you maintain a proper JSON format.
"isCompositeKey": true,
"setPrimaryKey": false,
"template": "collectiontree",
"abbr": "ck"
"abbr": "ck",
"templateOptions": {
"maxFields": 4
}
},
{
"propertyName": "Sorted by",
Expand Down
7 changes: 7 additions & 0 deletions reverse_engineering/grammars/HiveParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ createTableStatement
| tablePropertiesPrefixed
| tableOptions
| tableComment
| clusterByClause
)*
(KW_AS selectStatementWithCTE)?
)
Expand Down Expand Up @@ -342,6 +343,7 @@ alterTblPartitionStatementSuffix
| alterStatementSuffixRenameCol
| alterStatementSuffixAddCol
| alterStatementSuffixUpdateColumns
| alterStatementSuffixClusterBy
;

alterStatementPartitionKeyType
Expand Down Expand Up @@ -481,6 +483,11 @@ alterStatementSuffixClusterbySortby
| tableBuckets
;

// Alternative of alterTblPartitionStatementSuffix: matches either a clusterByClause
// or the literal token sequence CLUSTER BY NONE.
alterStatementSuffixClusterBy
: clusterByClause
| KW_CLUSTER KW_BY KW_NONE
;

alterTblPartitionStatementSuffixSkewedLocation
: KW_SET KW_SKEWED KW_LOCATION skewedLocations
;
Expand Down
2 changes: 1 addition & 1 deletion reverse_engineering/grammars/IdentifiersParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ orderByClause
;

clusterByClause
: KW_CLUSTER KW_BY expressions
: KW_CLUSTER KW_BY LPAREN columnNameList RPAREN
;

partitionByClause
Expand Down
30 changes: 29 additions & 1 deletion reverse_engineering/hqlToCollectionsVisitor.js
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ class Visitor extends HiveParserVisitor {
const compositePartitionKey = this.visitWhenExists(ctx, 'tablePartition', [])?.[0] || [];
const { compositeClusteringKey, numBuckets, sortedByKey } =
this.visitWhenExists(ctx, 'tableBuckets', [])?.[0] || {};
const { compositeClusteringKey: compositeLiquidClusteringKey } =
this.visitWhenExists(ctx, 'clusterByClause', [])?.[0] || {};
const { skewedby, skewedOn, skewStoredAsDir } = this.visitWhenExists(ctx, 'tableSkewed', [])?.[0] || {};
const tableRowFormat = this.visitWhenExists(ctx, 'tableRowFormat', {})?.[0] || {};
const description = this.visitWhenExists(ctx, 'tableComment');
Expand Down Expand Up @@ -148,7 +150,7 @@ class Visitor extends HiveParserVisitor {
externalTable,
description: Array.isArray(description) ? description[0] || '' : String(description),
compositePartitionKey: compositePartitionKey.map(([name]) => ({ name })),
compositeClusteringKey,
compositeClusteringKey: compositeClusteringKey || compositeLiquidClusteringKey,
numBuckets,
sortedByKey,
skewedby,
Expand Down Expand Up @@ -494,6 +496,7 @@ class Visitor extends HiveParserVisitor {
'alterStatementSuffixClusterbySortby',
'alterStatementSuffixRenameCol',
'alterStatementSuffixAddCol',
'alterStatementSuffixClusterBy',
]
.map(statement => this.visitWhenExists(ctx, statement))
.filter(Boolean)[0];
Expand Down Expand Up @@ -552,6 +555,19 @@ class Visitor extends HiveParserVisitor {
};
}

visitAlterStatementSuffixClusterBy(ctx) {
const compositeClusteringKey = ctx.KW_NONE()
? []
: this.visitWhenExists(ctx, 'clusterByClause', {}).compositeClusteringKey;

return {
type: UPDATE_ENTITY_LEVEL_DATA_COMMAND,
data: {
compositeClusteringKey,
},
};
}

visitAlterStatementSuffixRenameCol(ctx) {
const columnConstraint = this.visitWhenExists(ctx, 'alterColumnConstraint', {});

Expand Down Expand Up @@ -1632,6 +1648,18 @@ class Visitor extends HiveParserVisitor {
getText(expression) {
return this.originalText.slice(expression.start.start, expression.stop.stop + 1);
}

visitClusterByClause(ctx) {
const compositeClusteringKey = ctx
.columnNameList()
.getText()
.split(',')
.map(name => ({ name }));

return {
compositeClusteringKey,
};
}
}

const removeQuotes = (string = '') => string.replace(/^(`)(.*)\1$/, '$2');
Expand Down
3 changes: 2 additions & 1 deletion reverse_engineering/parser/Hive/HiveParser.interp

Large diffs are not rendered by default.

Loading

0 comments on commit f7a47c1

Please sign in to comment.