Skip to content

Commit

Permalink
refactor(GMS): migrate GMS to Spring boot
Browse files Browse the repository at this point in the history
* GMS: Migrate GMS from a Spring WebApp to Spring Boot (embedded Jetty 12)
* Spring WebMvc endpoints are the default; Rest.li endpoints are secondary
* Removed Jetty 11
* Refactored Kafka consumer startup
* GMS with all embedded consumers starts up in under 40s
* Neo4j upgraded
* Dgraph upgraded
  • Loading branch information
david-leifker committed Jan 19, 2025
1 parent 262dd76 commit 2a30b07
Show file tree
Hide file tree
Showing 73 changed files with 784 additions and 905 deletions.
23 changes: 16 additions & 7 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ buildscript {
ext.springBootVersion = '3.2.9'
ext.springKafkaVersion = '3.1.6'
ext.openTelemetryVersion = '1.18.0'
ext.neo4jVersion = '5.14.0'
ext.neo4jTestVersion = '5.14.0'
ext.neo4jApocVersion = '5.14.0'
ext.neo4jVersion = '5.20.0'
ext.neo4jTestVersion = '5.20.0'
ext.neo4jApocVersion = '5.20.0'
ext.testContainersVersion = '1.17.4'
ext.elasticsearchVersion = '2.11.1' // ES 7.10, Opensearch 1.x, 2.x
ext.jacksonVersion = '2.15.3'
ext.jettyVersion = '11.0.21'
ext.jettyVersion = '12.0.16'
// see also datahub-frontend/play.gradle
ext.playVersion = '2.8.22'
ext.playScalaVersion = '2.13'
Expand Down Expand Up @@ -136,7 +136,8 @@ project.ext.externalDependency = [
'datastaxOssNativeProtocol': 'com.datastax.oss:native-protocol:1.5.1',
'datastaxOssCore': 'com.datastax.oss:java-driver-core:4.14.1',
'datastaxOssQueryBuilder': 'com.datastax.oss:java-driver-query-builder:4.14.1',
'dgraph4j' : 'io.dgraph:dgraph4j:21.12.0',
'dgraph4j' : 'io.dgraph:dgraph4j:24.1.1',
'dgraphNetty': 'io.grpc:grpc-netty-shaded:1.69.0',
'dropwizardMetricsCore': 'io.dropwizard.metrics:metrics-core:4.2.3',
'dropwizardMetricsJmx': 'io.dropwizard.metrics:metrics-jmx:4.2.3',
'ebean': 'io.ebean:ebean:' + ebeanVersion,
Expand Down Expand Up @@ -176,8 +177,9 @@ project.ext.externalDependency = [
'jakartaValidation': 'jakarta.validation:jakarta.validation-api:3.1.0-M2',
'jerseyCore': 'org.glassfish.jersey.core:jersey-client:2.41',
'jerseyGuava': 'org.glassfish.jersey.bundles.repackaged:jersey-guava:2.25.1',
'jettyJaas': "org.eclipse.jetty:jetty-jaas:$jettyVersion",
'jettySecurity': "org.eclipse.jetty:jetty-security:$jettyVersion",
'jettyClient': "org.eclipse.jetty:jetty-client:$jettyVersion",
'jettyJmx': "org.eclipse.jetty:jetty-jmx:$jettyVersion",
'jettison': 'org.codehaus.jettison:jettison:1.5.4',
'jgrapht': 'org.jgrapht:jgrapht-core:1.5.1',
'jna': 'net.java.dev.jna:jna:5.12.1',
Expand Down Expand Up @@ -380,6 +382,13 @@ configure(subprojects.findAll {! it.name.startsWith('spark-lineage')}) {
resolutionStrategy.force externalDependency.antlr4Runtime
resolutionStrategy.force externalDependency.antlr4
resolutionStrategy.force 'org.apache.mina:mina-core:2.2.4'
resolutionStrategy {
force "org.eclipse.jetty:jetty-security:${jettyVersion}"
force "org.eclipse.jetty:jetty-server:${jettyVersion}"
force "org.eclipse.jetty:jetty-ee10-servlet:${jettyVersion}"
force "org.eclipse.jetty:jetty-ee10-webapp:${jettyVersion}"
force "org.eclipse.jetty:jetty-xml:${jettyVersion}"
}
}
}

Expand Down Expand Up @@ -407,7 +416,7 @@ subprojects {
implementation externalDependency.annotationApi
constraints {
implementation("com.google.googlejavaformat:google-java-format:$googleJavaFormatVersion")
implementation('io.netty:netty-all:4.1.115.Final')
implementation('io.netty:netty-all:4.1.116.Final')
implementation('org.apache.commons:commons-compress:1.27.1')
implementation('org.apache.velocity:velocity-engine-core:2.4')
implementation('org.hibernate:hibernate-validator:6.0.20.Final')
Expand Down
65 changes: 15 additions & 50 deletions datahub-frontend/app/security/AuthenticationManager.java
Original file line number Diff line number Diff line change
@@ -1,68 +1,33 @@
package security;

import com.google.common.base.Preconditions;
import java.util.Collections;
import javax.annotation.Nonnull;
import javax.naming.AuthenticationException;
import javax.security.auth.callback.Callback;
import javax.security.auth.callback.CallbackHandler;
import javax.security.auth.callback.NameCallback;
import javax.security.auth.callback.PasswordCallback;
import javax.security.auth.login.LoginContext;
import javax.security.auth.login.LoginException;
import org.apache.commons.lang3.StringUtils;
import org.eclipse.jetty.jaas.JAASLoginService;
import org.eclipse.jetty.jaas.PropertyUserStoreManager;
import play.Logger;
import org.eclipse.jetty.security.UserPrincipal;
import org.eclipse.jetty.util.security.Credential;

public class AuthenticationManager {

private AuthenticationManager(boolean verbose) {}
private AuthenticationManager() {} // Prevent instantiation

public static void authenticateJaasUser(@Nonnull String userName, @Nonnull String password)
throws Exception {
Preconditions.checkArgument(!StringUtils.isAnyEmpty(userName), "Username cannot be empty");
JAASLoginService jaasLoginService = new JAASLoginService("WHZ-Authentication");
PropertyUserStoreManager propertyUserStoreManager = new PropertyUserStoreManager();
propertyUserStoreManager.start();
jaasLoginService.setBeans(Collections.singletonList(propertyUserStoreManager));
JAASLoginService.INSTANCE.set(jaasLoginService);
try {
LoginContext lc =
new LoginContext("WHZ-Authentication", new WHZCallbackHandler(userName, password));
lc.login();
} catch (LoginException le) {
AuthenticationException authenticationException =
new AuthenticationException(le.getMessage());
authenticationException.setRootCause(le);
throw authenticationException;
}
}

private static class WHZCallbackHandler implements CallbackHandler {
private String password;
private String username;

private WHZCallbackHandler(@Nonnull String username, @Nonnull String password) {
this.username = username;
this.password = password;
}
try {
// Create and configure credentials for authentication
UserPrincipal userPrincipal = new UserPrincipal(userName, Credential.getCredential(password));

@Override
public void handle(@Nonnull Callback[] callbacks) {
NameCallback nc = null;
PasswordCallback pc = null;
for (Callback callback : callbacks) {
Logger.debug(
"The submitted callback is of type: " + callback.getClass() + " : " + callback);
if (callback instanceof NameCallback) {
nc = (NameCallback) callback;
nc.setName(this.username);
} else if (callback instanceof PasswordCallback) {
pc = (PasswordCallback) callback;
pc.setPassword(this.password.toCharArray());
}
// Verify credentials
if (!userPrincipal.authenticate(password)) {
throw new AuthenticationException("Invalid credentials for user: " + userName);
}

} catch (Exception e) {
AuthenticationException authenticationException =
new AuthenticationException("Authentication failed");
authenticationException.setRootCause(e);
throw authenticationException;
}
}
}
2 changes: 1 addition & 1 deletion datahub-frontend/play.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ dependencies {
implementation externalDependency.springBeans
implementation externalDependency.springContext
implementation externalDependency.springBootAutoconfigure
implementation externalDependency.jettyJaas
implementation externalDependency.jettySecurity
implementation externalDependency.graphqlJava
implementation externalDependency.antlr4Runtime
implementation externalDependency.antlr4
Expand Down
8 changes: 1 addition & 7 deletions docker/datahub-gms/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION
FROM alpine:3.21 AS base

ENV JMX_VERSION=0.18.0
ENV JETTY_VERSION=11.0.21

# Re-declaring args from above to make them available in this stage (will inherit default values)
ARG ALPINE_REPO_URL
Expand All @@ -42,9 +41,6 @@ RUN apk --no-cache --update-cache --available upgrade \
&& apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat snappy \
&& apk --no-cache add openjdk17-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \
&& apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/ \
&& curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-runner/${JETTY_VERSION}/jetty-runner-${JETTY_VERSION}.jar --output jetty-runner.jar \
&& curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-jmx/${JETTY_VERSION}/jetty-jmx-${JETTY_VERSION}.jar --output jetty-jmx.jar \
&& curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-util/${JETTY_VERSION}/jetty-util-${JETTY_VERSION}.jar --output jetty-util.jar \
&& wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \
&& wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \
&& cp /usr/lib/jvm/java-17-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks
Expand All @@ -56,8 +52,6 @@ FROM base AS prod-install
COPY war.war /datahub/datahub-gms/bin/war.war
COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-gms/resources/entity-registry.yml
COPY docker/datahub-gms/start.sh /datahub/datahub-gms/scripts/start.sh
COPY docker/datahub-gms/jetty.xml /datahub/datahub-gms/scripts/jetty.xml
COPY docker/datahub-gms/jetty-jmx.xml /datahub/datahub-gms/scripts/jetty-jmx.xml
COPY docker/monitoring/client-prometheus-config.yaml /datahub/datahub-gms/scripts/prometheus-config.yaml
RUN chmod +x /datahub/datahub-gms/scripts/start.sh

Expand All @@ -70,7 +64,7 @@ FROM ${APP_ENV}-install AS final
RUN mkdir -p /etc/datahub/plugins/auth/resources

RUN addgroup -S datahub && adduser -S datahub -G datahub
RUN chown -R datahub:datahub /etc/datahub
RUN chown -R datahub:datahub /etc/datahub /datahub
USER datahub

ENV JMX_OPTS=""
Expand Down
31 changes: 0 additions & 31 deletions docker/datahub-gms/jetty-jmx.xml

This file was deleted.

57 changes: 0 additions & 57 deletions docker/datahub-gms/jetty.xml

This file was deleted.

9 changes: 2 additions & 7 deletions docker/datahub-gms/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,8 @@ COMMON="
java $JAVA_OPTS $JMX_OPTS \
$OTEL_AGENT \
$PROMETHEUS_AGENT \
-jar /jetty-runner.jar \
--stats unsecure \
--jar jetty-util.jar \
--jar jetty-jmx.jar \
--config /datahub/datahub-gms/scripts/jetty.xml \
--config /datahub/datahub-gms/scripts/jetty-jmx.xml \
/datahub/datahub-gms/bin/war.war"
-Dstats=unsecure \
-jar /datahub/datahub-gms/bin/war.war"

if [[ $SKIP_ELASTICSEARCH_CHECK != true ]]; then
exec dockerize \
Expand Down
2 changes: 0 additions & 2 deletions docker/docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ services:
- KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
volumes:
- ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh
- ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml
- ./datahub-gms/jetty-jmx.xml:/datahub/datahub-gms/scripts/jetty-jmx.xml
- ./monitoring/client-prometheus-config.yaml:/datahub/datahub-gms/scripts/prometheus-config.yaml
- ../metadata-models/src/main/resources/:/datahub/datahub-gms/resources
- ../metadata-service/war/build/libs/:/datahub/datahub-gms/bin
Expand Down
1 change: 1 addition & 0 deletions docker/profiles/docker-compose.frontend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ x-datahub-frontend-service-dev: &datahub-frontend-service-dev
DATAHUB_ANALYTICS_ENABLED: ${DATAHUB_ANALYTICS_ENABLED:-true}
volumes:
- ../../datahub-frontend/build/stage/main:/datahub-frontend
- ./monitoring/client-prometheus-config.yaml:/datahub-frontend/client-prometheus-config.yaml

services:
frontend-quickstart:
Expand Down
4 changes: 1 addition & 3 deletions docker/profiles/docker-compose.gms.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,15 +130,13 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev
<<: [*datahub-dev-telemetry-env, *datahub-gms-env]
SKIP_ELASTICSEARCH_CHECK: false
JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001'
BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false
BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: ${BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE:-false}
SEARCH_SERVICE_ENABLE_CACHE: false
LINEAGE_SEARCH_CACHE_ENABLED: false
SHOW_BROWSE_V2: true
ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true}
volumes:
- ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh
- ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml
- ./datahub-gms/jetty-jmx.xml:/datahub/datahub-gms/scripts/jetty-jmx.xml
- ./monitoring/client-prometheus-config.yaml:/datahub/datahub-gms/scripts/prometheus-config.yaml
- ../../metadata-models/src/main/resources/:/datahub/datahub-gms/resources
- ../../metadata-service/war/build/libs/:/datahub/datahub-gms/bin
Expand Down
31 changes: 20 additions & 11 deletions metadata-ingestion/src/datahub/secret/datahub_secrets_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,32 @@ class DataHubSecretsClient:
def __init__(self, graph: DataHubGraph):
self.graph = graph

def _cleanup_secret_name(self, secret_names: List[str]) -> List[str]:
"""Remove empty strings from the list of secret names."""
return [secret_name for secret_name in secret_names if secret_name]

def get_secret_values(self, secret_names: List[str]) -> Dict[str, Optional[str]]:
if len(secret_names) == 0:
return {}

res_data = self.graph.execute_graphql(
query="""query getSecretValues($input: GetSecretValuesInput!) {
getSecretValues(input: $input) {
name
value
}
request_json = {

Check warning on line 18 in metadata-ingestion/src/datahub/secret/datahub_secrets_client.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/secret/datahub_secrets_client.py#L18

Added line #L18 was not covered by tests
"query": """query getSecretValues($input: GetSecretValuesInput!) {\n
getSecretValues(input: $input) {\n
name\n
value\n
}\n
}""",
variables={"input": {"secrets": self._cleanup_secret_name(secret_names)}},
"variables": {"input": {"secrets": secret_names}},
}
# TODO: Use graph.execute_graphql() instead.

# Fetch secrets using the GraphQL API
response = self.graph._session.post(

Check warning on line 30 in metadata-ingestion/src/datahub/secret/datahub_secrets_client.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/secret/datahub_secrets_client.py#L30

Added line #L30 was not covered by tests
f"{self.graph.config.server}/api/graphql", json=request_json
)
response.raise_for_status()

Check warning on line 33 in metadata-ingestion/src/datahub/secret/datahub_secrets_client.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/secret/datahub_secrets_client.py#L33

Added line #L33 was not covered by tests

# Verify response
res_data = response.json()
if "errors" in res_data:
raise Exception("Failed to retrieve secrets from DataHub.")

Check warning on line 38 in metadata-ingestion/src/datahub/secret/datahub_secrets_client.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/secret/datahub_secrets_client.py#L36-L38

Added lines #L36 - L38 were not covered by tests

# Convert list of name, value secret pairs into a dict and return
secret_value_list = res_data["getSecretValues"]
secret_value_dict = dict()
Expand Down
8 changes: 7 additions & 1 deletion metadata-integration/java/acryl-spark-lineage/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,13 @@ dependencies {
//implementation "io.acryl:datahub-client:0.10.2"
implementation "io.openlineage:openlineage-spark_2.12:$openLineageVersion"
compileOnly "org.apache.iceberg:iceberg-spark3-runtime:0.12.1"
compileOnly "org.apache.spark:spark-sql_2.12:3.1.3"
compileOnly("org.apache.spark:spark-sql_2.12:3.1.3") {
exclude group: 'org.eclipse.jetty', module: 'jetty-servlet'
exclude group: 'org.eclipse.jetty', module: 'jetty-server'
exclude group: 'org.eclipse.jetty', module: 'jetty-util'
exclude group: 'org.eclipse.jetty', module: 'jetty-webapp'
exclude group: 'org.eclipse.jetty', module: 'jetty-security'
}
compileOnly "io.github.spark-redshift-community:spark-redshift_2.12:6.2.0-spark_3.5"

testCompileOnly externalDependency.lombok
Expand Down
1 change: 1 addition & 0 deletions metadata-io/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ dependencies {
exclude group: 'com.google.guava', module: 'guava'
exclude group: 'io.grpc', module: 'grpc-protobuf'
}
implementation externalDependency.dgraphNetty
implementation externalDependency.slf4jApi
runtimeOnly externalDependency.logbackClassic
compileOnly externalDependency.lombok
Expand Down
Loading

0 comments on commit 2a30b07

Please sign in to comment.