diff --git a/build-all-multiplatform.sh b/build-all-multiplatform.sh index 0216a827..9f235790 100755 --- a/build-all-multiplatform.sh +++ b/build-all-multiplatform.sh @@ -2,7 +2,7 @@ set -eo pipefail -DEBEZIUM_VERSION="2.3" +DEBEZIUM_VERSION="2.5" if [ -z "$DEBEZIUM_VERSIONS" ]; then DEBEZIUM_VERSIONS="$DEBEZIUM_VERSION" fi diff --git a/build-all.sh b/build-all.sh index fa903786..e688c210 100755 --- a/build-all.sh +++ b/build-all.sh @@ -2,7 +2,7 @@ set -eo pipefail -DEBEZIUM_VERSION="2.3" +DEBEZIUM_VERSION="2.5" if [ -z "$DEBEZIUM_VERSIONS" ]; then DEBEZIUM_VERSIONS="$DEBEZIUM_VERSION" fi diff --git a/connect-base/2.6/Dockerfile b/connect-base/2.6/Dockerfile new file mode 100644 index 00000000..929a4c7b --- /dev/null +++ b/connect-base/2.6/Dockerfile @@ -0,0 +1,57 @@ +ARG DEBEZIUM_DOCKER_REGISTRY_PRIMARY_NAME +FROM localhost:5500/debeziumquay/kafka:2.6 + +LABEL maintainer="Debezium Community" + +USER root +RUN microdnf -y install libaio && microdnf clean all + +USER kafka + +EXPOSE 8083 8778 +VOLUME ["/kafka/data","/kafka/logs","/kafka/config"] + +COPY docker-entrypoint.sh / +COPY --chown=kafka:kafka log4j.properties $KAFKA_HOME/config/log4j.properties +COPY docker-maven-download.sh /usr/local/bin/docker-maven-download + +# +# Set up the plugins directory ... +# +ENV KAFKA_CONNECT_PLUGINS_DIR=$KAFKA_HOME/connect \ + EXTERNAL_LIBS_DIR=$KAFKA_HOME/external_libs \ + CONNECT_PLUGIN_PATH=$KAFKA_CONNECT_PLUGINS_DIR \ + MAVEN_DEP_DESTINATION=$KAFKA_HOME/libs \ + APICURIO_VERSION=2.4.1.Final \ + JOLOKIA_VERSION=1.7.2 \ + OPENTELEMETRY_VERSION=1.23.1 \ + OPENTELEMETRY_INSTRUMENTATION_VERSION=1.23.0 + +RUN mkdir "$KAFKA_CONNECT_PLUGINS_DIR" "$EXTERNAL_LIBS_DIR" + +# +# The `docker-entrypoint.sh` script will automatically discover the child directories +# within the $KAFKA_CONNECT_PLUGINS_DIR directory (e.g., `/kafka/connect`), and place +# all of the files in those child directories onto the Java classpath. +# +# The general recommendation is to create a separate child directory for each connector +# (e.g., "debezium-connector-mysql"), and to place that connector's JAR files +# and other resource files in that child directory. +# +# However, use a single directory for connectors when those connectors share dependencies. +# This will prevent the classes in the shared dependencies from appearing in multiple JARs +# on the classpath, which results in arcane NoSuchMethodError exceptions. 
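+#
+# For example, a derived image could unpack one connector archive into its own child
+# directory like this (an illustrative sketch only; the URL and version are hypothetical):
+#
+#   RUN mkdir $KAFKA_CONNECT_PLUGINS_DIR/debezium-connector-mysql && \
+#       curl -fSL https://example.com/debezium-connector-mysql-plugin.tar.gz \
+#       | tar -xz -C $KAFKA_CONNECT_PLUGINS_DIR/debezium-connector-mysql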
+# +RUN docker-maven-download apicurio "$APICURIO_VERSION" ead18a95038adca54e91b7f253717eb7 +RUN docker-maven-download central org/jolokia jolokia-jvm "$JOLOKIA_VERSION" d489d62d1143e6a2e85a869a4b824a67 +RUN docker-maven-download otel io/opentelemetry opentelemetry-api "$OPENTELEMETRY_VERSION" e198a9568ce31a82faaa26f328388e89 +RUN docker-maven-download otel io/opentelemetry opentelemetry-context "$OPENTELEMETRY_VERSION" 783594a506dbf035e686776d5bcb4bfc +RUN docker-maven-download otel io/opentelemetry opentelemetry-semconv "$OPENTELEMETRY_VERSION"-alpha dab8c00d4eaa453315d9a3da0d63c49a +RUN docker-maven-download otel io/opentelemetry/instrumentation opentelemetry-instrumentation-api "$OPENTELEMETRY_INSTRUMENTATION_VERSION" e4069a077dd8d6206c540222d08769e2 +RUN docker-maven-download otel io/opentelemetry/instrumentation opentelemetry-instrumentation-api-semconv "$OPENTELEMETRY_INSTRUMENTATION_VERSION"-alpha f4b8065fd7b451cea6bfb3c0bf235d26 +RUN docker-maven-download otel io/opentelemetry/instrumentation opentelemetry-kafka-clients-2.6 "$OPENTELEMETRY_INSTRUMENTATION_VERSION"-alpha 5d6e5ba9ea90adacf66e3f471596933b +RUN docker-maven-download otel io/opentelemetry/instrumentation opentelemetry-kafka-clients-common "$OPENTELEMETRY_INSTRUMENTATION_VERSION"-alpha f37131795d53faa6423173926629d04d + + +ENTRYPOINT ["/docker-entrypoint.sh"] +CMD ["start"] diff --git a/connect-base/2.6/README.md b/connect-base/2.6/README.md new file mode 100644 index 00000000..e038abfd --- /dev/null +++ b/connect-base/2.6/README.md @@ -0,0 +1,168 @@ +[Kafka Connect](http://kafka.apache.org/documentation.html#connect) is a system for moving data into and out of Kafka. All Debezium connectors adhere to the Kafka Connector API for _source connectors_, and each monitors a specific kind of database management system for changing data, and then forwards those changes directly into Kafka topics organized by server, database, and table. This image defines a runnable Kafka Connect service preconfigured with all Debezium connectors. The service has a RESTful API for managing connector instances -- simply start up a container, configure a connector for each data source you want to monitor, and let Debezium monitor those sources for changes and forward them to the appropriate Kafka topics. + +# What is Debezium? + +Debezium is a distributed platform that turns your existing databases into event streams, so applications can quickly react to each row-level change in the databases. Debezium is built on top of Kafka and provides Kafka Connect compatible connectors that monitor specific database management systems. Debezium records the history of data changes in Kafka logs, so your application can be stopped and restarted at any time and can easily consume all of the events it missed while it was not running, ensuring that all events are processed correctly and completely. + +Running Debezium involves Zookeeper, Kafka, and services that run Debezium's connectors. For simple evaluation and experimentation, all services can all be run on a single host machine, using the recipe outlined below. Production environments, however, require properly running and networking multiple instances of each service to provide the performance, reliability, replication, and fault tolerance. This can be done with a platform like [OpenShift](https://www.openshift.com) that manages multiple Docker containers running on multiple hosts and machines. 
But running Kafka in a Docker container has limitations, so for scenarios where very high throughput is required, you should run Kafka on dedicated hardware as explained in the [Kafka documentation](http://kafka.apache.org/documentation.html).
+
+
+# How to use this image
+
+This image serves as a base image for other images that wish to use custom Kafka Connect connectors. This image provides a complete
+installation of Kafka and its Kafka Connect libraries, plus a `docker-entrypoint.sh` script that will run the Kafka Connect distributed service and dynamically set the Java classpath to include connector JARs found in child directories under `$KAFKA_CONNECT_PLUGINS_DIR`, which equates to `/kafka/connect`.
+
+To add your connectors, your image should be based upon this image (e.g., using `FROM quay.io/debezium/connect-base`) and should add the JAR files for one or more connectors to one or more child directories under `$KAFKA_CONNECT_PLUGINS_DIR`.
+
+The general recommendation is to create a separate child directory for each connector (e.g., "debezium-connector-mysql"), and to place that connector's JAR files and other resource files in that child directory.
+
+However, use a single directory for connectors when those connectors share dependencies. This will prevent the classes in the shared dependencies from appearing in multiple JARs on the classpath, which results in arcane NoSuchMethodError exceptions.
+
+## Start a Kafka Connect service instance
+
+*NOTE:* Please see the Apache Kafka [documentation](https://kafka.apache.org/documentation/#connect_running) for general information on running Kafka Connect and more details on the various options and environment variables.
+
+Kafka Connect requires an already-running Zookeeper service, which is either running locally via the container named `zookeeper` or with OpenShift running as a service named `zookeeper`. Also required are already-running Kafka brokers, which are either running locally via the container named `kafka` or with OpenShift running as a service named `kafka`.
+
+When running a cluster of one or more Kafka Connect service instances, several important parameters must be defined using environment variables. Please see the section below for the list of these required environment variables and acceptable values.
+
+Starting an instance of Kafka Connect using this image is simple:
+
+    $ docker run -it --name connect -p 8083:8083 -e GROUP_ID=1 -e CONFIG_STORAGE_TOPIC=my-connect-configs -e OFFSET_STORAGE_TOPIC=my-connect-offsets -e STATUS_STORAGE_TOPIC=my-connect-statuses -e ADVERTISED_HOST_NAME=$(echo $DOCKER_HOST | cut -f3 -d'/' | cut -f1 -d':') --link zookeeper:zookeeper --link kafka:kafka quay.io/debezium/connect
+
+This command uses this image and starts a new container named `connect`, which runs in the foreground and attaches the console so that it displays the service's output and error messages. It exposes its REST API on port 8083, which is mapped to the same port number on the local host. It uses Zookeeper in the container (or service) named `zookeeper` and Kafka brokers in the container (or service) named `kafka`. This command sets the three required environment variables, though you should replace their values with more meaningful values for your environment.
+
+To start the container in _detached_ mode, simply replace the `-it` option with `-d`. No service output will be sent to your console, but it can be read at any time using the `docker logs` command.
For example, the following command will display the output and keep following the output:
+
+    $ docker logs --follow connect
+
+## Start a shell in a running container
+
+If you are already running a container with a Kafka Connect service, you can use this image to connect to that container and obtain a command line shell:
+
+    $ docker exec -it connect bash
+
+where `connect` is the name of your existing container. The shell will be set up with all environment variables exactly like when starting the service in the container. Therefore, links to other containers and additional environment variables may be specified and will be reflected in the shell's exported variables.
+
+
+# Environment variables
+
+The Debezium Kafka Connect image uses several environment variables when running the Kafka Connect service with this image.
+
+### `GROUP_ID`
+
+This environment variable is required when running the Kafka Connect service. Set this to an ID that uniquely identifies the Kafka Connect cluster the service and its workers belong to.
+
+### `CONFIG_STORAGE_TOPIC`
+
+This environment variable is required when running the Kafka Connect service. Set this to the name of the Kafka topic where the Kafka Connect services in the group store connector configurations. The topic must have a single partition, should be highly replicated (e.g., 3x or more) and should be configured for compaction.
+
+### `OFFSET_STORAGE_TOPIC`
+
+This environment variable is required when running the Kafka Connect service. Set this to the name of the Kafka topic where the Kafka Connect services in the group store connector offsets. The topic should have many partitions, be highly replicated (e.g., 3x or more) and should be configured for compaction.
+
+### `STATUS_STORAGE_TOPIC`
+
+This environment variable should be provided when running the Kafka Connect service. Set this to the name of the Kafka topic where the Kafka Connect services in the group store connector status. The topic can have multiple partitions, should be highly replicated (e.g., 3x or more) and should be configured for compaction.
+
+### `BOOTSTRAP_SERVERS`
+
+This environment variable is an advanced setting, used only when Kafka is not running in a linkable container or service. Set this to a list of host/port pairs to use for establishing the *initial* connection to the Kafka cluster. Once a connection is established to one of these brokers, the service will then discover and make use of all Kafka brokers in the cluster, regardless of which servers are specified here for bootstrapping. The list should be in the form `host1:port1,host2:port2,...`. We recommend that you include more than one broker in this list, in case one of those is down.
+
+### `REST_HOST_NAME`
+
+This environment variable is an advanced setting. Set this to the hostname that the REST API will bind to.
+Defaults to the hostname of the container.
+Specify a value of `0.0.0.0` to bind the REST API to all available interfaces.
+
+### `ADVERTISED_HOST_NAME`
+
+This environment variable is an advanced setting. Set this to the hostname that will be given out to other workers to connect with. Defaults to the hostname of the container.
+
+### `KEY_CONVERTER`
+
+This environment variable is an advanced setting. Set this to the fully-qualified name of the Java class that implements Kafka Connect's `Converter` class, used to convert the connector's keys to the form stored in Kafka. Defaults to `org.apache.kafka.connect.json.JsonConverter`.
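+
+For example (an illustrative sketch, not a required setting), keys could be stored as plain strings by overriding this converter with Kafka's built-in `org.apache.kafka.connect.storage.StringConverter` when starting the container:
+
+    $ docker run -it --name connect ... -e KEY_CONVERTER=org.apache.kafka.connect.storage.StringConverter quay.io/debezium/connect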
+
+### `VALUE_CONVERTER`
+
+This environment variable is an advanced setting. Set this to the fully-qualified name of the Java class that implements Kafka Connect's `Converter` class, used to convert the connector's values to the form stored in Kafka. Defaults to `org.apache.kafka.connect.json.JsonConverter`.
+
+### `OFFSET_FLUSH_INTERVAL_MS`
+
+This environment variable is an advanced setting. Set this to the number of milliseconds defining the interval at which the service will periodically try committing offsets for tasks. The default is `60000`, or 60 seconds.
+
+### `OFFSET_FLUSH_TIMEOUT_MS`
+
+This environment variable is an advanced setting. Set this to the maximum time in milliseconds to wait for records to flush and partition offset data to be committed to offset storage before cancelling the process and restoring the offset data to be committed in a future attempt. The default is `5000`, or 5 seconds.
+
+### `SHUTDOWN_TIMEOUT`
+
+This environment variable is an advanced setting. Set this to the number of milliseconds to wait for tasks to shut down gracefully while the connectors complete all processing, record any final data, and clean up resources. This is the total amount of time, not per task. All tasks have shutdown triggered, and then they are awaited sequentially. The default is `10000`, or 10 seconds.
+
+### `HEAP_OPTS`
+
+This environment variable is recommended. Use this to set the JVM options for the Kafka Connect worker. By default a value of '-Xmx1G -Xms1G' is used, meaning that each worker uses 1GB of memory. Using too little memory may cause performance problems, while using too much may prevent the worker from starting properly given the memory available on the machine. Obviously the container must be able to use the amount of memory defined by this environment variable.
+
+### `CONNECT_LOG4J_LOGGERS`
+
+This environment variable is optional. Use this to override the loggers used in `log4j.properties` for the property `log4j.rootLogger`. Additional environment variables can be provided for log4j logging with the `CONNECT_LOG4J` prefix, mapped to properties in the `log4j.properties` file as described below in Others.
+
+### `LOG_LEVEL`
+
+This environment variable is optional. Use this to set the level of detail for Kafka's application log written to STDOUT and STDERR. Valid values are `INFO` (default), `WARN`, `ERROR`, `DEBUG`, or `TRACE`.
+
+### `ENABLE_APICURIO_CONVERTERS`
+
+This environment variable is optional. Use this to enable [Apicur.io](https://www.apicur.io/) converters with
+Apicurio Schema Registry by setting `ENABLE_APICURIO_CONVERTERS=true` as a container env var. Valid values are `false` to disable (default) or `true` to enable Apicurio converters.
+
+### `ENABLE_DEBEZIUM_KC_REST_EXTENSION`
+
+This environment variable is optional.
+Use this to enable the Debezium Kafka Connect REST Extension that ships with the image by setting `ENABLE_DEBEZIUM_KC_REST_EXTENSION=true` as a container env var.
+Valid values are `false` to disable (default) or `true` to enable the extension.
+
+### `ENABLE_DEBEZIUM_SCRIPTING`
+
+This environment variable is optional.
+Use this to enable Debezium features that use scripting languages, such as the Message Filtering or Content-based Routing SMTs, by setting `ENABLE_DEBEZIUM_SCRIPTING=true` as a container env var.
+Valid values are `false` to disable (default) or `true` to enable scripting.
+Note: in order to prevent the execution of arbitrary scripting expressions, you should enable this option only if you've secured access to the Kafka Connect configuration interface appropriately. + +### `ENABLE_JFR` + +This environment variable is optional. +When set then [Flight Recorder](https://openjdk.java.net/jeps/328) recording session is started for this run. +This feature is useful for gathering of diagnostic information in case of performance or functional issues. +Flight Recorder start options can be configured via `JFR_RECORDING_` prefixed environment variables when the variables is converted to lowercase and underscores are replaced with dashes, e.g. `JFR_RECORDING_PATH_TO_GC_ROOTS=true` becomes `path-to-gc-roots=true`. +Flight Recorder control options can be configured via `JFR_OPT_` prefixed environment variables. + +### Others + +Environment variables that start with `CONNECT_` will be used to update the Kafka Connect worker configuration file. Each environment variable name will be mapped to a configuration property name by: + +1. removing the `CONNECT_` prefix; +2. lowercasing all characters; and +3. converting all '\_' characters to '.' characters + +For example, the environment variable `CONNECT_HEARTBEAT_INTERVAL_MS` is converted to the `heartbeat.interval.ms` property. The container will then update the Kafka Connect worker configuration file to include the property's name and value. + +The value of the environment variable may not contain a '\@' character. + + +# Ports + +Containers created using this image will expose port 8083, which is the standard port bound to by the Kafka Connect service. You can use standard Docker options to map this to a different port on the host that runs the container. + + +# Storing data + +The Kafka Connect service run by this image stores no data in the container, but it does produce logs. The only way to keep these files is to use volumes that map specific directories inside the container to the local file system (or to OpenShift persistent volumes). + +### Log files + +Although this image will send Kafka Connect service log output to standard output so it is visible as Docker logs, this image also configures the Kafka Connect service to write out more logs to a data volume at `/kafka/logs`. All logs are rotated daily. + +### Configuration + +This image defines a data volume at `/kafka/config` where the broker's configuration files are stored. Note that these configuration files are always modified based upon the environment variables and linked containers. The best use of this data volume is to be able to see the configuration files used by Kafka, although with some care it is possible to supply custom configuration files that will be adapted and used upon startup. diff --git a/connect-base/2.6/docker-entrypoint.sh b/connect-base/2.6/docker-entrypoint.sh new file mode 100755 index 00000000..e3083c5d --- /dev/null +++ b/connect-base/2.6/docker-entrypoint.sh @@ -0,0 +1,330 @@ +#!/bin/bash + +# Exit immediately if a *pipeline* returns a non-zero status. 
(Add -x for command tracing) +set -e + +if [[ -z "$SENSITIVE_PROPERTIES" ]]; then + SENSITIVE_PROPERTIES="CONNECT_SASL_JAAS_CONFIG,CONNECT_CONSUMER_SASL_JAAS_CONFIG,CONNECT_PRODUCER_SASL_JAAS_CONFIG,CONNECT_SSL_KEYSTORE_PASSWORD,CONNECT_PRODUCER_SSL_KEYSTORE_PASSWORD,CONNECT_SSL_TRUSTSTORE_PASSWORD,CONNECT_PRODUCER_SSL_TRUSTSTORE_PASSWORD,CONNECT_SSL_KEY_PASSWORD,CONNECT_PRODUCER_SSL_KEY_PASSWORD,CONNECT_CONSUMER_SSL_TRUSTSTORE_PASSWORD,CONNECT_CONSUMER_SSL_KEYSTORE_PASSWORD,CONNECT_CONSUMER_SSL_KEY_PASSWORD" +fi + +if [[ -z "$BOOTSTRAP_SERVERS" ]]; then + # Look for any environment variables set by Docker container linking. For example, if the container + # running Kafka were aliased to 'kafka' in this container, then Docker should have created several envs, + # such as 'KAFKA_PORT_9092_TCP'. If so, then use that to automatically set the 'bootstrap.servers' property. + BOOTSTRAP_SERVERS=$(env | grep .*PORT_9092_TCP= | sed -e 's|.*tcp://||' | uniq | paste -sd ,) +fi + +if [[ "x$BOOTSTRAP_SERVERS" = "x" ]]; then + export BOOTSTRAP_SERVERS=0.0.0.0:9092 +fi + +echo "Using BOOTSTRAP_SERVERS=$BOOTSTRAP_SERVERS" + + +if [[ -z "$HOST_NAME" ]]; then + HOST_NAME=$(ip addr | grep 'BROADCAST' -A2 | tail -n1 | awk '{print $2}' | cut -f1 -d'/') +fi + +: ${REST_PORT:=8083} +: ${REST_HOST_NAME:=$HOST_NAME} +: ${ADVERTISED_PORT:=8083} +: ${ADVERTISED_HOST_NAME:=$HOST_NAME} +: ${GROUP_ID:=1} +: ${OFFSET_FLUSH_INTERVAL_MS:=60000} +: ${OFFSET_FLUSH_TIMEOUT_MS:=5000} +: ${SHUTDOWN_TIMEOUT:=10000} +: ${KEY_CONVERTER:=org.apache.kafka.connect.json.JsonConverter} +: ${VALUE_CONVERTER:=org.apache.kafka.connect.json.JsonConverter} +: ${ENABLE_APICURIO_CONVERTERS:=false} +: ${ENABLE_DEBEZIUM_KC_REST_EXTENSION:=false} +: ${ENABLE_DEBEZIUM_SCRIPTING:=false} +: ${ENABLE_JOLOKIA:=false} +: ${ENABLE_OTEL:=false} +export CONNECT_REST_ADVERTISED_PORT=$ADVERTISED_PORT +export CONNECT_REST_ADVERTISED_HOST_NAME=$ADVERTISED_HOST_NAME +export CONNECT_REST_PORT=$REST_PORT +export CONNECT_REST_HOST_NAME=$REST_HOST_NAME +export CONNECT_BOOTSTRAP_SERVERS=$BOOTSTRAP_SERVERS +export CONNECT_GROUP_ID=$GROUP_ID +export CONNECT_CONFIG_STORAGE_TOPIC=$CONFIG_STORAGE_TOPIC +export CONNECT_OFFSET_STORAGE_TOPIC=$OFFSET_STORAGE_TOPIC +if [[ -n "$STATUS_STORAGE_TOPIC" ]]; then + export CONNECT_STATUS_STORAGE_TOPIC=$STATUS_STORAGE_TOPIC +fi +export CONNECT_KEY_CONVERTER=$KEY_CONVERTER +export CONNECT_VALUE_CONVERTER=$VALUE_CONVERTER +export CONNECT_TASK_SHUTDOWN_GRACEFUL_TIMEOUT_MS=$SHUTDOWN_TIMEOUT +export CONNECT_OFFSET_FLUSH_INTERVAL_MS=$OFFSET_FLUSH_INTERVAL_MS +export CONNECT_OFFSET_FLUSH_TIMEOUT_MS=$OFFSET_FLUSH_TIMEOUT_MS +if [[ -n "$HEAP_OPTS" ]]; then + export KAFKA_HEAP_OPTS=$HEAP_OPTS +fi +unset HOST_NAME +unset REST_PORT +unset REST_HOST_NAME +unset ADVERTISED_PORT +unset ADVERTISED_HOST_NAME +unset GROUP_ID +unset OFFSET_FLUSH_INTERVAL_MS +unset OFFSET_FLUSH_TIMEOUT_MS +unset SHUTDOWN_TIMEOUT +unset KEY_CONVERTER +unset VALUE_CONVERTER +unset HEAP_OPTS +unset MD5HASH +unset SCALA_VERSION + +# +# Parameter 1: Should the extension be enabled ("true") or disabled ("false") +# +# When enabled, the .jar for the extension is symlinked to from the Kafka Connect plugins directory. +function set_debezium_kc_rest_extension_availability() { + ENABLED=$1; + + if [[ "${ENABLED}" == "true" && ! 
-z "$EXTERNAL_LIBS_DIR" && -d "$EXTERNAL_LIBS_DIR/debezium-connect-rest-extension" ]] ; then + mkdir -p "$KAFKA_CONNECT_PLUGINS_DIR/debezium-connect-rest-extension" + ln -snf $EXTERNAL_LIBS_DIR/debezium-connect-rest-extension/* "$KAFKA_CONNECT_PLUGINS_DIR/debezium-connect-rest-extension" + if [ -z "${CONNECT_REST_EXTENSION_CLASSES-}" ]; then + export CONNECT_REST_EXTENSION_CLASSES=io.debezium.kcrestextension.DebeziumConnectRestExtension + else + export CONNECT_REST_EXTENSION_CLASSES=$CONNECT_REST_EXTENSION_CLASSES,io.debezium.kcrestextension.DebeziumConnectRestExtension + fi + echo Debezium Kafka Connect REST API Extension enabled! + else + if [[ -d "$KAFKA_CONNECT_PLUGINS_DIR/debezium-connect-rest-extension" ]] ; then + find "$KAFKA_CONNECT_PLUGINS_DIR/debezium-connect-rest-extension" -lname "$EXTERNAL_LIBS_DIR/debezium-connect-rest-extension/*" -exec rm -f {} \; + fi + fi +} + +# +# Parameter 1: Should the resource be enabled ("true") or disabled ("false") +# Parameter 2: Folder path under $EXTERNAL_LIBS_DIR where the resorce is deployed +# Parameter 3: A wildcard pattern matching files from the resource folder +# Parameter 4: Name of the resource to print in log messages +# +# When enabled, files for the given resource are symlinked to each connector's folder. +# The best practice is to have a class appear in no more than one JAR from all JARs +# on the classpath to prevent errors at runtime. +function set_connector_additonal_resource_availability() { + ENABLED=$1; + RESOURCE_FOLDER=$2; + FILE_WILD_CARD=$3; + RESOURCE_PRETTY_NAME=$4; + + if [[ "${ENABLED}" == "true" && ! -z "$EXTERNAL_LIBS_DIR" && -d "$EXTERNAL_LIBS_DIR/$RESOURCE_FOLDER" ]] ; then + plugin_dirs=(${CONNECT_PLUGIN_PATH//,/ }) + for plugin_dir in $plugin_dirs ; do + for plugin in $plugin_dir/*/ ; do + ln -snf $EXTERNAL_LIBS_DIR/$RESOURCE_FOLDER/$FILE_WILD_CARD "$plugin" + done + done + echo "$RESOURCE_PRETTY_NAME enabled!" + else + plugin_dirs=(${CONNECT_PLUGIN_PATH//,/ }) + for plugin_dir in $plugin_dirs ; do + find $plugin_dir/ -lname "$EXTERNAL_LIBS_DIR/$RESOURCE_FOLDER/$FILE_WILD_CARD" -exec rm -f {} \; + done + fi +} + +# +# Set up the classpath with all the plugins ... 
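+# (CONNECT_PLUGIN_PATH may be a comma-separated list of directories, for example
+# CONNECT_PLUGIN_PATH=/kafka/connect,/kafka/extra-plugins, where the second path is
+# purely illustrative; when unset it falls back to $KAFKA_CONNECT_PLUGINS_DIR below.)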
+# +if [ -z "$CONNECT_PLUGIN_PATH" ]; then + CONNECT_PLUGIN_PATH=$KAFKA_CONNECT_PLUGINS_DIR +fi +echo "Plugins are loaded from $CONNECT_PLUGIN_PATH" + +# +# Set up additional resources for Kafka Connect Debezium Connectors +# +set_connector_additonal_resource_availability $ENABLE_APICURIO_CONVERTERS "apicurio" "*" "Apicurio connectors" +set_connector_additonal_resource_availability $ENABLE_DEBEZIUM_SCRIPTING "debezium-scripting" "*.jar" "Debezium Scripting" +set_connector_additonal_resource_availability $ENABLE_OTEL "otel" "*.jar" "OpenTelemetry" + +# +# Set up Kafka Connect plugins +# +set_debezium_kc_rest_extension_availability $ENABLE_DEBEZIUM_KC_REST_EXTENSION + +# +# Set up the JMX options +# +: ${JMXAUTH:="false"} +: ${JMXSSL:="false"} +if [[ -n "$JMXPORT" && -n "$JMXHOST" ]]; then + echo "Enabling JMX on ${JMXHOST}:${JMXPORT}" + export KAFKA_JMX_OPTS="-Djava.rmi.server.hostname=${JMXHOST} -Dcom.sun.management.jmxremote.rmi.port=${JMXPORT} -Dcom.sun.management.jmxremote.port=${JMXPORT} -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=${JMXAUTH} -Dcom.sun.management.jmxremote.ssl=${JMXSSL} " +fi + +# +# Setup Flight Recorder +# +if [[ "$ENABLE_JFR" == "true" ]]; then + JFR_OPTS="-XX:StartFlightRecording" + opt_delimiter="=" + for VAR in $(env); do + if [[ "$VAR" == JFR_RECORDING_* ]]; then + opt_name=`echo "$VAR" | sed -r "s/^JFR_RECORDING_([^=]*)=.*/\1/g" | tr '[:upper:]' '[:lower:]' | tr _ -` + opt_value=`echo "$VAR" | sed -r "s/^JFR_RECORDING_[^=]*=(.*)/\1/g"` + JFR_OPTS="${JFR_OPTS}${opt_delimiter}${opt_name}=${opt_value}" + opt_delimiter="," + fi + done + opt_delimiter=" -XX:FlightRecorderOptions=" + for VAR in $(env); do + if [[ "$VAR" == JFR_OPT_* ]]; then + opt_name=`echo "$VAR" | sed -r "s/^JFR_OPT_([^=]*)=.*/\1/g" | tr '[:upper:]' '[:lower:]' | tr _ -` + opt_value=`echo "$VAR" | sed -r "s/^JFR_OPT_[^=]*=(.*)/\1/g"` + JFR_OPTS="${JFR_OPTS}${opt_delimiter}${opt_name}=${opt_value}" + opt_delimiter="," + fi + done + echo "Java Flight Recorder enabled and configured with options $JFR_OPTS" + if [[ -n "$KAFKA_OPTS" ]]; then + export KAFKA_OPTS="$KAFKA_OPTS $JFR_OPTS" + else + export KAFKA_OPTS="$JFR_OPTS" + fi + unset JFR_OPTS +fi + +# +# Setup Debezium Jolokia +# +if [ "$ENABLE_JOLOKIA" = "true" ]; then + KAFKA_OPTS="${KAFKA_OPTS} -javaagent:$(ls "$KAFKA_HOME"/libs/jolokia-jvm-*.jar)=port=8778,host=*" + export KAFKA_OPTS +fi + +# +# Setup Kafka Prometheus Metrics +# +if [ "$ENABLE_JMX_EXPORTER" = "true" ]; then + KAFKA_OPTS="${KAFKA_OPTS} -javaagent:$(ls "$KAFKA_HOME"/libs/jmx_prometheus_javaagent*.jar)=9404:$KAFKA_HOME/config/metrics.yaml" + export KAFKA_OPTS +fi + +# +# Make sure the directory for logs exists ... +# +mkdir -p ${KAFKA_DATA}/$KAFKA_BROKER_ID + +# Process the argument to this container ... +case $1 in + start) + if [[ "x$CONNECT_BOOTSTRAP_SERVERS" = "x" ]]; then + echo "The BOOTSTRAP_SERVERS variable must be set, or the container must be linked to one that runs Kafka." + exit 1 + fi + + if [[ "x$CONNECT_GROUP_ID" = "x" ]]; then + echo "The GROUP_ID must be set to an ID that uniquely identifies the Kafka Connect cluster these workers belong to." + echo "Ensure this is unique for all groups that work with a Kafka cluster." + exit 1 + fi + + if [[ "x$CONNECT_CONFIG_STORAGE_TOPIC" = "x" ]]; then + echo "The CONFIG_STORAGE_TOPIC variable must be set to the name of the topic where connector configurations will be stored." 
+ echo "This topic must have a single partition, be highly replicated (e.g., 3x or more) and should be configured for compaction." + exit 1 + fi + + if [[ "x$CONNECT_OFFSET_STORAGE_TOPIC" = "x" ]]; then + echo "The OFFSET_STORAGE_TOPIC variable must be set to the name of the topic where connector offsets will be stored." + echo "This topic should have many partitions (e.g., 25 or 50), be highly replicated (e.g., 3x or more) and be configured for compaction." + exit 1 + fi + + if [[ "x$CONNECT_STATUS_STORAGE_TOPIC" = "x" ]]; then + echo "WARNING: it is recommended to specify the STATUS_STORAGE_TOPIC variable for defining the name of the topic where connector statuses will be stored." + echo "This topic may have multiple partitions, be highly replicated (e.g., 3x or more) and should be configured for compaction." + echo "As no value is given, the default of 'connect-status' will be used." + fi + + echo "Using the following environment variables:" + echo " GROUP_ID=$CONNECT_GROUP_ID" + echo " CONFIG_STORAGE_TOPIC=$CONNECT_CONFIG_STORAGE_TOPIC" + echo " OFFSET_STORAGE_TOPIC=$CONNECT_OFFSET_STORAGE_TOPIC" + if [[ "x$CONNECT_STATUS_STORAGE_TOPIC" != "x" ]]; then + echo " STATUS_STORAGE_TOPIC=$CONNECT_STATUS_STORAGE_TOPIC" + fi + echo " BOOTSTRAP_SERVERS=$CONNECT_BOOTSTRAP_SERVERS" + echo " REST_HOST_NAME=$CONNECT_REST_HOST_NAME" + echo " REST_PORT=$CONNECT_REST_PORT" + echo " ADVERTISED_HOST_NAME=$CONNECT_REST_ADVERTISED_HOST_NAME" + echo " ADVERTISED_PORT=$CONNECT_REST_ADVERTISED_PORT" + echo " KEY_CONVERTER=$CONNECT_KEY_CONVERTER" + echo " VALUE_CONVERTER=$CONNECT_VALUE_CONVERTER" + echo " OFFSET_FLUSH_INTERVAL_MS=$CONNECT_OFFSET_FLUSH_INTERVAL_MS" + echo " OFFSET_FLUSH_TIMEOUT_MS=$CONNECT_OFFSET_FLUSH_TIMEOUT_MS" + echo " SHUTDOWN_TIMEOUT=$CONNECT_TASK_SHUTDOWN_GRACEFUL_TIMEOUT_MS" + + # Copy config files if not provided in volume + cp -rn $KAFKA_HOME/config.orig/* $KAFKA_HOME/config + + # + # Configure the log files ... 
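+        # (For illustration: a hypothetical CONNECT_LOG4J_LOGGER_IO_DEBEZIUM=DEBUG env var would be
+        # written to config/log4j.properties as "log4j.logger.io.debezium=DEBUG" by the loop below.)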
+ # + if [[ -n "$CONNECT_LOG4J_LOGGERS" ]]; then + sed -i -r -e "s|^(log4j.rootLogger)=.*|\1=${CONNECT_LOG4J_LOGGERS}|g" $KAFKA_HOME/config/log4j.properties + unset CONNECT_LOG4J_LOGGERS + fi + env | grep '^CONNECT_LOG4J' | while read -r VAR; + do + env_var=`echo "$VAR" | sed -r "s/([^=]*)=.*/\1/g"` + prop_name=`echo "$VAR" | sed -r "s/^CONNECT_([^=]*)=.*/\1/g" | tr '[:upper:]' '[:lower:]' | tr _ .` + prop_value=`echo "$VAR" | sed -r "s/^CONNECT_[^=]*=(.*)/\1/g"` + if egrep -q "(^|^#)$prop_name=" $KAFKA_HOME/config/log4j.properties; then + #note that no config names or values may contain an '@' char + sed -r -i "s@(^|^#)($prop_name)=(.*)@\2=${prop_value}@g" $KAFKA_HOME/config/log4j.properties + else + echo "$prop_name=${prop_value}" >> $KAFKA_HOME/config/log4j.properties + fi + if [[ "$SENSITIVE_PROPERTIES" = *"$env_var"* ]]; then + echo "--- Setting logging property from $env_var: $prop_name=[hidden]" + else + echo "--- Setting logging property from $env_var: $prop_name=${prop_value}" + fi + unset $env_var + done + if [[ -n "$LOG_LEVEL" ]]; then + sed -i -r -e "s|=INFO, stdout|=$LOG_LEVEL, stdout|g" $KAFKA_HOME/config/log4j.properties + sed -i -r -e "s|^(log4j.appender.stdout.threshold)=.*|\1=${LOG_LEVEL}|g" $KAFKA_HOME/config/log4j.properties + fi + export KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:$KAFKA_HOME/config/log4j.properties" + + # + # Process all environment variables that start with 'CONNECT_' + # + env | while read -r VAR; + do + env_var=`echo "$VAR" | sed -r "s/([^=]*)=.*/\1/g"` + if [[ $env_var =~ ^CONNECT_ ]]; then + prop_name=`echo "$VAR" | sed -r "s/^CONNECT_([^=]*)=.*/\1/g" | tr '[:upper:]' '[:lower:]' | tr _ .` + prop_value=`echo "$VAR" | sed -r "s/^CONNECT_[^=]*=(.*)/\1/g"` + if egrep -q "(^|^#)$prop_name=" $KAFKA_HOME/config/connect-distributed.properties; then + #note that no config names or values may contain an '@' char + sed -r -i "s@(^|^#)($prop_name)=(.*)@\2=${prop_value}@g" $KAFKA_HOME/config/connect-distributed.properties + else + # echo "Adding property $prop_name=${prop_value}" + echo "$prop_name=${prop_value}" >> $KAFKA_HOME/config/connect-distributed.properties + fi + if [[ "$SENSITIVE_PROPERTIES" = *"$env_var"* ]]; then + echo "--- Setting property from $env_var: $prop_name=[hidden]" + else + echo "--- Setting property from $env_var: $prop_name=${prop_value}" + fi + fi + done + + # + # Execute the Kafka Connect distributed service, replacing this shell process with the specified program ... 
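+        # (Using exec means connect-distributed.sh takes over this process, so the Connect
+        # service, not this wrapper script, receives container signals such as SIGTERM.)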
+ # + exec $KAFKA_HOME/bin/connect-distributed.sh $KAFKA_HOME/config/connect-distributed.properties + ;; +esac + +# Otherwise just run the specified command +exec "$@" diff --git a/connect-base/2.6/docker-maven-download.sh b/connect-base/2.6/docker-maven-download.sh new file mode 100755 index 00000000..a6530ca0 --- /dev/null +++ b/connect-base/2.6/docker-maven-download.sh @@ -0,0 +1,133 @@ +#!/bin/bash + +# +# Download connector maven dependencies +# 4 methods are available: +# - maven_dep(REPO, GROUP, PACKAGE, VERSION, FILE, MD5_CHECKSUM) # Downloads anything from a maven repo +# - maven_core_dep(GROUP, PACKAGE, VERSION, MD5_CHECKSUM) # Downloads jar files +# - maven_confluent_dep(GROUP, PACKAGE, VERSION, MD5_CHECKSUM) # Downloads jar files for Confluent deps +# - maven_debezium_plugin(CONNECTOR, VERSION, MD5_CHECKSUM) # Downloads debezium tar plugin +# +# Author: Renato Mefi +# +set -e + +# If there's not maven repository url set externally, +# default to the ones below +MAVEN_REPO_CENTRAL=${MAVEN_REPO_CENTRAL:-"https://repo1.maven.org/maven2"} +MAVEN_REPOS_ADDITIONAL=${MAVEN_REPOS_ADDITIONAL:-""} +MAVEN_REPO_CONFLUENT=${MAVEN_REPO_CONFLUENT:-"https://packages.confluent.io/maven"} +MAVEN_DEP_DESTINATION=${MAVEN_DEP_DESTINATION} +EXTERNAL_LIBS_DIR=${EXTERNAL_LIBS_DIR} + +maven_dep() { + local REPO="$1" + local GROUP="$2" + local PACKAGE="$3" + local VERSION="$4" + local FILE="$5" + local MD5_CHECKSUM="$6" + + DOWNLOAD_FILE_TMP_PATH="/tmp/maven_dep/${PACKAGE}" + DOWNLOAD_FILE="$DOWNLOAD_FILE_TMP_PATH/$FILE" + test -d $DOWNLOAD_FILE_TMP_PATH || mkdir -p $DOWNLOAD_FILE_TMP_PATH + + curl -sfSL -o "$DOWNLOAD_FILE" "$REPO/$GROUP/$PACKAGE/$VERSION/$FILE" + + echo "$MD5_CHECKSUM $DOWNLOAD_FILE" | md5sum -c - +} + +maven_central_dep() { + maven_dep $MAVEN_REPO_CENTRAL $1 $2 $3 "$2-$3.jar" $4 + mv "$DOWNLOAD_FILE" $MAVEN_DEP_DESTINATION +} + +maven_confluent_dep() { + maven_dep $MAVEN_REPO_CONFLUENT "io/confluent" $1 $2 "$1-$2.jar" $3 + mv "$DOWNLOAD_FILE" $MAVEN_DEP_DESTINATION +} + +maven_debezium_plugin() { + maven_dep $MAVEN_REPO_CENTRAL "io/debezium" "debezium-connector-$1" $2 "debezium-connector-$1-$2-plugin.tar.gz" $3 + tar -xzf "$DOWNLOAD_FILE" -C "$MAVEN_DEP_DESTINATION" && rm "$DOWNLOAD_FILE" +} + +maven_debezium_optional() { + maven_dep $MAVEN_REPO_CENTRAL "io/debezium" "debezium-$1" $2 "debezium-$1-$2.tar.gz" $3 + tar -xzf "$DOWNLOAD_FILE" -C "$EXTERNAL_LIBS_DIR" && rm "$DOWNLOAD_FILE" +} + +maven_camel_kafka() { + maven_dep $MAVEN_REPO_CENTRAL "org/apache/camel/kafkaconnector" "camel-$1-kafka-connector" $2 "camel-$1-kafka-connector-$2-package.tar.gz" $3 + tar -xzf "$DOWNLOAD_FILE" -C "$MAVEN_DEP_DESTINATION" && rm "$DOWNLOAD_FILE" +} + +maven_debezium_additional_plugin() { + eval "$MAVEN_REPOS_ADDITIONAL" + REPO=${1^^} + if [ -z "${!REPO}" ] + then + maven_dep $MAVEN_REPO_CENTRAL "io/debezium" "debezium-connector-$2" $3 "debezium-connector-$2-$3-plugin.tar.gz" $4 + else + maven_dep "${!REPO}" "io/debezium" "debezium-connector-$2" $3 "debezium-connector-$2-$3-plugin.tar.gz" $4 + fi + tar -xzf "$DOWNLOAD_FILE" -C "$MAVEN_DEP_DESTINATION" && rm "$DOWNLOAD_FILE" +} + +maven_apicurio_converter() { + if [[ -z "$EXTERNAL_LIBS_DIR" ]] ; then + echo "WARNING: EXTERNAL_LIBS_DIR is not set. Skipping Apicurio converter loading..." + return + fi + if [[ ! -d "$EXTERNAL_LIBS_DIR" ]] ; then + echo "WARNING: EXTERNAL_LIBS_DIR is not a directory. Skipping Apicurio converter loading..." 
+ return + fi + APICURIO_CONVERTER_PACKAGE="apicurio-registry-distro-connect-converter" + maven_dep $MAVEN_REPO_CENTRAL "io/apicurio" "$APICURIO_CONVERTER_PACKAGE" "$1" "$APICURIO_CONVERTER_PACKAGE-$1.tar.gz" "$2" + mkdir "$EXTERNAL_LIBS_DIR/apicurio" + tar -xzf "$DOWNLOAD_FILE" -C "$EXTERNAL_LIBS_DIR/apicurio" && rm "$DOWNLOAD_FILE" +} + +maven_otel_libs() { + if [[ -z "$EXTERNAL_LIBS_DIR" ]] ; then + echo "WARNING: EXTERNAL_LIBS_DIR is not set. Skipping loading OTEL libraries ..." + return + fi + if [[ ! -d "$EXTERNAL_LIBS_DIR" ]] ; then + echo "WARNING: EXTERNAL_LIBS_DIR is not a directory. Skipping loading OTEL libraries ..." + return + fi + if [[ ! -d "$EXTERNAL_LIBS_DIR/otel" ]] ; then + mkdir "$EXTERNAL_LIBS_DIR/otel" + fi + maven_dep $MAVEN_REPO_CENTRAL $1 $2 $3 "$2-$3.jar" $4 + mv "$DOWNLOAD_FILE" $EXTERNAL_LIBS_DIR/otel +} + +case $1 in + "central" ) shift + maven_central_dep ${@} + ;; + "confluent" ) shift + maven_confluent_dep ${@} + ;; + "debezium" ) shift + maven_debezium_plugin ${@} + ;; + "debezium-additional" ) shift + maven_debezium_additional_plugin ${@} + ;; + "debezium-optional" ) shift + maven_debezium_optional ${@} + ;; + "camel-kafka" ) shift + maven_camel_kafka ${@} + ;; + "apicurio" ) shift + maven_apicurio_converter ${@} + ;; + "otel" ) shift + maven_otel_libs ${@} + ;; +esac diff --git a/connect-base/2.6/log4j.properties b/connect-base/2.6/log4j.properties new file mode 100644 index 00000000..ce4ef591 --- /dev/null +++ b/connect-base/2.6/log4j.properties @@ -0,0 +1,19 @@ +kafka.logs.dir=logs + +log4j.rootLogger=INFO, stdout, appender + +# Disable excessive reflection warnings - KAFKA-5229 +log4j.logger.org.reflections=ERROR + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.threshold=INFO +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %X{dbz.connectorType}|%X{dbz.connectorName}|%X{dbz.connectorContext} %m [%c]%n + + +log4j.appender.appender=org.apache.log4j.DailyRollingFileAppender +log4j.appender.appender.DatePattern='.'yyyy-MM-dd-HH +log4j.appender.appender.File=${kafka.logs.dir}/connect-service.log +log4j.appender.appender.layout=org.apache.log4j.PatternLayout +log4j.appender.appender.layout.ConversionPattern=%d{ISO8601} %-5p %X{dbz.connectorType}|%X{dbz.connectorName}|%X{dbz.connectorContext} %m [%c]%n + diff --git a/connect/2.5/Dockerfile b/connect/2.5/Dockerfile index 925a7f49..f947b91f 100644 --- a/connect/2.5/Dockerfile +++ b/connect/2.5/Dockerfile @@ -3,22 +3,22 @@ FROM $DEBEZIUM_DOCKER_REGISTRY_PRIMARY_NAME/connect-base:2.5 LABEL maintainer="Debezium Community" -ENV DEBEZIUM_VERSION="2.5.0.CR1" \ +ENV DEBEZIUM_VERSION="2.5.0.Final" \ MAVEN_REPO_CENTRAL="" \ MAVEN_REPOS_ADDITIONAL="" \ MAVEN_DEP_DESTINATION=$KAFKA_CONNECT_PLUGINS_DIR \ - MONGODB_MD5=fd8032b4849145c260a7b7f16c5f70e6 \ - MYSQL_MD5=08430a792a008603bb933c34aaec9626 \ - POSTGRES_MD5=69e2896e52107c86e5ceeba5087f8447 \ - SQLSERVER_MD5=bc04a9ab2ad85f8e04fad355e1d26d42 \ - ORACLE_MD5=f9fbef1e3d8668dcf801eac98a89df2f \ - DB2_MD5=dc5fcf8ddbc5ca6e4b6fe1ac4f5b5d0e \ - SPANNER_MD5=e70e61a870527f3524c1b2d7eaee232a \ - VITESS_MD5=52e1f58902b29f8b5b868ad602f99b23 \ - JDBC_MD5=3edad043a0e57652eefae9eac249ce35 \ - INFORMIX_MD5=4ceac3ad27497f0d417c3a750a789066 \ - KCRESTEXT_MD5=0685d8550fc7b78707838d41733eca46 \ - SCRIPTING_MD5=5b4b738ca6d052d3e89294b7833419ec + MONGODB_MD5=fd5ca6d535108cafaef2a92a9afd97ae \ + MYSQL_MD5=09d944f3a21bd205d9f01edde5cf3963 \ + 
POSTGRES_MD5=16e95339cb27dba3a3d96bdd7b5ba3d0 \ + SQLSERVER_MD5=64579046f37a1523e2fd27e5b9ab330c \ + ORACLE_MD5=250d169979d2d3d8d919f8da65007634 \ + DB2_MD5=aa5405943a493992d848679532c796a4 \ + SPANNER_MD5=fb1c4a80c0990070497498600c1d50d6 \ + VITESS_MD5=7462d3e3600bd98c20dd23bce5c415ea \ + JDBC_MD5=7e3dae35120837b21e2f98a72eaf91d9 \ + INFORMIX_MD5=5ae365d3a3ca3afaba00bd1312b69e58 \ + KCRESTEXT_MD5=1dd7df4e6c7dd5dc4e5a74a5c887365f \ + SCRIPTING_MD5=49163a506e826056ac2f5ef758c03f5e RUN docker-maven-download debezium mongodb "$DEBEZIUM_VERSION" "$MONGODB_MD5" && \ docker-maven-download debezium mysql "$DEBEZIUM_VERSION" "$MYSQL_MD5" && \ diff --git a/connect/2.5/Dockerfile.local b/connect/2.5/Dockerfile.local index 132ad99a..aa3de4ba 100644 --- a/connect/2.5/Dockerfile.local +++ b/connect/2.5/Dockerfile.local @@ -2,7 +2,7 @@ FROM quay.io/debezium/connect-base:2.5 LABEL maintainer="Debezium Community" -ARG DEBEZIUM_VERSION=2.5.0.CR1 +ARG DEBEZIUM_VERSION=2.5.0.Final ENV DEBEZIUM_VERSION=${DEBEZIUM_VERSION} # -------- testing --------- diff --git a/connect/2.6/Dockerfile b/connect/2.6/Dockerfile new file mode 100644 index 00000000..a8081d55 --- /dev/null +++ b/connect/2.6/Dockerfile @@ -0,0 +1,35 @@ +ARG DEBEZIUM_DOCKER_REGISTRY_PRIMARY_NAME +FROM localhost:5500/debeziumquay/connect-base:2.6 + +LABEL maintainer="Debezium Community" + +ENV DEBEZIUM_VERSION="2.5.0.Final" \ + MAVEN_REPO_CENTRAL="" \ + MAVEN_REPOS_ADDITIONAL="" \ + MAVEN_DEP_DESTINATION=$KAFKA_CONNECT_PLUGINS_DIR \ + MONGODB_MD5=fd5ca6d535108cafaef2a92a9afd97ae \ + MYSQL_MD5=09d944f3a21bd205d9f01edde5cf3963 \ + POSTGRES_MD5=16e95339cb27dba3a3d96bdd7b5ba3d0 \ + SQLSERVER_MD5=64579046f37a1523e2fd27e5b9ab330c \ + ORACLE_MD5=250d169979d2d3d8d919f8da65007634 \ + DB2_MD5=aa5405943a493992d848679532c796a4 \ + SPANNER_MD5=fb1c4a80c0990070497498600c1d50d6 \ + VITESS_MD5=7462d3e3600bd98c20dd23bce5c415ea \ + JDBC_MD5=7e3dae35120837b21e2f98a72eaf91d9 \ + INFORMIX_MD5=5ae365d3a3ca3afaba00bd1312b69e58 \ + KCRESTEXT_MD5=1dd7df4e6c7dd5dc4e5a74a5c887365f \ + SCRIPTING_MD5=49163a506e826056ac2f5ef758c03f5e + +RUN docker-maven-download debezium mongodb "$DEBEZIUM_VERSION" "$MONGODB_MD5" && \ + docker-maven-download debezium mysql "$DEBEZIUM_VERSION" "$MYSQL_MD5" && \ + docker-maven-download debezium postgres "$DEBEZIUM_VERSION" "$POSTGRES_MD5" && \ + docker-maven-download debezium sqlserver "$DEBEZIUM_VERSION" "$SQLSERVER_MD5" && \ + docker-maven-download debezium oracle "$DEBEZIUM_VERSION" "$ORACLE_MD5" && \ + docker-maven-download debezium-additional db2 db2 "$DEBEZIUM_VERSION" "$DB2_MD5" && \ + docker-maven-download debezium-additional jdbc jdbc "$DEBEZIUM_VERSION" "$JDBC_MD5" && \ + docker-maven-download debezium-additional spanner spanner "$DEBEZIUM_VERSION" "$SPANNER_MD5" && \ + docker-maven-download debezium-additional vitess vitess "$DEBEZIUM_VERSION" "$VITESS_MD5" && \ + docker-maven-download debezium-additional informix informix "$DEBEZIUM_VERSION" "$INFORMIX_MD5" && \ + docker-maven-download debezium-optional connect-rest-extension "$DEBEZIUM_VERSION" "$KCRESTEXT_MD5" && \ + docker-maven-download debezium-optional scripting "$DEBEZIUM_VERSION" "$SCRIPTING_MD5" + diff --git a/connect/2.6/Dockerfile.local b/connect/2.6/Dockerfile.local new file mode 100644 index 00000000..b983fd79 --- /dev/null +++ b/connect/2.6/Dockerfile.local @@ -0,0 +1,25 @@ +FROM quay.io/debezium/connect-base:2.6 + +LABEL maintainer="Debezium Community" + +ARG DEBEZIUM_VERSION=2.6.0 +ENV DEBEZIUM_VERSION=${DEBEZIUM_VERSION} + +# -------- testing --------- +COPY 
debezium-connector-mysql-$DEBEZIUM_VERSION-SNAPSHOT-plugin.tar.gz $KAFKA_CONNECT_PLUGINS_DIR/debezium-mysql-plugin.tar.gz +COPY debezium-connector-mongodb-$DEBEZIUM_VERSION-SNAPSHOT-plugin.tar.gz $KAFKA_CONNECT_PLUGINS_DIR/debezium-mongodb-plugin.tar.gz +COPY debezium-connector-postgres-$DEBEZIUM_VERSION-SNAPSHOT-plugin.tar.gz $KAFKA_CONNECT_PLUGINS_DIR/debezium-postgres-plugin.tar.gz +COPY debezium-connector-sqlserver-$DEBEZIUM_VERSION-SNAPSHOT-plugin.tar.gz $KAFKA_CONNECT_PLUGINS_DIR/debezium-sqlserver-plugin.tar.gz + +COPY debezium-connector-oracle-$DEBEZIUM_VERSION-SNAPSHOT-plugin.tar.gz $KAFKA_CONNECT_PLUGINS_DIR/debezium-oracle-plugin.tar.gz +COPY debezium-connector-db2-$DEBEZIUM_VERSION-SNAPSHOT-plugin.tar.gz $KAFKA_CONNECT_PLUGINS_DIR/debezium-db2-plugin.tar.gz +COPY debezium-connector-jdbc-$DEBEZIUM_VERSION-SNAPSHOT-plugin.tar.gz $KAFKA_CONNECT_PLUGINS_DIR/debezium-jdbc-plugin.tar.gz +COPY debezium-connector-spanner-$DEBEZIUM_VERSION-SNAPSHOT-plugin.tar.gz $KAFKA_CONNECT_PLUGINS_DIR/debezium-spanner-plugin.tar.gz +COPY debezium-connector-vitess-$DEBEZIUM_VERSION-SNAPSHOT-plugin.tar.gz $KAFKA_CONNECT_PLUGINS_DIR/debezium-vitess-plugin.tar.gz + +ADD debezium-connect-rest-extension-$DEBEZIUM_VERSION-SNAPSHOT.tar.gz $EXTERNAL_LIBS_DIR +ADD debezium-scripting-$DEBEZIUM_VERSION-SNAPSHOT.tar.gz $EXTERNAL_LIBS_DIR + +RUN for CONNECTOR in {mysql,mongodb,postgres,sqlserver,oracle,db2,jdbc,spanner,vitess}; do \ + tar -xzf $KAFKA_CONNECT_PLUGINS_DIR/debezium-$CONNECTOR-plugin.tar.gz -C $KAFKA_CONNECT_PLUGINS_DIR; \ + done; diff --git a/connect/2.6/README.md b/connect/2.6/README.md new file mode 100644 index 00000000..348a6bb3 --- /dev/null +++ b/connect/2.6/README.md @@ -0,0 +1,156 @@ +[Kafka Connect](https://kafka.apache.org/documentation.html#connect) is a system for moving data into and out of Kafka. All Debezium connectors adhere to the Kafka Connector API for _source connectors_, and each monitors a specific kind of database management system for changing data, and then forwards those changes directly into Kafka topics organized by server, database, and table. This image defines a runnable Kafka Connect service preconfigured with all Debezium connectors. The service has a RESTful API for managing connector instances -- simply start up a container, configure a connector for each data source you want to monitor, and let Debezium monitor those sources for changes and forward them to the appropriate Kafka topics. + +# What is Debezium? + +Debezium is a distributed platform that turns your existing databases into event streams, so applications can quickly react to each row-level change in the databases. Debezium is built on top of Kafka and provides Kafka Connect compatible connectors that monitor specific database management systems. Debezium records the history of data changes in Kafka logs, so your application can be stopped and restarted at any time and can easily consume all of the events it missed while it was not running, ensuring that all events are processed correctly and completely. + +Running Debezium involves Zookeeper, Kafka, and services that run Debezium's connectors. For simple evaluation and experimentation, all services can all be run on a single host machine, using the recipe outlined below. Production environments, however, require properly running and networking multiple instances of each service to provide the performance, reliability, replication, and fault tolerance. 
This can be done with a platform like [OpenShift](https://www.openshift.com) that manages multiple Docker containers running on multiple hosts and machines.
+Using a Kubernetes operator such as [Strimzi](https://strimzi.io/) (which comes with operators and CRDs for Kafka, Kafka Connect, connectors, and more) is recommended for such deployments.
+
+# How to use this image
+
+This image can be used in several different ways. All require an already-running Zookeeper service, which is either running locally via the container named `zookeeper` or with OpenShift running as a service named `zookeeper`. Also required are already-running Kafka brokers, which are either running locally via the container named `kafka` or with OpenShift running as a service named `kafka`.
+
+## Start a Kafka Connect service instance
+
+When running a cluster of one or more Kafka Connect service instances, several important parameters must be defined using environment variables. Please see the section below for the list of these required environment variables and acceptable values.
+
+Starting an instance of Kafka Connect using this image is simple:
+
+    $ docker run -it --name connect -p 8083:8083 -e GROUP_ID=1 -e CONFIG_STORAGE_TOPIC=my-connect-configs -e OFFSET_STORAGE_TOPIC=my-connect-offsets -e ADVERTISED_HOST_NAME=$(echo $DOCKER_HOST | cut -f3 -d'/' | cut -f1 -d':') --link zookeeper:zookeeper --link kafka:kafka quay.io/debezium/connect
+
+This command uses this image and starts a new container named `connect`, which runs in the foreground and attaches the console so that it displays the service's output and error messages. It exposes its REST API on port 8083, which is mapped to the same port number on the local host. It uses Zookeeper in the container (or service) named `zookeeper` and Kafka brokers in the container (or service) named `kafka`. This command sets the three required environment variables, though you should replace their values with more meaningful values for your environment.
+
+To start the container in _detached_ mode, simply replace the `-it` option with `-d`. No service output will be sent to your console, but it can be read at any time using the `docker logs` command. For example, the following command will display the output and keep following the output:
+
+    $ docker logs --follow connect
+
+## Start a shell in a running container
+
+If you are already running a container with a Kafka Connect service, you can use this image to connect to that container and obtain a command line shell:
+
+    $ docker exec -it connect bash
+
+where `connect` is the name of your existing container. The shell will be set up with all environment variables exactly like when starting the service in the container. Therefore, links to other containers and additional environment variables may be specified and will be reflected in the shell's exported variables.
+
+
+# Environment variables
+
+The Debezium Kafka Connect image uses several environment variables when running the Kafka Connect service with this image.
+Please also see [the documentation](https://kafka.apache.org/documentation/#connect_running) of Kafka Connect to learn more about the specific settings.
+
+### `GROUP_ID`
+
+This environment variable is required when running the Kafka Connect service. Set this to an ID that uniquely identifies the Kafka Connect cluster the service and its workers belong to.
+
+### `CONFIG_STORAGE_TOPIC`
+
+This environment variable is required when running the Kafka Connect service.
Set this to the name of the Kafka topic where the Kafka Connect services in the group store connector configurations. The topic must have a single partition and be highly replicated (e.g., 3x or more).
+
+### `OFFSET_STORAGE_TOPIC`
+
+This environment variable is required when running the Kafka Connect service. Set this to the name of the Kafka topic where the Kafka Connect services in the group store connector offsets. The topic must have a large number of partitions (e.g., 25 or 50), be highly replicated (e.g., 3x or more) and should be configured for compaction.
+
+### `BOOTSTRAP_SERVERS`
+
+This environment variable is an advanced setting, used only when Kafka is not running in a linkable container or service. Set this to a list of host/port pairs to use for establishing the *initial* connection to the Kafka cluster. Once a connection is established to one of these brokers, the service will then discover and make use of all Kafka brokers in the cluster, regardless of which servers are specified here for bootstrapping. The list should be in the form `host1:port1,host2:port2,...`. We recommend that you include more than one broker in this list, in case one of those is down.
+
+### `HOST_NAME`
+
+This environment variable is an advanced setting. Set this to the hostname that the REST API will bind to. Defaults to the hostname of the container.
+
+### `ADVERTISED_HOST_NAME`
+
+This environment variable is an advanced setting. Set this to the hostname that will be given out to other workers to connect with. Defaults to the hostname of the container.
+
+### `ADVERTISED_PORT`
+
+This environment variable is an advanced setting. Set this to the port that will be given out to other workers to connect with.
+
+### `KEY_CONVERTER`
+
+This environment variable is an advanced setting. Set this to the fully-qualified name of the Java class that implements Kafka Connect's `Converter` class, used to convert the connector's keys to the form stored in Kafka. Defaults to `org.apache.kafka.connect.json.JsonConverter`.
+
+### `VALUE_CONVERTER`
+
+This environment variable is an advanced setting. Set this to the fully-qualified name of the Java class that implements Kafka Connect's `Converter` class, used to convert the connector's values to the form stored in Kafka. Defaults to `org.apache.kafka.connect.json.JsonConverter`.
+
+### `OFFSET_FLUSH_INTERVAL_MS`
+
+This environment variable is an advanced setting. Set this to the number of milliseconds defining the interval at which the service will periodically try committing offsets for tasks. The default is `60000`, or 60 seconds.
+
+### `OFFSET_FLUSH_TIMEOUT_MS`
+
+This environment variable is an advanced setting. Set this to the maximum time in milliseconds to wait for records to flush and partition offset data to be committed to offset storage before cancelling the process and restoring the offset data to be committed in a future attempt. The default is `5000`, or 5 seconds.
+
+### `SHUTDOWN_TIMEOUT`
+
+This environment variable is an advanced setting. Set this to the number of milliseconds to wait for tasks to shut down gracefully while the connectors complete all processing, record any final data, and clean up resources. This is the total amount of time, not per task. All tasks have shutdown triggered, and then they are awaited sequentially. The default is `10000`, or 10 seconds.
+
+
+### `HEAP_OPTS`
+
+This environment variable is recommended. Use this to set the JVM options for Kafka Connect.
By default a value of "-Xms256M -Xmx2G" is used, meaning that each Connect worker uses up to 2GB of heap memory. Using too little memory may cause performance problems, while using too much may prevent the worker from starting properly given the memory available on the machine. Obviously the container must be able to use the amount of memory defined by this environment variable.
+
+### `LOG_LEVEL`
+
+This environment variable is optional. Use this to set the level of detail for Kafka's application log written to STDOUT and STDERR. Valid values are `INFO` (default), `WARN`, `ERROR`, `DEBUG`, or `TRACE`.
+
+### `ENABLE_APICURIO_CONVERTERS`
+
+This environment variable is optional. Use this to enable [Apicur.io](https://www.apicur.io/) converters with
+Apicurio Schema Registry by setting `ENABLE_APICURIO_CONVERTERS=true` as a container env var. Valid values are `false` to disable (default) or `true` to enable Apicurio converters.
+
+### `ENABLE_DEBEZIUM_KC_REST_EXTENSION`
+
+This environment variable is optional.
+Use this to enable the Debezium Kafka Connect REST Extension that ships with the image by setting `ENABLE_DEBEZIUM_KC_REST_EXTENSION=true` as a container env var.
+Valid values are `false` to disable (default) or `true` to enable the extension.
+
+### `ENABLE_DEBEZIUM_SCRIPTING`
+
+This environment variable is optional.
+Use this to enable Debezium features that use scripting languages, such as the Message Filtering or Content-based Routing SMTs, by setting `ENABLE_DEBEZIUM_SCRIPTING=true` as a container env var.
+Valid values are `false` to disable (default) or `true` to enable scripting.
+Note: in order to prevent the execution of arbitrary scripting expressions, you should enable this option only if you've secured access to the Kafka Connect configuration interface appropriately.
+
+### `ENABLE_OTEL`
+
+This environment variable is optional. Use this to enable [OpenTelemetry](https://opentelemetry.io/) libraries by setting `ENABLE_OTEL=true` as a container env var.
+Valid values are `false` to disable (default) or `true` to enable OpenTelemetry libraries.
+
+### Others
+
+Environment variables that start with `CONNECT_` will be used to update the Kafka Connect worker configuration file. Each environment variable name will be mapped to a configuration property name by:
+
+1. removing the `CONNECT_` prefix;
+2. lowercasing all characters; and
+3. converting all '_' characters to '.' characters
+
+For example, the environment variable `CONNECT_HEARTBEAT_INTERVAL_MS` is converted to the `heartbeat.interval.ms` property. The container will then update the Kafka Connect worker configuration file to include the property's name and value.
+
+The value of the environment variable may not contain a '\@' character.
+
+
+# Ports
+
+Containers created using this image will expose port 8083, which is the standard port bound to by the Kafka Connect service. You can use standard Docker options to map this to a different port on the host that runs the container.
+
+
+# Storing data
+
+The Kafka Connect service run by this image stores no data in the container, but it does produce logs. The only way to keep these files is to use volumes that map specific directories inside the container to the local file system (or to OpenShift persistent volumes).
+
+### Log files
+
+Although this image will send Kafka Connect service log output to standard output so it is visible as Docker logs, this image also configures the Kafka Connect service to write out more logs to a data volume at `/kafka/logs`.
+ +### Configuration + +This image defines a data volume at `/kafka/config` where the service's configuration files are stored. Note that these configuration files are always modified based upon the environment variables and linked containers. The best use of this data volume is to be able to see the configuration files used by Kafka Connect, although with some care it is possible to supply custom configuration files that will be adapted and used upon startup. + +# Oracle Connector + +If you want to use the Oracle connector, it is necessary to add dependencies that are not part of the image due to licensing restrictions. In this case you should create a new image derived from this one and bake in the Oracle Instant Client JAR files. + +The files in question are available as the [Oracle Instant Client for Linux](http://www.oracle.com/technetwork/topics/linuxx86-64soft-092277.html). Please follow [an example](https://github.com/debezium/debezium-examples/blob/main/tutorial/debezium-with-oracle-jdbc/Dockerfile) to see how the resulting image should be structured. diff --git a/connect/snapshot/Dockerfile b/connect/snapshot/Dockerfile index 883ee3af..237022e7 100644 --- a/connect/snapshot/Dockerfile +++ b/connect/snapshot/Dockerfile @@ -1,8 +1,8 @@ -FROM quay.io/debezium/connect-base:2.5 +FROM quay.io/debezium/connect-base:2.6 LABEL maintainer="Debezium Community" -ARG DEBEZIUM_VERSION=2.5.0-SNAPSHOT +ARG DEBEZIUM_VERSION=2.6.0-SNAPSHOT ENV DEBEZIUM_VERSION=$DEBEZIUM_VERSION \ MAVEN_OSS_SNAPSHOT="https://oss.sonatype.org/content/repositories/snapshots" diff --git a/examples/mongodb/2.6/Dockerfile b/examples/mongodb/2.6/Dockerfile new file mode 100644 index 00000000..350dd69e --- /dev/null +++ b/examples/mongodb/2.6/Dockerfile @@ -0,0 +1,14 @@ +FROM mongo:5.0 + +LABEL maintainer="Debezium Community" + +COPY init-inventory.sh /usr/local/bin/ +RUN chmod +x /usr/local/bin/init-inventory.sh + +# Starting with MongoDB 4.4, MongoDB with authentication enabled requires a key +# for intra-replica set communication +RUN openssl rand -base64 756 > /etc/mongodb.keyfile &&\ + chown mongodb /etc/mongodb.keyfile &&\ + chmod 400 /etc/mongodb.keyfile + +CMD ["mongod", "--replSet", "rs0", "--auth", "--keyFile", "/etc/mongodb.keyfile"] diff --git a/examples/mongodb/2.6/init-inventory.sh b/examples/mongodb/2.6/init-inventory.sh new file mode 100755 index 00000000..853e42b6 --- /dev/null +++ b/examples/mongodb/2.6/init-inventory.sh @@ -0,0 +1,94 @@ +HOSTNAME=`hostname` + + OPTS=`getopt -o h: --long hostname: -n 'parse-options' -- "$@"` + if [ $? != 0 ] ; then echo "Failed parsing options."
>&2 ; exit 1 ; fi + + echo "$OPTS" + eval set -- "$OPTS" + + while true; do + case "$1" in + -h | --hostname ) HOSTNAME=$2; shift; shift ;; + -- ) shift; break ;; + * ) break ;; + esac + done +echo "Using HOSTNAME='$HOSTNAME'" + +mongo localhost:27017/inventory <<-EOF + rs.initiate({ + _id: "rs0", + members: [ { _id: 0, host: "${HOSTNAME}:27017" } ] + }); +EOF +echo "Initiated replica set" + +sleep 3 +mongo localhost:27017/admin <<-EOF + db.createUser({ user: 'admin', pwd: 'admin', roles: [ { role: "userAdminAnyDatabase", db: "admin" } ] }); +EOF + +mongo -u admin -p admin localhost:27017/admin <<-EOF + db.runCommand({ + createRole: "listDatabases", + privileges: [ + { resource: { cluster : true }, actions: ["listDatabases"]} + ], + roles: [] + }); + + db.runCommand({ + createRole: "readChangeStream", + privileges: [ + { resource: { db: "", collection: ""}, actions: [ "find", "changeStream" ] } + ], + roles: [] + }); + + db.createUser({ + user: 'debezium', + pwd: 'dbz', + roles: [ + { role: "readWrite", db: "inventory" }, + { role: "read", db: "local" }, + { role: "listDatabases", db: "admin" }, + { role: "readChangeStream", db: "admin" }, + { role: "read", db: "config" }, + { role: "read", db: "admin" } + ] + }); +EOF + +echo "Created users" + +mongo -u debezium -p dbz --authenticationDatabase admin localhost:27017/inventory <<-EOF + use inventory; + + db.products.insert([ + { _id : NumberLong("101"), name : 'scooter', description: 'Small 2-wheel scooter', weight : 3.14, quantity : NumberInt("3") }, + { _id : NumberLong("102"), name : 'car battery', description: '12V car battery', weight : 8.1, quantity : NumberInt("8") }, + { _id : NumberLong("103"), name : '12-pack drill bits', description: '12-pack of drill bits with sizes ranging from #40 to #3', weight : 0.8, quantity : NumberInt("18") }, + { _id : NumberLong("104"), name : 'hammer', description: "12oz carpenter's hammer", weight : 0.75, quantity : NumberInt("4") }, + { _id : NumberLong("105"), name : 'hammer', description: "14oz carpenter's hammer", weight : 0.875, quantity : NumberInt("5") }, + { _id : NumberLong("106"), name : 'hammer', description: "16oz carpenter's hammer", weight : 1.0, quantity : NumberInt("0") }, + { _id : NumberLong("107"), name : 'rocks', description: 'box of assorted rocks', weight : 5.3, quantity : NumberInt("44") }, + { _id : NumberLong("108"), name : 'jacket', description: 'water resistent black wind breaker', weight : 0.1, quantity : NumberInt("2") }, + { _id : NumberLong("109"), name : 'spare tire', description: '24 inch spare tire', weight : 22.2, quantity : NumberInt("5") } + ]); + + db.customers.insert([ + { _id : NumberLong("1001"), first_name : 'Sally', last_name : 'Thomas', email : 'sally.thomas@acme.com' }, + { _id : NumberLong("1002"), first_name : 'George', last_name : 'Bailey', email : 'gbailey@foobar.com' }, + { _id : NumberLong("1003"), first_name : 'Edward', last_name : 'Walker', email : 'ed@walker.com' }, + { _id : NumberLong("1004"), first_name : 'Anne', last_name : 'Kretchmar', email : 'annek@noanswer.org' } + ]); + + db.orders.insert([ + { _id : NumberLong("10001"), order_date : new ISODate("2016-01-16T00:00:00Z"), purchaser_id : NumberLong("1001"), quantity : NumberInt("1"), product_id : NumberLong("102") }, + { _id : NumberLong("10002"), order_date : new ISODate("2016-01-17T00:00:00Z"), purchaser_id : NumberLong("1002"), quantity : NumberInt("2"), product_id : NumberLong("105") }, + { _id : NumberLong("10003"), order_date : new ISODate("2016-02-19T00:00:00Z"), purchaser_id : 
NumberLong("1002"), quantity : NumberInt("2"), product_id : NumberLong("106") }, + { _id : NumberLong("10004"), order_date : new ISODate("2016-02-21T00:00:00Z"), purchaser_id : NumberLong("1003"), quantity : NumberInt("1"), product_id : NumberLong("107") } + ]); +EOF + +echo "Inserted example data" diff --git a/examples/mysql-gtids/2.6/Dockerfile b/examples/mysql-gtids/2.6/Dockerfile new file mode 100644 index 00000000..c7c43f4d --- /dev/null +++ b/examples/mysql-gtids/2.6/Dockerfile @@ -0,0 +1,6 @@ +FROM mysql:8.2 + +LABEL maintainer="Debezium Community" + +COPY mysql.cnf /etc/mysql/conf.d/ +COPY inventory.sql /docker-entrypoint-initdb.d/ diff --git a/examples/mysql-gtids/2.6/inventory.sql b/examples/mysql-gtids/2.6/inventory.sql new file mode 100644 index 00000000..cd298080 --- /dev/null +++ b/examples/mysql-gtids/2.6/inventory.sql @@ -0,0 +1,97 @@ +# In production you would almost certainly limit the replication user must be on the follower (slave) machine, +# to prevent other clients accessing the log from other machines. For example, 'replicator'@'follower.acme.com'. +# +# However, this grant is equivalent to specifying *any* hosts, which makes this easier since the docker host +# is not easily known to the Docker container. But don't do this in production. +# +CREATE USER 'replicator' IDENTIFIED BY 'replpass'; +CREATE USER 'debezium' IDENTIFIED BY 'dbz'; +GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator'; +GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'debezium'; + +# Create the database that we'll use to populate data and watch the effect in the binlog +CREATE DATABASE inventory; +GRANT ALL PRIVILEGES ON inventory.* TO 'mysqluser'@'%'; + +# Switch to this database +USE inventory; + +# Create and populate our products using a single insert with many rows +CREATE TABLE products ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255) NOT NULL, + description VARCHAR(512), + weight FLOAT +); +ALTER TABLE products AUTO_INCREMENT = 101; + +INSERT INTO products +VALUES (default,"scooter","Small 2-wheel scooter",3.14), + (default,"car battery","12V car battery",8.1), + (default,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8), + (default,"hammer","12oz carpenter's hammer",0.75), + (default,"hammer","14oz carpenter's hammer",0.875), + (default,"hammer","16oz carpenter's hammer",1.0), + (default,"rocks","box of assorted rocks",5.3), + (default,"jacket","water resistent black wind breaker",0.1), + (default,"spare tire","24 inch spare tire",22.2); + +# Create and populate the products on hand using multiple inserts +CREATE TABLE products_on_hand ( + product_id INTEGER NOT NULL PRIMARY KEY, + quantity INTEGER NOT NULL, + FOREIGN KEY (product_id) REFERENCES products(id) +); + +INSERT INTO products_on_hand VALUES (101,3); +INSERT INTO products_on_hand VALUES (102,8); +INSERT INTO products_on_hand VALUES (103,18); +INSERT INTO products_on_hand VALUES (104,4); +INSERT INTO products_on_hand VALUES (105,5); +INSERT INTO products_on_hand VALUES (106,0); +INSERT INTO products_on_hand VALUES (107,44); +INSERT INTO products_on_hand VALUES (108,2); +INSERT INTO products_on_hand VALUES (109,5); + +# Create some customers ... 
+CREATE TABLE customers ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + first_name VARCHAR(255) NOT NULL, + last_name VARCHAR(255) NOT NULL, + email VARCHAR(255) NOT NULL UNIQUE KEY +) AUTO_INCREMENT=1001; + + +INSERT INTO customers +VALUES (default,"Sally","Thomas","sally.thomas@acme.com"), + (default,"George","Bailey","gbailey@foobar.com"), + (default,"Edward","Walker","ed@walker.com"), + (default,"Anne","Kretchmar","annek@noanswer.org"); + +# Create some veyr simple orders +CREATE TABLE orders ( + order_number INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + order_date DATE NOT NULL, + purchaser INTEGER NOT NULL, + quantity INTEGER NOT NULL, + product_id INTEGER NOT NULL, + FOREIGN KEY order_customer (purchaser) REFERENCES customers(id), + FOREIGN KEY ordered_product (product_id) REFERENCES products(id) +) AUTO_INCREMENT = 10001; + +INSERT INTO orders +VALUES (default, '2016-01-16', 1001, 1, 102), + (default, '2016-01-17', 1002, 2, 105), + (default, '2016-02-19', 1002, 2, 106), + (default, '2016-02-21', 1003, 1, 107); + +# Create table with Spatial/Geometry type +CREATE TABLE geom ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + g GEOMETRY NOT NULL, + h GEOMETRY); + +INSERT INTO geom +VALUES(default, ST_GeomFromText('POINT(1 1)'), NULL), + (default, ST_GeomFromText('LINESTRING(2 1, 6 6)'), NULL), + (default, ST_GeomFromText('POLYGON((0 5, 2 5, 2 7, 0 7, 0 5))'), NULL); diff --git a/examples/mysql-gtids/2.6/mysql.cnf b/examples/mysql-gtids/2.6/mysql.cnf new file mode 100644 index 00000000..988f6b5a --- /dev/null +++ b/examples/mysql-gtids/2.6/mysql.cnf @@ -0,0 +1,52 @@ +# For advice on how to change settings please see +# https://dev.mysql.com/doc/refman/8.2/en/server-configuration-defaults.html + +[mysqld] +# +# Remove leading # and set to the amount of RAM for the most important data +# cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%. +# innodb_buffer_pool_size = 128M +# +# Remove leading # to turn on a very important data integrity option: logging +# changes to the binary log between backups. +# log_bin +# +# Remove leading # to set options mainly useful for reporting servers. +# The server defaults are faster for transactions and fast SELECTs. +# Adjust sizes as needed, experiment to find the optimal values. +# join_buffer_size = 128M +# sort_buffer_size = 2M +# read_rnd_buffer_size = 2M +skip-host-cache +skip-name-resolve +#datadir=/var/lib/mysql +#socket=/var/lib/mysql/mysql.sock +#secure-file-priv=/var/lib/mysql-files +user=mysql + +# Disabling symbolic-links is recommended to prevent assorted security risks +symbolic-links=0 + +#log-error=/var/log/mysqld.log +#pid-file=/var/run/mysqld/mysqld.pid + +# ---------------------------------------------- +# Enable GTIDs on this master +# ---------------------------------------------- +gtid_mode = on +enforce_gtid_consistency = on + +# ---------------------------------------------- +# Enable the binlog for replication & CDC +# ---------------------------------------------- + +# Enable binary replication log and set the prefix, expiration, and log format. +# The prefix is arbitrary, expiration can be short for integration tests but would +# be longer on a production system. Row-level info is required for ingest to work. 
+# Server ID is required, but this will vary on production systems +server-id = 223344 +log_bin = mysql-bin +binlog_expire_logs_seconds = 86400 +binlog_format = row + +default_authentication_plugin = mysql_native_password diff --git a/examples/mysql-replication/master/2.6/Dockerfile b/examples/mysql-replication/master/2.6/Dockerfile new file mode 100644 index 00000000..e15728f1 --- /dev/null +++ b/examples/mysql-replication/master/2.6/Dockerfile @@ -0,0 +1,6 @@ +FROM mysql:latest + +ENV MYSQL_ROOT_PASSWORD=debezium + +COPY master.cnf /etc/my.cnf +COPY inventory.sql /docker-entrypoint-initdb.d/ diff --git a/examples/mysql-replication/master/2.6/inventory.sql b/examples/mysql-replication/master/2.6/inventory.sql new file mode 100644 index 00000000..897e0f1e --- /dev/null +++ b/examples/mysql-replication/master/2.6/inventory.sql @@ -0,0 +1,120 @@ +# In production you would almost certainly limit the replication user must be on the follower (slave) machine, +# to prevent other clients accessing the log from other machines. For example, 'replicator'@'follower.acme.com'. +# +# However, this grant is equivalent to specifying *any* hosts, which makes this easier since the docker host +# is not easily known to the Docker container. But don't do this in production. +# +CREATE USER 'replicator' IDENTIFIED BY 'replpass'; +CREATE USER 'debezium' IDENTIFIED BY 'dbz'; +GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator'; +GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'debezium'; + +# Create the database that we'll use to populate data and watch the effect in the binlog +CREATE DATABASE inventory; +GRANT ALL PRIVILEGES ON inventory.* TO 'replicator'; +GRANT ALL PRIVILEGES ON inventory.* TO 'debezium'; +GRANT ALL PRIVILEGES ON inventory.* TO 'mysqluser'; + +# Switch to this database +USE inventory; + +# Create and populate our products using a single insert with many rows +CREATE TABLE products ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255) NOT NULL, + description VARCHAR(512), + weight FLOAT +); +ALTER TABLE products AUTO_INCREMENT = 101; + +INSERT INTO products +VALUES (default,"scooter","Small 2-wheel scooter",3.14), + (default,"car battery","12V car battery",8.1), + (default,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8), + (default,"hammer","12oz carpenter's hammer",0.75), + (default,"hammer","14oz carpenter's hammer",0.875), + (default,"hammer","16oz carpenter's hammer",1.0), + (default,"rocks","box of assorted rocks",5.3), + (default,"jacket","water resistent black wind breaker",0.1), + (default,"spare tire","24 inch spare tire",22.2); + +# Create and populate the products on hand using multiple inserts +CREATE TABLE products_on_hand ( + product_id INTEGER NOT NULL PRIMARY KEY, + quantity INTEGER NOT NULL, + FOREIGN KEY (product_id) REFERENCES products(id) +); + +INSERT INTO products_on_hand VALUES (101,3); +INSERT INTO products_on_hand VALUES (102,8); +INSERT INTO products_on_hand VALUES (103,18); +INSERT INTO products_on_hand VALUES (104,4); +INSERT INTO products_on_hand VALUES (105,5); +INSERT INTO products_on_hand VALUES (106,0); +INSERT INTO products_on_hand VALUES (107,44); +INSERT INTO products_on_hand VALUES (108,2); +INSERT INTO products_on_hand VALUES (109,5); + +# Create some customers ... 
+CREATE TABLE customers ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + first_name VARCHAR(255) NOT NULL, + last_name VARCHAR(255) NOT NULL, + email VARCHAR(255) NOT NULL UNIQUE KEY +) AUTO_INCREMENT=1001; + + +INSERT INTO customers +VALUES (default,"Sally","Thomas","sally.thomas@acme.com"), + (default,"George","Bailey","gbailey@foobar.com"), + (default,"Edward","Walker","ed@walker.com"), + (default,"Anne","Kretchmar","annek@noanswer.org"); + +# Create some fake addresses +CREATE TABLE addresses ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + customer_id INTEGER NOT NULL, + street VARCHAR(255) NOT NULL, + city VARCHAR(255) NOT NULL, + state VARCHAR(255) NOT NULL, + zip VARCHAR(255) NOT NULL, + type enum('SHIPPING','BILLING','LIVING') NOT NULL, + FOREIGN KEY address_customer (customer_id) REFERENCES customers(id) +) AUTO_INCREMENT = 10; + +INSERT INTO addresses +VALUES (default,1001,'3183 Moore Avenue','Euless','Texas','76036','SHIPPING'), + (default,1001,'2389 Hidden Valley Road','Harrisburg','Pennsylvania','17116','BILLING'), + (default,1002,'281 Riverside Drive','Augusta','Georgia','30901','BILLING'), + (default,1003,'3787 Brownton Road','Columbus','Mississippi','39701','SHIPPING'), + (default,1003,'2458 Lost Creek Road','Bethlehem','Pennsylvania','18018','SHIPPING'), + (default,1003,'4800 Simpson Square','Hillsdale','Oklahoma','73743','BILLING'), + (default,1004,'1289 University Hill Road','Canehill','Arkansas','72717','LIVING'); + +# Create some very simple orders +CREATE TABLE orders ( + order_number INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + order_date DATE NOT NULL, + purchaser INTEGER NOT NULL, + quantity INTEGER NOT NULL, + product_id INTEGER NOT NULL, + FOREIGN KEY order_customer (purchaser) REFERENCES customers(id), + FOREIGN KEY ordered_product (product_id) REFERENCES products(id) +) AUTO_INCREMENT = 10001; + +INSERT INTO orders +VALUES (default, '2016-01-16', 1001, 1, 102), + (default, '2016-01-17', 1002, 2, 105), + (default, '2016-02-19', 1002, 2, 106), + (default, '2016-02-21', 1003, 1, 107); + +# Create table with Spatial/Geometry type +CREATE TABLE geom ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + g GEOMETRY NOT NULL, + h GEOMETRY); + +INSERT INTO geom +VALUES(default, ST_GeomFromText('POINT(1 1)'), NULL), + (default, ST_GeomFromText('LINESTRING(2 1, 6 6)'), NULL), + (default, ST_GeomFromText('POLYGON((0 5, 2 5, 2 7, 0 7, 0 5))'), NULL); diff --git a/examples/mysql-replication/master/2.6/master.cnf b/examples/mysql-replication/master/2.6/master.cnf new file mode 100644 index 00000000..0e6c4b2a --- /dev/null +++ b/examples/mysql-replication/master/2.6/master.cnf @@ -0,0 +1,25 @@ +# For advice on how to change settings please see +# https://dev.mysql.com/doc/refman/8.2/en/server-configuration-defaults.html + +[mysqld] +server-id = 1 +log_bin = /var/lib/mysql/mysql-bin.log +binlog_do_db = inventory +enforce_gtid_consistency = ON +gtid_mode = ON +default_authentication_plugin = mysql_native_password + +skip-host-cache +skip-name-resolve +datadir=/var/lib/mysql +socket=/var/run/mysqld/mysqld.sock +secure-file-priv=/var/lib/mysql-files +user=mysql + +pid-file=/var/run/mysqld/mysqld.pid + +[client] +socket=/var/run/mysqld/mysqld.sock + +!includedir /etc/mysql/conf.d/ + diff --git a/examples/mysql-replication/replica/2.6/Dockerfile b/examples/mysql-replication/replica/2.6/Dockerfile new file mode 100644 index 00000000..c8301fd8 --- /dev/null +++ b/examples/mysql-replication/replica/2.6/Dockerfile @@ -0,0 +1,7 @@ +FROM mysql:latest + +ENV 
MYSQL_ROOT_PASSWORD=debezium + +COPY slave.cnf /etc/my.cnf +COPY init.sql /docker-entrypoint-initdb.d/ + diff --git a/examples/mysql-replication/replica/2.6/init.sql b/examples/mysql-replication/replica/2.6/init.sql new file mode 100644 index 00000000..6ea82dc2 --- /dev/null +++ b/examples/mysql-replication/replica/2.6/init.sql @@ -0,0 +1,4 @@ +CHANGE REPLICATION SOURCE TO SOURCE_HOST='mysql-master',SOURCE_USER='replicator',SOURCE_PASSWORD='replpass'; +CHANGE MASTER TO GET_MASTER_PUBLIC_KEY=1; +START REPLICA; + diff --git a/examples/mysql-replication/replica/2.6/slave.cnf b/examples/mysql-replication/replica/2.6/slave.cnf new file mode 100644 index 00000000..b0937d87 --- /dev/null +++ b/examples/mysql-replication/replica/2.6/slave.cnf @@ -0,0 +1,20 @@ +[mysqld] +server-id = 2 +log_bin = /var/lib/mysql/mysql-bin.log +relay-log = /var/lib/mysql/mysql-relay-bin.log +binlog_do_db = inventory +enforce_gtid_consistency = ON +gtid_mode = ON + +skip-host-cache +skip-name-resolve +datadir=/var/lib/mysql +socket=/var/run/mysqld/mysqld.sock +secure-file-priv=/var/lib/mysql-files +user=mysql + +pid-file=/var/run/mysqld/mysqld.pid + +[client] +socket=/var/run/mysqld/mysqld.sock +!includedir /etc/mysql/conf.d/ diff --git a/examples/mysql/2.6/Dockerfile b/examples/mysql/2.6/Dockerfile new file mode 100644 index 00000000..c7c43f4d --- /dev/null +++ b/examples/mysql/2.6/Dockerfile @@ -0,0 +1,6 @@ +FROM mysql:8.2 + +LABEL maintainer="Debezium Community" + +COPY mysql.cnf /etc/mysql/conf.d/ +COPY inventory.sql /docker-entrypoint-initdb.d/ diff --git a/examples/mysql/2.6/inventory.sql b/examples/mysql/2.6/inventory.sql new file mode 100644 index 00000000..d0dec525 --- /dev/null +++ b/examples/mysql/2.6/inventory.sql @@ -0,0 +1,118 @@ +# In production you would almost certainly limit the replication user must be on the follower (slave) machine, +# to prevent other clients accessing the log from other machines. For example, 'replicator'@'follower.acme.com'. +# +# However, this grant is equivalent to specifying *any* hosts, which makes this easier since the docker host +# is not easily known to the Docker container. But don't do this in production. 
+# +CREATE USER 'replicator' IDENTIFIED BY 'replpass'; +CREATE USER 'debezium' IDENTIFIED BY 'dbz'; +GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'replicator'; +GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'debezium'; + +# Create the database that we'll use to populate data and watch the effect in the binlog +CREATE DATABASE inventory; +GRANT ALL PRIVILEGES ON inventory.* TO 'mysqluser'@'%'; + +# Switch to this database +USE inventory; + +# Create and populate our products using a single insert with many rows +CREATE TABLE products ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255) NOT NULL, + description VARCHAR(512), + weight FLOAT +); +ALTER TABLE products AUTO_INCREMENT = 101; + +INSERT INTO products +VALUES (default,"scooter","Small 2-wheel scooter",3.14), + (default,"car battery","12V car battery",8.1), + (default,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8), + (default,"hammer","12oz carpenter's hammer",0.75), + (default,"hammer","14oz carpenter's hammer",0.875), + (default,"hammer","16oz carpenter's hammer",1.0), + (default,"rocks","box of assorted rocks",5.3), + (default,"jacket","water resistent black wind breaker",0.1), + (default,"spare tire","24 inch spare tire",22.2); + +# Create and populate the products on hand using multiple inserts +CREATE TABLE products_on_hand ( + product_id INTEGER NOT NULL PRIMARY KEY, + quantity INTEGER NOT NULL, + FOREIGN KEY (product_id) REFERENCES products(id) +); + +INSERT INTO products_on_hand VALUES (101,3); +INSERT INTO products_on_hand VALUES (102,8); +INSERT INTO products_on_hand VALUES (103,18); +INSERT INTO products_on_hand VALUES (104,4); +INSERT INTO products_on_hand VALUES (105,5); +INSERT INTO products_on_hand VALUES (106,0); +INSERT INTO products_on_hand VALUES (107,44); +INSERT INTO products_on_hand VALUES (108,2); +INSERT INTO products_on_hand VALUES (109,5); + +# Create some customers ... 
+CREATE TABLE customers ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + first_name VARCHAR(255) NOT NULL, + last_name VARCHAR(255) NOT NULL, + email VARCHAR(255) NOT NULL UNIQUE KEY +) AUTO_INCREMENT=1001; + + +INSERT INTO customers +VALUES (default,"Sally","Thomas","sally.thomas@acme.com"), + (default,"George","Bailey","gbailey@foobar.com"), + (default,"Edward","Walker","ed@walker.com"), + (default,"Anne","Kretchmar","annek@noanswer.org"); + +# Create some fake addresses +CREATE TABLE addresses ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + customer_id INTEGER NOT NULL, + street VARCHAR(255) NOT NULL, + city VARCHAR(255) NOT NULL, + state VARCHAR(255) NOT NULL, + zip VARCHAR(255) NOT NULL, + type enum('SHIPPING','BILLING','LIVING') NOT NULL, + FOREIGN KEY address_customer (customer_id) REFERENCES customers(id) +) AUTO_INCREMENT = 10; + +INSERT INTO addresses +VALUES (default,1001,'3183 Moore Avenue','Euless','Texas','76036','SHIPPING'), + (default,1001,'2389 Hidden Valley Road','Harrisburg','Pennsylvania','17116','BILLING'), + (default,1002,'281 Riverside Drive','Augusta','Georgia','30901','BILLING'), + (default,1003,'3787 Brownton Road','Columbus','Mississippi','39701','SHIPPING'), + (default,1003,'2458 Lost Creek Road','Bethlehem','Pennsylvania','18018','SHIPPING'), + (default,1003,'4800 Simpson Square','Hillsdale','Oklahoma','73743','BILLING'), + (default,1004,'1289 University Hill Road','Canehill','Arkansas','72717','LIVING'); + +# Create some very simple orders +CREATE TABLE orders ( + order_number INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + order_date DATE NOT NULL, + purchaser INTEGER NOT NULL, + quantity INTEGER NOT NULL, + product_id INTEGER NOT NULL, + FOREIGN KEY order_customer (purchaser) REFERENCES customers(id), + FOREIGN KEY ordered_product (product_id) REFERENCES products(id) +) AUTO_INCREMENT = 10001; + +INSERT INTO orders +VALUES (default, '2016-01-16', 1001, 1, 102), + (default, '2016-01-17', 1002, 2, 105), + (default, '2016-02-19', 1002, 2, 106), + (default, '2016-02-21', 1003, 1, 107); + +# Create table with Spatial/Geometry type +CREATE TABLE geom ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + g GEOMETRY NOT NULL, + h GEOMETRY); + +INSERT INTO geom +VALUES(default, ST_GeomFromText('POINT(1 1)'), NULL), + (default, ST_GeomFromText('LINESTRING(2 1, 6 6)'), NULL), + (default, ST_GeomFromText('POLYGON((0 5, 2 5, 2 7, 0 7, 0 5))'), NULL); diff --git a/examples/mysql/2.6/mysql.cnf b/examples/mysql/2.6/mysql.cnf new file mode 100644 index 00000000..aa56ad2f --- /dev/null +++ b/examples/mysql/2.6/mysql.cnf @@ -0,0 +1,46 @@ +# For advice on how to change settings please see +# https://dev.mysql.com/doc/refman/8.2/en/server-configuration-defaults.html + +[mysqld] +# +# Remove leading # and set to the amount of RAM for the most important data +# cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%. +# innodb_buffer_pool_size = 128M +# +# Remove leading # to turn on a very important data integrity option: logging +# changes to the binary log between backups. +# log_bin +# +# Remove leading # to set options mainly useful for reporting servers. +# The server defaults are faster for transactions and fast SELECTs. +# Adjust sizes as needed, experiment to find the optimal values. 
+# join_buffer_size = 128M +# sort_buffer_size = 2M +# read_rnd_buffer_size = 2M +skip-host-cache +skip-name-resolve +#datadir=/var/lib/mysql +#socket=/var/lib/mysql/mysql.sock +#secure-file-priv=/var/lib/mysql-files +user=mysql + +# Disabling symbolic-links is recommended to prevent assorted security risks +symbolic-links=0 + +#log-error=/var/log/mysqld.log +#pid-file=/var/run/mysqld/mysqld.pid + +# ---------------------------------------------- +# Enable the binlog for replication & CDC +# ---------------------------------------------- + +# Enable binary replication log and set the prefix, expiration, and log format. +# The prefix is arbitrary, expiration can be short for integration tests but would +# be longer on a production system. Row-level info is required for ingest to work. +# Server ID is required, but this will vary on production systems +server-id = 223344 +log_bin = mysql-bin +binlog_expire_logs_seconds = 86400 +binlog_format = row + +default_authentication_plugin = mysql_native_password diff --git a/examples/postgres/2.6/Dockerfile b/examples/postgres/2.6/Dockerfile new file mode 100644 index 00000000..980c32f1 --- /dev/null +++ b/examples/postgres/2.6/Dockerfile @@ -0,0 +1,7 @@ +# When changed, update also build-all-multiplatform.sh to make +# sure give debezim/postgres version is built also for ARM. +FROM quay.io/debezium/postgres:16 + +LABEL maintainer="Debezium Community" + +COPY inventory.sql /docker-entrypoint-initdb.d/ diff --git a/examples/postgres/2.6/inventory.sql b/examples/postgres/2.6/inventory.sql new file mode 100644 index 00000000..f4ddc89f --- /dev/null +++ b/examples/postgres/2.6/inventory.sql @@ -0,0 +1,91 @@ +-- Create the schema that we'll use to populate data and watch the effect in the WAL +CREATE SCHEMA inventory; +SET search_path TO inventory; + +-- enable PostGis +CREATE EXTENSION postgis; + +-- Create and populate our products using a single insert with many rows +CREATE TABLE products ( + id SERIAL NOT NULL PRIMARY KEY, + name VARCHAR(255) NOT NULL, + description VARCHAR(512), + weight FLOAT +); +ALTER SEQUENCE products_id_seq RESTART WITH 101; +ALTER TABLE products REPLICA IDENTITY FULL; + +INSERT INTO products +VALUES (default,'scooter','Small 2-wheel scooter',3.14), + (default,'car battery','12V car battery',8.1), + (default,'12-pack drill bits','12-pack of drill bits with sizes ranging from #40 to #3',0.8), + (default,'hammer','12oz carpenter''s hammer',0.75), + (default,'hammer','14oz carpenter''s hammer',0.875), + (default,'hammer','16oz carpenter''s hammer',1.0), + (default,'rocks','box of assorted rocks',5.3), + (default,'jacket','water resistent black wind breaker',0.1), + (default,'spare tire','24 inch spare tire',22.2); + +-- Create and populate the products on hand using multiple inserts +CREATE TABLE products_on_hand ( + product_id INTEGER NOT NULL PRIMARY KEY, + quantity INTEGER NOT NULL, + FOREIGN KEY (product_id) REFERENCES products(id) +); +ALTER TABLE products_on_hand REPLICA IDENTITY FULL; + +INSERT INTO products_on_hand VALUES (101,3); +INSERT INTO products_on_hand VALUES (102,8); +INSERT INTO products_on_hand VALUES (103,18); +INSERT INTO products_on_hand VALUES (104,4); +INSERT INTO products_on_hand VALUES (105,5); +INSERT INTO products_on_hand VALUES (106,0); +INSERT INTO products_on_hand VALUES (107,44); +INSERT INTO products_on_hand VALUES (108,2); +INSERT INTO products_on_hand VALUES (109,5); + +-- Create some customers ... 
+CREATE TABLE customers ( + id SERIAL NOT NULL PRIMARY KEY, + first_name VARCHAR(255) NOT NULL, + last_name VARCHAR(255) NOT NULL, + email VARCHAR(255) NOT NULL UNIQUE +); +ALTER SEQUENCE customers_id_seq RESTART WITH 1001; +ALTER TABLE customers REPLICA IDENTITY FULL; + +INSERT INTO customers +VALUES (default,'Sally','Thomas','sally.thomas@acme.com'), + (default,'George','Bailey','gbailey@foobar.com'), + (default,'Edward','Walker','ed@walker.com'), + (default,'Anne','Kretchmar','annek@noanswer.org'); + +-- Create some very simple orders +CREATE TABLE orders ( + id SERIAL NOT NULL PRIMARY KEY, + order_date DATE NOT NULL, + purchaser INTEGER NOT NULL, + quantity INTEGER NOT NULL, + product_id INTEGER NOT NULL, + FOREIGN KEY (purchaser) REFERENCES customers(id), + FOREIGN KEY (product_id) REFERENCES products(id) +); +ALTER SEQUENCE orders_id_seq RESTART WITH 10001; +ALTER TABLE orders REPLICA IDENTITY FULL; + +INSERT INTO orders +VALUES (default, '2016-01-16', 1001, 1, 102), + (default, '2016-01-17', 1002, 2, 105), + (default, '2016-02-19', 1002, 2, 106), + (default, '2016-02-21', 1003, 1, 107); + +-- Create table with Spatial/Geometry type +CREATE TABLE geom ( + id SERIAL NOT NULL PRIMARY KEY, + g GEOMETRY NOT NULL, + h GEOMETRY); + +INSERT INTO geom +VALUES(default, ST_GeomFromText('POINT(1 1)')), + (default, ST_GeomFromText('LINESTRING(2 1, 6 6)')), + (default, ST_GeomFromText('POLYGON((0 5, 2 5, 2 7, 0 7, 0 5))')); diff --git a/kafka/2.6/Dockerfile b/kafka/2.6/Dockerfile new file mode 100644 index 00000000..bad04194 --- /dev/null +++ b/kafka/2.6/Dockerfile @@ -0,0 +1,89 @@ +ARG DEBEZIUM_DOCKER_REGISTRY_PRIMARY_NAME +FROM $DEBEZIUM_DOCKER_REGISTRY_PRIMARY_NAME/base + +LABEL maintainer="Debezium Community" + +# +# Set the version, home directory, and MD5 hash. +# MD5 hash taken from http://kafka.apache.org/downloads.html for this version of Kafka +# These argument defaults can be overruled during build time but compatibility cannot be guaranteed when the defaults are not used. +# +ARG KAFKA_VERSION=3.6.1 +ARG SCALA_VERSION=2.13 +ARG SHA512HASH="1F063CD67463DD3BB5A5E06E7A1C2278DB84BFC836A634FAC7C9A005DE66A42AC00B32F5E9BBDD22086605F73659EFD4CE5BD1185196B02A743BE0898DAAC55D" + +ENV KAFKA_VERSION=$KAFKA_VERSION \ + SCALA_VERSION=$SCALA_VERSION \ + KAFKA_HOME=/kafka \ + SHA512HASH=$SHA512HASH \ + KAFKA_URL_PATH=kafka/$KAFKA_VERSION/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz + +ENV KAFKA_DATA=$KAFKA_HOME/data + +# +# Create a user and home directory for Kafka +# +USER root +RUN groupadd -r kafka -g 1001 && useradd -u 1001 -r -g kafka -m -d $KAFKA_HOME -s /sbin/nologin -c "Kafka user" kafka && \ + chmod 755 $KAFKA_HOME +RUN mkdir $KAFKA_DATA && \ + mkdir $KAFKA_HOME/logs + +# +# Change ownership and switch user +# +RUN chown -R kafka $KAFKA_HOME && \ + chgrp -R kafka $KAFKA_HOME + +# +# 1.) Download Kafka, either from preferred host or archive +# 2.) Verify the contents and install, remove TGZ file +# 3.) Remove potentially exploitable classes (see CVE-2021-4104/DBZ-4447 CVE-2019-17571 CVE-2022-23302 CVE-2022-23305 CVE-2020-9493) +# 4.) 
Allow random UID to use Kafka (doing this for the bulk of files here, so as to avoid overhead of doing it in a separate layer) +RUN curl -fSL -o /tmp/kafka.tgz $(curl --stderr /dev/null https://www.apache.org/dyn/closer.cgi\?as_json\=1 | sed -rn 's/.*"preferred":.*"(.*)"/\1/p')$KAFKA_URL_PATH || curl -fSL -o /tmp/kafka.tgz https://archive.apache.org/dist/$KAFKA_URL_PATH &&\ + echo "$SHA512HASH /tmp/kafka.tgz" | sha512sum -c - &&\ + tar -xzf /tmp/kafka.tgz -C $KAFKA_HOME --strip-components 1 &&\ + rm -f /tmp/kafka.tgz &&\ + zip -d /kafka/libs/reload4j-1.2.25.jar org/apache/log4j/net/JMSAppender.class org/apache/log4j/net/SocketServer.class org/apache/log4j/net/JMSSink.class 'org/apache/log4j/jdbc/*' 'org/apache/log4j/chainsaw/*' &&\ + chmod -R g+w,o+w $KAFKA_HOME + +COPY ./log4j.properties $KAFKA_HOME/config/log4j.properties +RUN chmod g+w,o+w $KAFKA_HOME/config/log4j.properties + +# Back up config original files; they will be brought back in +# in docker-entrypoint.sh if no volume with user-provided config files is given +RUN mkdir $KAFKA_HOME/config.orig &&\ + mv $KAFKA_HOME/config/* $KAFKA_HOME/config.orig &&\ + chown -R kafka:kafka $KAFKA_HOME/config.orig + +# Remove unnecessary files +RUN rm -f $KAFKA_HOME/libs/*-{sources,javadoc,scaladoc}.jar* &&\ + rm -r $KAFKA_HOME/site-docs + +# +# The kafka-run-class.sh script generates the classpath for launching Kafka-related JVM, with entries +# containing the pattern "/bin/../libs", which fails to be resolved properly in some +# environments; the CLASSPATH is filled from "base_dir" environment variable that contains the relative +# path so it it is modified to contain absolute path using "realpath" command. +# +RUN sed -i 's/base_dir=\$(dirname \$0)\/../base_dir=\$(realpath \$(dirname \$0)\/..)/' $KAFKA_HOME/bin/kafka-run-class.sh + +# +# Allow random UID to use Kafka +# +RUN chmod -R g+w,o+w $KAFKA_HOME + +USER kafka + +# Set the working directory to the Kafka home directory +WORKDIR $KAFKA_HOME + +# +# Expose the ports and set up volumes for the data and logs directories +# +EXPOSE 9092 +VOLUME ["/kafka/data","/kafka/logs","/kafka/config"] + +COPY ./docker-entrypoint.sh / +ENTRYPOINT ["/docker-entrypoint.sh"] +CMD ["start"] diff --git a/kafka/2.6/README.md b/kafka/2.6/README.md new file mode 100644 index 00000000..9e47785c --- /dev/null +++ b/kafka/2.6/README.md @@ -0,0 +1,150 @@ +[Kafka](https://kafka.apache.org/) is a distributed, partitioned, replicated commit log service. In Debezium, connectors that monitor databases write all change events to Kafka topics, and your client applications consume the relevant Kafka topics to receive and process the change events. + +# What is Debezium? + +Debezium is a distributed platform that turns your existing databases into event streams, so applications can quickly react to each row-level change in the databases. Debezium is built on top of Kafka and provides Kafka Connect compatible connectors that monitor specific database management systems. Debezium records the history of data changes in Kafka logs, so your application can be stopped and restarted at any time and can easily consume all of the events it missed while it was not running, ensuring that all events are processed correctly and completely. + +Running Debezium involves Zookeeper, Kafka, and services that run Debezium's connectors. For simple evaluation and experimentation, all services can all be run on a single host machine, using the recipe outlined below. 
Production environments, however, require properly running and networking multiple instances of each service to provide the performance, reliability, replication, and fault tolerance. This can be done with a platform like [OpenShift](https://www.openshift.com) that manages multiple Docker containers running on multiple hosts and machines. But running Kafka in a Docker container has limitations, so for scenarios where very high throughput is required, you should run Kafka on dedicated hardware as explained in the [Kafka documentation](https://kafka.apache.org/documentation.html). + +# How to use this image + +This image can be used in several different ways. Unless you are running in "KRaft" mode (see below), all require an already-running Zookeeper service, which is either running locally via the container named `zookeeper` or with OpenShift running as a service named `zookeeper`. + +## Start a Kafka broker + +Starting a Kafka broker using this image is simple: + + $ docker run -it --name kafka -p 9092:9092 --link zookeeper:zookeeper quay.io/debezium/kafka + +This command uses this image and starts a new container named `kafka`, which runs in the foreground and attaches the console so that it displays the broker's output and error messages. It exposes the broker on port 9092 and looks for Zookeeper in the container (or host) named `zookeeper`. See the environment variables below for additional information that can be supplied to the broker on startup. + +To start the container in _detached_ mode, simply replace the `-it` option with `-d`. No broker output will be sent to your console, but it can be read at any time using the `docker logs` command. For example, the following command will display the output and keep following it: + + $ docker logs --follow kafka + +## Running Kafka in KRaft mode + +Since Apache Kafka 2.8, there's experimental support for running without ZooKeeper, +using Kafka's own implementation of the Raft consensus protocol, [KRaft](https://github.com/apache/kafka/blob/trunk/config/kraft/README.md). + +To start Kafka in KRaft mode, specify the `CLUSTER_ID` environment variable with a unique id for the cluster. +The same id must be used for all nodes of the cluster. +Then, for each node in the cluster, specify the `NODE_ROLE` variable, with a value of 'controller' (for controller nodes), 'broker' (for broker nodes), or 'combined' (for nodes acting as both controller and broker). +Lastly, for each node in the cluster, specify the `KAFKA_CONTROLLER_QUORUM_VOTERS` variable, referencing the controller nodes in the form: `KAFKA_CONTROLLER_QUORUM_VOTERS=id-1@controller-node-1:controller-port-1,...`, e.g. `KAFKA_CONTROLLER_QUORUM_VOTERS=1@kafka-1:9093,2@kafka-2:9093,3@kafka-3:9093`. + +KRaft mode is an **experimental** feature as of Apache Kafka 2.8/3.0 and should not be used in production.
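+
+For illustration only, a three-node cluster with combined roles could be started along the following lines (this is a sketch; the Docker network, container names, and cluster id are placeholders, and the remaining nodes would be started the same way with `NODE_ID=2` and `NODE_ID=3`):
+
+    $ docker network create kafka-cluster
+    $ docker run -d --name kafka-1 --network kafka-cluster \
+        -e CLUSTER_ID=oh-sxaDRTcyAr6pFRbXyzA -e NODE_ID=1 -e NODE_ROLE=combined \
+        -e KAFKA_CONTROLLER_QUORUM_VOTERS=1@kafka-1:9093,2@kafka-2:9093,3@kafka-3:9093 \
+        quay.io/debezium/kafka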
+ +## Create a topic on a running broker + +If you already have one or more running containers with a Kafka broker, you can use this image to start _another_ container that connects to the running broker(s) and uses them to create a topic: + + $ docker run -it --rm --link kafka:kafka quay.io/debezium/kafka create-topic [-p numPartitions] [-r numReplicas] [-c cleanupPolicy] topic-name + +where `topic-name` is the name of the new topic, `numPartitions` is the number of partitions within the new topic, `numReplicas` is the number of replicas for each partition within the new topic and `cleanupPolicy` is the cleanup policy for the new topic (either `delete` or `compact`). The default for both `numPartitions` and `numReplicas` is '1'. The default `cleanupPolicy` is `delete`. + +The container will exit as soon as the request to create the topic completes, and because `--rm` is used the container will be immediately removed. + +Simply run this command once for each topic you want to create. + +## Watch a topic on a running broker + +If you already have one or more running containers with a Kafka broker, you can use this image to start _another_ container that connects to the running broker(s) and watches a topic: + + $ docker run -it --rm --link kafka:kafka quay.io/debezium/kafka watch-topic [-a] [-k] [-m minBytes] topic-name + +where `topic-name` is the name of the topic, and + +* `-a` is an optional flag that specifies that all of the topic messages should be displayed (i.e. from the beginning) +* `-k` is an optional flag that specifies whether the message key should be shown (by default, the key will not be displayed) +* `-m minBytes` is an optional parameter to specify that messages should only be fetched when doing so will consume at least the specified number of bytes (defaults to '1') + +## Listing topics on a running broker + +If you already have one or more running containers with a Kafka broker, you can use this image to start _another_ container that connects to the running broker(s) and lists the existing topics: + + $ docker run -it --rm --link kafka:kafka quay.io/debezium/kafka list-topics + +The container will exit (and be removed) immediately after the response is displayed. + +# Environment variables + +The Debezium Kafka image uses several environment variables when running a Kafka broker using this image. +The `ZOOKEEPER_CONNECT` variable is also applied when using the `create-topic` and `list-topics` modes of this image. + +### `CLUSTER_ID` + +This environment variable must be set in order to enable KRaft mode (running Kafka without ZooKeeper). +It must be set to the same unique value for each node in the cluster, e.g. 'oh-sxaDRTcyAr6pFRbXyzA'. + +### `NODE_ID` + +This environment variable is recommended. Set this to the unique and persistent number for the broker. This must be set for every broker in a Kafka cluster, and should be set for a single standalone broker. The default is '1', and setting this will update the Kafka configuration. + +### `NODE_ROLE` + +This environment variable is recommended when running Kafka in KRaft mode. Set its value to 'controller' (for controller nodes), 'broker' (for broker nodes), or 'combined' (for nodes acting as both controller and broker). +The default is 'combined'. + +### `ZOOKEEPER_CONNECT` + +This environment variable is recommended, although linking to a `zookeeper` container precludes the need to use it. 
Otherwise, set this to a string described in the Kafka documentation for the 'zookeeper.connect' property so that the Kafka broker can find the Zookeeper service. Setting this will update the Kafka configuration. + +### `HOST_NAME` + +This environment variable is a recommended setting. Set this to the hostname that the broker will bind to. Defaults to the hostname of the container. + +### `ADVERTISED_HOST_NAME` + +This environment variable is a recommended setting. The host name specified with this environment variable will be registered in Zookeeper and given out to other workers to connect with. By default the value of `HOST_NAME` is used, so specify a different value if the `HOST_NAME` value will not be useful to or reachable by clients. + +### `HEAP_OPTS` + +This environment variable is recommended. Use this to set the JVM options for the Kafka broker. By default a value of '-Xmx1G -Xms1G' is used, meaning that each Kafka broker uses 1GB of memory. Using too little memory may cause performance problems, while using too much may prevent the broker from starting properly given the memory available on the machine. Obviously the container must be able to use the amount of memory defined by this environment variable. + +### `CREATE_TOPICS` + +This environment variable is optional. Use this to specify the topic(s) that should be created as soon as the broker starts. The value should be a comma-separated list of tuples in the form of `topic:partitions:replicas:(clean-up policy)?`. For example, when this environment variable is set to `topic1:1:2,topic2:3:1:compact`, then the container will create 'topic1' with 1 partition and 2 replicas, and 'topic2' with 3 partitions, 1 replica and `cleanup.policy` set to `compact`. + +### `LOG_LEVEL` + +This environment variable is optional. Use this to set the level of detail for Kafka's application log written to STDOUT and STDERR. Valid values are `INFO` (default), `WARN`, `ERROR`, `DEBUG`, or `TRACE`. + +### Others + +Environment variables that start with `KAFKA_` will be used to update the Kafka configuration file. Each environment variable name will be mapped to a configuration property name by: + +1. removing the `KAFKA_` prefix; +2. lowercasing all characters; and +3. converting all '_' characters to '.' characters + +For example, the environment variable `KAFKA_ADVERTISED_HOST_NAME` is converted to the `advertised.host.name` property, while `KAFKA_AUTO_CREATE_TOPICS_ENABLE` is converted to the `auto.create.topics.enable` property. The container will then update the Kafka configuration file to include the property's name and value. + +The value of the environment variable may not contain a '\@' character. + + +# Ports + +Containers created using this image will expose port 9092, which is the standard port used by Kafka. You can use standard Docker options to map this to a different port on the host that runs the container. +When using KRaft mode, port 9093 will be exposed for the controller listener, if the node has the 'controller' or 'combined' role. + +# Storing data + +The Kafka broker run by this image writes data to the local file system, and the only way to keep this data is to use volumes that map specific directories inside the container to the local file system (or to OpenShift persistent volumes). + +### Topic data + +This image defines a data volume at `/kafka/data`. The broker writes all persisted data as files within this directory, inside a subdirectory named with the value of NODE_ID (see above).
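+
+For example (a sketch; the host path is illustrative), the data volume can be mapped to a host directory when starting the broker:
+
+    $ docker run -it --name kafka -p 9092:9092 \
+        -v /path/to/kafka-data:/kafka/data \
+        --link zookeeper:zookeeper quay.io/debezium/kafka
+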
You must mount it appropriately when running your container to persist the data after the container is stopped; failing to do so will result in all data being lost when the container is stopped. + +### Log files + +Although this image will send Kafka broker log output to standard output so it is visible in the Docker logs, this image also configures Kafka to write out more detailed logs to a data volume at `/kafka/logs`. All logs are rotated daily, and include: + +* `server.log` - Contains the same log output sent to standard output and standard error. +* `state-change.log` - Records the timeline of requested and completed state changes between the controller and brokers. +* `kafka-request.log` - Records one entry for each of the requests received and handled by the broker. +* `log-cleaner.log` - Records details about log compaction, whereby Kafka ensures that a compacted topic retains at least the last value for each distinct message key. +* `controller.log` - Records controller activities, such as the brokers that make up the in-sync replicas for each partition and the brokers that are the leaders of their partitions. + +### Configuration + +This image defines a data volume at `/kafka/config` where the broker's configuration files are stored. Note that these configuration files are always modified based upon the environment variables and linked containers. The best use of this data volume is to be able to see the configuration files used by Kafka, although with some care it is possible to supply custom configuration files that will be adapted and used upon startup. diff --git a/kafka/2.6/docker-entrypoint.sh b/kafka/2.6/docker-entrypoint.sh new file mode 100755 index 00000000..920d8c79 --- /dev/null +++ b/kafka/2.6/docker-entrypoint.sh @@ -0,0 +1,292 @@ +#!/bin/bash + +# Exit immediately if a *pipeline* returns a non-zero status. (Add -x for command tracing) +set -e + +get_broker_endpoint() { + if [[ -z "$KAFKA_BROKER" ]]; then + # Look for any environment variables set by Docker container linking. For example, if the container + # running Kafka were named 'broker' in this container, then Docker should have created several envs, + # such as 'BROKER_PORT_9092_TCP'. If so, then use that to automatically connect to the linked broker. + export KAFKA_BROKER=$(env | grep .*PORT_9092_TCP= | sed -e 's|.*tcp://||' | uniq | paste -sd ,) + fi + if [[ "x$KAFKA_BROKER" = "x" ]]; then + export KAFKA_BROKER=0.0.0.0:9092 + fi + echo "Using KAFKA_BROKER=$KAFKA_BROKER" +} + +if [[ -z "$NODE_ID" ]]; then + if [[ -z "$BROKER_ID" ]]; then + NODE_ID=1 + echo "WARNING: Using default NODE_ID=1, which is valid only for non-clustered installations." + else + NODE_ID="$BROKER_ID" + echo "WARNING: Using NODE_ID=$BROKER_ID, as specified via BROKER_ID variable. Please update your configuration to use the NODE_ID variable instead." + fi +fi + +# ZooKeeper mode +if [[ -z "$CLUSTER_ID" ]]; then + CONFIG_FILE=config/server.properties + echo "Starting in ZooKeeper mode using NODE_ID=$NODE_ID." + + if [[ -z "$ZOOKEEPER_CONNECT" ]]; then + # Look for any environment variables set by Docker container linking. For example, if the container + # running Zookeeper were named 'zoo' in this container, then Docker should have created several envs, + # such as 'ZOO_PORT_2181_TCP'. If so, then use that to automatically set the 'zookeeper.connect' property.
+ export ZOOKEEPER_CONNECT=$(env | grep .*PORT_2181_TCP= | sed -e 's|.*tcp://||' | uniq | paste -sd ,) + fi + if [[ "x$ZOOKEEPER_CONNECT" = "x" ]]; then + export ZOOKEEPER_CONNECT=0.0.0.0:2181 + fi + echo "Using ZOOKEEPER_CONNECT=$ZOOKEEPER_CONNECT" + +# KRaft mode +else + + if [[ -z "$NODE_ROLE" ]]; then + NODE_ROLE='combined'; + fi + + case "$NODE_ROLE" in + 'combined' ) CONFIG_FILE=config/kraft/server.properties;; + 'broker' ) CONFIG_FILE=config/kraft/broker.properties;; + 'controller' ) CONFIG_FILE=config/kraft/controller.properties;; + *) CONFIG_FILE=config/kraft/server.properties;; + esac + + echo "Starting in KRaft mode (EXPERIMENTAL), using CLUSTER_ID=$CLUSTER_ID, NODE_ID=$NODE_ID and NODE_ROLE=$NODE_ROLE." +fi + +echo "Using configuration $CONFIG_FILE." + +if [[ -n "$HEAP_OPTS" ]]; then + sed -r -i "s/^(export KAFKA_HEAP_OPTS)=\"(.*)\"/\1=\"${HEAP_OPTS}\"/g" $KAFKA_HOME/bin/kafka-server-start.sh + unset HEAP_OPTS +fi + +export KAFKA_ZOOKEEPER_CONNECT=$ZOOKEEPER_CONNECT +export KAFKA_NODE_ID=$NODE_ID +export KAFKA_BROKER_ID=$NODE_ID +export KAFKA_LOG_DIRS="${KAFKA_DATA}/$KAFKA_NODE_ID" +mkdir -p $KAFKA_LOG_DIRS +unset NODE_ID +unset ZOOKEEPER_CONNECT + +if [[ -z "$ADVERTISED_PORT" ]]; then + ADVERTISED_PORT=9092 +fi +if [[ -z "$HOST_NAME" ]]; then + HOST_NAME=$(ip addr | grep 'BROADCAST' -A2 | tail -n1 | awk '{print $2}' | cut -f1 -d'/') +fi + +: ${PORT:=9092} +: ${ADVERTISED_PORT:=9092} +: ${CONTROLLER_PORT:=9093} + +: ${ADVERTISED_PORT:=${PORT}} +: ${ADVERTISED_HOST_NAME:=${HOST_NAME}} + +: ${KAFKA_ADVERTISED_PORT:=${ADVERTISED_PORT}} +: ${KAFKA_ADVERTISED_HOST_NAME:=${ADVERTISED_HOST_NAME}} + +: ${KAFKA_PORT:=${PORT}} +: ${KAFKA_HOST_NAME:=${HOST_NAME}} + +if [[ -z "$CLUSTER_ID" ]]; then + : ${KAFKA_LISTENERS:=PLAINTEXT://$KAFKA_HOST_NAME:$KAFKA_PORT} +else + case "$NODE_ROLE" in + 'combined' ) : ${KAFKA_LISTENERS:=PLAINTEXT://$KAFKA_HOST_NAME:$KAFKA_PORT,CONTROLLER://$KAFKA_HOST_NAME:$CONTROLLER_PORT};; + 'broker' ) : ${KAFKA_LISTENERS:=PLAINTEXT://$KAFKA_HOST_NAME:$KAFKA_PORT};; + 'controller' ) : ${KAFKA_LISTENERS:=PLAINTEXT://$KAFKA_HOST_NAME:$CONTROLLER_PORT};; + *) : ${KAFKA_LISTENERS:=PLAINTEXT://$KAFKA_HOST_NAME:$KAFKA_PORT,CONTROLLER://$KAFKA_HOST_NAME:$CONTROLLER_PORT};; + esac +fi + +: ${KAFKA_ADVERTISED_LISTENERS:=PLAINTEXT://$KAFKA_ADVERTISED_HOST_NAME:$KAFKA_ADVERTISED_PORT} + +export KAFKA_LISTENERS KAFKA_ADVERTISED_LISTENERS +unset HOST_NAME ADVERTISED_HOST_NAME KAFKA_HOST_NAME KAFKA_ADVERTISED_HOST_NAME PORT ADVERTISED_PORT KAFKA_PORT KAFKA_ADVERTISED_PORT CONTROLLER_PORT NODE_ROLE + +echo "Using KAFKA_LISTENERS=$KAFKA_LISTENERS and KAFKA_ADVERTISED_LISTENERS=$KAFKA_ADVERTISED_LISTENERS" + +# +# Set up the JMX options +# +: ${JMXAUTH:="false"} +: ${JMXSSL:="false"} +if [[ -n "$JMXPORT" && -n "$JMXHOST" ]]; then + echo "Enabling JMX on ${JMXHOST}:${JMXPORT}" + export KAFKA_JMX_OPTS="-Djava.rmi.server.hostname=${JMXHOST} -Dcom.sun.management.jmxremote.rmi.port=${JMXPORT} -Dcom.sun.management.jmxremote.port=${JMXPORT} -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=${JMXAUTH} -Dcom.sun.management.jmxremote.ssl=${JMXSSL} " +fi + +# Copy config files if not provided in volume +cp -rn $KAFKA_HOME/config.orig/* $KAFKA_HOME/config + +# Process the argument to this container ... +case $1 in + start) + + # + # Configure the log files ... 
+ # + if [[ -z "$LOG_LEVEL" ]]; then + LOG_LEVEL="INFO" + fi + sed -i -r -e "s|=INFO, stdout|=$LOG_LEVEL, stdout|g" $KAFKA_HOME/config/log4j.properties + sed -i -r -e "s|^(log4j.appender.stdout.threshold)=.*|\1=${LOG_LEVEL}|g" $KAFKA_HOME/config/log4j.properties + export KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:$KAFKA_HOME/config/log4j.properties" + unset LOG_LEVEL + + # Add missing EOF at the end of the config file + echo "" >> $KAFKA_HOME/$CONFIG_FILE + + # + # Process all environment variables that start with 'KAFKA_' (but not 'KAFKA_HOME' or 'KAFKA_VERSION'): + # + for VAR in `env` + do + env_var=`echo "$VAR" | sed "s/=.*//"` + if [[ $env_var =~ ^KAFKA_ && $env_var != "KAFKA_VERSION" && $env_var != "KAFKA_HOME" && $env_var != "KAFKA_LOG4J_OPTS" && $env_var != "KAFKA_JMX_OPTS" ]]; then + prop_name=`echo "$VAR" | sed -r "s/^KAFKA_(.*)=.*/\1/g" | tr '[:upper:]' '[:lower:]' | tr _ .` + if egrep -q "(^|^#)$prop_name=" $KAFKA_HOME/$CONFIG_FILE; then + #note that no config names or values may contain an '@' char + sed -r -i "s%(^|^#)($prop_name)=(.*)%\2=${!env_var}%g" $KAFKA_HOME/$CONFIG_FILE + else + #echo "Adding property $prop_name=${!env_var}" + echo "$prop_name=${!env_var}" >> $KAFKA_HOME/$CONFIG_FILE + fi + fi + done + + if [[ -n $CREATE_TOPICS ]]; then + echo "Creating topics: $CREATE_TOPICS" + # Start a subshell in the background that waits for the Kafka broker to open socket on port 9092 and + # then creates the topics when the broker is running and able to receive connections ... + ( + echo "STARTUP: Waiting for Kafka broker to open socket on port 9092 ..." + while ss -n | awk '$5 ~ /:9092$/ {exit 1}'; do sleep 1; done + echo "START: Found running Kafka broker on port 9092, so creating topics ..." + IFS=','; for topicToCreate in $CREATE_TOPICS; do + # remove leading and trailing whitespace ... + topicToCreate="$(echo ${topicToCreate} | xargs )" + IFS=':' read -a topicConfig <<< "$topicToCreate" + config= + if [ -n "${topicConfig[3]}" ]; then + config="--config=cleanup.policy=${topicConfig[3]}" + fi + get_broker_endpoint + echo "STARTUP: Creating topic ${topicConfig[0]} with ${topicConfig[1]} partitions and ${topicConfig[2]} replicas with cleanup policy ${topicConfig[3]}..." + $KAFKA_HOME/bin/kafka-topics.sh --create --bootstrap-server $KAFKA_BROKER --replication-factor ${topicConfig[2]} --partitions ${topicConfig[1]} --topic "${topicConfig[0]}" ${config} + done + )& + fi + + if [[ ! -z "$CLUSTER_ID" && ! -f "$KAFKA_LOG_DIRS/meta.properties" ]]; then + echo "No meta.properties found in $KAFKA_LOG_DIRS; going to format the directory" + + $KAFKA_HOME/bin/kafka-storage.sh format -t $CLUSTER_ID -c $KAFKA_HOME/$CONFIG_FILE + fi + + exec $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/$CONFIG_FILE + ;; + watch-topic) + shift + FROM_BEGINNING="" + FETCH_MIN_BYTES=1 + PRINT_KEY="false" + while getopts :akm: option; do + case ${option} in + a) + FROM_BEGINNING="--from-beginning" + ;; + k) + PRINT_KEY="true" + ;; + m) + FETCH_MIN_BYTES=$OPTARG + ;; + h|\?) + echo "Usage: watch-topic [-a] [-k] [-m minBytes] topicname" + echo "" + echo "where" + echo "" + echo " -a Consume all messages from the beginning of the topic." + echo " By default, this starts consuming messages starting at the" + echo " time this utility connects." + echo " -k Display the key with the value. By default, the key will" + echo " not be displayed." + echo " -m minBytes Fetch messages only when doing so will consume at least" + echo " the specified number of bytes. Defaults to '1'." 
+ echo " topicname The required name of the topic to watch." + exit 1; + ;; + esac + done + shift $((OPTIND -1)) + if [[ -z $1 ]]; then + echo "ERROR: A topic name must be specified" + exit 1; + fi + TOPICNAME=$1 + shift + get_broker_endpoint + echo "Contents of topic $TOPICNAME:" + exec $KAFKA_HOME/bin/kafka-console-consumer.sh --bootstrap-server $KAFKA_BROKER --property print.key=$PRINT_KEY --property fetch.min.bytes=$FETCH_MIN_BYTES --topic "$TOPICNAME" $FROM_BEGINNING $@ + ;; + create-topic) + shift + PARTITION=1 + REPLICAS=1 + CLEANUP_POLICY=delete + while getopts :p:r:c: option; do + case ${option} in + p) + PARTITION=$OPTARG + ;; + r) + REPLICAS=$OPTARG + ;; + c) + CLEANUP_POLICY=$OPTARG + ;; + h|\?) + echo "Usage: create-topic [-p numPartitions] [-r numReplicas] [-c cleanupPolicy] topicname" + echo "" + echo "where" + echo "" + echo " -p numPartitions Create the topic with the specified number of partitions." + echo " By default, the topic is created with only one partition." + echo " -r numReplicas Create the topic with the specified number of replicas." + echo " By default, the topic is created with only one replica." + echo " The number of replicas may not be larger than the number" + echo " of brokers." + echo " -c cleanupPolicy Create the topic with the specified cleanup policy." + echo " By default, the topic is created with delete cleanup policy." + echo " topicname The required name of the new topic." + exit 1; + ;; + esac + done + get_broker_endpoint + shift $((OPTIND -1)) + if [[ -z $1 ]]; then + echo "ERROR: A topic name must be specified" + exit 1; + fi + TOPICNAME=$1 + echo "Creating new topic $TOPICNAME with $PARTITION partition(s), $REPLICAS replica(s) and cleanup policy set to $CLEANUP_POLICY..." + exec $KAFKA_HOME/bin/kafka-topics.sh --create --bootstrap-server $KAFKA_BROKER --replication-factor $REPLICAS --partitions $PARTITION --topic "$TOPICNAME" --config=cleanup.policy=$CLEANUP_POLICY + ;; + list-topics) + echo "Listing topics..." 
+ get_broker_endpoint + exec $KAFKA_HOME/bin/kafka-topics.sh --list --bootstrap-server $KAFKA_BROKER + ;; + +esac + +# Otherwise just run the specified command +exec "$@" diff --git a/kafka/2.6/log4j.properties b/kafka/2.6/log4j.properties new file mode 100644 index 00000000..a4248486 --- /dev/null +++ b/kafka/2.6/log4j.properties @@ -0,0 +1,67 @@ +kafka.logs.dir=logs + +log4j.rootLogger=INFO, stdout + +# Disable excessive reflection warnings - KAFKA-5229 +log4j.logger.org.reflections=ERROR + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.threshold=INFO +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n + +log4j.appender.kafkaAppender=org.apache.log4j.DailyRollingFileAppender +log4j.appender.kafkaAppender.DatePattern='.'yyyy-MM-dd-HH +log4j.appender.kafkaAppender.File=${kafka.logs.dir}/server.log +log4j.appender.kafkaAppender.layout=org.apache.log4j.PatternLayout +log4j.appender.kafkaAppender.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n + +log4j.appender.stateChangeAppender=org.apache.log4j.DailyRollingFileAppender +log4j.appender.stateChangeAppender.DatePattern='.'yyyy-MM-dd-HH +log4j.appender.stateChangeAppender.File=${kafka.logs.dir}/state-change.log +log4j.appender.stateChangeAppender.layout=org.apache.log4j.PatternLayout +log4j.appender.stateChangeAppender.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n + +log4j.appender.requestAppender=org.apache.log4j.DailyRollingFileAppender +log4j.appender.requestAppender.DatePattern='.'yyyy-MM-dd-HH +log4j.appender.requestAppender.File=${kafka.logs.dir}/kafka-request.log +log4j.appender.requestAppender.layout=org.apache.log4j.PatternLayout +log4j.appender.requestAppender.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n + +log4j.appender.cleanerAppender=org.apache.log4j.DailyRollingFileAppender +log4j.appender.cleanerAppender.DatePattern='.'yyyy-MM-dd-HH +log4j.appender.cleanerAppender.File=${kafka.logs.dir}/log-cleaner.log +log4j.appender.cleanerAppender.layout=org.apache.log4j.PatternLayout +log4j.appender.cleanerAppender.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n + +log4j.appender.controllerAppender=org.apache.log4j.DailyRollingFileAppender +log4j.appender.controllerAppender.DatePattern='.'yyyy-MM-dd-HH +log4j.appender.controllerAppender.File=${kafka.logs.dir}/controller.log +log4j.appender.controllerAppender.layout=org.apache.log4j.PatternLayout +log4j.appender.controllerAppender.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n + +# Turn on all our debugging info +#log4j.logger.kafka.producer.async.DefaultEventHandler=DEBUG, kafkaAppender +#log4j.logger.kafka.client.ClientUtils=DEBUG, kafkaAppender +#log4j.logger.kafka.perf=DEBUG, kafkaAppender +#log4j.logger.kafka.perf.ProducerPerformance$ProducerThread=DEBUG, kafkaAppender +#log4j.logger.org.I0Itec.zkclient.ZkClient=DEBUG +log4j.logger.kafka=INFO, kafkaAppender + +log4j.logger.kafka.network.RequestChannel$=WARN, requestAppender +log4j.additivity.kafka.network.RequestChannel$=false + +#log4j.logger.kafka.network.Processor=TRACE, requestAppender +#log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender +#log4j.additivity.kafka.server.KafkaApis=false +log4j.logger.kafka.request.logger=WARN, requestAppender +log4j.additivity.kafka.request.logger=false + +log4j.logger.kafka.controller=TRACE, controllerAppender +log4j.additivity.kafka.controller=false + 
+log4j.logger.kafka.log.LogCleaner=INFO, cleanerAppender +log4j.additivity.kafka.log.LogCleaner=false + +log4j.logger.state.change.logger=TRACE, stateChangeAppender +log4j.additivity.state.change.logger=false diff --git a/operator/2.5/Dockerfile b/operator/2.5/Dockerfile index 3fd421fa..2c9d2dcd 100644 --- a/operator/2.5/Dockerfile +++ b/operator/2.5/Dockerfile @@ -84,11 +84,11 @@ LABEL maintainer="Debezium Community" # Set the version, home directory # ENV LANGUAGE='en_US:en' -ENV DEBEZIUM_VERSION=2.5.0.CR1 \ +ENV DEBEZIUM_VERSION=2.5.0.Final \ OPERATOR_HOME=/operator \ MAVEN_REPO_CENTRAL="https://repo1.maven.org/maven2" ENV OPERATOR_URL_PATH=io/debezium/debezium-operator-dist/$DEBEZIUM_VERSION/debezium-operator-dist-$DEBEZIUM_VERSION.tar.gz \ - OPERATOR_MD5=ce023cf73bddb3c156c72f31bdd5cdc1 + OPERATOR_MD5=9e298cda18504c32f42e5e0b985f4206 USER root # diff --git a/operator/2.6/Dockerfile b/operator/2.6/Dockerfile new file mode 100644 index 00000000..2c9d2dcd --- /dev/null +++ b/operator/2.6/Dockerfile @@ -0,0 +1,122 @@ +#### +# This Dockerfile is used in order to build a container that runs the Quarkus application in JVM mode +# +# Before building the container image run: +# +# ./mvnw package +# +# Then, build the image with: +# +# docker build -f src/main/docker/Dockerfile.jvm -t quarkus/debezium-operator-jvm . +# +# Then run the container using: +# +# docker run -i --rm -p 8080:8080 quarkus/debezium-operator-jvm +# +# If you want to include the debug port into your docker image +# you will have to expose the debug port (default 5005 being the default) like this : EXPOSE 8080 5005. +# Additionally you will have to set -e JAVA_DEBUG=true and -e JAVA_DEBUG_PORT=*:5005 +# when running the container +# +# Then run the container using : +# +# docker run -i --rm -p 8080:8080 quarkus/debezium-operator-jvm +# +# This image uses the `run-java.sh` script to run the application. +# This scripts computes the command line to execute your Java application, and +# includes memory/GC tuning. +# You can configure the behavior using the following environment properties: +# - JAVA_OPTS: JVM options passed to the `java` command (example: "-verbose:class") +# - JAVA_OPTS_APPEND: User specified Java options to be appended to generated options +# in JAVA_OPTS (example: "-Dsome.property=foo") +# - JAVA_MAX_MEM_RATIO: Is used when no `-Xmx` option is given in JAVA_OPTS. This is +# used to calculate a default maximal heap memory based on a containers restriction. +# If used in a container without any memory constraints for the container then this +# option has no effect. If there is a memory constraint then `-Xmx` is set to a ratio +# of the container available memory as set here. The default is `50` which means 50% +# of the available memory is used as an upper boundary. You can skip this mechanism by +# setting this value to `0` in which case no `-Xmx` option is added. +# - JAVA_INITIAL_MEM_RATIO: Is used when no `-Xms` option is given in JAVA_OPTS. This +# is used to calculate a default initial heap memory based on the maximum heap memory. +# If used in a container without any memory constraints for the container then this +# option has no effect. If there is a memory constraint then `-Xms` is set to a ratio +# of the `-Xmx` memory as set here. The default is `25` which means 25% of the `-Xmx` +# is used as the initial heap size. 
You can skip this mechanism by setting this value +# to `0` in which case no `-Xms` option is added (example: "25") +# - JAVA_MAX_INITIAL_MEM: Is used when no `-Xms` option is given in JAVA_OPTS. +# This is used to calculate the maximum value of the initial heap memory. If used in +# a container without any memory constraints for the container then this option has +# no effect. If there is a memory constraint then `-Xms` is limited to the value set +# here. The default is 4096MB which means the calculated value of `-Xms` never will +# be greater than 4096MB. The value of this variable is expressed in MB (example: "4096") +# - JAVA_DIAGNOSTICS: Set this to get some diagnostics information to standard output +# when things are happening. This option, if set to true, will set +# `-XX:+UnlockDiagnosticVMOptions`. Disabled by default (example: "true"). +# - JAVA_DEBUG: If set remote debugging will be switched on. Disabled by default (example: +# true"). +# - JAVA_DEBUG_PORT: Port used for remote debugging. Defaults to 5005 (example: "8787"). +# - CONTAINER_CORE_LIMIT: A calculated core limit as described in +# https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt. (example: "2") +# - CONTAINER_MAX_MEMORY: Memory limit given to the container (example: "1024"). +# - GC_MIN_HEAP_FREE_RATIO: Minimum percentage of heap free after GC to avoid expansion. +# (example: "20") +# - GC_MAX_HEAP_FREE_RATIO: Maximum percentage of heap free after GC to avoid shrinking. +# (example: "40") +# - GC_TIME_RATIO: Specifies the ratio of the time spent outside the garbage collection. +# (example: "4") +# - GC_ADAPTIVE_SIZE_POLICY_WEIGHT: The weighting given to the current GC time versus +# previous GC times. (example: "90") +# - GC_METASPACE_SIZE: The initial metaspace size. (example: "20") +# - GC_MAX_METASPACE_SIZE: The maximum metaspace size. (example: "100") +# - GC_CONTAINER_OPTIONS: Specify Java GC to use. The value of this variable should +# contain the necessary JRE command-line options to specify the required GC, which +# will override the default of `-XX:+UseParallelGC` (example: -XX:+UseG1GC). +# - HTTPS_PROXY: The location of the https proxy. (example: "myuser@127.0.0.1:8080") +# - HTTP_PROXY: The location of the http proxy. (example: "myuser@127.0.0.1:8080") +# - NO_PROXY: A comma separated lists of hosts, IP addresses or domains that can be +# accessed directly. (example: "foo.example.com,bar.example.com") +# +### +FROM registry.access.redhat.com/ubi8/openjdk-17:1.14 +LABEL maintainer="Debezium Community" + +# +# Set the version, home directory +# +ENV LANGUAGE='en_US:en' +ENV DEBEZIUM_VERSION=2.5.0.Final \ + OPERATOR_HOME=/operator \ + MAVEN_REPO_CENTRAL="https://repo1.maven.org/maven2" +ENV OPERATOR_URL_PATH=io/debezium/debezium-operator-dist/$DEBEZIUM_VERSION/debezium-operator-dist-$DEBEZIUM_VERSION.tar.gz \ + OPERATOR_MD5=9e298cda18504c32f42e5e0b985f4206 + +USER root +# +# Prepare environment +# +RUN microdnf install -y gzip +RUN mkdir $OPERATOR_HOME; + +# +# Download and install Debezium Operator +# +RUN curl -fSL -o /tmp/operator.tar.gz "$MAVEN_REPO_CENTRAL/$OPERATOR_URL_PATH" + +# +# Verify the contents and then install ... 
+# +RUN echo "$OPERATOR_MD5 /tmp/operator.tar.gz" | md5sum -c - &&\ + tar xzf /tmp/operator.tar.gz -C $OPERATOR_HOME --strip-components 1 &&\ + rm -f /tmp/operator.tar.gz + + +# Set the owner on `$OPERATOR_HOME` +# +RUN chown -R 185 $OPERATOR_HOME + + +EXPOSE 8080 +USER 185 +ENV JAVA_OPTS="-Dquarkus.http.host=0.0.0.0 -Djava.util.logging.manager=org.jboss.logmanager.LogManager" +ENV JAVA_APP_JAR="$OPERATOR_HOME/quarkus-run.jar" + diff --git a/operator/snapshot/Dockerfile b/operator/snapshot/Dockerfile index 44e29521..cc0f5681 100644 --- a/operator/snapshot/Dockerfile +++ b/operator/snapshot/Dockerfile @@ -83,7 +83,7 @@ LABEL maintainer="Debezium Community" # # Set the version, home directory # -ARG DEBEZIUM_VERSION=2.3.0-SNAPSHOT +ARG DEBEZIUM_VERSION=2.6.0-SNAPSHOT ENV LANGUAGE='en_US:en' ENV DEBEZIUM_VERSION=$DEBEZIUM_VERSION \ diff --git a/postgres/10-alpine/Dockerfile b/postgres/10-alpine/Dockerfile index 6e030a48..0e6d0679 100644 --- a/postgres/10-alpine/Dockerfile +++ b/postgres/10-alpine/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:10-alpine LABEL maintainer="Debezium Community" -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV WAL2JSON_COMMIT_ID=92b33c7d7c2fccbeb9f79455dafbc92e87e00ddd diff --git a/postgres/10/Dockerfile b/postgres/10/Dockerfile index aabe6485..e8a5cca2 100644 --- a/postgres/10/Dockerfile +++ b/postgres/10/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:10-bullseye AS build ARG USE_POSTGIS=true -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV PROTOC_VERSION=1.4 ENV WAL2JSON_COMMIT_ID=92b33c7d7c2fccbeb9f79455dafbc92e87e00ddd diff --git a/postgres/11-alpine/Dockerfile b/postgres/11-alpine/Dockerfile index 3fdde9d4..28ecb5c6 100644 --- a/postgres/11-alpine/Dockerfile +++ b/postgres/11-alpine/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:11-alpine LABEL maintainer="Debezium Community" -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV WAL2JSON_COMMIT_ID=92b33c7d7c2fccbeb9f79455dafbc92e87e00ddd diff --git a/postgres/11/Dockerfile b/postgres/11/Dockerfile index 766ecf23..a26c1779 100644 --- a/postgres/11/Dockerfile +++ b/postgres/11/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:11-bullseye AS build ARG USE_POSTGIS=true -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV PROTOC_VERSION=1.4 ENV WAL2JSON_COMMIT_ID=92b33c7d7c2fccbeb9f79455dafbc92e87e00ddd diff --git a/postgres/12-alpine/Dockerfile b/postgres/12-alpine/Dockerfile index 7a2eaae1..241b9fd0 100644 --- a/postgres/12-alpine/Dockerfile +++ b/postgres/12-alpine/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:12-alpine LABEL maintainer="Debezium Community" -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV WAL2JSON_COMMIT_ID=92b33c7d7c2fccbeb9f79455dafbc92e87e00ddd diff --git a/postgres/12/Dockerfile b/postgres/12/Dockerfile index c7f8d3ad..73f9652c 100644 --- a/postgres/12/Dockerfile +++ b/postgres/12/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:12-bullseye AS build ARG USE_POSTGIS=true -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV PROTOC_VERSION=1.4 ENV WAL2JSON_COMMIT_ID=92b33c7d7c2fccbeb9f79455dafbc92e87e00ddd diff --git a/postgres/13-alpine/Dockerfile b/postgres/13-alpine/Dockerfile index 3c60e273..1c982c9b 100644 --- a/postgres/13-alpine/Dockerfile +++ b/postgres/13-alpine/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:13-alpine LABEL maintainer="Debezium Community" -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV WAL2JSON_COMMIT_ID=wal2json_2_3 diff --git a/postgres/13/Dockerfile b/postgres/13/Dockerfile 
index 1f2605b4..82860680 100644 --- a/postgres/13/Dockerfile +++ b/postgres/13/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:13-bullseye AS build ARG USE_POSTGIS=true -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV PROTOC_VERSION=1.4 ENV WAL2JSON_COMMIT_ID=wal2json_2_3 diff --git a/postgres/14-alpine/Dockerfile b/postgres/14-alpine/Dockerfile index bf9f8ff4..d48035a0 100644 --- a/postgres/14-alpine/Dockerfile +++ b/postgres/14-alpine/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:14-alpine LABEL maintainer="Debezium Community" -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV WAL2JSON_COMMIT_ID=wal2json_2_3 diff --git a/postgres/14/Dockerfile b/postgres/14/Dockerfile index 27d2a6d1..eee9edfe 100644 --- a/postgres/14/Dockerfile +++ b/postgres/14/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:14-bullseye AS build ARG USE_POSTGIS=true -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV PROTOC_VERSION=1.4 ENV WAL2JSON_COMMIT_ID=wal2json_2_3 diff --git a/postgres/15-alpine/Dockerfile b/postgres/15-alpine/Dockerfile index 386a390b..d3fdf1d1 100644 --- a/postgres/15-alpine/Dockerfile +++ b/postgres/15-alpine/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:15-alpine LABEL maintainer="Debezium Community" -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final RUN apk add --no-cache protobuf-c-dev diff --git a/postgres/15/Dockerfile b/postgres/15/Dockerfile index 656ef81e..405a7856 100644 --- a/postgres/15/Dockerfile +++ b/postgres/15/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:15-bullseye AS build ARG USE_POSTGIS=true -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV PROTOC_VERSION=1.4 # Install the packages which will be required to get everything to compile diff --git a/postgres/16-alpine/Dockerfile b/postgres/16-alpine/Dockerfile index ac1f5138..665f0633 100644 --- a/postgres/16-alpine/Dockerfile +++ b/postgres/16-alpine/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:16-alpine LABEL maintainer="Debezium Community" -ENV PLUGIN_VERSION=v2.5.0.CR1 +ENV PLUGIN_VERSION=v2.5.0.Final RUN apk add --no-cache protobuf-c-dev diff --git a/postgres/16/Dockerfile b/postgres/16/Dockerfile index 8f2f22df..43bf5467 100644 --- a/postgres/16/Dockerfile +++ b/postgres/16/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:16-bullseye AS build ARG USE_POSTGIS=true -ENV PLUGIN_VERSION=v2.5.0.CR1 +ENV PLUGIN_VERSION=v2.5.0.Final ENV PROTOC_VERSION=1.4 # Install the packages which will be required to get everything to compile diff --git a/postgres/9.6-alpine/Dockerfile b/postgres/9.6-alpine/Dockerfile index fdfb597a..a00051e1 100644 --- a/postgres/9.6-alpine/Dockerfile +++ b/postgres/9.6-alpine/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:9.6-alpine LABEL maintainer="Debezium Community" -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV WAL2JSON_COMMIT_ID=92b33c7d7c2fccbeb9f79455dafbc92e87e00ddd diff --git a/postgres/9.6/Dockerfile b/postgres/9.6/Dockerfile index 6f8b199e..f5e0e01c 100644 --- a/postgres/9.6/Dockerfile +++ b/postgres/9.6/Dockerfile @@ -1,7 +1,7 @@ FROM postgres:9.6-bullseye AS build ARG USE_POSTGIS=true -ENV PLUGIN_VERSION=v2.3.5.Final +ENV PLUGIN_VERSION=v2.5.0.Final ENV PROTOC_VERSION=1.4 ENV WAL2JSON_COMMIT_ID=92b33c7d7c2fccbeb9f79455dafbc92e87e00ddd diff --git a/server/2.5/Dockerfile b/server/2.5/Dockerfile index 78ef8f36..5d46d23f 100644 --- a/server/2.5/Dockerfile +++ b/server/2.5/Dockerfile @@ -5,11 +5,11 @@ LABEL maintainer="Debezium Community" # # Set the version, home directory, and MD5 hash. 
# -ENV DEBEZIUM_VERSION=2.5.0.CR1 \ +ENV DEBEZIUM_VERSION=2.5.0.Final \ SERVER_HOME=/debezium \ MAVEN_REPO_CENTRAL="https://repo1.maven.org/maven2" ENV SERVER_URL_PATH=io/debezium/debezium-server-dist/$DEBEZIUM_VERSION/debezium-server-dist-$DEBEZIUM_VERSION.tar.gz \ - SERVER_MD5=71a47938246d78a49ac5235d6b0ff30e + SERVER_MD5=0293ac8e8547bd2a8c7d385ce759682a # # Create a directory for Debezium Server diff --git a/server/2.6/Dockerfile b/server/2.6/Dockerfile new file mode 100644 index 00000000..5d46d23f --- /dev/null +++ b/server/2.6/Dockerfile @@ -0,0 +1,59 @@ +FROM registry.access.redhat.com/ubi8/openjdk-11 + +LABEL maintainer="Debezium Community" + +# +# Set the version, home directory, and MD5 hash. +# +ENV DEBEZIUM_VERSION=2.5.0.Final \ + SERVER_HOME=/debezium \ + MAVEN_REPO_CENTRAL="https://repo1.maven.org/maven2" +ENV SERVER_URL_PATH=io/debezium/debezium-server-dist/$DEBEZIUM_VERSION/debezium-server-dist-$DEBEZIUM_VERSION.tar.gz \ + SERVER_MD5=0293ac8e8547bd2a8c7d385ce759682a + +# +# Create a directory for Debezium Server +# +USER root +RUN microdnf -y install gzip && \ + microdnf clean all && \ + mkdir $SERVER_HOME && \ + chmod 755 $SERVER_HOME + +# +# Change ownership and switch user +# +RUN chown -R jboss $SERVER_HOME && \ + chgrp -R jboss $SERVER_HOME +USER jboss + +RUN mkdir $SERVER_HOME/conf && \ + mkdir $SERVER_HOME/data + +# +# Download and install Debezium Server +# +RUN curl -fSL -o /tmp/debezium.tar.gz "$MAVEN_REPO_CENTRAL/$SERVER_URL_PATH" + +# +# Verify the contents and then install ... +# +RUN echo "$SERVER_MD5 /tmp/debezium.tar.gz" | md5sum -c - &&\ + tar xzf /tmp/debezium.tar.gz -C $SERVER_HOME --strip-components 1 &&\ + rm -f /tmp/debezium.tar.gz + +# +# Allow random UID to use Debezium Server +# +RUN chmod -R g+w,o+w $SERVER_HOME + +# Set the working directory to the Debezium Server home directory +WORKDIR $SERVER_HOME + +# +# Expose the ports and set up volumes for the data, transaction log, and configuration +# +EXPOSE 8080 +VOLUME ["/debezium/conf","/debezium/data"] + +CMD ["/debezium/run.sh"] diff --git a/server/2.6/Dockerfile.local b/server/2.6/Dockerfile.local new file mode 100644 index 00000000..6741cdb5 --- /dev/null +++ b/server/2.6/Dockerfile.local @@ -0,0 +1,53 @@ +FROM registry.access.redhat.com/ubi8/openjdk-11 + +LABEL maintainer="Debezium Community" + +# +# Set the version, home directory, and MD5 hash. +# +ENV SERVER_HOME=/debezium + +# +# Create a directory for Debezium Server +# +USER root +RUN microdnf -y install gzip && \ + microdnf clean all && \ + mkdir $SERVER_HOME && \ + chmod 755 $SERVER_HOME + +# +# Change ownership and switch user +# +RUN chown -R jboss $SERVER_HOME && \ + chgrp -R jboss $SERVER_HOME +USER jboss + +RUN mkdir $SERVER_HOME/conf && \ + mkdir $SERVER_HOME/data + +# +# Download and install Debezium Server +# +COPY debezium-server-dist-2.2.0-SNAPSHOT.tar.gz /tmp/debezium.tar.gz + +# +# Verify the contents and then install ... +# +RUN tar xzf /tmp/debezium.tar.gz -C $SERVER_HOME --strip-components 1 + +# +# Allow random UID to use Debezium Server +# +RUN chmod -R g+w,o+w $SERVER_HOME + +# Set the working directory to the Debezium Server home directory +WORKDIR $SERVER_HOME + +# +# Expose the ports and set up volumes for the data, transaction log, and configuration +# +EXPOSE 8080 +VOLUME ["/debezium/conf","/debezium/data"] + +CMD ["/debezium/run.sh"] diff --git a/server/2.6/README.md b/server/2.6/README.md new file mode 100644 index 00000000..4bc53b3f --- /dev/null +++ b/server/2.6/README.md @@ -0,0 +1,104 @@ +# What is Debezium? 
+
+Debezium is a distributed platform that turns your existing databases into event streams, so applications can quickly react to each row-level change in the databases.
+
+# What is Debezium Server?
+
+Debezium can be deployed either as connector instances in a [Kafka Connect](https://kafka.apache.org/documentation/#connect) cluster, or as a standalone application - Debezium Server.
+Debezium [Server](https://debezium.io/documentation/reference/operations/debezium-server.html) is a [Quarkus-based](https://quarkus.io/) high-performance application that streams data from a database to one of the supported sinks or to a user-developed sink.
+
+Debezium Server supports multiple converters to provide different output message formats.
+
+
+# How to use this image
+
+The image requires source and sink systems as dependencies: a database to read change data from and a target system to write the output messages to.
+
+The application itself can be configured either via environment variables or via an `application.properties` file injected into the container via a volume.
+
+Starting an instance of Debezium Server using this container image is simple:
+
+    $ docker run -it --name debezium -p 8080:8080 -v $PWD/conf:/debezium/conf -v $PWD/data:/debezium/data quay.io/debezium/server
+
+
+## Example
+
+If you want to try the image yourself, follow these steps to set up the necessary environment.
+
+Start the PostgreSQL source database:
+
+    $ docker run -d --name postgres -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres quay.io/debezium/example-postgres
+
+Start the Apache Pulsar sink:
+
+    $ docker run -d --name pulsar -p 6650:6650 -p 7080:8080 apachepulsar/pulsar:2.5.2 bin/pulsar standalone
+
+Wait for the Pulsar sink to start:
+
+    $ docker logs -f pulsar
+
+Prepare the Debezium Server deployment:
+
+```
+ $ mkdir {data,conf}; chmod 777 {data,conf}
+ $ cat <<-EOF > conf/application.properties
+debezium.sink.type=pulsar
+debezium.sink.pulsar.client.serviceUrl=pulsar://pulsar:6650
+debezium.source.connector.class=io.debezium.connector.postgresql.PostgresConnector
+debezium.source.offset.storage.file.filename=data/offsets.dat
+debezium.source.offset.flush.interval.ms=0
+debezium.source.database.hostname=postgres
+debezium.source.database.port=5432
+debezium.source.database.user=postgres
+debezium.source.database.password=postgres
+debezium.source.database.dbname=postgres
+debezium.source.topic.prefix=tutorial
+debezium.source.schema.include.list=inventory
+debezium.source.plugin.name=pgoutput
+EOF
+```
+
+Note that the configuration file can be replaced with environment variables, where every property name is translated to uppercase and dots are replaced with underscores, e.g. `debezium.sink.type` becomes `DEBEZIUM_SINK_TYPE`.
+
+Start the Debezium Server:
+
+    $ docker run -it --name debezium -p 8080:8080 -v $PWD/conf:/debezium/conf -v $PWD/data:/debezium/data --link postgres --link pulsar quay.io/debezium/server
+
+
+# Environment variables
+
+The Debezium Server image uses several environment variables to configure the JVM and the source/sink when running this image.
+
+
+### `JAVA_OPTS`
+
+This environment variable is passed to the command line when the `java` command is invoked.
+It can be used to tune memory settings, etc.
+
+### `DEBEZIUM_OPTS`
+
+This environment variable is used in the same way as `JAVA_OPTS` and serves only to logically separate Debezium Server-specific settings.
+
+### Source/sink Configuration options
+
+All configuration options that can be present in `application.properties` can be added or overridden via environment variables.
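+For example (an illustrative fragment, not a complete configuration), the sink settings from the tutorial above could be passed as environment variables instead of via the mounted `application.properties`:
+
+    $ docker run -it --name debezium -p 8080:8080 \
+        -e DEBEZIUM_SINK_TYPE=pulsar \
+        -e DEBEZIUM_SINK_PULSAR_CLIENT_SERVICEURL=pulsar://pulsar:6650 \
+        --link pulsar quay.io/debezium/server
+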
+This is enabled by using [MicroProfile Config](https://github.com/eclipse/microprofile-config) in Debezium Server. + + + +# Ports + +Containers created using this image will expose port `8080`, which is the standard port to access [MicroProfile Health](https://github.com/eclipse/microprofile-health) endpoint. + + +# Volumes + +The container image exposes two volumes: + +### `/debezium/conf` + +In this volume the configuration files (mostly `application.properties`) are located. + +### `/debezium/data` + +In this volume the data files (mostly file offset storage) are located. diff --git a/server/snapshot/Dockerfile b/server/snapshot/Dockerfile index f234d0b1..677c7e20 100644 --- a/server/snapshot/Dockerfile +++ b/server/snapshot/Dockerfile @@ -5,7 +5,7 @@ LABEL maintainer="Debezium Community" # # Set the version, home directory # -ARG DEBEZIUM_VERSION=2.3.0-SNAPSHOT +ARG DEBEZIUM_VERSION=2.6.0-SNAPSHOT ENV DEBEZIUM_VERSION=$DEBEZIUM_VERSION \ SERVER_HOME=/debezium \ diff --git a/ui/2.5/Dockerfile b/ui/2.5/Dockerfile index d085c57c..c937d8dd 100644 --- a/ui/2.5/Dockerfile +++ b/ui/2.5/Dockerfile @@ -6,7 +6,7 @@ FROM registry.access.redhat.com/ubi9/ubi-minimal AS builder ARG JAVA_PACKAGE=java-11-openjdk-devel -ARG BRANCH=v2.5.0.CR1 +ARG BRANCH=v2.5.0.Final ENV LANG='en_US.UTF-8' \ LANGUAGE='en_US:en' \ diff --git a/ui/2.6/Dockerfile b/ui/2.6/Dockerfile new file mode 100644 index 00000000..c937d8dd --- /dev/null +++ b/ui/2.6/Dockerfile @@ -0,0 +1,63 @@ +#### +# This Dockerfile is used in order to build a container with Debezium UI. +# It is derived from standard Quarkus-build Docker file but the build is +# executed from the sources. +### +FROM registry.access.redhat.com/ubi9/ubi-minimal AS builder + +ARG JAVA_PACKAGE=java-11-openjdk-devel +ARG BRANCH=v2.5.0.Final + +ENV LANG='en_US.UTF-8' \ + LANGUAGE='en_US:en' \ + JAVA_HOME='/usr/lib/jvm/java-11-openjdk' + +# Install java and the run-java script +# Also set up permissions for user `1001` +RUN microdnf -y install ca-certificates make gcc-c++ ${JAVA_PACKAGE} tzdata-java git maven \ + && microdnf -y update \ + && microdnf -y clean all + +RUN java -version \ + && mkdir -p /sources \ + && cd /sources \ + && git clone -b $BRANCH https://github.com/debezium/debezium-ui.git . 
\ + && ./mvnw -am dependency:go-offline -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -Dmaven.wagon.http.pool=false -Dmaven.wagon.httpconnectionManager.ttlSeconds=120 \ + && ./mvnw clean package -DskipTests -DskipITs -Dquarkus.package.type=fast-jar -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -Dmaven.wagon.http.pool=false -Dmaven.wagon.httpconnectionManager.ttlSeconds=120 + +FROM registry.access.redhat.com/ubi9/ubi-minimal + +ARG JAVA_PACKAGE=java-11-openjdk-headless +ARG RUN_JAVA_VERSION=1.3.8 + +ENV LANG='en_US.UTF-8' \ + LANGUAGE='en_US:en' \ + JAVA_HOME='/usr/lib/jvm/jre-11' + +# Install java and the run-java script +# Also set up permissions for user `1001` +RUN microdnf -y install ca-certificates ${JAVA_PACKAGE} tzdata-java \ + && microdnf -y update \ + && microdnf -y clean all \ + && mkdir /deployments \ + && chown 1001 /deployments \ + && chmod "g+rwX" /deployments \ + && chown 1001:root /deployments \ + && curl https://repo1.maven.org/maven2/io/fabric8/run-java-sh/${RUN_JAVA_VERSION}/run-java-sh-${RUN_JAVA_VERSION}-sh.sh -o /deployments/run-java.sh \ + && chown 1001 /deployments/run-java.sh \ + && chmod 540 /deployments/run-java.sh \ + && echo "securerandom.source=file:/dev/urandom" >> /etc/alternatives/jre/lib/security/java.security + +# Configure the JAVA_OPTIONS, you can add -XshowSettings:vm to also display the heap size. +ENV JAVA_OPTIONS="-Dquarkus.http.host=0.0.0.0 -Djava.util.logging.manager=org.jboss.logmanager.LogManager" + +# We make four distinct layers so if there are application changes the library layers can be re-used +COPY --from=builder --chown=1001 /sources/backend/target/quarkus-app/lib/ /deployments/lib/ +COPY --from=builder --chown=1001 /sources/backend/target/quarkus-app/*.jar /deployments/ +COPY --from=builder --chown=1001 /sources/backend/target/quarkus-app/app/ /deployments/app/ +COPY --from=builder --chown=1001 /sources/backend/target/quarkus-app/quarkus/ /deployments/quarkus/ + +EXPOSE 8080 +USER 1001 + +ENTRYPOINT [ "/deployments/run-java.sh" ] diff --git a/ui/2.6/README.md b/ui/2.6/README.md new file mode 100644 index 00000000..c4af8905 --- /dev/null +++ b/ui/2.6/README.md @@ -0,0 +1,33 @@ + +# Debezium UI +The Debezium UI provides standalone web application, which connects to Kafka Connect via its REST API. See the [Debezium UI docs](https://debezium.io/documentation/reference/operations/debezium-ui.html) for more information. + +# Configure the Debezium UI +The following table shows the environment variables for the [Debezium UI container image](https://hub.docker.com/r/debezium/debezium-ui) and the related parameter names inside `application.properties` when running the Java application without the container. + + + + + + + + + + + + + + + + + + + + + + + + +
+| Environment variable | Parameter name in `application.properties` | Default value | Description |
+|----------------------|---------------------------------------------|---------------|-------------|
+| `DEPLOYMENT_MODE` | `deployment.mode` | `default` | Specifies how the Debezium UI is deployed. For example, in some environments it might not be possible to reach the underlying backend, the Kafka Connect REST interface, or the databases; in such cases the deployment mode can be switched to match the underlying infrastructure.<br/><br/>`default`: The default deployment mode. It uses the Debezium UI backend with the configured Kafka Connect clusters via the Kafka Connect REST interface (see `KAFKA_CONNECT_URIS` for how they are configured).<br/><br/>`validation.disabled`: The UI frontend does not call the backend to validate user input, nor does it check the availability and proper configuration of database connections. This mode is used only to generate the Debezium connector JSON configuration, without UI backend validation. |
+| `KAFKA_CONNECT_URIS` | `kafka.connect.uris` | `http://connect:8083` | A comma-separated list of one or more URLs of Kafka Connect REST interfaces, specifying the Kafka Connect clusters that should be managed by the Debezium UI. |
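+
+For example (illustrative only; the image name and tag are assumptions in this sketch), the UI could be pointed at a Kafka Connect cluster reachable at `http://connect:8083`:
+
+    $ docker run -d --name debezium-ui -p 8080:8080 \
+        -e KAFKA_CONNECT_URIS=http://connect:8083 \
+        quay.io/debezium/debezium-ui:2.6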
\ No newline at end of file diff --git a/zookeeper/2.6/Dockerfile b/zookeeper/2.6/Dockerfile new file mode 100644 index 00000000..08e14892 --- /dev/null +++ b/zookeeper/2.6/Dockerfile @@ -0,0 +1,82 @@ +ARG DEBEZIUM_DOCKER_REGISTRY_PRIMARY_NAME +FROM $DEBEZIUM_DOCKER_REGISTRY_PRIMARY_NAME/base + +LABEL maintainer="Debezium Community" + +# +# Set the version, home directory, and SHA hash. +# SHA 512 hash from https://www.apache.org/dist/zookeeper/zookeeper-$ZK_VERSION/zookeeper-$ZK_VERSION.tar.gz.sha512 +# +ENV ZK_VERSION=3.8.3 \ + ZK_HOME=/zookeeper \ + SHA256HASH=eb1a5e79fe55fa50e36021ca8b752e40584d5e24f23985c43e9f56abfbf23c0239dce37d18f27213c9a38fcb6f68daf1a9468fc352e6ac2364faaf9b56145bdf +ENV ZK_URL_PATH=zookeeper/zookeeper-$ZK_VERSION/apache-zookeeper-$ZK_VERSION-bin.tar.gz + +# +# Create a user and home directory for Zookeeper +# +USER root +RUN groupadd -r zookeeper -g 1001 && \ + useradd -u 1001 -r -g 1001 -m -d $ZK_HOME -s /sbin/nologin -c "Zookeeper user" zookeeper && \ + chmod 755 $ZK_HOME + +# +# Change ownership and switch user +# +RUN chown -R zookeeper $ZK_HOME && \ + chgrp -R zookeeper $ZK_HOME +USER zookeeper + +RUN mkdir $ZK_HOME/data && \ + mkdir $ZK_HOME/txns && \ + mkdir $ZK_HOME/logs + +# +# Download and install Zookeeper +# +#RUN curl -fSL -o /tmp/zookeeper.tar.gz $(curl --stderr /dev/null https://www.apache.org/dyn/closer.cgi\?as_json\=1 | sed -rn 's/.*"preferred":.*"(.*)"/\1/p')$ZK_URL_PATH || curl -fSL -o /tmp/zookeeper.tgz https://archive.apache.org/dist/$ZK_URL_PATH +RUN curl -fSL -o /tmp/zookeeper.tar.gz https://archive.apache.org/dist/$ZK_URL_PATH + +# +# Verify the contents and then install ... +# +RUN echo "$SHA256HASH /tmp/zookeeper.tar.gz" | sha512sum -c - &&\ + tar -xzf /tmp/zookeeper.tar.gz -C $ZK_HOME --strip-components 1 &&\ + rm -f /tmp/zookeeper.tar.gz + +# Remove unnecessary files +RUN rm -r $ZK_HOME/docs + +# +# Allow random UID to use Zookeeper +# +RUN chmod -R g+w,o+w $ZK_HOME + +# Set the working directory to the Zookeeper home directory +WORKDIR $ZK_HOME + +# +# Customize the Zookeeper and Log4J configuration files +# +COPY ./zoo.cfg $ZK_HOME/conf/zoo.cfg +RUN sed -i -r -e "s|name=\"zookeeper.log.dir\" value=\".\"|name=\"zookeeper.log.dir\" value=\"$ZK_HOME/logs\"|g" \ + -e "s|(\[myid\:\%X\{myid\}\]\s?)||g" \ + $ZK_HOME/conf/logback.xml +RUN mkdir $ZK_HOME/conf.orig && mv $ZK_HOME/conf/* $ZK_HOME/conf.orig + +# +# The zkEnv.sh script generates the classpath for launching ZooKeeper, with entries +# containing the pattern "/bin/../lib", which fails to be resolved properly in some +# environments; hence replacing this with "/lib" in the assembled classpath +# +RUN echo 'CLASSPATH="${CLASSPATH//bin\/\.\.\/lib\//lib/}"' >> $ZK_HOME/bin/zkEnv.sh + +# +# Expose the ports and set up volumes for the data, transaction log, and configuration +# +EXPOSE 2181 2888 3888 +VOLUME ["/zookeeper/data","/zookeeper/txns","/zookeeper/conf"] + +COPY ./docker-entrypoint.sh / +ENTRYPOINT ["/docker-entrypoint.sh"] +CMD ["start"] diff --git a/zookeeper/2.6/README.md b/zookeeper/2.6/README.md new file mode 100644 index 00000000..f8c149f9 --- /dev/null +++ b/zookeeper/2.6/README.md @@ -0,0 +1,81 @@ +[Zookeeper](http://zookeeper.apache.org/) is a distributed coordination and consensus service. In Debezium, it is used by [Kafka](http://kafka.apache.org/) to coordinate the availability and responsiblities of each Kafka broker. 
Reliability is provided by clustering multiple Zookeeper processes, and since Zookeeper uses quorums, you need an odd number of them (typically 3 or 5 in a production environment).
+
+# What is Debezium?
+
+Debezium is a distributed platform that turns your existing databases into event streams, so applications can quickly react to each row-level change in the databases. Debezium is built on top of Kafka and provides Kafka Connect compatible connectors that monitor specific database management systems. Debezium records the history of data changes in Kafka logs, so your application can be stopped and restarted at any time and can easily consume all of the events it missed while it was not running, ensuring that all events are processed correctly and completely.
+
+Running Debezium involves Zookeeper, Kafka, and services that run Debezium's connectors. For simple evaluation and experimentation, all services can be run on a single host machine, using the recipe outlined below. Production environments, however, require properly running and networking multiple instances of each service to provide the performance, reliability, replication, and fault tolerance. This can be done with a platform like [OpenShift](https://www.openshift.com) that manages multiple Docker containers running on multiple hosts and machines. But running Kafka in a Docker container has limitations, so for scenarios where very high throughput is required, you should run Kafka on dedicated hardware as explained in the [Kafka documentation](http://kafka.apache.org/documentation.html).
+
+# How to use this image
+
+This image can be used to run one or more instances of Zookeeper required by Kafka brokers running in other containers. If running a single instance, the defaults are often good enough, especially for simple evaluations and demonstrations. However, when running multiple instances you will need to use the environment variables described below.
+
+Production environments require running multiple instances of each service to provide the performance, reliability, replication, and fault tolerance. This can be done with a platform like [OpenShift](https://www.openshift.com) that manages multiple Docker containers running on multiple hosts and machines.
+
+## Start Zookeeper
+
+Starting a Zookeeper instance using this image is simple:
+
+    $ docker run -it --name zookeeper -p 2181:2181 -p 2888:2888 -p 3888:3888 quay.io/debezium/zookeeper
+
+This command uses this image and starts a new container named `zookeeper`, which runs in the foreground and attaches the console so that it displays Zookeeper's output and error messages. It exposes and maps port 2181 to the same port on the Docker host so that code running outside of the container (e.g., Kafka) can talk with Zookeeper; Zookeeper's other ports (2888 and 3888) are also exposed and mapped to the Docker host. See the environment variables below for additional information that can be supplied to the server on startup.
+
+To start the container in _detached_ mode, simply replace the `-it` option with `-d`. No Zookeeper output will be sent to your console, but it can be read at any time using the `docker logs` command.
For example, the following command will display the output and keep following it:
+
+    $ docker logs --follow zookeeper
+
+## Display Zookeeper status
+
+If you already have one or more containers running Zookeeper, you can use this image to start _another_ container that connects to the running instance(s) and displays the status:
+
+    $ docker run -it --rm quay.io/debezium/zookeeper status
+
+The container will exit as soon as the status is displayed, and because `--rm` is used the container will be immediately removed. You can run this command as many times as necessary.
+
+## Use the Zookeeper CLI
+
+If you already have one or more containers running Zookeeper, you can use this image to start _another_ container that connects to the running instance(s) and starts the Zookeeper CLI:
+
+    $ docker run -it --rm quay.io/debezium/zookeeper cli
+
+The container will exit as soon as you exit the CLI, and because `--rm` is used the container will be immediately removed.
+You can run this command as many times as necessary.
+
+
+# Environment variables
+
+The Debezium Zookeeper image uses several environment variables.
+
+### `SERVER_ID`
+
+This environment variable defines the numeric identifier for this Zookeeper server. The default is '1' and is only applicable for a single standalone Zookeeper server that is not replicated or fault tolerant. In all other cases, you should set the server number to a unique value within your Zookeeper cluster.
+
+### `SERVER_COUNT`
+
+This environment variable defines the total number of Zookeeper servers in the cluster. The default is '1' and is only applicable for a single standalone Zookeeper server. In all other cases, you must use this variable to set the total number of servers in the cluster.
+
+### `LOG_LEVEL`
+
+This environment variable is optional. Use this to set the level of detail for Zookeeper's application log written to STDOUT and STDERR. Valid values are `INFO` (default), `WARN`, `ERROR`, `DEBUG`, or `TRACE`.
+
+
+# Ports
+
+Containers created using this image will expose ports 2181, 2888, and 3888. These are the standard ports used by Zookeeper. You can use standard Docker options to map these to different ports on the host that runs the container.
+
+# Storing data
+
+The Zookeeper server run by this image writes data to the local file system, and the only way to keep this data is to use volumes that map specific directories inside the container to the local file system (or to OpenShift persistent volumes).
+
+### Zookeeper data
+
+This image defines data volumes at `/zookeeper/data` and `/zookeeper/txns`, and it is in these directories that the Zookeeper server will persist all of its data. You must mount them appropriately when running your container to persist the data after the container is stopped; failing to do so will result in all data being lost when the container is stopped.
+
+### Log files
+
+Although this image will send Zookeeper's log output to standard output so it is visible as Docker logs, this image also configures Zookeeper to write out more detailed logs to a data volume at `/zookeeper/logs`. You must mount it appropriately when running your container to persist the logs after the container is stopped; failing to do so will result in all logs being lost when the container is stopped.
+
+### Configuration
+
+This image defines a data volume at `/zookeeper/conf` where the Zookeeper server's configuration files are stored.
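+
+For example (an illustrative sketch; the host directories are arbitrary), the configuration and data volumes described above could be mapped to the host like this:
+
+    $ docker run -it --name zookeeper -p 2181:2181 -p 2888:2888 -p 3888:3888 \
+        -v $PWD/zk/conf:/zookeeper/conf \
+        -v $PWD/zk/data:/zookeeper/data \
+        -v $PWD/zk/txns:/zookeeper/txns \
+        quay.io/debezium/zookeeper
+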
Note that these configuration files are always modified based upon the environment variables and linked containers. The best use of this data volume is to be able to see the configuration files used by Zookeeper, although with some care it is possible to supply custom configuration files that will be adapted and used upon container startup.
+
diff --git a/zookeeper/2.6/docker-entrypoint.sh b/zookeeper/2.6/docker-entrypoint.sh
new file mode 100755
index 00000000..34d12be2
--- /dev/null
+++ b/zookeeper/2.6/docker-entrypoint.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+# Exit immediately if a *pipeline* returns a non-zero status. (Add -x for command tracing)
+set -e
+
+if [[ -z $1 ]]; then
+    ARG1="start"
+else
+    ARG1=$1
+fi
+
+if [[ -n "$JMXPORT" ]]; then
+    # Docker requires extra JMX-related JVM flags beyond what Zookeeper normally uses
+    JMX_EXTRA_FLAGS="-Djava.rmi.server.hostname=${JMXHOST} -Dcom.sun.management.jmxremote.rmi.port=${JMXPORT} -Dcom.sun.management.jmxremote.port=${JMXPORT}"
+    if [[ -n "$JVMFLAGS" ]]; then
+        export JVMFLAGS="${JMX_EXTRA_FLAGS} ${JVMFLAGS} "
+    else
+        export JVMFLAGS="${JMX_EXTRA_FLAGS} "
+    fi
+fi
+
+# Process some known arguments to run Zookeeper ...
+case $ARG1 in
+    start)
+        # Copy config files if not provided in volume
+        cp -rn $ZK_HOME/conf.orig/* $ZK_HOME/conf
+
+        #
+        # Process the logging-related environment variables. Zookeeper's log configuration allows *some* variables to be
+        # set via environment variables, and more via system properties (e.g., "-Dzookeeper.console.threshold=INFO").
+        # However, in the interest of keeping things straightforward and in the spirit of the immutable image,
+        # we don't use these and instead directly modify the Logback configuration file (replacing the variables).
+        #
+        if [[ -z "$LOG_LEVEL" ]]; then
+            LOG_LEVEL="INFO"
+        fi
+        sed -i -r -e "s|name=\"zookeeper.console.threshold\" value=\".*\"|name=\"zookeeper.console.threshold\" value=\"$LOG_LEVEL\"|g" $ZK_HOME/conf/logback.xml
+        sed -i -r -e "s|root level=\".*\"|root level=\"$LOG_LEVEL\"|g" $ZK_HOME/conf/logback.xml
+
+        #
+        # Configure cluster settings
+        #
+        if [[ -z "$SERVER_ID" ]]; then
+            SERVER_ID="1"
+        fi
+        if [[ -z "$SERVER_COUNT" ]]; then
+            SERVER_COUNT=1
+        fi
+        if [[ $SERVER_ID = "1" ]] && [[ $SERVER_COUNT = "1" ]]; then
+            echo "Starting up in standalone mode"
+        else
+            echo "Starting up ${SERVER_ID} of ${SERVER_COUNT}"
+            #
+            # Append the server addresses to the configuration file ...
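+            # For example (illustrative): with SERVER_COUNT=3 and SERVER_ID=2, the loop below
+            # appends the following lines, so this node binds locally while its peers are
+            # addressed by the zookeeper-<id> host names:
+            #   server.1=zookeeper-1:2888:3888
+            #   server.2=0.0.0.0:2888:3888
+            #   server.3=zookeeper-3:2888:3888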
+            #
+            echo "" >> $ZK_HOME/conf/zoo.cfg
+            echo "#Server List" >> $ZK_HOME/conf/zoo.cfg
+            for i in $( eval echo {1..$SERVER_COUNT});do
+                if [ "$SERVER_ID" = "$i" ];then
+                    echo "server.$i=0.0.0.0:2888:3888" >> $ZK_HOME/conf/zoo.cfg
+                else
+                    echo "server.$i=zookeeper-$i:2888:3888" >> $ZK_HOME/conf/zoo.cfg
+                fi
+            done
+            #
+            # Persist the ID of the current instance of Zookeeper in the 'myid' file
+            #
+            echo ${SERVER_ID} > $ZK_HOME/data/myid
+        fi
+
+        # Now start the Zookeeper server
+        export ZOOCFGDIR="$ZK_HOME/conf"
+        export ZOOCFG="zoo.cfg"
+        exec $ZK_HOME/bin/zkServer.sh start-foreground
+        ;;
+    status)
+        exec $ZK_HOME/bin/zkServer.sh status
+        ;;
+    cli)
+        exec $ZK_HOME/bin/zkCli.sh -server 0.0.0.0:2181
+        ;;
+esac
+
+# Otherwise just run the specified command
+exec "$@"
diff --git a/zookeeper/2.6/zoo.cfg b/zookeeper/2.6/zoo.cfg
new file mode 100644
index 00000000..fcb934af
--- /dev/null
+++ b/zookeeper/2.6/zoo.cfg
@@ -0,0 +1,37 @@
+# The number of milliseconds of each tick
+tickTime=2000
+
+# The number of ticks that the initial
+# synchronization phase can take
+initLimit=10
+
+# The number of ticks that can pass between
+# sending a request and getting an acknowledgement
+syncLimit=5
+
+# the directory where the snapshot is stored.
+dataDir=/zookeeper/data
+
+# This option will direct the machine to write the transaction log to the 'dataLogDir' rather
+# than the 'dataDir'. This allows a dedicated log device to be used, and helps avoid
+# competition between transaction logging and data snapshots.
+dataLogDir=/zookeeper/txns
+
+# the port at which the clients will connect
+clientPort=2181
+
+# the maximum number of client connections.
+# increase this if you need to handle more clients
+#maxClientCnxns=60
+
+#
+# Be sure to read the maintenance section of the
+# administrator guide before turning on autopurge.
+#
+# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
+#
+# The number of snapshots to retain in dataDir
+autopurge.snapRetainCount=3
+# Purge task interval in hours
+# Set to "0" to disable auto purge feature
+autopurge.purgeInterval=1