Skip to content

Commit

Permalink
fix: upgrades from versions prior to 1.15.0 have incorrect data directories (#1341)
Browse files Browse the repository at this point in the history
  • Loading branch information
emosbaugh authored Oct 19, 2024
1 parent 54eb50b commit a1c8b0f
Show file tree
Hide file tree
Showing 18 changed files with 744 additions and 270 deletions.
2 changes: 1 addition & 1 deletion .github/actions/e2e/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ runs:
export EXPECT_K0S_VERSION_PREVIOUS=${{ inputs.k0s-version-previous }}
make e2e-test TEST_NAME=${{ inputs.test-name }}
- name: Troubleshoot
if: ${{ failure() }}
if: ${{ !cancelled() }}
uses: ./.github/actions/e2e-troubleshoot
with:
test-name: ${{ inputs.test-name }}
5 changes: 3 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ jobs:
run: |
make e2e-test TEST_NAME=${{ matrix.test }}
- name: Troubleshoot
if: ${{ failure() }}
if: ${{ !cancelled() }}
uses: ./.github/actions/e2e-troubleshoot
with:
test-name: '${{ matrix.test }}'
Expand All @@ -571,7 +571,6 @@ jobs:
- TestCommandsRequireSudo
- TestResetAndReinstallAirgap
- TestSingleNodeAirgapUpgrade
- TestSingleNodeAirgapUpgradeFromEC18
- TestSingleNodeAirgapUpgradeCustomCIDR
- TestSingleNodeDisasterRecoveryWithProxy
- TestProxiedEnvironment
Expand All @@ -583,6 +582,8 @@ jobs:
runner: embedded-cluster
- test: TestMultiNodeAirgapUpgradeSameK0s
runner: embedded-cluster
- test: TestAirgapUpgradeFromEC18
runner: embedded-cluster
- test: TestSingleNodeAirgapDisasterRecovery
runner: embedded-cluster
- test: TestMultiNodeAirgapHAInstallation
Expand Down
8 changes: 4 additions & 4 deletions e2e/cluster/docker/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,10 @@ func (c *Cluster) WaitForReady() {
}

func (c *Cluster) Cleanup(envs ...map[string]string) {
if c.t.Failed() {
c.generateSupportBundle(envs...)
c.copyPlaywrightReport()
}
// if c.t.Failed() {
c.generateSupportBundle(envs...)
c.copyPlaywrightReport()
//}
for _, node := range c.Nodes {
node.Destroy()
}
Expand Down
8 changes: 4 additions & 4 deletions e2e/cluster/lxd/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -989,10 +989,10 @@ func (c *Cluster) InstallTestDependenciesDebian(t *testing.T, node int, withProx
}

func (c *Cluster) Cleanup(envs ...map[string]string) {
if c.T.Failed() {
c.generateSupportBundle(envs...)
c.copyPlaywrightReport()
}
// if c.T.Failed() {
c.generateSupportBundle(envs...)
c.copyPlaywrightReport()
// }
}

func (c *Cluster) SetupPlaywrightAndRunTest(testName string, args ...string) (string, string, error) {
Expand Down
118 changes: 114 additions & 4 deletions e2e/install_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ func TestUpgradeEC18FromReplicatedApp(t *testing.T) {

tc := docker.NewCluster(&docker.ClusterInput{
T: t,
Nodes: 1,
Nodes: 2,
Distro: "debian-bookworm",
K0sDir: "/var/lib/k0s",
})
Expand All @@ -547,6 +547,12 @@ func TestUpgradeEC18FromReplicatedApp(t *testing.T) {
t.Fatalf("fail to download embedded-cluster on node 0: %v: %s: %s", err, stdout, stderr)
}

t.Logf("%s: downloading embedded-cluster 1.8.0+k8s-1.28 on worker node", time.Now().Format(time.RFC3339))
line = []string{"vandoor-prepare.sh", "1.8.0+k8s-1.28", os.Getenv("LICENSE_ID"), "false"}
if stdout, stderr, err := tc.RunCommandOnNode(1, line); err != nil {
t.Fatalf("fail to download embedded-cluster on node 0: %v: %s: %s", err, stdout, stderr)
}

t.Logf("%s: installing embedded-cluster 1.8.0+k8s-1.28 on node 0", time.Now().Format(time.RFC3339))
line = []string{"single-node-install.sh", "ui"}
if stdout, stderr, err := tc.RunCommandOnNode(0, line, withEnv); err != nil {
Expand All @@ -560,6 +566,29 @@ func TestUpgradeEC18FromReplicatedApp(t *testing.T) {
t.Fatalf("fail to run playwright test deploy-ec18-app-version: %v: %s: %s", err, stdout, stderr)
}

t.Logf("%s: generating a new worker token command", time.Now().Format(time.RFC3339))
stdout, stderr, err := tc.RunPlaywrightTest("get-ec18-join-worker-command")
if err != nil {
t.Fatalf("fail to generate worker join token:\nstdout: %s\nstderr: %s", stdout, stderr)
}
command, err := findJoinCommandInOutput(stdout)
if err != nil {
t.Fatalf("fail to find the join command in the output: %v: %s: %s", err, stdout, stderr)
}
t.Log("worker join token command:", command)

t.Logf("%s: joining worker node to the cluster as a worker", time.Now().Format(time.RFC3339))
if stdout, stderr, err := tc.RunCommandOnNode(1, strings.Split(command, " ")); err != nil {
t.Fatalf("fail to join worker node to the cluster as a worker: %v: %s: %s", err, stdout, stderr)
}

// wait for the nodes to report as ready.
t.Logf("%s: all nodes joined, waiting for them to be ready", time.Now().Format(time.RFC3339))
stdout, stderr, err = tc.RunCommandOnNode(0, []string{"wait-for-ready-nodes.sh", "2"}, withEnv)
if err != nil {
t.Fatalf("fail to wait for ready nodes: %v: %s: %s", err, stdout, stderr)
}

t.Logf("%s: checking installation state", time.Now().Format(time.RFC3339))
line = []string{"check-installation-state.sh", "1.8.0+k8s-1.28", "v1.28.11"}
if stdout, stderr, err := tc.RunCommandOnNode(0, line, withEnv); err != nil {
Expand All @@ -580,6 +609,18 @@ func TestUpgradeEC18FromReplicatedApp(t *testing.T) {
t.Fatalf("fail to check postupgrade state: %v: %s: %s", err, stdout, stderr)
}

t.Logf("%s: resetting worker node", time.Now().Format(time.RFC3339))
line = []string{"reset-installation.sh"}
if stdout, stderr, err := tc.RunCommandOnNode(1, line, withEnv); err != nil {
t.Fatalf("fail to reset worker node: %v: %s: %s", err, stdout, stderr)
}

t.Logf("%s: resetting node 0", time.Now().Format(time.RFC3339))
line = []string{"reset-installation.sh"}
if stdout, stderr, err := tc.RunCommandOnNode(0, line, withEnv); err != nil {
t.Fatalf("fail to reset node 0: %v: %s: %s", err, stdout, stderr)
}

t.Logf("%s: test complete", time.Now().Format(time.RFC3339))
}

Expand Down Expand Up @@ -967,7 +1008,7 @@ func TestSingleNodeAirgapUpgradeCustomCIDR(t *testing.T) {
t.Logf("%s: test complete", time.Now().Format(time.RFC3339))
}

func TestSingleNodeAirgapUpgradeFromEC18(t *testing.T) {
func TestAirgapUpgradeFromEC18(t *testing.T) {
t.Parallel()

RequireEnvVars(t, []string{"SHORT_SHA", "AIRGAP_LICENSE_ID"})
Expand All @@ -987,7 +1028,7 @@ func TestSingleNodeAirgapUpgradeFromEC18(t *testing.T) {

tc := lxd.NewCluster(&lxd.ClusterInput{
T: t,
Nodes: 1,
Nodes: 2,
Image: "debian/12",
WithProxy: true,
AirgapInstallBundlePath: airgapInstallBundlePath,
Expand All @@ -1003,11 +1044,17 @@ func TestSingleNodeAirgapUpgradeFromEC18(t *testing.T) {
t.Logf("failed to remove airgap upgrade bundle: %v", err)
}

// upgrade airgap bundle is only needed on the first node
line := []string{"rm", "/assets/ec-release-upgrade.tgz"}
if _, _, err := tc.RunCommandOnNode(1, line); err != nil {
t.Fatalf("fail to remove upgrade airgap bundle on node %s: %v", tc.Nodes[1], err)
}

// install "curl" dependency on node 0 for app version checks.
tc.InstallTestDependenciesDebian(t, 0, true)

t.Logf("%s: preparing embedded cluster airgap files", time.Now().Format(time.RFC3339))
line := []string{"airgap-prepare.sh"}
line = []string{"airgap-prepare.sh"}
if _, _, err := tc.RunCommandOnNode(0, line); err != nil {
t.Fatalf("fail to prepare airgap files on node %s: %v", tc.Nodes[0], err)
}
Expand All @@ -1030,6 +1077,50 @@ func TestSingleNodeAirgapUpgradeFromEC18(t *testing.T) {
t.Fatalf("fail to run playwright test deploy-ec18-app-version: %v", err)
}

// generate worker node join command.
t.Logf("%s: generating a new worker token command", time.Now().Format(time.RFC3339))
stdout, stderr, err := tc.RunPlaywrightTest("get-ec18-join-worker-command")
if err != nil {
t.Fatalf("fail to generate worker join token:\nstdout: %s\nstderr: %s", stdout, stderr)
}
workerCommand, err := findJoinCommandInOutput(stdout)
if err != nil {
t.Fatalf("fail to find the join command in the output: %v", err)
}
t.Log("worker join token command:", workerCommand)

// join the worker node
t.Logf("%s: preparing embedded cluster airgap files on worker node", time.Now().Format(time.RFC3339))
line = []string{"airgap-prepare.sh"}
if _, _, err := tc.RunCommandOnNode(1, line); err != nil {
t.Fatalf("fail to prepare airgap files on worker node: %v", err)
}
t.Logf("%s: joining worker node to the cluster", time.Now().Format(time.RFC3339))
if _, _, err := tc.RunCommandOnNode(1, strings.Split(workerCommand, " ")); err != nil {
t.Fatalf("fail to join worker node to the cluster: %v", err)
}
// remove artifacts after joining to save space
line = []string{"rm", "/assets/release.airgap"}
if _, _, err := tc.RunCommandOnNode(1, line); err != nil {
t.Fatalf("fail to remove airgap bundle on worker node: %v", err)
}
line = []string{"rm", "/usr/local/bin/embedded-cluster"}
if _, _, err := tc.RunCommandOnNode(1, line); err != nil {
t.Fatalf("fail to remove embedded-cluster binary on worker node: %v", err)
}
line = []string{"rm", "/var/lib/embedded-cluster/bin/embedded-cluster"}
if _, _, err := tc.RunCommandOnNode(1, line); err != nil {
t.Fatalf("fail to remove embedded-cluster binary on node %s: %v", tc.Nodes[0], err)
}

// wait for the nodes to report as ready.
t.Logf("%s: all nodes joined, waiting for them to be ready", time.Now().Format(time.RFC3339))
stdout, _, err = tc.RunCommandOnNode(0, []string{"wait-for-ready-nodes.sh", "2"}, withEnv)
if err != nil {
t.Log(stdout)
t.Fatalf("fail to wait for ready nodes: %v", err)
}

t.Logf("%s: checking installation state after app deployment", time.Now().Format(time.RFC3339))
line = []string{
"check-airgap-installation-state.sh",
Expand Down Expand Up @@ -1066,6 +1157,25 @@ func TestSingleNodeAirgapUpgradeFromEC18(t *testing.T) {
t.Fatalf("fail to check postupgrade state: %v", err)
}

// TODO: reset fails with the following error:
// error: could not reset k0s: exit status 1, time="2024-10-17 22:44:52" level=warning msg="To ensure a full reset, a node reboot is recommended."
// Error: errors received during clean-up: [failed to delete /run/k0s. err: unlinkat /run/k0s/containerd/io.containerd.grpc.v1.cri/sandboxes/.../shm: device or resource busy]

// t.Logf("%s: resetting worker node", time.Now().Format(time.RFC3339))
// line = []string{"reset-installation.sh"}
// if stdout, stderr, err := tc.RunCommandOnNode(1, line, withEnv); err != nil {
// t.Fatalf("fail to reset worker node: %v: %s: %s", err, stdout, stderr)
// }

// // use upgrade binary for reset
// withUpgradeBin := map[string]string{"EMBEDDED_CLUSTER_BIN": "embedded-cluster-upgrade"}

// t.Logf("%s: resetting node 0", time.Now().Format(time.RFC3339))
// line = []string{"reset-installation.sh"}
// if stdout, stderr, err := tc.RunCommandOnNode(0, line, withEnv, withUpgradeBin); err != nil {
// t.Fatalf("fail to reset node 0: %v: %s: %s", err, stdout, stderr)
// }

t.Logf("%s: test complete", time.Now().Format(time.RFC3339))
}

Expand Down
13 changes: 13 additions & 0 deletions e2e/playwright/tests/get-ec18-join-worker-command/test.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { test, expect } from '@playwright/test';
import { login } from '../shared';

// Logs in, opens the "Add node" dialog from the Cluster Management page,
// ticks the checkbox labelled "abc" (presumably the node role — confirm
// against the application under test), and prints the displayed join command
// to stdout as a single-line JSON object of the form {"command":"..."} so the
// process that launched this Playwright test can parse it.
test('get join worker command', async ({ page }) => {
  await login(page);
  await page.locator('.NavItem').getByText('Cluster Management', { exact: true }).click();
  await page.getByRole('button', { name: 'Add node', exact: true }).click();
  await expect(page.locator('.Modal-body')).toBeVisible();
  await expect(page.getByRole('heading')).toContainText('Add a Node');
  await page.locator('.BoxedCheckbox').getByText('abc', { exact: true }).click();
  const joinCommand = await page.locator('.react-prism.language-bash').first().textContent();
  // textContent() can resolve to null; fail here with a clear assertion rather
  // than emitting {"command":"null"} and failing later when the command is run.
  expect(joinCommand).toBeTruthy();
  // JSON.stringify escapes quotes, backslashes and newlines, so the output is
  // always valid JSON. For plain commands it is byte-identical to the previous
  // hand-built template string.
  console.log(JSON.stringify({ command: joinCommand }));
});
1 change: 1 addition & 0 deletions e2e/scripts/common.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/bin/bash

# Name of the embedded-cluster binary that scripts invoke via
# "${EMBEDDED_CLUSTER_BIN}"; falls back to "embedded-cluster" when the caller
# has not set it (or set it to an empty string).
export EMBEDDED_CLUSTER_BIN="${EMBEDDED_CLUSTER_BIN:-embedded-cluster}"
# Base directory whose bin/ subdirectory is appended to PATH below; falls back
# to /var/lib/embedded-cluster when unset or empty.
export EMBEDDED_CLUSTER_BASE_DIR="${EMBEDDED_CLUSTER_BASE_DIR:-/var/lib/embedded-cluster}"
# Always overwritten with this fixed URL; a value from the caller's
# environment is not honoured.
export EMBEDDED_CLUSTER_METRICS_BASEURL="https://staging.replicated.app"
# Appended after the existing entries, so earlier PATH entries take precedence.
export PATH="$PATH:${EMBEDDED_CLUSTER_BASE_DIR}/bin"
Expand Down
2 changes: 1 addition & 1 deletion e2e/scripts/reset-installation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ DIR=/usr/local/bin
main() {
local additional_flags=("$@")

if ! embedded-cluster reset --no-prompt "${additional_flags[@]}" | tee /tmp/log ; then
if ! "${EMBEDDED_CLUSTER_BIN}" reset --no-prompt "${additional_flags[@]}" | tee /tmp/log ; then
echo "Failed to uninstall embedded-cluster"
exit 1
fi
Expand Down
72 changes: 56 additions & 16 deletions operator/pkg/charts/charts.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ func updateInfraChartsFromInstall(in *v1beta1.Installation, clusterConfig *k0sv1
if chart.Name == "embedded-cluster-operator" {
newVals, err := helm.UnmarshalValues(chart.Values)
if err != nil {
return nil, fmt.Errorf("unmarshal admin-console.values: %w", err)
return nil, fmt.Errorf("unmarshal embedded-cluster-operator.values: %w", err)
}

// embedded-cluster-operator has "embeddedBinaryName" and "embeddedClusterID" as dynamic values
Expand All @@ -219,17 +219,29 @@ func updateInfraChartsFromInstall(in *v1beta1.Installation, clusterConfig *k0sv1

charts[i].Values, err = helm.MarshalValues(newVals)
if err != nil {
return nil, fmt.Errorf("marshal admin-console.values: %w", err)
return nil, fmt.Errorf("marshal embedded-cluster-operator.values: %w", err)
}
}
if chart.Name == "docker-registry" {
if !in.Spec.AirGap {
continue
if chart.Name == "openebs" {
newVals, err := helm.UnmarshalValues(chart.Values)
if err != nil {
return nil, fmt.Errorf("unmarshal openebs.values: %w", err)
}

newVals, err = helm.SetValue(newVals, `["localpv-provisioner"].localpv.basePath`, provider.EmbeddedClusterOpenEBSLocalSubDir())
if err != nil {
return nil, fmt.Errorf("set helm values openebs.localpv-provisioner.localpv.basePath: %w", err)
}

charts[i].Values, err = helm.MarshalValues(newVals)
if err != nil {
return nil, fmt.Errorf("marshal openebs.values: %w", err)
}
}
if chart.Name == "docker-registry" {
newVals, err := helm.UnmarshalValues(chart.Values)
if err != nil {
return nil, fmt.Errorf("unmarshal admin-console.values: %w", err)
return nil, fmt.Errorf("unmarshal docker-registry.values: %w", err)
}

// handle the registry IP, which will always be present in airgap
Expand Down Expand Up @@ -259,16 +271,38 @@ func updateInfraChartsFromInstall(in *v1beta1.Installation, clusterConfig *k0sv1

charts[i].Values, err = helm.MarshalValues(newVals)
if err != nil {
return nil, fmt.Errorf("marshal admin-console.values: %w", err)
return nil, fmt.Errorf("marshal docker-registry.values: %w", err)
}
}
if chart.Name == "seaweedfs" {
newVals, err := helm.UnmarshalValues(chart.Values)
if err != nil {
return nil, fmt.Errorf("unmarshal seaweedfs.values: %w", err)
}

dataPath := filepath.Join(provider.EmbeddedClusterSeaweedfsSubDir(), "ssd")
newVals, err = helm.SetValue(newVals, "global.data.hostPathPrefix", dataPath)
if err != nil {
return nil, fmt.Errorf("set helm values seaweedfs.global.data.hostPathPrefix: %w", err)
}
logsPath := filepath.Join(provider.EmbeddedClusterSeaweedfsSubDir(), "storage")
newVals, err = helm.SetValue(newVals, "global.logs.hostPathPrefix", logsPath)
if err != nil {
return nil, fmt.Errorf("set helm values seaweedfs.global.logs.hostPathPrefix: %w", err)
}

charts[i].Values, err = helm.MarshalValues(newVals)
if err != nil {
return nil, fmt.Errorf("marshal seaweedfs.values: %w", err)
}
}
if chart.Name == "velero" {
if in.Spec.Proxy != nil {
newVals, err := helm.UnmarshalValues(chart.Values)
if err != nil {
return nil, fmt.Errorf("unmarshal admin-console.values: %w", err)
}
newVals, err := helm.UnmarshalValues(chart.Values)
if err != nil {
return nil, fmt.Errorf("unmarshal velero.values: %w", err)
}

if in.Spec.Proxy != nil {
extraEnvVars := map[string]interface{}{
"extraEnvVars": map[string]string{
"HTTP_PROXY": in.Spec.Proxy.HTTPProxy,
Expand All @@ -281,11 +315,17 @@ func updateInfraChartsFromInstall(in *v1beta1.Installation, clusterConfig *k0sv1
if err != nil {
return nil, fmt.Errorf("set helm values velero.configuration: %w", err)
}
}

charts[i].Values, err = helm.MarshalValues(newVals)
if err != nil {
return nil, fmt.Errorf("marshal admin-console.values: %w", err)
}
podVolumePath := filepath.Join(provider.EmbeddedClusterK0sSubDir(), "kubelet/pods")
newVals, err = helm.SetValue(newVals, "nodeAgent.podVolumePath", podVolumePath)
if err != nil {
return nil, fmt.Errorf("set helm values velero.nodeAgent.podVolumePath: %w", err)
}

charts[i].Values, err = helm.MarshalValues(newVals)
if err != nil {
return nil, fmt.Errorf("marshal velero.values: %w", err)
}
}
}
Expand Down
Loading

0 comments on commit a1c8b0f

Please sign in to comment.