From 576e7c093a7a3c72563ea6ed0b087076a8395257 Mon Sep 17 00:00:00 2001
From: Chris Stephens <chris.stephens@databricks.com>
Date: Fri, 12 Jun 2026 12:29:34 -0700
Subject: [PATCH 1/2] acc: Always destroy deployed bundles on test exit in
 cloud runs

When acceptance tests run against real cloud workspaces (CLOUD_ENV set),
a test that fails, times out, or exits mid-script never reaches its own
'bundle destroy' step: scripts run under 'bash -e' and the merged
script.cleanup parts are skipped on failure. The deployed resources
(SQL warehouses, pipelines, jobs, ...) then leak in the shared test
workspaces. Leaked started warehouses recently exhausted a GCP quota and
took CI down for two days; we observed 100+ leaked warehouses and dozens
of leaked test pipelines in a single workspace.

This adds a harness-level safety net: on cloud runs, runTest registers a
t.Cleanup (before starting the script, so it also covers timeouts and
mid-test failures) that scans the test's temp dir for bundle state
directories (<bundle_root>/.databricks/bundle/<target>) and runs
'$CLI bundle destroy --auto-approve --target <target>' in each bundle
root, reusing the exact environment the script ran with.

The mechanism is deliberately best effort and invisible to test output:

- It is a no-op for local testserver runs (gated on CLOUD_ENV).
- It runs after output comparison and logs only via t.Logf, so cleanup
  output is never compared against expected out files.
- Double-destroy is harmless: 'bundle destroy' on an already-destroyed
  bundle exits 0 with 'No active deployment found to destroy!'. In the
  common success path the shared script.cleanup already removed
  .databricks, so nothing is even attempted.
- Destroy failures are logged but never fail the test.

Co-authored-by: Isaac
---
 acceptance/acceptance_test.go | 89 ++++++++++++++++++++++++++++++++++-
 1 file changed, 88 insertions(+), 1 deletion(-)
diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go
index a137d356f1..5913ce5b77 100644
--- a/acceptance/acceptance_test.go
+++ b/acceptance/acceptance_test.go
@@ -794,6 +794,21 @@ func runTest(t *testing.T,
 	}
 	cmd.Dir = tmpDir
 
+	// On cloud runs, guarantee that bundles deployed by the script are destroyed
+	// even if the script fails, times out or exits before reaching its own
+	// "bundle destroy" step. Otherwise real resources (jobs, pipelines, SQL
+	// warehouses, ...) leak in the shared test workspaces; leaked warehouses
+	// have previously exhausted cloud quota and broke CI.
+	// Register before starting the script so it also covers timeouts and
+	// mid-test failures. Cleanup runs after output comparison and only logs
+	// via t.Logf, so it never affects expected output files.
+	if isRunningOnCloud {
+		scriptEnv := slices.Clone(cmd.Env)
+		t.Cleanup(func() {
+			destroyDeployedBundles(t, tmpDir, scriptEnv)
+		})
+	}
+
 	outputPath := filepath.Join(tmpDir, "output.txt")
 	out, err := os.Create(outputPath)
 	require.NoError(t, err)
@@ -864,6 +879,77 @@ func runTest(t *testing.T,
 	}
 }
 
+// destroyDeployedBundles is a best-effort safety net for cloud runs: it finds
+// every bundle state directory created under tmpDir (<bundle_root>/.databricks/bundle/<target>)
+// and runs "bundle destroy" for it.
+//
+// Test scripts remain responsible for destroying the bundles they deploy; this
+// only matters when a script does not get there (failure, timeout, early exit).
+// Notes:
+//   - If the script succeeded with the bundle at the test root, the shared
+//     script.cleanup already removed .databricks, so nothing is found here.
+//   - If the bundle was already destroyed but its state directory remains,
+//     "bundle destroy" exits 0 with "No active deployment found to destroy!".
+//   - Failures are logged but never fail the test.
+func destroyDeployedBundles(t *testing.T, tmpDir string, env []string) {
+	cliPath := os.Getenv("CLI")
+	if cliPath == "" {
+		t.Log("Cleanup: CLI env var is not set, cannot destroy deployed bundles")
+		return
+	}
+
+	for bundleRoot, targets := range findBundleStateDirs(t, tmpDir) {
+		for _, target := range targets {
+			destroyBundle(t, cliPath, bundleRoot, target, env)
+		}
+	}
+}
+
+func destroyBundle(t *testing.T, cliPath, bundleRoot, target string, env []string) {
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
+	defer cancel()
+	cmd := exec.CommandContext(ctx, cliPath, "bundle", "destroy", "--auto-approve", "--target", target)
+	cmd.Dir = bundleRoot
+	cmd.Env = env
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		t.Logf("Cleanup: 'bundle destroy --auto-approve --target %s' in %s failed, resources may have leaked: %s\n%s", target, bundleRoot, err, out)
+	} else {
+		t.Logf("Cleanup: destroyed bundle in %s (target %s)", bundleRoot, target)
+	}
+}
+
+// findBundleStateDirs returns a map from bundle root directory to target names
+// for every .databricks/bundle/<target> directory found under tmpDir.
+func findBundleStateDirs(t *testing.T, tmpDir string) map[string][]string {
+	result := make(map[string][]string)
+	err := filepath.WalkDir(tmpDir, func(path string, d fs.DirEntry, err error) error {
+		if err != nil || !d.IsDir() {
+			return nil
+		}
+		name := d.Name()
+		// Bundles are never deployed from inside virtual environments.
+		if name == ".venv" || name == "site-packages" {
+			return filepath.SkipDir
+		}
+		if name != ".databricks" {
+			return nil
+		}
+		entries, _ := os.ReadDir(filepath.Join(path, "bundle"))
+		for _, e := range entries {
+			if e.IsDir() {
+				bundleRoot := filepath.Dir(path)
+				result[bundleRoot] = append(result[bundleRoot], e.Name())
+			}
+		}
+		return filepath.SkipDir
+	})
+	if err != nil {
+		t.Logf("Cleanup: error scanning %s for deployed bundles: %s", tmpDir, err)
+	}
+	return result
+}
+
 // checkEnvFilters skips the test if any env filter doesn't match testEnv.
 func checkEnvFilters(t *testing.T, testEnv, envFilters []string) {
 	envMap := make(map[string]string, len(testEnv))
@@ -1016,7 +1102,8 @@ func shouldShowDiff(pathNew, valueNew string) bool {
 }
 
 // Returns combined script.prepare (root) + script.prepare (parent) + ... + script + ... + script.cleanup (parent) + ...
-// Note, cleanups are not executed if main script fails; that's not a huge issue, since it runs it temp dir.
+// Note, cleanups are not executed if main script fails; that's not a huge issue for local files, since it runs in a temp dir.
+// For cloud resources deployed via bundles, destroyDeployedBundles acts as a safety net on cloud runs.
 func readMergedScriptContents(t *testing.T, dir string) string {
 	scriptContents := testutil.ReadFile(t, filepath.Join(dir, EntryPointScript))
 

From 2aadb70aee7636c8c5a658dfaec048940f2ee5c2 Mon Sep 17 00:00:00 2001
From: Chris Stephens <chris.stephens@databricks.com>
Date: Fri, 12 Jun 2026 13:07:30 -0700
Subject: [PATCH 2/2] acc: Address lint findings and tighten comments

Use context.WithoutCancel(t.Context()) instead of context.Background()
(t.Context() is already canceled when cleanups run), make the
best-effort nilerr skip explicit, and trim narration comments.

Co-authored-by: Isaac
---
 acceptance/acceptance_test.go | 36 ++++++++++++++---------------------
 1 file changed, 14 insertions(+), 22 deletions(-)

diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go
index 5913ce5b77..a96d5b06f8 100644
--- a/acceptance/acceptance_test.go
+++ b/acceptance/acceptance_test.go
@@ -794,14 +794,10 @@ func runTest(t *testing.T,
 	}
 	cmd.Dir = tmpDir
 
-	// On cloud runs, guarantee that bundles deployed by the script are destroyed
-	// even if the script fails, times out or exits before reaching its own
-	// "bundle destroy" step. Otherwise real resources (jobs, pipelines, SQL
-	// warehouses, ...) leak in the shared test workspaces; leaked warehouses
-	// have previously exhausted cloud quota and broke CI.
-	// Register before starting the script so it also covers timeouts and
-	// mid-test failures. Cleanup runs after output comparison and only logs
-	// via t.Logf, so it never affects expected output files.
+	// On cloud runs, destroy any bundles the script deployed but did not get to
+	// destroy itself (failure, timeout, early exit), so resources do not leak
+	// into the shared test workspaces. Registered before the script starts so
+	// it also covers timeouts.
 	if isRunningOnCloud {
 		scriptEnv := slices.Clone(cmd.Env)
 		t.Cleanup(func() {
@@ -881,16 +877,9 @@ func runTest(t *testing.T,
 
 // destroyDeployedBundles is a best-effort safety net for cloud runs: it finds
 // every bundle state directory created under tmpDir (<bundle_root>/.databricks/bundle/<target>)
-// and runs "bundle destroy" for it.
-//
-// Test scripts remain responsible for destroying the bundles they deploy; this
-// only matters when a script does not get there (failure, timeout, early exit).
-// Notes:
-//   - If the script succeeded with the bundle at the test root, the shared
-//     script.cleanup already removed .databricks, so nothing is found here.
-//   - If the bundle was already destroyed but its state directory remains,
-//     "bundle destroy" exits 0 with "No active deployment found to destroy!".
-//   - Failures are logged but never fail the test.
+// and runs "bundle destroy" for it. On the happy path there is nothing to do:
+// the shared script.cleanup removes .databricks, and destroying an
+// already-destroyed bundle exits 0.
 func destroyDeployedBundles(t *testing.T, tmpDir string, env []string) {
 	cliPath := os.Getenv("CLI")
 	if cliPath == "" {
@@ -906,7 +895,8 @@ func destroyDeployedBundles(t *testing.T, tmpDir string, env []string) {
 }
 
 func destroyBundle(t *testing.T, cliPath, bundleRoot, target string, env []string) {
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
+	// t.Context() is already canceled when cleanups run; derive from it without cancellation.
+	ctx, cancel := context.WithTimeout(context.WithoutCancel(t.Context()), 10*time.Minute)
 	defer cancel()
 	cmd := exec.CommandContext(ctx, cliPath, "bundle", "destroy", "--auto-approve", "--target", target)
 	cmd.Dir = bundleRoot
@@ -924,7 +914,10 @@ func destroyBundle(t *testing.T, cliPath, bundleRoot, target string, env []strin
 func findBundleStateDirs(t *testing.T, tmpDir string) map[string][]string {
 	result := make(map[string][]string)
 	err := filepath.WalkDir(tmpDir, func(path string, d fs.DirEntry, err error) error {
-		if err != nil || !d.IsDir() {
+		if err != nil {
+			return nil //nolint:nilerr // best-effort scan, skip unreadable entries
+		}
+		if !d.IsDir() {
 			return nil
 		}
 		name := d.Name()
@@ -1102,8 +1095,7 @@ func shouldShowDiff(pathNew, valueNew string) bool {
 }
 
 // Returns combined script.prepare (root) + script.prepare (parent) + ... + script + ... + script.cleanup (parent) + ...
-// Note, cleanups are not executed if main script fails; that's not a huge issue for local files, since it runs in a temp dir.
-// For cloud resources deployed via bundles, destroyDeployedBundles acts as a safety net on cloud runs.
+// Note, cleanups are not executed if main script fails; that's not a huge issue, since it runs it temp dir.
 func readMergedScriptContents(t *testing.T, dir string) string {
 	scriptContents := testutil.ReadFile(t, filepath.Join(dir, EntryPointScript))