Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions acceptance/acceptance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,29 @@ func getSkipReason(config *internal.TestConfig, configPath string) string {
return ""
}

// ciRunID accepts a CI run id made of 1 to 16 ASCII digits and nothing else.
var ciRunID = regexp.MustCompile(`^[0-9]{1,16}$`)

// ciUniqueName returns random with its leading characters replaced by the marker
// "ci<runID>x", so the result has the same length as random.
//
// The marker uses only lowercase letters and digits, like the base32 name it
// overwrites, because $UNIQUE_NAME ends up in app names (which reject underscores
// and uppercase) as well as Python and Unity Catalog identifiers (which reject
// hyphens); no punctuation separator is valid in all of them. The trailing "x"
// delimits the all-digit run id, so the sweep prefix of one run is never a prefix
// of another run's marker. Length is preserved because "app-$UNIQUE_NAME" already
// sits exactly at the 30-character app name maximum.
//
// random is returned unchanged when runID is empty or not 1-16 digits, or when
// fewer than 8 characters of random would remain after the marker.
func ciUniqueName(runID, random string) string {
	// Fewest characters of the original random name that must survive.
	const minRandom = 8

	if ciRunID.MatchString(runID) {
		marker := "ci" + runID + "x"
		// keep is negative when random is shorter than the marker itself.
		if keep := len(random) - len(marker); keep >= minRandom {
			return marker + random[:keep]
		}
	}
	return random
}

func runTest(t *testing.T,
dir string,
variant int,
Expand Down Expand Up @@ -643,6 +666,8 @@ func runTest(t *testing.T,

id := uuid.New()
uniqueName := strings.ToLower(strings.Trim(base32.StdEncoding.EncodeToString(id[:]), "="))
// Embed the CI run id, when present, so leaked resources can be attributed to a run and swept by prefix.
uniqueName = ciUniqueName(os.Getenv("GITHUB_RUN_ID"), uniqueName)
repls.Set(uniqueName, "[UNIQUE_NAME]")

var tmpDir string
Expand Down
27 changes: 27 additions & 0 deletions acceptance/unique_name_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package acceptance_test

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestCIUniqueName runs ciUniqueName over valid, missing, malformed and
// boundary-length run ids, always against the same 26-character random name.
func TestCIUniqueName(t *testing.T) {
	// 26 lowercase base32 characters, like the generated unique name.
	const random = "osr5mzrrvzb73juixjoviti24y"

	cases := []struct {
		runID string
		want  string
	}{
		// Run id embedded: same length as the input, lowercase-alphanumeric, sweepable prefix.
		{"15799017600", "ci15799017600xosr5mzrrvzb7"},
		{"1", "ci1xosr5mzrrvzb73juixjovit"},

		// Missing or malformed run id: the name comes back unchanged.
		{"", random},
		{"abc123", random},
		{"123 456", random},

		// A 15-digit run id still leaves exactly the 8-character random minimum: prefixed.
		{"123456789012345", "ci123456789012345xosr5mzrr"},

		// A 16-digit run id would leave only 7 random characters: unchanged.
		{"1234567890123456", random},
	}

	for _, tc := range cases {
		assert.Equal(t, tc.want, ciUniqueName(tc.runID, random))
	}
}
85 changes: 85 additions & 0 deletions tools/sweep_test_resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env python3
"""Sweep leaked acceptance-test resources by name prefix.

Lists (and with --delete, deletes) warehouses, pipelines and jobs whose names
start with the given prefix, e.g. the per-run prefix "ci<GITHUB_RUN_ID>x" that
the acceptance harness embeds into $UNIQUE_NAME on CI cloud runs.

Authentication is taken from the environment (DATABRICKS_HOST, DATABRICKS_TOKEN
or any other auth supported by the databricks CLI).

Usage:
tools/sweep_test_resources.py ci15799017600x # dry run: list only
tools/sweep_test_resources.py ci15799017600x --delete # delete matches
"""

import argparse
import json
import subprocess
import sys


def run_json(*args):
    """Run `databricks <args> --output json` and return the decoded JSON.

    Output that is empty or whitespace-only yields an empty list.
    subprocess.CalledProcessError propagates when the command exits non-zero.
    """
    command = ["databricks"] + list(args) + ["--output", "json"]
    raw = subprocess.check_output(command, text=True)
    if not raw.strip():
        return []
    return json.loads(raw)


def sweep(kind, items, name_of, id_of, delete_args, prefix, delete):
    """Print, and optionally delete, every item whose name starts with prefix.

    Args:
        kind: label printed in the first output column and in error messages.
        items: iterable of resource records to inspect.
        name_of: callable returning a record's name; a falsy result is treated as "".
        id_of: callable returning a record's id; it is converted with str().
        delete_args: databricks CLI arguments that precede the id in the delete command.
        prefix: name prefix selecting the records to report.
        delete: when true, run the delete command for each match.

    Returns:
        The number of delete commands that exited non-zero.
    """
    failed = 0
    for entry in items:
        name = name_of(entry) or ""
        if name.startswith(prefix):
            res_id = str(id_of(entry))
            # One tab-separated line per match, printed in dry-run and delete mode alike.
            print(f"{kind}\t{res_id}\t{name}")
            if not delete:
                continue
            command = ["databricks"] + list(delete_args) + [res_id]
            try:
                subprocess.check_call(command)
            except subprocess.CalledProcessError as e:
                # Keep going: one failed delete must not stop the rest of the sweep.
                print(f"failed to delete {kind} {res_id}: {e}", file=sys.stderr)
                failed += 1
    return failed


def main():
    """Parse the command line and sweep warehouses, pipelines and jobs by name prefix.

    Each resource kind is listed and swept independently. A list command that
    exits non-zero is reported on stderr and counted as a failure, and the
    remaining kinds are still processed, matching how sweep() treats a failed
    delete.

    Returns:
        0 when every list and delete command succeeded, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("prefix", help="resource name prefix, e.g. ci<GITHUB_RUN_ID>x")
    parser.add_argument("--delete", action="store_true", help="delete matches (default: list only)")
    args = parser.parse_args()

    # An empty prefix would match every resource in the workspace.
    if not args.prefix:
        parser.error("prefix must not be empty")

    # (kind, CLI list command, name getter, id getter, CLI delete command)
    targets = [
        (
            "warehouse",
            ["warehouses", "list"],
            lambda w: w.get("name"),
            lambda w: w.get("id"),
            ["warehouses", "delete"],
        ),
        (
            "pipeline",
            ["pipelines", "list-pipelines"],
            lambda p: p.get("name"),
            lambda p: p.get("pipeline_id"),
            ["pipelines", "delete"],
        ),
        (
            "job",
            ["jobs", "list"],
            # Tolerate a missing or null "settings" object instead of raising.
            lambda j: (j.get("settings") or {}).get("name"),
            lambda j: j.get("job_id"),
            ["jobs", "delete"],
        ),
    ]

    failures = 0
    for kind, list_args, name_of, id_of, delete_args in targets:
        try:
            items = run_json(*list_args)
        except subprocess.CalledProcessError as e:
            # A listing failure for one kind must not leave the other kinds unswept.
            print(f"failed to list {kind}s: {e}", file=sys.stderr)
            failures += 1
            continue
        failures += sweep(kind, items, name_of, id_of, delete_args, args.prefix, args.delete)
    return 1 if failures else 0


# Script entry point: the value returned by main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Loading