cmd/hi: improve test cleanup to reduce CI disk usage (#2881)

This commit is contained in:
Kristoffer Dalby
2025-11-28 16:59:54 +01:00
committed by GitHub
parent db293e0698
commit ed78bf4b98
7 changed files with 230 additions and 17 deletions

6
cmd/hi/README.md Normal file
View File

@@ -0,0 +1,6 @@
# hi
hi (headscale integration runner) is an entirely "vibe coded" wrapper around our
[integration test suite](../integration). It essentially runs the docker
commands for you with some added benefits of extracting resources like logs and
databases.

View File

@@ -3,6 +3,9 @@ package main
import ( import (
"context" "context"
"fmt" "fmt"
"log"
"os"
"path/filepath"
"strings" "strings"
"time" "time"
@@ -205,3 +208,110 @@ func cleanCacheVolume(ctx context.Context) error {
return nil return nil
} }
// cleanupSuccessfulTestArtifacts removes artifacts from successful test runs to save disk space.
// Only the direct children of logsDir are inspected. The following large
// artifacts, which are mainly useful for debugging failures, are removed:
//   - Profile data (directories named "pprof")
//   - MapResponse data (directories named "mapresponses")
//   - Database dumps (hs-*/ts-* files ending in ".db")
//   - Prometheus metrics files (hs-*/ts-* files ending in "_metrics.txt")
//   - Status files (hs-*/ts-* files ending in "_status.json")
//
// Everything else is preserved, including log files (.log), which are small
// and useful for verification.
//
// Removal is best-effort: a failure to remove a single entry is logged when
// verbose is true and otherwise skipped. An error is returned only when
// logsDir itself cannot be read. The "freed" figure in the summary log only
// includes entries that were actually removed.
func cleanupSuccessfulTestArtifacts(logsDir string, verbose bool) error {
	entries, err := os.ReadDir(logsDir)
	if err != nil {
		return fmt.Errorf("failed to read logs directory: %w", err)
	}

	var (
		removedFiles, removedDirs int
		totalSize                 int64
	)

	for _, entry := range entries {
		name := entry.Name()
		fullPath := filepath.Join(logsDir, name)

		if entry.IsDir() {
			// Only pprof and mapresponses directories (typically large) are
			// removed; they contain artifacts from all containers in the run.
			if name != "pprof" && name != "mapresponses" {
				continue
			}

			// The size has to be measured before the directory is gone, but it
			// is only counted once the removal has succeeded.
			size, sizeErr := getDirSize(fullPath)

			if err := os.RemoveAll(fullPath); err != nil {
				if verbose {
					log.Printf("Warning: failed to remove directory %s: %v", name, err)
				}

				continue
			}

			removedDirs++
			if sizeErr == nil {
				totalSize += size
			}

			if verbose {
				log.Printf("Removed directory: %s/", name)
			}

			continue
		}

		// Only process test-related files (headscale and tailscale).
		if !strings.HasPrefix(name, "hs-") && !strings.HasPrefix(name, "ts-") {
			continue
		}

		// Remove database, metrics, and status files, but keep logs.
		shouldRemove := strings.HasSuffix(name, ".db") ||
			strings.HasSuffix(name, "_metrics.txt") ||
			strings.HasSuffix(name, "_status.json")
		if !shouldRemove {
			continue
		}

		// Stat before removing; the size is only counted on success.
		info, infoErr := entry.Info()

		if err := os.Remove(fullPath); err != nil {
			if verbose {
				log.Printf("Warning: failed to remove file %s: %v", name, err)
			}

			continue
		}

		removedFiles++
		if infoErr == nil {
			totalSize += info.Size()
		}

		if verbose {
			log.Printf("Removed file: %s", name)
		}
	}

	if removedFiles > 0 || removedDirs > 0 {
		const bytesPerMB = 1024 * 1024
		log.Printf("Cleaned up %d files and %d directories (freed ~%.2f MB)",
			removedFiles, removedDirs, float64(totalSize)/bytesPerMB)
	}

	return nil
}
// getDirSize returns the combined size, in bytes, of every non-directory
// entry found while walking the tree rooted at path.
//
// The walk aborts on the first error it is handed; in that case the bytes
// accumulated so far are returned together with that error.
func getDirSize(path string) (int64, error) {
	var total int64

	walkErr := filepath.Walk(path, func(_ string, fi os.FileInfo, visitErr error) error {
		switch {
		case visitErr != nil:
			// Propagate the failure, which stops the walk.
			return visitErr
		case fi.IsDir():
			// Directories themselves do not contribute to the total.
			return nil
		}

		total += fi.Size()

		return nil
	})

	return total, walkErr
}

View File

@@ -154,6 +154,19 @@ func runTestContainer(ctx context.Context, config *RunConfig) error {
if cleanErr := cleanupAfterTest(ctx, cli, resp.ID); cleanErr != nil && config.Verbose { if cleanErr := cleanupAfterTest(ctx, cli, resp.ID); cleanErr != nil && config.Verbose {
log.Printf("Warning: post-test cleanup failed: %v", cleanErr) log.Printf("Warning: post-test cleanup failed: %v", cleanErr)
} }
// Clean up artifacts from successful tests to save disk space in CI
if exitCode == 0 {
if config.Verbose {
log.Printf("Test succeeded, cleaning up artifacts to save disk space...")
}
cleanErr := cleanupSuccessfulTestArtifacts(logsDir, config.Verbose)
if cleanErr != nil && config.Verbose {
log.Printf("Warning: artifact cleanup failed: %v", cleanErr)
}
}
} }
if err != nil { if err != nil {

View File

@@ -1,17 +1,25 @@
package dockertestutil package dockertestutil
import ( import (
"context"
"os/exec" "os/exec"
"time"
) )
// RunDockerBuildForDiagnostics runs docker build manually to get detailed error output.
// This is used when a docker build fails to provide more detailed diagnostic information
// than what dockertest typically provides.
//
// The build is run with plain progress output and without the build cache,
// using dockerfile as the Dockerfile and contextDir as the build context.
//
// Returns the combined stdout/stderr of the build regardless of success/failure,
// and an error if the build failed (including being stopped by the timeout).
func RunDockerBuildForDiagnostics(contextDir, dockerfile string) (string, error) {
	// Use a context with timeout to prevent hanging builds.
	const buildTimeout = 10 * time.Minute

	ctx, cancel := context.WithTimeout(context.Background(), buildTimeout)
	defer cancel()

	cmd := exec.CommandContext(ctx, "docker", "build", "--progress=plain", "--no-cache", "-f", dockerfile, contextDir)
	output, err := cmd.CombinedOutput()

	return string(output), err
}

View File

@@ -108,6 +108,8 @@ func CleanUnreferencedNetworks(pool *dockertest.Pool) error {
} }
// CleanImagesInCI removes images if running in CI. // CleanImagesInCI removes images if running in CI.
// It only removes dangling (untagged) images to avoid forcing rebuilds.
// Tagged images (golang:*, tailscale/tailscale:*, etc.) are automatically preserved.
func CleanImagesInCI(pool *dockertest.Pool) error { func CleanImagesInCI(pool *dockertest.Pool) error {
if !util.IsCI() { if !util.IsCI() {
log.Println("Skipping image cleanup outside of CI") log.Println("Skipping image cleanup outside of CI")
@@ -119,9 +121,26 @@ func CleanImagesInCI(pool *dockertest.Pool) error {
return fmt.Errorf("getting images: %w", err) return fmt.Errorf("getting images: %w", err)
} }
removedCount := 0
for _, image := range images { for _, image := range images {
log.Printf("removing image: %s, %v", image.ID, image.RepoTags) // Only remove dangling (untagged) images to avoid forcing rebuilds
_ = pool.Client.RemoveImage(image.ID) // Dangling images have no RepoTags or only have "<none>:<none>"
if len(image.RepoTags) == 0 || (len(image.RepoTags) == 1 && image.RepoTags[0] == "<none>:<none>") {
log.Printf("Removing dangling image: %s", image.ID[:12])
err := pool.Client.RemoveImage(image.ID)
if err != nil {
log.Printf("Warning: failed to remove image %s: %v", image.ID[:12], err)
} else {
removedCount++
}
}
}
if removedCount > 0 {
log.Printf("Removed %d dangling images in CI", removedCount)
} else {
log.Println("No dangling images to remove in CI")
} }
return nil return nil

View File

@@ -462,11 +462,33 @@ func New(
if err != nil { if err != nil {
// Try to get more detailed build output // Try to get more detailed build output
log.Printf("Docker build failed, attempting to get detailed output...") log.Printf("Docker build failed, attempting to get detailed output...")
buildOutput := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, IntegrationTestDockerFileName)
if buildOutput != "" { buildOutput, buildErr := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, IntegrationTestDockerFileName)
return nil, fmt.Errorf("could not start headscale container: %w\n\nDetailed build output:\n%s", err, buildOutput)
// Show the last 100 lines of build output to avoid overwhelming the logs
lines := strings.Split(buildOutput, "\n")
const maxLines = 100
startLine := 0
if len(lines) > maxLines {
startLine = len(lines) - maxLines
} }
return nil, fmt.Errorf("could not start headscale container: %w", err)
relevantOutput := strings.Join(lines[startLine:], "\n")
if buildErr != nil {
// The diagnostic build also failed - this is the real error
return nil, fmt.Errorf("could not start headscale container: %w\n\nDocker build failed. Last %d lines of output:\n%s", err, maxLines, relevantOutput)
}
if buildOutput != "" {
// Build succeeded on retry but container creation still failed
return nil, fmt.Errorf("could not start headscale container: %w\n\nDocker build succeeded on retry, but container creation failed. Last %d lines of build output:\n%s", err, maxLines, relevantOutput)
}
// No output at all - diagnostic build command may have failed
return nil, fmt.Errorf("could not start headscale container: %w\n\nUnable to get diagnostic build output (command may have failed silently)", err)
} }
log.Printf("Created %s container\n", hsic.hostname) log.Printf("Created %s container\n", hsic.hostname)

View File

@@ -327,16 +327,52 @@ func New(
if err != nil { if err != nil {
// Try to get more detailed build output // Try to get more detailed build output
log.Printf("Docker build failed for %s, attempting to get detailed output...", hostname) log.Printf("Docker build failed for %s, attempting to get detailed output...", hostname)
buildOutput := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, "Dockerfile.tailscale-HEAD")
if buildOutput != "" { buildOutput, buildErr := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, "Dockerfile.tailscale-HEAD")
// Show the last 100 lines of build output to avoid overwhelming the logs
lines := strings.Split(buildOutput, "\n")
const maxLines = 100
startLine := 0
if len(lines) > maxLines {
startLine = len(lines) - maxLines
}
relevantOutput := strings.Join(lines[startLine:], "\n")
if buildErr != nil {
// The diagnostic build also failed - this is the real error
return nil, fmt.Errorf( return nil, fmt.Errorf(
"%s could not start tailscale container (version: %s): %w\n\nDetailed build output:\n%s", "%s could not start tailscale container (version: %s): %w\n\nDocker build failed. Last %d lines of output:\n%s",
hostname, hostname,
version, version,
err, err,
buildOutput, maxLines,
relevantOutput,
) )
} }
if buildOutput != "" {
// Build succeeded on retry but container creation still failed
return nil, fmt.Errorf(
"%s could not start tailscale container (version: %s): %w\n\nDocker build succeeded on retry, but container creation failed. Last %d lines of build output:\n%s",
hostname,
version,
err,
maxLines,
relevantOutput,
)
}
// No output at all - diagnostic build command may have failed
return nil, fmt.Errorf(
"%s could not start tailscale container (version: %s): %w\n\nUnable to get diagnostic build output (command may have failed silently)",
hostname,
version,
err,
)
} }
case "unstable": case "unstable":
tailscaleOptions.Repository = "tailscale/tailscale" tailscaleOptions.Repository = "tailscale/tailscale"
@@ -580,7 +616,6 @@ func (t *TailscaleInContainer) Restart() error {
} }
return struct{}{}, nil return struct{}{}, nil
}, backoff.WithBackOff(backoff.NewExponentialBackOff()), backoff.WithMaxElapsedTime(30*time.Second)) }, backoff.WithBackOff(backoff.NewExponentialBackOff()), backoff.WithMaxElapsedTime(30*time.Second))
if err != nil { if err != nil {
return fmt.Errorf("timeout waiting for container %s to restart and become ready: %w", t.hostname, err) return fmt.Errorf("timeout waiting for container %s to restart and become ready: %w", t.hostname, err)
} }