mirror of
https://github.com/juanfont/headscale.git
synced 2025-11-28 21:18:02 -05:00
cmd/hi: improve test cleanup to reduce CI disk usage (#2881)
This commit is contained in:
6
cmd/hi/README.md
Normal file
6
cmd/hi/README.md
Normal file
@@ -0,0 +1,6 @@
|
||||
# hi
|
||||
|
||||
hi (headscale integration runner) is an entirely "vibe coded" wrapper around our
|
||||
[integration test suite](../integration). It essentially runs the docker
|
||||
commands for you with some added benefits of extracting resources like logs and
|
||||
databases.
|
||||
@@ -3,6 +3,9 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -205,3 +208,110 @@ func cleanCacheVolume(ctx context.Context) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupSuccessfulTestArtifacts removes artifacts from successful test runs to save disk space.
|
||||
// This function removes large artifacts that are mainly useful for debugging failures:
|
||||
// - Database dumps (.db files)
|
||||
// - Profile data (pprof directories)
|
||||
// - MapResponse data (mapresponses directories)
|
||||
// - Prometheus metrics files
|
||||
//
|
||||
// It preserves:
|
||||
// - Log files (.log) which are small and useful for verification.
|
||||
func cleanupSuccessfulTestArtifacts(logsDir string, verbose bool) error {
|
||||
entries, err := os.ReadDir(logsDir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read logs directory: %w", err)
|
||||
}
|
||||
|
||||
var (
|
||||
removedFiles, removedDirs int
|
||||
totalSize int64
|
||||
)
|
||||
|
||||
for _, entry := range entries {
|
||||
name := entry.Name()
|
||||
fullPath := filepath.Join(logsDir, name)
|
||||
|
||||
if entry.IsDir() {
|
||||
// Remove pprof and mapresponses directories (typically large)
|
||||
// These directories contain artifacts from all containers in the test run
|
||||
if name == "pprof" || name == "mapresponses" {
|
||||
size, sizeErr := getDirSize(fullPath)
|
||||
if sizeErr == nil {
|
||||
totalSize += size
|
||||
}
|
||||
|
||||
err := os.RemoveAll(fullPath)
|
||||
if err != nil {
|
||||
if verbose {
|
||||
log.Printf("Warning: failed to remove directory %s: %v", name, err)
|
||||
}
|
||||
} else {
|
||||
removedDirs++
|
||||
|
||||
if verbose {
|
||||
log.Printf("Removed directory: %s/", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Only process test-related files (headscale and tailscale)
|
||||
if !strings.HasPrefix(name, "hs-") && !strings.HasPrefix(name, "ts-") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Remove database, metrics, and status files, but keep logs
|
||||
shouldRemove := strings.HasSuffix(name, ".db") ||
|
||||
strings.HasSuffix(name, "_metrics.txt") ||
|
||||
strings.HasSuffix(name, "_status.json")
|
||||
|
||||
if shouldRemove {
|
||||
info, infoErr := entry.Info()
|
||||
if infoErr == nil {
|
||||
totalSize += info.Size()
|
||||
}
|
||||
|
||||
err := os.Remove(fullPath)
|
||||
if err != nil {
|
||||
if verbose {
|
||||
log.Printf("Warning: failed to remove file %s: %v", name, err)
|
||||
}
|
||||
} else {
|
||||
removedFiles++
|
||||
|
||||
if verbose {
|
||||
log.Printf("Removed file: %s", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if removedFiles > 0 || removedDirs > 0 {
|
||||
const bytesPerMB = 1024 * 1024
|
||||
log.Printf("Cleaned up %d files and %d directories (freed ~%.2f MB)",
|
||||
removedFiles, removedDirs, float64(totalSize)/bytesPerMB)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// getDirSize walks the file tree rooted at path and returns the sum, in bytes,
// of the sizes of all entries that are not directories. If the walk reports an
// error it stops at that point; the error is returned together with whatever
// total had been accumulated so far.
func getDirSize(path string) (int64, error) {
	var total int64

	// visit adds one non-directory entry to the running total and aborts the
	// walk on the first error it is handed.
	visit := func(_ string, fi os.FileInfo, visitErr error) error {
		switch {
		case visitErr != nil:
			return visitErr
		case fi.IsDir():
			return nil
		}

		total += fi.Size()

		return nil
	}

	walkErr := filepath.Walk(path, visit)

	return total, walkErr
}
|
||||
|
||||
@@ -154,6 +154,19 @@ func runTestContainer(ctx context.Context, config *RunConfig) error {
|
||||
if cleanErr := cleanupAfterTest(ctx, cli, resp.ID); cleanErr != nil && config.Verbose {
|
||||
log.Printf("Warning: post-test cleanup failed: %v", cleanErr)
|
||||
}
|
||||
|
||||
// Clean up artifacts from successful tests to save disk space in CI
|
||||
if exitCode == 0 {
|
||||
if config.Verbose {
|
||||
log.Printf("Test succeeded, cleaning up artifacts to save disk space...")
|
||||
}
|
||||
|
||||
cleanErr := cleanupSuccessfulTestArtifacts(logsDir, config.Verbose)
|
||||
|
||||
if cleanErr != nil && config.Verbose {
|
||||
log.Printf("Warning: artifact cleanup failed: %v", cleanErr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
|
||||
@@ -1,17 +1,25 @@
|
||||
package dockertestutil
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os/exec"
|
||||
"time"
|
||||
)
|
||||
|
||||
// RunDockerBuildForDiagnostics runs docker build manually to get detailed error output.
|
||||
// This is used when a docker build fails to provide more detailed diagnostic information
|
||||
// than what dockertest typically provides.
|
||||
func RunDockerBuildForDiagnostics(contextDir, dockerfile string) string {
|
||||
cmd := exec.Command("docker", "build", "-f", dockerfile, contextDir)
|
||||
//
|
||||
// Returns the build output regardless of success/failure, and an error if the build failed.
|
||||
func RunDockerBuildForDiagnostics(contextDir, dockerfile string) (string, error) {
|
||||
// Use a context with timeout to prevent hanging builds
|
||||
const buildTimeout = 10 * time.Minute
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), buildTimeout)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, "docker", "build", "--progress=plain", "--no-cache", "-f", dockerfile, contextDir)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return string(output)
|
||||
}
|
||||
return ""
|
||||
|
||||
return string(output), err
|
||||
}
|
||||
|
||||
@@ -108,6 +108,8 @@ func CleanUnreferencedNetworks(pool *dockertest.Pool) error {
|
||||
}
|
||||
|
||||
// CleanImagesInCI removes images if running in CI.
|
||||
// It only removes dangling (untagged) images to avoid forcing rebuilds.
|
||||
// Tagged images (golang:*, tailscale/tailscale:*, etc.) are automatically preserved.
|
||||
func CleanImagesInCI(pool *dockertest.Pool) error {
|
||||
if !util.IsCI() {
|
||||
log.Println("Skipping image cleanup outside of CI")
|
||||
@@ -119,9 +121,26 @@ func CleanImagesInCI(pool *dockertest.Pool) error {
|
||||
return fmt.Errorf("getting images: %w", err)
|
||||
}
|
||||
|
||||
removedCount := 0
|
||||
for _, image := range images {
|
||||
log.Printf("removing image: %s, %v", image.ID, image.RepoTags)
|
||||
_ = pool.Client.RemoveImage(image.ID)
|
||||
// Only remove dangling (untagged) images to avoid forcing rebuilds
|
||||
// Dangling images have no RepoTags or only have "<none>:<none>"
|
||||
if len(image.RepoTags) == 0 || (len(image.RepoTags) == 1 && image.RepoTags[0] == "<none>:<none>") {
|
||||
log.Printf("Removing dangling image: %s", image.ID[:12])
|
||||
|
||||
err := pool.Client.RemoveImage(image.ID)
|
||||
if err != nil {
|
||||
log.Printf("Warning: failed to remove image %s: %v", image.ID[:12], err)
|
||||
} else {
|
||||
removedCount++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if removedCount > 0 {
|
||||
log.Printf("Removed %d dangling images in CI", removedCount)
|
||||
} else {
|
||||
log.Println("No dangling images to remove in CI")
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -462,11 +462,33 @@ func New(
|
||||
if err != nil {
|
||||
// Try to get more detailed build output
|
||||
log.Printf("Docker build failed, attempting to get detailed output...")
|
||||
buildOutput := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, IntegrationTestDockerFileName)
|
||||
if buildOutput != "" {
|
||||
return nil, fmt.Errorf("could not start headscale container: %w\n\nDetailed build output:\n%s", err, buildOutput)
|
||||
|
||||
buildOutput, buildErr := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, IntegrationTestDockerFileName)
|
||||
|
||||
// Show the last 100 lines of build output to avoid overwhelming the logs
|
||||
lines := strings.Split(buildOutput, "\n")
|
||||
|
||||
const maxLines = 100
|
||||
|
||||
startLine := 0
|
||||
if len(lines) > maxLines {
|
||||
startLine = len(lines) - maxLines
|
||||
}
|
||||
return nil, fmt.Errorf("could not start headscale container: %w", err)
|
||||
|
||||
relevantOutput := strings.Join(lines[startLine:], "\n")
|
||||
|
||||
if buildErr != nil {
|
||||
// The diagnostic build also failed - this is the real error
|
||||
return nil, fmt.Errorf("could not start headscale container: %w\n\nDocker build failed. Last %d lines of output:\n%s", err, maxLines, relevantOutput)
|
||||
}
|
||||
|
||||
if buildOutput != "" {
|
||||
// Build succeeded on retry but container creation still failed
|
||||
return nil, fmt.Errorf("could not start headscale container: %w\n\nDocker build succeeded on retry, but container creation failed. Last %d lines of build output:\n%s", err, maxLines, relevantOutput)
|
||||
}
|
||||
|
||||
// No output at all - diagnostic build command may have failed
|
||||
return nil, fmt.Errorf("could not start headscale container: %w\n\nUnable to get diagnostic build output (command may have failed silently)", err)
|
||||
}
|
||||
log.Printf("Created %s container\n", hsic.hostname)
|
||||
|
||||
|
||||
@@ -327,16 +327,52 @@ func New(
|
||||
if err != nil {
|
||||
// Try to get more detailed build output
|
||||
log.Printf("Docker build failed for %s, attempting to get detailed output...", hostname)
|
||||
buildOutput := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, "Dockerfile.tailscale-HEAD")
|
||||
if buildOutput != "" {
|
||||
|
||||
buildOutput, buildErr := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, "Dockerfile.tailscale-HEAD")
|
||||
|
||||
// Show the last 100 lines of build output to avoid overwhelming the logs
|
||||
lines := strings.Split(buildOutput, "\n")
|
||||
|
||||
const maxLines = 100
|
||||
|
||||
startLine := 0
|
||||
if len(lines) > maxLines {
|
||||
startLine = len(lines) - maxLines
|
||||
}
|
||||
|
||||
relevantOutput := strings.Join(lines[startLine:], "\n")
|
||||
|
||||
if buildErr != nil {
|
||||
// The diagnostic build also failed - this is the real error
|
||||
return nil, fmt.Errorf(
|
||||
"%s could not start tailscale container (version: %s): %w\n\nDetailed build output:\n%s",
|
||||
"%s could not start tailscale container (version: %s): %w\n\nDocker build failed. Last %d lines of output:\n%s",
|
||||
hostname,
|
||||
version,
|
||||
err,
|
||||
buildOutput,
|
||||
maxLines,
|
||||
relevantOutput,
|
||||
)
|
||||
}
|
||||
|
||||
if buildOutput != "" {
|
||||
// Build succeeded on retry but container creation still failed
|
||||
return nil, fmt.Errorf(
|
||||
"%s could not start tailscale container (version: %s): %w\n\nDocker build succeeded on retry, but container creation failed. Last %d lines of build output:\n%s",
|
||||
hostname,
|
||||
version,
|
||||
err,
|
||||
maxLines,
|
||||
relevantOutput,
|
||||
)
|
||||
}
|
||||
|
||||
// No output at all - diagnostic build command may have failed
|
||||
return nil, fmt.Errorf(
|
||||
"%s could not start tailscale container (version: %s): %w\n\nUnable to get diagnostic build output (command may have failed silently)",
|
||||
hostname,
|
||||
version,
|
||||
err,
|
||||
)
|
||||
}
|
||||
case "unstable":
|
||||
tailscaleOptions.Repository = "tailscale/tailscale"
|
||||
@@ -580,7 +616,6 @@ func (t *TailscaleInContainer) Restart() error {
|
||||
}
|
||||
return struct{}{}, nil
|
||||
}, backoff.WithBackOff(backoff.NewExponentialBackOff()), backoff.WithMaxElapsedTime(30*time.Second))
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("timeout waiting for container %s to restart and become ready: %w", t.hostname, err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user