diff --git a/cmd/hi/README.md b/cmd/hi/README.md new file mode 100644 index 00000000..17324219 --- /dev/null +++ b/cmd/hi/README.md @@ -0,0 +1,6 @@ +# hi + +hi (headscale integration runner) is an entirely "vibe coded" wrapper around our +[integration test suite](../../integration). It essentially runs the docker +commands for you with some added benefits of extracting resources like logs and +databases. diff --git a/cmd/hi/cleanup.go b/cmd/hi/cleanup.go index fd78c66f..813f9b12 100644 --- a/cmd/hi/cleanup.go +++ b/cmd/hi/cleanup.go @@ -3,6 +3,9 @@ package main import ( "context" "fmt" + "log" + "os" + "path/filepath" "strings" "time" @@ -205,3 +208,110 @@ func cleanCacheVolume(ctx context.Context) error { return nil } + +// cleanupSuccessfulTestArtifacts removes artifacts from successful test runs to save disk space. +// This function removes large artifacts that are mainly useful for debugging failures: +// - Database dumps (.db files) +// - Profile data (pprof directories) +// - MapResponse data (mapresponses directories) +// - Prometheus metrics and status snapshots (_metrics.txt and _status.json files) +// +// It preserves: +// - Log files (.log) which are small and useful for verification. 
+func cleanupSuccessfulTestArtifacts(logsDir string, verbose bool) error { + entries, err := os.ReadDir(logsDir) + if err != nil { + return fmt.Errorf("failed to read logs directory: %w", err) + } + + var ( + removedFiles, removedDirs int + totalSize int64 + ) + + for _, entry := range entries { + name := entry.Name() + fullPath := filepath.Join(logsDir, name) + + if entry.IsDir() { + // Remove pprof and mapresponses directories (typically large) + // These directories contain artifacts from all containers in the test run + if name == "pprof" || name == "mapresponses" { + size, sizeErr := getDirSize(fullPath) + if sizeErr == nil { + totalSize += size + } + + err := os.RemoveAll(fullPath) + if err != nil { + if verbose { + log.Printf("Warning: failed to remove directory %s: %v", name, err) + } + } else { + removedDirs++ + + if verbose { + log.Printf("Removed directory: %s/", name) + } + } + } + } else { + // Only process test-related files (headscale and tailscale) + if !strings.HasPrefix(name, "hs-") && !strings.HasPrefix(name, "ts-") { + continue + } + + // Remove database, metrics, and status files, but keep logs + shouldRemove := strings.HasSuffix(name, ".db") || + strings.HasSuffix(name, "_metrics.txt") || + strings.HasSuffix(name, "_status.json") + + if shouldRemove { + info, infoErr := entry.Info() + if infoErr == nil { + totalSize += info.Size() + } + + err := os.Remove(fullPath) + if err != nil { + if verbose { + log.Printf("Warning: failed to remove file %s: %v", name, err) + } + } else { + removedFiles++ + + if verbose { + log.Printf("Removed file: %s", name) + } + } + } + } + } + + if removedFiles > 0 || removedDirs > 0 { + const bytesPerMB = 1024 * 1024 + log.Printf("Cleaned up %d files and %d directories (freed ~%.2f MB)", + removedFiles, removedDirs, float64(totalSize)/bytesPerMB) + } + + return nil +} + +// getDirSize calculates the total size of a directory. 
+func getDirSize(path string) (int64, error) { + var size int64 + + err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if !info.IsDir() { + size += info.Size() + } + + return nil + }) + + return size, err +} diff --git a/cmd/hi/docker.go b/cmd/hi/docker.go index 1143bf77..3895fe2a 100644 --- a/cmd/hi/docker.go +++ b/cmd/hi/docker.go @@ -154,6 +154,19 @@ func runTestContainer(ctx context.Context, config *RunConfig) error { if cleanErr := cleanupAfterTest(ctx, cli, resp.ID); cleanErr != nil && config.Verbose { log.Printf("Warning: post-test cleanup failed: %v", cleanErr) } + + // Clean up artifacts from successful tests to save disk space in CI + if exitCode == 0 { + if config.Verbose { + log.Printf("Test succeeded, cleaning up artifacts to save disk space...") + } + + cleanErr := cleanupSuccessfulTestArtifacts(logsDir, config.Verbose) + + if cleanErr != nil && config.Verbose { + log.Printf("Warning: artifact cleanup failed: %v", cleanErr) + } + } } if err != nil { diff --git a/integration/dockertestutil/build.go b/integration/dockertestutil/build.go index 635f91ef..dd082d22 100644 --- a/integration/dockertestutil/build.go +++ b/integration/dockertestutil/build.go @@ -1,17 +1,25 @@ package dockertestutil import ( + "context" "os/exec" + "time" ) // RunDockerBuildForDiagnostics runs docker build manually to get detailed error output. // This is used when a docker build fails to provide more detailed diagnostic information // than what dockertest typically provides. -func RunDockerBuildForDiagnostics(contextDir, dockerfile string) string { - cmd := exec.Command("docker", "build", "-f", dockerfile, contextDir) +// +// Returns the build output regardless of success/failure, and an error if the build failed. 
+func RunDockerBuildForDiagnostics(contextDir, dockerfile string) (string, error) { + // Use a context with timeout to prevent hanging builds + const buildTimeout = 10 * time.Minute + + ctx, cancel := context.WithTimeout(context.Background(), buildTimeout) + defer cancel() + + cmd := exec.CommandContext(ctx, "docker", "build", "--progress=plain", "--no-cache", "-f", dockerfile, contextDir) output, err := cmd.CombinedOutput() - if err != nil { - return string(output) - } - return "" + + return string(output), err } diff --git a/integration/dockertestutil/network.go b/integration/dockertestutil/network.go index 0ec6a69b..42483247 100644 --- a/integration/dockertestutil/network.go +++ b/integration/dockertestutil/network.go @@ -108,6 +108,8 @@ func CleanUnreferencedNetworks(pool *dockertest.Pool) error { } // CleanImagesInCI removes images if running in CI. +// It only removes dangling (untagged) images to avoid forcing rebuilds. +// Tagged images (golang:*, tailscale/tailscale:*, etc.) are automatically preserved. 
func CleanImagesInCI(pool *dockertest.Pool) error { if !util.IsCI() { log.Println("Skipping image cleanup outside of CI") @@ -119,9 +121,26 @@ func CleanImagesInCI(pool *dockertest.Pool) error { return fmt.Errorf("getting images: %w", err) } + removedCount := 0 for _, image := range images { - log.Printf("removing image: %s, %v", image.ID, image.RepoTags) - _ = pool.Client.RemoveImage(image.ID) + // Only remove dangling (untagged) images to avoid forcing rebuilds + // Dangling images have no RepoTags or only have ":" + if len(image.RepoTags) == 0 || (len(image.RepoTags) == 1 && image.RepoTags[0] == ":") { + log.Printf("Removing dangling image: %s", image.ID[:12]) + + err := pool.Client.RemoveImage(image.ID) + if err != nil { + log.Printf("Warning: failed to remove image %s: %v", image.ID[:12], err) + } else { + removedCount++ + } + } + } + + if removedCount > 0 { + log.Printf("Removed %d dangling images in CI", removedCount) + } else { + log.Println("No dangling images to remove in CI") } return nil diff --git a/integration/hsic/hsic.go b/integration/hsic/hsic.go index 775e7937..81c33120 100644 --- a/integration/hsic/hsic.go +++ b/integration/hsic/hsic.go @@ -462,11 +462,33 @@ func New( if err != nil { // Try to get more detailed build output log.Printf("Docker build failed, attempting to get detailed output...") - buildOutput := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, IntegrationTestDockerFileName) - if buildOutput != "" { - return nil, fmt.Errorf("could not start headscale container: %w\n\nDetailed build output:\n%s", err, buildOutput) + + buildOutput, buildErr := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, IntegrationTestDockerFileName) + + // Show the last 100 lines of build output to avoid overwhelming the logs + lines := strings.Split(buildOutput, "\n") + + const maxLines = 100 + + startLine := 0 + if len(lines) > maxLines { + startLine = len(lines) - maxLines } - return nil, fmt.Errorf("could not start headscale 
container: %w", err) + + relevantOutput := strings.Join(lines[startLine:], "\n") + + if buildErr != nil { + // The diagnostic build also failed - this is the real error + return nil, fmt.Errorf("could not start headscale container: %w\n\nDocker build failed. Last %d lines of output:\n%s", err, maxLines, relevantOutput) + } + + if buildOutput != "" { + // Build succeeded on retry but container creation still failed + return nil, fmt.Errorf("could not start headscale container: %w\n\nDocker build succeeded on retry, but container creation failed. Last %d lines of build output:\n%s", err, maxLines, relevantOutput) + } + + // No output at all - diagnostic build command may have failed + return nil, fmt.Errorf("could not start headscale container: %w\n\nUnable to get diagnostic build output (command may have failed silently)", err) } log.Printf("Created %s container\n", hsic.hostname) diff --git a/integration/tsic/tsic.go b/integration/tsic/tsic.go index 462c3ea3..f60748a8 100644 --- a/integration/tsic/tsic.go +++ b/integration/tsic/tsic.go @@ -327,16 +327,52 @@ func New( if err != nil { // Try to get more detailed build output log.Printf("Docker build failed for %s, attempting to get detailed output...", hostname) - buildOutput := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, "Dockerfile.tailscale-HEAD") - if buildOutput != "" { + + buildOutput, buildErr := dockertestutil.RunDockerBuildForDiagnostics(dockerContextPath, "Dockerfile.tailscale-HEAD") + + // Show the last 100 lines of build output to avoid overwhelming the logs + lines := strings.Split(buildOutput, "\n") + + const maxLines = 100 + + startLine := 0 + if len(lines) > maxLines { + startLine = len(lines) - maxLines + } + + relevantOutput := strings.Join(lines[startLine:], "\n") + + if buildErr != nil { + // The diagnostic build also failed - this is the real error return nil, fmt.Errorf( - "%s could not start tailscale container (version: %s): %w\n\nDetailed build output:\n%s", + "%s could not 
start tailscale container (version: %s): %w\n\nDocker build failed. Last %d lines of output:\n%s", hostname, version, err, - buildOutput, + maxLines, + relevantOutput, ) } + + if buildOutput != "" { + // Build succeeded on retry but container creation still failed + return nil, fmt.Errorf( + "%s could not start tailscale container (version: %s): %w\n\nDocker build succeeded on retry, but container creation failed. Last %d lines of build output:\n%s", + hostname, + version, + err, + maxLines, + relevantOutput, + ) + } + + // No output at all - diagnostic build command may have failed + return nil, fmt.Errorf( + "%s could not start tailscale container (version: %s): %w\n\nUnable to get diagnostic build output (command may have failed silently)", + hostname, + version, + err, + ) } case "unstable": tailscaleOptions.Repository = "tailscale/tailscale" @@ -580,7 +616,6 @@ func (t *TailscaleInContainer) Restart() error { } return struct{}{}, nil }, backoff.WithBackOff(backoff.NewExponentialBackOff()), backoff.WithMaxElapsedTime(30*time.Second)) - if err != nil { return fmt.Errorf("timeout waiting for container %s to restart and become ready: %w", t.hostname, err) }