feat: add prune subcommand, drop archived/stale entries (#1441)
Deploy to GitHub Pages / build (push) Failing after 51s
Deploy to GitHub Pages / deploy (push) Has been skipped
Pull Requests / Weekly QA / test (push) Failing after 1m13s
Broken Links Report / check-links (push) Failing after 45s

* feat: add prune subcommand, drop archived/stale entries, add container-explorer

Add a new `awesome-docker prune` subcommand that removes README entries
whose repository health status matches a configurable set (default:
archived,stale). URLs are read from the local health cache, or from a
markdown report file via --from-report when the cache is outdated.

Apply it against the issue #1439 health report to remove 5 entries
that survived the recent reorg: stitchocker, docker-consul,
blockbridge-docker-volume, docker-explorer, dockdash.

Add google/container-explorer in the Security section as the actively
maintained successor to the now-archived google/docker-explorer.

Co-Authored-By: Claude <noreply@anthropic.com>

* golangci-lint config

* fix: address golangci-lint findings

Fixes errcheck on bufio.Writer.WriteString, gocritic rangeValCopy via
indexed loops with pointer locals, gosec G703 on user-supplied CLI
output path, noctx by switching to exec.CommandContext with a timeout
in the TUI url opener, prealloc in the scorer test, plus fieldalignment
struct reorders and golines line breaks from --fix.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Julien Bisconti
2026-05-18 23:46:32 +02:00
committed by GitHub
parent 503e5bd7c7
commit 29222bfcb5
23 changed files with 2548 additions and 2803 deletions
+221
View File
@@ -0,0 +1,221 @@
// Package pruner owns the removal of README entries by health status.
//
// Why it exists: maintenance regularly produces a list of archived/stale
// projects (see scorer + cache). Pruner is the seam that translates that list
// into a concrete edit of README.md and config/health_cache.yaml, so the README
// stays in lockstep with the cache instead of drifting via ad-hoc edits.
package pruner
import (
"bufio"
"fmt"
"io"
"os"
"regexp"
"sort"
"strings"
"github.com/veggiemonk/awesome-docker/internal/cache"
"github.com/veggiemonk/awesome-docker/internal/parser"
)
// Removed describes a single entry removed from the README.
//
// PruneREADME produces one value per README line whose parsed entry URL,
// once normalized, was found in the target set.
type Removed struct {
// URL is the entry URL as returned by the README parser (not normalized).
URL string
// Name is the entry name as returned by the README parser.
Name string
// Status is the health status carried by the matching target.
Status string
// Line is the 1-based line number of the entry in the README as it was
// read, i.e. before any line was removed.
Line int
}
// Result summarizes a prune run.
type Result struct {
// Removed lists the matched README entries in the order their lines were
// encountered. It is filled in for dry runs as well.
Removed []Removed
// URLs in the target set that didn't appear in the README (already gone,
// non-GitHub indirection, or URL drift between cache and README).
// PruneREADME stores each unmatched target's own URL field here and sorts
// the slice before returning.
NotFound []string
}
// TargetURLs returns the URL set selected by the given statuses from the cache.
//
// Statuses are compared case-insensitively after trimming surrounding
// whitespace. Blank statuses are ignored, so a stray empty token can never
// select cache entries whose Status is unset.
//
// The returned map is keyed by the normalized entry URL and holds a copy of
// the matching cache entry. It is never nil.
func TargetURLs(hc *cache.HealthCache, statuses []string) map[string]cache.HealthEntry {
	want := make(map[string]bool, len(statuses))
	for _, s := range statuses {
		// A blank token (for example from splitting "archived,") would
		// otherwise match every entry with an empty Status and mark it for
		// deletion.
		if s = strings.ToLower(strings.TrimSpace(s)); s != "" {
			want[s] = true
		}
	}

	out := make(map[string]cache.HealthEntry)
	for i := range hc.Entries {
		e := &hc.Entries[i] // index + pointer avoids copying each entry per iteration
		if want[strings.ToLower(e.Status)] {
			out[normalizeURL(e.URL)] = *e
		}
	}
	return out
}
// PruneREADME drops every README entry whose URL is listed in targets and
// rewrites the file at path.
//
// Each line is handed to parser.ParseEntry. A line is removed only when it
// parses as an entry and its normalized URL is a key of targets; lines that do
// not parse are kept verbatim. Targets that matched no line are reported in
// Result.NotFound, sorted. The file is rewritten only when dryRun is false and
// at least one entry was removed.
//
// Open and read failures return an empty Result; a write failure returns the
// computed Result together with the error.
func PruneREADME(path string, targets map[string]cache.HealthEntry, dryRun bool) (Result, error) {
	src, err := os.Open(path) //nolint:gosec
	if err != nil {
		return Result{}, fmt.Errorf("open %s: %w", path, err)
	}
	lines, err := readLines(src)
	src.Close()
	if err != nil {
		return Result{}, fmt.Errorf("read %s: %w", path, err)
	}

	var res Result
	survivors := make([]string, 0, len(lines))
	matched := make(map[string]bool, len(targets))
	for idx, text := range lines {
		lineNo := idx + 1
		if entry, perr := parser.ParseEntry(text, lineNo); perr == nil {
			key := normalizeURL(entry.URL)
			if meta, found := targets[key]; found {
				matched[key] = true
				res.Removed = append(res.Removed, Removed{
					URL:    entry.URL,
					Name:   entry.Name,
					Status: meta.Status,
					Line:   lineNo,
				})
				continue
			}
		}
		// Not an entry, or an entry nobody asked to prune.
		survivors = append(survivors, text)
	}

	// Iterate by key only so the map values are not copied on every step.
	for key := range targets {
		if matched[key] {
			continue
		}
		res.NotFound = append(res.NotFound, targets[key].URL)
	}
	sort.Strings(res.NotFound)

	if dryRun || len(res.Removed) == 0 {
		return res, nil
	}
	if err := writeLines(path, survivors); err != nil {
		return res, fmt.Errorf("write %s: %w", path, err)
	}
	return res, nil
}
// PruneCache drops entries whose normalized URL is in targets and writes the
// cache back to path. Safe to call when len(targets) == 0 (no-op).
//
// It returns the number of cache entries that matched. When dryRun is true, or
// when nothing matched, neither hc nor the file at path is touched, so a dry
// run is a pure preview. Otherwise hc.Entries is replaced by the surviving
// entries before the cache is saved; if saving fails the in-memory cache stays
// pruned and the save error is returned alongside the count.
func PruneCache(path string, hc *cache.HealthCache, targets map[string]cache.HealthEntry, dryRun bool) (int, error) {
	if len(targets) == 0 {
		return 0, nil
	}

	// Filter into a fresh slice rather than hc.Entries[:0]: filtering in place
	// would overwrite the caller's entries even for a dry run.
	kept := make([]cache.HealthEntry, 0, len(hc.Entries))
	for i := range hc.Entries {
		e := &hc.Entries[i]
		if _, drop := targets[normalizeURL(e.URL)]; drop {
			continue
		}
		kept = append(kept, *e)
	}

	dropped := len(hc.Entries) - len(kept)
	if dryRun || dropped == 0 {
		return dropped, nil
	}

	hc.Entries = kept
	if err := cache.SaveHealthCache(path, hc); err != nil {
		return dropped, err
	}
	return dropped, nil
}
// Patterns for reading a markdown health report line by line.
var (
	// reportSectionRe recognizes a level-2 heading that opens a status
	// section and captures the status word, in any letter case:
	//
	//	## Archived (should mark :skull:)
	//	## Stale (2+ years inactive)
	//	## Inactive (1-2 years)
	reportSectionRe = regexp.MustCompile(`(?i)^##\s+(archived|stale|inactive|dead|healthy)\b`)

	// reportEntryRe recognizes a list item that starts with a markdown link
	// and captures the link text and its http(s) URL, e.g.
	// "- [name](url) - Stars: N - Last push: YYYY-MM-DD".
	reportEntryRe = regexp.MustCompile(`^-\s+\[([^\]]+)\]\((https?://[^)]+)\)`)
)
// TargetsFromReport parses a markdown health report (same format as the
// `report` subcommand emits) and returns the URL set whose section heading
// matches one of the given statuses.
//
// Statuses are compared case-insensitively after trimming whitespace; blank
// statuses are ignored. A status section starts at a heading matched by
// reportSectionRe and ends at the next level-1 or level-2 heading of any kind,
// so list items under an unrelated heading are never attributed to the
// previous status section.
//
// The returned map is keyed by normalized URL. Each value carries the URL and
// link text from the report line and the lower-cased section status. A scan
// error (including an over-long line) yields a nil map and that error.
func TargetsFromReport(r io.Reader, statuses []string) (map[string]cache.HealthEntry, error) {
	want := make(map[string]bool, len(statuses))
	for _, s := range statuses {
		if s = strings.ToLower(strings.TrimSpace(s)); s != "" {
			want[s] = true
		}
	}

	out := make(map[string]cache.HealthEntry)
	sc := bufio.NewScanner(r)
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	var current string // status of the section being read; "" when outside any status section
	for sc.Scan() {
		line := sc.Text()
		if m := reportSectionRe.FindStringSubmatch(line); m != nil {
			current = strings.ToLower(m[1])
			continue
		}
		if isSectionBoundary(line) {
			// Unrecognized heading: whatever follows does not belong to the
			// previous status section and must not be pruned on its behalf.
			current = ""
			continue
		}
		if !want[current] {
			continue
		}
		if m := reportEntryRe.FindStringSubmatch(line); m != nil {
			url := strings.TrimSpace(m[2])
			out[normalizeURL(url)] = cache.HealthEntry{
				URL:    url,
				Name:   m[1],
				Status: current,
			}
		}
	}
	if err := sc.Err(); err != nil {
		return nil, err
	}
	return out, nil
}

// isSectionBoundary reports whether line is a level-1 or level-2 ATX heading
// ("#" or "##" followed by whitespace or end of line).
func isSectionBoundary(line string) bool {
	level := 0
	for level < len(line) && line[level] == '#' {
		level++
	}
	if level == 0 || level > 2 {
		return false
	}
	return level == len(line) || line[level] == ' ' || line[level] == '\t'
}
// normalizeURL turns a URL into a comparison key: surrounding whitespace is
// trimmed, a single trailing slash is dropped, and the result is lower-cased.
func normalizeURL(u string) string {
	trimmed := strings.TrimSuffix(strings.TrimSpace(u), "/")
	return strings.ToLower(trimmed)
}
// readLines reads r to the end and returns its lines without their line
// terminators ("\n" or "\r\n"). An empty input yields a nil slice.
//
// It accepts any io.Reader rather than only *os.File, so it can be fed from
// memory as well as from disk. The scanner's per-line limit is raised to
// 1 MiB; a longer line stops the scan and the scanner's error is returned
// together with the lines read so far.
func readLines(r io.Reader) ([]string, error) {
	var lines []string
	sc := bufio.NewScanner(r)
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for sc.Scan() {
		lines = append(lines, sc.Text())
	}
	return lines, sc.Err()
}
// writeLines creates (or truncates) the file at path and writes every element
// of lines followed by a newline. An empty slice still produces a single
// newline, matching the historical output of this function.
//
// The first error from creating, writing, flushing or closing the file is
// returned.
func writeLines(path string, lines []string) (err error) {
	out, err := os.Create(path) //nolint:gosec
	if err != nil {
		return err
	}
	// On a write path a failed Close can mean the data never reached disk, so
	// report it unless an earlier error is already on its way out.
	defer func() {
		if cerr := out.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(out)
	for _, line := range lines {
		if _, err := w.WriteString(line); err != nil {
			return err
		}
		if err := w.WriteByte('\n'); err != nil {
			return err
		}
	}
	if len(lines) == 0 {
		if err := w.WriteByte('\n'); err != nil {
			return err
		}
	}
	return w.Flush()
}
+173
View File
@@ -0,0 +1,173 @@
package pruner
import (
"os"
"path/filepath"
"strings"
"testing"
"github.com/veggiemonk/awesome-docker/internal/cache"
)
// TestTargetURLs checks that only entries with a requested status are selected
// and that the result is keyed by the lower-cased URL.
func TestTargetURLs(t *testing.T) {
	entries := []cache.HealthEntry{
		{URL: "https://github.com/A/x", Status: "archived"},
		{URL: "https://github.com/B/y", Status: "stale"},
		{URL: "https://github.com/C/z", Status: "healthy"},
		{URL: "https://github.com/D/w", Status: "inactive"},
	}
	hc := &cache.HealthCache{Entries: entries}

	targets := TargetURLs(hc, []string{"archived", "stale"})
	if n := len(targets); n != 2 {
		t.Fatalf("want 2 targets, got %d", n)
	}
	if _, found := targets["https://github.com/a/x"]; !found {
		t.Errorf("expected lowercased URL key for archived entry")
	}
}
// TestTargetsFromReport feeds a small report through TargetsFromReport and
// checks that only entries under the requested status headings are returned,
// keyed by their lower-cased URL.
func TestTargetsFromReport(t *testing.T) {
	const report = `# Health Report
## Summary
- Stale (2+ years): 2
## Archived (should mark :skull:)
- [a/keep](https://github.com/A/Keep) - Stars: 1 - Last push: 2024-01-01
## Stale (2+ years inactive)
- [b/drop](https://github.com/b/drop) - Stars: 2 - Last push: 2020-01-01
## Inactive (1-2 years)
- [c/skip](https://github.com/c/skip) - Stars: 3 - Last push: 2025-01-01
`
	targets, err := TargetsFromReport(strings.NewReader(report), []string{"archived", "stale"})
	if err != nil {
		t.Fatal(err)
	}
	if n := len(targets); n != 2 {
		t.Fatalf("want 2, got %d: %v", n, targets)
	}

	checks := []struct {
		key     string
		present bool
		msg     string
	}{
		{"https://github.com/a/keep", true, "missing archived entry (case-insensitive)"},
		{"https://github.com/b/drop", true, "missing stale entry"},
		{"https://github.com/c/skip", false, "inactive entry should not have been picked up"},
	}
	for _, c := range checks {
		if _, found := targets[c.key]; found != c.present {
			t.Error(c.msg)
		}
	}
}
// TestPruneREADME prunes one entry from a three-entry README and checks both
// the reported removal and the rewritten file.
func TestPruneREADME(t *testing.T) {
	const (
		dropURL = "https://github.com/drop/me"
		readme  = `# Header
## Tools
- [keep](https://github.com/keep/me) - Healthy project.
- [drop](https://github.com/drop/me) - Stale project.
- [also-keep](https://github.com/also/keep) - Another one.
`
	)
	file := filepath.Join(t.TempDir(), "README.md")
	if err := os.WriteFile(file, []byte(readme), 0o644); err != nil {
		t.Fatal(err)
	}

	targets := map[string]cache.HealthEntry{
		dropURL: {URL: dropURL, Status: "stale"},
	}
	res, err := PruneREADME(file, targets, false)
	if err != nil {
		t.Fatal(err)
	}
	if n := len(res.Removed); n != 1 {
		t.Fatalf("want 1 removed, got %d", n)
	}
	if got := res.Removed[0].URL; got != dropURL {
		t.Errorf("unexpected removed URL: %s", got)
	}

	out, err := os.ReadFile(file)
	if err != nil {
		t.Fatal(err)
	}
	text := string(out)
	if strings.Contains(text, "drop/me") {
		t.Errorf("expected drop/me to be removed from README, got:\n%s", out)
	}
	if !(strings.Contains(text, "keep/me") && strings.Contains(text, "also/keep")) {
		t.Errorf("expected other entries to be preserved, got:\n%s", out)
	}
}
// TestPruneREADMEDryRun checks that a dry run still reports what would be
// removed but leaves the README on disk byte-for-byte unchanged.
func TestPruneREADMEDryRun(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "README.md")
	content := "## X\n\n- [drop](https://github.com/drop/me) - Stale.\n"
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatal(err)
	}

	targets := map[string]cache.HealthEntry{
		"https://github.com/drop/me": {URL: "https://github.com/drop/me", Status: "stale"},
	}
	res, err := PruneREADME(path, targets, true)
	if err != nil {
		t.Fatal(err)
	}
	if len(res.Removed) != 1 {
		t.Fatalf("want 1 removed (preview), got %d", len(res.Removed))
	}

	// Fail on a read error instead of comparing against empty content, which
	// would be misreported as "dry-run modified file".
	got, err := os.ReadFile(path)
	if err != nil {
		t.Fatal(err)
	}
	if string(got) != content {
		t.Errorf("dry-run modified file: %q", got)
	}
}
// TestPruneREADMENotFound checks that a target with no matching README line is
// reported in NotFound and that nothing is removed.
func TestPruneREADMENotFound(t *testing.T) {
	const missing = "https://github.com/gone/missing"

	readme := filepath.Join(t.TempDir(), "README.md")
	body := []byte("## X\n\n- [k](https://github.com/k/v) - Keep.\n")
	if err := os.WriteFile(readme, body, 0o644); err != nil {
		t.Fatal(err)
	}

	targets := map[string]cache.HealthEntry{
		missing: {URL: missing, Status: "stale"},
	}
	res, err := PruneREADME(readme, targets, false)
	if err != nil {
		t.Fatal(err)
	}
	if n := len(res.Removed); n != 0 {
		t.Errorf("want 0 removed, got %d", n)
	}
	if len(res.NotFound) != 1 || res.NotFound[0] != missing {
		t.Errorf("want gone/missing in NotFound, got %v", res.NotFound)
	}
}
// TestPruneCache saves a two-entry cache, prunes one URL from it, and checks
// the reported count and the entries left in memory.
func TestPruneCache(t *testing.T) {
	const (
		keepURL = "https://github.com/a/keep"
		dropURL = "https://github.com/b/drop"
	)
	cachePath := filepath.Join(t.TempDir(), "cache.yaml")
	hc := &cache.HealthCache{Entries: []cache.HealthEntry{
		{URL: keepURL, Status: "healthy"},
		{URL: dropURL, Status: "stale"},
	}}
	if err := cache.SaveHealthCache(cachePath, hc); err != nil {
		t.Fatal(err)
	}

	targets := map[string]cache.HealthEntry{
		dropURL: {URL: dropURL, Status: "stale"},
	}
	dropped, err := PruneCache(cachePath, hc, targets, false)
	if err != nil {
		t.Fatal(err)
	}
	if dropped != 1 {
		t.Errorf("want 1 dropped, got %d", dropped)
	}
	if len(hc.Entries) != 1 || hc.Entries[0].URL != keepURL {
		t.Errorf("unexpected remaining entries: %v", hc.Entries)
	}
}