mirror of
https://github.com/veggiemonk/awesome-docker.git
synced 2026-07-01 02:50:32 +02:00
feat: add prune subcommand, drop archived/stale entries (#1441)
* feat: add prune subcommand, drop archived/stale entries, add container-explorer Add a new `awesome-docker prune` subcommand that removes README entries whose repository health status matches a configurable set (default: archived,stale). URLs are read from the local health cache, or from a markdown report file via --from-report when the cache is outdated. Apply it against the issue #1439 health report to remove 5 entries that survived the recent reorg: stitchocker, docker-consul, blockbridge-docker-volume, docker-explorer, dockdash. Add google/container-explorer in the Security section as the actively maintained successor to the now-archived google/docker-explorer. Co-Authored-By: Claude <noreply@anthropic.com> * golangci-lint config * fix: address golangci-lint findings Fixes errcheck on bufio.Writer.WriteString, gocritic rangeValCopy via indexed loops with pointer locals, gosec G703 on user-supplied CLI output path, noctx by switching to exec.CommandContext with a timeout in the TUI url opener, prealloc in the scorer test, plus fieldalignment struct reorders and golines line breaks from --fix. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,221 @@
|
||||
// Package pruner owns the removal of README entries by health status.
|
||||
//
|
||||
// Why it exists: maintenance regularly produces a list of archived/stale
|
||||
// projects (see scorer + cache). Pruner is the seam that translates that list
|
||||
// into a concrete edit of README.md and config/health_cache.yaml, so the README
|
||||
// stays in lockstep with the cache instead of drifting via ad-hoc edits.
|
||||
package pruner
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/veggiemonk/awesome-docker/internal/cache"
|
||||
"github.com/veggiemonk/awesome-docker/internal/parser"
|
||||
)
|
||||
|
||||
// Removed records one README entry that a prune run dropped (or, on a dry
// run, would have dropped).
type Removed struct {
	// URL is the entry's link as parsed from the README line.
	URL string
	// Name is the entry's display name as parsed from the README line.
	Name string
	// Status is the health status carried by the matching target entry.
	Status string
	// Line is the 1-based line number the entry occupied in the README.
	Line int
}
|
||||
|
||||
// Result summarizes a prune run.
|
||||
type Result struct {
|
||||
Removed []Removed
|
||||
// URLs in the target set that didn't appear in the README (already gone,
|
||||
// non-GitHub indirection, or URL drift between cache and README).
|
||||
NotFound []string
|
||||
}
|
||||
|
||||
// TargetURLs returns the URL set selected by the given statuses from the cache.
|
||||
func TargetURLs(hc *cache.HealthCache, statuses []string) map[string]cache.HealthEntry {
|
||||
want := make(map[string]bool, len(statuses))
|
||||
for _, s := range statuses {
|
||||
want[strings.TrimSpace(strings.ToLower(s))] = true
|
||||
}
|
||||
out := make(map[string]cache.HealthEntry)
|
||||
for i := range hc.Entries {
|
||||
e := &hc.Entries[i]
|
||||
if want[strings.ToLower(e.Status)] {
|
||||
out[normalizeURL(e.URL)] = *e
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// PruneREADME removes lines whose entry URL is in targets and writes the
|
||||
// result back to path. If dryRun is true, the file is not modified.
|
||||
func PruneREADME(path string, targets map[string]cache.HealthEntry, dryRun bool) (Result, error) {
|
||||
f, err := os.Open(path) //nolint:gosec
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("open %s: %w", path, err)
|
||||
}
|
||||
lines, err := readLines(f)
|
||||
f.Close()
|
||||
if err != nil {
|
||||
return Result{}, fmt.Errorf("read %s: %w", path, err)
|
||||
}
|
||||
|
||||
var (
|
||||
kept = make([]string, 0, len(lines))
|
||||
removed []Removed
|
||||
hit = make(map[string]bool, len(targets))
|
||||
)
|
||||
|
||||
for i, line := range lines {
|
||||
entry, perr := parser.ParseEntry(line, i+1)
|
||||
if perr != nil {
|
||||
kept = append(kept, line)
|
||||
continue
|
||||
}
|
||||
key := normalizeURL(entry.URL)
|
||||
meta, ok := targets[key]
|
||||
if !ok {
|
||||
kept = append(kept, line)
|
||||
continue
|
||||
}
|
||||
hit[key] = true
|
||||
removed = append(removed, Removed{
|
||||
URL: entry.URL,
|
||||
Name: entry.Name,
|
||||
Status: meta.Status,
|
||||
Line: i + 1,
|
||||
})
|
||||
}
|
||||
|
||||
res := Result{Removed: removed}
|
||||
for k := range targets {
|
||||
if !hit[k] {
|
||||
res.NotFound = append(res.NotFound, targets[k].URL)
|
||||
}
|
||||
}
|
||||
sort.Strings(res.NotFound)
|
||||
|
||||
if dryRun || len(removed) == 0 {
|
||||
return res, nil
|
||||
}
|
||||
|
||||
if err := writeLines(path, kept); err != nil {
|
||||
return res, fmt.Errorf("write %s: %w", path, err)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// PruneCache drops entries whose normalized URL is in targets and writes the
|
||||
// cache back to path. Safe to call when len(targets) == 0 (no-op).
|
||||
func PruneCache(path string, hc *cache.HealthCache, targets map[string]cache.HealthEntry, dryRun bool) (int, error) {
|
||||
if len(targets) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
kept := hc.Entries[:0]
|
||||
for i := range hc.Entries {
|
||||
e := &hc.Entries[i]
|
||||
if _, drop := targets[normalizeURL(e.URL)]; drop {
|
||||
continue
|
||||
}
|
||||
kept = append(kept, *e)
|
||||
}
|
||||
dropped := len(hc.Entries) - len(kept)
|
||||
hc.Entries = kept
|
||||
if dryRun || dropped == 0 {
|
||||
return dropped, nil
|
||||
}
|
||||
if err := cache.SaveHealthCache(path, hc); err != nil {
|
||||
return dropped, err
|
||||
}
|
||||
return dropped, nil
|
||||
}
|
||||
|
||||
var (
	// reportSectionRe recognises the level-2 headings of a markdown health
	// report and captures the status word (case-insensitively), e.g.:
	//
	//	## Archived (should mark :skull:)
	//	## Stale (2+ years inactive)
	//	## Inactive (1-2 years)
	reportSectionRe = regexp.MustCompile(`(?i)^##\s+(archived|stale|inactive|dead|healthy)\b`)

	// reportEntryRe recognises a report list item such as
	// "- [name](url) - Stars: N - Last push: YYYY-MM-DD" and captures the
	// name (group 1) and the http(s) URL (group 2).
	reportEntryRe = regexp.MustCompile(`^-\s+\[([^\]]+)\]\((https?://[^)]+)\)`)
)
|
||||
|
||||
// TargetsFromReport parses a markdown health report (same format as the
|
||||
// `report` subcommand emits) and returns the URL set whose section heading
|
||||
// matches one of the given statuses.
|
||||
func TargetsFromReport(r io.Reader, statuses []string) (map[string]cache.HealthEntry, error) {
|
||||
want := make(map[string]bool, len(statuses))
|
||||
for _, s := range statuses {
|
||||
want[strings.TrimSpace(strings.ToLower(s))] = true
|
||||
}
|
||||
out := make(map[string]cache.HealthEntry)
|
||||
sc := bufio.NewScanner(r)
|
||||
sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
||||
var current string
|
||||
for sc.Scan() {
|
||||
line := sc.Text()
|
||||
if m := reportSectionRe.FindStringSubmatch(line); m != nil {
|
||||
current = strings.ToLower(m[1])
|
||||
continue
|
||||
}
|
||||
if !want[current] {
|
||||
continue
|
||||
}
|
||||
if m := reportEntryRe.FindStringSubmatch(line); m != nil {
|
||||
url := strings.TrimSpace(m[2])
|
||||
out[normalizeURL(url)] = cache.HealthEntry{
|
||||
URL: url,
|
||||
Name: m[1],
|
||||
Status: current,
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// normalizeURL produces the comparison key used to match URLs between the
// cache, the report and the README: surrounding whitespace is trimmed, a
// single trailing "/" is removed, and the result is lowercased.
func normalizeURL(u string) string {
	return strings.ToLower(strings.TrimSuffix(strings.TrimSpace(u), "/"))
}
|
||||
|
||||
// readLines reads r to the end and returns its lines without line
// terminators. Lines are split by bufio.Scanner's default line splitting,
// with the maximum line length raised to 1 MiB.
//
// It accepts any io.Reader so it works for files and in-memory input alike.
// On a scan error (for example a line over the limit) it returns nil lines
// and the error rather than a partial result.
func readLines(r io.Reader) ([]string, error) {
	sc := bufio.NewScanner(r)
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	var lines []string
	for sc.Scan() {
		lines = append(lines, sc.Text())
	}
	if err := sc.Err(); err != nil {
		return nil, err
	}
	return lines, nil
}
|
||||
|
||||
// writeLines creates or truncates the file at path and writes lines joined
// by "\n", followed by one trailing "\n". An empty lines slice therefore
// produces a file containing a single newline.
//
// Errors from the buffered writes, the flush and the final Close are all
// reported. The Close error is kept because a failed close on a file that was
// just written can mean the data never reached disk.
func writeLines(path string, lines []string) (err error) {
	out, err := os.Create(path) //nolint:gosec
	if err != nil {
		return err
	}
	defer func() {
		// Do not mask an earlier write/flush error with the close error.
		if cerr := out.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(out)
	for i, line := range lines {
		if i > 0 {
			if err := w.WriteByte('\n'); err != nil {
				return err
			}
		}
		if _, err := w.WriteString(line); err != nil {
			return err
		}
	}
	if err := w.WriteByte('\n'); err != nil {
		return err
	}
	return w.Flush()
}
|
||||
@@ -0,0 +1,173 @@
|
||||
package pruner
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/veggiemonk/awesome-docker/internal/cache"
|
||||
)
|
||||
|
||||
func TestTargetURLs(t *testing.T) {
|
||||
hc := &cache.HealthCache{Entries: []cache.HealthEntry{
|
||||
{URL: "https://github.com/A/x", Status: "archived"},
|
||||
{URL: "https://github.com/B/y", Status: "stale"},
|
||||
{URL: "https://github.com/C/z", Status: "healthy"},
|
||||
{URL: "https://github.com/D/w", Status: "inactive"},
|
||||
}}
|
||||
got := TargetURLs(hc, []string{"archived", "stale"})
|
||||
if len(got) != 2 {
|
||||
t.Fatalf("want 2 targets, got %d", len(got))
|
||||
}
|
||||
if _, ok := got["https://github.com/a/x"]; !ok {
|
||||
t.Errorf("expected lowercased URL key for archived entry")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTargetsFromReport(t *testing.T) {
|
||||
r := strings.NewReader(`# Health Report
|
||||
|
||||
## Summary
|
||||
|
||||
- Stale (2+ years): 2
|
||||
|
||||
## Archived (should mark :skull:)
|
||||
|
||||
- [a/keep](https://github.com/A/Keep) - Stars: 1 - Last push: 2024-01-01
|
||||
|
||||
## Stale (2+ years inactive)
|
||||
|
||||
- [b/drop](https://github.com/b/drop) - Stars: 2 - Last push: 2020-01-01
|
||||
|
||||
## Inactive (1-2 years)
|
||||
|
||||
- [c/skip](https://github.com/c/skip) - Stars: 3 - Last push: 2025-01-01
|
||||
`)
|
||||
targets, err := TargetsFromReport(r, []string{"archived", "stale"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(targets) != 2 {
|
||||
t.Fatalf("want 2, got %d: %v", len(targets), targets)
|
||||
}
|
||||
if _, ok := targets["https://github.com/a/keep"]; !ok {
|
||||
t.Errorf("missing archived entry (case-insensitive)")
|
||||
}
|
||||
if _, ok := targets["https://github.com/b/drop"]; !ok {
|
||||
t.Errorf("missing stale entry")
|
||||
}
|
||||
if _, ok := targets["https://github.com/c/skip"]; ok {
|
||||
t.Errorf("inactive entry should not have been picked up")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPruneREADME(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "README.md")
|
||||
content := `# Header
|
||||
|
||||
## Tools
|
||||
|
||||
- [keep](https://github.com/keep/me) - Healthy project.
|
||||
- [drop](https://github.com/drop/me) - Stale project.
|
||||
- [also-keep](https://github.com/also/keep) - Another one.
|
||||
`
|
||||
if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
targets := map[string]cache.HealthEntry{
|
||||
"https://github.com/drop/me": {URL: "https://github.com/drop/me", Status: "stale"},
|
||||
}
|
||||
res, err := PruneREADME(path, targets, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(res.Removed) != 1 {
|
||||
t.Fatalf("want 1 removed, got %d", len(res.Removed))
|
||||
}
|
||||
if res.Removed[0].URL != "https://github.com/drop/me" {
|
||||
t.Errorf("unexpected removed URL: %s", res.Removed[0].URL)
|
||||
}
|
||||
|
||||
out, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if strings.Contains(string(out), "drop/me") {
|
||||
t.Errorf("expected drop/me to be removed from README, got:\n%s", out)
|
||||
}
|
||||
if !strings.Contains(string(out), "keep/me") || !strings.Contains(string(out), "also/keep") {
|
||||
t.Errorf("expected other entries to be preserved, got:\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPruneREADMEDryRun(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "README.md")
|
||||
content := "## X\n\n- [drop](https://github.com/drop/me) - Stale.\n"
|
||||
if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
targets := map[string]cache.HealthEntry{
|
||||
"https://github.com/drop/me": {URL: "https://github.com/drop/me", Status: "stale"},
|
||||
}
|
||||
res, err := PruneREADME(path, targets, true)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(res.Removed) != 1 {
|
||||
t.Fatalf("want 1 removed (preview), got %d", len(res.Removed))
|
||||
}
|
||||
got, _ := os.ReadFile(path)
|
||||
if string(got) != content {
|
||||
t.Errorf("dry-run modified file: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPruneREADMENotFound(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "README.md")
|
||||
if err := os.WriteFile(path, []byte("## X\n\n- [k](https://github.com/k/v) - Keep.\n"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
targets := map[string]cache.HealthEntry{
|
||||
"https://github.com/gone/missing": {URL: "https://github.com/gone/missing", Status: "stale"},
|
||||
}
|
||||
res, err := PruneREADME(path, targets, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(res.Removed) != 0 {
|
||||
t.Errorf("want 0 removed, got %d", len(res.Removed))
|
||||
}
|
||||
if len(res.NotFound) != 1 || res.NotFound[0] != "https://github.com/gone/missing" {
|
||||
t.Errorf("want gone/missing in NotFound, got %v", res.NotFound)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPruneCache(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "cache.yaml")
|
||||
hc := &cache.HealthCache{Entries: []cache.HealthEntry{
|
||||
{URL: "https://github.com/a/keep", Status: "healthy"},
|
||||
{URL: "https://github.com/b/drop", Status: "stale"},
|
||||
}}
|
||||
if err := cache.SaveHealthCache(path, hc); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
targets := map[string]cache.HealthEntry{
|
||||
"https://github.com/b/drop": {URL: "https://github.com/b/drop", Status: "stale"},
|
||||
}
|
||||
n, err := PruneCache(path, hc, targets, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if n != 1 {
|
||||
t.Errorf("want 1 dropped, got %d", n)
|
||||
}
|
||||
if len(hc.Entries) != 1 || hc.Entries[0].URL != "https://github.com/a/keep" {
|
||||
t.Errorf("unexpected remaining entries: %v", hc.Entries)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user