Files
awesome-docker/internal/pruner/pruner.go
T
Julien Bisconti 29222bfcb5
Deploy to GitHub Pages / build (push) Failing after 51s
Deploy to GitHub Pages / deploy (push) Has been skipped
Pull Requests / Weekly QA / test (push) Failing after 1m13s
Broken Links Report / check-links (push) Failing after 45s
feat: add prune subcommand, drop archived/stale entries (#1441)
* feat: add prune subcommand, drop archived/stale entries, add container-explorer

Add a new `awesome-docker prune` subcommand that removes README entries
whose repository health status matches a configurable set (default:
archived,stale). URLs are read from the local health cache, or from a
markdown report file via --from-report when the cache is outdated.

Apply it against the issue #1439 health report to remove 5 entries
that survived the recent reorg: stitchocker, docker-consul,
blockbridge-docker-volume, docker-explorer, dockdash.

Add google/container-explorer in the Security section as the actively
maintained successor to the now-archived google/docker-explorer.

Co-Authored-By: Claude <noreply@anthropic.com>

* golangci-lint config

* fix: address golangci-lint findings

Fixes errcheck on bufio.Writer.WriteString, gocritic rangeValCopy via
indexed loops with pointer locals, gosec G703 on user-supplied CLI
output path, noctx by switching to exec.CommandContext with a timeout
in the TUI url opener, prealloc in the scorer test, plus fieldalignment
struct reorders and golines line breaks from --fix.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
2026-05-18 23:46:32 +02:00

222 lines
5.6 KiB
Markdown

// Package pruner owns the removal of README entries by health status.
//
// Why it exists: maintenance regularly produces a list of archived/stale
// projects (see scorer + cache). Pruner is the seam that translates that list
// into a concrete edit of README.md and config/health_cache.yaml, so the README
// stays in lockstep with the cache instead of drifting via ad-hoc edits.
package pruner
import (
"bufio"
"fmt"
"io"
"os"
"regexp"
"sort"
"strings"
"github.com/veggiemonk/awesome-docker/internal/cache"
"github.com/veggiemonk/awesome-docker/internal/parser"
)
// Removed records one README entry that a prune run took out.
type Removed struct {
	// URL and Name come from the parsed README entry; Status is the health
	// status carried by the target that matched it.
	URL, Name, Status string
	// Line is the 1-based line number the entry occupied in the README.
	Line int
}
// Result summarizes a prune run: what was taken out of the README and which
// requested URLs could not be located in it.
type Result struct {
// Removed lists the README entries that matched a target, in the order they
// were encountered while scanning the file.
Removed []Removed
// NotFound holds the URLs in the target set that didn't appear in the README
// (already gone, non-GitHub indirection, or URL drift between cache and
// README). PruneREADME returns it sorted.
NotFound []string
}
// TargetURLs selects the cache entries whose status is one of statuses and
// returns them keyed by normalized URL.
//
// The requested statuses are lower-cased and whitespace-trimmed before the
// comparison; each entry's Status is lower-cased only. When two cache entries
// normalize to the same URL, the later one in hc.Entries wins.
func TargetURLs(hc *cache.HealthCache, statuses []string) map[string]cache.HealthEntry {
	selected := make(map[string]bool, len(statuses))
	for _, raw := range statuses {
		selected[strings.TrimSpace(strings.ToLower(raw))] = true
	}

	targets := make(map[string]cache.HealthEntry)
	for i := range hc.Entries {
		// Index + pointer avoids copying the entry just to inspect its status.
		entry := &hc.Entries[i]
		if !selected[strings.ToLower(entry.Status)] {
			continue
		}
		targets[normalizeURL(entry.URL)] = *entry
	}
	return targets
}
// PruneREADME drops every README line whose parsed entry URL (after
// normalization) is a key of targets, then writes the remaining lines back to
// path.
//
// Lines that parser.ParseEntry rejects are kept untouched. The file is only
// rewritten when dryRun is false and at least one entry was removed. The
// returned Result lists the removed entries in file order and, sorted, the
// target URLs that never matched a line. On a write failure the Result is
// still returned alongside the error.
func PruneREADME(path string, targets map[string]cache.HealthEntry, dryRun bool) (Result, error) {
	src, err := os.Open(path) //nolint:gosec
	if err != nil {
		return Result{}, fmt.Errorf("open %s: %w", path, err)
	}
	lines, err := readLines(src)
	src.Close()
	if err != nil {
		return Result{}, fmt.Errorf("read %s: %w", path, err)
	}

	var res Result
	survivors := make([]string, 0, len(lines))
	matched := make(map[string]bool, len(targets))
	for idx, text := range lines {
		lineNo := idx + 1
		if entry, perr := parser.ParseEntry(text, lineNo); perr == nil {
			key := normalizeURL(entry.URL)
			if meta, found := targets[key]; found {
				matched[key] = true
				res.Removed = append(res.Removed, Removed{
					URL:    entry.URL,
					Name:   entry.Name,
					Status: meta.Status,
					Line:   lineNo,
				})
				continue
			}
		}
		// Not an entry, or an entry nobody asked to prune: keep it verbatim.
		survivors = append(survivors, text)
	}

	// Report the original (un-normalized) URL of every target that had no hit.
	for key := range targets {
		if matched[key] {
			continue
		}
		res.NotFound = append(res.NotFound, targets[key].URL)
	}
	sort.Strings(res.NotFound)

	if len(res.Removed) == 0 || dryRun {
		return res, nil
	}
	if err := writeLines(path, survivors); err != nil {
		return res, fmt.Errorf("write %s: %w", path, err)
	}
	return res, nil
}
// PruneCache removes from hc every entry whose normalized URL is a key of
// targets and returns how many entries were dropped.
//
// With an empty targets map it returns immediately without touching hc.
// Otherwise hc.Entries is compacted in place, reusing its backing array; this
// in-memory filtering happens even when dryRun is true. The cache is saved to
// path only when dryRun is false and at least one entry was dropped. A save
// error is returned as-is together with the drop count.
func PruneCache(path string, hc *cache.HealthCache, targets map[string]cache.HealthEntry, dryRun bool) (int, error) {
	if len(targets) == 0 {
		return 0, nil
	}

	before := len(hc.Entries)
	next := 0 // write position for the next surviving entry
	for i := range hc.Entries {
		if _, drop := targets[normalizeURL(hc.Entries[i].URL)]; drop {
			continue
		}
		hc.Entries[next] = hc.Entries[i]
		next++
	}
	hc.Entries = hc.Entries[:next]

	dropped := before - next
	if dropped == 0 || dryRun {
		return dropped, nil
	}
	if err := cache.SaveHealthCache(path, hc); err != nil {
		return dropped, err
	}
	return dropped, nil
}
// Patterns used to read a markdown health report line by line.
var (
	// reportSectionRe recognizes a level-2 status heading, case-insensitively,
	// and captures the status word. Examples:
	//
	//	## Archived (should mark :skull:)
	//	## Stale (2+ years inactive)
	//	## Inactive (1-2 years)
	reportSectionRe = regexp.MustCompile(`(?i)^##\s+(archived|stale|inactive|dead|healthy)\b`)

	// reportEntryRe recognizes a list item that starts with a markdown link,
	// e.g. "- [name](url) - Stars: N - Last push: YYYY-MM-DD", capturing the
	// link text and the http(s) URL.
	reportEntryRe = regexp.MustCompile(`^-\s+\[([^\]]+)\]\((https?://[^)]+)\)`)
)
// TargetsFromReport parses a markdown health report (same format as the
// `report` subcommand emits) and returns the entries listed under the section
// headings that match one of the given statuses, keyed by normalized URL.
//
// Statuses are compared case-insensitively after trimming whitespace. A
// recognized status heading (see reportSectionRe) opens a section. Any other
// level-1 or level-2 heading closes it, so list items under an unrelated
// heading such as "## Notes" are never attributed to the status section that
// preceded them; deeper headings ("###" and below) leave the section open.
// List items that appear outside any status section are always ignored, even
// if statuses contains an empty string.
//
// The returned entries carry only URL, Name and Status (the lower-cased
// section name). An error is returned only when scanning r fails, which
// includes a single line exceeding the 1 MiB scanner limit.
func TargetsFromReport(r io.Reader, statuses []string) (map[string]cache.HealthEntry, error) {
	want := make(map[string]bool, len(statuses))
	for _, s := range statuses {
		want[strings.TrimSpace(strings.ToLower(s))] = true
	}

	out := make(map[string]cache.HealthEntry)
	sc := bufio.NewScanner(r)
	// Report lines can be long; raise the per-line cap from the 64 KiB default.
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	var current string // lower-cased status of the open section, "" when none
	for sc.Scan() {
		line := sc.Text()
		if m := reportSectionRe.FindStringSubmatch(line); m != nil {
			current = strings.ToLower(m[1])
			continue
		}
		// A heading at the same or a higher level that is not a status
		// heading ends the section; pruning is destructive, so entries below
		// it must not inherit the previous status.
		if strings.HasPrefix(line, "# ") || strings.HasPrefix(line, "## ") {
			current = ""
			continue
		}
		if current == "" || !want[current] {
			continue
		}
		if m := reportEntryRe.FindStringSubmatch(line); m != nil {
			url := strings.TrimSpace(m[2])
			out[normalizeURL(url)] = cache.HealthEntry{
				URL:    url,
				Name:   m[1],
				Status: current,
			}
		}
	}
	if err := sc.Err(); err != nil {
		return nil, err
	}
	return out, nil
}
// normalizeURL produces the comparison key for a URL: surrounding whitespace
// is trimmed, then a single trailing "/" is removed, then the whole string is
// lower-cased. The steps run in that order and are not repeated.
func normalizeURL(u string) string {
	return strings.ToLower(strings.TrimSuffix(strings.TrimSpace(u), "/"))
}
// readLines reads r to the end and returns its lines without their line
// terminators (a trailing "\r" before the newline is dropped as well).
//
// It accepts any io.Reader, so callers can pass an *os.File or an in-memory
// reader alike. A single line may be at most 1 MiB; beyond that the scan stops
// and bufio.ErrTooLong is returned. On error, the lines read so far are
// returned together with the error. Empty input yields a nil slice.
func readLines(r io.Reader) ([]string, error) {
	var lines []string
	sc := bufio.NewScanner(r)
	// Start with a 64 KiB buffer and allow it to grow up to 1 MiB per line.
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for sc.Scan() {
		lines = append(lines, sc.Text())
	}
	return lines, sc.Err()
}
// writeLines creates (or truncates) the file at path and writes lines to it,
// separated by "\n" and followed by one final "\n". With no lines at all the
// file therefore contains a single newline.
//
// Any error from creating, writing, flushing or closing the file is returned.
// The close error is checked because a failed close can be the only sign that
// the written data did not reach the file; it is reported only when no
// earlier error occurred.
func writeLines(path string, lines []string) (err error) {
	out, err := os.Create(path) //nolint:gosec
	if err != nil {
		return err
	}
	defer func() {
		if cerr := out.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(out)
	for i, line := range lines {
		if i > 0 {
			if err := w.WriteByte('\n'); err != nil {
				return err
			}
		}
		if _, err := w.WriteString(line); err != nil {
			return err
		}
	}
	if err := w.WriteByte('\n'); err != nil {
		return err
	}
	return w.Flush()
}