mirror of
https://github.com/veggiemonk/awesome-docker.git
synced 2026-06-30 18:40:32 +02:00
29222bfcb5
* feat: add prune subcommand, drop archived/stale entries, add container-explorer Add a new `awesome-docker prune` subcommand that removes README entries whose repository health status matches a configurable set (default: archived,stale). URLs are read from the local health cache, or from a markdown report file via --from-report when the cache is outdated. Apply it against the issue #1439 health report to remove 5 entries that survived the recent reorg: stitchocker, docker-consul, blockbridge-docker-volume, docker-explorer, dockdash. Add google/container-explorer in the Security section as the actively maintained successor to the now-archived google/docker-explorer. Co-Authored-By: Claude <noreply@anthropic.com> * golangci-lint config * fix: address golangci-lint findings Fixes errcheck on bufio.Writer.WriteString, gocritic rangeValCopy via indexed loops with pointer locals, gosec G703 on user-supplied CLI output path, noctx by switching to exec.CommandContext with a timeout in the TUI url opener, prealloc in the scorer test, plus fieldalignment struct reorders and golines line breaks from --fix. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude <noreply@anthropic.com>
222 lines
5.6 KiB
Markdown
222 lines
5.6 KiB
Markdown
// Package pruner owns the removal of README entries by health status.
|
|
//
|
|
// Why it exists: maintenance regularly produces a list of archived/stale
|
|
// projects (see scorer + cache). Pruner is the seam that translates that list
|
|
// into a concrete edit of README.md and config/health_cache.yaml, so the README
|
|
// stays in lockstep with the cache instead of drifting via ad-hoc edits.
|
|
package pruner
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
|
|
"github.com/veggiemonk/awesome-docker/internal/cache"
|
|
"github.com/veggiemonk/awesome-docker/internal/parser"
|
|
)
|
|
|
|
// Removed describes a single entry removed from the README.
|
|
type Removed struct {
|
|
URL string
|
|
Name string
|
|
Status string
|
|
Line int
|
|
}
|
|
|
|
// Result summarizes a prune run.
|
|
type Result struct {
|
|
Removed []Removed
|
|
// URLs in the target set that didn't appear in the README (already gone,
|
|
// non-GitHub indirection, or URL drift between cache and README).
|
|
NotFound []string
|
|
}
|
|
|
|
// TargetURLs returns the URL set selected by the given statuses from the cache.
|
|
func TargetURLs(hc *cache.HealthCache, statuses []string) map[string]cache.HealthEntry {
|
|
want := make(map[string]bool, len(statuses))
|
|
for _, s := range statuses {
|
|
want[strings.TrimSpace(strings.ToLower(s))] = true
|
|
}
|
|
out := make(map[string]cache.HealthEntry)
|
|
for i := range hc.Entries {
|
|
e := &hc.Entries[i]
|
|
if want[strings.ToLower(e.Status)] {
|
|
out[normalizeURL(e.URL)] = *e
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
// PruneREADME removes lines whose entry URL is in targets and writes the
|
|
// result back to path. If dryRun is true, the file is not modified.
|
|
func PruneREADME(path string, targets map[string]cache.HealthEntry, dryRun bool) (Result, error) {
|
|
f, err := os.Open(path) //nolint:gosec
|
|
if err != nil {
|
|
return Result{}, fmt.Errorf("open %s: %w", path, err)
|
|
}
|
|
lines, err := readLines(f)
|
|
f.Close()
|
|
if err != nil {
|
|
return Result{}, fmt.Errorf("read %s: %w", path, err)
|
|
}
|
|
|
|
var (
|
|
kept = make([]string, 0, len(lines))
|
|
removed []Removed
|
|
hit = make(map[string]bool, len(targets))
|
|
)
|
|
|
|
for i, line := range lines {
|
|
entry, perr := parser.ParseEntry(line, i+1)
|
|
if perr != nil {
|
|
kept = append(kept, line)
|
|
continue
|
|
}
|
|
key := normalizeURL(entry.URL)
|
|
meta, ok := targets[key]
|
|
if !ok {
|
|
kept = append(kept, line)
|
|
continue
|
|
}
|
|
hit[key] = true
|
|
removed = append(removed, Removed{
|
|
URL: entry.URL,
|
|
Name: entry.Name,
|
|
Status: meta.Status,
|
|
Line: i + 1,
|
|
})
|
|
}
|
|
|
|
res := Result{Removed: removed}
|
|
for k := range targets {
|
|
if !hit[k] {
|
|
res.NotFound = append(res.NotFound, targets[k].URL)
|
|
}
|
|
}
|
|
sort.Strings(res.NotFound)
|
|
|
|
if dryRun || len(removed) == 0 {
|
|
return res, nil
|
|
}
|
|
|
|
if err := writeLines(path, kept); err != nil {
|
|
return res, fmt.Errorf("write %s: %w", path, err)
|
|
}
|
|
return res, nil
|
|
}
|
|
|
|
// PruneCache drops entries whose normalized URL is in targets and writes the
|
|
// cache back to path. Safe to call when len(targets) == 0 (no-op).
|
|
func PruneCache(path string, hc *cache.HealthCache, targets map[string]cache.HealthEntry, dryRun bool) (int, error) {
|
|
if len(targets) == 0 {
|
|
return 0, nil
|
|
}
|
|
kept := hc.Entries[:0]
|
|
for i := range hc.Entries {
|
|
e := &hc.Entries[i]
|
|
if _, drop := targets[normalizeURL(e.URL)]; drop {
|
|
continue
|
|
}
|
|
kept = append(kept, *e)
|
|
}
|
|
dropped := len(hc.Entries) - len(kept)
|
|
hc.Entries = kept
|
|
if dryRun || dropped == 0 {
|
|
return dropped, nil
|
|
}
|
|
if err := cache.SaveHealthCache(path, hc); err != nil {
|
|
return dropped, err
|
|
}
|
|
return dropped, nil
|
|
}
|
|
|
|
// reportSectionRe matches markdown health-report section headings:
|
|
//
|
|
// ## Archived (should mark :skull:)
|
|
// ## Stale (2+ years inactive)
|
|
// ## Inactive (1-2 years)
|
|
var reportSectionRe = regexp.MustCompile(`(?i)^##\s+(archived|stale|inactive|dead|healthy)\b`)
|
|
|
|
// reportEntryRe matches: "- [name](url) - Stars: N - Last push: YYYY-MM-DD"
|
|
var reportEntryRe = regexp.MustCompile(`^-\s+\[([^\]]+)\]\((https?://[^)]+)\)`)
|
|
|
|
// TargetsFromReport parses a markdown health report (same format as the
|
|
// `report` subcommand emits) and returns the URL set whose section heading
|
|
// matches one of the given statuses.
|
|
func TargetsFromReport(r io.Reader, statuses []string) (map[string]cache.HealthEntry, error) {
|
|
want := make(map[string]bool, len(statuses))
|
|
for _, s := range statuses {
|
|
want[strings.TrimSpace(strings.ToLower(s))] = true
|
|
}
|
|
out := make(map[string]cache.HealthEntry)
|
|
sc := bufio.NewScanner(r)
|
|
sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
|
var current string
|
|
for sc.Scan() {
|
|
line := sc.Text()
|
|
if m := reportSectionRe.FindStringSubmatch(line); m != nil {
|
|
current = strings.ToLower(m[1])
|
|
continue
|
|
}
|
|
if !want[current] {
|
|
continue
|
|
}
|
|
if m := reportEntryRe.FindStringSubmatch(line); m != nil {
|
|
url := strings.TrimSpace(m[2])
|
|
out[normalizeURL(url)] = cache.HealthEntry{
|
|
URL: url,
|
|
Name: m[1],
|
|
Status: current,
|
|
}
|
|
}
|
|
}
|
|
if err := sc.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
func normalizeURL(u string) string {
|
|
u = strings.TrimSpace(u)
|
|
u = strings.TrimSuffix(u, "/")
|
|
u = strings.ToLower(u)
|
|
return u
|
|
}
|
|
|
|
func readLines(r *os.File) ([]string, error) {
|
|
var lines []string
|
|
sc := bufio.NewScanner(r)
|
|
sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
|
for sc.Scan() {
|
|
lines = append(lines, sc.Text())
|
|
}
|
|
return lines, sc.Err()
|
|
}
|
|
|
|
func writeLines(path string, lines []string) error {
|
|
out, err := os.Create(path) //nolint:gosec
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer out.Close()
|
|
w := bufio.NewWriter(out)
|
|
for i, line := range lines {
|
|
if _, err := w.WriteString(line); err != nil {
|
|
return err
|
|
}
|
|
if i < len(lines)-1 {
|
|
if err := w.WriteByte('\n'); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
if err := w.WriteByte('\n'); err != nil {
|
|
return err
|
|
}
|
|
return w.Flush()
|
|
}
|