Files
awesome-docker/internal/checker/github.go
T
Julien Bisconti 29222bfcb5
Deploy to GitHub Pages / build (push) Failing after 51s
Deploy to GitHub Pages / deploy (push) Has been skipped
Pull Requests / Weekly QA / test (push) Failing after 1m13s
Broken Links Report / check-links (push) Failing after 45s
feat: add prune subcommand, drop archived/stale entries (#1441)
* feat: add prune subcommand, drop archived/stale entries, add container-explorer

Add a new `awesome-docker prune` subcommand that removes README entries
whose repository health status matches a configurable set (default:
archived,stale). URLs are read from the local health cache, or from a
markdown report file via --from-report when the cache is outdated.

Apply it against the issue #1439 health report to remove 5 entries
that survived the recent reorg: stitchocker, docker-consul,
blockbridge-docker-volume, docker-explorer, dockdash.

Add google/container-explorer in the Security section as the actively
maintained successor to the now-archived google/docker-explorer.

Co-Authored-By: Claude <noreply@anthropic.com>

* golangci-lint config

* fix: address golangci-lint findings

Fixes errcheck on bufio.Writer.WriteString, gocritic rangeValCopy via
indexed loops with pointer locals, gosec G703 on user-supplied CLI
output path, noctx by switching to exec.CommandContext with a timeout
in the TUI url opener, prealloc in the scorer test, plus fieldalignment
struct reorders and golines line breaks from --fix.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
2026-05-18 23:46:32 +02:00

173 lines
3.9 KiB
Markdown

package checker
import (
"context"
"fmt"
"net/url"
"strings"
"time"
"github.com/shurcooL/githubv4"
"golang.org/x/oauth2"
)
// RepoInfo is the record produced for one GitHub repository: its identity
// plus the activity, popularity, and status values read from the API.
type RepoInfo struct {
	// PushedAt is the repository's reported pushedAt timestamp.
	PushedAt time.Time
	// Owner and Name identify the repository; URL is its github.com address.
	Owner, Name, URL string
	// Stars and Forks are the stargazer and fork counts.
	Stars, Forks int
	// Status flags. HasLicense is true when the API returned license info.
	IsArchived, IsDisabled, IsPrivate, HasLicense bool
}
// ExtractGitHubRepo extracts owner/name from a GitHub repository URL.
//
// Only URLs whose host is github.com or www.github.com (case-insensitive) and
// whose path is exactly "/<owner>/<name>" are accepted. Leading and trailing
// slashes are ignored and a ".git" suffix is trimmed from the name. Deeper
// paths (issues, wiki, ...) and two-segment GitHub site pages such as
// /topics/<x>, /orgs/<x> or /sponsors/<x> are not repositories: for those, and
// for URLs that fail to parse, ok is false and owner and name are empty.
func ExtractGitHubRepo(rawURL string) (owner, name string, ok bool) {
	u, err := url.Parse(rawURL)
	if err != nil {
		return "", "", false
	}

	host := strings.ToLower(u.Hostname())
	if host != "github.com" && host != "www.github.com" {
		return "", "", false
	}

	parts := strings.Split(strings.Trim(u.Path, "/"), "/")
	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
		return "", "", false
	}

	// Skip known non-repository top-level routes. These first segments are
	// GitHub site pages rather than accounts, so "/<route>/<x>" never names a
	// repository. The match is case-insensitive so "/Topics/x" is skipped too.
	switch strings.ToLower(parts[0]) {
	case "apps", "features", "topics",
		"orgs", "users", "sponsors",
		"marketplace", "collections", "settings":
		return "", "", false
	}

	// A bare ".git" segment leaves no repository name behind.
	name = strings.TrimSuffix(parts[1], ".git")
	if name == "" {
		return "", "", false
	}
	return parts[0], name, true
}
// isHTTPURL reports whether raw parses as a URL whose scheme is http or
// https. Unparseable input yields false.
func isHTTPURL(raw string) bool {
	parsed, err := url.Parse(raw)
	if err != nil {
		return false
	}
	switch parsed.Scheme {
	case "http", "https":
		return true
	default:
		return false
	}
}
// isGitHubAuthError reports whether err's message contains, ignoring case,
// one of the phrases this package treats as a GitHub authentication or
// authorization failure. A nil error is never an auth error.
func isGitHubAuthError(err error) bool {
	if err == nil {
		return false
	}
	msg := strings.ToLower(err.Error())
	for _, marker := range []string{
		"401 unauthorized",
		"bad credentials",
		"resource not accessible by integration",
	} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
// PartitionLinks splits urls into two groups: links that ExtractGitHubRepo
// recognizes as GitHub repositories, and every other link with an http or
// https scheme. Input order is kept within each group; links that fit neither
// group are dropped.
func PartitionLinks(urls []string) (github, external []string) {
	for _, link := range urls {
		_, _, isRepo := ExtractGitHubRepo(link)
		switch {
		case isRepo:
			github = append(github, link)
		case isHTTPURL(link):
			external = append(external, link)
		}
	}
	return github, external
}
// GitHubChecker uses the GitHub GraphQL API.
// It wraps a githubv4 client; construct one with NewGitHubChecker.
type GitHubChecker struct {
// client is the GraphQL client that CheckRepo issues its queries through.
client *githubv4.Client
}
// NewGitHubChecker returns a GitHubChecker whose GraphQL client is built on an
// OAuth2 HTTP client that uses token as a static access token.
func NewGitHubChecker(token string) *GitHubChecker {
	tokenSource := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})
	authedHTTP := oauth2.NewClient(context.Background(), tokenSource)

	checker := new(GitHubChecker)
	checker.client = githubv4.NewClient(authedHTTP)
	return checker
}
// CheckRepo fetches metadata for the repository owner/name with one GraphQL
// query and returns it as a RepoInfo.
//
// On failure it returns the zero RepoInfo and the query error wrapped with the
// owner/name being looked up.
func (gc *GitHubChecker) CheckRepo(ctx context.Context, owner, name string) (RepoInfo, error) {
	// The field names and the graphql tag below define the query that is sent,
	// so they must stay exactly as written.
	var q struct {
		Repository struct {
			PushedAt       time.Time
			LicenseInfo    *struct{ Name string }
			StargazerCount int
			ForkCount      int
			IsArchived     bool
			IsDisabled     bool
			IsPrivate      bool
		} `graphql:"repository(owner: $owner, name: $name)"`
	}
	variables := map[string]any{
		"owner": githubv4.String(owner),
		"name":  githubv4.String(name),
	}

	err := gc.client.Query(ctx, &q, variables)
	if err != nil {
		return RepoInfo{}, fmt.Errorf("github query %s/%s: %w", owner, name, err)
	}

	repo := &q.Repository
	info := RepoInfo{
		Owner: owner,
		Name:  name,
		URL:   "https://github.com/" + owner + "/" + name,
	}
	info.PushedAt = repo.PushedAt
	info.Stars = repo.StargazerCount
	info.Forks = repo.ForkCount
	info.IsArchived = repo.IsArchived
	info.IsDisabled = repo.IsDisabled
	info.IsPrivate = repo.IsPrivate
	// A nil LicenseInfo means the API returned no license for the repository.
	info.HasLicense = repo.LicenseInfo != nil
	return info, nil
}
// CheckRepos queries the GitHub repositories named by urls one at a time and
// returns the successfully fetched RepoInfo values together with every error
// encountered.
//
// URLs that ExtractGitHubRepo does not accept are skipped. After each group of
// batchSize issued queries (50 when batchSize <= 0) the loop pauses for one
// second before the next query, as simple rate limiting. Failed queries count
// toward the batch because they are requests too.
//
// The loop stops early in two cases: when a query fails with what
// isGitHubAuthError recognizes as an authentication error, and when ctx is
// done. In the latter case a single error wrapping ctx.Err() is appended
// instead of one failure per remaining URL.
func (gc *GitHubChecker) CheckRepos(ctx context.Context, urls []string, batchSize int) ([]RepoInfo, []error) {
	if batchSize <= 0 {
		batchSize = 50
	}

	var results []RepoInfo
	var errs []error

	// queried counts requests actually sent, so skipped URLs and failures do
	// not shift or suppress the pause between batches.
	queried := 0
	for _, rawURL := range urls {
		owner, name, ok := ExtractGitHubRepo(rawURL)
		if !ok {
			continue
		}

		// Pause before starting a new batch rather than after finishing one,
		// so no time is spent waiting once the last repository is done. The
		// wait is abandoned as soon as ctx is canceled.
		if queried > 0 && queried%batchSize == 0 {
			timer := time.NewTimer(time.Second)
			select {
			case <-ctx.Done():
				timer.Stop()
			case <-timer.C:
			}
		}

		// Once ctx is done every further query would fail the same way;
		// report it once and stop.
		if err := ctx.Err(); err != nil {
			errs = append(errs, fmt.Errorf("github check stopped: %w", err))
			break
		}

		queried++
		info, err := gc.CheckRepo(ctx, owner, name)
		if err != nil {
			errs = append(errs, err)
			// Bad credentials will fail every remaining query as well.
			if isGitHubAuthError(err) {
				break
			}
			continue
		}
		results = append(results, info)
	}
	return results, errs
}