Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
369e78b
Add scan command for multi-repo discovery and consolidated reporting
lex0c Apr 20, 2026
02100f5
Detect bare repositories during scan discovery
lex0c Apr 20, 2026
16742da
Strip repo prefix from suspect-detector suggestions
lex0c Apr 20, 2026
61f8e65
Honor negated ignore rules before pruning a directory
lex0c Apr 20, 2026
5950a26
Keep per-repo commits when SHAs overlap across repositories
lex0c Apr 20, 2026
9a6f5f6
Normalize repo prefix before suspect-pattern matching
lex0c Apr 20, 2026
98efb10
Use numeric value for repo breakdown bar widths
lex0c Apr 20, 2026
6cfe86d
Skip ignored repos before recording discovery hits
lex0c Apr 20, 2026
26ac75e
Account for globbed negations in reinclude checks
lex0c Apr 20, 2026
e85768e
Guarantee unique slugs after hashing duplicate basenames
lex0c Apr 20, 2026
c4958bb
Propagate context cancellation into Discover walk
lex0c Apr 20, 2026
6153c59
Harden tests for scan feature gaps
lex0c Apr 20, 2026
0947705
Document scan command and per-repo breakdown metric
lex0c Apr 20, 2026
00b7321
Validate --since before launching scan
lex0c Apr 20, 2026
8142695
Resolve symlink roots before walking discovery tree
lex0c Apr 20, 2026
5bd94ab
Normalize slug uniqueness for case-insensitive filesystems
lex0c Apr 20, 2026
89df586
Fail scan when every repo's extract failed
lex0c Apr 20, 2026
66f4516
Reject missing explicit ignore file paths
lex0c Apr 20, 2026
f62cdc5
Recover from worker panics in the scan pool
lex0c Apr 20, 2026
ecce5ee
Scope Per-Repository Breakdown to profile reports only
lex0c Apr 20, 2026
a0a6251
Add --report-dir for per-repo HTML reports plus an index landing page
lex0c Apr 20, 2026
fefced5
Fix report-dir pending accounting, downgrade render errors, reserve i…
lex0c Apr 20, 2026
f2810b5
Align scan-index footer with the per-repo report footer
lex0c Apr 20, 2026
c0bfccb
Simplify scan-index heading and drop roots subtitle
lex0c Apr 20, 2026
3494e0e
Trim redundant chrome from scan-index cards
lex0c Apr 20, 2026
49cd00d
Drop green left-border accent from ok scan-index cards
lex0c Apr 20, 2026
2ae7961
Surface last-commit recency on scan index cards
lex0c Apr 20, 2026
d59338e
Address 7 minors from the recency-batch review
lex0c Apr 20, 2026
87d3e8b
Drop scan-index H1; promote summary cards to page anchor
lex0c Apr 20, 2026
60b4c37
Unify scan-index summary-card CSS with the team/profile templates
lex0c Apr 20, 2026
7666e57
Order scan index as a triage view, not alphabetical
lex0c Apr 20, 2026
549775f
Validate --email requires --report before launching scan
lex0c Apr 20, 2026
049cdb5
Guard future dates before truncating day differences
lex0c Apr 20, 2026
302d13f
Validate .git entry content before treating a dir as a repo
lex0c Apr 20, 2026
9d61c47
Normalize skipped statuses before rendering scan index
lex0c Apr 21, 2026
13749d3
Use an aggregated label for multi-root profile reports
lex0c Apr 21, 2026
247cd16
Raise scan parallel default and skip the sort in dev-email aggregation
lex0c Apr 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 162 additions & 0 deletions cmd/gitcortex/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/lex0c/gitcortex/internal/extract"
"github.com/lex0c/gitcortex/internal/git"
reportpkg "github.com/lex0c/gitcortex/internal/report"
"github.com/lex0c/gitcortex/internal/scan"
"github.com/lex0c/gitcortex/internal/stats"

"github.com/spf13/cobra"
Expand All @@ -35,6 +36,7 @@ func main() {
rootCmd.AddCommand(diffCmd())
rootCmd.AddCommand(ciCmd())
rootCmd.AddCommand(reportCmd())
rootCmd.AddCommand(scanCmd())

if err := rootCmd.Execute(); err != nil {
os.Exit(1)
Expand Down Expand Up @@ -869,3 +871,163 @@ func reportCmd() *cobra.Command {

return cmd
}

// --- Scan ---

func scanCmd() *cobra.Command {
var (
roots []string
output string
ignoreFile string
maxDepth int
parallel int
email string
from string
to string
since string
reportPath string
topN int
extractIgnore []string
batchSize int
mailmap bool
firstParent bool
includeMessages bool
couplingMaxFiles int
couplingMinChanges int
churnHalfLife int
networkMinFiles int
)

cmd := &cobra.Command{
Use: "scan",
Short: "Discover git repositories under one or more roots and consolidate their history",
Long: `Walk the given root(s), find every git repository, and run extract on each
repository in parallel. Outputs one JSONL per repo plus a manifest in --output.
Optionally generates a consolidated HTML report including a per-repository
breakdown — handy for showing aggregated work across many repos.`,
RunE: func(cmd *cobra.Command, args []string) error {
if len(roots) == 0 {
return fmt.Errorf("--root is required (repeatable for multiple roots)")
}
if since != "" && (from != "" || to != "") {
return fmt.Errorf("--since cannot be combined with --from/--to")
}
if err := validateDate(from, "--from"); err != nil {
return err
}
if err := validateDate(to, "--to"); err != nil {
return err
}
if from != "" && to != "" && from > to {
return fmt.Errorf("--from (%s) must be on or before --to (%s)", from, to)
}

cfg := scan.Config{
Roots: roots,
Output: output,
IgnoreFile: ignoreFile,
MaxDepth: maxDepth,
Parallel: parallel,
Extract: extract.Config{
BatchSize: batchSize,
IncludeMessages: includeMessages,
CommandTimeout: extract.DefaultCommandTimeout,
FirstParent: firstParent,
Mailmap: mailmap,
IgnorePatterns: extractIgnore,
StartOffset: -1,
},
}

ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer stop()

result, err := scan.Run(ctx, cfg)
// scan.Run returns a partial result alongside ctx.Err() on
// cancellation. Honor that — write whatever progress we made
// to disk and surface the error so the CLI exits non-zero.
if err != nil {
return err
}

if reportPath == "" {
fmt.Fprintf(os.Stderr, "Scan complete: %d JSONL file(s) in %s\n", len(result.JSONLs), result.OutputDir)
return nil
Comment thread
lex0c marked this conversation as resolved.
Outdated
}
if len(result.JSONLs) == 0 {
return fmt.Errorf("no successful repos extracted; cannot build report")
}

fromDate := from
if since != "" {
d, err := parseSince(since)
if err != nil {
return err
Comment thread
lex0c marked this conversation as resolved.
Outdated
}
fromDate = d
}

ds, err := stats.LoadMultiJSONL(result.JSONLs, stats.LoadOptions{
From: fromDate,
To: to,
HalfLifeDays: churnHalfLife,
CoupMaxFiles: couplingMaxFiles,
})
if err != nil {
return fmt.Errorf("load consolidated dataset: %w", err)
}
fmt.Fprintf(os.Stderr, "Loaded %d commits across %d repo(s)\n", ds.CommitCount, len(result.JSONLs))

f, err := os.Create(reportPath)
if err != nil {
return fmt.Errorf("create report: %w", err)
}
defer f.Close()

// Label the report after the basename of the first --root
// (or the output dir as a fallback). "scan-scan-output" was
// the previous default; users find the root name far more
// recognizable as the H1 of the report.
repoLabel := filepath.Base(result.OutputDir)
if len(cfg.Roots) > 0 {
repoLabel = filepath.Base(absPath(cfg.Roots[0]))
}
sf := stats.StatsFlags{CouplingMinChanges: couplingMinChanges, NetworkMinFiles: networkMinFiles}
if email != "" {
if err := reportpkg.GenerateProfile(f, ds, repoLabel, email); err != nil {
return fmt.Errorf("generate profile report: %w", err)
}
fmt.Fprintf(os.Stderr, "Profile report for %s written to %s\n", email, fileURL(reportPath))
return nil
}
if err := reportpkg.Generate(f, ds, repoLabel, topN, sf); err != nil {
return fmt.Errorf("generate report: %w", err)
}
fmt.Fprintf(os.Stderr, "Consolidated report written to %s\n", fileURL(reportPath))
return nil
},
}

cmd.Flags().StringSliceVar(&roots, "root", nil, "Root directory to walk for repositories (repeatable)")
cmd.Flags().StringVar(&output, "output", "scan-output", "Directory to write per-repo JSONL files and the manifest")
cmd.Flags().StringVar(&ignoreFile, "ignore-file", "", "Gitignore-style file with directories to skip during discovery. When unset, only the first --root is searched for a .gitcortex-ignore; pass an explicit path to apply rules across all roots.")
cmd.Flags().IntVar(&maxDepth, "max-depth", 0, "Maximum directory depth to descend into when looking for repos (0 = unlimited)")
cmd.Flags().IntVar(&parallel, "parallel", 4, "Number of repositories to extract in parallel")
cmd.Flags().StringVar(&email, "email", "", "Generate a per-developer profile report (only when --report is set)")
cmd.Flags().StringVar(&from, "from", "", "Window start date YYYY-MM-DD (forwarded to the consolidated report)")
cmd.Flags().StringVar(&to, "to", "", "Window end date YYYY-MM-DD (forwarded to the consolidated report)")
cmd.Flags().StringVar(&since, "since", "", "Filter to recent period (e.g. 7d, 4w, 3m, 1y); mutually exclusive with --from/--to")
cmd.Flags().StringVar(&reportPath, "report", "", "If set, generate a consolidated HTML report at this path after the scan")
cmd.Flags().IntVar(&topN, "top", 20, "Top-N entries per section in the consolidated report")
cmd.Flags().StringSliceVar(&extractIgnore, "extract-ignore", nil, "Glob patterns forwarded to per-repo extract --ignore (e.g. package-lock.json)")
cmd.Flags().IntVar(&batchSize, "batch-size", 1000, "Per-repo extract checkpoint interval")
cmd.Flags().BoolVar(&mailmap, "mailmap", false, "Use .mailmap (per repo) to normalize identities")
cmd.Flags().BoolVar(&firstParent, "first-parent", false, "Restrict extracts to the first-parent chain")
cmd.Flags().BoolVar(&includeMessages, "include-commit-messages", false, "Include commit messages in JSONL (needed for Top Commits in the consolidated report)")
cmd.Flags().IntVar(&couplingMaxFiles, "coupling-max-files", 50, "Max files per commit for coupling analysis (consolidated report)")
cmd.Flags().IntVar(&couplingMinChanges, "coupling-min-changes", 5, "Min co-changes for coupling results (consolidated report)")
cmd.Flags().IntVar(&churnHalfLife, "churn-half-life", 90, "Half-life in days for churn decay (consolidated report)")
cmd.Flags().IntVar(&networkMinFiles, "network-min-files", 5, "Min shared files for dev-network edges (consolidated report)")

return cmd
}
36 changes: 36 additions & 0 deletions internal/report/profile_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,42 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
</div>
{{end}}

{{if gt (len .Repos) 1}}
<h2>Per-Repository Breakdown <span style="font-size:13px; color:#656d76; font-weight:normal;">{{thousands (len .Repos)}} repositories</span></h2>
<p class="hint">How {{.Profile.Name}}'s work is split across repositories in this scan. Use this to point at the projects with the most activity, or to spot single-repo focus vs. broad multi-repo engagement.</p>
<table>
<tr>
<th>Repository</th>
<th>Commits</th>
<th>% of My Commits</th>
<th>Churn</th>
<th>% of My Churn</th>
<th>Files</th>
<th>Active days</th>
<th>First → Last</th>
</tr>
{{range .Repos}}
<tr>
<td class="mono">{{.Repo}}</td>
<td>{{thousands .Commits}}</td>
<td>
<div style="display:flex; align-items:center; gap:6px;">
<div style="flex:0 0 60px; height:8px; background:#eaeef2; border-radius:3px; overflow:hidden;">
<div style="height:100%; width:{{printf "%.0f" .PctOfTotalCommits}}%; background:#0969da;"></div>
</div>
<span class="mono">{{printf "%.1f" .PctOfTotalCommits}}%</span>
</div>
</td>
<td>{{thousands .Churn}}</td>
<td class="mono">{{printf "%.1f" .PctOfTotalChurn}}%</td>
<td>{{thousands .Files}}</td>
<td>{{.ActiveDays}}</td>
<td class="mono" style="font-size:11px;">{{.FirstCommitDate}} → {{.LastCommitDate}}</td>
</tr>
{{end}}
</table>
{{end}}

{{if .Profile.TopFiles}}
<h2>Top Files</h2>
<p class="hint">Files this developer changed most (churn = additions + deletions). High churn on few files suggests deep ownership and potential knowledge concentration. · {{docRef "hotspots"}}</p>
Expand Down
15 changes: 15 additions & 0 deletions internal/report/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ type ReportData struct {
TotalDirectories int
TotalExtensions int
TotalBusFactorFiles int

// Repos holds per-repository aggregates for multi-repo (scan) reports.
// Empty on single-repo runs — the template gates the section behind
// `{{if gt (len .Repos) 1}}` so single-repo callers keep their
// existing layout untouched.
Repos []stats.RepoStat
}

// htmlTreeDepth caps the repo-structure tree baked into the HTML report.
Expand Down Expand Up @@ -376,6 +382,7 @@ func Generate(w io.Writer, ds *stats.Dataset, repoName string, topN int, sf stat
TotalDirectories: stats.DirectoryCount(ds),
TotalExtensions: stats.ExtensionCount(ds),
TotalBusFactorFiles: stats.BusFactorCount(ds),
Repos: stats.RepoBreakdown(ds, ""),
}
CapChildrenPerDir(data.Structure, htmlTreeMaxChildrenPerDir)

Expand Down Expand Up @@ -641,6 +648,13 @@ type ProfileReportData struct {
MaxActivityCommits int
PatternGrid [7][24]int
MaxPattern int

// Repos is the per-repository breakdown filtered to this developer's
// commits. Empty on single-repo profile reports — gated in the
// template so existing single-repo callers see no change. The headline
// use case for `gitcortex scan --email me` lives here: the developer
// can see at a glance which repos they spent time in.
Repos []stats.RepoStat
}

func GenerateProfile(w io.Writer, ds *stats.Dataset, repoName, email string) error {
Expand Down Expand Up @@ -672,6 +686,7 @@ func GenerateProfile(w io.Writer, ds *stats.Dataset, repoName, email string) err
MaxActivityCommits: maxAct,
PatternGrid: p.WorkGrid,
MaxPattern: maxP,
Repos: stats.RepoBreakdown(ds, email),
}

return profileTmpl.Execute(w, data)
Expand Down
39 changes: 39 additions & 0 deletions internal/report/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,45 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col
</div>
</div>

{{if gt (len .Repos) 1}}
<h2>Per-Repository Breakdown <span style="font-size:13px; color:#656d76; font-weight:normal;">{{thousands (len .Repos)}} repositories</span></h2>
<p class="hint">Cross-repo aggregation from <code>scan</code>. Use this view to see how work is distributed: a single repo dominating commits or churn often means the consolidated activity story is really about that one project; an even spread shows broad multi-repo engagement. Bar widths are normalized to the largest commit-count repo.</p>
{{$maxRepoCommits := 0}}{{range .Repos}}{{if gt .Commits $maxRepoCommits}}{{$maxRepoCommits = .Commits}}{{end}}{{end}}
<table>
<tr>
<th>Repository</th>
<th>Commits</th>
<th>% Commits</th>
<th>Churn</th>
<th>% Churn</th>
<th>Files</th>
<th>Active days</th>
<th>Devs</th>
<th>First → Last</th>
</tr>
{{range .Repos}}
<tr>
<td class="mono">{{.Repo}}</td>
<td>{{thousands .Commits}}</td>
<td>
<div style="display:flex; align-items:center; gap:6px;">
<div style="flex:0 0 60px; height:8px; background:#eaeef2; border-radius:3px; overflow:hidden;">
<div style="height:100%; width:{{if gt $maxRepoCommits 0}}{{printf "%.0f" (pctFloat .PctOfTotalCommits 100.0)}}%{{else}}0%{{end}}; background:#216e39;"></div>
Comment thread
lex0c marked this conversation as resolved.
Outdated
</div>
<span class="mono">{{printf "%.1f" .PctOfTotalCommits}}%</span>
</div>
</td>
<td>{{thousands .Churn}}</td>
<td class="mono">{{printf "%.1f" .PctOfTotalChurn}}%</td>
<td>{{thousands .Files}}</td>
<td>{{.ActiveDays}}</td>
<td>{{.UniqueDevs}}</td>
<td class="mono" style="font-size:11px;">{{.FirstCommitDate}} → {{.LastCommitDate}}</td>
</tr>
{{end}}
</table>
{{end}}

{{if .ActivityYears}}
<h2 style="display:flex; justify-content:space-between; align-items:center;">Activity <button onclick="var h=document.getElementById('act-heatmap'),t=document.getElementById('act-table');h.hidden=!h.hidden;t.hidden=!t.hidden;this.textContent=h.hidden?'heatmap':'table'" style="font-size:11px; font-weight:normal; padding:2px 10px; border:1px solid #d0d7de; border-radius:4px; background:#f6f8fa; color:#24292f; cursor:pointer;">table</button></h2>
<p class="hint">Monthly commit heatmap. Darker = more commits. Sudden drop-offs may mark team changes, re-orgs, or freezes; steady cadence signals healthy pace. Hover for details; toggle to table for exact numbers. · {{docRef "activity"}}</p>
Expand Down
Loading
Loading