From eeee3b60a1fd8dc8b8b92997f709ef65e4b2097f Mon Sep 17 00:00:00 2001 From: Peter Stone Date: Thu, 19 Mar 2026 23:15:58 +0000 Subject: feat: add errors, throughput, and billing sections to stats dashboard - GET /api/stats?window=7d: pre-aggregated SQL queries for errors, throughput, billing - Errors section: category summary (quota/rate_limit/timeout/git/failed) + failure table - Throughput section: stacked hourly bar chart (completed/failed/other) over 7d - Billing section: KPIs (7d total, avg/day, cost/run) + daily cost bar chart Co-Authored-By: Claude Sonnet 4.6 --- internal/api/executions.go | 17 ++++ internal/api/server.go | 1 + internal/storage/db.go | 135 +++++++++++++++++++++++++++++++ web/app.js | 195 ++++++++++++++++++++++++++++++++++++++++++++- web/style.css | 108 +++++++++++++++++++++++++ 5 files changed, 454 insertions(+), 2 deletions(-) diff --git a/internal/api/executions.go b/internal/api/executions.go index 29af139..4d8ba9c 100644 --- a/internal/api/executions.go +++ b/internal/api/executions.go @@ -86,6 +86,23 @@ func (s *Server) handleGetExecutionLog(w http.ResponseWriter, r *http.Request) { fmt.Fprint(w, content) } +// handleGetDashboardStats returns pre-aggregated error, throughput, and billing stats. +// GET /api/stats?window=7d|24h +func (s *Server) handleGetDashboardStats(w http.ResponseWriter, r *http.Request) { + window := 7 * 24 * time.Hour + if r.URL.Query().Get("window") == "24h" { + window = 24 * time.Hour + } + since := time.Now().Add(-window) + + stats, err := s.store.QueryDashboardStats(since) + if err != nil { + writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()}) + return + } + writeJSON(w, http.StatusOK, stats) +} + // handleGetAgentStatus returns the current status of all agents and recent rate-limit events. 
// Row types for the stats dashboard. Each one mirrors a single row of an
// aggregate query and is serialised to JSON using the field tags below.
type (
	// ThroughputBucket holds execution counts, split by outcome, for one
	// time bucket.
	ThroughputBucket struct {
		Hour      string `json:"hour"` // RFC3339 timestamp truncated to the hour
		Completed int    `json:"completed"`
		Failed    int    `json:"failed"`
		Other     int    `json:"other"`
	}

	// BillingDay holds the aggregated cost and number of runs for one
	// calendar day.
	BillingDay struct {
		Day     string  `json:"day"` // YYYY-MM-DD
		CostUSD float64 `json:"cost_usd"`
		Runs    int     `json:"runs"`
	}

	// FailedExecution describes one failed, timed-out, or budget-exceeded
	// execution together with its error message and derived category.
	FailedExecution struct {
		ID        string    `json:"id"`
		TaskID    string    `json:"task_id"`
		TaskName  string    `json:"task_name"`
		Status    string    `json:"status"`
		ErrorMsg  string    `json:"error_msg"`
		Category  string    `json:"category"` // quota | timeout | rate_limit | git | failed
		StartedAt time.Time `json:"started_at"`
	}
)
+type DashboardStats struct { + Throughput []ThroughputBucket `json:"throughput"` + Billing []BillingDay `json:"billing"` + Failures []FailedExecution `json:"failures"` +} + +// QueryDashboardStats returns pre-aggregated stats for the given window. +func (s *DB) QueryDashboardStats(since time.Time) (*DashboardStats, error) { + stats := &DashboardStats{ + Throughput: []ThroughputBucket{}, + Billing: []BillingDay{}, + Failures: []FailedExecution{}, + } + + // Throughput: completions per hour bucket + tpRows, err := s.db.Query(` + SELECT strftime('%Y-%m-%dT%H:00:00Z', start_time) as hour, + SUM(CASE WHEN status IN ('COMPLETED','READY') THEN 1 ELSE 0 END), + SUM(CASE WHEN status IN ('FAILED','TIMED_OUT','BUDGET_EXCEEDED') THEN 1 ELSE 0 END), + SUM(CASE WHEN status NOT IN ('COMPLETED','READY','FAILED','TIMED_OUT','BUDGET_EXCEEDED') THEN 1 ELSE 0 END) + FROM executions + WHERE start_time >= ? AND status NOT IN ('RUNNING','QUEUED','PENDING') + GROUP BY hour ORDER BY hour ASC`, since.UTC()) + if err != nil { + return nil, err + } + defer tpRows.Close() + for tpRows.Next() { + var b ThroughputBucket + if err := tpRows.Scan(&b.Hour, &b.Completed, &b.Failed, &b.Other); err != nil { + return nil, err + } + stats.Throughput = append(stats.Throughput, b) + } + if err := tpRows.Err(); err != nil { + return nil, err + } + + // Billing: cost per day + billRows, err := s.db.Query(` + SELECT date(start_time) as day, COALESCE(SUM(cost_usd),0), COUNT(*) + FROM executions + WHERE start_time >= ? 
+ GROUP BY day ORDER BY day ASC`, since.UTC()) + if err != nil { + return nil, err + } + defer billRows.Close() + for billRows.Next() { + var b BillingDay + if err := billRows.Scan(&b.Day, &b.CostUSD, &b.Runs); err != nil { + return nil, err + } + stats.Billing = append(stats.Billing, b) + } + if err := billRows.Err(); err != nil { + return nil, err + } + + // Failures: recent failed executions with error messages + failRows, err := s.db.Query(` + SELECT e.id, e.task_id, t.name, e.status, COALESCE(e.error_msg,''), e.start_time + FROM executions e JOIN tasks t ON e.task_id = t.id + WHERE e.start_time >= ? AND e.status IN ('FAILED','TIMED_OUT','BUDGET_EXCEEDED') + ORDER BY e.start_time DESC LIMIT 50`, since.UTC()) + if err != nil { + return nil, err + } + defer failRows.Close() + for failRows.Next() { + var f FailedExecution + if err := failRows.Scan(&f.ID, &f.TaskID, &f.TaskName, &f.Status, &f.ErrorMsg, &f.StartedAt); err != nil { + return nil, err + } + f.Category = classifyError(f.Status, f.ErrorMsg) + stats.Failures = append(stats.Failures, f) + } + if err := failRows.Err(); err != nil { + return nil, err + } + + return stats, nil +} + +// classifyError maps a status + error message to a human category. 
// classifyError maps an execution's terminal status and error message to one
// of the dashboard failure categories: "timeout", "quota", "rate_limit",
// "git", or "failed".
//
// The status wins over the message: TIMED_OUT is always "timeout" and
// BUDGET_EXCEEDED is always "quota". For any other status the message is
// matched case-insensitively against keyword groups and the first match wins,
// in the order quota, rate_limit, git, timeout. Messages that match nothing
// (including the empty message) are "failed".
func classifyError(status, msg string) string {
	switch status {
	case "TIMED_OUT":
		return "timeout"
	case "BUDGET_EXCEEDED":
		return "quota"
	}

	low := strings.ToLower(msg)
	switch {
	// "quota" also matches messages that mention "terminalquota".
	case errMsgContainsAny(low, "quota", "exhausted"):
		return "quota"
	// 429 must stand alone as a number; a bare substring test would also
	// fire on unrelated values such as "14290 bytes".
	case errMsgContainsAny(low, "rate limit", "too many requests"), errMsgHasNumber(low, "429"):
		return "rate_limit"
	case errMsgContainsAny(low, "git push", "git pull"):
		return "git"
	// "timed out" is the same condition as "timeout", spelled as two words.
	case errMsgContainsAny(low, "timeout", "timed out", "deadline"):
		return "timeout"
	}
	return "failed"
}

// errMsgContainsAny reports whether s contains at least one of needles.
func errMsgContainsAny(s string, needles ...string) bool {
	for _, n := range needles {
		if strings.Contains(s, n) {
			return true
		}
	}
	return false
}

// errMsgHasNumber reports whether s contains the decimal number num with no
// ASCII digit directly before or after it, i.e. not as part of a longer number.
func errMsgHasNumber(s, num string) bool {
	isDigit := func(c byte) bool { return c >= '0' && c <= '9' }
	for from := 0; from < len(s); {
		i := strings.Index(s[from:], num)
		if i < 0 {
			return false
		}
		start := from + i
		end := start + len(num)
		if (start == 0 || !isDigit(s[start-1])) && (end == len(s) || !isDigit(s[end])) {
			return true
		}
		from = start + 1
	}
	return false
}
`${h}h ${rm}m` : `${h}h`; } -function renderStatsPanel(tasks, executions, agentData = { agents: [], events: [] }) { +function renderStatsPanel(tasks, executions, agentData = { agents: [], events: [] }, dashStats = { throughput: [], billing: [], failures: [] }) { const panel = document.querySelector('[data-panel="stats"]'); if (!panel) return; @@ -2599,6 +2600,196 @@ function renderStatsPanel(tasks, executions, agentData = { agents: [], events: [ panel.appendChild(execSection); + // ── Errors ──────────────────────────────────────────────────────────────── + const failures = dashStats.failures || []; + const errSection = document.createElement('div'); + errSection.className = 'stats-section'; + + const errHeading = document.createElement('h2'); + errHeading.textContent = 'Errors (Last 7d)'; + errSection.appendChild(errHeading); + + if (failures.length === 0) { + const none = document.createElement('p'); + none.className = 'task-meta'; + none.textContent = 'No failures in the last 7 days.'; + errSection.appendChild(none); + } else { + // Category summary bar + const cats = {}; + for (const f of failures) cats[f.category] = (cats[f.category] || 0) + 1; + const catOrder = ['quota', 'rate_limit', 'timeout', 'git', 'failed']; + const catLabels = { quota: 'Quota', rate_limit: 'Rate limit', timeout: 'Timeout', git: 'Git', failed: 'Failed' }; + const catColors = { quota: 'var(--state-budget-exceeded)', rate_limit: 'var(--state-failed)', timeout: 'var(--state-timed-out)', git: 'var(--state-cancelled)', failed: 'var(--state-failed)' }; + + const catRow = document.createElement('div'); + catRow.className = 'stats-kpis'; + const allCats = [...catOrder, ...Object.keys(cats).filter(c => !catOrder.includes(c))]; + for (const cat of allCats) { + if (!cats[cat]) continue; + const box = document.createElement('div'); + box.className = 'stats-kpi-box stats-err-cat'; + box.style.setProperty('--cat-color', catColors[cat] || 'var(--state-failed)'); + const val = 
document.createElement('span'); + val.className = 'stats-kpi-value'; + val.textContent = String(cats[cat]); + const lbl = document.createElement('span'); + lbl.className = 'stats-kpi-label'; + lbl.textContent = catLabels[cat] || cat; + box.appendChild(val); + box.appendChild(lbl); + catRow.appendChild(box); + } + errSection.appendChild(catRow); + + // Failure table + const errTable = document.createElement('table'); + errTable.className = 'stats-exec-table'; + errTable.style.marginTop = '0.75rem'; + errTable.innerHTML = 'TaskCategoryErrorTime'; + const errTbody = document.createElement('tbody'); + for (const f of failures.slice(0, 25)) { + const tr = document.createElement('tr'); + const ts = new Date(f.started_at).toLocaleString(); + const short = f.error_msg.length > 80 ? f.error_msg.slice(0, 80) + '…' : f.error_msg; + const catColor = catColors[f.category] || 'var(--state-failed)'; + tr.innerHTML = `${f.task_name}${catLabels[f.category] || f.category}${short}${ts}`; + errTbody.appendChild(tr); + } + errTable.appendChild(errTbody); + errSection.appendChild(errTable); + } + + panel.appendChild(errSection); + + // ── Throughput ──────────────────────────────────────────────────────────── + const throughput = dashStats.throughput || []; + const tpSection = document.createElement('div'); + tpSection.className = 'stats-section'; + + const tpHeading = document.createElement('h2'); + tpHeading.textContent = 'Throughput (Last 7d)'; + tpSection.appendChild(tpHeading); + + if (throughput.length === 0) { + const none = document.createElement('p'); + none.className = 'task-meta'; + none.textContent = 'No execution data yet.'; + tpSection.appendChild(none); + } else { + const maxTotal = Math.max(...throughput.map(b => b.completed + b.failed + b.other), 1); + const chart = document.createElement('div'); + chart.className = 'stats-tp-chart'; + + for (const bucket of throughput) { + const total = bucket.completed + bucket.failed + bucket.other; + const col = 
document.createElement('div'); + col.className = 'stats-tp-col'; + const heightPct = (total / maxTotal) * 100; + const label = new Date(bucket.hour).toLocaleString(undefined, { month: 'short', day: 'numeric', hour: '2-digit' }); + col.title = `${label}\n✓ ${bucket.completed} ✗ ${bucket.failed} ○ ${bucket.other}`; + + if (total > 0) { + const bar = document.createElement('div'); + bar.className = 'stats-tp-bar'; + bar.style.height = `${heightPct.toFixed(1)}%`; + + const cPct = (bucket.completed / total) * 100; + const fPct = (bucket.failed / total) * 100; + const oPct = 100 - cPct - fPct; + + bar.style.background = `linear-gradient(to top, + var(--state-failed) 0% ${fPct.toFixed(1)}%, + var(--state-timed-out) ${fPct.toFixed(1)}% ${(fPct+oPct).toFixed(1)}%, + var(--state-completed) ${(fPct+oPct).toFixed(1)}% 100%)`; + + col.appendChild(bar); + } + + chart.appendChild(col); + } + tpSection.appendChild(chart); + + const tpLegend = document.createElement('div'); + tpLegend.className = 'stats-tp-legend'; + tpLegend.innerHTML = ` + Completed + Failed + Other + `; + tpSection.appendChild(tpLegend); + } + + panel.appendChild(tpSection); + + // ── Billing ─────────────────────────────────────────────────────────────── + const billing = dashStats.billing || []; + const billSection = document.createElement('div'); + billSection.className = 'stats-section'; + + const billHeading = document.createElement('h2'); + billHeading.textContent = 'Cost (Last 7d)'; + billSection.appendChild(billHeading); + + if (billing.length === 0) { + const none = document.createElement('p'); + none.className = 'task-meta'; + none.textContent = 'No cost data yet.'; + billSection.appendChild(none); + } else { + const totalCost = billing.reduce((s, d) => s + d.cost_usd, 0); + const totalRuns = billing.reduce((s, d) => s + d.runs, 0); + + const billKpis = document.createElement('div'); + billKpis.className = 'stats-kpis'; + for (const kpi of [ + { label: '7d Total', value: `$${totalCost.toFixed(2)}` }, + 
{ label: 'Avg/Day', value: billing.length > 0 ? `$${(totalCost / billing.length).toFixed(2)}` : '—' }, + { label: 'Cost/Run', value: totalRuns > 0 ? `$${(totalCost / totalRuns).toFixed(3)}` : '—' }, + { label: 'Total Runs', value: String(totalRuns) }, + ]) { + const box = document.createElement('div'); + box.className = 'stats-kpi-box'; + const val = document.createElement('span'); + val.className = 'stats-kpi-value'; + val.textContent = kpi.value; + const lbl = document.createElement('span'); + lbl.className = 'stats-kpi-label'; + lbl.textContent = kpi.label; + box.appendChild(val); + box.appendChild(lbl); + billKpis.appendChild(box); + } + billSection.appendChild(billKpis); + + // Daily cost bar chart + const maxCost = Math.max(...billing.map(d => d.cost_usd), 0.001); + const billChart = document.createElement('div'); + billChart.className = 'stats-bill-chart'; + + for (const day of billing) { + const col = document.createElement('div'); + col.className = 'stats-bill-col'; + col.title = `${day.day}\n$${day.cost_usd.toFixed(3)} (${day.runs} runs)`; + + const bar = document.createElement('div'); + bar.className = 'stats-bill-bar'; + bar.style.height = `${((day.cost_usd / maxCost) * 100).toFixed(1)}%`; + + const dayLabel = document.createElement('span'); + dayLabel.className = 'stats-bill-day-label'; + const d = new Date(day.day + 'T12:00:00Z'); + dayLabel.textContent = d.toLocaleDateString(undefined, { month: 'short', day: 'numeric' }); + + col.appendChild(bar); + col.appendChild(dayLabel); + billChart.appendChild(col); + } + billSection.appendChild(billChart); + } + + panel.appendChild(billSection); + // ── Agent Status ─────────────────────────────────────────────────────────── const agentSection = document.createElement('div'); agentSection.className = 'stats-section'; diff --git a/web/style.css b/web/style.css index 37f3b61..90ceb90 100644 --- a/web/style.css +++ b/web/style.css @@ -1551,6 +1551,114 @@ dialog label select:focus { flex-shrink: 0; } +/* ── Error 
category badge ───────────────────────────────────────────────── */ +.stats-err-badge { + display: inline-block; + padding: 0.15rem 0.45rem; + border-radius: 4px; + font-size: 0.72rem; + font-weight: 600; + color: #fff; + white-space: nowrap; +} + +.stats-err-cat { + border-top: 3px solid var(--cat-color, var(--state-failed)); +} + +.stats-err-msg { + font-size: 0.75rem; + color: var(--text-muted); + max-width: 360px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +/* ── Throughput chart ───────────────────────────────────────────────────── */ +.stats-tp-chart { + display: flex; + align-items: flex-end; + gap: 2px; + height: 120px; + margin: 0.75rem 0 0.25rem; + border-bottom: 1px solid var(--border); +} + +.stats-tp-col { + flex: 1; + display: flex; + align-items: flex-end; + height: 100%; + min-width: 0; +} + +.stats-tp-bar { + width: 100%; + border-radius: 2px 2px 0 0; + min-height: 2px; +} + +.stats-tp-legend { + display: flex; + gap: 1rem; + font-size: 0.75rem; + color: var(--text-muted); + margin-top: 0.4rem; +} + +.stats-tp-legend-item { + display: flex; + align-items: center; + gap: 0.3rem; +} + +.stats-tp-swatch { + display: inline-block; + width: 10px; + height: 10px; + border-radius: 2px; + flex-shrink: 0; +} + +/* ── Billing chart ──────────────────────────────────────────────────────── */ +.stats-bill-chart { + display: flex; + align-items: flex-end; + gap: 4px; + height: 100px; + margin: 0.75rem 0 0; + border-bottom: 1px solid var(--border); +} + +.stats-bill-col { + flex: 1; + display: flex; + flex-direction: column; + align-items: center; + justify-content: flex-end; + height: 100%; + min-width: 0; +} + +.stats-bill-bar { + width: 100%; + background: var(--state-queued); + border-radius: 3px 3px 0 0; + min-height: 2px; +} + +.stats-bill-day-label { + font-size: 0.65rem; + color: var(--text-muted); + margin-top: 0.25rem; + text-align: center; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + width: 
100%; +} + /* ── Execution detail table ─────────────────────────────────────────────── */ .stats-exec-table-wrap { margin-top: 1rem; -- cgit v1.2.3