diff options
| author | Peter Stone <thepeterstone@gmail.com> | 2026-03-19 23:15:58 +0000 |
|---|---|---|
| committer | Peter Stone <thepeterstone@gmail.com> | 2026-03-19 23:15:58 +0000 |
| commit | eeee3b60a1fd8dc8b8b92997f709ef65e4b2097f (patch) | |
| tree | d1c440a84617d31cf5d91b422b1f5d687c277af1 /internal | |
| parent | 7e8967decbc8221694953abf1435fda8aaf18824 (diff) | |
feat: add errors, throughput, and billing sections to stats dashboard
- GET /api/stats?window=7d: pre-aggregated SQL queries for errors, throughput, billing
- Errors section: category summary (quota/rate_limit/timeout/git/failed) + failure table
- Throughput section: stacked hourly bar chart (completed/failed/other) over 7d
- Billing section: KPIs (7d total, avg/day, cost/run) + daily cost bar chart
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'internal')
| -rw-r--r-- | internal/api/executions.go | 17 | ||||
| -rw-r--r-- | internal/api/server.go | 1 | ||||
| -rw-r--r-- | internal/storage/db.go | 135 |
3 files changed, 153 insertions, 0 deletions
diff --git a/internal/api/executions.go b/internal/api/executions.go index 29af139..4d8ba9c 100644 --- a/internal/api/executions.go +++ b/internal/api/executions.go @@ -86,6 +86,23 @@ func (s *Server) handleGetExecutionLog(w http.ResponseWriter, r *http.Request) { fmt.Fprint(w, content) } +// handleGetDashboardStats returns pre-aggregated error, throughput, and billing stats. +// GET /api/stats?window=7d|24h +func (s *Server) handleGetDashboardStats(w http.ResponseWriter, r *http.Request) { + window := 7 * 24 * time.Hour + if r.URL.Query().Get("window") == "24h" { + window = 24 * time.Hour + } + since := time.Now().Add(-window) + + stats, err := s.store.QueryDashboardStats(since) + if err != nil { + writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()}) + return + } + writeJSON(w, http.StatusOK, stats) +} + // handleGetAgentStatus returns the current status of all agents and recent rate-limit events. // GET /api/agents/status?since=<RFC3339> func (s *Server) handleGetAgentStatus(w http.ResponseWriter, r *http.Request) { diff --git a/internal/api/server.go b/internal/api/server.go index 2d5c308..0127ab9 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -121,6 +121,7 @@ func (s *Server) routes() { s.mux.HandleFunc("GET /api/tasks/{id}/subtasks", s.handleListSubtasks) s.mux.HandleFunc("GET /api/tasks/{id}/executions", s.handleListExecutions) s.mux.HandleFunc("GET /api/executions", s.handleListRecentExecutions) + s.mux.HandleFunc("GET /api/stats", s.handleGetDashboardStats) s.mux.HandleFunc("GET /api/agents/status", s.handleGetAgentStatus) s.mux.HandleFunc("GET /api/executions/{id}", s.handleGetExecution) s.mux.HandleFunc("GET /api/executions/{id}/log", s.handleGetExecutionLog) diff --git a/internal/storage/db.go b/internal/storage/db.go index 0d11b4e..1a0e74f 100644 --- a/internal/storage/db.go +++ b/internal/storage/db.go @@ -560,6 +560,141 @@ type RecentExecution struct { StdoutPath string `json:"stdout_path"` } +// ThroughputBucket is one time-bucket of execution counts by outcome. +type ThroughputBucket struct { + Hour string `json:"hour"` // RFC3339 truncated to hour + Completed int `json:"completed"` + Failed int `json:"failed"` + Other int `json:"other"` +} + +// BillingDay is the aggregated cost and run count for a calendar day. +type BillingDay struct { + Day string `json:"day"` // YYYY-MM-DD + CostUSD float64 `json:"cost_usd"` + Runs int `json:"runs"` +} + +// FailedExecution is a failed/timed-out/budget-exceeded execution with its error. +type FailedExecution struct { + ID string `json:"id"` + TaskID string `json:"task_id"` + TaskName string `json:"task_name"` + Status string `json:"status"` + ErrorMsg string `json:"error_msg"` + Category string `json:"category"` // quota | timeout | rate_limit | git | failed + StartedAt time.Time `json:"started_at"` +} + +// DashboardStats is returned by QueryDashboardStats. +type DashboardStats struct { + Throughput []ThroughputBucket `json:"throughput"` + Billing []BillingDay `json:"billing"` + Failures []FailedExecution `json:"failures"` +} + +// QueryDashboardStats returns pre-aggregated stats for the given window. +func (s *DB) QueryDashboardStats(since time.Time) (*DashboardStats, error) { + stats := &DashboardStats{ + Throughput: []ThroughputBucket{}, + Billing: []BillingDay{}, + Failures: []FailedExecution{}, + } + + // Throughput: completions per hour bucket + tpRows, err := s.db.Query(` + SELECT strftime('%Y-%m-%dT%H:00:00Z', start_time) as hour, + SUM(CASE WHEN status IN ('COMPLETED','READY') THEN 1 ELSE 0 END), + SUM(CASE WHEN status IN ('FAILED','TIMED_OUT','BUDGET_EXCEEDED') THEN 1 ELSE 0 END), + SUM(CASE WHEN status NOT IN ('COMPLETED','READY','FAILED','TIMED_OUT','BUDGET_EXCEEDED') THEN 1 ELSE 0 END) + FROM executions + WHERE start_time >= ? AND status NOT IN ('RUNNING','QUEUED','PENDING') + GROUP BY hour ORDER BY hour ASC`, since.UTC()) + if err != nil { + return nil, err + } + defer tpRows.Close() + for tpRows.Next() { + var b ThroughputBucket + if err := tpRows.Scan(&b.Hour, &b.Completed, &b.Failed, &b.Other); err != nil { + return nil, err + } + stats.Throughput = append(stats.Throughput, b) + } + if err := tpRows.Err(); err != nil { + return nil, err + } + + // Billing: cost per day + billRows, err := s.db.Query(` + SELECT date(start_time) as day, COALESCE(SUM(cost_usd),0), COUNT(*) + FROM executions + WHERE start_time >= ? + GROUP BY day ORDER BY day ASC`, since.UTC()) + if err != nil { + return nil, err + } + defer billRows.Close() + for billRows.Next() { + var b BillingDay + if err := billRows.Scan(&b.Day, &b.CostUSD, &b.Runs); err != nil { + return nil, err + } + stats.Billing = append(stats.Billing, b) + } + if err := billRows.Err(); err != nil { + return nil, err + } + + // Failures: recent failed executions with error messages + failRows, err := s.db.Query(` + SELECT e.id, e.task_id, t.name, e.status, COALESCE(e.error_msg,''), e.start_time + FROM executions e JOIN tasks t ON e.task_id = t.id + WHERE e.start_time >= ? AND e.status IN ('FAILED','TIMED_OUT','BUDGET_EXCEEDED') + ORDER BY e.start_time DESC LIMIT 50`, since.UTC()) + if err != nil { + return nil, err + } + defer failRows.Close() + for failRows.Next() { + var f FailedExecution + if err := failRows.Scan(&f.ID, &f.TaskID, &f.TaskName, &f.Status, &f.ErrorMsg, &f.StartedAt); err != nil { + return nil, err + } + f.Category = classifyError(f.Status, f.ErrorMsg) + stats.Failures = append(stats.Failures, f) + } + if err := failRows.Err(); err != nil { + return nil, err + } + + return stats, nil +} + +// classifyError maps a status + error message to a human category. +func classifyError(status, msg string) string { + if status == "TIMED_OUT" { + return "timeout" + } + if status == "BUDGET_EXCEEDED" { + return "quota" + } + low := strings.ToLower(msg) + if strings.Contains(low, "quota") || strings.Contains(low, "exhausted") || strings.Contains(low, "terminalquota") { + return "quota" + } + if strings.Contains(low, "rate limit") || strings.Contains(low, "429") || strings.Contains(low, "too many requests") { + return "rate_limit" + } + if strings.Contains(low, "git push") || strings.Contains(low, "git pull") { + return "git" + } + if strings.Contains(low, "timeout") || strings.Contains(low, "deadline") { + return "timeout" + } + return "failed" +} + // ListRecentExecutions returns executions since the given time, joined with task names. // If taskID is non-empty, only executions for that task are returned. func (s *DB) ListRecentExecutions(since time.Time, limit int, taskID string) ([]*RecentExecution, error) { |
