From bf6c04aba38db83579afe1dd81201b5f429685d7 Mon Sep 17 00:00:00 2001 From: "soroush.asadi" Date: Sun, 7 Jun 2026 07:51:01 +0330 Subject: [PATCH] =?UTF-8?q?fix(render):=20node=20reports=20progress=20?= =?UTF-8?q?=E2=86=92=20moving=20bar=20+=20ETA=20(was=20stuck=200%/Preparin?= =?UTF-8?q?g)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The node's onProgress callback only LOGGED — it never POSTed, so render_progress stayed 0 and step stayed Preparing (no bar, no ETA). Add render-svc POST /v1/internal/render/jobs/{id}/progress (UpdateJobProgress: set render_progress + bump step Queued/Preparing→Rendering once >0) + client UpdateProgress + wire onProgress to post it (8s best-effort timeout, AE-CPU/DB-starvation tolerant). Preview already posts; real-frame preview is epic C. Co-Authored-By: Claude Opus 4.8 --- services/node-agent/cmd/agent/main.go | 7 ++++++ services/node-agent/internal/client/client.go | 16 +++++++++++++ services/render/cmd/server/main.go | 1 + services/render/internal/db/db.go | 23 +++++++++++++++++++ services/render/internal/handlers/internal.go | 22 ++++++++++++++++++ 5 files changed, 69 insertions(+) diff --git a/services/node-agent/cmd/agent/main.go b/services/node-agent/cmd/agent/main.go index 3c22638..384b1b0 100644 --- a/services/node-agent/cmd/agent/main.go +++ b/services/node-agent/cmd/agent/main.go @@ -445,6 +445,13 @@ func (a *Agent) runJob(ctx context.Context, job *client.ClaimedJob) { onProgress := func(ctx context.Context, pct int, msg string) error { log.Printf("[job %s] %d%% %s", job.JobID, pct, msg) + // Report to the orchestrator so the UI shows a moving bar + ETA (best-effort, + // short timeout — AE pegs CPU so the DB may be slow on a single-box dev setup). + pCtx, cancel := context.WithTimeout(ctx, 8*time.Second) + defer cancel() + if err := a.orch.UpdateProgress(pCtx, job.JobID, pct); err != nil { + log.Printf("[job %s] progress push error: %v", job.JobID, err) + } return nil } diff --git a/services/node-agent/internal/client/client.go b/services/node-agent/internal/client/client.go index 0400c98..f7b000f 100644 --- a/services/node-agent/internal/client/client.go +++ b/services/node-agent/internal/client/client.go @@ -357,6 +357,22 @@ func (c *Client) UpdatePreview(ctx context.Context, jobID, imageB64 string) erro return nil } +// UpdateProgress reports the render percentage so the UI shows a moving bar + ETA. +// Errors are non-fatal — the UI just won't advance. +func (c *Client) UpdateProgress(ctx context.Context, jobID string, percent int) error { + resp, err := c.do(ctx, http.MethodPost, + fmt.Sprintf("/v1/internal/render/jobs/%s/progress", jobID), + map[string]int{"progress": percent}) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode >= 300 { + return fmt.Errorf("progress: HTTP %d", resp.StatusCode) + } + return nil +} + // GetOutputUploadURL asks the orchestrator to allocate an Export row and // return a presigned MinIO PUT URL for the rendered output file. func (c *Client) GetOutputUploadURL(ctx context.Context, jobID string) (*OutputUploadURLResponse, error) { diff --git a/services/render/cmd/server/main.go b/services/render/cmd/server/main.go index 1be9bb4..c3bed67 100644 --- a/services/render/cmd/server/main.go +++ b/services/render/cmd/server/main.go @@ -197,6 +197,7 @@ func main() { internal.POST("/nodes/:node_id/fonts/:request_id/status", fontH.Report) internal.POST("/render/jobs/claim", internalH.Claim) internal.POST("/render/jobs/:job_id/preview", internalH.Preview) + internal.POST("/render/jobs/:job_id/progress", internalH.RenderProgress) internal.POST("/render/jobs/:job_id/output-upload-url", internalH.OutputUploadURL) internal.POST("/render/jobs/:job_id/frames", internalH.FrameProgress) internal.POST("/render/jobs/:job_id/complete", internalH.Complete) diff --git a/services/render/internal/db/db.go b/services/render/internal/db/db.go index 6dc0125..f5c2b5a 100644 --- a/services/render/internal/db/db.go +++ b/services/render/internal/db/db.go @@ -750,6 +750,29 @@ func (s *Store) UpdateJobPreview(ctx context.Context, jobID uuid.UUID, imageB64 return err } +// UpdateJobProgress stores the render percentage for a running job and advances the +// step to 'Rendering' once it actually starts (so the UI shows progress + ETA, not a +// stuck 'Preparing'). Called by the node agent every few seconds during a render. +func (s *Store) UpdateJobProgress(ctx context.Context, jobID uuid.UUID, progress int) error { + if progress < 0 { + progress = 0 + } + if progress > 100 { + progress = 100 + } + _, err := s.pool.Exec(ctx, ` + UPDATE render.render_jobs + SET render_progress = $1, + step = CASE WHEN step IN ('Queued'::render_step, 'Preparing'::render_step) AND $1 > 0 + THEN 'Rendering'::render_step ELSE step END, + started_at = COALESCE(started_at, NOW()), + updated_at = NOW() + WHERE id = $2 + AND step NOT IN ('Done'::render_step, 'Failed'::render_step, 'Cancelled'::render_step)`, + progress, jobID) + return err +} + func (s *Store) CancelJob(ctx context.Context, id, userID uuid.UUID) (bool, error) { tag, err := s.pool.Exec(ctx, ` UPDATE render.render_jobs diff --git a/services/render/internal/handlers/internal.go b/services/render/internal/handlers/internal.go index 21cbb36..b69b450 100644 --- a/services/render/internal/handlers/internal.go +++ b/services/render/internal/handlers/internal.go @@ -235,6 +235,28 @@ func (h *InternalHandler) Preview(c *gin.Context) { c.Status(http.StatusNoContent) } +// POST /v1/internal/render/jobs/:job_id/progress +// Node agent pushes the render percentage so the UI shows a moving bar + ETA. +func (h *InternalHandler) RenderProgress(c *gin.Context) { + jobID, err := uuid.Parse(c.Param("job_id")) + if err != nil { + c.JSON(http.StatusBadRequest, models.APIError{Code: "bad_request", Message: "invalid job_id"}) + return + } + var req struct { + Progress int `json:"progress"` + } + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, models.APIError{Code: "bad_request", Message: err.Error()}) + return + } + if err := h.store.UpdateJobProgress(c.Request.Context(), jobID, req.Progress); err != nil { + c.JSON(http.StatusInternalServerError, models.APIError{Code: "internal_error", Message: err.Error()}) + return + } + c.Status(http.StatusNoContent) +} + // POST /v1/internal/render/jobs/claim // Node agent calls this to atomically claim the next queued job. // Returns 204 when there is nothing queued (agent should back off and retry).