fix(render): node reports progress → moving bar + ETA (was stuck 0%/Preparing)
Build backend images / build content-svc (push) Failing after 58s
Build backend images / build file-svc (push) Failing after 45s
Build backend images / build gateway (push) Failing after 52s
Build backend images / build identity-svc (push) Failing after 54s
Build backend images / build notification-svc (push) Failing after 56s
Build backend images / build render-svc (push) Failing after 56s
Build backend images / build studio-svc (push) Failing after 49s

The node's onProgress callback only LOGGED — it never POSTed, so render_progress stayed
0 and step stayed Preparing (no bar, no ETA). Add render-svc POST
/v1/internal/render/jobs/{id}/progress (UpdateJobProgress: set render_progress + bump
step Queued/Preparing→Rendering once >0) + client UpdateProgress + wire onProgress to
post it (8s best-effort timeout, AE-CPU/DB-starvation tolerant). Preview already posts;
real-frame preview is epic C.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-07 07:51:01 +03:30
parent 2879198dec
commit bf6c04aba3
5 changed files with 69 additions and 0 deletions
+7
View File
@@ -445,6 +445,13 @@ func (a *Agent) runJob(ctx context.Context, job *client.ClaimedJob) {
onProgress := func(ctx context.Context, pct int, msg string) error { onProgress := func(ctx context.Context, pct int, msg string) error {
log.Printf("[job %s] %d%% %s", job.JobID, pct, msg) log.Printf("[job %s] %d%% %s", job.JobID, pct, msg)
// Report to the orchestrator so the UI shows a moving bar + ETA (best-effort,
// short timeout — AE pegs CPU so the DB may be slow on a single-box dev setup).
pCtx, cancel := context.WithTimeout(ctx, 8*time.Second)
defer cancel()
if err := a.orch.UpdateProgress(pCtx, job.JobID, pct); err != nil {
log.Printf("[job %s] progress push error: %v", job.JobID, err)
}
return nil return nil
} }
@@ -357,6 +357,22 @@ func (c *Client) UpdatePreview(ctx context.Context, jobID, imageB64 string) erro
return nil return nil
} }
// UpdateProgress POSTs the render percentage for jobID to the orchestrator's
// internal progress endpoint so the UI can show a moving bar + ETA.
//
// It returns the error from the underlying request, or an error naming the
// HTTP status when the response code is 300 or higher. Failures are meant to
// be non-fatal for the caller — the UI just won't advance.
func (c *Client) UpdateProgress(ctx context.Context, jobID string, percent int) error {
	endpoint := fmt.Sprintf("/v1/internal/render/jobs/%s/progress", jobID)
	payload := map[string]int{"progress": percent}

	resp, err := c.do(ctx, http.MethodPost, endpoint, payload)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Anything outside the 1xx/2xx range is reported as a failure.
	if code := resp.StatusCode; code >= 300 {
		return fmt.Errorf("progress: HTTP %d", code)
	}
	return nil
}
// GetOutputUploadURL asks the orchestrator to allocate an Export row and // GetOutputUploadURL asks the orchestrator to allocate an Export row and
// return a presigned MinIO PUT URL for the rendered output file. // return a presigned MinIO PUT URL for the rendered output file.
func (c *Client) GetOutputUploadURL(ctx context.Context, jobID string) (*OutputUploadURLResponse, error) { func (c *Client) GetOutputUploadURL(ctx context.Context, jobID string) (*OutputUploadURLResponse, error) {
+1
View File
@@ -197,6 +197,7 @@ func main() {
internal.POST("/nodes/:node_id/fonts/:request_id/status", fontH.Report) internal.POST("/nodes/:node_id/fonts/:request_id/status", fontH.Report)
internal.POST("/render/jobs/claim", internalH.Claim) internal.POST("/render/jobs/claim", internalH.Claim)
internal.POST("/render/jobs/:job_id/preview", internalH.Preview) internal.POST("/render/jobs/:job_id/preview", internalH.Preview)
internal.POST("/render/jobs/:job_id/progress", internalH.RenderProgress)
internal.POST("/render/jobs/:job_id/output-upload-url", internalH.OutputUploadURL) internal.POST("/render/jobs/:job_id/output-upload-url", internalH.OutputUploadURL)
internal.POST("/render/jobs/:job_id/frames", internalH.FrameProgress) internal.POST("/render/jobs/:job_id/frames", internalH.FrameProgress)
internal.POST("/render/jobs/:job_id/complete", internalH.Complete) internal.POST("/render/jobs/:job_id/complete", internalH.Complete)
+23
View File
@@ -750,6 +750,29 @@ func (s *Store) UpdateJobPreview(ctx context.Context, jobID uuid.UUID, imageB64
return err return err
} }
// UpdateJobProgress records the render percentage for a job that has not yet
// reached a terminal step, so the UI shows progress + ETA instead of a stuck
// 'Preparing'.
//
// The value is clamped to [0, 100] before it is written. In the same UPDATE:
//   - step moves from 'Queued' or 'Preparing' to 'Rendering' once the
//     percentage is above zero; any other step is left unchanged;
//   - started_at is filled in with NOW() only if it was still NULL;
//   - updated_at is refreshed.
//
// Jobs whose step is 'Done', 'Failed' or 'Cancelled' are excluded by the WHERE
// clause. The command tag is discarded, so the number of rows affected is not
// checked; only the error from Exec is returned.
func (s *Store) UpdateJobProgress(ctx context.Context, jobID uuid.UUID, progress int) error {
	// Clamp out-of-range reports rather than rejecting them.
	pct := progress
	switch {
	case pct < 0:
		pct = 0
	case pct > 100:
		pct = 100
	}

	const query = `
UPDATE render.render_jobs
SET render_progress = $1,
step = CASE WHEN step IN ('Queued'::render_step, 'Preparing'::render_step) AND $1 > 0
THEN 'Rendering'::render_step ELSE step END,
started_at = COALESCE(started_at, NOW()),
updated_at = NOW()
WHERE id = $2
AND step NOT IN ('Done'::render_step, 'Failed'::render_step, 'Cancelled'::render_step)`

	if _, err := s.pool.Exec(ctx, query, pct, jobID); err != nil {
		return err
	}
	return nil
}
func (s *Store) CancelJob(ctx context.Context, id, userID uuid.UUID) (bool, error) { func (s *Store) CancelJob(ctx context.Context, id, userID uuid.UUID) (bool, error) {
tag, err := s.pool.Exec(ctx, ` tag, err := s.pool.Exec(ctx, `
UPDATE render.render_jobs UPDATE render.render_jobs
@@ -235,6 +235,28 @@ func (h *InternalHandler) Preview(c *gin.Context) {
c.Status(http.StatusNoContent) c.Status(http.StatusNoContent)
} }
// POST /v1/internal/render/jobs/:job_id/progress
// RenderProgress accepts the render percentage pushed by a node agent and
// stores it via the store's UpdateJobProgress, so the UI shows a moving bar + ETA.
//
// Responses:
//   - 400 bad_request when :job_id is not a valid UUID or the JSON body fails to bind;
//   - 500 internal_error when the store update returns an error;
//   - 204 with no body on success.
func (h *InternalHandler) RenderProgress(c *gin.Context) {
	// reject writes the shared APIError envelope with the given status.
	reject := func(status int, code, msg string) {
		c.JSON(status, models.APIError{Code: code, Message: msg})
	}

	jobID, parseErr := uuid.Parse(c.Param("job_id"))
	if parseErr != nil {
		reject(http.StatusBadRequest, "bad_request", "invalid job_id")
		return
	}

	var body struct {
		Progress int `json:"progress"`
	}
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		reject(http.StatusBadRequest, "bad_request", bindErr.Error())
		return
	}

	storeErr := h.store.UpdateJobProgress(c.Request.Context(), jobID, body.Progress)
	if storeErr != nil {
		reject(http.StatusInternalServerError, "internal_error", storeErr.Error())
		return
	}

	c.Status(http.StatusNoContent)
}
// POST /v1/internal/render/jobs/claim // POST /v1/internal/render/jobs/claim
// Node agent calls this to atomically claim the next queued job. // Node agent calls this to atomically claim the next queued job.
// Returns 204 when there is nothing queued (agent should back off and retry). // Returns 204 when there is nothing queued (agent should back off and retry).