fix(render): node reports progress → moving bar + ETA (was stuck 0%/Preparing)
Build backend images / build content-svc (push) Failing after 58s
Build backend images / build file-svc (push) Failing after 45s
Build backend images / build gateway (push) Failing after 52s
Build backend images / build identity-svc (push) Failing after 54s
Build backend images / build notification-svc (push) Failing after 56s
Build backend images / build render-svc (push) Failing after 56s
Build backend images / build studio-svc (push) Failing after 49s
Build backend images / build content-svc (push) Failing after 58s
Build backend images / build file-svc (push) Failing after 45s
Build backend images / build gateway (push) Failing after 52s
Build backend images / build identity-svc (push) Failing after 54s
Build backend images / build notification-svc (push) Failing after 56s
Build backend images / build render-svc (push) Failing after 56s
Build backend images / build studio-svc (push) Failing after 49s
The node's onProgress callback only logged each update — it never POSTed it, so
render_progress stayed at 0 and step stayed at Preparing (no bar, no ETA). This commit adds:
(1) a render-svc endpoint, POST /v1/internal/render/jobs/{id}/progress, backed by
UpdateJobProgress, which sets render_progress and bumps step from Queued/Preparing to
Rendering once progress > 0; (2) a client method, UpdateProgress; and (3) wiring so that
onProgress posts through it (best-effort, 8s timeout, tolerant of AE-CPU/DB starvation).
Preview already posts; real-frame preview is epic C.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -445,6 +445,13 @@ func (a *Agent) runJob(ctx context.Context, job *client.ClaimedJob) {
|
|||||||
|
|
||||||
onProgress := func(ctx context.Context, pct int, msg string) error {
|
onProgress := func(ctx context.Context, pct int, msg string) error {
|
||||||
log.Printf("[job %s] %d%% %s", job.JobID, pct, msg)
|
log.Printf("[job %s] %d%% %s", job.JobID, pct, msg)
|
||||||
|
// Report to the orchestrator so the UI shows a moving bar + ETA (best-effort,
|
||||||
|
// short timeout — AE pegs CPU so the DB may be slow on a single-box dev setup).
|
||||||
|
pCtx, cancel := context.WithTimeout(ctx, 8*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
if err := a.orch.UpdateProgress(pCtx, job.JobID, pct); err != nil {
|
||||||
|
log.Printf("[job %s] progress push error: %v", job.JobID, err)
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -357,6 +357,22 @@ func (c *Client) UpdatePreview(ctx context.Context, jobID, imageB64 string) erro
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UpdateProgress reports the render percentage so the UI shows a moving bar + ETA.
|
||||||
|
// Errors are non-fatal — the UI just won't advance.
|
||||||
|
func (c *Client) UpdateProgress(ctx context.Context, jobID string, percent int) error {
|
||||||
|
resp, err := c.do(ctx, http.MethodPost,
|
||||||
|
fmt.Sprintf("/v1/internal/render/jobs/%s/progress", jobID),
|
||||||
|
map[string]int{"progress": percent})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
if resp.StatusCode >= 300 {
|
||||||
|
return fmt.Errorf("progress: HTTP %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// GetOutputUploadURL asks the orchestrator to allocate an Export row and
|
// GetOutputUploadURL asks the orchestrator to allocate an Export row and
|
||||||
// return a presigned MinIO PUT URL for the rendered output file.
|
// return a presigned MinIO PUT URL for the rendered output file.
|
||||||
func (c *Client) GetOutputUploadURL(ctx context.Context, jobID string) (*OutputUploadURLResponse, error) {
|
func (c *Client) GetOutputUploadURL(ctx context.Context, jobID string) (*OutputUploadURLResponse, error) {
|
||||||
|
|||||||
@@ -197,6 +197,7 @@ func main() {
|
|||||||
internal.POST("/nodes/:node_id/fonts/:request_id/status", fontH.Report)
|
internal.POST("/nodes/:node_id/fonts/:request_id/status", fontH.Report)
|
||||||
internal.POST("/render/jobs/claim", internalH.Claim)
|
internal.POST("/render/jobs/claim", internalH.Claim)
|
||||||
internal.POST("/render/jobs/:job_id/preview", internalH.Preview)
|
internal.POST("/render/jobs/:job_id/preview", internalH.Preview)
|
||||||
|
internal.POST("/render/jobs/:job_id/progress", internalH.RenderProgress)
|
||||||
internal.POST("/render/jobs/:job_id/output-upload-url", internalH.OutputUploadURL)
|
internal.POST("/render/jobs/:job_id/output-upload-url", internalH.OutputUploadURL)
|
||||||
internal.POST("/render/jobs/:job_id/frames", internalH.FrameProgress)
|
internal.POST("/render/jobs/:job_id/frames", internalH.FrameProgress)
|
||||||
internal.POST("/render/jobs/:job_id/complete", internalH.Complete)
|
internal.POST("/render/jobs/:job_id/complete", internalH.Complete)
|
||||||
|
|||||||
@@ -750,6 +750,29 @@ func (s *Store) UpdateJobPreview(ctx context.Context, jobID uuid.UUID, imageB64
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UpdateJobProgress stores the render percentage for a running job and advances the
|
||||||
|
// step to 'Rendering' once it actually starts (so the UI shows progress + ETA, not a
|
||||||
|
// stuck 'Preparing'). Called by the node agent every few seconds during a render.
|
||||||
|
func (s *Store) UpdateJobProgress(ctx context.Context, jobID uuid.UUID, progress int) error {
|
||||||
|
if progress < 0 {
|
||||||
|
progress = 0
|
||||||
|
}
|
||||||
|
if progress > 100 {
|
||||||
|
progress = 100
|
||||||
|
}
|
||||||
|
_, err := s.pool.Exec(ctx, `
|
||||||
|
UPDATE render.render_jobs
|
||||||
|
SET render_progress = $1,
|
||||||
|
step = CASE WHEN step IN ('Queued'::render_step, 'Preparing'::render_step) AND $1 > 0
|
||||||
|
THEN 'Rendering'::render_step ELSE step END,
|
||||||
|
started_at = COALESCE(started_at, NOW()),
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = $2
|
||||||
|
AND step NOT IN ('Done'::render_step, 'Failed'::render_step, 'Cancelled'::render_step)`,
|
||||||
|
progress, jobID)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Store) CancelJob(ctx context.Context, id, userID uuid.UUID) (bool, error) {
|
func (s *Store) CancelJob(ctx context.Context, id, userID uuid.UUID) (bool, error) {
|
||||||
tag, err := s.pool.Exec(ctx, `
|
tag, err := s.pool.Exec(ctx, `
|
||||||
UPDATE render.render_jobs
|
UPDATE render.render_jobs
|
||||||
|
|||||||
@@ -235,6 +235,28 @@ func (h *InternalHandler) Preview(c *gin.Context) {
|
|||||||
c.Status(http.StatusNoContent)
|
c.Status(http.StatusNoContent)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// POST /v1/internal/render/jobs/:job_id/progress
|
||||||
|
// Node agent pushes the render percentage so the UI shows a moving bar + ETA.
|
||||||
|
func (h *InternalHandler) RenderProgress(c *gin.Context) {
|
||||||
|
jobID, err := uuid.Parse(c.Param("job_id"))
|
||||||
|
if err != nil {
|
||||||
|
c.JSON(http.StatusBadRequest, models.APIError{Code: "bad_request", Message: "invalid job_id"})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var req struct {
|
||||||
|
Progress int `json:"progress"`
|
||||||
|
}
|
||||||
|
if err := c.ShouldBindJSON(&req); err != nil {
|
||||||
|
c.JSON(http.StatusBadRequest, models.APIError{Code: "bad_request", Message: err.Error()})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := h.store.UpdateJobProgress(c.Request.Context(), jobID, req.Progress); err != nil {
|
||||||
|
c.JSON(http.StatusInternalServerError, models.APIError{Code: "internal_error", Message: err.Error()})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
c.Status(http.StatusNoContent)
|
||||||
|
}
|
||||||
|
|
||||||
// POST /v1/internal/render/jobs/claim
|
// POST /v1/internal/render/jobs/claim
|
||||||
// Node agent calls this to atomically claim the next queued job.
|
// Node agent calls this to atomically claim the next queued job.
|
||||||
// Returns 204 when there is nothing queued (agent should back off and retry).
|
// Returns 204 when there is nothing queued (agent should back off and retry).
|
||||||
|
|||||||
Reference in New Issue
Block a user