feat(render-svc+node-agent): add job-claim endpoint and build node-agent skeleton
render-svc:
- db: ClaimJob() — atomic SELECT ... FOR UPDATE SKIP LOCKED; transitions the job to Preparing and marks the node Busy in a single transaction
- models: ClaimJobRequest + ClaimedJob types
- handlers/internal: POST /v1/internal/render/jobs/claim — 200 with the job, or 204 when the queue is empty
- main: register the claim route under /v1/internal (nodeAuth)

services/node-agent/ (new Go module github.com/flatrender/node-agent):
- internal/config: env-var based config (NODE_ID required, sensible defaults)
- internal/client: typed orchestrator HTTP client (Online, Heartbeat, ClaimJob, Complete, Fail, ReportCrash) — X-Node-Signature auth
- internal/runner: AE render via aerender.exe or mock (for dev without AE)
- cmd/agent/main: register online → heartbeat loop (5s) + poll loop (3s) → claim job → run render → report complete/fail; health endpoint on :7777
- Dockerfile: cross-compiles to a Windows amd64 static binary

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -463,6 +463,62 @@ func (s *Store) getJobByIDInternal(ctx context.Context, id uuid.UUID) (*models.R
|
||||
return jobs[0], nil
|
||||
}
|
||||
|
||||
// ClaimJob atomically picks the highest-priority Queued job (optionally filtered
|
||||
// by region) and moves it to Preparing, setting the current_job_id on the node.
|
||||
// Returns (nil, nil) when there is nothing to do.
|
||||
func (s *Store) ClaimJob(ctx context.Context, nodeID uuid.UUID, region string) (*models.RenderJob, error) {
|
||||
tx, err := s.pool.Begin(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() { _ = tx.Rollback(ctx) }()
|
||||
|
||||
q := `SELECT id FROM render.render_jobs
|
||||
WHERE step = 'Queued'::render_step`
|
||||
args := []any{}
|
||||
argIdx := 1
|
||||
if region != "" {
|
||||
q += fmt.Sprintf(" AND (region IS NULL OR region = $%d)", argIdx)
|
||||
args = append(args, region)
|
||||
argIdx++
|
||||
}
|
||||
q += " ORDER BY priority_score DESC, queued_at ASC LIMIT 1 FOR UPDATE SKIP LOCKED"
|
||||
|
||||
var jobID uuid.UUID
|
||||
if err := tx.QueryRow(ctx, q, args...).Scan(&jobID); err != nil {
|
||||
if err.Error() == "no rows in result set" {
|
||||
return nil, nil // nothing to do
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Advance to Preparing and assign to this node
|
||||
_, err = tx.Exec(ctx, `
|
||||
UPDATE render.render_jobs SET
|
||||
step = 'Preparing'::render_step,
|
||||
started_at = COALESCE(started_at, NOW()),
|
||||
updated_at = NOW()
|
||||
WHERE id = $1`, jobID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_, err = tx.Exec(ctx, `
|
||||
UPDATE render.render_nodes SET
|
||||
status = 'Busy'::node_status,
|
||||
current_job_id = $1,
|
||||
job_started_at = NOW(),
|
||||
updated_at = NOW()
|
||||
WHERE id = $2`, jobID, nodeID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := tx.Commit(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return s.getJobByIDInternal(ctx, jobID)
|
||||
}
|
||||
|
||||
func (s *Store) CancelJob(ctx context.Context, id, userID uuid.UUID) (bool, error) {
|
||||
tag, err := s.pool.Exec(ctx, `
|
||||
UPDATE render.render_jobs
|
||||
|
||||
@@ -198,6 +198,37 @@ func (h *InternalHandler) ReplicaReady(c *gin.Context) {
|
||||
c.Status(http.StatusNoContent)
|
||||
}
|
||||
|
||||
// POST /v1/internal/render/jobs/claim
|
||||
// Node agent calls this to atomically claim the next queued job.
|
||||
// Returns 204 when there is nothing queued (agent should back off and retry).
|
||||
func (h *InternalHandler) Claim(c *gin.Context) {
|
||||
var req models.ClaimJobRequest
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
c.JSON(http.StatusBadRequest, models.APIError{Code: "bad_request", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
job, err := h.store.ClaimJob(c.Request.Context(), req.NodeID, req.Region)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, models.APIError{Code: "internal_error", Message: err.Error()})
|
||||
return
|
||||
}
|
||||
if job == nil {
|
||||
c.Status(http.StatusNoContent) // nothing queued
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, models.ClaimedJob{
|
||||
JobID: job.ID,
|
||||
SavedProjectID: job.SavedProjectID,
|
||||
Quality: job.Quality,
|
||||
Resolution: job.Resolution,
|
||||
FrameRate: job.FrameRate,
|
||||
HasMusic: job.HasMusic,
|
||||
HasVoiceover: job.HasVoiceover,
|
||||
})
|
||||
}
|
||||
|
||||
// POST /v1/internal/nodes/:node_id/cache-update
|
||||
func (h *InternalHandler) CacheUpdate(c *gin.Context) {
|
||||
nodeID, err := uuid.Parse(c.Param("node_id"))
|
||||
|
||||
@@ -402,6 +402,21 @@ type CrashReportRequest struct {
|
||||
LogFileURL *string `json:"log_file_url"`
|
||||
}
|
||||
|
||||
type ClaimJobRequest struct {
|
||||
NodeID uuid.UUID `json:"node_id" binding:"required"`
|
||||
Region string `json:"region"`
|
||||
}
|
||||
|
||||
type ClaimedJob struct {
|
||||
JobID uuid.UUID `json:"job_id"`
|
||||
SavedProjectID uuid.UUID `json:"saved_project_id"`
|
||||
Quality string `json:"quality"`
|
||||
Resolution string `json:"resolution"`
|
||||
FrameRate int `json:"frame_rate"`
|
||||
HasMusic bool `json:"has_music"`
|
||||
HasVoiceover bool `json:"has_voiceover"`
|
||||
}
|
||||
|
||||
type CacheUpdateRequest struct {
|
||||
Action string `json:"action" binding:"required"`
|
||||
ProjectID *uuid.UUID `json:"project_id"`
|
||||
|
||||
Reference in New Issue
Block a user