feat(render-svc+node-agent): add job-claim endpoint and build node-agent skeleton

render-svc:
- db: ClaimJob() — atomic SELECT FOR UPDATE SKIP LOCKED; transitions job to
  Preparing, marks node Busy in a single transaction
- models: ClaimJobRequest + ClaimedJob types
- handlers/internal: POST /v1/internal/render/jobs/claim — 200 with job or 204 when queue empty
- main: register the claim route under /v1/internal (nodeAuth)

services/node-agent/ (new Go module github.com/flatrender/node-agent):
- internal/config: env-var based config (NODE_ID required, sensible defaults)
- internal/client: typed orchestrator HTTP client (Online, Heartbeat, ClaimJob,
  Complete, Fail, ReportCrash) — X-Node-Signature auth
- internal/runner: AE render via aerender.exe or mock (for dev without AE)
- cmd/agent/main: register online → heartbeat loop (5s) + poll loop (3s) →
  claim job → run render → report complete/fail; health endpoint on :7777
- Dockerfile: cross-compiles to Windows amd64 static binary

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-01 09:28:31 +03:30
parent 541e935418
commit ee421ccc68
10 changed files with 901 additions and 0 deletions
+56
View File
@@ -463,6 +463,62 @@ func (s *Store) getJobByIDInternal(ctx context.Context, id uuid.UUID) (*models.R
return jobs[0], nil
}
// ClaimJob atomically picks the highest-priority Queued job (optionally filtered
// by region), moves it to Preparing and records it as current_job_id on the
// given node, all inside a single transaction.
//
// When region is non-empty, only jobs whose region is NULL or equal to region
// are considered; an empty region disables the filter. Candidates are ordered
// by priority_score (descending), then queued_at (ascending), and the chosen
// row is selected with FOR UPDATE SKIP LOCKED so that concurrent claimers do
// not wait on, or pick, a job another transaction has already locked.
//
// Returns (nil, nil) when there is nothing to do. Returns an error, and leaves
// the job Queued, when nodeID matches no row in render.render_nodes.
func (s *Store) ClaimJob(ctx context.Context, nodeID uuid.UUID, region string) (*models.RenderJob, error) {
	tx, err := s.pool.Begin(ctx)
	if err != nil {
		return nil, fmt.Errorf("claim job: begin transaction: %w", err)
	}
	// Undo everything on any early return. The error is deliberately ignored:
	// once Commit has succeeded there is nothing left to roll back.
	defer func() { _ = tx.Rollback(ctx) }()

	q := `SELECT id FROM render.render_jobs
		WHERE step = 'Queued'::render_step`
	var args []any
	if region != "" {
		// The region is the only bind parameter, so it is always $1.
		q += " AND (region IS NULL OR region = $1)"
		args = append(args, region)
	}
	q += " ORDER BY priority_score DESC, queued_at ASC LIMIT 1 FOR UPDATE SKIP LOCKED"

	// Read the (at most one) row through Query/Next rather than QueryRow so
	// that "no job available" is detected structurally instead of by matching
	// the driver's error text.
	rows, err := tx.Query(ctx, q, args...)
	if err != nil {
		return nil, fmt.Errorf("claim job: select queued job: %w", err)
	}
	var jobID uuid.UUID
	found := rows.Next()
	if found {
		err = rows.Scan(&jobID)
	}
	// The result set must be closed before the transaction can run the
	// following statements.
	rows.Close()
	if err == nil {
		err = rows.Err()
	}
	if err != nil {
		return nil, fmt.Errorf("claim job: select queued job: %w", err)
	}
	if !found {
		return nil, nil // nothing to do
	}

	// Advance to Preparing; started_at is kept if it was already set.
	if _, err = tx.Exec(ctx, `
		UPDATE render.render_jobs SET
			step = 'Preparing'::render_step,
			started_at = COALESCE(started_at, NOW()),
			updated_at = NOW()
		WHERE id = $1`, jobID); err != nil {
		return nil, fmt.Errorf("claim job: advance job %s to Preparing: %w", jobID, err)
	}

	// Assign the job to this node and mark the node Busy.
	tag, err := tx.Exec(ctx, `
		UPDATE render.render_nodes SET
			status = 'Busy'::node_status,
			current_job_id = $1,
			job_started_at = NOW(),
			updated_at = NOW()
		WHERE id = $2`, jobID, nodeID)
	if err != nil {
		return nil, fmt.Errorf("claim job: mark node %s busy: %w", nodeID, err)
	}
	if tag.RowsAffected() == 0 {
		// Without a node row the job would sit in Preparing with nobody
		// holding it; fail so the deferred rollback returns it to the queue.
		return nil, fmt.Errorf("claim job: node %s not found", nodeID)
	}

	if err := tx.Commit(ctx); err != nil {
		return nil, fmt.Errorf("claim job: commit: %w", err)
	}

	// The claim is already committed here; a failure below only means the
	// freshly claimed job could not be loaded for the response.
	job, err := s.getJobByIDInternal(ctx, jobID)
	if err != nil {
		return nil, fmt.Errorf("claim job: load claimed job %s: %w", jobID, err)
	}
	return job, nil
}
func (s *Store) CancelJob(ctx context.Context, id, userID uuid.UUID) (bool, error) {
tag, err := s.pool.Exec(ctx, `
UPDATE render.render_jobs