feat(render-svc+node-agent): add job-claim endpoint and build node-agent skeleton
render-svc: - db: ClaimJob() — atomic SELECT FOR UPDATE SKIP LOCKED; transitions job to Preparing, marks node Busy in a single transaction - models: ClaimJobRequest + ClaimedJob types - handlers/internal: POST /v1/internal/render/jobs/claim — 200 with job or 204 when queue empty - main: register the claim route under /v1/internal (nodeAuth) services/node-agent/ (new Go module github.com/flatrender/node-agent): - internal/config: env-var based config (NODE_ID required, sensible defaults) - internal/client: typed orchestrator HTTP client (Online, Heartbeat, ClaimJob, Complete, Fail, ReportCrash) — X-Node-Signature auth - internal/runner: AE render via aerender.exe or mock (for dev without AE) - cmd/agent/main: register online → heartbeat loop (5s) + poll loop (3s) → claim job → run render → report complete/fail; health endpoint on :7777 - Dockerfile: cross-compiles to Windows amd64 static binary Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,233 @@
|
||||
// Package client provides a typed HTTP client for the V2 render orchestrator's
|
||||
// internal (node-agent) API. All requests are authenticated via the shared
|
||||
// X-Node-Signature header.
|
||||
package client
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Client is the node-agent's handle on the V2 render orchestrator. Build one
// with New; every field is unexported.
type Client struct {
	// base is the orchestrator base URL that request paths are appended to;
	// secret is the value sent in the X-Node-Signature header.
	base, secret string

	// http executes the requests.
	http *http.Client
}
|
||||
|
||||
// New returns a Client targeting the given base URL (e.g. "http://gateway:8080").
|
||||
func New(baseURL, nodeHMACSecret string) *Client {
|
||||
return &Client{
|
||||
base: strings.TrimRight(baseURL, "/"),
|
||||
secret: nodeHMACSecret,
|
||||
http: &http.Client{Timeout: 15 * time.Second},
|
||||
}
|
||||
}
|
||||
|
||||
// ── Request helpers ───────────────────────────────────────────────────────────
|
||||
|
||||
func (c *Client) do(ctx context.Context, method, path string, body any) (*http.Response, error) {
|
||||
var bodyReader io.Reader
|
||||
if body != nil {
|
||||
b, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal: %w", err)
|
||||
}
|
||||
bodyReader = bytes.NewReader(b)
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, method, c.base+path, bodyReader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("X-Node-Signature", c.secret)
|
||||
if body != nil {
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
}
|
||||
req.Header.Set("Accept", "application/json")
|
||||
return c.http.Do(req)
|
||||
}
|
||||
|
||||
// decodeJSON closes resp.Body when it returns and, when out is non-nil,
// decodes the body as JSON into out first. With a nil out the body is closed
// without being read and nil is returned.
func decodeJSON(resp *http.Response, out any) error {
	defer resp.Body.Close()

	if out != nil {
		dec := json.NewDecoder(resp.Body)
		return dec.Decode(out)
	}
	return nil
}
|
||||
|
||||
// ── Domain types ──────────────────────────────────────────────────────────────
|
||||
|
||||
// OnlineRequest is the payload sent once at startup to mark the node Ready.
type OnlineRequest struct {
	// Version information; always serialized.
	NodeAgentVersion    string   `json:"node_agent_version"`
	CurrentAEVersion    string   `json:"current_ae_version"`
	AvailableAEVersions []string `json:"available_ae_versions"`

	// Optional figures; a nil pointer leaves the key out of the JSON.
	RamGB       *int `json:"ram_gb,omitempty"`
	CPUCores    *int `json:"cpu_cores,omitempty"`
	CacheUsedGB *int `json:"cache_used_gb,omitempty"`

	// Always serialized; a nil slice encodes as JSON null.
	CachedTemplateMD5s []string `json:"cached_template_md5s"`
}
|
||||
|
||||
// HeartbeatRequest is the periodic status report, sent every
// HeartbeatIntervalSec seconds.
type HeartbeatRequest struct {
	NodeID string `json:"node_id"`
	Status string `json:"status"` // Ready | Busy

	// Optional fields; a nil pointer leaves the key out of the JSON.
	CPUPct         *int    `json:"cpu_pct,omitempty"`
	RAMAvailableMB *int    `json:"ram_available_mb,omitempty"`
	AERunning      *bool   `json:"ae_running,omitempty"`
	CurrentJobID   *string `json:"current_job_id,omitempty"`
	CacheUsedGB    *int    `json:"cache_used_gb,omitempty"`
}
|
||||
|
||||
// HeartbeatResponse is the orchestrator's reply to a heartbeat; it can carry
// optional commands for the node.
type HeartbeatResponse struct {
	// NextHeartbeatInSec is decoded from "next_heartbeat_in_sec".
	NextHeartbeatInSec int `json:"next_heartbeat_in_sec"`

	// PendingCommands holds the untyped entries of "pending_commands".
	PendingCommands []any `json:"pending_commands"`
}
|
||||
|
||||
// ClaimJobRequest is the body of a claim call: it asks the orchestrator for
// the next queued job on behalf of NodeID.
type ClaimJobRequest struct {
	NodeID string `json:"node_id"`

	// Region is left out of the JSON when empty.
	Region string `json:"region,omitempty"`
}
|
||||
|
||||
// ClaimedJob describes the job handed back by a successful claim.
type ClaimedJob struct {
	// Identifiers.
	JobID          string `json:"job_id"`
	SavedProjectID string `json:"saved_project_id"`

	// Render settings.
	Quality    string `json:"quality"`
	Resolution string `json:"resolution"`
	FrameRate  int    `json:"frame_rate"`

	// Audio flags.
	HasMusic     bool `json:"has_music"`
	HasVoiceover bool `json:"has_voiceover"`
}
|
||||
|
||||
// ProgressRequest carries a frame-level render progress report for a job.
type ProgressRequest struct {
	FrameJobID  string `json:"frame_job_id"`
	FrameNumber int    `json:"frame_number"`

	// CompletedAt is left out of the JSON when nil.
	CompletedAt *time.Time `json:"completed_at,omitempty"`
}
|
||||
|
||||
// CompleteRequest is the body sent to mark a job as Done.
type CompleteRequest struct {
	// ExportID is optional; a nil pointer leaves the key out of the JSON.
	ExportID *string `json:"export_id,omitempty"`
}
|
||||
|
||||
// FailRequest is the body sent to mark a job as Failed.
type FailRequest struct {
	// Reason is always serialized.
	Reason string `json:"reason"`

	// AtStep is left out of the JSON when empty.
	AtStep string `json:"at_step,omitempty"`
}
|
||||
|
||||
// CrashRequest is the body sent to report a node crash.
type CrashRequest struct {
	NodeID string `json:"node_id"`

	// Optional diagnostics; a nil pointer leaves the key out of the JSON.
	LastKnownFrame *int    `json:"last_known_frame,omitempty"`
	CrashSignal    *string `json:"crash_signal,omitempty"`
	ErrorLogTail   *string `json:"error_log_tail,omitempty"`
}
|
||||
|
||||
// ── API methods ───────────────────────────────────────────────────────────────
|
||||
|
||||
// Online marks the node as Ready on startup.
|
||||
func (c *Client) Online(ctx context.Context, nodeID string, req OnlineRequest) error {
|
||||
resp, err := c.do(ctx, http.MethodPost,
|
||||
fmt.Sprintf("/v1/internal/nodes/%s/online", nodeID), req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
return fmt.Errorf("online: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Heartbeat sends a heartbeat and returns the orchestrator's response.
|
||||
func (c *Client) Heartbeat(ctx context.Context, nodeID string, req HeartbeatRequest) (*HeartbeatResponse, error) {
|
||||
resp, err := c.do(ctx, http.MethodPost,
|
||||
fmt.Sprintf("/v1/internal/nodes/%s/heartbeat", nodeID), req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
return nil, fmt.Errorf("heartbeat: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
var out HeartbeatResponse
|
||||
_ = json.NewDecoder(resp.Body).Decode(&out)
|
||||
return &out, nil
|
||||
}
|
||||
|
||||
// ClaimJob atomically claims the next queued render job.
|
||||
// Returns (nil, nil) when the queue is empty (204 No Content).
|
||||
func (c *Client) ClaimJob(ctx context.Context, nodeID, region string) (*ClaimedJob, error) {
|
||||
resp, err := c.do(ctx, http.MethodPost, "/v1/internal/render/jobs/claim",
|
||||
ClaimJobRequest{NodeID: nodeID, Region: region})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusNoContent {
|
||||
return nil, nil // nothing queued
|
||||
}
|
||||
if resp.StatusCode >= 300 {
|
||||
return nil, fmt.Errorf("claim: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
var job ClaimedJob
|
||||
if err := json.NewDecoder(resp.Body).Decode(&job); err != nil {
|
||||
return nil, fmt.Errorf("claim decode: %w", err)
|
||||
}
|
||||
return &job, nil
|
||||
}
|
||||
|
||||
// Complete marks a render job as Done.
|
||||
func (c *Client) Complete(ctx context.Context, jobID string, exportID *string) error {
|
||||
resp, err := c.do(ctx, http.MethodPost,
|
||||
fmt.Sprintf("/v1/internal/render/jobs/%s/complete", jobID),
|
||||
CompleteRequest{ExportID: exportID})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
return fmt.Errorf("complete: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Fail marks a render job as Failed.
|
||||
func (c *Client) Fail(ctx context.Context, jobID, reason, atStep string) error {
|
||||
resp, err := c.do(ctx, http.MethodPost,
|
||||
fmt.Sprintf("/v1/internal/render/jobs/%s/fail", jobID),
|
||||
FailRequest{Reason: reason, AtStep: atStep})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
return fmt.Errorf("fail: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReportCrash reports a node crash for the given job.
|
||||
func (c *Client) ReportCrash(ctx context.Context, jobID string, req CrashRequest) error {
|
||||
resp, err := c.do(ctx, http.MethodPost,
|
||||
fmt.Sprintf("/v1/internal/render/jobs/%s/crash", jobID), req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
return fmt.Errorf("crash: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user