feat(render-svc+node-agent): add job-claim endpoint and build node-agent skeleton
render-svc:
- db: ClaimJob() — atomic SELECT FOR UPDATE SKIP LOCKED; transitions job to Preparing, marks node Busy in a single transaction
- models: ClaimJobRequest + ClaimedJob types
- handlers/internal: POST /v1/internal/render/jobs/claim — 200 with job or 204 when queue empty
- main: register the claim route under /v1/internal (nodeAuth)

services/node-agent/ (new Go module github.com/flatrender/node-agent):
- internal/config: env-var based config (NODE_ID required, sensible defaults)
- internal/client: typed orchestrator HTTP client (Online, Heartbeat, ClaimJob, Complete, Fail, ReportCrash) — X-Node-Signature auth
- internal/runner: AE render via aerender.exe or mock (for dev without AE)
- cmd/agent/main: register online → heartbeat loop (5s) + poll loop (3s) → claim job → run render → report complete/fail; health endpoint on :7777
- Dockerfile: cross-compiles to Windows amd64 static binary

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,233 @@
|
||||
// Package client provides a typed HTTP client for the V2 render orchestrator's
|
||||
// internal (node-agent) API. All requests are authenticated via the shared
|
||||
// X-Node-Signature header.
|
||||
package client
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Client is the node agent's handle on the V2 render orchestrator.
// Values are built with New.
type Client struct {
	// base is the orchestrator base URL; request paths are appended to it.
	base string
	// secret is sent verbatim as the X-Node-Signature header of each request.
	secret string
	// http is the HTTP client used to send every request.
	http *http.Client
}
|
||||
|
||||
// New returns a Client targeting the given base URL (e.g. "http://gateway:8080").
|
||||
func New(baseURL, nodeHMACSecret string) *Client {
|
||||
return &Client{
|
||||
base: strings.TrimRight(baseURL, "/"),
|
||||
secret: nodeHMACSecret,
|
||||
http: &http.Client{Timeout: 15 * time.Second},
|
||||
}
|
||||
}
|
||||
|
||||
// ── Request helpers ───────────────────────────────────────────────────────────
|
||||
|
||||
func (c *Client) do(ctx context.Context, method, path string, body any) (*http.Response, error) {
|
||||
var bodyReader io.Reader
|
||||
if body != nil {
|
||||
b, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal: %w", err)
|
||||
}
|
||||
bodyReader = bytes.NewReader(b)
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, method, c.base+path, bodyReader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("X-Node-Signature", c.secret)
|
||||
if body != nil {
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
}
|
||||
req.Header.Set("Accept", "application/json")
|
||||
return c.http.Do(req)
|
||||
}
|
||||
|
||||
// decodeJSON always closes resp.Body before returning. When out is non-nil the
// body is decoded as JSON into out and the decode error, if any, is returned;
// when out is nil the body is closed unread and nil is returned.
func decodeJSON(resp *http.Response, out any) error {
	defer resp.Body.Close()

	if out != nil {
		return json.NewDecoder(resp.Body).Decode(out)
	}
	return nil
}
|
||||
|
||||
// ── Domain types ──────────────────────────────────────────────────────────────
|
||||
|
||||
// OnlineRequest is the JSON payload sent once at startup to mark the node
// Ready.
type OnlineRequest struct {
	// Version information reported for the agent and for After Effects.
	NodeAgentVersion    string   `json:"node_agent_version"`
	CurrentAEVersion    string   `json:"current_ae_version"`
	AvailableAEVersions []string `json:"available_ae_versions"`

	// Optional metrics: a nil pointer is left out of the encoded JSON.
	RamGB       *int `json:"ram_gb,omitempty"`
	CPUCores    *int `json:"cpu_cores,omitempty"`
	CacheUsedGB *int `json:"cache_used_gb,omitempty"`

	// CachedTemplateMD5s is always encoded; a nil slice is sent as null.
	CachedTemplateMD5s []string `json:"cached_template_md5s"`
}
|
||||
|
||||
// HeartbeatRequest is the JSON payload of the periodic heartbeat, sent every
// HeartbeatIntervalSec seconds.
type HeartbeatRequest struct {
	NodeID string `json:"node_id"`
	Status string `json:"status"` // Ready | Busy

	// Optional fields: a nil pointer is left out of the encoded JSON, while a
	// pointer to a zero value (0, false, "") is still sent.
	CPUPct         *int    `json:"cpu_pct,omitempty"`
	RAMAvailableMB *int    `json:"ram_available_mb,omitempty"`
	AERunning      *bool   `json:"ae_running,omitempty"`
	CurrentJobID   *string `json:"current_job_id,omitempty"`
	CacheUsedGB    *int    `json:"cache_used_gb,omitempty"`
}
|
||||
|
||||
// HeartbeatResponse is the orchestrator's reply to a heartbeat and may carry
// commands for the node.
type HeartbeatResponse struct {
	// NextHeartbeatInSec is decoded from "next_heartbeat_in_sec".
	NextHeartbeatInSec int `json:"next_heartbeat_in_sec"`
	// PendingCommands holds the entries of "pending_commands" as untyped
	// JSON values; their schema is not defined here.
	PendingCommands []any `json:"pending_commands"`
}
|
||||
|
||||
// ClaimJobRequest is the JSON payload asking the orchestrator for the next
// queued job.
type ClaimJobRequest struct {
	// NodeID identifies the node making the claim.
	NodeID string `json:"node_id"`
	// Region is left out of the encoded JSON when empty.
	Region string `json:"region,omitempty"`
}
|
||||
|
||||
// ClaimedJob is the JSON document returned when a job is successfully claimed.
type ClaimedJob struct {
	// Identifiers of the claimed job and of its saved project.
	JobID          string `json:"job_id"`
	SavedProjectID string `json:"saved_project_id"`

	// Render settings requested for the job.
	Quality    string `json:"quality"`
	Resolution string `json:"resolution"`
	FrameRate  int    `json:"frame_rate"`

	// Audio flags delivered with the job.
	HasMusic     bool `json:"has_music"`
	HasVoiceover bool `json:"has_voiceover"`
}
|
||||
|
||||
// ProgressRequest is the JSON payload reporting frame-level render progress
// for a job.
type ProgressRequest struct {
	FrameJobID  string `json:"frame_job_id"`
	FrameNumber int    `json:"frame_number"`

	// CompletedAt is left out of the encoded JSON when nil.
	CompletedAt *time.Time `json:"completed_at,omitempty"`
}
|
||||
|
||||
// CompleteRequest is the JSON payload that marks a job as Done.
type CompleteRequest struct {
	// ExportID is left out of the encoded JSON when nil.
	ExportID *string `json:"export_id,omitempty"`
}
|
||||
|
||||
// FailRequest is the JSON payload that marks a job as Failed.
type FailRequest struct {
	// Reason is always encoded, even when empty.
	Reason string `json:"reason"`
	// AtStep is left out of the encoded JSON when empty.
	AtStep string `json:"at_step,omitempty"`
}
|
||||
|
||||
// CrashRequest is the JSON payload reporting a node crash.
type CrashRequest struct {
	NodeID string `json:"node_id"`

	// Optional diagnostics: a nil pointer is left out of the encoded JSON.
	LastKnownFrame *int    `json:"last_known_frame,omitempty"`
	CrashSignal    *string `json:"crash_signal,omitempty"`
	ErrorLogTail   *string `json:"error_log_tail,omitempty"`
}
|
||||
|
||||
// ── API methods ───────────────────────────────────────────────────────────────
|
||||
|
||||
// Online marks the node as Ready on startup.
|
||||
func (c *Client) Online(ctx context.Context, nodeID string, req OnlineRequest) error {
|
||||
resp, err := c.do(ctx, http.MethodPost,
|
||||
fmt.Sprintf("/v1/internal/nodes/%s/online", nodeID), req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
return fmt.Errorf("online: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Heartbeat sends a heartbeat and returns the orchestrator's response.
|
||||
func (c *Client) Heartbeat(ctx context.Context, nodeID string, req HeartbeatRequest) (*HeartbeatResponse, error) {
|
||||
resp, err := c.do(ctx, http.MethodPost,
|
||||
fmt.Sprintf("/v1/internal/nodes/%s/heartbeat", nodeID), req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
return nil, fmt.Errorf("heartbeat: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
var out HeartbeatResponse
|
||||
_ = json.NewDecoder(resp.Body).Decode(&out)
|
||||
return &out, nil
|
||||
}
|
||||
|
||||
// ClaimJob atomically claims the next queued render job.
|
||||
// Returns (nil, nil) when the queue is empty (204 No Content).
|
||||
func (c *Client) ClaimJob(ctx context.Context, nodeID, region string) (*ClaimedJob, error) {
|
||||
resp, err := c.do(ctx, http.MethodPost, "/v1/internal/render/jobs/claim",
|
||||
ClaimJobRequest{NodeID: nodeID, Region: region})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusNoContent {
|
||||
return nil, nil // nothing queued
|
||||
}
|
||||
if resp.StatusCode >= 300 {
|
||||
return nil, fmt.Errorf("claim: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
var job ClaimedJob
|
||||
if err := json.NewDecoder(resp.Body).Decode(&job); err != nil {
|
||||
return nil, fmt.Errorf("claim decode: %w", err)
|
||||
}
|
||||
return &job, nil
|
||||
}
|
||||
|
||||
// Complete marks a render job as Done.
|
||||
func (c *Client) Complete(ctx context.Context, jobID string, exportID *string) error {
|
||||
resp, err := c.do(ctx, http.MethodPost,
|
||||
fmt.Sprintf("/v1/internal/render/jobs/%s/complete", jobID),
|
||||
CompleteRequest{ExportID: exportID})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
return fmt.Errorf("complete: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Fail marks a render job as Failed.
|
||||
func (c *Client) Fail(ctx context.Context, jobID, reason, atStep string) error {
|
||||
resp, err := c.do(ctx, http.MethodPost,
|
||||
fmt.Sprintf("/v1/internal/render/jobs/%s/fail", jobID),
|
||||
FailRequest{Reason: reason, AtStep: atStep})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
return fmt.Errorf("fail: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReportCrash reports a node crash for the given job.
|
||||
func (c *Client) ReportCrash(ctx context.Context, jobID string, req CrashRequest) error {
|
||||
resp, err := c.do(ctx, http.MethodPost,
|
||||
fmt.Sprintf("/v1/internal/render/jobs/%s/crash", jobID), req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
return fmt.Errorf("crash: HTTP %d", resp.StatusCode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,89 @@
|
||||
// Package config loads node-agent runtime configuration from environment variables.
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// Config is the complete set of runtime settings for the node agent.
type Config struct {
	// NodeID is this render node's UUID as registered in the orchestrator;
	// it must match a row in render.render_nodes.
	NodeID string

	// OrchestratorURL is the base URL of the V2 API gateway on the internal
	// network, e.g. http://gateway:8080 or http://172.30.0.5:8088.
	OrchestratorURL string

	// NodeHMACSecret is the shared secret sent in the X-Node-Signature
	// header. It must equal NODE_HMAC_SECRET in the render-svc environment.
	NodeHMACSecret string

	// Region labels the datacenter/region of this node (e.g.
	// "iran-tehran-1"); the orchestrator uses it to route region-preferred
	// jobs here.
	Region string

	// AEPath is the full path to the aerender.exe binary, e.g.
	// C:\Program Files\Adobe\Adobe After Effects 2024\Support Files\aerender.exe.
	// An empty value selects mock rendering (development / testing without AE).
	AEPath string

	// WorkDir is the scratch directory for render temp files and AE project
	// copies.
	WorkDir string

	// HeartbeatIntervalSec is the number of seconds between heartbeats sent
	// to the orchestrator.
	HeartbeatIntervalSec int

	// PollIntervalSec is the number of seconds an idle agent waits between
	// job-claim attempts.
	PollIntervalSec int

	// AgentVersion is the semantic version string reported to the
	// orchestrator.
	AgentVersion string

	// AEVersion is the After Effects version string reported to the
	// orchestrator, e.g. "2024".
	AEVersion string

	// ListenPort is the port of the agent's own HTTP health endpoint.
	ListenPort int
}
|
||||
|
||||
// Load reads configuration from environment variables, returning an error
|
||||
// if any required variable is missing.
|
||||
func Load() (*Config, error) {
|
||||
c := &Config{
|
||||
NodeID: os.Getenv("NODE_ID"),
|
||||
OrchestratorURL: getEnv("ORCHESTRATOR_URL", "http://localhost:8088"),
|
||||
NodeHMACSecret: getEnv("NODE_HMAC_SECRET", "node-secret-change-me"),
|
||||
Region: getEnv("NODE_REGION", ""),
|
||||
AEPath: getEnv("AE_PATH", ""),
|
||||
WorkDir: getEnv("WORK_DIR", os.TempDir()),
|
||||
AgentVersion: getEnv("AGENT_VERSION", "0.1.0"),
|
||||
AEVersion: getEnv("AE_VERSION", "2024"),
|
||||
HeartbeatIntervalSec: getInt("HEARTBEAT_INTERVAL_SEC", 5),
|
||||
PollIntervalSec: getInt("POLL_INTERVAL_SEC", 3),
|
||||
ListenPort: getInt("LISTEN_PORT", 7777),
|
||||
}
|
||||
if c.NodeID == "" {
|
||||
return nil, fmt.Errorf("NODE_ID environment variable is required")
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// getEnv returns the value of the environment variable key, or fallback when
// the variable is unset or set to the empty string.
func getEnv(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
|
||||
|
||||
// getInt returns the environment variable key parsed as a base-10 integer.
// fallback is returned when the variable is unset, empty, or does not parse.
func getInt(key string, fallback int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return fallback
	}

	parsed, err := strconv.Atoi(raw)
	if err != nil {
		return fallback
	}
	return parsed
}
|
||||
@@ -0,0 +1,141 @@
|
||||
// Package runner executes After Effects render jobs and streams progress back
|
||||
// via the provided callback. When AE_PATH is empty, a mock render is used
|
||||
// (useful for CI and dev environments without a licensed AE installation).
|
||||
package runner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ProgressFn is the callback through which a render reports progress. It is
// invoked periodically while rendering with percent, a completion estimate in
// the range 0-100, and message, a short human-readable status line.
type ProgressFn func(ctx context.Context, percent int, message string) error
|
||||
|
||||
// Job describes one render to execute.
type Job struct {
	// JobID names the job; Run uses it as the per-job output directory name.
	JobID          string
	SavedProjectID string

	// Render settings of the job.
	Quality    string
	Resolution string
	FrameRate  int

	// Audio flags of the job.
	HasMusic     bool
	HasVoiceover bool

	// AEPFilePath is the local path to the downloaded .aep project file.
	// In a full implementation the agent downloads this from MinIO before
	// calling Run.
	AEPFilePath string
}
|
||||
|
||||
// Run executes the render job, calling onProgress as it advances.
|
||||
// Returns the path to the output MP4 file on success.
|
||||
func Run(ctx context.Context, aePath, workDir string, job *Job, onProgress ProgressFn) (string, error) {
|
||||
outputDir := filepath.Join(workDir, "renders", job.JobID)
|
||||
if err := os.MkdirAll(outputDir, 0o755); err != nil {
|
||||
return "", fmt.Errorf("create output dir: %w", err)
|
||||
}
|
||||
outputPath := filepath.Join(outputDir, "output.mp4")
|
||||
|
||||
if aePath == "" {
|
||||
return mockRender(ctx, job, outputPath, onProgress)
|
||||
}
|
||||
return aeRender(ctx, aePath, job, outputPath, onProgress)
|
||||
}
|
||||
|
||||
// ── Mock render (no AE installed) ────────────────────────────────────────────
|
||||
|
||||
func mockRender(ctx context.Context, job *Job, outputPath string, onProgress ProgressFn) (string, error) {
|
||||
log.Printf("[mock] starting render for job %s (%s %s %dfps)", job.JobID, job.Quality, job.Resolution, job.FrameRate)
|
||||
|
||||
steps := []struct {
|
||||
pct int
|
||||
msg string
|
||||
}{
|
||||
{5, "Preparing project…"},
|
||||
{15, "Loading template…"},
|
||||
{30, "Rendering frames…"},
|
||||
{50, "Rendering frames… (50%)"},
|
||||
{70, "Rendering frames… (70%)"},
|
||||
{85, "Encoding MP4…"},
|
||||
{95, "Uploading output…"},
|
||||
}
|
||||
|
||||
for _, s := range steps {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return "", ctx.Err()
|
||||
case <-time.After(800 * time.Millisecond):
|
||||
}
|
||||
if err := onProgress(ctx, s.pct, s.msg); err != nil {
|
||||
log.Printf("[mock] progress callback error: %v", err)
|
||||
}
|
||||
log.Printf("[mock] %d%% — %s", s.pct, s.msg)
|
||||
}
|
||||
|
||||
// Write a placeholder file so the path is valid
|
||||
if err := os.WriteFile(outputPath, []byte("mock-render-output"), 0o644); err != nil {
|
||||
return "", fmt.Errorf("write mock output: %w", err)
|
||||
}
|
||||
log.Printf("[mock] render complete: %s", outputPath)
|
||||
return outputPath, nil
|
||||
}
|
||||
|
||||
// ── Real AE render via aerender.exe ──────────────────────────────────────────
|
||||
|
||||
func aeRender(ctx context.Context, aePath string, job *Job, outputPath string, onProgress ProgressFn) (string, error) {
|
||||
if job.AEPFilePath == "" {
|
||||
return "", fmt.Errorf("AEPFilePath is required for real AE render")
|
||||
}
|
||||
|
||||
// aerender flags:
|
||||
// -project <path.aep>
|
||||
// -output <output.mp4>
|
||||
// -RStemplate "Multi-Machine Settings" (optional)
|
||||
// -OMtemplate "H.264 – Match Render Settings – 15 Mbps"
|
||||
// -s <start_frame> -e <end_frame>
|
||||
args := []string{
|
||||
"-project", job.AEPFilePath,
|
||||
"-output", outputPath,
|
||||
}
|
||||
|
||||
log.Printf("[ae] running: %s %v", aePath, args)
|
||||
cmd := exec.CommandContext(ctx, aePath, args...)
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return "", fmt.Errorf("start aerender: %w", err)
|
||||
}
|
||||
|
||||
// Poll process while alive — aerender does not expose machine-readable progress.
|
||||
// We advance the progress indicator every 10 seconds until the process exits.
|
||||
done := make(chan error, 1)
|
||||
go func() { done <- cmd.Wait() }()
|
||||
|
||||
_ = onProgress(ctx, 10, "After Effects starting…")
|
||||
pct := 10
|
||||
ticker := time.NewTicker(10 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case err := <-done:
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("aerender exit: %w", err)
|
||||
}
|
||||
_ = onProgress(ctx, 95, "Encoding complete")
|
||||
return outputPath, nil
|
||||
case <-ticker.C:
|
||||
if pct < 90 {
|
||||
pct += 5
|
||||
}
|
||||
_ = onProgress(ctx, pct, fmt.Sprintf("Rendering… %d%%", pct))
|
||||
case <-ctx.Done():
|
||||
_ = cmd.Process.Kill()
|
||||
return "", ctx.Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user