Compare commits

..

3 Commits

Author SHA1 Message Date
soroush.asadi 076c2e577f fix(render): resolve template id for render jobs + mock-fallback when no .aep
Build backend images / build content-svc (push) Failing after 1m30s
Build backend images / build file-svc (push) Failing after 1m23s
Build backend images / build gateway (push) Failing after 5m47s
Build backend images / build identity-svc (push) Failing after 1m23s
Build backend images / build notification-svc (push) Failing after 1m51s
Build backend images / build render-svc (push) Failing after 1m23s
Build backend images / build studio-svc (push) Failing after 1m23s
THE bug behind "AEPFilePath is required for real AE render": CreateJob inserted
original_project_id = saved_project_id (VALUES $3,$3), so the claim looked for the
render bundle at templates/{saved_project_id}/ — which never exists. The bundle
lives at templates/{TEMPLATE_id}/. Now original_project_id is resolved from
studio.saved_projects.original_project_id (the template the project was built from).
(Direct-SQL test renders masked this by setting the template id explicitly.)

Also harden the node-agent: Run() falls back to mock render when AEPFilePath is
empty even if AE is installed (previously hard-errored), so a missing/un-promoted
template degrades gracefully instead of failing the job.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 19:37:59 +03:30
soroush.asadi 62807f5f41 fix(node-agent): resilient output upload — 60s HTTP timeout + 4× retry on upload-URL
After a CPU-heavy AE render+transcode the orchestrator/DB can be briefly slow;
the 15s client timeout made the post-render output-upload-url call fail and the
finished MP4 was dropped (completed without export). Bumped client timeout to 60s
and retry the upload-URL call up to 4× with backoff so a finished render's output
is never lost to a transient stall.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 18:57:09 +03:30
soroush.asadi e59f07df4e fix(node-agent): transcode AE render to MP4 with ffmpeg (real renders deliver MP4)
aerender can't reliably write H.264 directly in modern AE — it renders the
project's output module (Lossless AVI/MOV) and ignores the .mp4 extension,
producing a multi-GB .avi the agent then failed to find/upload.

- findRenderedOutput(): locate the file aerender actually wrote (output.avi/.mov/.mp4)
- transcodeToMP4(): ffmpeg → H.264 yuv420p + AAC + faststart; drops the lossless
  intermediate. ffmpeg located via $FFMPEG_PATH, beside the agent exe, or PATH.
- Graceful fallback: if ffmpeg is missing/fails, upload the raw render so the job
  still delivers a (large but valid) file.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 07:50:43 +03:30
4 changed files with 139 additions and 6 deletions
+16 -2
View File
@@ -472,9 +472,23 @@ func (a *Agent) runJob(ctx context.Context, job *client.ClaimedJob) {
uploadCtx, uploadCancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer uploadCancel()
uploadInfo, urlErr := a.orch.GetOutputUploadURL(uploadCtx, job.JobID)
// Retry the upload-URL call: right after a CPU-heavy render the orchestrator/DB
// can be briefly slow, and dropping a finished render's output is the worst outcome.
var uploadInfo *client.OutputUploadURLResponse
var urlErr error
for attempt := 1; attempt <= 4; attempt++ {
uploadInfo, urlErr = a.orch.GetOutputUploadURL(uploadCtx, job.JobID)
if urlErr == nil {
break
}
log.Printf("[job %s] get upload URL attempt %d failed: %v", job.JobID, attempt, urlErr)
select {
case <-uploadCtx.Done():
case <-time.After(time.Duration(attempt*3) * time.Second):
}
}
if urlErr != nil {
log.Printf("[job %s] get upload URL failed: %v — completing without export", job.JobID, urlErr)
log.Printf("[job %s] get upload URL failed after retries: %v — completing without export", job.JobID, urlErr)
} else {
log.Printf("[job %s] uploading output to %s", job.JobID, uploadInfo.ObjectKey)
if _, upErr := runner.UploadFile(uploadCtx, uploadInfo.UploadURL, outputPath); upErr != nil {
@@ -26,7 +26,9 @@ func New(baseURL, nodeHMACSecret string) *Client {
return &Client{
base: strings.TrimRight(baseURL, "/"),
secret: nodeHMACSecret,
http: &http.Client{Timeout: 15 * time.Second},
// 60s: the post-render output-upload-url call (export insert + presign) can be
// slow when the DB is briefly busy right after a CPU-heavy render/transcode.
http: &http.Client{Timeout: 60 * time.Second},
}
}
+115 -2
View File
@@ -10,6 +10,8 @@ import (
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"time"
)
@@ -46,7 +48,14 @@ func Run(ctx context.Context, aePath, workDir string, job *Job, onProgress Progr
}
outputPath := filepath.Join(outputDir, "output.mp4")
if aePath == "" {
// Mock render when AE isn't installed (aePath empty) OR when this job has no
// template project to render (AEPFilePath empty — the template bundle wasn't
// uploaded/promoted yet). Mock drives progress+preview to completion so the job
// doesn't hard-fail; a real render requires both AE and a downloaded .aep.
if aePath == "" || job.AEPFilePath == "" {
if aePath != "" && job.AEPFilePath == "" {
log.Printf("[job %s] no template .aep available — falling back to mock render", job.JobID)
}
return mockRender(ctx, job, outputPath, onProgress, onPreview)
}
return aeRender(ctx, aePath, job, outputPath, onProgress, onPreview)
@@ -97,6 +106,87 @@ func mockRender(ctx context.Context, job *Job, outputPath string, onProgress Pro
return outputPath, nil
}
// findRenderedOutput locates the file aerender actually produced. The requested
// path is e.g. <dir>/output.mp4, but the output module may have written
// output.avi / output.mov / output.mp4 instead.
//
// Resolution order:
//  1. the requested path itself, when it is a non-empty file;
//  2. the first non-empty <base>.mp4 (extension compared case-insensitively);
//  3. the largest non-empty <base>.* file in the same directory.
//
// Directories and zero-byte files are never returned: an empty file is not a
// usable render, and returning it would hide the failure from the caller.
// Returns "" when no usable candidate exists.
func findRenderedOutput(requested string) string {
	if st, err := os.Stat(requested); err == nil && !st.IsDir() && st.Size() > 0 {
		return requested
	}

	dir := filepath.Dir(requested)
	base := strings.TrimSuffix(filepath.Base(requested), filepath.Ext(requested)) // "output"

	// Glob's only possible error is ErrBadPattern (malformed pattern); in that
	// case matches is nil and we fall through to the "nothing found" result.
	matches, _ := filepath.Glob(filepath.Join(dir, base+".*"))

	var best string
	var bestSize int64 // 0: only strictly non-empty files can become "best"
	for _, m := range matches {
		st, err := os.Stat(m)
		if err != nil || st.IsDir() || st.Size() == 0 {
			continue
		}
		// Prefer .mp4 immediately: it needs no transcode.
		if strings.EqualFold(filepath.Ext(m), ".mp4") {
			return m
		}
		if st.Size() > bestSize {
			best, bestSize = m, st.Size()
		}
	}
	return best
}
// ffmpegPath locates an ffmpeg binary. Search order:
//  1. $FFMPEG_PATH — either the binary itself or a directory containing it;
//  2. `ffmpeg` (`ffmpeg.exe` on Windows) next to the agent executable;
//  3. the first match on PATH.
//
// A directory is never returned as the binary: os.Stat succeeds on directories,
// so each candidate is checked to be a non-directory before it is accepted.
// Returns "" when none is found.
func ffmpegPath() string {
	name := "ffmpeg"
	if runtime.GOOS == "windows" {
		name = "ffmpeg.exe"
	}

	// isFile reports whether p exists and is not a directory.
	isFile := func(p string) bool {
		st, err := os.Stat(p)
		return err == nil && !st.IsDir()
	}

	if p := os.Getenv("FFMPEG_PATH"); p != "" {
		if isFile(p) {
			return p
		}
		// Tolerate FFMPEG_PATH pointing at the install/bin directory.
		if cand := filepath.Join(p, name); isFile(cand) {
			return cand
		}
	}

	if exe, err := os.Executable(); err == nil {
		if cand := filepath.Join(filepath.Dir(exe), name); isFile(cand) {
			return cand
		}
	}

	if p, err := exec.LookPath(name); err == nil {
		return p
	}
	return ""
}
// transcodeToMP4 converts a lossless AE render (AVI/MOV) to a web-playable H.264
// MP4 using ffmpeg.
//
// src is the file to read; requested is the path whose extension is replaced
// with ".mp4" to form the destination. On success the destination path is
// returned. It errors when ffmpeg cannot be located, when src and the
// destination are the same path, when ffmpeg exits non-zero (including ctx
// cancellation, which kills the process), or when no non-empty output exists
// afterwards. On any failure after ffmpeg was started, a partial destination
// file is removed so it is not left behind in the output directory.
func transcodeToMP4(ctx context.Context, src, requested string) (string, error) {
	ff := ffmpegPath()
	if ff == "" {
		return "", fmt.Errorf("ffmpeg not found (set FFMPEG_PATH or place ffmpeg.exe next to the agent)")
	}

	dst := strings.TrimSuffix(requested, filepath.Ext(requested)) + ".mp4"
	// With -y ffmpeg would truncate its own input; refuse instead of destroying
	// the render. This also makes the cleanup below safe for src.
	if filepath.Clean(src) == filepath.Clean(dst) {
		return "", fmt.Errorf("ffmpeg: source and destination are the same file %q", dst)
	}

	args := []string{
		"-y", "-i", src,
		"-c:v", "libx264", "-preset", "medium", "-crf", "20", "-pix_fmt", "yuv420p",
		"-c:a", "aac", "-b:a", "192k",
		"-movflags", "+faststart",
		dst,
	}
	log.Printf("[ffmpeg] %s %v", ff, args)

	cmd := exec.CommandContext(ctx, ff, args...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		_ = os.Remove(dst) // best effort: drop the partial output
		return "", fmt.Errorf("ffmpeg: %w", err)
	}
	if st, err := os.Stat(dst); err != nil || st.Size() == 0 {
		_ = os.Remove(dst) // best effort: drop the empty output, if any
		return "", fmt.Errorf("ffmpeg produced no output")
	}
	return dst, nil
}
// ── Real AE render via aerender.exe ──────────────────────────────────────────
func aeRender(ctx context.Context, aePath string, job *Job, outputPath string, onProgress ProgressFn, onPreview PreviewFn) (string, error) {
@@ -108,6 +198,9 @@ func aeRender(ctx context.Context, aePath string, job *Job, outputPath string, o
// -project <path.aep>
// -comp <name> (or -rqindex 1 when no comp name is known)
// -output <output.mp4>
// Modern AE can't reliably write H.264 directly from aerender, so we let it
// render with the project's output module (typically Lossless AVI/MOV) and
// transcode to MP4 with ffmpeg afterwards (see transcodeToMP4).
// Without -comp/-rqindex, aerender ignores -output and renders nothing.
args := []string{"-project", job.AEPFilePath}
if job.CompName != "" {
@@ -150,8 +243,28 @@ func aeRender(ctx context.Context, aePath string, job *Job, outputPath string, o
if err != nil {
return "", fmt.Errorf("aerender exit: %w", err)
}
// Find what aerender actually wrote (output.avi / .mov / .mp4).
actual := findRenderedOutput(outputPath)
if actual == "" {
return "", fmt.Errorf("aerender finished but no output file found in %s", filepath.Dir(outputPath))
}
// Already an MP4? done.
if strings.EqualFold(filepath.Ext(actual), ".mp4") {
_ = onProgress(ctx, 95, "Encoding complete")
return actual, nil
}
// Transcode the lossless render → H.264 MP4 (much smaller, web-playable).
_ = onProgress(ctx, 92, "Transcoding to MP4…")
mp4, terr := transcodeToMP4(ctx, actual, outputPath)
if terr != nil {
// ffmpeg missing/failed — fall back to the raw render so the job
// still delivers a file (large, but valid).
log.Printf("[ae] transcode failed (%v) — uploading raw %s", terr, filepath.Ext(actual))
return actual, nil
}
_ = onProgress(ctx, 95, "Encoding complete")
return outputPath, nil
_ = os.Remove(actual) // drop the multi-GB intermediate
return mp4, nil
case <-ticker.C:
if pct < 90 {
pct += 5
+5 -1
View File
@@ -430,7 +430,11 @@ func (s *Store) CreateJob(ctx context.Context, userID, tenantID uuid.UUID, req *
(tenant_id, user_id, saved_project_id, original_project_id,
priority_queue, step, price_type, quality, resolution, r_height,
frame_rate, is_60_fps, duration_sec, mode, tell_me_when_done, region)
VALUES ($1, $2, $3, $3,
VALUES ($1, $2, $3,
-- original_project_id = the TEMPLATE the saved project was created from
-- (the render bundle lives at templates/{original_project_id}/). Fall back
-- to the saved-project id only if the lookup is somehow null.
COALESCE((SELECT original_project_id FROM studio.saved_projects WHERE id = $3), $3),
'paid'::render_priority_queue, 'Queued'::render_step, $4::price_kind,
$5::render_quality, $6, 1080, $7, COALESCE($8, FALSE),
0, 'FIX', $9, $10)