90ac0b81d1
Add full V2 architecture: identity, content, studio (.NET 10) and file, render, notification, gateway (Go) services with vendored deps, plus DB migrations, event/API contracts, and an init-db script. Wire the Next.js frontend to the gateway: server-side JWT auth routes (login/register/refresh/logout/me), gateway fetch helper, and session/ cookie/jwt helpers under src/lib. Containerize the stack via docker-compose.v2.yml and per-service Dockerfiles. Base images resolve through a Nexus mirror (Docker Hub) and MCR directly; npm/NuGet pull from Nexus groups. Self-host fonts via next/font/local to avoid Google Fonts (geo-blocked). Add CI workflow and ignore .env.v2, *.stackdump, and .NET bin/obj. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
177 lines
7.9 KiB
SQL
177 lines
7.9 KiB
SQL
-- =====================================================================
-- RENDER SCHEMA — Part 1: Farm Nodes & Health
-- =====================================================================
|
|
|
|
-- Resolve unqualified names against render first, then public. The
-- unqualified objects created below therefore land in render (assuming
-- that schema already exists — it is not created in this section).
SET search_path = render, public;

-- States used by render_nodes.status and node_health_logs.status.
CREATE TYPE node_status AS ENUM (
    'Ready',
    'Busy',
    'Offline',
    'Maintenance',
    'Crashed',
    'Updating',
    'Disabled'
);

-- Ownership model of a node (see render_nodes.node_kind).
CREATE TYPE node_kind AS ENUM (
    'Shared',
    'Dedicated',
    'Spot'
);

-- Which queue a node is serving (see render_nodes.render_type).
CREATE TYPE render_type AS ENUM (
    'Free',
    'Paid',
    'Snapshot',
    'Mockup'
);

-- AE versions a node or an update can target.
CREATE TYPE ae_version AS ENUM (
    '2020',
    '2021',
    '2022',
    '2023',
    '2024',
    '2025'
);
|
|
|
|
-- ---------------------------------------------------------------------
-- render_nodes — registry of farm machines.
-- One row per node: network address, hardware spec, installed software,
-- ownership, current state, latest health sample, and scheduling flags.
-- ---------------------------------------------------------------------
CREATE TABLE render_nodes (
    id                      UUID         DEFAULT gen_random_uuid(),
    name                    TEXT         NOT NULL,
    region                  TEXT         NOT NULL,                 -- e.g. 'tehran', 'frankfurt', ...

    -- Network
    node_ip                 INET         NOT NULL,
    worker_port             INTEGER      NOT NULL DEFAULT 5555,
    public_endpoint         TEXT,

    -- Hardware spec
    ram_gb                  INTEGER,
    cpu_cores               INTEGER,
    gpu_model               TEXT,
    storage_gb              INTEGER,

    -- Software
    current_ae_version      ae_version   NOT NULL,
    -- NOTE(review): stored as TEXT[] rather than ae_version[] — confirm intentional.
    available_ae_versions   TEXT[]       NOT NULL DEFAULT '{}',
    node_agent_version      TEXT,
    last_update_at          TIMESTAMPTZ,
    last_update_error       TEXT,

    -- Ownership (cross-schema references are not enforced by a foreign key here)
    node_kind               node_kind    NOT NULL DEFAULT 'Shared',
    owner_user_id           UUID,                                  -- identity.users (for Dedicated nodes)
    owner_tenant_id         UUID,                                  -- identity.tenants

    -- Current state
    status                  node_status  NOT NULL DEFAULT 'Offline',
    current_job_id          UUID,                                  -- render_jobs (no FK declared here)
    current_frame_job_id    UUID,                                  -- frame_jobs (no FK declared here)
    job_started_at          TIMESTAMPTZ,
    render_type             render_type,                           -- queue the node is serving right now

    -- Health: latest sample, denormalized for hot reads
    last_heartbeat_at       TIMESTAMPTZ,
    last_cpu_pct            INTEGER,
    last_ram_available_mb   INTEGER,
    ae_running              BOOLEAN      NOT NULL DEFAULT FALSE,

    -- Lifetime statistics
    lifetime_task_count     BIGINT       NOT NULL DEFAULT 0,
    lifetime_crash_count    INTEGER      NOT NULL DEFAULT 0,
    consecutive_failures    INTEGER      NOT NULL DEFAULT 0,

    -- Scheduling
    priority                INTEGER      NOT NULL DEFAULT 100,
    is_active               BOOLEAN      NOT NULL DEFAULT TRUE,
    accepts_new_jobs        BOOLEAN      NOT NULL DEFAULT TRUE,

    -- Maintenance
    last_maintenance_at     TIMESTAMPTZ,
    next_maintenance_at     TIMESTAMPTZ,
    maintenance_reason      TEXT,

    -- Local cache state (templates the node has downloaded)
    cached_template_md5s    TEXT[]       NOT NULL DEFAULT '{}',
    cache_used_gb           INTEGER      NOT NULL DEFAULT 0,

    metadata                JSONB        NOT NULL DEFAULT '{}'::jsonb,
    created_at              TIMESTAMPTZ  NOT NULL DEFAULT now(),
    updated_at              TIMESTAMPTZ  NOT NULL DEFAULT now(),

    PRIMARY KEY (id)
);
|
|
|
|
-- Secondary indexes on render_nodes.

-- Region/status lookups, limited to active nodes.
CREATE INDEX idx_nodes_region_status
    ON render_nodes (region, status)
    WHERE is_active = TRUE;

-- Nodes that are Ready, active and accepting jobs; within a region the
-- entries are ordered by descending priority value.
CREATE INDEX idx_nodes_ready
    ON render_nodes (region, priority DESC)
    WHERE status = 'Ready' AND accepts_new_jobs = TRUE AND is_active = TRUE;

-- Lookup by owning user, limited to Dedicated nodes.
CREATE INDEX idx_nodes_owner
    ON render_nodes (owner_user_id)
    WHERE node_kind = 'Dedicated';

-- Heartbeat timestamps of active nodes.
CREATE INDEX idx_nodes_heartbeat
    ON render_nodes (last_heartbeat_at)
    WHERE is_active = TRUE;

-- An (ip, port) pair may be registered at most once.
CREATE UNIQUE INDEX uq_nodes_ip_port
    ON render_nodes (node_ip, worker_port);
|
|
|
|
-- Runs public.tg_set_updated_at() before each row update on render_nodes.
-- The function is defined outside this file; presumably it refreshes
-- updated_at — confirm against its definition.
CREATE TRIGGER tg_render_nodes_updated_at
    BEFORE UPDATE
    ON render_nodes
    FOR EACH ROW
    EXECUTE FUNCTION public.tg_set_updated_at();
|
|
|
|
-- ---------------------------------------------------------------------
-- node_health_logs — historical heartbeat data (partitioned monthly)
-- Range-partitioned on recorded_at. Only the 2026-01 partition is created
-- explicitly here; a DEFAULT partition catches every other timestamp so
-- that inserts never fail with "no partition of relation found for row".
-- ---------------------------------------------------------------------
CREATE TABLE node_health_logs (
    id                BIGSERIAL,
    -- NOTE(review): unlike node_crashes.node_id, no foreign key to
    -- render_nodes is declared here — confirm this is intentional.
    node_id           UUID         NOT NULL,
    recorded_at       TIMESTAMPTZ  NOT NULL DEFAULT NOW(),

    status            node_status  NOT NULL,
    cpu_pct           INT,
    ram_available_mb  INT,
    ae_running        BOOLEAN,
    current_job_id    UUID,
    current_frame     INT,

    -- Templates cached (size summary only)
    cache_used_gb     INT,

    -- The partition key (recorded_at) has to be part of the primary key
    -- of a partitioned table, hence the composite key.
    PRIMARY KEY (id, recorded_at)
) PARTITION BY RANGE (recorded_at);

-- Per-node history, newest first. Declared on the parent, so it is
-- created on every partition as well.
CREATE INDEX idx_node_health_node ON node_health_logs(node_id, recorded_at DESC);

-- January 2026: lower bound inclusive, upper bound exclusive.
CREATE TABLE node_health_logs_y2026m01
    PARTITION OF node_health_logs
    FOR VALUES FROM ('2026-01-01') TO ('2026-02-01');

-- Catch-all for rows whose recorded_at falls outside every explicit
-- partition. Without it, any heartbeat outside January 2026 is rejected.
-- Caveat: creating a new monthly partition fails while this partition
-- still holds rows belonging to that month — move such rows out first.
CREATE TABLE node_health_logs_default
    PARTITION OF node_health_logs
    DEFAULT;
|
|
|
|
-- ---------------------------------------------------------------------
-- node_crashes — every detected AE crash.
-- Rows are removed together with their node (ON DELETE CASCADE).
-- ---------------------------------------------------------------------
CREATE TABLE node_crashes (
    id                UUID         DEFAULT gen_random_uuid(),
    node_id           UUID         NOT NULL,
    render_job_id     UUID,                                -- job that was running (no FK declared here)
    frame_job_id      UUID,

    crashed_at        TIMESTAMPTZ  NOT NULL DEFAULT now(),
    last_known_frame  INTEGER,
    crash_signal      TEXT,                                -- exit code or signal
    error_log         TEXT,                                -- last N lines of the AE log
    log_file_url      TEXT,                                -- MinIO upload of the full log

    -- Recovery
    auto_recovered    BOOLEAN      NOT NULL DEFAULT FALSE,
    recovery_action   TEXT,                                -- 'reset_prefs','restart_ae','reassign_job'
    recovered_at      TIMESTAMPTZ,

    created_at        TIMESTAMPTZ  NOT NULL DEFAULT now(),

    PRIMARY KEY (id),
    FOREIGN KEY (node_id) REFERENCES render_nodes (id) ON DELETE CASCADE
);

-- Crash history of one node, newest first.
CREATE INDEX idx_node_crashes_node
    ON node_crashes (node_id, crashed_at DESC);

-- Crashes by render job; rows without a job are left out of the index.
CREATE INDEX idx_node_crashes_job
    ON node_crashes (render_job_id)
    WHERE render_job_id IS NOT NULL;
|
|
|
|
-- ---------------------------------------------------------------------
-- node_updates — software/AE update tracking.
-- ---------------------------------------------------------------------
CREATE TABLE node_updates (
    id                      UUID         DEFAULT gen_random_uuid(),
    update_file_name        TEXT         NOT NULL,
    update_number           INTEGER      NOT NULL,
    description             TEXT,
    target_ae_version       ae_version,
    in_update_queue         BOOLEAN      NOT NULL DEFAULT FALSE,
    -- Node ids kept as a plain UUID array; no foreign key is enforced on its elements.
    rolled_out_to_node_ids  UUID[]       NOT NULL DEFAULT '{}',
    last_update_queue_date  TIMESTAMPTZ,
    -- NOTE(review): create_date and created_at both default to the insert
    -- time and look redundant — confirm whether both are needed.
    create_date             TIMESTAMPTZ  NOT NULL DEFAULT now(),
    created_at              TIMESTAMPTZ  NOT NULL DEFAULT now(),

    PRIMARY KEY (id)
);
|
|
|
|
-- ---------------------------------------------------------------------
-- node_template_cache — what's currently cached on each node.
-- One row per (node, file md5); rows are removed together with their
-- node (ON DELETE CASCADE).
-- ---------------------------------------------------------------------
CREATE TABLE node_template_cache (
    id               BIGSERIAL,
    node_id          UUID         NOT NULL,
    project_id       UUID         NOT NULL,               -- content.projects (no FK declared here)
    aep_file_md5     TEXT         NOT NULL,
    file_size_bytes  BIGINT       NOT NULL,
    cached_at        TIMESTAMPTZ  NOT NULL DEFAULT now(),
    last_used_at     TIMESTAMPTZ  NOT NULL DEFAULT now(),
    use_count        INTEGER      NOT NULL DEFAULT 1,
    local_path       TEXT         NOT NULL,

    PRIMARY KEY (id),
    FOREIGN KEY (node_id) REFERENCES render_nodes (id) ON DELETE CASCADE,
    UNIQUE (node_id, aep_file_md5)
);

-- Per-node entries ordered by last use, oldest first (LRU order).
CREATE INDEX idx_node_cache_node_lru
    ON node_template_cache (node_id, last_used_at);

-- Lookup of cache entries by file md5 across all nodes.
CREATE INDEX idx_node_cache_md5
    ON node_template_cache (aep_file_md5);
|