M6: working memory + the PO→QA trigger + analytics — V1 complete
Working memory (Memory module's first real code): - MemoryEntry (schema "memory", vector(384), InitialMemory migration); TeamMemory implements the SharedKernel ITeamMemory seam (embed-and-store on write, cosine recall on read); GET /api/memory/search. HashingTextEmbedder promoted to SharedKernel (pure, deterministic; swapped for ONNX/BYOK embedders later behind ITextEmbedder). - Written on approval: Governance's approve stores an Approval/Correction entry per decision. - Read at assembly: the executor recalls the team's top-3 relevant entries; the prompt gains a "# Team memory" section (treated as data, not instructions). The single V1 event trigger: - IAgentDispatcher (SharedKernel) implemented by Assembler's AgentRunDispatcher (shared by the API and triggers). OrgBoard's QaHandoffTrigger: a task hitting done creates a QA task (provenance parent, assigned to the QA agent) and dispatches a run for the team's QA AI seat. Guardrails: Test/Review tasks never re-trigger (no self-cascade) and a task hands off at most once. Audited as handoff.triggered. Analytics — the V1 verdict view: - IBoardStats (SharedKernel) implemented by OrgBoard; GET /api/governance/analytics returns approval rate, avg edit distance, per-agent metrics + edit-distance trend, tasks done. - UI: /analytics — stat cards, per-agent table, recharts edit-distance trend per agent. Verified: build green; ArchitectureTests 8/8; IntegrationTests 42/42 incl. the M6 acceptance end to end — a dev marks a story done → Quill wakes via the handoff (QA task with provenance, assigned to the agent) → drafts a test plan that waits in review → approve records the second agent's edit distance → analytics show approval rate 100%, avg edit distance > 0, and trends for BOTH Aria and Quill; memory written on Aria's corrected approval is recalled into her next prompt; the guardrails hold. Client build green. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,267 @@
|
||||
using System.Net;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Net.Http.Json;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using TeamUp.Modules.Assembler.Queue;
|
||||
using TeamUp.Modules.Assembler.Runtime;
|
||||
using Xunit;
|
||||
|
||||
namespace TeamUp.IntegrationTests;
|
||||
|
||||
/// <summary>
|
||||
/// M6 acceptance — the proof of the bet: a dev marks a story done → Quill (QA) wakes via the
|
||||
/// handoff trigger, drafts a test plan → it waits in review → approve → analytics show edit
|
||||
/// distance and approval rate for Aria and Quill. Plus: working memory is written on approval
|
||||
/// and read back at the next assembly, and the trigger guardrails hold (no self-cascade, at
|
||||
/// most one handoff per task).
|
||||
/// </summary>
|
||||
public sealed class TwoRoleLoopTests(PostgresFixture postgres) : IClassFixture<PostgresFixture>
|
||||
{
|
||||
// Response DTOs: each record mirrors only the JSON fields this test reads from the API.
// Property names are bound by System.Text.Json, so they must keep matching the payloads.

/// <summary>Body of <c>POST /api/identity/bootstrap</c>: the owner's token plus the created ids.</summary>
private sealed record BootstrapResponse(
    string Token,
    Guid MemberId,
    Guid OrganizationId);

/// <summary>Minimal body for endpoints where the test only needs the created entity's id.</summary>
private sealed record IdResponse(
    Guid Id);

/// <summary>Body of <c>POST /api/orgboard/teams</c>.</summary>
private sealed record TeamResponse(
    Guid Id,
    Guid OrganizationId,
    string Name);

/// <summary>Body of <c>POST /api/orgboard/seats</c>.</summary>
private sealed record SeatResponse(
    Guid Id,
    Guid TeamId,
    string RoleName,
    string State,
    Guid? MemberId,
    Guid? AgentId);

/// <summary>Body of <c>POST /api/skills/sync</c>.</summary>
private sealed record SyncResult(
    int Indexed);

/// <summary>An agent run as returned by the <c>/api/assembler/runs</c> endpoints.</summary>
private sealed record RunResponse(
    Guid Id,
    Guid SeatId,
    Guid WorkItemId,
    Guid? AgentId,
    string Status,
    string? ActionType,
    string? ActionRisk,
    string? Prompt,
    string? Output,
    string? Error);

/// <summary>A review item as returned by the <c>/api/governance/reviews</c> endpoints.</summary>
private sealed record ReviewItemResponse(
    Guid Id,
    Guid OrganizationId,
    Guid TeamId,
    Guid AgentRunId,
    Guid AgentId,
    Guid WorkItemId,
    string ActionKind,
    string Risk,
    string Title,
    string Content,
    List<string> ChildTitles,
    string? Trace,
    string Status,
    string? Decision,
    double? EditDistance,
    DateTimeOffset CreatedAtUtc);

/// <summary>A board task as returned by the <c>/api/orgboard/tasks</c> endpoints and inside board columns.</summary>
private sealed record TaskResponse(
    Guid Id,
    Guid TeamId,
    string Title,
    string? Description,
    string Type,
    string Status,
    string AssigneeKind,
    Guid? AssigneeId,
    Guid? ParentId);

/// <summary>One status column of the board with the tasks it holds.</summary>
private sealed record BoardColumn(
    string Status,
    List<TaskResponse> Items);

/// <summary>Body of <c>GET /api/orgboard/board</c>.</summary>
private sealed record BoardResponse(
    Guid TeamId,
    List<BoardColumn> Columns);

/// <summary>One element of the list returned by <c>GET /api/memory/search</c>.</summary>
private sealed record MemoryHitResponse(
    Guid Id,
    string Kind,
    string Content,
    DateTimeOffset CreatedAtUtc);

/// <summary>One point of an agent's edit-distance trend.</summary>
private sealed record EditDistancePoint(
    DateTimeOffset DecidedAtUtc,
    double Distance);

/// <summary>Per-agent section of the analytics response.</summary>
private sealed record AgentAnalytics(
    Guid AgentId,
    string Name,
    int Reviews,
    double? ApprovalRate,
    double? AvgEditDistance,
    List<EditDistancePoint> Trend);

/// <summary>Body of <c>GET /api/governance/analytics</c>.</summary>
private sealed record AnalyticsResponse(
    int TasksDone,
    int PendingReviews,
    int Decided,
    int Approved,
    int SentBack,
    double? ApprovalRate,
    double? AvgEditDistance,
    List<AgentAnalytics> Agents);

/// <summary>One element of the list returned by <c>GET /api/governance/audit</c>.</summary>
private sealed record AuditEntryResponse(
    Guid Id,
    string Action,
    string EntityType,
    Guid EntityId,
    Guid? ActorMemberId,
    string? Details,
    DateTimeOffset OccurredAtUtc);
|
||||
|
||||
/// <summary>
/// End-to-end M6 acceptance: sets up an organization with two gated agents (Aria on the Product
/// Owner seat, Quill on the QA seat), then checks in order that (1) approving Aria's spec with
/// edits writes a searchable "Correction" memory entry, (2) that memory shows up in the prompt of
/// Aria's next run, (3) moving a story to Done creates a QA task and a run for Quill, (4) the
/// handoff guardrails hold, (5) analytics report both agents, and (6) the handoff is audited.
/// </summary>
[Fact]
public async Task The_two_role_loop_runs_end_to_end_and_is_measurable()
{
    var settings = new Dictionary<string, string?>
    {
        ["GitSource:Provider"] = "filesystem",
        ["GitSource:Root"] = LocateSkillsDirectory(),
    };

    await using var factory = new TeamUpWebFactory(postgres.ConnectionString, settings);
    using var anon = factory.CreateClient();

    // --- Setup: owner, org, team, stub BYOK, skills, Aria (PO) + Quill (QA), both gated ---
    var owner = await PostOk<BootstrapResponse>(anon, "/api/identity/bootstrap", new
    {
        organizationName = "AliaSaaS",
        ownerEmail = "owner@alia.test",
        ownerDisplayName = "Owner",
        ownerPassword = "Passw0rd!",
    });
    using var client = Authed(factory, owner.Token);

    // Setup calls whose body is not needed are still verified: a silently failed setup step
    // would otherwise only surface later as an unrelated assertion failure.
    await PostExpectingSuccess(client, "/api/orgboard/organizations", new { organizationId = owner.OrganizationId, name = "AliaSaaS" });
    var team = await PostOk<TeamResponse>(client, "/api/orgboard/teams", new { organizationId = owner.OrganizationId, name = "IPNOPS" });
    var config = await PostOk<IdResponse>(client, "/api/integrations/api-configs", new
    {
        organizationId = owner.OrganizationId,
        name = "Vertex-Pro",
        provider = "stub",
        model = "gemini-pro",
        apiKey = "sk-demo-key",
    });
    await PostOk<SyncResult>(client, "/api/skills/sync", new { });

    var poSeat = await PostOk<SeatResponse>(client, "/api/orgboard/seats", new { teamId = team.Id, roleName = "Product Owner" });
    await PostExpectingSuccess(client, $"/api/orgboard/seats/{poSeat.Id}/agent", new
    {
        name = "Aria",
        monogram = "AR",
        autonomy = "Gated",
        apiConfigId = config.Id,
        skillKeys = new[] { "spec-writing", "story-breakdown" },
        docs = Array.Empty<string>(),
    });
    var qaSeat = await PostOk<SeatResponse>(client, "/api/orgboard/seats", new { teamId = team.Id, roleName = "QA" });
    await PostExpectingSuccess(client, $"/api/orgboard/seats/{qaSeat.Id}/agent", new
    {
        name = "Quill",
        monogram = "QU",
        autonomy = "Gated",
        apiConfigId = config.Id,
        skillKeys = new[] { "test-plan-generation", "diff-review" },
        docs = Array.Empty<string>(),
    });

    // --- Aria proposes a spec; the owner corrects it on approval → memory is written ---
    var specTask = await PostOk<TaskResponse>(client, "/api/orgboard/tasks", new
    {
        teamId = team.Id,
        title = "Add a logout button to the header",
        description = "Users need a way to end their session.",
        type = "Spec",
    });
    await PostOk<RunResponse>(client, "/api/assembler/runs", new { seatId = poSeat.Id, workItemId = specTask.Id });
    await DrainOneJob(factory);

    var ariaHeld = Assert.Single((await client.GetFromJsonAsync<List<ReviewItemResponse>>(
        $"/api/governance/reviews?organizationId={owner.OrganizationId}"))!);
    await PostOk<ReviewItemResponse>(client, $"/api/governance/reviews/{ariaHeld.Id}/approve", new
    {
        content = "Spec: a logout button in the header ends the session and returns to sign-in.",
        childTitles = new[] { "Add the logout button", "Clear the session on click" },
    });

    // Working memory was written and is searchable.
    var hits = await client.GetFromJsonAsync<List<MemoryHitResponse>>(
        $"/api/memory/search?teamId={team.Id}&q=logout%20header%20session");
    Assert.NotEmpty(hits!);
    Assert.Contains(hits!, h => h.Kind == "Correction" && h.Content.Contains("logout button"));

    // --- Memory is read back at the NEXT assembly: Aria's second run carries "# Team memory" ---
    var secondTask = await PostOk<TaskResponse>(client, "/api/orgboard/tasks", new
    {
        teamId = team.Id,
        title = "Add a logout link to the mobile header",
        description = (string?)null,
        type = "Spec",
    });
    var secondRun = await PostOk<RunResponse>(client, "/api/assembler/runs", new { seatId = poSeat.Id, workItemId = secondTask.Id });
    await DrainOneJob(factory);
    var secondDone = await client.GetFromJsonAsync<RunResponse>($"/api/assembler/runs/{secondRun.Id}");
    Assert.Equal("Completed", secondDone!.Status);
    Assert.Contains("# Team memory", secondDone.Prompt);
    Assert.Contains("[correction] write-spec", secondDone.Prompt);

    // --- THE TRIGGER: a dev marks a story done → Quill wakes with a QA task ---
    var story = await PostOk<TaskResponse>(client, "/api/orgboard/tasks", new
    {
        teamId = team.Id,
        title = "Build the login screen",
        description = "Implements the approved spec.",
        type = "Story",
    });
    await PatchOk<TaskResponse>(client, $"/api/orgboard/tasks/{story.Id}/move", new { status = "Done" });

    var board = await client.GetFromJsonAsync<BoardResponse>($"/api/orgboard/board?teamId={team.Id}");
    var qaTask = Assert.Single(board!.Columns.SelectMany(c => c.Items), i => i.ParentId == story.Id);
    Assert.Equal("Test", qaTask.Type);
    Assert.StartsWith("QA:", qaTask.Title);
    Assert.Equal("Agent", qaTask.AssigneeKind); // assigned to Quill — humans and AI share one task model

    // Quill's run was dispatched by the trigger; drain it → the test plan waits in review.
    await DrainOneJob(factory);
    var pending = await client.GetFromJsonAsync<List<ReviewItemResponse>>(
        $"/api/governance/reviews?organizationId={owner.OrganizationId}");
    var quillHeld = Assert.Single(pending!, r => r.WorkItemId == qaTask.Id);
    Assert.Equal("write-test-plan", quillHeld.ActionKind);

    // Approve Quill's plan with a small edit → the second agent's edit distance is recorded.
    var quillApproved = await PostOk<ReviewItemResponse>(client, $"/api/governance/reviews/{quillHeld.Id}/approve", new
    {
        content = "Test plan: 1. logout ends the session. 2. protected routes redirect after logout.",
        childTitles = Array.Empty<string>(),
    });
    Assert.True(quillApproved.EditDistance > 0);

    // --- Guardrails: QA tasks never re-trigger; a story hands off at most once ---
    await PatchOk<TaskResponse>(client, $"/api/orgboard/tasks/{qaTask.Id}/move", new { status = "Done" });
    await PatchOk<TaskResponse>(client, $"/api/orgboard/tasks/{story.Id}/move", new { status = "InProgress" });
    await PatchOk<TaskResponse>(client, $"/api/orgboard/tasks/{story.Id}/move", new { status = "Done" });

    var after = await client.GetFromJsonAsync<BoardResponse>($"/api/orgboard/board?teamId={team.Id}");
    var allTasks = after!.Columns.SelectMany(c => c.Items).ToList();
    Assert.Single(allTasks, i => i.ParentId == story.Id); // still exactly one handoff
    Assert.DoesNotContain(allTasks, i => i.ParentId == qaTask.Id); // QA's done never cascaded

    // --- ANALYTICS: the bet is measurable — edit distance + approval rate for Aria AND Quill ---
    var analytics = await client.GetFromJsonAsync<AnalyticsResponse>(
        $"/api/governance/analytics?organizationId={owner.OrganizationId}");
    Assert.True(analytics!.TasksDone >= 2); // the story + the QA task
    Assert.Equal(2, analytics.Decided);
    Assert.Equal(2, analytics.Approved);
    Assert.Equal(1.0, analytics.ApprovalRate);
    Assert.True(analytics.AvgEditDistance > 0);
    Assert.Equal(1, analytics.PendingReviews); // Aria's second (memory-aware) spec still waiting

    var aria = Assert.Single(analytics.Agents, a => a.Name == "Aria");
    var quill = Assert.Single(analytics.Agents, a => a.Name == "Quill");
    Assert.True(aria.AvgEditDistance > 0);
    Assert.True(quill.AvgEditDistance > 0);
    Assert.NotEmpty(aria.Trend);
    Assert.NotEmpty(quill.Trend);

    // The handoff itself is on the audit trail.
    var audit = await client.GetFromJsonAsync<List<AuditEntryResponse>>(
        $"/api/governance/audit?organizationId={owner.OrganizationId}&take=300");
    Assert.Contains(audit!, e => e.Action == "handoff.triggered" && e.EntityId == qaTask.Id);

    // POSTs a setup request whose response body the test does not read. Any 2xx is accepted
    // because the exact success code of these endpoints is not pinned by this test; the
    // response body is put into the failure message so a broken setup step is diagnosable.
    static async Task PostExpectingSuccess(HttpClient http, string url, object body)
    {
        using var response = await http.PostAsJsonAsync(url, body);
        Assert.True(
            response.IsSuccessStatusCode,
            $"POST {url} returned {(int)response.StatusCode}: {await response.Content.ReadAsStringAsync()}");
    }
}
|
||||
|
||||
/// <summary>
/// Processes exactly one queued job inline: opens a fresh DI scope, claims the next job from the
/// <see cref="JobQueue"/> under the worker name "test-worker", asserts that a job was returned,
/// and hands it to the <see cref="AgentRunExecutor"/>.
/// </summary>
/// <param name="factory">The web factory whose service provider hosts the queue and executor.</param>
private static async Task DrainOneJob(TeamUpWebFactory factory)
{
    await using var scope = factory.Services.CreateAsyncScope();
    var services = scope.ServiceProvider;

    var claimed = await services.GetRequiredService<JobQueue>().ClaimNextAsync("test-worker");
    Assert.NotNull(claimed);

    var executor = services.GetRequiredService<AgentRunExecutor>();
    await executor.ProcessAsync(claimed!);
}
|
||||
|
||||
/// <summary>
/// Creates a new client from the factory and sets its default <c>Authorization</c> header to
/// <c>Bearer {token}</c>. The caller owns (and should dispose) the returned client.
/// </summary>
/// <param name="factory">The web factory to create the client from.</param>
/// <param name="token">The bearer token to attach to every request.</param>
/// <returns>The newly created client with the header set.</returns>
private static HttpClient Authed(TeamUpWebFactory factory, string token)
{
    var http = factory.CreateClient();
    var headers = http.DefaultRequestHeaders;
    headers.Authorization = new AuthenticationHeaderValue("Bearer", token);
    return http;
}
|
||||
|
||||
/// <summary>
/// POSTs <paramref name="body"/> as JSON to <paramref name="url"/>, asserts the response is
/// exactly <c>200 OK</c>, and returns the JSON body deserialized as <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The type to deserialize the response body into.</typeparam>
/// <param name="client">The client to send the request with.</param>
/// <param name="url">The request URL.</param>
/// <param name="body">The object serialized as the JSON request body.</param>
/// <returns>The deserialized response body; the test fails if it deserializes to null.</returns>
private static async Task<T> PostOk<T>(HttpClient client, string url, object body)
{
    // Dispose the response deterministically; the payload is fully read before the method returns.
    using var response = await client.PostAsJsonAsync(url, body);

    if (response.StatusCode != HttpStatusCode.OK)
    {
        // Surface the server's error body: a bare status-code mismatch is hard to diagnose.
        var payload = await response.Content.ReadAsStringAsync();
        Assert.Fail($"POST {url} returned {(int)response.StatusCode} {response.StatusCode}, expected 200 OK. Body: {payload}");
    }

    var value = await response.Content.ReadFromJsonAsync<T>();
    Assert.NotNull(value);
    return value!;
}
|
||||
|
||||
/// <summary>
/// PATCHes <paramref name="body"/> as JSON to <paramref name="url"/>, asserts the response is
/// exactly <c>200 OK</c>, and returns the JSON body deserialized as <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The type to deserialize the response body into.</typeparam>
/// <param name="client">The client to send the request with.</param>
/// <param name="url">The request URL.</param>
/// <param name="body">The object serialized as the JSON request body.</param>
/// <returns>The deserialized response body; the test fails if it deserializes to null.</returns>
private static async Task<T> PatchOk<T>(HttpClient client, string url, object body)
{
    // Dispose the response deterministically; the payload is fully read before the method returns.
    using var response = await client.PatchAsJsonAsync(url, body);

    if (response.StatusCode != HttpStatusCode.OK)
    {
        // Surface the server's error body: a bare status-code mismatch is hard to diagnose.
        var payload = await response.Content.ReadAsStringAsync();
        Assert.Fail($"PATCH {url} returned {(int)response.StatusCode} {response.StatusCode}, expected 200 OK. Body: {payload}");
    }

    var value = await response.Content.ReadFromJsonAsync<T>();
    Assert.NotNull(value);
    return value!;
}
|
||||
|
||||
/// <summary>
/// Walks upward from <see cref="AppContext.BaseDirectory"/> until it reaches a directory that
/// contains <c>TeamUp.slnx</c>, then returns the path of the <c>skills</c> folder beside that file.
/// The test fails if no ancestor directory contains the solution file.
/// </summary>
/// <returns>The combined path <c>{solution directory}/skills</c> (its existence is not checked).</returns>
private static string LocateSkillsDirectory()
{
    DirectoryInfo? current = new DirectoryInfo(AppContext.BaseDirectory);

    for (; current is not null; current = current.Parent)
    {
        var marker = Path.Combine(current.FullName, "TeamUp.slnx");
        if (File.Exists(marker))
        {
            break;
        }
    }

    Assert.NotNull(current);
    return Path.Combine(current!.FullName, "skills");
}
|
||||
}
|
||||
Reference in New Issue
Block a user