M2: skill index — SKILL.md parsing, pgvector index, query by role

Skills module (references SharedKernel only):
- Skill entity + SkillsDbContext (schema "skills") + InitialSkills migration: roles/tools/
  context as text[], risk-tagged actions and golden tests as jsonb, a nullable vector(384)
  embedding, unique (SkillKey, Version).
- SkillMarkdownParser: YAML frontmatter (YamlDotNet) + markdown body → SkillManifest.
- HashingSkillEmbedder: placeholder deterministic embedder so the pgvector path is real now;
  swapped for ONNX/BYOK embeddings at M3-M4 (384-dim to match MiniLM/bge).
- SkillIndexer: parse → hash → embed → upsert; structural publish gate (roles + >=1 golden
  test). Executing golden tests against a model + gating on edit distance lands at M4.
- Endpoints: GET /api/skills (filter by role/visibility), GET /api/skills/{key},
  POST /api/skills/index (manual/admin) — all authenticated.

Verified: build green; ArchitectureTests 8/8 (Skills references only SharedKernel);
IntegrationTests 21/21 incl. a new skill-registry flow — index a SKILL.md, it publishes,
is queryable by role (and not under others), re-index dedups, malformed is 400, catalogue
needs auth.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-09 18:01:37 +03:30
parent ce5c644c7b
commit 401e3e69af
17 changed files with 1103 additions and 14 deletions
@@ -0,0 +1,98 @@
using Pgvector;
using TeamUp.SharedKernel.Domain;
namespace TeamUp.Modules.Skills.Domain;
/// <summary>
/// An indexed skill atom: the projection of a SKILL.md (Git is the source of truth) into a
/// queryable Postgres + pgvector row. Identified by (SkillKey, Version).
/// </summary>
internal sealed class Skill : Entity
{
    /// <summary>Stable key of the skill; together with <see cref="Version"/> it identifies the row.</summary>
    public string SkillKey { get; private set; } = null!;

    /// <summary>Display name; falls back to the manifest id when the manifest name is blank.</summary>
    public string Name { get; private set; } = null!;

    public string Version { get; private set; } = null!;
    public string? Summary { get; private set; }
    public List<string> Roles { get; private set; } = [];
    public string? Inputs { get; private set; }
    public string? Outputs { get; private set; }

    /// <summary>Actions the skill can take, each tagged with a parsed <see cref="ActionRisk"/>.</summary>
    public List<SkillAction> Actions { get; private set; } = [];

    public List<string> Tools { get; private set; } = [];
    public List<string> Context { get; private set; } = [];
    public List<GoldenExample> GoldenTests { get; private set; } = [];
    public SkillVisibility Visibility { get; private set; }
    public SkillTier MinTier { get; private set; }
    public SkillStatus Status { get; private set; }

    /// <summary>The markdown body handed to <see cref="Index"/>.</summary>
    public string Body { get; private set; } = null!;

    /// <summary>Content hash supplied by the caller of <see cref="Index"/>.</summary>
    public string ContentHash { get; private set; } = null!;

    public string? SourceRepo { get; private set; }
    public string? SourcePath { get; private set; }
    public string? SourceCommit { get; private set; }

    /// <summary>Optional embedding vector; null when none was supplied.</summary>
    public Vector? Embedding { get; private set; }

    /// <summary>Set once by <see cref="Create"/>.</summary>
    public DateTimeOffset IndexedAtUtc { get; private set; }

    /// <summary>Refreshed on every call to <see cref="Index"/>.</summary>
    public DateTimeOffset UpdatedAtUtc { get; private set; }

    // Instances are obtained through Create.
    // NOTE(review): presumably also required by the ORM for materialization — confirm.
    private Skill()
    {
    }

    /// <summary>Creates an empty row for the given identity; call <see cref="Index"/> to populate it.</summary>
    /// <param name="skillKey">Key of the skill.</param>
    /// <param name="version">Version of the skill.</param>
    /// <param name="nowUtc">Timestamp recorded as <see cref="IndexedAtUtc"/>.</param>
    public static Skill Create(string skillKey, string version, DateTimeOffset nowUtc) =>
        new() { SkillKey = skillKey, Version = version, IndexedAtUtc = nowUtc };

    /// <summary>(Re)projects a parsed manifest + body onto this row. Used for both insert and update.</summary>
    /// <param name="manifest">Parsed frontmatter; must not be null.</param>
    /// <param name="body">Markdown body of the skill.</param>
    /// <param name="contentHash">Hash of the indexed content.</param>
    /// <param name="sourceRepo">Optional source repository.</param>
    /// <param name="sourcePath">Optional path inside the source repository.</param>
    /// <param name="sourceCommit">Optional source commit.</param>
    /// <param name="embedding">Optional embedding vector.</param>
    /// <param name="status">Status to store on the row.</param>
    /// <param name="nowUtc">Timestamp recorded as <see cref="UpdatedAtUtc"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="manifest"/> is null.</exception>
    public void Index(
        SkillManifest manifest,
        string body,
        string contentHash,
        string? sourceRepo,
        string? sourcePath,
        string? sourceCommit,
        Vector? embedding,
        SkillStatus status,
        DateTimeOffset nowUtc)
    {
        ArgumentNullException.ThrowIfNull(manifest);

        Name = string.IsNullOrWhiteSpace(manifest.Name) ? manifest.Id : manifest.Name;
        Version = manifest.Version;
        Summary = manifest.Summary;
        Inputs = manifest.Inputs;
        Outputs = manifest.Outputs;

        // Copy the manifest collections instead of aliasing them, so later mutation of the
        // manifest cannot silently change this entity. A collection that arrives as null
        // (NOTE(review): e.g. a frontmatter key authored with an empty value — confirm against
        // the parser configuration) is stored as an empty list.
        Roles = CopyOrEmpty(manifest.Roles);
        Tools = CopyOrEmpty(manifest.Tools);
        Context = CopyOrEmpty(manifest.Context);
        GoldenTests = CopyOrEmpty(manifest.GoldenTests);
        Actions = CopyOrEmpty(manifest.Actions)
            .Where(a => a is not null)
            .Select(a => new SkillAction { Name = a.Name, Risk = ParseRisk(a.Risk), Description = a.Description })
            .ToList();

        Visibility = ParseVisibility(manifest.Visibility);
        MinTier = ParseTier(manifest.MinTier);
        Status = status;
        Body = body;
        ContentHash = contentHash;
        SourceRepo = sourceRepo;
        SourcePath = sourcePath;
        SourceCommit = sourceCommit;
        Embedding = embedding;
        UpdatedAtUtc = nowUtc;
    }

    /// <summary>Returns a shallow copy of <paramref name="source"/>, or an empty list when it is null.</summary>
    private static List<T> CopyOrEmpty<T>(List<T>? source) =>
        source is null ? new List<T>() : new List<T>(source);

    /// <summary>
    /// Canonical form used to match authored enum-like strings: trimmed, '-' and '_' removed,
    /// lower-cased (invariant). Null becomes the empty string so it falls through to the
    /// parsers' defaults instead of throwing.
    /// </summary>
    private static string Normalize(string? value) =>
        value is null
            ? string.Empty
            : value.Trim().Replace("-", string.Empty).Replace("_", string.Empty).ToLowerInvariant();

    // NOTE(review): any unrecognised (or missing) risk maps to Read, the first/lowest enum
    // member. Confirm that silently downgrading a typo such as "destructve" is acceptable.
    private static ActionRisk ParseRisk(string? value) => Normalize(value) switch
    {
        "draft" => ActionRisk.Draft,
        "publish" => ActionRisk.Publish,
        "destructive" => ActionRisk.Destructive,
        _ => ActionRisk.Read,
    };

    /// <summary>"private" / "private-to-org" (any casing, '-' or '_' ignored) is private; everything else is public.</summary>
    private static SkillVisibility ParseVisibility(string? value) =>
        Normalize(value) is "privatetoorg" or "private" ? SkillVisibility.PrivateToOrg : SkillVisibility.Public;

    /// <summary>Maps the authored tier name to <see cref="SkillTier"/>; unknown or missing values are Free.</summary>
    private static SkillTier ParseTier(string? value) => Normalize(value) switch
    {
        "team" => SkillTier.Team,
        "scale" => SkillTier.Scale,
        "enterprise" => SkillTier.Enterprise,
        _ => SkillTier.Free,
    };
}
@@ -0,0 +1,26 @@
namespace TeamUp.Modules.Skills.Domain;
/// <summary>
/// Raw frontmatter of a SKILL.md, exactly as authored. Enum-like fields stay strings here and
/// are interpreted when the manifest is mapped onto <see cref="Skill"/>.
/// </summary>
internal sealed class SkillManifest
{
    /// <summary>Authored identifier; empty when not supplied.</summary>
    public string Id { get; set; } = "";

    /// <summary>Authored display name; empty when not supplied.</summary>
    public string Name { get; set; } = "";

    /// <summary>Authored version; "1.0.0" when not supplied.</summary>
    public string Version { get; set; } = "1.0.0";

    public string? Summary { get; set; }

    public List<string> Roles { get; set; } = new();

    public string? Inputs { get; set; }

    public string? Outputs { get; set; }

    /// <summary>Actions with their risk still in authored string form.</summary>
    public List<ManifestAction> Actions { get; set; } = new();

    public List<string> Tools { get; set; } = new();

    public List<string> Context { get; set; } = new();

    /// <summary>Authored visibility; "public" when not supplied.</summary>
    public string Visibility { get; set; } = "public";

    /// <summary>Authored minimum tier; "free" when not supplied.</summary>
    public string MinTier { get; set; } = "free";

    public List<GoldenExample> GoldenTests { get; set; } = new();
}
/// <summary>One action entry of a skill manifest, with its risk kept as the authored string.</summary>
internal sealed class ManifestAction
{
    /// <summary>Authored action name; empty when not supplied.</summary>
    public string Name { get; set; } = "";

    /// <summary>Authored risk label; "read" when not supplied.</summary>
    public string Risk { get; set; } = "read";

    /// <summary>Optional free-text description.</summary>
    public string? Description { get; set; }
}
@@ -0,0 +1,47 @@
namespace TeamUp.Modules.Skills.Domain;
/// <summary>
/// Who may see a skill: the public catalogue or only the owning org.
/// Enforcement is Phase 1; the field exists now.
/// </summary>
internal enum SkillVisibility
{
    Public = 0,
    PrivateToOrg = 1,
}
/// <summary>Plan tiers a skill can require, listed from Free up to Enterprise.</summary>
internal enum SkillTier
{
    Free = 0,
    Team = 1,
    Scale = 2,
    Enterprise = 3,
}
/// <summary>
/// Risk level attached to an individual action. The action gate (M5) compares it to seat autonomy.
/// </summary>
internal enum ActionRisk
{
    Read = 0,
    Draft = 1,
    Publish = 2,
    Destructive = 3,
}
/// <summary>
/// Lifecycle state of an indexed skill. A skill becomes Published only once eval (golden
/// tests) passes — see SkillIndexer/eval harness.
/// </summary>
internal enum SkillStatus
{
    Draft = 0,
    Published = 1,
}
/// <summary>An action a skill can take, tagged with its risk. Stored as JSON on the skill.</summary>
internal sealed class SkillAction
{
    /// <summary>Action name; expected to be set by whoever constructs the instance.</summary>
    public string Name { get; set; } = default!;

    /// <summary>Parsed risk level of the action.</summary>
    public ActionRisk Risk { get; set; }

    /// <summary>Optional free-text description.</summary>
    public string? Description { get; set; }
}
/// <summary>
/// A golden test case: an input paired with the output expected for it. The eval harness
/// checks these (edit distance) before publish.
/// </summary>
internal sealed class GoldenExample
{
    /// <summary>Input of the golden case.</summary>
    public string Input { get; set; } = default!;

    /// <summary>Output expected for <see cref="Input"/>.</summary>
    public string Expected { get; set; } = default!;
}