M2: skill index — SKILL.md parsing, pgvector index, query by role
Skills module (references SharedKernel only):
- Skill entity + SkillsDbContext (schema "skills") + InitialSkills migration: roles/tools/
context as text[], risk-tagged actions and golden tests as jsonb, a nullable vector(384)
embedding, unique (SkillKey, Version).
- SkillMarkdownParser: YAML frontmatter (YamlDotNet) + markdown body → SkillManifest.
- HashingSkillEmbedder: placeholder deterministic embedder so the pgvector path is real now;
to be swapped for ONNX/BYOK embeddings at M3-M4 (384-dim to match MiniLM/bge).
- SkillIndexer: parse → hash → embed → upsert; structural publish gate (roles + >=1 golden
test). Executing golden tests against a model + gating on edit distance lands at M4.
- Endpoints: GET /api/skills (filter by role/visibility), GET /api/skills/{key},
POST /api/skills/index (manual/admin) — all authenticated.
Verified: build green; ArchitectureTests 8/8 (Skills references only SharedKernel);
IntegrationTests 21/21 incl. a new skill-registry flow — index a SKILL.md, it publishes,
is queryable by role (and not under others), re-index dedups, malformed is 400, catalogue
needs auth.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,51 @@
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Pgvector;
|
||||
using TeamUp.Modules.Skills.Domain;
|
||||
using TeamUp.Modules.Skills.Parsing;
|
||||
using TeamUp.Modules.Skills.Persistence;
|
||||
|
||||
namespace TeamUp.Modules.Skills.Indexing;
|
||||
|
||||
/// <summary>
/// Turns the raw text of a SKILL.md into a persisted <see cref="Skill"/>: the document is parsed,
/// hashed and embedded, and the row identified by the manifest's id and version is inserted or updated.
/// </summary>
internal sealed class SkillIndexer(SkillsDbContext db, ISkillEmbedder embedder, TimeProvider clock)
{
    /// <summary>
    /// Indexes one SKILL.md document and saves the resulting skill.
    /// </summary>
    /// <param name="content">Raw SKILL.md text; it is parsed and also hashed as UTF-8 bytes.</param>
    /// <param name="sourceRepo">Forwarded unchanged to <c>Skill.Index</c>; may be <c>null</c>.</param>
    /// <param name="sourcePath">Forwarded unchanged to <c>Skill.Index</c>; may be <c>null</c>.</param>
    /// <param name="sourceCommit">Forwarded unchanged to <c>Skill.Index</c>; may be <c>null</c>.</param>
    /// <param name="cancellationToken">Flows into the lookup query and the save.</param>
    /// <returns>The skill that was created or updated, after changes have been saved.</returns>
    public async Task<Skill> IndexAsync(
        string content,
        string? sourceRepo,
        string? sourcePath,
        string? sourceCommit,
        CancellationToken cancellationToken = default)
    {
        // Parsing runs first, so a document the parser rejects fails before any other work is done.
        var document = SkillMarkdownParser.Parse(content);
        var manifest = document.Manifest;
        var indexedAt = clock.GetUtcNow();

        // Hex-encoded SHA-256 of the exact bytes that were submitted.
        var digest = SHA256.HashData(Encoding.UTF8.GetBytes(content));
        var contentHash = Convert.ToHexString(digest);

        // The embedding input is name, summary, space-joined roles and body, one per line.
        var roleText = string.Join(' ', manifest.Roles);
        var embedding = new Vector(embedder.Embed($"{manifest.Name}\n{manifest.Summary}\n{roleText}\n{document.Body}"));

        // Structural publish gate for M2: without declared roles or without any golden test the skill
        // stays a draft. Only the counts are checked here. Running the golden tests against a model,
        // and gating on edit distance, is deferred to M4 when the assembler/runtime exists.
        var failsPublishGate = manifest.Roles.Count == 0 || manifest.GoldenTests.Count == 0;
        var status = failsPublishGate ? SkillStatus.Draft : SkillStatus.Published;

        // Upsert keyed on (SkillKey, Version): reuse the stored row when there is one.
        var existing = await db.Skills.FirstOrDefaultAsync(
            s => s.SkillKey == manifest.Id && s.Version == manifest.Version,
            cancellationToken);

        var skill = existing ?? Skill.Create(manifest.Id, manifest.Version, indexedAt);
        skill.Index(manifest, document.Body, contentHash, sourceRepo, sourcePath, sourceCommit, embedding, status, indexedAt);

        if (existing is null)
        {
            // Only a freshly created entity has to be added to the set.
            db.Skills.Add(skill);
        }

        await db.SaveChangesAsync(cancellationToken);
        return skill;
    }
}
|
||||
Reference in New Issue
Block a user