Files
hamkadr/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs
T
soroush.asadi 36bb165438 Real channel fetch (Telegram/Bale/Divar) + AI-audited automation engine + CI/CD
- Fetch: Telegram via t.me/s, Bale via Bot API, Divar via web-search (HttpClient, config-gated, graceful)
- AI layer: DB-backed AppSetting (mode auto/manual, thresholds, AI endpoint/model/key/prompt/framework, auto-approve); OpenAI-compatible IAiAuditor (self-host/Iranian endpoints; fails safe to manual)
- Pipeline: fetch → dedupe(hash) → parse → validate → AI audit → Discard/Flag/Queue/auto-publish (resolve-or-create facility)
- Admin: /Admin/Settings automation+AI panel; queue shows confidence + AI verdict; flagged section
- CI/CD: Dockerfile, docker-compose.prod.yml, .gitea/workflows/ci-cd.yml, nginx vhost, DEPLOY.md; forwarded headers + /healthz + prod reference-only seed; ports 22/80/443 only

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 17:41:02 +03:30

109 lines
4.7 KiB
C#

using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Structured fields extracted from a raw listing by the AI. Every member is optional; the
/// verdict parser in this file fills each one from the same-named camelCase key of the model's
/// JSON reply and leaves it null when that key is absent.
/// </summary>
/// <param name="Kind">Value of the <c>kind</c> key.</param>
/// <param name="Role">Value of the <c>role</c> key.</param>
/// <param name="City">Value of the <c>city</c> key.</param>
/// <param name="District">Value of the <c>district</c> key.</param>
/// <param name="ShiftType">Value of the <c>shiftType</c> key.</param>
/// <param name="EmploymentType">Value of the <c>employmentType</c> key.</param>
/// <param name="PayAmount">Integer value of the <c>payAmount</c> key (unit/currency not specified here).</param>
/// <param name="SharePercent">Integer value of the <c>sharePercent</c> key.</param>
/// <param name="Title">Value of the <c>title</c> key.</param>
/// <param name="FacilityName">Value of the <c>facilityName</c> key.</param>
public record AiStructured(
    string? Kind,
    string? Role,
    string? City,
    string? District,
    string? ShiftType,
    string? EmploymentType,
    long? PayAmount,
    int? SharePercent,
    string? Title,
    string? FacilityName);
/// <summary>
/// The AI's verdict on one raw listing: a decision keyword, a confidence score, an optional
/// free-text reason and the optional structured fields extracted from the listing.
/// </summary>
public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data)
{
    /// <summary>True when <see cref="Decision"/> is "approve" (case-insensitive).</summary>
    public bool Approve => DecisionIs("approve");

    /// <summary>True when <see cref="Decision"/> is "reject" (case-insensitive).</summary>
    public bool Reject => DecisionIs("reject");

    /// <summary>Ordinal, case-insensitive comparison of <see cref="Decision"/> with a keyword.</summary>
    private bool DecisionIs(string keyword)
    {
        return Decision.Equals(keyword, StringComparison.OrdinalIgnoreCase);
    }
}
/// <summary>Audits raw listings with an AI model.</summary>
public interface IAiAuditor
{
    /// <summary>
    /// Audit a raw post. Fails safe: a null result means the AI is off or the call failed, and
    /// the caller should fall back to the manual path.
    /// </summary>
    /// <param name="rawText">Raw text of the post to audit.</param>
    /// <param name="settings">Application settings carrying the AI configuration.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>The AI verdict, or null when AI is off or the call fails.</returns>
    Task<AiAuditResult?> AuditAsync(
        string rawText,
        AppSetting settings,
        CancellationToken ct = default);
}
/// <summary>
/// <see cref="IAiAuditor"/> backed by any OpenAI-compatible chat-completions endpoint (self-hosted
/// vLLM/Ollama, or an Iranian provider — OpenAI/Anthropic are blocked from Iran). The admin-set
/// system prompt is the "framework" that tells the model how to approve/reject/structure a
/// listing. The model is asked for strict JSON, which is then parsed into an
/// <see cref="AiAuditResult"/>. Any failure returns null so ingestion falls back to the
/// rule-based path.
/// </summary>
public class OpenAiCompatibleAuditor : IAiAuditor
{
    /// <summary>Model name sent when the settings do not specify one.</summary>
    private const string DefaultModel = "gpt-4o-mini";

    /// <summary>Decision used when the verdict has no string <c>decision</c> field.</summary>
    private const string DefaultDecision = "review";

    /// <summary>Confidence used when the verdict has no usable integer <c>confidence</c> field.</summary>
    private const int DefaultConfidence = 50;

    /// <summary>Upper bound on a single audit request.</summary>
    private static readonly TimeSpan RequestTimeout = TimeSpan.FromSeconds(30);

    private readonly IHttpClientFactory _http;
    private readonly ILogger<OpenAiCompatibleAuditor> _log;

    /// <summary>Creates the auditor.</summary>
    /// <param name="http">Factory used to obtain the named <c>"ai"</c> HTTP client.</param>
    /// <param name="log">Logger that receives a warning whenever an audit fails.</param>
    public OpenAiCompatibleAuditor(IHttpClientFactory http, ILogger<OpenAiCompatibleAuditor> log)
    {
        _http = http;
        _log = log;
    }

    /// <summary>
    /// Sends <paramref name="rawText"/> to the configured chat-completions endpoint and parses the
    /// model's JSON reply into a verdict.
    /// </summary>
    /// <param name="rawText">Raw text of the listing to audit.</param>
    /// <param name="s">Settings supplying the AI switch, endpoint, model, API key and system prompt.</param>
    /// <param name="ct">Token used to cancel the HTTP call.</param>
    /// <returns>
    /// The parsed verdict, or null when AI is disabled, no endpoint is configured, the reply is
    /// empty or unparseable, or any exception occurs (including HTTP errors, timeouts and
    /// cancellation — all are logged as warnings and swallowed).
    /// </returns>
    public async Task<AiAuditResult?> AuditAsync(string rawText, AppSetting s, CancellationToken ct = default)
    {
        if (!s.AiEnabled || string.IsNullOrWhiteSpace(s.AiEndpoint))
        {
            return null;
        }

        try
        {
            var payload = new
            {
                model = string.IsNullOrWhiteSpace(s.AiModel) ? DefaultModel : s.AiModel,
                temperature = 0,
                response_format = new { type = "json_object" },
                messages = new object[]
                {
                    new { role = "system", content = s.AiSystemPrompt },
                    new { role = "user", content = "آگهی خام:\n" + rawText + "\n\nفقط با JSON پاسخ بده." },
                },
            };

            var client = _http.CreateClient("ai");
            client.Timeout = RequestTimeout;

            using var req = new HttpRequestMessage(HttpMethod.Post, s.AiEndpoint)
            {
                Content = new StringContent(JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json"),
            };

            // The key is optional: self-hosted endpoints may not require authentication.
            if (!string.IsNullOrWhiteSpace(s.AiApiKey))
            {
                req.Headers.Authorization = new AuthenticationHeaderValue("Bearer", s.AiApiKey);
            }

            using var resp = await client.SendAsync(req, ct);
            resp.EnsureSuccessStatusCode();

            var body = await resp.Content.ReadAsStringAsync(ct);
            using var doc = JsonDocument.Parse(body);

            // Chat-completions shape: choices[0].message.content holds the model's text.
            // A missing property or empty array throws and is handled by the catch below.
            var content = doc.RootElement
                .GetProperty("choices")[0]
                .GetProperty("message")
                .GetProperty("content")
                .GetString();

            return string.IsNullOrWhiteSpace(content) ? null : ParseVerdict(content);
        }
        catch (Exception ex)
        {
            // Deliberate best-effort: an AI outage must never block ingestion.
            _log.LogWarning(ex, "AI audit failed — falling back to rule-based decision.");
            return null;
        }
    }

    /// <summary>
    /// Parses the model's reply text into an <see cref="AiAuditResult"/>.
    /// </summary>
    /// <param name="json">Model output; expected to be a JSON object, optionally wrapped in code fences.</param>
    /// <returns>
    /// The verdict, or null when no <c>{ … }</c> span can be found. Fields that are missing or have
    /// an unexpected JSON type (for example <c>null</c> or a quoted number) are treated as absent
    /// rather than failing the whole verdict.
    /// </returns>
    /// <exception cref="JsonException">The extracted span is not valid JSON.</exception>
    private static AiAuditResult? ParseVerdict(string json)
    {
        // The content itself should be a JSON object; tolerate code fences and stray text by
        // keeping only the span from the first '{' to the last '}'.
        json = json.Trim().Trim('`');
        var start = json.IndexOf('{');
        var end = json.LastIndexOf('}');
        if (start < 0 || end <= start)
        {
            return null;
        }

        json = json.Substring(start, end - start + 1);

        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        string? Str(string key) =>
            root.TryGetProperty(key, out var v) && v.ValueKind == JsonValueKind.String ? v.GetString() : null;

        // JsonElement.TryGetInt32/TryGetInt64 throw InvalidOperationException for any value whose
        // kind is not Number, so the kind is checked first. Models commonly emit null for unknown
        // fields, and that must not discard an otherwise valid verdict.
        int? Int(string key) =>
            root.TryGetProperty(key, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n)
                ? n
                : null;

        long? Long(string key) =>
            root.TryGetProperty(key, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt64(out var n)
                ? n
                : null;

        var decision = (Str("decision") ?? DefaultDecision).ToLowerInvariant();
        var confidence = Math.Clamp(Int("confidence") ?? DefaultConfidence, 0, 100);

        var data = new AiStructured(
            Str("kind"),
            Str("role"),
            Str("city"),
            Str("district"),
            Str("shiftType"),
            Str("employmentType"),
            Long("payAmount"),
            Int("sharePercent"),
            Str("title"),
            Str("facilityName"));

        return new AiAuditResult(decision, confidence, Str("reason"), data);
    }
}