36bb165438
- Fetch: Telegram via t.me/s, Bale via Bot API, Divar via web-search (HttpClient, config-gated, graceful) - AI layer: DB-backed AppSetting (mode auto/manual, thresholds, AI endpoint/model/key/prompt/framework, auto-approve); OpenAI-compatible IAiAuditor (self-host/Iranian endpoints; fails safe to manual) - Pipeline: fetch → dedupe(hash) → parse → validate → AI audit → Discard/Flag/Queue/auto-publish (resolve-or-create facility) - Admin: /Admin/Settings automation+AI panel; queue shows confidence + AI verdict; flagged section - CI/CD: Dockerfile, docker-compose.prod.yml, .gitea/workflows/ci-cd.yml, nginx vhost, DEPLOY.md; forwarded headers + /healthz + prod reference-only seed; ports 22/80/443 only Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
109 lines
4.7 KiB
C#
109 lines
4.7 KiB
C#
using System.Net.Http.Headers;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using JobsMedical.Web.Models;
|
|
|
|
namespace JobsMedical.Web.Services.Scraping;
|
|
|
|
/// <summary>
/// Structured listing fields carried alongside an AI verdict. Every field is optional
/// (nullable); the auditor's verdict parser fills them from the JSON keys <c>kind</c>,
/// <c>role</c>, <c>city</c>, <c>district</c>, <c>shiftType</c>, <c>employmentType</c>,
/// <c>payAmount</c>, <c>sharePercent</c>, <c>title</c> and <c>facilityName</c>.
/// </summary>
public record AiStructured(
    string? Kind,
    string? Role,
    string? City,
    string? District,
    string? ShiftType,
    string? EmploymentType,
    long? PayAmount,
    int? SharePercent,
    string? Title,
    string? FacilityName);
|
|
|
|
/// <summary>
/// The AI's verdict on a raw listing: a decision string, a confidence score, an optional
/// free-text reason, and the optional structured fields extracted from the listing.
/// </summary>
public record AiAuditResult(
    string Decision,
    int Confidence,
    string? Reason,
    AiStructured? Data)
{
    /// <summary>True when <see cref="Decision"/> equals "approve" (case-insensitive, ordinal).</summary>
    public bool Approve
    {
        get { return Decision.Equals("approve", StringComparison.OrdinalIgnoreCase); }
    }

    /// <summary>True when <see cref="Decision"/> equals "reject" (case-insensitive, ordinal).</summary>
    public bool Reject
    {
        get { return Decision.Equals("reject", StringComparison.OrdinalIgnoreCase); }
    }
}
|
|
|
|
/// <summary>Audits raw listing text with an AI model and reports its verdict.</summary>
public interface IAiAuditor
{
    /// <summary>
    /// Audit a raw post.
    /// </summary>
    /// <param name="rawText">The raw text of the post to audit.</param>
    /// <param name="settings">The application settings that configure the AI call.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <returns>
    /// The verdict, or <c>null</c> when AI is off or the call fails (fail safe → manual).
    /// </returns>
    Task<AiAuditResult?> AuditAsync(
        string rawText,
        AppSetting settings,
        CancellationToken ct = default);
}
|
|
|
|
/// <summary>
/// Calls any OpenAI-compatible chat-completions endpoint (self-hosted vLLM/Ollama, or an Iranian
/// provider — OpenAI/Anthropic are blocked from Iran). The admin-set system prompt is the
/// "framework" that tells the model how to approve/reject/structure. We ask for strict JSON and
/// parse it. Any failure returns null so ingestion falls back to the rule-based path.
/// </summary>
public class OpenAiCompatibleAuditor : IAiAuditor
{
    private readonly IHttpClientFactory _http;
    private readonly ILogger<OpenAiCompatibleAuditor> _log;

    /// <summary>Creates the auditor.</summary>
    /// <param name="http">Factory used to obtain the named "ai" HTTP client.</param>
    /// <param name="log">Logger that receives a warning whenever an audit fails.</param>
    public OpenAiCompatibleAuditor(IHttpClientFactory http, ILogger<OpenAiCompatibleAuditor> log)
    {
        _http = http;
        _log = log;
    }

    /// <summary>
    /// Sends <paramref name="rawText"/> to the configured chat-completions endpoint and parses
    /// the model's JSON reply into a verdict.
    /// </summary>
    /// <param name="rawText">The raw listing text to audit.</param>
    /// <param name="s">Settings supplying the AI switch, endpoint, model, API key and system prompt.</param>
    /// <param name="ct">Token passed to the HTTP send and the body read.</param>
    /// <returns>
    /// The parsed verdict, or <c>null</c> when AI is disabled, no endpoint is configured, the
    /// reply has no content, the reply contains no JSON object, or any exception occurs
    /// (HTTP error status, timeout, cancellation, malformed JSON).
    /// </returns>
    public async Task<AiAuditResult?> AuditAsync(string rawText, AppSetting s, CancellationToken ct = default)
    {
        if (!s.AiEnabled || string.IsNullOrWhiteSpace(s.AiEndpoint)) return null;

        try
        {
            var payload = new
            {
                // Fall back to a default model name when none is configured.
                model = string.IsNullOrWhiteSpace(s.AiModel) ? "gpt-4o-mini" : s.AiModel,
                temperature = 0,
                response_format = new { type = "json_object" },
                messages = new object[]
                {
                    new { role = "system", content = s.AiSystemPrompt },
                    // Persian user message: "Raw ad:" + text + "Reply with JSON only."
                    new { role = "user", content = "آگهی خام:\n" + rawText + "\n\nفقط با JSON پاسخ بده." },
                },
            };

            var client = _http.CreateClient("ai");
            client.Timeout = TimeSpan.FromSeconds(30);
            using var req = new HttpRequestMessage(HttpMethod.Post, s.AiEndpoint)
            {
                Content = new StringContent(JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json"),
            };
            // The bearer token is optional; it is only sent when a key is configured.
            if (!string.IsNullOrWhiteSpace(s.AiApiKey))
                req.Headers.Authorization = new AuthenticationHeaderValue("Bearer", s.AiApiKey);

            using var resp = await client.SendAsync(req, ct);
            resp.EnsureSuccessStatusCode();
            var body = await resp.Content.ReadAsStringAsync(ct);

            // Chat-completions envelope: choices[0].message.content holds the model's reply text.
            using var doc = JsonDocument.Parse(body);
            var content = doc.RootElement
                .GetProperty("choices")[0].GetProperty("message").GetProperty("content").GetString();
            if (string.IsNullOrWhiteSpace(content)) return null;

            return ParseVerdict(content);
        }
        catch (Exception ex)
        {
            // Deliberate catch-all: every failure degrades to "no AI verdict".
            _log.LogWarning(ex, "AI audit failed — falling back to rule-based decision.");
            return null;
        }
    }

    /// <summary>
    /// Parses the model's reply text into an <see cref="AiAuditResult"/>.
    /// </summary>
    /// <param name="json">The reply text; expected to contain one JSON object, possibly inside code fences.</param>
    /// <returns>
    /// The verdict, or <c>null</c> when the text contains no <c>{ ... }</c> span. The decision
    /// defaults to "review" and is lower-cased; the confidence defaults to 50 and is clamped to
    /// 0–100. A field that is missing or has the wrong JSON type is treated as absent.
    /// </returns>
    /// <exception cref="JsonException">The extracted span is not valid JSON.</exception>
    private static AiAuditResult? ParseVerdict(string json)
    {
        // The content itself should be a JSON object; tolerate code fences and surrounding text
        // by keeping only the span from the first '{' to the last '}'.
        json = json.Trim().Trim('`');
        var start = json.IndexOf('{');
        var end = json.LastIndexOf('}');
        if (start < 0 || end <= start) return null;
        json = json.Substring(start, end - start + 1);

        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;

        string? Str(string key) =>
            root.TryGetProperty(key, out var v) && v.ValueKind == JsonValueKind.String ? v.GetString() : null;

        // JsonElement.TryGetInt32/TryGetInt64 throw InvalidOperationException when the value is
        // not a JSON number (e.g. null or a string), so the ValueKind must be checked first.
        // Without the guard a single null numeric field discards the whole verdict.
        int? Int(string key) =>
            root.TryGetProperty(key, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n)
                ? n
                : null;

        long? Long(string key) =>
            root.TryGetProperty(key, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt64(out var n)
                ? n
                : null;

        var decision = (Str("decision") ?? "review").ToLowerInvariant();
        var confidence = Math.Clamp(Int("confidence") ?? 50, 0, 100);
        var data = new AiStructured(Str("kind"), Str("role"), Str("city"), Str("district"), Str("shiftType"),
            Str("employmentType"), Long("payAmount"), Int("sharePercent"), Str("title"), Str("facilityName"));
        return new AiAuditResult(decision, confidence, Str("reason"), data);
    }
}
|