e582597b20
Where deterministic geocoding gives up (neighborhood not in the TehranGeo table), fall back to the registered AI model: the auditor now also returns approximate lat/lng for a recognized Tehran neighborhood (folded into the existing single audit call — no extra requests), and Publish uses it only after the source ad and the local table, and only when it falls inside greater Tehran (InTehran bbox guard rejects hallucinated points). Coords order: Divar point → TehranGeo → AI. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
248 lines
14 KiB
C#
248 lines
14 KiB
C#
using System.Net;
|
||
using System.Net.Http.Headers;
|
||
using System.Text;
|
||
using System.Text.Json;
|
||
using JobsMedical.Web.Models;
|
||
|
||
namespace JobsMedical.Web.Services.Scraping;
|
||
|
||
/// <summary>
/// Structured fields the AI model extracts from a raw listing. Every value is optional: a field
/// the model left out, or returned in an unexpected JSON type, arrives here as <c>null</c>.
/// </summary>
public record AiStructured(
    string? Kind,
    string? Role,
    string? City,
    string? District,
    string? ShiftType,
    string? EmploymentType,
    long? PayAmount,
    int? SharePercent,
    string? Title,
    string? FacilityName,
    string? Phone = null,
    string? PersonName = null,
    int? YearsExperience = null,
    bool? IsLicensed = null,
    // Dynamic taxonomy: the model may name a role/category that is not in the seeded set
    // (ingestion resolves-or-creates it).
    string? Category = null,
    // Tags carry the post's skills/requirements (e.g. ICU, MMT, "licensed").
    IReadOnlyList<string>? Tags = null,
    // Approximate coordinates the model infers from a named neighborhood. Used ONLY as a
    // geocoding fallback (validated against Tehran's bounding box) when neither the source ad
    // nor the local table supplies a point.
    double? Lat = null,
    double? Lng = null);
|
||
|
||
/// <summary>
/// The model's verdict on a raw listing: a decision keyword, a confidence score, an optional
/// free-text reason and the structured fields it extracted.
/// </summary>
public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data)
{
    /// <summary>True when <see cref="Decision"/> is "approve" (case-insensitive, ordinal).</summary>
    public bool Approve => DecisionIs("approve");

    /// <summary>True when <see cref="Decision"/> is "reject" (case-insensitive, ordinal).</summary>
    public bool Reject => DecisionIs("reject");

    // Single place for the keyword comparison so both flags use identical matching rules.
    private bool DecisionIs(string keyword)
    {
        return Decision.Equals(keyword, StringComparison.OrdinalIgnoreCase);
    }
}
|
||
|
||
/// <summary>Contract for a service that asks an AI model to judge and structure a raw listing.</summary>
public interface IAiAuditor
{
    /// <summary>
    /// Audits a raw post.
    /// </summary>
    /// <param name="rawText">The unprocessed listing text.</param>
    /// <param name="settings">Application settings carrying the AI configuration.</param>
    /// <param name="ct">Token used to cancel the call.</param>
    /// <returns>
    /// The verdict, or <c>null</c> when AI is off or the call fails (fail safe → manual handling).
    /// </returns>
    Task<AiAuditResult?> AuditAsync(
        string rawText,
        AppSetting settings,
        CancellationToken ct = default);

    /// <summary>
    /// Diagnostic: runs a real call and reports the outcome as a detailed, human-readable Persian
    /// success/error string (HTTP status, response snippet, exception detail) so the admin can
    /// see exactly why the AI service won't connect.
    /// </summary>
    /// <param name="rawText">Sample listing text to send.</param>
    /// <param name="settings">Application settings carrying the AI configuration.</param>
    /// <param name="ct">Token used to cancel the call.</param>
    /// <returns>The diagnostic message. Implementations report failures in the text instead of throwing.</returns>
    Task<string> TestAsync(
        string rawText,
        AppSetting settings,
        CancellationToken ct = default);
}
|
||
|
||
/// <summary>
/// Calls any OpenAI-compatible chat-completions endpoint (self-hosted vLLM/Ollama, or an Iranian
/// provider — OpenAI/Anthropic are blocked from Iran unless the call goes through the proxy).
/// The system prompt is the code-owned <c>AppSetting.DefaultPrompt</c> followed by the
/// <c>OutputSchema</c> contract, not the admin-stored prompt text; together they are the
/// "framework" that tells the model how to approve/reject/structure. We ask for strict JSON and
/// parse it. Any failure returns null so ingestion falls back to the rule-based path.
/// </summary>
|
||
public class OpenAiCompatibleAuditor : IAiAuditor
|
||
{
|
||
// Authoritative output contract appended to the code-owned system prompt so tags/categories stay
// correct (including the "ready to work" talent type and the contact phone) regardless of the
// stored prompt text.
|
||
private const string OutputSchema = """
|
||
فقط یک شیء JSON با این کلیدها برگردان (هر فیلد نامشخص = null):
|
||
decision: approve|reject|review
|
||
confidence: عدد ۰ تا ۱۰۰
|
||
reason: توضیح کوتاه فارسی
|
||
kind: shift (شیفت توسط مرکز) | job (استخدام توسط مرکز) | talent (کادر درمان که خودش «آماده به کار» است)
|
||
role: «حرفهٔ پایه»، نه با توصیفگر. گروه سنی/بخش/سطح را در tags بگذار («پرستار کودک»→role «پرستار»). فقط برای حرفهٔ پایهٔ متفاوت که در فهرست نیست نقش جدید بساز.
|
||
category: فقط یکی از این پنج: پزشک | پرستار | ماما | تکنسین | دندانپزشک. اگر نگنجید «سایر». هرگز گروه جدید نساز.
|
||
tags: آرایهٔ کلیدواژههای بالینی (مهارت/بخش/گواهی/گروه سنی/سطح) مثل "ICU"،"دیالیز"،"کودک"،"پروانهدار". بدون مبلغ/پرداخت/تماس/شهر یا جملهٔ ناقص. اگر نبود [].
|
||
city, district: نام شهر و محله/منطقه در صورت ذکر
|
||
lat, lng: اگر محله/منطقه را در تهران تشخیص دادی، مختصاتِ تقریبیِ مرکزِ همان محله را بهصورت عدد اعشاری برگردان (lat حدود ۳۵.x، lng حدود ۵۱.x)؛ در غیر این صورت null. حدس نزن.
|
||
shiftType: day|evening|night|oncall (فقط برای shift)
|
||
employmentType: fulltime|parttime|contract|plan
|
||
payAmount: عدد تومان یا null ، sharePercent: عدد ۰ تا ۱۰۰ یا null (مثل «۵۰٪ تسویه»)
|
||
title: عنوان کوتاه ، facilityName: نام مرکز درمانی (فقط برای shift/job)
|
||
phone: شماره تماس (موبایل یا ثابت) بهصورت رقم لاتین، یا null
|
||
personName: نام فرد (فقط برای talent) ، yearsExperience: سال سابقه عدد یا null ، isLicensed: true/false (پروانهدار)
|
||
""";
|
||
|
||
private readonly ScrapeHttpClients _clients;
|
||
private readonly ILogger<OpenAiCompatibleAuditor> _log;
|
||
|
||
/// <summary>Captures the HTTP client provider and the logger this auditor works with.</summary>
/// <param name="clients">Source of the HTTP client used for AI calls.</param>
/// <param name="log">Logger for failed or malformed AI responses.</param>
public OpenAiCompatibleAuditor(ScrapeHttpClients clients, ILogger<OpenAiCompatibleAuditor> log)
    => (_clients, _log) = (clients, log);
|
||
|
||
/// <summary>
/// Sends the listing to the configured endpoint and turns the reply into a verdict.
/// Fail-safe: a disabled/unconfigured AI, a non-2xx status, an empty message, or any exception
/// is logged as a warning (where applicable) and reported as <c>null</c>.
/// </summary>
/// <param name="rawText">The unprocessed listing text.</param>
/// <param name="s">Settings holding the AI switch, endpoint and proxy flag.</param>
/// <param name="ct">Caller's cancellation token.</param>
/// <returns>The parsed verdict, or <c>null</c> when no usable verdict was obtained.</returns>
public async Task<AiAuditResult?> AuditAsync(string rawText, AppSetting s, CancellationToken ct = default)
{
    // Nothing to call unless AI is switched on and an endpoint is configured.
    var aiConfigured = s.AiEnabled && !string.IsNullOrWhiteSpace(s.AiEndpoint);
    if (!aiConfigured)
    {
        return null;
    }

    try
    {
        var (httpStatus, responseBody) = await SendAsync(rawText, s, ct);

        if (IsSuccess(httpStatus))
        {
            var message = ExtractContent(responseBody);
            if (!string.IsNullOrWhiteSpace(message))
            {
                return ParseVerdict(message);
            }

            _log.LogWarning("AI endpoint {Endpoint} returned no message content (response shape not OpenAI-compatible?). Body: {Body}",
                s.AiEndpoint, Truncate(responseBody, 600));
            return null;
        }

        // Keep the real status and response body in the log: the provider usually explains the
        // failure there (bad key, unknown model, quota), which EnsureSuccessStatusCode would discard.
        _log.LogWarning("AI endpoint {Endpoint} returned HTTP {Status}: {Body}",
            s.AiEndpoint, (int)httpStatus, Truncate(responseBody, 600));
        return null;
    }
    catch (OperationCanceledException) when (!ct.IsCancellationRequested)
    {
        // Cancellation that did not come from the caller's token is reported as a timeout.
        _log.LogWarning("AI call to {Endpoint} timed out (proxy={Proxy}).", s.AiEndpoint, s.AiUseProxy);
        return null;
    }
    catch (Exception ex)
    {
        _log.LogWarning(ex, "AI audit failed for endpoint {Endpoint} (proxy={Proxy}) — falling back to rule-based decision.",
            s.AiEndpoint, s.AiUseProxy);
        return null;
    }
}
|
||
|
||
/// <summary>
/// Diagnostic twin of <c>AuditAsync</c>: performs the same request but, instead of returning
/// null on failure, describes the outcome in a Persian message for the admin (HTTP status,
/// response snippet, or exception detail).
/// </summary>
/// <param name="rawText">Sample listing text to send.</param>
/// <param name="s">Settings holding the AI switch, endpoint and proxy flag.</param>
/// <param name="ct">Caller's cancellation token.</param>
/// <returns>A human-readable success, warning or error message.</returns>
public async Task<string> TestAsync(string rawText, AppSetting s, CancellationToken ct = default)
{
    var aiConfigured = s.AiEnabled && !string.IsNullOrWhiteSpace(s.AiEndpoint);
    if (!aiConfigured)
    {
        return "هوش مصنوعی غیرفعال است یا آدرس سرویس خالی است. ابتدا آن را فعال و ذخیره کن.";
    }

    // On/off label for the proxy flag, shared by the HTTP-error and network-error messages.
    string ProxyLabel() => s.AiUseProxy ? "روشن" : "خاموش";

    try
    {
        var (httpStatus, responseBody) = await SendAsync(rawText, s, ct);
        if (!IsSuccess(httpStatus))
        {
            return $"❌ سرویس کد HTTP {(int)httpStatus} ({httpStatus}) برگرداند.\nآدرس: {s.AiEndpoint}\nپروکسی: {ProxyLabel()}\nپاسخ سرویس:\n{Truncate(responseBody, 800)}";
        }

        var message = ExtractContent(responseBody);
        if (string.IsNullOrWhiteSpace(message))
        {
            return $"❌ پاسخ دریافت شد ولی محتوای پیام خالی بود — ساختار پاسخ با OpenAI سازگار نیست؟\nپاسخ خام:\n{Truncate(responseBody, 800)}";
        }

        var verdict = ParseVerdict(message);
        if (verdict is not null)
        {
            return $"✅ اتصال موفق — تصمیم: {verdict.Decision} | اطمینان: {verdict.Confidence}٪ | نقش: {verdict.Data?.Role} | شهر: {verdict.Data?.City} | شیفت: {verdict.Data?.ShiftType}";
        }

        return $"⚠️ مدل پاسخ داد ولی JSON قابلخواندن نبود. (response_format=json_object را پشتیبانی نمیکند؟)\nمحتوا:\n{Truncate(message, 800)}";
    }
    catch (OperationCanceledException) when (!ct.IsCancellationRequested)
    {
        // Cancellation that did not come from the caller's token is reported as a timeout.
        return "❌ مهلت پاسخگویی تمام شد (timeout ۱۰۰ ثانیه). اگر تیک «از طریق پروکسی» روشن است، صحت آدرس پروکسی را بررسی کن.";
    }
    catch (HttpRequestException ex)
    {
        // DNS failure, connection refused, TLS error, proxy unreachable — the common Iran cases.
        var inner = string.Empty;
        if (ex.InnerException is { } cause)
        {
            inner = $" — {cause.Message}";
        }

        return $"❌ خطای شبکه/پروکسی: {ex.Message}{inner}\nآدرس: {s.AiEndpoint}\nپروکسی: {ProxyLabel()}";
    }
    catch (Exception ex)
    {
        return $"❌ خطا: {ex.GetType().Name}: {ex.Message}";
    }
}
|
||
|
||
/// <summary>
/// Builds the chat-completions request, POSTs it to the configured endpoint and hands back the
/// raw HTTP status together with the response body. Both AuditAsync (fail-safe) and TestAsync
/// (diagnostic) go through here, so they exercise the identical call path.
/// </summary>
/// <param name="rawText">Listing text placed in the user message.</param>
/// <param name="s">Settings supplying model name, endpoint, API key and proxy choice.</param>
/// <param name="ct">Cancellation token for the send and the body read.</param>
/// <returns>The response status code and the body text, whatever the status was.</returns>
private async Task<(HttpStatusCode status, string body)> SendAsync(string rawText, AppSetting s, CancellationToken ct)
{
    var modelName = string.IsNullOrWhiteSpace(s.AiModel) ? "gpt-4o-mini" : s.AiModel;

    // Hardcoded, code-owned prompt (NOT the stored AiSystemPrompt) plus the authoritative output
    // schema, so an admin edit can never break classification/tags.
    var systemMessage = new { role = "system", content = AppSetting.DefaultPrompt + "\n\n" + OutputSchema };
    var userMessage = new { role = "user", content = "آگهی خام:\n" + rawText + "\n\nفقط با JSON پاسخ بده." };

    var requestBody = new
    {
        model = modelName,
        temperature = 0,
        response_format = new { type = "json_object" },
        messages = new object[] { systemMessage, userMessage },
    };

    // Proxy-aware when AiUseProxy is on (e.g. OpenAI from Iran).
    var http = _clients.ForAi(s);

    using var request = new HttpRequestMessage(HttpMethod.Post, s.AiEndpoint);
    request.Content = new StringContent(JsonSerializer.Serialize(requestBody), Encoding.UTF8, "application/json");

    // The bearer header is only attached when a key is configured.
    if (!string.IsNullOrWhiteSpace(s.AiApiKey))
    {
        request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", s.AiApiKey);
    }

    using var response = await http.SendAsync(request, ct);
    var text = await response.Content.ReadAsStringAsync(ct);
    return (response.StatusCode, text);
}
|
||
|
||
/// <summary>True for any HTTP status in the 2xx range (200 inclusive to 300 exclusive).</summary>
private static bool IsSuccess(HttpStatusCode s)
{
    var code = (int)s;
    return code >= 200 && code < 300;
}
|
||
|
||
/// <summary>
/// Pulls <c>choices[0].message.content</c> out of an OpenAI-style response.
/// </summary>
/// <param name="body">Raw response body text.</param>
/// <returns>
/// The message content string, or <c>null</c> on any unexpected shape (invalid JSON, an error
/// object, a non-object root/choice/message, or a non-string content) so the caller can show
/// the body instead of handling an exception.
/// </returns>
private static string? ExtractContent(string body)
{
    try
    {
        using var doc = JsonDocument.Parse(body);
        var root = doc.RootElement;

        // TryGetProperty throws InvalidOperationException on anything but a JSON object, so
        // every level is checked for its ValueKind before it is probed.
        if (root.ValueKind != JsonValueKind.Object || !root.TryGetProperty("choices", out var choices))
        {
            return null;
        }

        if (choices.ValueKind != JsonValueKind.Array || choices.GetArrayLength() == 0)
        {
            return null;
        }

        var firstChoice = choices[0];
        if (firstChoice.ValueKind != JsonValueKind.Object || !firstChoice.TryGetProperty("message", out var message))
        {
            return null;
        }

        if (message.ValueKind != JsonValueKind.Object || !message.TryGetProperty("content", out var content))
        {
            return null;
        }

        // GetString throws on numbers/arrays/objects; only a real string is usable content.
        return content.ValueKind == JsonValueKind.String ? content.GetString() : null;
    }
    catch (JsonException)
    {
        // Body is not valid JSON — treated like any other unexpected shape.
        return null;
    }
}
|
||
|
||
/// <summary>
/// Shortens text for logs and diagnostics: a null/empty input becomes the placeholder
/// "(خالی)", text longer than <paramref name="max"/> is cut to that length with " …" appended,
/// and anything else is returned unchanged.
/// </summary>
private static string Truncate(string? s, int max)
{
    if (string.IsNullOrEmpty(s))
    {
        return "(خالی)";
    }

    if (s.Length > max)
    {
        return s.Substring(0, max) + " …";
    }

    return s;
}
|
||
|
||
/// <summary>
/// Parses the model's message content into an <see cref="AiAuditResult"/>.
/// </summary>
/// <param name="json">
/// Message content that should be a JSON object; code fences and text around the object are tolerated.
/// </param>
/// <returns>
/// The verdict, or <c>null</c> when no JSON object can be located or parsed. A missing decision
/// defaults to "review", a missing confidence to 50 (then clamped to 0–100), and any field that
/// is absent or has an unexpected JSON type becomes <c>null</c>.
/// </returns>
private static AiAuditResult? ParseVerdict(string json)
{
    // The content itself should be a JSON object; tolerate code fences and surrounding text by
    // keeping only the span from the first '{' to the last '}'.
    json = json.Trim().Trim('`');
    var start = json.IndexOf('{');
    var end = json.LastIndexOf('}');
    if (start < 0 || end <= start)
    {
        return null;
    }

    json = json.Substring(start, end - start + 1);

    JsonDocument doc;
    try
    {
        doc = JsonDocument.Parse(json);
    }
    catch (JsonException)
    {
        return null; // model returned non-JSON content
    }

    using (doc)
    {
        var r = doc.RootElement;

        // Guard on ValueKind first — the typed getters THROW on null/string values (the model
        // often returns payAmount/sharePercent as null), which would fail the whole parse.
        string? S(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.String ? v.GetString() : null;
        int I(string k, int d) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : d;
        long? L(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt64(out var n) ? n : null;
        double? D(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetDouble(out var n) ? n : null;
        int? NI(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : null;
        bool? B(string k) => r.TryGetProperty(k, out var v) && (v.ValueKind == JsonValueKind.True || v.ValueKind == JsonValueKind.False) ? v.GetBoolean() : null;

        // Array-of-strings reader (tolerates the model returning a single string instead of an array).
        IReadOnlyList<string>? SA(string k)
        {
            if (!r.TryGetProperty(k, out var v))
            {
                return null;
            }

            var list = new List<string>();

            // Explicit braces are required here: without them the `else` attaches to the inner
            // `if` inside the loop, and a single-string value is silently dropped.
            if (v.ValueKind == JsonValueKind.Array)
            {
                foreach (var el in v.EnumerateArray())
                {
                    if (el.ValueKind == JsonValueKind.String && el.GetString() is { Length: > 0 } item)
                    {
                        list.Add(item);
                    }
                }
            }
            else if (v.ValueKind == JsonValueKind.String && v.GetString() is { Length: > 0 } one)
            {
                list.Add(one);
            }

            return list.Count > 0 ? list : null;
        }

        var decision = (S("decision") ?? "review").ToLowerInvariant();
        var data = new AiStructured(S("kind"), S("role"), S("city"), S("district"), S("shiftType"),
            S("employmentType"), L("payAmount"), NI("sharePercent"), S("title"), S("facilityName"),
            Phone: S("phone"), PersonName: S("personName"), YearsExperience: NI("yearsExperience"), IsLicensed: B("isLicensed"),
            Category: S("category"), Tags: SA("tags"), Lat: D("lat"), Lng: D("lng"));
        return new AiAuditResult(decision, Math.Clamp(I("confidence", 50), 0, 100), S("reason"), data);
    }
}
|
||
}
|