diff --git a/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs b/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs index 6035227..c964e09 100644 --- a/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs +++ b/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs @@ -12,7 +12,10 @@ public record AiStructured( string? Phone = null, string? PersonName = null, int? YearsExperience = null, bool? IsLicensed = null, // Dynamic taxonomy: the model may name a role/category outside the seeded set (ingestion // resolves-or-creates it). Tags carry the post's skills/requirements (ICU, MMT, پروانه‌دار…). - string? Category = null, IReadOnlyList? Tags = null); + string? Category = null, IReadOnlyList? Tags = null, + // Approximate coords the model infers from a named neighborhood — used ONLY as a geocoding + // fallback (validated against Tehran's bbox), when the source ad and the local table have none. + double? Lat = null, double? Lng = null); /// An AI verdict on a raw listing. public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data) @@ -52,6 +55,7 @@ public class OpenAiCompatibleAuditor : IAiAuditor category: فقط یکی از این پنج: پزشک | پرستار | ماما | تکنسین | دندانپزشک. اگر نگنجید «سایر». هرگز گروه جدید نساز. tags: آرایهٔ کلیدواژه‌های بالینی (مهارت/بخش/گواهی/گروه سنی/سطح) مثل "ICU"،"دیالیز"،"کودک"،"پروانه‌دار". بدون مبلغ/پرداخت/تماس/شهر یا جملهٔ ناقص. اگر نبود []. city, district: نام شهر و محله/منطقه در صورت ذکر + lat, lng: اگر محله/منطقه را در تهران تشخیص دادی، مختصاتِ تقریبیِ مرکزِ همان محله را به‌صورت عدد اعشاری برگردان (lat حدود ۳۵.x، lng حدود ۵۱.x)؛ در غیر این صورت null. حدس نزن. shiftType: day|evening|night|oncall (فقط برای shift) employmentType: fulltime|parttime|contract|plan payAmount: عدد تومان یا null ، sharePercent: عدد ۰ تا ۱۰۰ یا null (مثل «۵۰٪ تسویه») @@ -217,6 +221,7 @@ public class OpenAiCompatibleAuditor : IAiAuditor string? S(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.String ? v.GetString() : null; int I(string k, int d) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : d; long? L(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt64(out var n) ? n : null; + double? D(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetDouble(out var n) ? n : null; int? NI(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : null; bool? B(string k) => r.TryGetProperty(k, out var v) && (v.ValueKind == JsonValueKind.True || v.ValueKind == JsonValueKind.False) ? v.GetBoolean() : null; // Array-of-strings reader (tolerates the model returning a single string instead of an array). @@ -235,7 +240,7 @@ public class OpenAiCompatibleAuditor : IAiAuditor var data = new AiStructured(S("kind"), S("role"), S("city"), S("district"), S("shiftType"), S("employmentType"), L("payAmount"), NI("sharePercent"), S("title"), S("facilityName"), Phone: S("phone"), PersonName: S("personName"), YearsExperience: NI("yearsExperience"), IsLicensed: B("isLicensed"), - Category: S("category"), Tags: SA("tags")); + Category: S("category"), Tags: SA("tags"), Lat: D("lat"), Lng: D("lng")); return new AiAuditResult(decision, Math.Clamp(I("confidence", 50), 0, 100), S("reason"), data); } } diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs index 7e5a7f5..490d800 100644 --- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs +++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs @@ -314,6 +314,10 @@ public class IngestionService if (appLat is null && city.Name == "تهران" && TehranGeo.Locate(district?.Name, districtName, parsed.AreaNote) is { } g) { appLat = g.lat; appLng = g.lng; } + // Last resort — the AI model's inferred coords, but ONLY when they fall inside greater Tehran + // (rejects a hallucinated point elsewhere). Uses the registered model where the rules can't decide. + if (appLat is null && d?.Lat is double aLat && d?.Lng is double aLng && InTehran(aLat, aLng)) + { appLat = aLat; appLng = aLng; } var kindStr = (d?.Kind ?? parsed.Kind.ToString()).ToLowerInvariant(); @@ -544,6 +548,9 @@ public class IngestionService private static string Clamp(string s, int max) => s.Length <= max ? s : s[..max].Trim(); + /// Greater-Tehran bounding box — rejects out-of-area (hallucinated) AI coordinates. + private static bool InTehran(double lat, double lng) => lat is >= 35.4 and <= 35.95 && lng is >= 51.0 and <= 51.8; + // Gender/seniority tokens that don't belong in a role name (they go to tags / the Gender field). private static readonly string[] RoleModifierWords = { "آقا", "خانم", "خانوم", "بانو", "مرد", "زن", "کارآموز", "کارورز", "ارشد", "مبتدی" };