From e582597b20cb8462f6a213577d2650cb8c6aaa3a Mon Sep 17 00:00:00 2001 From: "soroush.asadi" Date: Sat, 20 Jun 2026 15:48:42 +0330 Subject: [PATCH] Geocoding fallback: use the registered AI model when the table can't resolve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Where deterministic geocoding gives up (neighborhood not in the TehranGeo table), fall back to the registered AI model: the auditor now also returns approximate lat/lng for a recognized Tehran neighborhood (folded into the existing single audit call — no extra requests), and Publish uses it only after the source ad and the local table, and only when it falls inside greater Tehran (InTehran bbox guard rejects hallucinated points). Coords order: Divar point → TehranGeo → AI. Co-Authored-By: Claude Opus 4.8 --- src/JobsMedical.Web/Services/Scraping/AiAuditor.cs | 9 +++++++-- .../Services/Scraping/IngestionService.cs | 7 +++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs b/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs index 6035227..c964e09 100644 --- a/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs +++ b/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs @@ -12,7 +12,10 @@ public record AiStructured( string? Phone = null, string? PersonName = null, int? YearsExperience = null, bool? IsLicensed = null, // Dynamic taxonomy: the model may name a role/category outside the seeded set (ingestion // resolves-or-creates it). Tags carry the post's skills/requirements (ICU, MMT, پروانه‌دار…). - string? Category = null, IReadOnlyList<string>? Tags = null); + string? Category = null, IReadOnlyList<string>? Tags = null, + // Approximate coords the model infers from a named neighborhood — used ONLY as a geocoding + // fallback (validated against Tehran's bbox), when the source ad and the local table have none. + double? Lat = null, double? Lng = null); /// An AI verdict on a raw listing.
public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data) @@ -52,6 +55,7 @@ public class OpenAiCompatibleAuditor : IAiAuditor category: فقط یکی از این پنج: پزشک | پرستار | ماما | تکنسین | دندانپزشک. اگر نگنجید «سایر». هرگز گروه جدید نساز. tags: آرایهٔ کلیدواژه‌های بالینی (مهارت/بخش/گواهی/گروه سنی/سطح) مثل "ICU"،"دیالیز"،"کودک"،"پروانه‌دار". بدون مبلغ/پرداخت/تماس/شهر یا جملهٔ ناقص. اگر نبود []. city, district: نام شهر و محله/منطقه در صورت ذکر + lat, lng: اگر محله/منطقه را در تهران تشخیص دادی، مختصاتِ تقریبیِ مرکزِ همان محله را به‌صورت عدد اعشاری برگردان (lat حدود ۳۵.x، lng حدود ۵۱.x)؛ در غیر این صورت null. حدس نزن. shiftType: day|evening|night|oncall (فقط برای shift) employmentType: fulltime|parttime|contract|plan payAmount: عدد تومان یا null ، sharePercent: عدد ۰ تا ۱۰۰ یا null (مثل «۵۰٪ تسویه») @@ -217,6 +221,7 @@ public class OpenAiCompatibleAuditor : IAiAuditor string? S(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.String ? v.GetString() : null; int I(string k, int d) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : d; long? L(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt64(out var n) ? n : null; + double? D(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetDouble(out var n) ? n : null; int? NI(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : null; bool? B(string k) => r.TryGetProperty(k, out var v) && (v.ValueKind == JsonValueKind.True || v.ValueKind == JsonValueKind.False) ? v.GetBoolean() : null; // Array-of-strings reader (tolerates the model returning a single string instead of an array). 
@@ -235,7 +240,7 @@ public class OpenAiCompatibleAuditor : IAiAuditor var data = new AiStructured(S("kind"), S("role"), S("city"), S("district"), S("shiftType"), S("employmentType"), L("payAmount"), NI("sharePercent"), S("title"), S("facilityName"), Phone: S("phone"), PersonName: S("personName"), YearsExperience: NI("yearsExperience"), IsLicensed: B("isLicensed"), - Category: S("category"), Tags: SA("tags")); + Category: S("category"), Tags: SA("tags"), Lat: D("lat"), Lng: D("lng")); return new AiAuditResult(decision, Math.Clamp(I("confidence", 50), 0, 100), S("reason"), data); } } diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs index 7e5a7f5..490d800 100644 --- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs +++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs @@ -314,6 +314,10 @@ public class IngestionService if (appLat is null && city.Name == "تهران" && TehranGeo.Locate(district?.Name, districtName, parsed.AreaNote) is { } g) { appLat = g.lat; appLng = g.lng; } + // Last resort — the AI model's inferred coords, but ONLY when they fall inside greater Tehran + // (rejects a hallucinated point elsewhere). Uses the registered model where the rules can't decide. + if (appLat is null && d?.Lat is double aLat && d?.Lng is double aLng && InTehran(aLat, aLng)) + { appLat = aLat; appLng = aLng; } var kindStr = (d?.Kind ?? parsed.Kind.ToString()).ToLowerInvariant(); @@ -544,6 +548,9 @@ public class IngestionService private static string Clamp(string s, int max) => s.Length <= max ? s : s[..max].Trim(); + /// Greater-Tehran bounding box — rejects out-of-area (hallucinated) AI coordinates. + private static bool InTehran(double lat, double lng) => lat is >= 35.4 and <= 35.95 && lng is >= 51.0 and <= 51.8; + // Gender/seniority tokens that don't belong in a role name (they go to tags / the Gender field). 
private static readonly string[] RoleModifierWords = { "آقا", "خانم", "خانوم", "بانو", "مرد", "زن", "کارآموز", "کارورز", "ارشد", "مبتدی" };