From 7bbb4e385eb7b641db8a02eaf78fb6af8c2f3e09 Mon Sep 17 00:00:00 2001 From: "soroush.asadi" Date: Sun, 21 Jun 2026 18:06:22 +0330 Subject: [PATCH] =?UTF-8?q?Add=20in-place=20role-fix=20for=20existing=20?= =?UTF-8?q?=C2=AB=D9=BE=D8=B2=D8=B4=DA=A9=20=D8=B9=D9=85=D9=88=D9=85=DB=8C?= =?UTF-8?q?=C2=BB-mislabeled=20listings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RecorrectDoctorRolesAsync (+ admin button «اصلاح نقش»): re-runs the keyword parser + doctor-role guard over the stored text of existing aggregated listings currently labeled «پزشک عمومی», and corrects RoleId + the generic title in place when the text actually names a more specific role (dentist, «متخصص», lab, …). No AI call, no delete/recreate — IDs and indexed URLs unchanged, only GP-labeled rows touched. Cleans up the dentist/ENT/«متخصص غدد» mislabels already published. Co-Authored-By: Claude Opus 4.8 --- src/JobsMedical.Web/Pages/Admin/Index.cshtml | 9 ++++ .../Pages/Admin/Index.cshtml.cs | 9 ++++ .../Services/Scraping/IngestionService.cs | 51 +++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/src/JobsMedical.Web/Pages/Admin/Index.cshtml b/src/JobsMedical.Web/Pages/Admin/Index.cshtml index 68c3c19..9edb3c3 100644 --- a/src/JobsMedical.Web/Pages/Admin/Index.cshtml +++ b/src/JobsMedical.Web/Pages/Admin/Index.cshtml @@ -81,6 +81,15 @@ 🏥 ادغام مراکز تکراری و حذف مراکز بی‌نام + +
+ +
+

+ آگهی‌هایی که هوش مصنوعی به اشتباه «پزشک عمومی» زده ولی متنشان نقش دیگری دارد، از روی متن اصلاح می‌شوند (درجا، بدون تغییر شناسه/آدرس). +

مراکز تکراری (با تطبیقِ فارسی) در یک رکورد ادغام و مراکزِ بدونِ نامِ واقعی به «نامشخص» منتقل می‌شوند. آگهی‌ها حفظ می‌شوند؛ فقط مراکزِ جمع‌آوری‌شده و مدیریت‌نشده پاک می‌شوند.

diff --git a/src/JobsMedical.Web/Pages/Admin/Index.cshtml.cs b/src/JobsMedical.Web/Pages/Admin/Index.cshtml.cs index a099017..6eb0187 100644 --- a/src/JobsMedical.Web/Pages/Admin/Index.cshtml.cs +++ b/src/JobsMedical.Web/Pages/Admin/Index.cshtml.cs @@ -157,6 +157,15 @@ public class IndexModel : PageModel return RedirectToPage(); } + /// Fix existing aggregated listings the AI mislabeled «پزشک عمومی» (dentist/specialist/…) + /// in place from their stored text — no AI, no ID/URL change. + public async Task OnPostRecorrectRolesAsync() + { + var n = await _ingest.RecorrectDoctorRolesAsync(); + IngestMessage = $"اصلاح نقش: {n} آگهیِ «پزشک عمومی» که در واقع نقش دیگری بود (دندانپزشک، متخصص و …) از روی متن آگهی اصلاح شد. بدون تغییر شناسه یا آدرس صفحه."; + return RedirectToPage(); + } + private async Task LoadAsync() { Queue = await _db.RawListings diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs index c0c5eee..86aa9fd 100644 --- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs +++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs @@ -522,6 +522,57 @@ public class IngestionService return (merged, cleaned); } + /// + /// In-place fix for EXISTING aggregated listings the AI mislabeled «پزشک عمومی» when the ad text + /// actually names a more specific role (dentist, endocrinologist/«متخصص», lab, …). Re-runs the + /// keyword parser + the same doctor-role guard over the stored text and updates RoleId (and the + /// generic «استخدام پزشک عمومی» title) IN PLACE — no AI call, no delete/recreate, so IDs and + /// indexed URLs are untouched. Only ever changes rows currently labeled «پزشک عمومی». Returns the + /// number corrected. + /// + public async Task RecorrectDoctorRolesAsync(CancellationToken ct = default) + { + var roles = await _db.Roles.ToListAsync(ct); + var roleNames = roles.Select(r => r.Name).ToList(); + var cityNames = await _db.Cities.Select(c => c.Name).ToListAsync(ct); + var districtNames = await _db.Districts.Select(d => d.Name).ToListAsync(ct); + var gp = roles.FirstOrDefault(r => r.Name == "پزشک عمومی"); + if (gp is null) return 0; + + Role? Corrected(string? text) + { + var parsed = _parser.Parse(text ?? "", roleNames, cityNames, districtNames); + var specific = parsed.RoleNames.FirstOrDefault(n => NormalizeFa(n) != NormalizeFa("پزشک عمومی")); + if (specific is not null) return ResolveOrCreateRole(roles, specific, null); + if (LooksSpecialist(text)) return ResolveOrCreateRole(roles, "پزشک متخصص", "پزشک"); + return null; + } + + int fixedCount = 0; + + var jobs = await _db.JobOpenings + .Where(j => j.Status == ShiftStatus.Open && j.Source == ShiftSource.Aggregated && j.RoleId == gp.Id) + .ToListAsync(ct); + foreach (var j in jobs) + { + if (Corrected(j.Description) is { } nr && nr.Id != j.RoleId) + { + if (string.IsNullOrWhiteSpace(j.Title) || j.Title == "استخدام پزشک عمومی") j.Title = $"استخدام {nr.Name}"; + j.RoleId = nr.Id; fixedCount++; + } + } + + var talent = await _db.TalentListings + .Where(t => t.Status == ShiftStatus.Open && t.Source == ShiftSource.Aggregated && t.RoleId == gp.Id) + .ToListAsync(ct); + foreach (var t in talent) + if (Corrected(t.Description) is { } nr && nr.Id != t.RoleId) { t.RoleId = nr.Id; fixedCount++; } + + if (fixedCount > 0) await _db.SaveChangesAsync(ct); + _log.LogInformation("Recorrected {N} «پزشک عمومی»-mislabeled aggregated listings.", fixedCount); + return fixedCount; + } + private static string DigitsOnly(string s) => new(HtmlUtil.ToLatinDigits(s).Where(char.IsDigit).ToArray()); private static (RawListingStatus status, string? reason, int confidence) Decide(