From 386e25c8fd6107b32b38933ba3fcd5b3c9bdb863 Mon Sep 17 00:00:00 2001 From: "soroush.asadi" Date: Mon, 8 Jun 2026 09:30:23 +0330 Subject: [PATCH] Validator: discard promotional/training ads (workshops, courses) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Medical-flavored ads like «کارگاه بوتاکس و فیلر… ویژه پزشکان ۱۰٪» passed the medical gate and got misclassified as a پزشک عمومی shift with a bogus 10% share. Now: if a course/event/product marker is present and there's no staffing intent (hiring/shift/availability), the item is auto-discarded. Co-Authored-By: Claude Opus 4.8 --- .../Services/Scraping/ListingValidator.cs | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/JobsMedical.Web/Services/Scraping/ListingValidator.cs b/src/JobsMedical.Web/Services/Scraping/ListingValidator.cs index 843f5b4..e720954 100644 --- a/src/JobsMedical.Web/Services/Scraping/ListingValidator.cs +++ b/src/JobsMedical.Web/Services/Scraping/ListingValidator.cs @@ -31,6 +31,21 @@ public class ListingValidator "دیالیز", "فوریت", "آی سی یو", "سی سی یو", "آنکال", "کشیک", "تریاژ", "نوزادان", "سالمند" }; + // Course/event/product ads aimed at clinicians — not job posts. + private static readonly string[] PromoMarkers = + { + "کارگاه", "وبینار", "سمینار", "همایش", "کنگره", "دوره آموزشی", "دوره‌ی آموزشی", + "ثبت نام", "ثبت‌نام", "ظرفیت محدود", "فروش دوره", "مدرک معتبر", "گواهی پایان دوره", + "بوتاکس و فیلر", "مزوتراپی", "فیلر صورت", + }; + + // Words that signal a real staffing post (hiring, shift, or availability). 
+ private static readonly string[] StaffingIntent = + { + "استخدام", "جذب", "نیازمند", "نیازمندیم", "دعوت به همکاری", "شیفت", "آنکال", "انکال", + "کشیک", "نوبت", "آماده به کار", "آماده‌به‌کار", "آماده همکاری", "جویای کار", "مسئول فنی", + }; + public ValidationResult Validate(string rawText, ParsedListing parsed) { var issues = new List<string>(); @@ -43,6 +58,15 @@ public class ListingValidator bool looksMedical = MedicalMarkers.Any(text.Contains); if (!looksMedical) issues.Add("نشانه‌ای از حوزه درمان یافت نشد"); + // Promotional / training ads (workshops, webinars, course/product sales) are medical- + // flavored but NOT staffing. Discard them when there's no hiring/shift/availability intent. + bool isPromo = PromoMarkers.Any(text.Contains) && !StaffingIntent.Any(text.Contains); + if (isPromo) + { + issues.Add("آگهی تبلیغاتی/آموزشی است، نه استخدام/شیفت"); + return new ValidationResult(false, true, 0, issues); // IsSpam → auto-discard + } + // «آماده به کار»: a worker offering themselves. No facility/shift-date expected; the role // and a contact number are what matter. if (parsed.Kind == ListingKind.Talent)