From 33450a37ea32e7aad24b4a5bf0fa740c0e83187c Mon Sep 17 00:00:00 2001 From: "soroush.asadi" Date: Sun, 21 Jun 2026 20:04:57 +0330 Subject: [PATCH] =?UTF-8?q?Filter=20out=20home=20childcare=20/=20babysitte?= =?UTF-8?q?r=20ads=20(not=20=DA=A9=D8=A7=D8=AF=D8=B1=20=D8=AF=D8=B1=D9=85?= =?UTF-8?q?=D8=A7=D9=86)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Divar «پرستار کودک/خانم شبانه‌روزی» ads are often a family hiring an in-home babysitter («پدر کودک ۴ ساله هستم … نگهداری و مراقبت تمام‌وقت»), not clinical nursing. Add ChildcareMarkers (نگهداری/بچه‌داری/«پدر|مادر کودک»/پرستار بچه …) and discard such ads as out of scope, alongside the existing housekeeping filter. Clinical pediatric roles («بخش اطفال/کودکان/NICU») are unaffected. New ingests are filtered at crawl; run «بایگانیِ درجا» to re-screen existing rows that have the full text. Co-Authored-By: Claude Opus 4.8 --- .../Services/Scraping/ListingValidator.cs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/JobsMedical.Web/Services/Scraping/ListingValidator.cs b/src/JobsMedical.Web/Services/Scraping/ListingValidator.cs index 7392d48..f81e5c9 100644 --- a/src/JobsMedical.Web/Services/Scraping/ListingValidator.cs +++ b/src/JobsMedical.Web/Services/Scraping/ListingValidator.cs @@ -47,6 +47,16 @@ public class ListingValidator "نظافتچی", "خدمتکار", "کارگر منزل", "خدمات منزل", "مستخدم", }; + // Home childcare / babysitting — a family hiring someone to mind their child at home. NOT کادر + // درمان even when phrased «پرستار کودک/بچه». Clinical pediatric roles say «بخش اطفال/کودکان/NICU», + // not «نگهداری/بچه‌داری» or a parent self-identifying («پدر/مادر کودک»). + private static readonly string[] ChildcareMarkers = + { + "بچه داری", "بچه‌داری", "بچه دار ", "نگهداری کودک", "نگهداری از کودک", "نگهداری بچه", + "نگهداری از بچه", "نگهداری فرزند", "نگهداری نوزاد", "نگهداری شیرخوار", "پرستار بچه", + "پدر کودک", "مادر کودک", "نگهدار کودک", "نگهدار بچه", "مراقبت از کودک", "مراقبت از بچه", + }; + // Words that signal a real staffing post (hiring, shift, or availability). private static readonly string[] StaffingIntent = { @@ -82,6 +92,13 @@ public class ListingValidator return new ValidationResult(false, true, 0, issues, looksMedical); // IsSpam → auto-discard } + // Home childcare / babysitting — out of scope (not کادر درمان), discard. + if (ChildcareMarkers.Any(text.Contains)) + { + issues.Add("آگهی نگهداری کودک در منزل است، نه کادر درمان"); + return new ValidationResult(false, true, 0, issues, looksMedical); // IsSpam → auto-discard + } + // «آماده به کار»: a worker offering themselves. No facility/shift-date expected; the role // and a contact number are what matter. if (parsed.Kind == ListingKind.Talent)