diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
index d4edf83..732d5d4 100644
--- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
+++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
@@ -610,11 +610,18 @@ public class IngestionService
private static readonly string[] RoleModifierWords =
{ "آقا", "خانم", "خانوم", "بانو", "مرد", "زن", "کارآموز", "کارورز", "ارشد", "مبتدی" };
- /// Remove modifier tokens from a role name, keeping the base profession. Never strips to
- /// empty (falls back to the original).
+ // Availability phrases the model sometimes glues onto the role (e.g. «کمک بهیار آماده به کار», "nurse aide, ready to work").
+ // Each is removed as a whole phrase before token-stripping, so standalone «به»/«کار» tokens in other role names are never touched.
+ private static readonly string[] RolePhraseNoise =
+ { "آماده به کار", "آماده همکاری", "آماده بکار", "آماده به همکاری", "جویای کار", "دنبال کار", "جهت همکاری" };
+
+ /// <summary>Strips availability phrases (<c>RolePhraseNoise</c>, matched only when not flanked by a letter) and modifier tokens (<c>RoleModifierWords</c>) from the normalized <paramref name="name"/>, keeping the base profession.</summary>
+ /// <returns>The surviving tokens joined by single spaces; when nothing survives, the trimmed original <paramref name="name"/> is returned instead of an empty string.</returns>
private static string StripRoleModifiers(string name)
{
- var kept = NormalizeFa(name).Split(' ', StringSplitOptions.RemoveEmptyEntries)
+ // Letter-bounded match: a phrase ending in «کار» must not eat the start of «کارشناس»/«کاردان»/«کارآموز» (a plain Replace turned «دنبال کارشناس» into «شناس»).
+ var n = RolePhraseNoise.Aggregate(NormalizeFa(name), (s, p) => System.Text.RegularExpressions.Regex.Replace(s, $@"(?<!\p{{L}}){System.Text.RegularExpressions.Regex.Escape(NormalizeFa(p))}(?!\p{{L}})", " "));
+ var kept = n.Split(' ', StringSplitOptions.RemoveEmptyEntries)
.Where(t => !RoleModifierWords.Any(m => NormalizeFa(m) == t)).ToList();
return kept.Count > 0 ? string.Join(" ", kept) : name.Trim();
}
diff --git a/src/JobsMedical.Web/Services/Scraping/ListingValidator.cs b/src/JobsMedical.Web/Services/Scraping/ListingValidator.cs
index e818e5c..7392d48 100644
--- a/src/JobsMedical.Web/Services/Scraping/ListingValidator.cs
+++ b/src/JobsMedical.Web/Services/Scraping/ListingValidator.cs
@@ -39,6 +39,14 @@ public class ListingValidator
"بوتاکس و فیلر", "مزوتراپی", "فیلر صورت",
};
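+ // Domestic-helper ads (housekeeping/cleaning/servant work) are out of scope: they are not clinical staff (کادر درمان), even when
+ // they also mention elderly care (سالمند/نگهداری). Any «امور منزل / نظافت»-style phrase below, found anywhere in the ad text, is the giveaway.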
+ private static readonly string[] DomesticMarkers =
+ {
+ "امور منزل", "امور سبک منزل", "امورسبک منزل", "کارهای منزل", "کار منزل", "نظافت منزل",
+ "نظافتچی", "خدمتکار", "کارگر منزل", "خدمات منزل", "مستخدم",
+ };
+
// Words that signal a real staffing post (hiring, shift, or availability).
private static readonly string[] StaffingIntent =
{
@@ -67,6 +75,13 @@ public class ListingValidator
return new ValidationResult(false, true, 0, issues, looksMedical); // IsSpam → auto-discard
}
+ // Domestic-helper / housekeeping ads — out of scope (not کادر درمان), discard.
+ if (DomesticMarkers.Any(text.Contains))
+ {
+ issues.Add("آگهی خدماتِ منزل/نظافت است، نه کادر درمان");
+ return new ValidationResult(false, true, 0, issues, looksMedical); // IsSpam → auto-discard
+ }
+
// «آماده به کار»: a worker offering themselves. No facility/shift-date expected; the role
// and a contact number are what matter.
if (parsed.Kind == ListingKind.Talent)