Strip «آماده به کار» from role names + reject domestic-helper ads
Re-check of live applicants found two gaps:
- «کمک بهیار آماده به کار» — the availability phrase was glued onto the role. StripRoleModifiers now removes the «آماده به کار / آماده همکاری / جویای کار / جهت همکاری» phrases before token-stripping, so the role collapses to «کمک بهیار».
- «خانم امورسبک منزل» — light-housework domestic helpers (not کادر درمان). The validator now discards ads containing «امور منزل / نظافت منزل / خدمتکار / مستخدم …» markers.

Both changes take effect for existing data on the next applicant reprocess.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -610,11 +610,18 @@ public class IngestionService
|
|||||||
private static readonly string[] RoleModifierWords =
|
private static readonly string[] RoleModifierWords =
|
||||||
{ "آقا", "خانم", "خانوم", "بانو", "مرد", "زن", "کارآموز", "کارورز", "ارشد", "مبتدی" };
|
{ "آقا", "خانم", "خانوم", "بانو", "مرد", "زن", "کارآموز", "کارورز", "ارشد", "مبتدی" };
|
||||||
|
|
||||||
/// <summary>Remove modifier tokens from a role name, keeping the base profession. Never strips to
|
// Availability phrases that the model sometimes glues onto the role («کمک بهیار آماده به کار»);
|
||||||
/// empty (falls back to the original).</summary>
|
// removed as whole substrings before token-stripping (so «به»/«کار» tokens stay safe elsewhere).
|
||||||
|
private static readonly string[] RolePhraseNoise =
|
||||||
|
{ "آماده به کار", "آماده همکاری", "آماده بکار", "آماده به همکاری", "جویای کار", "دنبال کار", "جهت همکاری" };
|
||||||
|
|
||||||
|
/// <summary>Remove availability phrases + gender/seniority tokens from a role name, keeping the
|
||||||
|
/// base profession. Never strips to empty (falls back to the original).</summary>
|
||||||
private static string StripRoleModifiers(string name)
|
private static string StripRoleModifiers(string name)
|
||||||
{
|
{
|
||||||
var kept = NormalizeFa(name).Split(' ', StringSplitOptions.RemoveEmptyEntries)
|
var n = NormalizeFa(name);
|
||||||
|
foreach (var p in RolePhraseNoise) n = n.Replace(NormalizeFa(p), " ");
|
||||||
|
var kept = n.Split(' ', StringSplitOptions.RemoveEmptyEntries)
|
||||||
.Where(t => !RoleModifierWords.Any(m => NormalizeFa(m) == t)).ToList();
|
.Where(t => !RoleModifierWords.Any(m => NormalizeFa(m) == t)).ToList();
|
||||||
return kept.Count > 0 ? string.Join(" ", kept) : name.Trim();
|
return kept.Count > 0 ? string.Join(" ", kept) : name.Trim();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,6 +39,14 @@ public class ListingValidator
|
|||||||
"بوتاکس و فیلر", "مزوتراپی", "فیلر صورت",
|
"بوتاکس و فیلر", "مزوتراپی", "فیلر صورت",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Domestic-helper ads (housekeeping/cleaning/servant) — not کادر درمان, even when they also
|
||||||
|
// mention سالمند/نگهداری. The «امور منزل / نظافت» phrasing is the giveaway.
|
||||||
|
private static readonly string[] DomesticMarkers =
|
||||||
|
{
|
||||||
|
"امور منزل", "امور سبک منزل", "امورسبک منزل", "کارهای منزل", "کار منزل", "نظافت منزل",
|
||||||
|
"نظافتچی", "خدمتکار", "کارگر منزل", "خدمات منزل", "مستخدم",
|
||||||
|
};
|
||||||
|
|
||||||
// Words that signal a real staffing post (hiring, shift, or availability).
|
// Words that signal a real staffing post (hiring, shift, or availability).
|
||||||
private static readonly string[] StaffingIntent =
|
private static readonly string[] StaffingIntent =
|
||||||
{
|
{
|
||||||
@@ -67,6 +75,13 @@ public class ListingValidator
|
|||||||
return new ValidationResult(false, true, 0, issues, looksMedical); // IsSpam → auto-discard
|
return new ValidationResult(false, true, 0, issues, looksMedical); // IsSpam → auto-discard
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Domestic-helper / housekeeping ads — out of scope (not کادر درمان), discard.
|
||||||
|
if (DomesticMarkers.Any(text.Contains))
|
||||||
|
{
|
||||||
|
issues.Add("آگهی خدماتِ منزل/نظافت است، نه کادر درمان");
|
||||||
|
return new ValidationResult(false, true, 0, issues, looksMedical); // IsSpam → auto-discard
|
||||||
|
}
|
||||||
|
|
||||||
// «آماده به کار»: a worker offering themselves. No facility/shift-date expected; the role
|
// «آماده به کار»: a worker offering themselves. No facility/shift-date expected; the role
|
||||||
// and a contact number are what matter.
|
// and a contact number are what matter.
|
||||||
if (parsed.Kind == ListingKind.Talent)
|
if (parsed.Kind == ListingKind.Talent)
|
||||||
|
|||||||
Reference in New Issue
Block a user