Reject filler/verb words as applicant names
CI/CD / CI · dotnet build (push) Successful in 1m42s
CI/CD / Deploy · hamkadr (push) Successful in 2m25s

The person-name extractor was grabbing the word after a title even when it was a verb/filler/
availability/role word, producing garbage headings like «خانم هستم»، «دکتر ام»، «دکتر داروساز
آماده». Stop collecting at a NameNoise word (هستم/ام/آماده/جویای/role words…), so a real name
(«دکتر سپیده علیزاده») still works but these fall back to the role heading. This applies to new
ingests only; existing entries can be rebuilt via the talent reprocess button.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-21 20:20:07 +03:30
parent 33450a37ea
commit 98fc01be8e
@@ -236,6 +236,16 @@ public class HeuristicListingParser : IListingParser
// Titles that introduce a person's name in «آماده به کار» posts. // Titles that introduce a person's name in «آماده به کار» posts.
private static readonly string[] PersonTitles = { "دکتر", "خانم دکتر", "آقای دکتر", "مهندس", "سرکار خانم", "جناب آقای", "خانم", "آقای" }; private static readonly string[] PersonTitles = { "دکتر", "خانم دکتر", "آقای دکتر", "مهندس", "سرکار خانم", "جناب آقای", "خانم", "آقای" };
// Words that are NOT part of a person's name: verbs, fillers, availability phrases, honorific
// adjectives, titles and role/profession words that can directly follow a title in a post
// (e.g. «خانم هستم»، «دکتر ام»، «دکتر داروساز آماده»).
// The name-collecting loop breaks at the first word that matches an entry here, so only the
// words before it are kept. Both the entry and the candidate word are passed through
// Normalize() before the equality check, so entries do not need to be pre-normalized.
private static readonly string[] NameNoise =
{
"هستم", "هستیم", "هستش", "ام", "بودم", "میباشم", "میباشد", "باشم", "آماده", "آماده‌ام",
"جویای", "بکار", "به‌کار", "کار", "همکاری", "نیازمند", "استخدام", "جذب", "عزیز", "محترم",
"گرامی", "خانم", "آقا", "اقا", "دکتر", "پزشک", "پرستار", "بهیار", "ماما", "دندانپزشک",
"داروساز", "تکنسین", "کارشناس", "متخصص", "عمومی", "مراقب", "کمک",
};
/// <summary>Best-effort person name: a title (دکتر/خانم/…) plus up to two following words.</summary> /// <summary>Best-effort person name: a title (دکتر/خانم/…) plus up to two following words.</summary>
private static string? ExtractPersonName(string text) private static string? ExtractPersonName(string text)
{ {
@@ -251,6 +261,7 @@ public class HeuristicListingParser : IListingParser
foreach (var w in words) foreach (var w in words)
{ {
if (NameStops.Contains(w)) break; if (NameStops.Contains(w)) break;
if (NameNoise.Any(n => Normalize(n) == Normalize(w))) break; // «خانم هستم»/«دکتر ام»…
if (Regex.IsMatch(w, @"[\d]")) break; if (Regex.IsMatch(w, @"[\d]")) break;
if (w.Length == 1) break; if (w.Length == 1) break;
picked.Add(w); picked.Add(w);