Reject filler/verb words as applicant names
The person-name extractor was grabbing the word after a title even when it was a verb/filler/availability/role word, producing garbage headings like «خانم هستم»، «دکتر ام»، «دکتر داروساز آماده». The extractor now stops collecting at a NameNoise word (هستم/ام/آماده/جویای/role words…), so a real name («دکتر سپیده علیزاده») still works while these cases fall back to the role heading. This applies to new ingests only; existing entries are rebuilt via the talent reprocess button. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -236,6 +236,16 @@ public class HeuristicListingParser : IListingParser
|
|||||||
// Titles that introduce a person's name in «آماده به کار» posts.
|
// Titles that introduce a person's name in «آماده به کار» posts.
|
||||||
private static readonly string[] PersonTitles = { "دکتر", "خانم دکتر", "آقای دکتر", "مهندس", "سرکار خانم", "جناب آقای", "خانم", "آقای" };
|
private static readonly string[] PersonTitles = { "دکتر", "خانم دکتر", "آقای دکتر", "مهندس", "سرکار خانم", "جناب آقای", "خانم", "آقای" };
|
||||||
|
|
||||||
|
// Words that are NOT a person's name — verbs/fillers/availability/role words the extractor was
|
||||||
|
// grabbing after a title («خانم هستم»، «دکتر ام»، «دکتر داروساز آماده»). Name collection stops at the first such word.
|
||||||
|
private static readonly string[] NameNoise =
|
||||||
|
{
|
||||||
|
"هستم", "هستیم", "هستش", "ام", "بودم", "میباشم", "میباشد", "باشم", "آماده", "آمادهام",
|
||||||
|
"جویای", "بکار", "بهکار", "کار", "همکاری", "نیازمند", "استخدام", "جذب", "عزیز", "محترم",
|
||||||
|
"گرامی", "خانم", "آقا", "اقا", "دکتر", "پزشک", "پرستار", "بهیار", "ماما", "دندانپزشک",
|
||||||
|
"داروساز", "تکنسین", "کارشناس", "متخصص", "عمومی", "مراقب", "کمک",
|
||||||
|
};
|
||||||
|
|
||||||
/// <summary>Best-effort person name: a title (دکتر/خانم/…) plus up to two following words.</summary>
|
/// <summary>Best-effort person name: a title (دکتر/خانم/…) plus up to two following words.</summary>
|
||||||
private static string? ExtractPersonName(string text)
|
private static string? ExtractPersonName(string text)
|
||||||
{
|
{
|
||||||
@@ -251,6 +261,7 @@ public class HeuristicListingParser : IListingParser
|
|||||||
foreach (var w in words)
|
foreach (var w in words)
|
||||||
{
|
{
|
||||||
if (NameStops.Contains(w)) break;
|
if (NameStops.Contains(w)) break;
|
||||||
|
if (NameNoise.Any(n => Normalize(n) == Normalize(w))) break; // «خانم هستم»/«دکتر ام»…
|
||||||
if (Regex.IsMatch(w, @"[\d]")) break;
|
if (Regex.IsMatch(w, @"[\d]")) break;
|
||||||
if (w.Length == 1) break;
|
if (w.Length == 1) break;
|
||||||
picked.Add(w);
|
picked.Add(w);
|
||||||
|
|||||||
Reference in New Issue
Block a user