From 98fc01be8ec13c4f4cf581ed549aac84a51602aa Mon Sep 17 00:00:00 2001 From: "soroush.asadi" Date: Sun, 21 Jun 2026 20:20:07 +0330 Subject: [PATCH] Reject filler/verb words as applicant names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The person-name extractor was grabbing the word after a title even when it was a verb/filler/availability/role word, producing garbage headings like «خانم هستم»، «دکتر ام»، «دکتر داروساز آماده». Stop collecting at a NameNoise word (هستم/ام/آماده/جویای/role words…), so a real name («دکتر سپیده علیزاده») still works but these noise cases fall back to the role heading. This applies to new ingests only; existing entries can be rebuilt via the talent reprocess button. Co-Authored-By: Claude Opus 4.8 --- src/JobsMedical.Web/Services/ListingParser.cs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/JobsMedical.Web/Services/ListingParser.cs b/src/JobsMedical.Web/Services/ListingParser.cs index ea067e2..cd73128 100644 --- a/src/JobsMedical.Web/Services/ListingParser.cs +++ b/src/JobsMedical.Web/Services/ListingParser.cs @@ -236,6 +236,16 @@ public class HeuristicListingParser : IListingParser // Titles that introduce a person's name in «آماده به کار» posts. private static readonly string[] PersonTitles = { "دکتر", "خانم دکتر", "آقای دکتر", "مهندس", "سرکار خانم", "جناب آقای", "خانم", "آقای" }; + // Words that are NOT a person's name — verbs/fillers/availability/role words the extractor was + // grabbing after a title («خانم هستم»، «دکتر ام»، «دکتر داروساز آماده»). Stop collecting at one. 
+ private static readonly string[] NameNoise = + { + "هستم", "هستیم", "هستش", "ام", "بودم", "میباشم", "میباشد", "باشم", "آماده", "آماده‌ام", + "جویای", "بکار", "به‌کار", "کار", "همکاری", "نیازمند", "استخدام", "جذب", "عزیز", "محترم", + "گرامی", "خانم", "آقا", "اقا", "دکتر", "پزشک", "پرستار", "بهیار", "ماما", "دندانپزشک", + "داروساز", "تکنسین", "کارشناس", "متخصص", "عمومی", "مراقب", "کمک", + }; + /// Best-effort person name: a title (دکتر/خانم/…) plus up to two following words. private static string? ExtractPersonName(string text) { @@ -251,6 +261,7 @@ public class HeuristicListingParser : IListingParser foreach (var w in words) { if (NameStops.Contains(w)) break; + if (NameNoise.Any(n => Normalize(n) == Normalize(w))) break; // «خانم هستم»/«دکتر ام»… if (Regex.IsMatch(w, @"[\d]")) break; if (w.Length == 1) break; picked.Add(w);