diff --git a/src/JobsMedical.Web/Services/ListingParser.cs b/src/JobsMedical.Web/Services/ListingParser.cs
index a13c445..ea067e2 100644
--- a/src/JobsMedical.Web/Services/ListingParser.cs
+++ b/src/JobsMedical.Web/Services/ListingParser.cs
@@ -361,7 +361,9 @@ public class HeuristicListingParser : IListingParser
                 if (d.Length == 10 && d[0] == '9') d = "0" + d;
                 Add(ContactType.Mobile, d);
             }
-            foreach (Match m in Regex.Matches(latin, @"(? c.Name == cityName) ?? cities.FirstOrDefault(c => c.IsActive) ?? cities.First();
         var district = districts.FirstOrDefault(x => x.Name == districtName && x.CityId == city.Id);
@@ -835,6 +841,43 @@ public class IngestionService
     /// Greater-Tehran bounding box — rejects out-of-area (hallucinated) AI coordinates.
     private static bool InTehran(double lat, double lng) => lat is >= 35.4 and <= 35.95 && lng is >= 51.0 and <= 51.8;
 
+    // Markers that mean a doctor role is a SPECIALIST, not a GP — used to correct a «پزشک عمومی»
+    // mislabel on a clearly-specialist ad (e.g. an ENT post showing as «استخدام پزشک عمومی»).
+    // Persian markers are matched as substrings; all-Latin markers ("ent") only as whole words.
+    private static readonly string[] SpecialistMarkers =
+        { "متخصص", "فوق تخصص", "فوقتخصص", "فلوشیپ", "فلوشیب", "بورد تخصصی", "ساب اسپشالیتی", "ent" };
+
+    /// <summary>
+    /// Returns true when the ad text carries any specialist marker; null or empty text never does.
+    /// Latin markers are compared case-insensitively and must not touch other ASCII letters, so
+    /// "ENT" matches while "dental", "center" and "patient" do not.
+    /// </summary>
+    private static bool LooksSpecialist(string? rawText)
+    {
+        if (rawText is not { Length: > 0 } text) return false;
+        var t = NormalizeFa(text);
+        return SpecialistMarkers.Any(m =>
+            (m.Length > 0 && m.All(IsAsciiLetter)) ? HasLatinWord(text, m) : t.Contains(NormalizeFa(m)));
+
+        static bool IsAsciiLetter(char c) => c is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z');
+
+        // Scans every case-insensitive hit of `word` and accepts the first one that is not glued
+        // to an ASCII letter on either side.
+        static bool HasLatinWord(string haystack, string word)
+        {
+            for (var i = haystack.IndexOf(word, System.StringComparison.OrdinalIgnoreCase);
+                 i >= 0;
+                 i = haystack.IndexOf(word, i + 1, System.StringComparison.OrdinalIgnoreCase))
+            {
+                var end = i + word.Length;
+                var freeLeft = i == 0 || !IsAsciiLetter(haystack[i - 1]);
+                var freeRight = end == haystack.Length || !IsAsciiLetter(haystack[end]);
+                if (freeLeft && freeRight) return true;
+            }
+            return false;
+        }
+    }
+
     // Gender/seniority tokens that don't belong in a role name (they go to tags / the Gender field).
     private static readonly string[] RoleModifierWords =
         { "آقا", "خانم", "خانوم", "بانو", "مرد", "زن", "کارآموز", "کارورز", "ارشد", "مبتدی" };