Fix role + contact mislabels seen on a live iranestekhdam ad
(1) Specialist guard: the AI sometimes labels a clearly-specialist ad («پزشک متخصص گوش و
حلق و بینی»، «فلوشیپ»، «فوق تخصص») as «پزشک عمومی», so an ENT post was published as
«استخدام پزشک عمومی». When the primary role is GP but the ad text names a specialist, swap
it to «پزشک متخصص» (the subspecialty stays as a tag).
(2) Phone type: the landline regex 0\d{2,3} also matched 09xx MOBILE numbers and labeled them
«تلفن ثابت». Iranian landline area codes are 0[1-8]xx (021/026/…), never 09 — restrict it so
mobiles are no longer mislabeled as landlines.
Both fixes apply to new ingests; existing mislabeled rows are corrected on turnover/reprocess.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -361,7 +361,9 @@ public class HeuristicListingParser : IListingParser
|
||||
if (d.Length == 10 && d[0] == '9') d = "0" + d;
|
||||
Add(ContactType.Mobile, d);
|
||||
}
|
||||
foreach (Match m in Regex.Matches(latin, @"(?<!\d)0\d{2,3}[\s-]?\d{7,8}(?!\d)"))
|
||||
// Landline area codes start 0[1-8] (021 Tehran, 026 Karaj, …) — never 09, which is a MOBILE.
|
||||
// The old 0\d{2,3} matched 09xx numbers and mislabeled mobiles as «تلفن ثابت».
|
||||
foreach (Match m in Regex.Matches(latin, @"(?<!\d)0[1-8]\d{1,2}[\s-]?\d{7,8}(?!\d)"))
|
||||
Add(ContactType.Phone, Regex.Replace(m.Value, @"\D", ""));
|
||||
|
||||
return list.Take(8).ToList();
|
||||
|
||||
@@ -581,6 +581,12 @@ public class IngestionService
|
||||
}
|
||||
if (pubRoles.Count == 0) pubRoles.Add(roles.First());
|
||||
|
||||
// Specialist guard: the model sometimes labels a clearly-specialist ad («پزشک متخصص …»،
|
||||
// «فلوشیپ»، «فوق تخصص») as «پزشک عمومی» — making the title/filters wrong (an ENT post showing
|
||||
// as «استخدام پزشک عمومی»). When the primary role is GP but the ad text says specialist, swap it.
|
||||
if (pubRoles[0].Name == "پزشک عمومی" && LooksSpecialist(raw.RawText))
|
||||
pubRoles[0] = ResolveOrCreateRole(roles, "پزشک متخصص", "پزشک");
|
||||
|
||||
var city = cities.FirstOrDefault(c => c.Name == cityName)
|
||||
?? cities.FirstOrDefault(c => c.IsActive) ?? cities.First();
|
||||
var district = districts.FirstOrDefault(x => x.Name == districtName && x.CityId == city.Id);
|
||||
@@ -835,6 +841,17 @@ public class IngestionService
|
||||
/// <summary>
/// Returns true when the point lies inside the Greater-Tehran bounding box
/// (latitude 35.4–35.95, longitude 51.0–51.8, bounds inclusive). Used to reject
/// out-of-area (hallucinated) AI coordinates.
/// </summary>
private static bool InTehran(double lat, double lng)
{
    var latitudeInside = lat >= 35.4 && lat <= 35.95;
    var longitudeInside = lng >= 51.0 && lng <= 51.8;
    return latitudeInside && longitudeInside;
}
|
||||
|
||||
// Persian markers that mean a doctor role is a SPECIALIST, not a GP — used to correct a «پزشک عمومی»
// mislabel on a clearly-specialist ad (e.g. an ENT post showing as «استخدام پزشک عمومی»).
// These are matched as plain substrings of the normalized ad text. The Latin abbreviation "ENT" is
// deliberately NOT in this list: as a substring it would also hit "dentist", "patient", "center", …
private static readonly string[] SpecialistMarkers =
    { "متخصص", "فوق تخصص", "فوقتخصص", "فلوشیپ", "فلوشیب", "بورد تخصصی", "ساب اسپشالیتی" };

// Stand-alone "ENT" (any casing): must not be preceded or followed by another Latin letter, so
// words that merely contain the letters e-n-t do not count as a specialist marker.
private static readonly System.Text.RegularExpressions.Regex EntAbbreviation = new(
    @"(?<![A-Za-z])ent(?![A-Za-z])",
    System.Text.RegularExpressions.RegexOptions.IgnoreCase
        | System.Text.RegularExpressions.RegexOptions.CultureInvariant);

/// <summary>
/// Tells whether the raw ad text names a specialist doctor role.
/// </summary>
/// <param name="rawText">Raw ad text; may be null or blank.</param>
/// <returns>
/// True when the normalized text contains any of <see cref="SpecialistMarkers"/> or the raw text
/// contains the stand-alone abbreviation "ENT"; false otherwise, including for null/blank input.
/// </returns>
private static bool LooksSpecialist(string? rawText)
{
    // Nothing to inspect — also avoids handing null to NormalizeFa / dereferencing its result.
    if (string.IsNullOrWhiteSpace(rawText)) return false;

    var t = NormalizeFa(rawText);
    if (SpecialistMarkers.Any(m => t.Contains(NormalizeFa(m)))) return true;

    // Checked on the raw text so the result does not depend on how NormalizeFa treats Latin
    // letters or whitespace.
    return EntAbbreviation.IsMatch(rawText);
}
|
||||
|
||||
// Gender/seniority tokens that don't belong in a role name (they go to tags / the Gender field).
|
||||
private static readonly string[] RoleModifierWords =
|
||||
{ "آقا", "خانم", "خانوم", "بانو", "مرد", "زن", "کارآموز", "کارورز", "ارشد", "مبتدی" };
|
||||
|
||||
Reference in New Issue
Block a user