diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs index 60246b4..ee149c3 100644 --- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs +++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs @@ -288,7 +288,7 @@ public class IngestionService var core = NormalizeFa(Regex.Replace(desc ?? "", @"[0-9۰-۹]+|روز پیش|ساعت پیش|هفته پیش|دقیقه پیش|دیروز|پریروز", " ")).Trim(); if (core.Length < 15) return null; // too little to call it a dup safely - return $"t:{roleId}:{cityId}:{(core.Length > 100 ? core[..100] : core)}"; + return $"t:{roleId}:{cityId}:{(core.Length > 400 ? core[..400] : core)}"; } var toRemove = rows @@ -497,7 +497,9 @@ public class IngestionService var core = NormalizeFa(Regex.Replace(desc ?? "", @"[0-9۰-۹]+|روز پیش|ساعت پیش|هفته پیش|دقیقه پیش|دیروز|پریروز", " ")).Trim(); if (core.Length < 15) return null; // too little to call it a dup safely - return $"j:{facId}:{(core.Length > 120 ? core[..120] : core)}"; + // Compare a LONG slice (not 120) — most jobs share the «نامشخص» facility, so a short + // prefix could collapse two different placeholder ads that merely open the same way. + return $"j:{facId}:{(core.Length > 400 ? core[..400] : core)}"; } // Keep one per group — prefer a non-«پزشک عمومی» role (the fan-out's GP copy is the usual