diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs index 8483649..d70e945 100644 --- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs +++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs @@ -430,7 +430,6 @@ public class IngestionService public async Task<(int merged, int cleaned)> MergeAndCleanFacilitiesAsync(CancellationToken ct = default) { var facilities = await _db.Facilities.ToListAsync(ct); - var placeholder = facilities.FirstOrDefault(f => f.Name == UnknownFacilityName); var jobCounts = await _db.JobOpenings.GroupBy(j => j.FacilityId) .Select(g => new { g.Key, C = g.Count() }).ToDictionaryAsync(x => x.Key, x => x.C, ct); @@ -438,6 +437,15 @@ public class IngestionService .Select(g => new { g.Key, C = g.Count() }).ToDictionaryAsync(x => x.Key, x => x.C, ct); int Listings(int id) => jobCounts.GetValueOrDefault(id) + shiftCounts.GetValueOrDefault(id); + // The shared "unknown" placeholder is worded differently in older data + // («مرکز درمانی (نامشخص)») than the current constant, so an exact-name lookup found nothing and + // the junk-fold step silently no-op'd. Match by the «نامشخص» marker and pick the bucket actually + // used by the most listings — that's the real placeholder junk should fold into. + var placeholder = facilities + .Where(f => f.Name == UnknownFacilityName || FacilityMatcher.Normalize(f.Name).Contains("نامشخص")) + .OrderByDescending(f => Listings(f.Id)).FirstOrDefault(); + var placeholderId = placeholder?.Id ?? -1; + // Removable = purely crawl-generated and unmanaged. Never the placeholder, an owned, or a // verified facility (those carry real employer data / verification). bool Removable(Facility f) => f.OwnerUserId is null && !f.IsVerified @@ -463,8 +471,8 @@ public class IngestionService cleaned++; } - // 2) Merge same-city Persian-fuzzy duplicates into the best keeper. - var remaining = await _db.Facilities.Where(f => f.Name != UnknownFacilityName).ToListAsync(ct); + // 2) Merge same-city Persian-fuzzy duplicates into the best keeper (never the placeholder). + var remaining = (await _db.Facilities.ToListAsync(ct)).Where(f => f.Id != placeholderId).ToList(); var done = new HashSet(); foreach (var f in remaining) {