diff --git a/src/JobsMedical.Web/Services/ListingParser.cs b/src/JobsMedical.Web/Services/ListingParser.cs
index e4ec088..0a266c0 100644
--- a/src/JobsMedical.Web/Services/ListingParser.cs
+++ b/src/JobsMedical.Web/Services/ListingParser.cs
@@ -69,8 +69,11 @@ public class HeuristicListingParser : IListingParser
         }
         else
         {
-            p.Kind = (jobSignals && !shiftSignals) ? ListingKind.Job : ListingKind.Shift;
-            p.Notes.Add(p.Kind == ListingKind.Job ? "نوع: استخدام (تشخیص خودکار)" : "نوع: شیفت (تشخیص خودکار)");
+            // A dated SHIFT requires an explicit shift signal («شیفت/آنکال/کشیک/نوبت»). Otherwise the ad
+            // is an ongoing hiring post → Job. (Defaulting to Shift forced a fabricated date/time onto
+            // generic ads like «پرستار درمانگاه», which the source never stated.)
+            p.Kind = shiftSignals ? ListingKind.Shift : ListingKind.Job;
+            p.Notes.Add(p.Kind == ListingKind.Shift ? "نوع: شیفت (تشخیص خودکار)" : "نوع: استخدام (تشخیص خودکار)");
         }
 
         // --- Roles (an ad can name several at once: «پرستار سالمند و کودک و همراه بیمار») ---
diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
index f551223..60246b4 100644
--- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
+++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
@@ -373,6 +373,43 @@ public class IngestionService
         return filled;
     }
 
+    private static readonly string[] ShiftSignals = { "شیفت", "آنکال", "انکال", "کشیک", "نوبت" };
+
+    /// <summary>
+    /// Convert existing aggregated "shifts" that have NO shift signal in their text into JobOpenings —
+    /// they were generic hiring ads («پرستار درمانگاه») mis-defaulted to a Shift with a fabricated
+    /// date/time. Copies the content into a job and archives the old shift (so its URL 410s). New ingests
+    /// no longer produce these (Job is now the default), so after one pass this is a no-op.
+    /// </summary>
+    /// <param name="ct">Token forwarded to the shift query and to the final save.</param>
+    /// <returns>The number of shifts converted into jobs; 0 when none lacked a shift signal.</returns>
+    public async Task<int> ReclassifyMisclassifiedShiftsAsync(CancellationToken ct = default)
+    {
+        var shifts = await _db.Shifts.Include(s => s.Role).Include(s => s.Contacts)
+            .Where(s => s.Status == ShiftStatus.Open && s.Source == ShiftSource.Aggregated)
+            .ToListAsync(ct);
+        var bad = shifts.Where(s => !ShiftSignals.Any(w => (s.Description ?? "").Contains(w))).ToList();
+        if (bad.Count == 0) return 0;
+
+        foreach (var s in bad)
+        {
+            _db.JobOpenings.Add(new JobOpening
+            {
+                FacilityId = s.FacilityId, RoleId = s.RoleId,
+                Title = $"استخدام {s.Role?.Name}",
+                EmploymentType = EmploymentType.FullTime,
+                SalaryMin = s.PayAmount,
+                Description = s.Description, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated,
+                SourceUrl = s.SourceUrl, Lat = s.Lat, Lng = s.Lng,
+                Contacts = s.Contacts.Select(c => new ContactMethod { Type = c.Type, Value = c.Value, SortOrder = c.SortOrder }).ToList(),
+            });
+            s.Status = ShiftStatus.Archived;
+        }
+        await _db.SaveChangesAsync(ct);
+        _log.LogInformation("Reclassified {N} signal-less aggregated shifts into jobs.", bad.Count);
+        return bad.Count;
+    }
+
     /// <summary>
     /// The self-cleaning pass run automatically at the end of every crawl (and available on demand):
     /// archive out-of-scope/duplicate listings, merge duplicate + fold junk facilities, and backfill
@@ -387,8 +424,9 @@ public class IngestionService
         var (mergedFac, cleanedFac) = await MergeAndCleanFacilitiesAsync(ct);
         var coords = await BackfillCoordsAsync(ct);
         var pay = await BackfillPayAsync(ct);
-        _log.LogInformation("Post-ingest cleanup: archived={A} dedupedJobs={DJ} mergedFac={MF} cleanedFac={CF} coords={C} pay={P}",
-            archived, dedupedJobs, mergedFac, cleanedFac, coords, pay);
+        var reclassified = await ReclassifyMisclassifiedShiftsAsync(ct);
+        _log.LogInformation("Post-ingest cleanup: archived={A} dedupedJobs={DJ} mergedFac={MF} cleanedFac={CF} coords={C} pay={P} reclassified={R}",
+            archived, dedupedJobs, mergedFac, cleanedFac, coords, pay, reclassified);
         return (archived, dedupedJobs, mergedFac, cleanedFac, coords);
     }
 
@@ -821,7 +859,13 @@ public class IngestionService
         // one per extracted/typo role (پزشک عمومی، پرستار، دستیار پزشک، بهیار، «بیهیار»). Publish only
         // the primary (guard-corrected) role; the rest stay findable via the full description text.
         var primaryRole = pubRoles[0];
-        if (kindStr.Contains("job") || kindStr.Contains("استخدام"))
+        // A dated SHIFT is created ONLY when the ad is explicitly shift-based (the kind says shift AND
+        // the text actually carries a shift signal). Otherwise it's an ongoing hiring post → Job, so we
+        // never fabricate a date/time the source never stated (the «پرستار درمانگاه as فردا ۰۸:۰۰ شیفت»
+        // bug). Defends against the AI mislabeling a generic ad as a shift, too.
+        bool isShift = (kindStr.Contains("shift") || kindStr.Contains("شیفت"))
+                       && ShiftSignals.Any(w => (raw.RawText ?? "").Contains(w));
+        if (!isShift)
         {
             _db.JobOpenings.Add(new JobOpening
             {