Default aggregated ads to Job, not Shift (stop fabricating shift dates/times)
CI/CD / CI · dotnet build (push) Successful in 1m54s
CI/CD / Deploy · hamkadr (push) Successful in 2m19s

A generic hiring ad like «پرستار درمانگاه» was published as a dated SHIFT with an invented date
(«فردا») and default hours («۰۸:۰۰–۱۴:۰۰») the source never stated — because classification defaulted
to Shift. Now a dated Shift is only produced when the text carries an explicit shift signal
(شیفت/آنکال/کشیک/نوبت); everything else is an ongoing hiring post → Job (no date to invent). Fixed in
both the parser default and the Publish branch (so an AI mislabel can't force a shift either).

ReclassifyMisclassifiedShiftsAsync (in the post-ingest auto-cleanup) converts the existing signal-less
aggregated shifts into jobs in place — copies the content to a JobOpening and archives the old shift
(its URL 410s). After one pass it's a no-op since new ads no longer become shifts.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-23 07:08:47 +03:30
parent b3e7123d74
commit 1f628d971e
2 changed files with 50 additions and 5 deletions
@@ -69,8 +69,11 @@ public class HeuristicListingParser : IListingParser
} }
else else
{ {
p.Kind = (jobSignals && !shiftSignals) ? ListingKind.Job : ListingKind.Shift; // A dated SHIFT requires an explicit shift signal («شیفت/آنکال/کشیک/نوبت»). Otherwise the ad
p.Notes.Add(p.Kind == ListingKind.Job ? "نوع: استخدام (تشخیص خودکار)" : "نوع: شیفت (تشخیص خودکار)"); // is an ongoing hiring post → Job. (Defaulting to Shift forced a fabricated date/time onto
// generic ads like «پرستار درمانگاه», which the source never stated.)
p.Kind = shiftSignals ? ListingKind.Shift : ListingKind.Job;
p.Notes.Add(p.Kind == ListingKind.Shift ? "نوع: شیفت (تشخیص خودکار)" : "نوع: استخدام (تشخیص خودکار)");
} }
// --- Roles (an ad can name several at once: «پرستار سالمند و کودک و همراه بیمار») --- // --- Roles (an ad can name several at once: «پرستار سالمند و کودک و همراه بیمار») ---
@@ -373,6 +373,41 @@ public class IngestionService
return filled; return filled;
} }
/// <summary>
/// Keywords that mark an ad as explicitly shift-based; «انکال» is listed next to «آنکال» as an
/// alternate spelling. ReclassifyMisclassifiedShiftsAsync searches for these as plain substrings
/// of a shift's description.
/// </summary>
private static readonly string[] ShiftSignals = { "شیفت", "آنکال", "انکال", "کشیک", "نوبت" };
/// <summary>
/// Converts open, aggregated shifts whose description contains none of the <see cref="ShiftSignals"/>
/// keywords into job openings: generic hiring ads («پرستار درمانگاه») that were previously
/// defaulted to a dated shift. For each such shift a new <c>JobOpening</c> is added carrying the
/// shift's facility, role, pay, description, source URL, coordinates and a copy of its contacts,
/// and the shift itself is marked archived. Everything is persisted with a single save.
/// </summary>
/// <remarks>
/// Archived shifts no longer match the query, so a repeated run does not convert them again.
/// NOTE(review): the original comment states that an archived shift's URL then answers 410; that
/// behaviour lives outside this method — confirm.
/// </remarks>
/// <param name="ct">Cancellation token forwarded to the database query and the save.</param>
/// <returns>The number of shifts converted; 0 when nothing qualified (nothing is saved then).</returns>
public async Task<int> ReclassifyMisclassifiedShiftsAsync(CancellationToken ct = default)
{
    var candidates = await _db.Shifts
        .Include(s => s.Role)
        .Include(s => s.Contacts)
        .Where(s => s.Status == ShiftStatus.Open && s.Source == ShiftSource.Aggregated)
        .ToListAsync(ct);

    // Keyword matching happens in memory, after materialisation; a missing description counts
    // as "no shift signal".
    var misclassified = candidates
        .Where(s =>
        {
            var text = s.Description ?? "";
            return !ShiftSignals.Any(w => text.Contains(w));
        })
        .ToList();

    if (misclassified.Count == 0)
    {
        return 0;
    }

    foreach (var shift in misclassified)
    {
        _db.JobOpenings.Add(new JobOpening
        {
            FacilityId = shift.FacilityId,
            RoleId = shift.RoleId,
            // TrimEnd: when the role is not loaded/assigned the interpolation would otherwise
            // leave a dangling space after «استخدام».
            Title = $"استخدام {shift.Role?.Name}".TrimEnd(),
            EmploymentType = EmploymentType.FullTime,
            SalaryMin = shift.PayAmount,
            Description = shift.Description,
            Status = ShiftStatus.Open,
            Source = ShiftSource.Aggregated,
            SourceUrl = shift.SourceUrl,
            Lat = shift.Lat,
            Lng = shift.Lng,
            // Fresh ContactMethod instances: the job gets its own rows instead of sharing the
            // shift's contact entities.
            Contacts = shift.Contacts
                .Select(c => new ContactMethod { Type = c.Type, Value = c.Value, SortOrder = c.SortOrder })
                .ToList(),
        });

        shift.Status = ShiftStatus.Archived;
    }

    await _db.SaveChangesAsync(ct);
    _log.LogInformation("Reclassified {N} signal-less aggregated shifts into jobs.", misclassified.Count);
    return misclassified.Count;
}
/// <summary> /// <summary>
/// The self-cleaning pass run automatically at the end of every crawl (and available on demand): /// The self-cleaning pass run automatically at the end of every crawl (and available on demand):
/// archive out-of-scope/duplicate listings, merge duplicate + fold junk facilities, and backfill /// archive out-of-scope/duplicate listings, merge duplicate + fold junk facilities, and backfill
@@ -387,8 +422,9 @@ public class IngestionService
var (mergedFac, cleanedFac) = await MergeAndCleanFacilitiesAsync(ct); var (mergedFac, cleanedFac) = await MergeAndCleanFacilitiesAsync(ct);
var coords = await BackfillCoordsAsync(ct); var coords = await BackfillCoordsAsync(ct);
var pay = await BackfillPayAsync(ct); var pay = await BackfillPayAsync(ct);
_log.LogInformation("Post-ingest cleanup: archived={A} dedupedJobs={DJ} mergedFac={MF} cleanedFac={CF} coords={C} pay={P}", var reclassified = await ReclassifyMisclassifiedShiftsAsync(ct);
archived, dedupedJobs, mergedFac, cleanedFac, coords, pay); _log.LogInformation("Post-ingest cleanup: archived={A} dedupedJobs={DJ} mergedFac={MF} cleanedFac={CF} coords={C} pay={P} reclassified={R}",
archived, dedupedJobs, mergedFac, cleanedFac, coords, pay, reclassified);
return (archived, dedupedJobs, mergedFac, cleanedFac, coords); return (archived, dedupedJobs, mergedFac, cleanedFac, coords);
} }
@@ -821,7 +857,13 @@ public class IngestionService
// one per extracted/typo role (پزشک عمومی، پرستار، دستیار پزشک، بهیار، «بیهیار»). Publish only // one per extracted/typo role (پزشک عمومی، پرستار، دستیار پزشک، بهیار، «بیهیار»). Publish only
// the primary (guard-corrected) role; the rest stay findable via the full description text. // the primary (guard-corrected) role; the rest stay findable via the full description text.
var primaryRole = pubRoles[0]; var primaryRole = pubRoles[0];
if (kindStr.Contains("job") || kindStr.Contains("استخدام")) // A dated SHIFT is created ONLY when the ad is explicitly shift-based (the kind says shift AND
// the text actually carries a shift signal). Otherwise it's an ongoing hiring post → Job, so we
// never fabricate a date/time the source never stated (the «پرستار درمانگاه as فردا ۰۸:۰۰ شیفت»
// bug). Defends against the AI mislabeling a generic ad as a shift, too.
bool isShift = (kindStr.Contains("shift") || kindStr.Contains("شیفت"))
&& new[] { "شیفت", "آنکال", "انکال", "کشیک", "نوبت" }.Any(raw.RawText.Contains);
if (!isShift)
{ {
_db.JobOpenings.Add(new JobOpening _db.JobOpenings.Add(new JobOpening
{ {