Stop job/shift role fan-out: one aggregated ad = one listing
CI/CD / CI · dotnet build (push) Successful in 40s
CI/CD / Deploy · hamkadr (push) Successful in 2m18s

A single ad naming several role-like words («استخدام بهیار جهت دستیار پزشک و تزریقات») was
fanning out into one listing PER extracted role — five near-duplicate cards with different, and
sometimes misspelled, roles (پزشک عمومی، پرستار، دستیار پزشک، بهیار، «بیهیار»). Publish now creates ONE
listing with the primary (guard-corrected) role; the other role words stay findable via the full description.
DedupeJobsAsync no longer keys on role, so existing fan-out copies collapse into one — keeping the
newest non-«پزشک عمومی» copy when one exists, otherwise the newest copy. Run the «حذفِ تکراری» +
«اصلاح نقش» buttons to clean up the already-published fan-out copies.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-21 19:47:19 +03:30
parent 92802d0da0
commit 17da713a35
@@ -414,24 +414,31 @@ public class IngestionService
/// </summary> /// </summary>
public async Task<int> DedupeJobsAsync(CancellationToken ct = default) public async Task<int> DedupeJobsAsync(CancellationToken ct = default)
{ {
var gpId = await _db.Roles.Where(r => r.Name == "پزشک عمومی").Select(r => (int?)r.Id).FirstOrDefaultAsync(ct);
var rows = await _db.JobOpenings var rows = await _db.JobOpenings
.Where(j => j.Status == ShiftStatus.Open && j.Source == ShiftSource.Aggregated) .Where(j => j.Status == ShiftStatus.Open && j.Source == ShiftSource.Aggregated)
.Select(j => new { j.Id, j.RoleId, j.FacilityId, j.Description, j.CreatedAt }) .Select(j => new { j.Id, j.RoleId, j.FacilityId, j.Description, j.CreatedAt })
.ToListAsync(ct); .ToListAsync(ct);
string? Sig(int roleId, int facId, string? desc) // Signature = facility + normalized description core (digits/«… پیش» stripped). RoleId is
// deliberately NOT in the key, so the old role fan-out — the SAME ad published once per
// extracted/typo role (پزشک عمومی، پرستار، بهیار، «بیهیار»…) — collapses into one.
string? Sig(int facId, string? desc)
{ {
var core = NormalizeFa(Regex.Replace(desc ?? "", var core = NormalizeFa(Regex.Replace(desc ?? "",
@"[0-9۰-۹]+|روز پیش|ساعت پیش|هفته پیش|دقیقه پیش|دیروز|پریروز", " ")).Trim(); @"[0-9۰-۹]+|روز پیش|ساعت پیش|هفته پیش|دقیقه پیش|دیروز|پریروز", " ")).Trim();
if (core.Length < 15) return null; // too little to call it a dup safely if (core.Length < 15) return null; // too little to call it a dup safely
return $"j:{roleId}:{facId}:{(core.Length > 120 ? core[..120] : core)}"; return $"j:{facId}:{(core.Length > 120 ? core[..120] : core)}";
} }
// Keep one per group — prefer a non-«پزشک عمومی» role (the fan-out's GP copy is the usual
// mislabel), then the newest.
var toArchive = rows var toArchive = rows
.Select(r => new { r.Id, r.CreatedAt, Key = Sig(r.RoleId, r.FacilityId, r.Description) }) .Select(r => new { r.Id, r.RoleId, r.CreatedAt, Key = Sig(r.FacilityId, r.Description) })
.Where(x => x.Key is not null) .Where(x => x.Key is not null)
.GroupBy(x => x.Key) .GroupBy(x => x.Key)
.SelectMany(g => g.OrderByDescending(x => x.CreatedAt).Skip(1).Select(x => x.Id)) .SelectMany(g => g.OrderBy(x => x.RoleId == gpId ? 1 : 0).ThenByDescending(x => x.CreatedAt)
.Skip(1).Select(x => x.Id))
.ToList(); .ToList();
if (toArchive.Count == 0) return 0; if (toArchive.Count == 0) return 0;
@@ -729,41 +736,42 @@ public class IngestionService
facility.Lat = raw.Lat; facility.Lng = raw.Lng; facility.Lat = raw.Lat; facility.Lng = raw.Lng;
} }
// ONE ad = ONE listing. Do NOT fan out across roles. A single ad naming a few role-ish words
// («استخدام بهیار جهت دستیار پزشک و تزریقات») was exploding into 5 near-duplicate listings —
// one per extracted/typo role (پزشک عمومی، پرستار، دستیار پزشک، بهیار، «بیهیار»). Publish only
// the primary (guard-corrected) role; the rest stay findable via the full description text.
var primaryRole = pubRoles[0];
if (kindStr.Contains("job") || kindStr.Contains("استخدام")) if (kindStr.Contains("job") || kindStr.Contains("استخدام"))
{ {
foreach (var role in pubRoles)
_db.JobOpenings.Add(new JobOpening _db.JobOpenings.Add(new JobOpening
{ {
Facility = facility, Role = role, Facility = facility, Role = primaryRole,
Title = !string.IsNullOrWhiteSpace(d?.Title) && pubRoles.Count == 1 ? d!.Title!.Trim() : $"استخدام {role.Name}", Title = $"استخدام {primaryRole.Name}",
EmploymentType = MapEmployment(d?.EmploymentType, parsed.EmploymentType), EmploymentType = MapEmployment(d?.EmploymentType, parsed.EmploymentType),
// Prefer the AI-extracted salary, falling back to the parser's matching the talent // Prefer the AI-extracted salary, falling back to the parser's (matching the talent path).
// path. (Jobs previously used only parsed.PayAmount, silently dropping the AI figure,
// so every aggregated opening showed «توافقی» even when the ad stated a number.)
SalaryMin = d?.PayAmount ?? parsed.PayAmount, SalaryMin = d?.PayAmount ?? parsed.PayAmount,
Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated,
SourceUrl = raw.SourceUrl, SourceUrl = raw.SourceUrl,
Lat = appLat, Lng = appLng, // source point (Divar) or geocoded neighborhood center Lat = appLat, Lng = appLng, // source point (Divar) or geocoded neighborhood center
Contacts = BuildContacts(d, parsed), // the ad's OWN number(s) — fresh per listing Contacts = BuildContacts(d, parsed), // the ad's OWN number(s)
}); });
} }
else else
{ {
var st = MapShiftType(d?.ShiftType, parsed.ShiftType); var st = MapShiftType(d?.ShiftType, parsed.ShiftType);
var (start, end) = DefaultTimes(st); var (start, end) = DefaultTimes(st);
foreach (var role in pubRoles)
_db.Shifts.Add(new Shift _db.Shifts.Add(new Shift
{ {
Facility = facility, Role = role, Facility = facility, Role = primaryRole,
Date = DateOnly.FromDateTime(DateTime.UtcNow).AddDays(1), Date = DateOnly.FromDateTime(DateTime.UtcNow).AddDays(1),
StartTime = start, EndTime = end, ShiftType = st, StartTime = start, EndTime = end, ShiftType = st,
SpecialtyRequired = role.Name, Description = raw.RawText, SpecialtyRequired = primaryRole.Name, Description = raw.RawText,
PayType = parsed.SharePercent is not null && parsed.PayAmount is null ? PayType.Percentage PayType = parsed.SharePercent is not null && parsed.PayAmount is null ? PayType.Percentage
: parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift, : parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift,
PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent, PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent,
Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl,
Lat = appLat, Lng = appLng, // source point (Divar) or geocoded neighborhood center Lat = appLat, Lng = appLng, // source point (Divar) or geocoded neighborhood center
Contacts = BuildContacts(d, parsed), // the ad's OWN number(s) — fresh per listing Contacts = BuildContacts(d, parsed), // the ad's OWN number(s)
}); });
} }
raw.Status = RawListingStatus.Normalized; raw.Status = RawListingStatus.Normalized;