Stop job/shift role fan-out: one aggregated ad = one listing
CI/CD / CI · dotnet build (push) Successful in 40s
CI/CD / Deploy · hamkadr (push) Successful in 2m18s

A single ad naming several role-like words («استخدام بهیار جهت دستیار پزشک و تزریقات») was
fanning out into one listing PER extracted role — 5 near-duplicate cards with different, and even
misspelled, roles (پزشک عمومی، پرستار، دستیار پزشک، بهیار، «بیهیار»). Publish now creates ONE listing
with the primary (guard-corrected) role; the other role words stay findable via the full description.
DedupeJobsAsync no longer keys on role, so existing fan-out copies collapse — preferring to keep a
non-«پزشک عمومی» copy, then the newest. Run the «حذفِ تکراری» + «اصلاح نقش» buttons to clean the
already-published fan-out.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-21 19:47:19 +03:30
parent 92802d0da0
commit 17da713a35
@@ -414,24 +414,31 @@ public class IngestionService
/// </summary> /// </summary>
public async Task<int> DedupeJobsAsync(CancellationToken ct = default) public async Task<int> DedupeJobsAsync(CancellationToken ct = default)
{ {
var gpId = await _db.Roles.Where(r => r.Name == "پزشک عمومی").Select(r => (int?)r.Id).FirstOrDefaultAsync(ct);
var rows = await _db.JobOpenings var rows = await _db.JobOpenings
.Where(j => j.Status == ShiftStatus.Open && j.Source == ShiftSource.Aggregated) .Where(j => j.Status == ShiftStatus.Open && j.Source == ShiftSource.Aggregated)
.Select(j => new { j.Id, j.RoleId, j.FacilityId, j.Description, j.CreatedAt }) .Select(j => new { j.Id, j.RoleId, j.FacilityId, j.Description, j.CreatedAt })
.ToListAsync(ct); .ToListAsync(ct);
string? Sig(int roleId, int facId, string? desc) // Signature = facility + normalized description core (digits/«… پیش» stripped). RoleId is
// deliberately NOT in the key, so the old role fan-out — the SAME ad published once per
// extracted/typo role (پزشک عمومی، پرستار، بهیار، «بیهیار»…) — collapses into one.
string? Sig(int facId, string? desc)
{ {
var core = NormalizeFa(Regex.Replace(desc ?? "", var core = NormalizeFa(Regex.Replace(desc ?? "",
@"[0-9۰-۹]+|روز پیش|ساعت پیش|هفته پیش|دقیقه پیش|دیروز|پریروز", " ")).Trim(); @"[0-9۰-۹]+|روز پیش|ساعت پیش|هفته پیش|دقیقه پیش|دیروز|پریروز", " ")).Trim();
if (core.Length < 15) return null; // too little to call it a dup safely if (core.Length < 15) return null; // too little to call it a dup safely
return $"j:{roleId}:{facId}:{(core.Length > 120 ? core[..120] : core)}"; return $"j:{facId}:{(core.Length > 120 ? core[..120] : core)}";
} }
// Keep one per group — prefer a non-«پزشک عمومی» role (the fan-out's GP copy is the usual
// mislabel), then the newest.
var toArchive = rows var toArchive = rows
.Select(r => new { r.Id, r.CreatedAt, Key = Sig(r.RoleId, r.FacilityId, r.Description) }) .Select(r => new { r.Id, r.RoleId, r.CreatedAt, Key = Sig(r.FacilityId, r.Description) })
.Where(x => x.Key is not null) .Where(x => x.Key is not null)
.GroupBy(x => x.Key) .GroupBy(x => x.Key)
.SelectMany(g => g.OrderByDescending(x => x.CreatedAt).Skip(1).Select(x => x.Id)) .SelectMany(g => g.OrderBy(x => x.RoleId == gpId ? 1 : 0).ThenByDescending(x => x.CreatedAt)
.Skip(1).Select(x => x.Id))
.ToList(); .ToList();
if (toArchive.Count == 0) return 0; if (toArchive.Count == 0) return 0;
@@ -729,42 +736,43 @@ public class IngestionService
facility.Lat = raw.Lat; facility.Lng = raw.Lng; facility.Lat = raw.Lat; facility.Lng = raw.Lng;
} }
// ONE ad = ONE listing. Do NOT fan out across roles. A single ad naming a few role-ish words
// («استخدام بهیار جهت دستیار پزشک و تزریقات») was exploding into 5 near-duplicate listings —
// one per extracted/typo role (پزشک عمومی، پرستار، دستیار پزشک، بهیار، «بیهیار»). Publish only
// the primary (guard-corrected) role; the rest stay findable via the full description text.
var primaryRole = pubRoles[0];
if (kindStr.Contains("job") || kindStr.Contains("استخدام")) if (kindStr.Contains("job") || kindStr.Contains("استخدام"))
{ {
foreach (var role in pubRoles) _db.JobOpenings.Add(new JobOpening
_db.JobOpenings.Add(new JobOpening {
{ Facility = facility, Role = primaryRole,
Facility = facility, Role = role, Title = $"استخدام {primaryRole.Name}",
Title = !string.IsNullOrWhiteSpace(d?.Title) && pubRoles.Count == 1 ? d!.Title!.Trim() : $"استخدام {role.Name}", EmploymentType = MapEmployment(d?.EmploymentType, parsed.EmploymentType),
EmploymentType = MapEmployment(d?.EmploymentType, parsed.EmploymentType), // Prefer the AI-extracted salary, falling back to the parser's (matching the talent path).
// Prefer the AI-extracted salary, falling back to the parser's — matching the talent SalaryMin = d?.PayAmount ?? parsed.PayAmount,
// path. (Jobs previously used only parsed.PayAmount, silently dropping the AI figure, Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated,
// so every aggregated opening showed «توافقی» even when the ad stated a number.) SourceUrl = raw.SourceUrl,
SalaryMin = d?.PayAmount ?? parsed.PayAmount, Lat = appLat, Lng = appLng, // source point (Divar) or geocoded neighborhood center
Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, Contacts = BuildContacts(d, parsed), // the ad's OWN number(s)
SourceUrl = raw.SourceUrl, });
Lat = appLat, Lng = appLng, // source point (Divar) or geocoded neighborhood center
Contacts = BuildContacts(d, parsed), // the ad's OWN number(s) — fresh per listing
});
} }
else else
{ {
var st = MapShiftType(d?.ShiftType, parsed.ShiftType); var st = MapShiftType(d?.ShiftType, parsed.ShiftType);
var (start, end) = DefaultTimes(st); var (start, end) = DefaultTimes(st);
foreach (var role in pubRoles) _db.Shifts.Add(new Shift
_db.Shifts.Add(new Shift {
{ Facility = facility, Role = primaryRole,
Facility = facility, Role = role, Date = DateOnly.FromDateTime(DateTime.UtcNow).AddDays(1),
Date = DateOnly.FromDateTime(DateTime.UtcNow).AddDays(1), StartTime = start, EndTime = end, ShiftType = st,
StartTime = start, EndTime = end, ShiftType = st, SpecialtyRequired = primaryRole.Name, Description = raw.RawText,
SpecialtyRequired = role.Name, Description = raw.RawText, PayType = parsed.SharePercent is not null && parsed.PayAmount is null ? PayType.Percentage
PayType = parsed.SharePercent is not null && parsed.PayAmount is null ? PayType.Percentage : parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift,
: parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift, PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent,
PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl,
Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl, Lat = appLat, Lng = appLng, // source point (Divar) or geocoded neighborhood center
Lat = appLat, Lng = appLng, // source point (Divar) or geocoded neighborhood center Contacts = BuildContacts(d, parsed), // the ad's OWN number(s)
Contacts = BuildContacts(d, parsed), // the ad's OWN number(s) — fresh per listing });
});
} }
raw.Status = RawListingStatus.Normalized; raw.Status = RawListingStatus.Normalized;
} }