diff --git a/src/JobsMedical.Web/Pages/Admin/Index.cshtml b/src/JobsMedical.Web/Pages/Admin/Index.cshtml
index b86dc1c..c52986e 100644
--- a/src/JobsMedical.Web/Pages/Admin/Index.cshtml
+++ b/src/JobsMedical.Web/Pages/Admin/Index.cshtml
@@ -67,13 +67,13 @@
شیفت/استخدام/آمادهبهکارِ جمعآوریشدهای که مختصات ندارند، از روی محلهٔ ذکرشده در متنِ آگهی روی نقشه قرار میگیرند (محدودهٔ تقریبی). فقط مختصاتِ خالی پر میشود؛ موقعیتِ واقعیِ مراکز دستنخورده میماند.
-
- فقط آگهیهایی که با صافیِ فعلی «خارج از حوزه» تشخیص داده میشوند (نه صرفاً ناقص) و استخدامهای تکراری پاک میشوند. آگهیهای معتبر دستنخوردهاند، پس آدرسِ ایندکسشدهشان تغییر نمیکند؛ فقط صفحاتِ بد ۴۰۴ میشوند.
+ فقط آگهیهایی که با صافیِ فعلی «خارج از حوزه» تشخیص داده میشوند (نه صرفاً ناقص) و استخدامهای تکراری بایگانی میشوند (وضعیت «بایگانی»، نه حذف). آگهیهای معتبر دستنخوردهاند، پس آدرسِ ایندکسشدهشان تغییر نمیکند؛ صفحهٔ موارد بایگانیشده ۴۱۰ Gone میدهد تا گوگل تمیز حذفشان کند.
diff --git a/src/JobsMedical.Web/Pages/Admin/Index.cshtml.cs b/src/JobsMedical.Web/Pages/Admin/Index.cshtml.cs
index b4be44d..35c8c8f 100644
--- a/src/JobsMedical.Web/Pages/Admin/Index.cshtml.cs
+++ b/src/JobsMedical.Web/Pages/Admin/Index.cshtml.cs
@@ -133,14 +133,15 @@ public class IndexModel : PageModel
}
/// <summary>
- /// In-place cleanup of existing aggregated jobs/shifts: delete only the out-of-scope ones
- /// (domestic-helper / promotional / spam) per the current validator, plus near-duplicate job
- /// reposts. Valid listings keep their IDs/URLs. No re-fetch, no AI — runs inline.
+ /// In-place cleanup of existing aggregated jobs/shifts: ARCHIVE (hide, keep the row) only the
+ /// out-of-scope ones (domestic-helper / promotional / spam) per the current validator, plus
+ /// near-duplicate job reposts. Archived pages drop from lists + sitemap and return 410 Gone.
+ /// Valid listings keep their IDs/URLs. Reversible, no re-fetch, no AI — runs inline.
/// </summary>
public async Task OnPostPurgeInvalidAsync()
{
- var (removed, deduped) = await _ingest.PurgeInvalidAggregatedAsync();
- IngestMessage = $"پاکسازیِ درجا: {removed} آگهیِ خارج از حوزه (خدمات منزل/تبلیغاتی/اسپم) و {deduped} استخدامِ تکراری حذف شد. سایر آگهیها و شناسه/آدرسشان دستنخورده ماند.";
+ var (archived, deduped) = await _ingest.PurgeInvalidAggregatedAsync();
+ IngestMessage = $"بایگانیِ درجا: {archived} آگهیِ خارج از حوزه (خدمات منزل/تبلیغاتی/اسپم) و {deduped} استخدامِ تکراری از سایت پنهان شد (وضعیت «بایگانی»؛ ردیف نگه داشته شد و قابل بازگشت است؛ صفحهشان ۴۱۰ Gone میدهد). آگهیهای معتبر و شناسه/آدرسشان دستنخورده ماند.";
return RedirectToPage();
}
diff --git a/src/JobsMedical.Web/Pages/Jobs/Details.cshtml.cs b/src/JobsMedical.Web/Pages/Jobs/Details.cshtml.cs
index 9f99884..e5d97ad 100644
--- a/src/JobsMedical.Web/Pages/Jobs/Details.cshtml.cs
+++ b/src/JobsMedical.Web/Pages/Jobs/Details.cshtml.cs
@@ -31,6 +31,9 @@ public class DetailsModel : PageModel
{
await LoadAsync(id);
if (Job is null) return NotFound();
+ // Intentionally removed (admin-archived out-of-scope/duplicate ad): 410 Gone is the standard
+ // signal for permanent removal, so search engines deindex it cleanly (we keep the row for audit).
+ if (Job.Status == ShiftStatus.Archived) return StatusCode(StatusCodes.Status410Gone);
MapKey = (await _settings.GetAsync()).NeshanMapKey;
Reported = Request.Query["reported"] == "1";
await _interest.LogJobAsync(InterestEventType.View, id);
diff --git a/src/JobsMedical.Web/Pages/Shifts/Details.cshtml.cs b/src/JobsMedical.Web/Pages/Shifts/Details.cshtml.cs
index a3d7065..887fc22 100644
--- a/src/JobsMedical.Web/Pages/Shifts/Details.cshtml.cs
+++ b/src/JobsMedical.Web/Pages/Shifts/Details.cshtml.cs
@@ -34,6 +34,9 @@ public class DetailsModel : PageModel
{
await LoadAsync(id);
if (Shift is null) return NotFound();
+ // Intentionally removed (admin-archived out-of-scope/duplicate ad): 410 Gone is the standard
+ // signal for permanent removal, so search engines deindex it cleanly (we keep the row for audit).
+ if (Shift.Status == ShiftStatus.Archived) return StatusCode(StatusCodes.Status410Gone);
MapKey = (await _settings.GetAsync()).NeshanMapKey;
Reported = Request.Query["reported"] == "1";
await _interest.LogAsync(InterestEventType.View, id); // behavioral signal for recommendations
diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
index 6778052..21a7742 100644
--- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
+++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
@@ -314,19 +314,19 @@ public class IngestionService
int filled = 0;
var jobs = await _db.JobOpenings
- .Where(j => j.Lat == null && j.Source == ShiftSource.Aggregated && j.Facility.CityId == tehran.Id)
+ .Where(j => j.Status == ShiftStatus.Open && j.Lat == null && j.Source == ShiftSource.Aggregated && j.Facility.CityId == tehran.Id)
.ToListAsync(ct);
foreach (var j in jobs)
if (TehranGeo.Locate(j.Description) is { } g) { j.Lat = g.lat; j.Lng = g.lng; filled++; }
var shifts = await _db.Shifts
- .Where(s => s.Lat == null && s.Source == ShiftSource.Aggregated && s.Facility.CityId == tehran.Id)
+ .Where(s => s.Status == ShiftStatus.Open && s.Lat == null && s.Source == ShiftSource.Aggregated && s.Facility.CityId == tehran.Id)
.ToListAsync(ct);
foreach (var s in shifts)
if (TehranGeo.Locate(s.Description) is { } g) { s.Lat = g.lat; s.Lng = g.lng; filled++; }
var talent = await _db.TalentListings
- .Where(t => t.Lat == null && t.Source == ShiftSource.Aggregated && t.CityId == tehran.Id)
+ .Where(t => t.Status == ShiftStatus.Open && t.Lat == null && t.Source == ShiftSource.Aggregated && t.CityId == tehran.Id)
.ToListAsync(ct);
foreach (var t in talent)
if (TehranGeo.Locate(t.AreaNote, t.Description) is { } g) { t.Lat = g.lat; t.Lng = g.lng; filled++; }
@@ -337,14 +337,18 @@ public class IngestionService
}
/// <summary>
- /// SEO-safe in-place cleanup of the existing AGGREGATED job/shift board: re-screen each listing's
- /// stored text through the CURRENT validator and delete only the ones that are now clearly
- /// out-of-scope — domestic-helper («امور منزل»), promotional/training, or spam (i.e.
- /// ). Merely-incomplete-but-legit ads are KEPT. Then collapse
- /// near-duplicate job reposts. Valid listings are never touched, so their IDs — and indexed URLs —
- /// stay stable; only the bad pages 404 (which is the desired outcome). Returns (removed, deduped).
+ /// SEO-safe in-place cleanup of the existing AGGREGATED job/shift board: re-screen each Open
+ /// listing's stored text through the CURRENT validator and ARCHIVE (Status → Archived, not delete)
+ /// only the ones that are now clearly out-of-scope — domestic-helper («امور منزل»),
+ /// promotional/training, or spam (i.e. the validator's IsSpam verdict). Merely-incomplete-
+ /// but-legit ads are KEPT. Then collapse near-duplicate job reposts the same way. Archiving (vs
+ /// hard delete) is the project convention: the row is retained for analysis and the change is
+ /// reversible, the listing drops out of every public screen + the sitemap (which filter Status ==
+ /// Open), and its detail page returns 410 Gone (the standard "permanently removed" signal Google
+ /// uses to deindex). Valid listings are never touched, so their IDs/URLs stay stable.
+ /// Returns (archived, deduped).
/// </summary>
- public async Task<(int removed, int deduped)> PurgeInvalidAggregatedAsync(CancellationToken ct = default)
+ public async Task<(int archived, int deduped)> PurgeInvalidAggregatedAsync(CancellationToken ct = default)
{
var roleNames = await _db.Roles.Select(r => r.Name).ToListAsync(ct);
var cityNames = await _db.Cities.Select(c => c.Name).ToListAsync(ct);
@@ -357,30 +361,33 @@ public class IngestionService
return _validator.Validate(t, parsed).IsSpam; // spam | promo | domestic-helper
}
- int removed = 0;
+ int archived = 0;
- var jobIds = (await _db.JobOpenings.Where(j => j.Source == ShiftSource.Aggregated)
+ var jobIds = (await _db.JobOpenings.Where(j => j.Status == ShiftStatus.Open && j.Source == ShiftSource.Aggregated)
.Select(j => new { j.Id, j.Description }).ToListAsync(ct))
.Where(j => IsOutOfScope(j.Description)).Select(j => j.Id).ToList();
if (jobIds.Count > 0)
- removed += await _db.JobOpenings.Where(j => jobIds.Contains(j.Id)).ExecuteDeleteAsync(ct);
+ archived += await _db.JobOpenings.Where(j => jobIds.Contains(j.Id))
+ .ExecuteUpdateAsync(u => u.SetProperty(j => j.Status, ShiftStatus.Archived), ct);
- var shiftIds = (await _db.Shifts.Where(s => s.Source == ShiftSource.Aggregated)
+ var shiftIds = (await _db.Shifts.Where(s => s.Status == ShiftStatus.Open && s.Source == ShiftSource.Aggregated)
.Select(s => new { s.Id, s.Description }).ToListAsync(ct))
.Where(s => IsOutOfScope(s.Description)).Select(s => s.Id).ToList();
if (shiftIds.Count > 0)
- removed += await _db.Shifts.Where(s => shiftIds.Contains(s.Id)).ExecuteDeleteAsync(ct);
+ archived += await _db.Shifts.Where(s => shiftIds.Contains(s.Id))
+ .ExecuteUpdateAsync(u => u.SetProperty(s => s.Status, ShiftStatus.Archived), ct);
var deduped = await DedupeJobsAsync(ct);
- _log.LogInformation("Purge removed {R} out-of-scope aggregated listings; deduped {D} jobs.", removed, deduped);
- return (removed, deduped);
+ _log.LogInformation("Purge archived {R} out-of-scope aggregated listings; deduped {D} jobs.", archived, deduped);
+ return (archived, deduped);
}
/// <summary>
/// Collapse near-duplicate aggregated JOB reposts the exact-hash dedupe missed (same ad re-crawled
/// with slightly different surrounding text → different ContentHash). Signature = role + facility +
/// normalized description core with digits/«… پیش» time-phrases stripped. Keeps the NEWEST of each
- /// group. Per-role fan-out of one ad is preserved (different RoleId → different signature).
+ /// group and ARCHIVES the rest (Status → Archived, reversible — same rationale as the purge).
+ /// Per-role fan-out of one ad is preserved (different RoleId → different signature).
/// </summary>
public async Task DedupeJobsAsync(CancellationToken ct = default)
{
@@ -397,17 +404,18 @@ public class IngestionService
return $"j:{roleId}:{facId}:{(core.Length > 120 ? core[..120] : core)}";
}
- var toRemove = rows
+ var toArchive = rows
.Select(r => new { r.Id, r.CreatedAt, Key = Sig(r.RoleId, r.FacilityId, r.Description) })
.Where(x => x.Key is not null)
.GroupBy(x => x.Key)
.SelectMany(g => g.OrderByDescending(x => x.CreatedAt).Skip(1).Select(x => x.Id))
.ToList();
- if (toRemove.Count == 0) return 0;
- var removed = await _db.JobOpenings.Where(j => toRemove.Contains(j.Id)).ExecuteDeleteAsync(ct);
- _log.LogInformation("Deduped {N} near-duplicate aggregated jobs.", removed);
- return removed;
+ if (toArchive.Count == 0) return 0;
+ var archived = await _db.JobOpenings.Where(j => toArchive.Contains(j.Id))
+ .ExecuteUpdateAsync(u => u.SetProperty(j => j.Status, ShiftStatus.Archived), ct);
+ _log.LogInformation("Archived {N} near-duplicate aggregated jobs.", archived);
+ return archived;
}
private static string DigitsOnly(string s) => new(HtmlUtil.ToLatinDigits(s).Where(char.IsDigit).ToArray());