Make the listing purge SEO-standard: archive (not delete) + 410 Gone
Per the project archive-not-delete convention, the in-place purge now sets out-of-scope aggregated jobs/shifts (and near-duplicate job reposts) to ShiftStatus.Archived instead of hard-deleting them:

- The row is retained for analysis and the change is reversible.
- The listing drops out of every public screen and the sitemap (both of which filter on Status == Open).
- Its detail page now returns 410 Gone (the standard permanent-removal signal) so search engines deindex it cleanly, instead of leaving the off-topic page live at 200 or hard-404ing.

Dedupe of job reposts archives the older copies the same way. Coordinate backfill now also skips non-Open rows. Valid listings are untouched, so IDs/URLs stay stable.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -67,13 +67,13 @@
|
|||||||
شیفت/استخدام/آماده‌به‌کارِ جمع‌آوری‌شده‌ای که مختصات ندارند، از روی محلهٔ ذکرشده در متنِ آگهی روی نقشه قرار می‌گیرند (محدودهٔ تقریبی). فقط مختصاتِ خالی پر می‌شود؛ موقعیتِ واقعیِ مراکز دست‌نخورده می‌ماند.
|
شیفت/استخدام/آماده‌به‌کارِ جمع‌آوری‌شده‌ای که مختصات ندارند، از روی محلهٔ ذکرشده در متنِ آگهی روی نقشه قرار می‌گیرند (محدودهٔ تقریبی). فقط مختصاتِ خالی پر می‌شود؛ موقعیتِ واقعیِ مراکز دست‌نخورده می‌ماند.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<form method="post" onsubmit="return confirm('آگهیهای جمعآوریشدهٔ شیفت/استخدام که اکنون خارج از حوزهاند (خدمات منزل/نظافت، تبلیغاتی/آموزشی، اسپم) و استخدامهای تکراری حذف میشوند. آگهیهای معتبر و شناسه/آدرسشان دستنخورده میماند. این کار بازگشتناپذیر است. ادامه؟');">
|
<form method="post" onsubmit="return confirm('آگهیهای جمعآوریشدهٔ شیفت/استخدام که اکنون خارج از حوزهاند (خدمات منزل/نظافت، تبلیغاتی/آموزشی، اسپم) و استخدامهای تکراری «بایگانی» میشوند: از سایت پنهان میشوند ولی ردیفشان نگه داشته میشود (قابل بازگشت). آگهیهای معتبر و شناسه/آدرسشان دستنخورده میماند. ادامه؟');">
|
||||||
<button type="submit" asp-page-handler="PurgeInvalid" class="btn btn-outline btn-block" style="margin-top:10px; color:var(--danger); border-color:var(--danger);">
|
<button type="submit" asp-page-handler="PurgeInvalid" class="btn btn-outline btn-block" style="margin-top:10px; color:var(--danger); border-color:var(--danger);">
|
||||||
🧽 حذفِ درجای آگهیهای خارج از حوزه و تکراری (شیفت/استخدام)
|
🧽 بایگانیِ درجای آگهی‌های خارج از حوزه و تکراری (شیفت/استخدام)
|
||||||
</button>
|
</button>
|
||||||
</form>
|
</form>
|
||||||
<p class="muted" style="font-size:11px; margin:6px 0 0;">
|
<p class="muted" style="font-size:11px; margin:6px 0 0;">
|
||||||
فقط آگهیهایی که با صافیِ فعلی «خارج از حوزه» تشخیص داده میشوند (نه صرفاً ناقص) و استخدامهای تکراری پاک میشوند. آگهیهای معتبر دستنخوردهاند، پس آدرسِ ایندکسشدهشان تغییر نمیکند؛ فقط صفحاتِ بد ۴۰۴ میشوند.
|
فقط آگهی‌هایی که با صافیِ فعلی «خارج از حوزه» تشخیص داده می‌شوند (نه صرفاً ناقص) و استخدام‌های تکراری بایگانی می‌شوند (وضعیت «بایگانی»، نه حذف). آگهی‌های معتبر دست‌نخورده‌اند، پس آدرسِ ایندکس‌شده‌شان تغییر نمی‌کند؛ صفحهٔ موارد بایگانی‌شده ۴۱۰ Gone می‌دهد تا گوگل تمیز حذفشان کند.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<hr style="border:none; border-top:1px solid var(--line); margin:16px 0;" />
|
<hr style="border:none; border-top:1px solid var(--line); margin:16px 0;" />
|
||||||
|
|||||||
@@ -133,14 +133,15 @@ public class IndexModel : PageModel
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// In-place cleanup of existing aggregated jobs/shifts: delete only the out-of-scope ones
|
/// In-place cleanup of existing aggregated jobs/shifts: ARCHIVE (hide, keep the row) only the
|
||||||
/// (domestic-helper / promotional / spam) per the current validator, plus near-duplicate job
|
/// out-of-scope ones (domestic-helper / promotional / spam) per the current validator, plus
|
||||||
/// reposts. Valid listings keep their IDs/URLs. No re-fetch, no AI — runs inline.
|
/// near-duplicate job reposts. Archived pages drop from lists + sitemap and return 410 Gone.
|
||||||
|
/// Valid listings keep their IDs/URLs. Reversible, no re-fetch, no AI — runs inline.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public async Task<IActionResult> OnPostPurgeInvalidAsync()
|
public async Task<IActionResult> OnPostPurgeInvalidAsync()
|
||||||
{
|
{
|
||||||
var (removed, deduped) = await _ingest.PurgeInvalidAggregatedAsync();
|
var (archived, deduped) = await _ingest.PurgeInvalidAggregatedAsync();
|
||||||
IngestMessage = $"پاکسازیِ درجا: {removed} آگهیِ خارج از حوزه (خدمات منزل/تبلیغاتی/اسپم) و {deduped} استخدامِ تکراری حذف شد. سایر آگهیها و شناسه/آدرسشان دستنخورده ماند.";
|
IngestMessage = $"بایگانیِ درجا: {archived} آگهیِ خارج از حوزه (خدمات منزل/تبلیغاتی/اسپم) و {deduped} استخدامِ تکراری از سایت پنهان شد (وضعیت «بایگانی»؛ ردیف نگه داشته شد و قابل بازگشت است؛ صفحهشان ۴۱۰ Gone میدهد). آگهیهای معتبر و شناسه/آدرسشان دستنخورده ماند.";
|
||||||
return RedirectToPage();
|
return RedirectToPage();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -31,6 +31,9 @@ public class DetailsModel : PageModel
|
|||||||
{
|
{
|
||||||
await LoadAsync(id);
|
await LoadAsync(id);
|
||||||
if (Job is null) return NotFound();
|
if (Job is null) return NotFound();
|
||||||
|
// Intentionally removed (admin-archived out-of-scope/duplicate ad): 410 Gone is the standard
|
||||||
|
// signal for permanent removal, so search engines deindex it cleanly (we keep the row for audit).
|
||||||
|
if (Job.Status == ShiftStatus.Archived) return StatusCode(StatusCodes.Status410Gone);
|
||||||
MapKey = (await _settings.GetAsync()).NeshanMapKey;
|
MapKey = (await _settings.GetAsync()).NeshanMapKey;
|
||||||
Reported = Request.Query["reported"] == "1";
|
Reported = Request.Query["reported"] == "1";
|
||||||
await _interest.LogJobAsync(InterestEventType.View, id);
|
await _interest.LogJobAsync(InterestEventType.View, id);
|
||||||
|
|||||||
@@ -34,6 +34,9 @@ public class DetailsModel : PageModel
|
|||||||
{
|
{
|
||||||
await LoadAsync(id);
|
await LoadAsync(id);
|
||||||
if (Shift is null) return NotFound();
|
if (Shift is null) return NotFound();
|
||||||
|
// Intentionally removed (admin-archived out-of-scope/duplicate ad): 410 Gone is the standard
|
||||||
|
// signal for permanent removal, so search engines deindex it cleanly (we keep the row for audit).
|
||||||
|
if (Shift.Status == ShiftStatus.Archived) return StatusCode(StatusCodes.Status410Gone);
|
||||||
MapKey = (await _settings.GetAsync()).NeshanMapKey;
|
MapKey = (await _settings.GetAsync()).NeshanMapKey;
|
||||||
Reported = Request.Query["reported"] == "1";
|
Reported = Request.Query["reported"] == "1";
|
||||||
await _interest.LogAsync(InterestEventType.View, id); // behavioral signal for recommendations
|
await _interest.LogAsync(InterestEventType.View, id); // behavioral signal for recommendations
|
||||||
|
|||||||
@@ -314,19 +314,19 @@ public class IngestionService
|
|||||||
int filled = 0;
|
int filled = 0;
|
||||||
|
|
||||||
var jobs = await _db.JobOpenings
|
var jobs = await _db.JobOpenings
|
||||||
.Where(j => j.Lat == null && j.Source == ShiftSource.Aggregated && j.Facility.CityId == tehran.Id)
|
.Where(j => j.Status == ShiftStatus.Open && j.Lat == null && j.Source == ShiftSource.Aggregated && j.Facility.CityId == tehran.Id)
|
||||||
.ToListAsync(ct);
|
.ToListAsync(ct);
|
||||||
foreach (var j in jobs)
|
foreach (var j in jobs)
|
||||||
if (TehranGeo.Locate(j.Description) is { } g) { j.Lat = g.lat; j.Lng = g.lng; filled++; }
|
if (TehranGeo.Locate(j.Description) is { } g) { j.Lat = g.lat; j.Lng = g.lng; filled++; }
|
||||||
|
|
||||||
var shifts = await _db.Shifts
|
var shifts = await _db.Shifts
|
||||||
.Where(s => s.Lat == null && s.Source == ShiftSource.Aggregated && s.Facility.CityId == tehran.Id)
|
.Where(s => s.Status == ShiftStatus.Open && s.Lat == null && s.Source == ShiftSource.Aggregated && s.Facility.CityId == tehran.Id)
|
||||||
.ToListAsync(ct);
|
.ToListAsync(ct);
|
||||||
foreach (var s in shifts)
|
foreach (var s in shifts)
|
||||||
if (TehranGeo.Locate(s.Description) is { } g) { s.Lat = g.lat; s.Lng = g.lng; filled++; }
|
if (TehranGeo.Locate(s.Description) is { } g) { s.Lat = g.lat; s.Lng = g.lng; filled++; }
|
||||||
|
|
||||||
var talent = await _db.TalentListings
|
var talent = await _db.TalentListings
|
||||||
.Where(t => t.Lat == null && t.Source == ShiftSource.Aggregated && t.CityId == tehran.Id)
|
.Where(t => t.Status == ShiftStatus.Open && t.Lat == null && t.Source == ShiftSource.Aggregated && t.CityId == tehran.Id)
|
||||||
.ToListAsync(ct);
|
.ToListAsync(ct);
|
||||||
foreach (var t in talent)
|
foreach (var t in talent)
|
||||||
if (TehranGeo.Locate(t.AreaNote, t.Description) is { } g) { t.Lat = g.lat; t.Lng = g.lng; filled++; }
|
if (TehranGeo.Locate(t.AreaNote, t.Description) is { } g) { t.Lat = g.lat; t.Lng = g.lng; filled++; }
|
||||||
@@ -337,14 +337,18 @@ public class IngestionService
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// SEO-safe in-place cleanup of the existing AGGREGATED job/shift board: re-screen each listing's
|
/// SEO-safe in-place cleanup of the existing AGGREGATED job/shift board: re-screen each Open
|
||||||
/// stored text through the CURRENT validator and delete only the ones that are now clearly
|
/// listing's stored text through the CURRENT validator and ARCHIVE (Status → Archived, not delete)
|
||||||
/// out-of-scope — domestic-helper («امور منزل»), promotional/training, or spam (i.e.
|
/// only the ones that are now clearly out-of-scope — domestic-helper («امور منزل»),
|
||||||
/// <see cref="ValidationResult.IsSpam"/>). Merely-incomplete-but-legit ads are KEPT. Then collapse
|
/// promotional/training, or spam (i.e. <see cref="ValidationResult.IsSpam"/>). Merely-incomplete-
|
||||||
/// near-duplicate job reposts. Valid listings are never touched, so their IDs — and indexed URLs —
|
/// but-legit ads are KEPT. Then collapse near-duplicate job reposts the same way. Archiving (vs
|
||||||
/// stay stable; only the bad pages 404 (which is the desired outcome). Returns (removed, deduped).
|
/// hard delete) is the project convention: the row is retained for analysis and the change is
|
||||||
|
/// reversible, the listing drops out of every public screen + the sitemap (which filter Status ==
|
||||||
|
/// Open), and its detail page returns 410 Gone (the standard "permanently removed" signal Google
|
||||||
|
/// uses to deindex). Valid listings are never touched, so their IDs/URLs stay stable.
|
||||||
|
/// Returns (archived, deduped).
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public async Task<(int removed, int deduped)> PurgeInvalidAggregatedAsync(CancellationToken ct = default)
|
public async Task<(int archived, int deduped)> PurgeInvalidAggregatedAsync(CancellationToken ct = default)
|
||||||
{
|
{
|
||||||
var roleNames = await _db.Roles.Select(r => r.Name).ToListAsync(ct);
|
var roleNames = await _db.Roles.Select(r => r.Name).ToListAsync(ct);
|
||||||
var cityNames = await _db.Cities.Select(c => c.Name).ToListAsync(ct);
|
var cityNames = await _db.Cities.Select(c => c.Name).ToListAsync(ct);
|
||||||
@@ -357,30 +361,33 @@ public class IngestionService
|
|||||||
return _validator.Validate(t, parsed).IsSpam; // spam | promo | domestic-helper
|
return _validator.Validate(t, parsed).IsSpam; // spam | promo | domestic-helper
|
||||||
}
|
}
|
||||||
|
|
||||||
int removed = 0;
|
int archived = 0;
|
||||||
|
|
||||||
var jobIds = (await _db.JobOpenings.Where(j => j.Source == ShiftSource.Aggregated)
|
var jobIds = (await _db.JobOpenings.Where(j => j.Status == ShiftStatus.Open && j.Source == ShiftSource.Aggregated)
|
||||||
.Select(j => new { j.Id, j.Description }).ToListAsync(ct))
|
.Select(j => new { j.Id, j.Description }).ToListAsync(ct))
|
||||||
.Where(j => IsOutOfScope(j.Description)).Select(j => j.Id).ToList();
|
.Where(j => IsOutOfScope(j.Description)).Select(j => j.Id).ToList();
|
||||||
if (jobIds.Count > 0)
|
if (jobIds.Count > 0)
|
||||||
removed += await _db.JobOpenings.Where(j => jobIds.Contains(j.Id)).ExecuteDeleteAsync(ct);
|
archived += await _db.JobOpenings.Where(j => jobIds.Contains(j.Id))
|
||||||
|
.ExecuteUpdateAsync(u => u.SetProperty(j => j.Status, ShiftStatus.Archived), ct);
|
||||||
|
|
||||||
var shiftIds = (await _db.Shifts.Where(s => s.Source == ShiftSource.Aggregated)
|
var shiftIds = (await _db.Shifts.Where(s => s.Status == ShiftStatus.Open && s.Source == ShiftSource.Aggregated)
|
||||||
.Select(s => new { s.Id, s.Description }).ToListAsync(ct))
|
.Select(s => new { s.Id, s.Description }).ToListAsync(ct))
|
||||||
.Where(s => IsOutOfScope(s.Description)).Select(s => s.Id).ToList();
|
.Where(s => IsOutOfScope(s.Description)).Select(s => s.Id).ToList();
|
||||||
if (shiftIds.Count > 0)
|
if (shiftIds.Count > 0)
|
||||||
removed += await _db.Shifts.Where(s => shiftIds.Contains(s.Id)).ExecuteDeleteAsync(ct);
|
archived += await _db.Shifts.Where(s => shiftIds.Contains(s.Id))
|
||||||
|
.ExecuteUpdateAsync(u => u.SetProperty(s => s.Status, ShiftStatus.Archived), ct);
|
||||||
|
|
||||||
var deduped = await DedupeJobsAsync(ct);
|
var deduped = await DedupeJobsAsync(ct);
|
||||||
_log.LogInformation("Purge removed {R} out-of-scope aggregated listings; deduped {D} jobs.", removed, deduped);
|
_log.LogInformation("Purge archived {R} out-of-scope aggregated listings; deduped {D} jobs.", archived, deduped);
|
||||||
return (removed, deduped);
|
return (archived, deduped);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Collapse near-duplicate aggregated JOB reposts the exact-hash dedupe missed (same ad re-crawled
|
/// Collapse near-duplicate aggregated JOB reposts the exact-hash dedupe missed (same ad re-crawled
|
||||||
/// with slightly different surrounding text → different ContentHash). Signature = role + facility +
|
/// with slightly different surrounding text → different ContentHash). Signature = role + facility +
|
||||||
/// normalized description core with digits/«… پیش» time-phrases stripped. Keeps the NEWEST of each
|
/// normalized description core with digits/«… پیش» time-phrases stripped. Keeps the NEWEST of each
|
||||||
/// group. Per-role fan-out of one ad is preserved (different RoleId → different signature).
|
/// group and ARCHIVES the rest (Status → Archived, reversible — same rationale as the purge).
|
||||||
|
/// Per-role fan-out of one ad is preserved (different RoleId → different signature).
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public async Task<int> DedupeJobsAsync(CancellationToken ct = default)
|
public async Task<int> DedupeJobsAsync(CancellationToken ct = default)
|
||||||
{
|
{
|
||||||
@@ -397,17 +404,18 @@ public class IngestionService
|
|||||||
return $"j:{roleId}:{facId}:{(core.Length > 120 ? core[..120] : core)}";
|
return $"j:{roleId}:{facId}:{(core.Length > 120 ? core[..120] : core)}";
|
||||||
}
|
}
|
||||||
|
|
||||||
var toRemove = rows
|
var toArchive = rows
|
||||||
.Select(r => new { r.Id, r.CreatedAt, Key = Sig(r.RoleId, r.FacilityId, r.Description) })
|
.Select(r => new { r.Id, r.CreatedAt, Key = Sig(r.RoleId, r.FacilityId, r.Description) })
|
||||||
.Where(x => x.Key is not null)
|
.Where(x => x.Key is not null)
|
||||||
.GroupBy(x => x.Key)
|
.GroupBy(x => x.Key)
|
||||||
.SelectMany(g => g.OrderByDescending(x => x.CreatedAt).Skip(1).Select(x => x.Id))
|
.SelectMany(g => g.OrderByDescending(x => x.CreatedAt).Skip(1).Select(x => x.Id))
|
||||||
.ToList();
|
.ToList();
|
||||||
|
|
||||||
if (toRemove.Count == 0) return 0;
|
if (toArchive.Count == 0) return 0;
|
||||||
var removed = await _db.JobOpenings.Where(j => toRemove.Contains(j.Id)).ExecuteDeleteAsync(ct);
|
var archived = await _db.JobOpenings.Where(j => toArchive.Contains(j.Id))
|
||||||
_log.LogInformation("Deduped {N} near-duplicate aggregated jobs.", removed);
|
.ExecuteUpdateAsync(u => u.SetProperty(j => j.Status, ShiftStatus.Archived), ct);
|
||||||
return removed;
|
_log.LogInformation("Archived {N} near-duplicate aggregated jobs.", archived);
|
||||||
|
return archived;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static string DigitsOnly(string s) => new(HtmlUtil.ToLatinDigits(s).Where(char.IsDigit).ToArray());
|
private static string DigitsOnly(string s) => new(HtmlUtil.ToLatinDigits(s).Where(char.IsDigit).ToArray());
|
||||||
|
|||||||
Reference in New Issue
Block a user