diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
index d70e945..54b442a 100644
--- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
+++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
@@ -149,11 +149,16 @@ public class IngestionService
await DedupeTalentAsync(ct); // collapse same-ad reposts the exact-hash dedup can't catch
+ // Self-clean after every crawl so the board stays tidy with no manual admin clicks: archive
+ // out-of-scope/duplicate listings, merge duplicate + fold junk facilities, backfill coords.
+ var cleanup = results.Count > 0 ? await RunPostIngestCleanupAsync(ct) : default;
+
// Persist a run-log row so admins get a crawl history (with a per-source breakdown).
if (results.Count > 0)
{
var detail = string.Join("؛ ", results.Select(r =>
- $"{r.Source}: یافت {r.Fetched}، صف {r.Queued}، منتشر {r.Published}، پرچم {r.Flagged}، اسپم {r.Spam}، تکراری {r.Duplicates}"));
+ $"{r.Source}: یافت {r.Fetched}، صف {r.Queued}، منتشر {r.Published}، پرچم {r.Flagged}، اسپم {r.Spam}، تکراری {r.Duplicates}"))
+ + $" || پاکسازیِ خودکار: {cleanup.archived} بایگانی، {cleanup.dedupedJobs} استخدامِ تکراری، {cleanup.mergedFac} مرکزِ ادغام، {cleanup.cleanedFac} مرکزِ حذف، {cleanup.coords} مختصات";
_db.IngestionRuns.Add(new IngestionRun
{
Fetched = summary.TotalFetched,
@@ -336,6 +341,24 @@ public class IngestionService
return filled;
}
+ /// <summary>
+ /// Housekeeping pass that runs automatically once a crawl finishes and can also be invoked on demand, so the
+ /// board stays tidy without an admin pressing the cleanup buttons. In sequence: archive out-of-scope/duplicate
+ /// listings, merge duplicate + fold junk facilities, backfill missing Tehran map coords. Intended to be in-place,
+ /// reversible for listings (archive, not delete), hands-off for employer/verified facilities, and DB + CPU only.
+ /// </summary>
+ /// <returns>The counts reported by the three steps, in the order named by the tuple elements.</returns>
+ public async Task<(int archived, int dedupedJobs, int mergedFac, int cleanedFac, int coords)>
+     RunPostIngestCleanupAsync(CancellationToken ct = default)
+ {
+     var (archivedCount, duplicateJobCount) = await PurgeInvalidAggregatedAsync(ct);
+     var (mergedCount, cleanedCount) = await MergeAndCleanFacilitiesAsync(ct);
+     var coordCount = await BackfillCoordsAsync(ct);
+     _log.LogInformation("Post-ingest cleanup: archived={A} dedupedJobs={DJ} mergedFac={MF} cleanedFac={CF} coords={C}",
+         archivedCount, duplicateJobCount, mergedCount, cleanedCount, coordCount);
+     return (archivedCount, duplicateJobCount, mergedCount, cleanedCount, coordCount);
+ }
+
///
/// SEO-safe in-place cleanup of the existing AGGREGATED job/shift board: re-screen each Open
/// listing's stored text through the CURRENT validator and ARCHIVE (Status → Archived, not delete)