using JobsMedical.Web.Data;
using JobsMedical.Web.Models;
using JobsMedical.Web.Services.Scraping;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.Mvc.RazorPages;
using Microsoft.EntityFrameworkCore;

namespace JobsMedical.Web.Pages.Admin;

/// <summary>
/// Page model for the admin dashboard: shows the review queue (new and flagged raw listings),
/// accepts manually entered listings, and exposes the ingestion maintenance actions
/// (run, purge + re-ingest, reprocess, coordinate backfill, archive-invalid).
/// Restricted to users in the "Admin" role.
/// </summary>
[Authorize(Roles = "Admin")]
public class IndexModel : PageModel
{
    private readonly AppDbContext _db;
    private readonly IngestionService _ingest;
    private readonly IServiceScopeFactory _scopes;

    // NOTE(review): the logger category and the element types of SourceNames / Runs below were
    // reconstructed from usage (ILogger<IndexModel>, string, IngestionRun) — confirm against the
    // DI registration, IngestionService.SourceNames and AppDbContext.IngestionRuns.
    private readonly ILogger<IndexModel> _log;

    public IndexModel(
        AppDbContext db,
        IngestionService ingest,
        IServiceScopeFactory scopes,
        ILogger<IndexModel> log)
    {
        _db = db;
        _ingest = ingest;
        _scopes = scopes;
        _log = log;
    }

    /// <summary>Raw listings with status <c>New</c>, highest confidence first.</summary>
    public List<RawListing> Queue { get; private set; } = new();

    /// <summary>Raw listings with status <c>Flagged</c>, most recently fetched first.</summary>
    public List<RawListing> Flagged { get; private set; } = new();

    /// <summary>Source names as reported by the ingestion service.</summary>
    public IReadOnlyList<string> SourceNames { get; private set; } = new List<string>();

    /// <summary>Number of shifts whose source is not <c>Direct</c>.</summary>
    public int PublishedShifts { get; private set; }

    /// <summary>Total number of job openings (all sources).</summary>
    public int PublishedJobs { get; private set; }

    /// <summary>The 15 most recent ingestion runs, newest first.</summary>
    public List<IngestionRun> Runs { get; private set; } = new();

    /// <summary>Optional source channel label for a manually added listing.</summary>
    [BindProperty]
    public string? SourceChannel { get; set; }

    /// <summary>Raw ad text for a manually added listing.</summary>
    [BindProperty]
    public string? RawText { get; set; }

    /// <summary>One-shot status message shown after a redirect.</summary>
    [TempData]
    public string? IngestMessage { get; set; }

    /// <summary>Loads the dashboard data for display.</summary>
    public async Task OnGetAsync() => await LoadAsync();

    /// <summary>
    /// Adds a manually entered listing to the review queue. Blank text is ignored; a blank
    /// source channel falls back to the «ورود دستی» label.
    /// </summary>
    public async Task<IActionResult> OnPostAddAsync()
    {
        if (!string.IsNullOrWhiteSpace(RawText))
        {
            _db.RawListings.Add(new RawListing
            {
                SourceChannel = string.IsNullOrWhiteSpace(SourceChannel) ? "ورود دستی" : SourceChannel.Trim(),
                RawText = RawText.Trim(),
                Status = RawListingStatus.New,
            });
            await _db.SaveChangesAsync();
        }

        return RedirectToPage();
    }

    /// <summary>
    /// Fast triage — reject (discard) a queued/flagged item without opening the review page.
    /// An unknown <paramref name="id"/> is silently ignored.
    /// </summary>
    /// <param name="id">Primary key of the raw listing to discard.</param>
    public async Task<IActionResult> OnPostQuickDiscardAsync(int id)
    {
        var raw = await _db.RawListings.FirstOrDefaultAsync(r => r.Id == id);
        if (raw is not null)
        {
            raw.Status = RawListingStatus.Discarded;
            await _db.SaveChangesAsync();
        }

        return RedirectToPage();
    }

    /// <summary>Runs one ingestion pass inline and reports its counters.</summary>
    public async Task<IActionResult> OnPostRunIngestionAsync()
    {
        var summary = await _ingest.RunAsync();
        IngestMessage =
            $"جمع‌آوری انجام شد — {summary.TotalQueued} در صف، {summary.TotalFlagged} پرچم‌خورده، "
            + $"{summary.TotalSpam} اسپم، {summary.TotalDuplicates} تکراری.";
        return RedirectToPage();
    }

    /// <summary>
    /// DESTRUCTIVE rebuild, in two distinct deletes:
    ///  1. The DEDUPE CACHE — ALL RawListings, including any added via «افزودن دستی». These are not
    ///     published content; they're the crawl/staging rows whose ContentHash blocks re-ingesting
    ///     the same ad. Wiping them lets everything be re-fetched and re-judged by the AI.
    ///  2. AGGREGATED listings only — Shifts/JobOpenings/TalentListings with Source==Aggregated, i.e.
    ///     produced by ingestion. Employer/admin-posted listings (Source==Direct) are left untouched.
    /// Then re-fetch everything and re-run it through the (now AI-enabled) pipeline.
    /// RawListings are deleted first so their LinkedShift/LinkedTalent FKs (SetNull) don't dangle;
    /// DB cascade clears ContactMethods / Applications / InterestEvents when the posts are deleted.
    /// </summary>
    public async Task<IActionResult> OnPostPurgeAndReingestAsync()
    {
        int rawCount, shifts, jobs, talent;

        // All four deletes commit together or not at all.
        await using (var tx = await _db.Database.BeginTransactionAsync())
        {
            rawCount = await _db.RawListings.ExecuteDeleteAsync(); // clear dedupe cache
            shifts = await _db.Shifts
                .Where(x => x.Source == ShiftSource.Aggregated)
                .ExecuteDeleteAsync();
            jobs = await _db.JobOpenings
                .Where(x => x.Source == ShiftSource.Aggregated)
                .ExecuteDeleteAsync();
            talent = await _db.TalentListings
                .Where(x => x.Source == ShiftSource.Aggregated)
                .ExecuteDeleteAsync();
            await tx.CommitAsync();
        }

        // Record the purge before re-ingesting, so it is logged even if the re-fetch throws.
        _log.LogWarning(
            "Purge-and-reingest deleted {RawCount} raw listings, {Shifts} shifts, {Jobs} job openings, {Talent} talent listings",
            rawCount, shifts, jobs, talent);

        var summary = await _ingest.RunAsync(); // fresh fetch → AI audit → publish/queue
        IngestMessage =
            $"پاک‌سازی شد (حذف: {rawCount} آیتم کش، {shifts} شیفت، {jobs} استخدام، {talent} آماده‌به‌کارِ جمع‌آوری‌شده). "
            + $"جمع‌آوری مجدد: {summary.TotalPublished} منتشر، {summary.TotalQueued} در صف، {summary.TotalFlagged} پرچم، {summary.TotalSpam} اسپم، {summary.TotalDuplicates} تکراری.";
        return RedirectToPage();
    }

    /// <summary>
    /// Clean up EXISTING aggregated content by re-running the current pipeline over the stored raw
    /// text — no re-fetch, so nothing is lost to sources only exposing recent posts. Long-running
    /// (one AI call per item), so it runs on a background scope and returns immediately; the result
    /// shows up as a new row in the «تاریخچهٔ اجرا» log when it finishes.
    /// </summary>
    public IActionResult OnPostReprocessStored()
    {
        // Capture the scope factory rather than `this`, so the background task does not hold on
        // to the request's page model.
        var scopeFactory = _scopes;

        // NOTE(review): fire-and-forget — nothing awaits or tracks this task, so an app shutdown
        // mid-run can abandon it. Consider a hosted background queue if that matters.
        _ = Task.Run(async () =>
        {
            // The request scope is gone by the time this runs, so resolve services from a new
            // scope; dispose it asynchronously so scoped IAsyncDisposable services are released
            // properly.
            await using var scope = scopeFactory.CreateAsyncScope();
            var svc = scope.ServiceProvider.GetRequiredService<IngestionService>();
            var log = scope.ServiceProvider.GetRequiredService<ILogger<IndexModel>>();

            // talentOnly: «آماده به کار» is NoIndex/Disallow → rebuilding it doesn't churn any indexed
            // URL. Shift/Job detail pages ARE indexed, so they're left to self-clean via turnover.
            try
            {
                await svc.ReprocessAsync(talentOnly: true);
            }
            catch (Exception ex)
            {
                log.LogError(ex, "Background reprocess failed");
            }
        });

        IngestMessage =
            "پردازش مجدد آیتم‌های ذخیره‌شده در پس‌زمینه آغاز شد. "
            + "نتیجه پس از اتمام در «تاریخچهٔ اجرا» نمایش داده می‌شود (بسته به تعداد آیتم‌ها و سرعت هوش مصنوعی، چند دقیقه طول می‌کشد).";
        return RedirectToPage();
    }

    /// <summary>
    /// Fill missing map coordinates on existing aggregated Tehran listings from their stored ad text
    /// (TehranGeo). In place — no AI calls, no re-fetch, and crucially no delete/recreate, so indexed
    /// shift/job URLs keep their IDs. Fast (pure DB + string matching), so it runs inline.
    /// </summary>
    public async Task<IActionResult> OnPostBackfillCoordsAsync()
    {
        var n = await _ingest.BackfillCoordsAsync();
        IngestMessage = $"مختصات تقریبی برای {n} آگهی جمع‌آوری‌شده از روی متن آگهی تکمیل شد (بدون تغییر شناسه یا آدرس صفحه).";
        return RedirectToPage();
    }

    /// <summary>
    /// In-place cleanup of existing aggregated jobs/shifts: ARCHIVE (hide, keep the row) only the
    /// out-of-scope ones (domestic-helper / promotional / spam) per the current validator, plus
    /// near-duplicate job reposts. Archived pages drop from lists + sitemap and return 410 Gone.
    /// Valid listings keep their IDs/URLs. Reversible, no re-fetch, no AI — runs inline.
    /// </summary>
    public async Task<IActionResult> OnPostPurgeInvalidAsync()
    {
        var (archived, deduped) = await _ingest.PurgeInvalidAggregatedAsync();
        IngestMessage =
            $"بایگانیِ درجا: {archived} آگهیِ خارج از حوزه (خدمات منزل/تبلیغاتی/اسپم) و {deduped} استخدامِ تکراری از سایت پنهان شد (وضعیت «بایگانی»؛ ردیف نگه داشته شد و قابل بازگشت است؛ صفحه‌شان ۴۱۰ Gone می‌دهد). "
            + "آگهی‌های معتبر و شناسه/آدرسشان دست‌نخورده ماند.";
        return RedirectToPage();
    }

    /// <summary>Populates every display property from the database and the ingestion service.</summary>
    private async Task LoadAsync()
    {
        Queue = await _db.RawListings
            .Where(r => r.Status == RawListingStatus.New)
            .OrderByDescending(r => r.Confidence)
            .ThenByDescending(r => r.FetchedAt)
            .ToListAsync();

        Flagged = await _db.RawListings
            .Where(r => r.Status == RawListingStatus.Flagged)
            .OrderByDescending(r => r.FetchedAt)
            .ToListAsync();

        SourceNames = _ingest.SourceNames;

        // NOTE(review): shifts are counted as "not Direct" while jobs are counted across all
        // sources — confirm the asymmetry is intentional.
        PublishedShifts = await _db.Shifts.CountAsync(s => s.Source != ShiftSource.Direct);
        PublishedJobs = await _db.JobOpenings.CountAsync();

        Runs = await _db.IngestionRuns
            .OrderByDescending(r => r.RunAt)
            .Take(15)
            .ToListAsync();
    }
}