using JobsMedical.Web.Data;
using JobsMedical.Web.Models;
using JobsMedical.Web.Services;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.Mvc.RazorPages;
using Microsoft.EntityFrameworkCore;

namespace JobsMedical.Web.Pages.Admin;

/// <summary>
/// Admin audit view over crawled items and their outcome (queued / published / flagged / discarded),
/// filterable by status and source. Shows the newest <see cref="MaxItems"/> matching rows together
/// with the total match count, per-status counts and a per-source breakdown.
/// </summary>
[Authorize(Roles = "Admin")]
public class IngestedModel : PageModel
{
    /// <summary>Upper bound on the number of rows loaded into <see cref="Items"/>.</summary>
    private const int MaxItems = 200;

    /// <summary>Family label used when a channel label is blank or has no usable prefix.</summary>
    private const string UnknownSource = "نامشخص";

    /// <summary>Characters that start the per-channel suffix of a source label.</summary>
    private static readonly char[] FamilySeparators = { '/', '(' };

    private readonly AppDbContext _db;

    public IngestedModel(AppDbContext db) => _db = db;

    /// <summary>Newest matching raw listings (at most <see cref="MaxItems"/>).</summary>
    // NOTE(review): element type inferred from the RawListings DbSet — confirm it is RawListing.
    public List<RawListing> Items { get; private set; } = new();

    /// <summary>Number of raw listings matching the current filters (not capped).</summary>
    public int Total { get; private set; }

    /// <summary>Row count per status across all raw listings, ignoring the filters.</summary>
    public Dictionary<RawListingStatus, int> Counts { get; private set; } = new();

    /// <summary>One entry per source family, ordered by published count then total.</summary>
    public List<SourceStat> SourceBreakdown { get; private set; } = new();

    /// <summary>One-shot status message carried across the post-redirect-get cycle.</summary>
    [TempData]
    public string? Message { get; set; }

    /// <summary>Per-source tally: how many crawled vs how many actually published.</summary>
    public record SourceStat(string Source, int Total, int Published);

    /// <summary>Status filter: new|flagged|published|discarded|all (anything else means no filter).</summary>
    [BindProperty(SupportsGet = true)]
    public string? Status { get; set; }

    /// <summary>Source filter: substring matched against the listing's SourceChannel.</summary>
    [BindProperty(SupportsGet = true)]
    public string? Source { get; set; }

    /// <summary>
    /// Loads the per-status counts, the per-source breakdown and the filtered listing page.
    /// </summary>
    public async Task OnGetAsync()
    {
        Counts = await _db.RawListings
            .GroupBy(r => r.Status)
            .Select(g => new { g.Key, C = g.Count() })
            .ToDictionaryAsync(x => x.Key, x => x.C);

        // Per-source breakdown — group exact SourceChannel rows in the database, then fold them
        // into source "families" in memory (تلگرام/ch → تلگرام, وب‌سایت (host) → وب‌سایت) so the
        // table reads one row per source.
        var perChannel = await _db.RawListings
            .GroupBy(r => r.SourceChannel)
            .Select(g => new
            {
                Source = g.Key,
                Total = g.Count(),
                Published = g.Count(x => x.Status == RawListingStatus.Normalized),
            })
            .ToListAsync();

        SourceBreakdown = perChannel
            .GroupBy(x => SourceFamily(x.Source))
            .Select(g => new SourceStat(g.Key, g.Sum(x => x.Total), g.Sum(x => x.Published)))
            .OrderByDescending(s => s.Published)
            .ThenByDescending(s => s.Total)
            .ToList();

        var query = _db.RawListings.AsNoTracking().AsQueryable();

        // "published" maps to Normalized; "all", null and unknown values apply no status filter.
        RawListingStatus? wantedStatus = Status?.ToLowerInvariant() switch
        {
            "new" => RawListingStatus.New,
            "flagged" => RawListingStatus.Flagged,
            "published" => RawListingStatus.Normalized,
            "discarded" => RawListingStatus.Discarded,
            _ => null,
        };

        if (wantedStatus is { } status)
        {
            query = query.Where(r => r.Status == status);
        }

        if (!string.IsNullOrWhiteSpace(Source))
        {
            var source = Source;
            query = query.Where(r => r.SourceChannel.Contains(source));
        }

        // Count before applying the cap so Total reflects every match, not just the page shown.
        Total = await query.CountAsync();
        Items = await query
            .OrderByDescending(r => r.FetchedAt)
            .Take(MaxItems)
            .ToListAsync();
    }

    /// <summary>
    /// Collapse a channel label to its source family: "تلگرام/nurses" → "تلگرام",
    /// "وب‌سایت (medjobs.ir)" → "وب‌سایت". Labels without a '/' or '(' suffix are returned trimmed.
    /// Blank labels, and labels whose prefix before the separator is blank, map to the
    /// unknown-source label so the breakdown never shows an empty-named row.
    /// </summary>
    /// <param name="channel">Raw channel label; may be null or blank.</param>
    /// <returns>The non-empty family name.</returns>
    private static string SourceFamily(string? channel)
    {
        if (string.IsNullOrWhiteSpace(channel))
        {
            return UnknownSource;
        }

        // A separator at index 0 leaves no prefix to cut to, so the whole label is kept.
        var cut = channel.IndexOfAny(FamilySeparators);
        var family = (cut > 0 ? channel[..cut] : channel).Trim();

        return family.Length > 0 ? family : UnknownSource;
    }

    /// <summary>
    /// ARCHIVE (never delete) everything published from ingestion: the aggregated Shift/Job/Talent
    /// posts are flipped to Archived (hidden from the site but kept for analytics); the raw crawl
    /// rows are retained untouched as the permanent archive.
    /// </summary>
    /// <remarks>
    /// Each bulk update only touches rows that are not already Archived, so re-running the handler
    /// after a partial failure finishes the remaining rows.
    /// </remarks>
    /// <returns>A redirect back to this page that keeps the active status and source filters.</returns>
    public async Task<IActionResult> OnPostArchivePublishedAsync()
    {
        var shifts = await _db.Shifts
            .Where(s => s.Source == ShiftSource.Aggregated && s.Status != ShiftStatus.Archived)
            .ExecuteUpdateAsync(u => u.SetProperty(s => s.Status, ShiftStatus.Archived));

        var jobs = await _db.JobOpenings
            .Where(j => j.Source == ShiftSource.Aggregated && j.Status != ShiftStatus.Archived)
            .ExecuteUpdateAsync(u => u.SetProperty(j => j.Status, ShiftStatus.Archived));

        var talent = await _db.TalentListings
            .Where(t => t.Source == ShiftSource.Aggregated && t.Status != ShiftStatus.Archived)
            .ExecuteUpdateAsync(u => u.SetProperty(t => t.Status, ShiftStatus.Archived));

        // Affected-row counts are rendered with Persian digits for the UI message.
        string P(int n) => JalaliDate.ToPersianDigits(n.ToString());

        Message = $"بایگانی شد (از سایت پنهان، در پایگاه‌داده نگه‌داری شد): {P(shifts)} شیفت، {P(jobs)} استخدام، {P(talent)} آماده‌به‌کار.";

        // Keep both filters so the admin returns to the same view; null values are omitted.
        return RedirectToPage(new { Status, Source });
    }
}