efbf998caf
Group RawListings by SourceChannel, fold per-channel/per-host labels into source families (تلگرام/x → تلگرام, وبسایت (host) → وبسایت), and show a published-vs-total table so it's clear which sources are actually producing (e.g. why everything is coming from دیوار when Telegram's proxy is down). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
100 lines
4.8 KiB
C#
100 lines
4.8 KiB
C#
using JobsMedical.Web.Data;
|
|
using JobsMedical.Web.Models;
|
|
using JobsMedical.Web.Services;
|
|
using Microsoft.AspNetCore.Authorization;
|
|
using Microsoft.AspNetCore.Mvc;
|
|
using Microsoft.AspNetCore.Mvc.RazorPages;
|
|
using Microsoft.EntityFrameworkCore;
|
|
|
|
namespace JobsMedical.Web.Pages.Admin;
|
|
|
|
/// <summary>
/// Admin page model listing crawled <see cref="RawListing"/> rows with their outcome
/// (queued / published / flagged / discarded), filterable by status and source — the audit
/// trail of ingestion. Also exposes per-status counts and a per-source published-vs-total
/// breakdown, plus a handler that archives every aggregated post.
/// </summary>
[Authorize(Roles = "Admin")]
public class IngestedModel : PageModel
{
    /// <summary>Upper bound on the number of rows loaded into <see cref="Items"/> per request.</summary>
    private const int MaxItems = 200;

    /// <summary>Family label used for rows whose source channel is null or blank.</summary>
    private const string UnknownSource = "نامشخص";

    /// <summary>Characters that start the per-channel / per-host suffix of a channel label.</summary>
    private static readonly char[] FamilySeparators = { '/', '(' };

    private readonly AppDbContext _db;

    /// <summary>Creates the page model over the given database context.</summary>
    public IngestedModel(AppDbContext db) => _db = db;

    /// <summary>The most recently fetched rows matching the current filters (at most 200).</summary>
    public List<RawListing> Items { get; private set; } = new();

    /// <summary>Number of rows matching the current filters, not capped like <see cref="Items"/>.</summary>
    public int Total { get; private set; }

    /// <summary>Row count per <see cref="RawListingStatus"/> over all raw listings (filters not applied).</summary>
    public Dictionary<RawListingStatus, int> Counts { get; private set; } = new();

    /// <summary>One entry per source family, ordered by published count, then by total, both descending.</summary>
    public List<SourceStat> SourceBreakdown { get; private set; } = new();

    /// <summary>Status message stored in TempData; set by the archive handler before it redirects.</summary>
    [TempData] public string? Message { get; set; }

    /// <summary>Per-source tally: how many crawled vs how many actually published.</summary>
    public record SourceStat(string Source, int Total, int Published);

    /// <summary>Status filter: new | flagged | published | discarded; anything else (e.g. "all") means no filter.</summary>
    [BindProperty(SupportsGet = true)] public string? Status { get; set; }

    /// <summary>Source filter: keeps rows whose SourceChannel contains this text; blank means no filter.</summary>
    [BindProperty(SupportsGet = true)] public string? Source { get; set; }

    /// <summary>
    /// Loads the per-status counts, the per-source breakdown, and the filtered list of rows.
    /// </summary>
    public async Task OnGetAsync()
    {
        Counts = await _db.RawListings
            .GroupBy(r => r.Status)
            .Select(g => new { g.Key, C = g.Count() })
            .ToDictionaryAsync(x => x.Key, x => x.C);

        // Per-source breakdown: group by the exact SourceChannel in the database first, then fold
        // those rows into source "families" in memory ("تلگرام/ch" → "تلگرام",
        // "وبسایت (host)" → "وبسایت") so the table reads one row per source.
        var bySource = await _db.RawListings
            .GroupBy(r => r.SourceChannel)
            .Select(g => new
            {
                Source = g.Key,
                Total = g.Count(),
                Published = g.Count(x => x.Status == RawListingStatus.Normalized),
            })
            .ToListAsync();

        SourceBreakdown = bySource
            .GroupBy(x => SourceFamily(x.Source))
            .Select(g => new SourceStat(g.Key, g.Sum(x => x.Total), g.Sum(x => x.Published)))
            .OrderByDescending(s => s.Published)
            .ThenByDescending(s => s.Total)
            .ToList();

        var q = _db.RawListings.AsNoTracking().AsQueryable();

        // "published" in the UI corresponds to RawListingStatus.Normalized.
        var st = Status?.ToLowerInvariant() switch
        {
            "new" => (RawListingStatus?)RawListingStatus.New,
            "flagged" => RawListingStatus.Flagged,
            "published" => RawListingStatus.Normalized,
            "discarded" => RawListingStatus.Discarded,
            _ => null,
        };

        // Narrow to a non-null local so the query compares against a plain enum value.
        if (st is { } status) q = q.Where(r => r.Status == status);
        if (!string.IsNullOrWhiteSpace(Source)) q = q.Where(r => r.SourceChannel.Contains(Source));

        // Total counts every match; Items is limited to the newest MaxItems rows.
        Total = await q.CountAsync();
        Items = await q.OrderByDescending(r => r.FetchedAt).Take(MaxItems).ToListAsync();
    }

    /// <summary>
    /// Collapses a channel label to its source family by cutting at the first '/' or '(':
    /// "تلگرام/nurses" → "تلگرام", "وبسایت (medjobs.ir)" → "وبسایت". Labels without a
    /// separator are returned trimmed.
    /// </summary>
    /// <param name="channel">The raw channel label; may be null or blank.</param>
    /// <returns>
    /// The trimmed family name, or "نامشخص" when <paramref name="channel"/> is null or blank.
    /// A label that begins with a separator is returned whole (trimmed) rather than as an empty name.
    /// </returns>
    private static string SourceFamily(string? channel)
    {
        if (string.IsNullOrWhiteSpace(channel)) return UnknownSource;

        // Trim before searching so leading whitespace cannot produce an empty family name.
        var label = channel.Trim();
        var cut = label.IndexOfAny(FamilySeparators);
        return cut > 0 ? label[..cut].TrimEnd() : label;
    }

    /// <summary>
    /// ARCHIVE (never delete) everything published from ingestion: the aggregated Shift/Job/Talent
    /// posts are flipped to Archived (hidden from the site but kept for analytics); the raw crawl
    /// rows are not touched by this handler.
    /// </summary>
    /// <returns>A redirect back to this page that keeps the current Status and Source filters.</returns>
    public async Task<IActionResult> OnPostArchivePublishedAsync()
    {
        // Each bulk update skips rows that are already archived, so repeating the action
        // only touches rows that still need archiving.
        var shifts = await _db.Shifts
            .Where(s => s.Source == ShiftSource.Aggregated && s.Status != ShiftStatus.Archived)
            .ExecuteUpdateAsync(u => u.SetProperty(s => s.Status, ShiftStatus.Archived));
        var jobs = await _db.JobOpenings
            .Where(j => j.Source == ShiftSource.Aggregated && j.Status != ShiftStatus.Archived)
            .ExecuteUpdateAsync(u => u.SetProperty(j => j.Status, ShiftStatus.Archived));
        var talent = await _db.TalentListings
            .Where(t => t.Source == ShiftSource.Aggregated && t.Status != ShiftStatus.Archived)
            .ExecuteUpdateAsync(u => u.SetProperty(t => t.Status, ShiftStatus.Archived));

        // Format culture-independently before the digits are converted for display.
        string P(int n) => JalaliDate.ToPersianDigits(
            n.ToString(System.Globalization.CultureInfo.InvariantCulture));

        Message = $"بایگانی شد (از سایت پنهان، در پایگاهداده نگهداری شد): {P(shifts)} شیفت، {P(jobs)} استخدام، {P(talent)} آمادهبهکار.";

        // Round-trip both filters so the admin lands back on the same view.
        return RedirectToPage(new { Status, Source });
    }
}
|