diff --git a/src/JobsMedical.Web/Pages/Admin/Ingested.cshtml b/src/JobsMedical.Web/Pages/Admin/Ingested.cshtml index 9ad2171..d57c7b6 100644 --- a/src/JobsMedical.Web/Pages/Admin/Ingested.cshtml +++ b/src/JobsMedical.Web/Pages/Admin/Ingested.cshtml @@ -34,6 +34,32 @@ } + @if (Model.SourceBreakdown.Count > 0) + { +
+ 📊 به تفکیک منبع + + + + + + + + + + @foreach (var s in Model.SourceBreakdown) + { + + + + + + } + +
منبعمنتشرشدهکل دریافت
@s.Source@P(s.Published)@P(s.Total)
+
+ } +
@Html.Raw(Pill("all", "همه", Model.Counts.Values.Sum())) @Html.Raw(Pill("new", "در صف", C(JobsMedical.Web.Models.RawListingStatus.New))) diff --git a/src/JobsMedical.Web/Pages/Admin/Ingested.cshtml.cs b/src/JobsMedical.Web/Pages/Admin/Ingested.cshtml.cs index 89114db..c51a31b 100644 --- a/src/JobsMedical.Web/Pages/Admin/Ingested.cshtml.cs +++ b/src/JobsMedical.Web/Pages/Admin/Ingested.cshtml.cs @@ -19,8 +19,12 @@ public class IngestedModel : PageModel public List Items { get; private set; } = new(); public int Total { get; private set; } public Dictionary Counts { get; private set; } = new(); + public List SourceBreakdown { get; private set; } = new(); [TempData] public string? Message { get; set; } + /// Per-source tally: how many crawled vs how many actually published. + public record SourceStat(string Source, int Total, int Published); + [BindProperty(SupportsGet = true)] public string? Status { get; set; } // new|flagged|published|discarded|all [BindProperty(SupportsGet = true)] public string? Source { get; set; } @@ -29,6 +33,22 @@ public class IngestedModel : PageModel Counts = await _db.RawListings.GroupBy(r => r.Status) .Select(g => new { g.Key, C = g.Count() }).ToDictionaryAsync(x => x.Key, x => x.C); + // Per-source breakdown — group exact SourceChannel rows then fold into source "families" + // (تلگرام/ch → تلگرام, وب‌سایت (host) → وب‌سایت) so the table reads one row per source. 
+        var bySource = await _db.RawListings.GroupBy(r => r.SourceChannel)
+            .Select(g => new
+            {
+                Source = g.Key,
+                Total = g.Count(),
+                Published = g.Count(x => x.Status == RawListingStatus.Normalized),
+            })
+            .ToListAsync();
+        SourceBreakdown = bySource
+            .GroupBy(x => SourceFamily(x.Source))
+            .Select(g => new SourceStat(g.Key, g.Sum(x => x.Total), g.Sum(x => x.Published)))
+            .OrderByDescending(s => s.Published).ThenByDescending(s => s.Total)
+            .ToList();
+
         var q = _db.RawListings.AsNoTracking().AsQueryable();
 
         var st = Status?.ToLowerInvariant() switch
@@ -46,6 +66,33 @@ public class IngestedModel : PageModel
         Items = await q.OrderByDescending(r => r.FetchedAt).Take(200).ToListAsync();
     }
 
+    /// <summary>Characters that begin the per-channel suffix of a source label.</summary>
+    private static readonly char[] FamilySeparators = { '/', '(' };
+
+    /// <summary>
+    /// Collapses a channel label to its source family by cutting at the first '/' or '(':
+    /// "تلگرام/nurses" → "تلگرام", "وب‌سایت (medjobs.ir)" → "وب‌سایت". Labels without a
+    /// suffix (Divar/Bale/Medjobs) are returned trimmed.
+    /// </summary>
+    /// <param name="channel">Raw source-channel label; may be null or blank.</param>
+    /// <returns>
+    /// The trimmed family name, or "نامشخص" when the label is null/blank or when nothing
+    /// but whitespace precedes the suffix.
+    /// </returns>
+    private static string SourceFamily(string? channel)
+    {
+        const string unknown = "نامشخص";
+        if (string.IsNullOrWhiteSpace(channel)) return unknown;
+
+        // cut > 0 (not >= 0): a label that starts with a separator keeps its full text
+        // rather than collapsing to an empty family.
+        var cut = channel.IndexOfAny(FamilySeparators);
+        var family = (cut > 0 ? channel[..cut] : channel).Trim();
+
+        // A whitespace-only prefix (e.g. " /x") would otherwise produce a row with a blank name.
+        return family.Length > 0 ? family : unknown;
+    }
+
     ///
     /// ARCHIVE (never delete) everything published from ingestion: the aggregated Shift/Job/Talent
     /// posts are flipped to Archived (hidden from the site but kept for analytics); the raw crawl