Admin/Ingested: per-source breakdown (published vs total crawled)
Group RawListings by SourceChannel and fold the per-channel/per-host labels into source families (تلگرام/x → تلگرام, وبسایت (host) → وبسایت). Show a published-vs-total table so it is clear which sources are actually producing listings (e.g. why everything is coming from دیوار when Telegram's proxy is down).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -34,6 +34,32 @@
|
||||
</form>
|
||||
}
|
||||
|
||||
@if (Model.SourceBreakdown.Count > 0)
|
||||
{
|
||||
<div class="card card-pad" style="margin-bottom:14px;">
|
||||
<strong style="display:block; margin-bottom:8px;">📊 به تفکیک منبع</strong>
|
||||
<table style="width:100%; border-collapse:collapse; font-size:13.5px;">
|
||||
<thead>
|
||||
<tr style="color:var(--muted);">
|
||||
<th style="text-align:start; padding:4px 0;">منبع</th>
|
||||
<th style="text-align:start;">منتشرشده</th>
|
||||
<th style="text-align:start;">کل دریافت</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@foreach (var s in Model.SourceBreakdown)
|
||||
{
|
||||
<tr style="border-top:1px solid var(--line);">
|
||||
<td style="padding:6px 0;"><strong>@s.Source</strong></td>
|
||||
<td><span class="badge badge-verified">@P(s.Published)</span></td>
|
||||
<td class="muted">@P(s.Total)</td>
|
||||
</tr>
|
||||
}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
}
|
||||
|
||||
<div class="ing-filters">
|
||||
@Html.Raw(Pill("all", "همه", Model.Counts.Values.Sum()))
|
||||
@Html.Raw(Pill("new", "در صف", C(JobsMedical.Web.Models.RawListingStatus.New)))
|
||||
|
||||
@@ -19,8 +19,12 @@ public class IngestedModel : PageModel
|
||||
public List<RawListing> Items { get; private set; } = new();
|
||||
public int Total { get; private set; }
|
||||
public Dictionary<RawListingStatus, int> Counts { get; private set; } = new();
|
||||
public List<SourceStat> SourceBreakdown { get; private set; } = new();
|
||||
[TempData] public string? Message { get; set; }
|
||||
|
||||
/// <summary>Per-source tally: how many listings were crawled in total vs. how many of them were actually published.</summary>
|
||||
public record SourceStat(string Source, int Total, int Published);
|
||||
|
||||
[BindProperty(SupportsGet = true)] public string? Status { get; set; } // new|flagged|published|discarded|all
|
||||
[BindProperty(SupportsGet = true)] public string? Source { get; set; }
|
||||
|
||||
@@ -29,6 +33,22 @@ public class IngestedModel : PageModel
|
||||
Counts = await _db.RawListings.GroupBy(r => r.Status)
|
||||
.Select(g => new { g.Key, C = g.Count() }).ToDictionaryAsync(x => x.Key, x => x.C);
|
||||
|
||||
// Per-source breakdown — group exact SourceChannel rows then fold into source "families"
|
||||
// (تلگرام/ch → تلگرام, وبسایت (host) → وبسایت) so the table reads one row per source.
|
||||
var bySource = await _db.RawListings.GroupBy(r => r.SourceChannel)
|
||||
.Select(g => new
|
||||
{
|
||||
Source = g.Key,
|
||||
Total = g.Count(),
|
||||
Published = g.Count(x => x.Status == RawListingStatus.Normalized),
|
||||
})
|
||||
.ToListAsync();
|
||||
SourceBreakdown = bySource
|
||||
.GroupBy(x => SourceFamily(x.Source))
|
||||
.Select(g => new SourceStat(g.Key, g.Sum(x => x.Total), g.Sum(x => x.Published)))
|
||||
.OrderByDescending(s => s.Published).ThenByDescending(s => s.Total)
|
||||
.ToList();
|
||||
|
||||
var q = _db.RawListings.AsNoTracking().AsQueryable();
|
||||
|
||||
var st = Status?.ToLowerInvariant() switch
|
||||
@@ -46,6 +66,15 @@ public class IngestedModel : PageModel
|
||||
Items = await q.OrderByDescending(r => r.FetchedAt).Take(200).ToListAsync();
|
||||
}
|
||||
|
||||
/// <summary>Characters that start the per-channel / per-host suffix of a channel label.</summary>
private static readonly char[] FamilySeparators = { '/', '(' };

/// <summary>
/// Collapses a channel label to its source family by dropping everything from the first
/// <c>/</c> or <c>(</c> onwards: "تلگرام/nurses" → "تلگرام", "وبسایت (medjobs.ir)" → "وبسایت".
/// Labels without a suffix are returned trimmed and otherwise unchanged.
/// </summary>
/// <param name="channel">Raw channel label; may be <c>null</c>, empty, or whitespace.</param>
/// <returns>
/// The trimmed family name; "نامشخص" when <paramref name="channel"/> is null or blank; the whole
/// trimmed label when it starts with a separator (so the result is never an empty string).
/// </returns>
private static string SourceFamily(string? channel)
{
    if (string.IsNullOrWhiteSpace(channel))
    {
        return "نامشخص";
    }

    // Trim BEFORE searching: otherwise a label such as " (host)" finds the separator at
    // index 1, keeps only the leading space, and collapses to an empty family name.
    var label = channel.Trim();
    var cut = label.IndexOfAny(FamilySeparators);

    // cut == -1: no suffix present; cut == 0: nothing precedes the separator. Keep the label as-is.
    return cut > 0 ? label[..cut].TrimEnd() : label;
}
|
||||
|
||||
/// <summary>
|
||||
/// ARCHIVE (never delete) everything published from ingestion: the aggregated Shift/Job/Talent
|
||||
/// posts are flipped to Archived (hidden from the site but kept for analytics); the raw crawl
|
||||
|
||||
Reference in New Issue
Block a user