[Ingest] Persistent crawl run-log + per-source breakdown on admin queue
CI/CD / CI · dotnet build (push) Has been cancelled
CI/CD / Deploy · hamkadr (push) Has been cancelled

Each ingestion run now records an IngestionRun row (found/queued/published/flagged/spam/duplicates + a per-source detail string). Admin → صف آگهی‌ها shows a «تاریخچه جمع‌آوری» table of the last 15 runs (hover a row for the per-source breakdown), so admins can see how much each source found vs added over time. IngestionSummary gains TotalFetched. Migration: IngestionRuns table.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-08 06:23:58 +03:30
parent 524c66e25e
commit 487c7ca82f
8 changed files with 1525 additions and 1 deletions
+1
View File
@@ -30,6 +30,7 @@ public class AppDbContext : DbContext, IDataProtectionKeyContext
public DbSet<Report> Reports => Set<Report>();
public DbSet<FacilityDocument> FacilityDocuments => Set<FacilityDocument>();
public DbSet<JobAlert> JobAlerts => Set<JobAlert>();
public DbSet<IngestionRun> IngestionRuns => Set<IngestionRun>();
public DbSet<Review> Reviews => Set<Review>();
protected override void OnModelCreating(ModelBuilder b)
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,43 @@
using System;
using Microsoft.EntityFrameworkCore.Migrations;
using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
#nullable disable
namespace JobsMedical.Web.Migrations
{
    /// <summary>
    /// Schema migration that introduces the <c>IngestionRuns</c> table: one row per ingestion run,
    /// holding a timestamp, six integer counters and an optional detail string.
    /// </summary>
    public partial class IngestionRunLog : Migration
    {
        /// <summary>Applies the migration by creating the <c>IngestionRuns</c> table.</summary>
        /// <param name="migrationBuilder">Builder that collects the schema operations to run.</param>
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.CreateTable(
                name: "IngestionRuns",
                columns: cols => new
                {
                    // Integer key generated by the database as an identity column.
                    Id = cols.Column<int>(type: "integer", nullable: false)
                        .Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn),

                    // When the run was recorded.
                    RunAt = cols.Column<DateTime>(type: "timestamp with time zone", nullable: false),

                    // Per-run counters; all are required integers.
                    Fetched = cols.Column<int>(type: "integer", nullable: false),
                    Queued = cols.Column<int>(type: "integer", nullable: false),
                    Published = cols.Column<int>(type: "integer", nullable: false),
                    Flagged = cols.Column<int>(type: "integer", nullable: false),
                    Spam = cols.Column<int>(type: "integer", nullable: false),
                    Duplicates = cols.Column<int>(type: "integer", nullable: false),

                    // Optional free-text breakdown, capped at 2000 characters.
                    Detail = cols.Column<string>(type: "character varying(2000)", maxLength: 2000, nullable: true)
                },
                constraints: keys =>
                {
                    keys.PrimaryKey("PK_IngestionRuns", row => row.Id);
                });
        }

        /// <summary>Reverts the migration by dropping the <c>IngestionRuns</c> table.</summary>
        /// <param name="migrationBuilder">Builder that collects the schema operations to run.</param>
        protected override void Down(MigrationBuilder migrationBuilder)
            => migrationBuilder.DropTable(name: "IngestionRuns");
    }
}
@@ -421,6 +421,44 @@ namespace JobsMedical.Web.Migrations
b.ToTable("FacilityDocuments");
});
// Snapshot of the IngestionRun entity, mapped to the "IngestionRuns" table:
// an integer identity key (Id), a timestamptz RunAt, six integer counters
// (Duplicates, Fetched, Flagged, Published, Queued, Spam) and a Detail string
// limited to 2000 characters.
modelBuilder.Entity("JobsMedical.Web.Models.IngestionRun", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
// Id values are produced by a PostgreSQL identity-by-default column.
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Detail")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
b.Property<int>("Duplicates")
.HasColumnType("integer");
b.Property<int>("Fetched")
.HasColumnType("integer");
b.Property<int>("Flagged")
.HasColumnType("integer");
b.Property<int>("Published")
.HasColumnType("integer");
b.Property<int>("Queued")
.HasColumnType("integer");
b.Property<DateTime>("RunAt")
.HasColumnType("timestamp with time zone");
b.Property<int>("Spam")
.HasColumnType("integer");
b.HasKey("Id");
b.ToTable("IngestionRuns");
});
modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
{
b.Property<long>("Id")
@@ -0,0 +1,21 @@
using System.ComponentModel.DataAnnotations;
namespace JobsMedical.Web.Models;
/// <summary>
/// Outcome of a single ingestion run. Rows are retained so admins can review a history of what
/// was crawled and how much was found, queued, published, flagged, etc., together with a
/// per-source breakdown.
/// </summary>
public class IngestionRun
{
    /// <summary>Primary key.</summary>
    public int Id { get; set; }

    /// <summary>Timestamp of the run; defaults to the UTC time at which the instance is created.</summary>
    public DateTime RunAt { get; set; } = DateTime.UtcNow;

    /// <summary>Total items pulled from all sources.</summary>
    public int Fetched { get; set; }

    /// <summary>Items sent to the review queue.</summary>
    public int Queued { get; set; }

    /// <summary>Items that were auto-published.</summary>
    public int Published { get; set; }

    /// <summary>Items marked as needing review.</summary>
    public int Flagged { get; set; }

    /// <summary>Items discarded as spam or irrelevant.</summary>
    public int Spam { get; set; }

    /// <summary>Items skipped because they were already seen.</summary>
    public int Duplicates { get; set; }

    /// <summary>
    /// Human-readable per-source breakdown (at most 2000 characters),
    /// e.g. "دیوار: یافت ۱۲…؛ مدجابز: یافت ۴۰…".
    /// </summary>
    [MaxLength(2000)]
    public string? Detail { get; set; }
}
@@ -62,6 +62,40 @@
</aside>
<div>
@* Ingestion history table: rendered only when Model.Runs contains at least one row.
   Each run becomes one table row showing its time and six counters in Persian digits;
   the per-source breakdown (run.Detail) is exposed as the row's title tooltip. *@
@if (Model.Runs.Count > 0)
{
<h2 style="font-size:20px; margin-top:0;">تاریخچه جمع‌آوری</h2>
<div class="card card-pad" style="margin-bottom:18px; overflow-x:auto;">
<table style="width:100%; border-collapse:collapse; font-size:13px; white-space:nowrap;">
<thead>
<tr style="text-align:start; color:var(--muted);">
<th style="padding:6px 8px;">زمان</th>
<th style="padding:6px 8px;">یافت‌شده</th>
<th style="padding:6px 8px;">صف</th>
<th style="padding:6px 8px;">منتشر</th>
<th style="padding:6px 8px;">پرچم</th>
<th style="padding:6px 8px;">اسپم</th>
<th style="padding:6px 8px;">تکراری</th>
</tr>
</thead>
<tbody>
@* Rows appear in the order Model.Runs provides them. *@
@foreach (var run in Model.Runs)
{
<tr style="border-top:1px solid var(--line);" title="@run.Detail">
@* NOTE(review): RunAt is formatted as stored, with no time-zone conversion in this view —
   presumably UTC; confirm whether admins expect local time here. *@
<td style="padding:6px 8px;">@JalaliDate.ToLongDate(DateOnly.FromDateTime(run.RunAt)) @run.RunAt.ToString("HH:mm")</td>
<td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Fetched.ToString())</td>
<td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Queued.ToString())</td>
<td style="padding:6px 8px; color:var(--primary-dark); font-weight:700;">@JalaliDate.ToPersianDigits(run.Published.ToString())</td>
<td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Flagged.ToString())</td>
<td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Spam.ToString())</td>
<td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Duplicates.ToString())</td>
</tr>
}
</tbody>
</table>
@* Hint text: tells the admin to hover a row for per-source details and where the full log lives. *@
<p class="muted" style="font-size:11px; margin:8px 0 0;">جزئیات هر منبع را با نگه‌داشتن نشانگر روی هر ردیف ببین. لاگ کامل: <code dir="ltr">docker logs hamkadr_api</code></p>
</div>
}
<h2 style="font-size:20px; margin-top:0;">صف بررسی</h2>
@if (Model.Queue.Count == 0)
{
@@ -25,6 +25,7 @@ public class IndexModel : PageModel
public IReadOnlyList<string> SourceNames { get; private set; } = new List<string>();
public int PublishedShifts { get; private set; }
public int PublishedJobs { get; private set; }
public List<IngestionRun> Runs { get; private set; } = new();
[BindProperty] public string? SourceChannel { get; set; }
[BindProperty] public string? RawText { get; set; }
@@ -67,5 +68,6 @@ public class IndexModel : PageModel
SourceNames = _ingest.SourceNames;
PublishedShifts = await _db.Shifts.CountAsync(s => s.Source != ShiftSource.Direct);
PublishedJobs = await _db.JobOpenings.CountAsync();
Runs = await _db.IngestionRuns.OrderByDescending(r => r.RunAt).Take(15).ToListAsync();
}
}
@@ -11,6 +11,7 @@ public record SourceResult(string Source, int Fetched, int Queued, int Published
public record IngestionSummary(List<SourceResult> Sources)
{
public int TotalFetched => Sources.Sum(s => s.Fetched);
public int TotalQueued => Sources.Sum(s => s.Queued);
public int TotalPublished => Sources.Sum(s => s.Published);
public int TotalFlagged => Sources.Sum(s => s.Flagged);
@@ -108,7 +109,27 @@ public class IngestionService
source.Name, fetched, queued, published, flagged, spam, dupes);
}
return new IngestionSummary(results);
var summary = new IngestionSummary(results);
// Persist a run-log row so admins get a crawl history (with a per-source breakdown).
if (results.Count > 0)
{
var detail = string.Join("؛ ", results.Select(r =>
$"{r.Source}: یافت {r.Fetched}، صف {r.Queued}، منتشر {r.Published}، پرچم {r.Flagged}، اسپم {r.Spam}، تکراری {r.Duplicates}"));
_db.IngestionRuns.Add(new IngestionRun
{
Fetched = summary.TotalFetched,
Queued = summary.TotalQueued,
Published = summary.TotalPublished,
Flagged = summary.TotalFlagged,
Spam = summary.TotalSpam,
Duplicates = summary.TotalDuplicates,
Detail = detail.Length > 2000 ? detail[..2000] : detail,
});
await _db.SaveChangesAsync(ct);
}
return summary;
}
private static (RawListingStatus status, string? reason, int confidence) Decide(