[Ingest] Persistent crawl run-log + per-source breakdown on admin queue
Each ingestion run now records an IngestionRun row (found/queued/published/flagged/spam/duplicates + a per-source detail string). Admin → صف آگهی‌ها shows a «تاریخچه جمع‌آوری» table of the last 15 runs (hover a row for the per-source breakdown), so admins can see how much each source found vs added over time. IngestionSummary gains TotalFetched. Migration: IngestionRuns table. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -30,6 +30,7 @@ public class AppDbContext : DbContext, IDataProtectionKeyContext
|
||||
public DbSet<Report> Reports => Set<Report>();
|
||||
public DbSet<FacilityDocument> FacilityDocuments => Set<FacilityDocument>();
|
||||
public DbSet<JobAlert> JobAlerts => Set<JobAlert>();
|
||||
public DbSet<IngestionRun> IngestionRuns => Set<IngestionRun>();
|
||||
public DbSet<Review> Reviews => Set<Review>();
|
||||
|
||||
protected override void OnModelCreating(ModelBuilder b)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,43 @@
|
||||
using System;
|
||||
using Microsoft.EntityFrameworkCore.Migrations;
|
||||
using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
|
||||
|
||||
#nullable disable
|
||||
|
||||
namespace JobsMedical.Web.Migrations
|
||||
{
|
||||
/// <summary>
/// Creates the <c>IngestionRuns</c> table: one row per ingestion run holding a timestamp,
/// six integer counters and an optional per-source detail string (max 2000 characters).
/// </summary>
public partial class IngestionRunLog : Migration
{
    /// <inheritdoc />
    protected override void Up(MigrationBuilder migrationBuilder)
    {
        migrationBuilder.CreateTable(
            name: "IngestionRuns",
            columns: table => new
            {
                // Identity primary key (PostgreSQL "GENERATED BY DEFAULT AS IDENTITY").
                Id = table.Column<int>(type: "integer", nullable: false)
                    .Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn),
                RunAt = table.Column<DateTime>(type: "timestamp with time zone", nullable: false),
                Fetched = table.Column<int>(type: "integer", nullable: false),
                Queued = table.Column<int>(type: "integer", nullable: false),
                Published = table.Column<int>(type: "integer", nullable: false),
                Flagged = table.Column<int>(type: "integer", nullable: false),
                Spam = table.Column<int>(type: "integer", nullable: false),
                Duplicates = table.Column<int>(type: "integer", nullable: false),
                // The only nullable column; length-capped at 2000 characters.
                Detail = table.Column<string>(type: "character varying(2000)", maxLength: 2000, nullable: true)
            },
            constraints: table =>
            {
                table.PrimaryKey("PK_IngestionRuns", x => x.Id);
            });
    }

    /// <inheritdoc />
    protected override void Down(MigrationBuilder migrationBuilder)
    {
        // Reverting simply drops the table; any recorded run history is lost.
        migrationBuilder.DropTable(
            name: "IngestionRuns");
    }
}
|
||||
}
|
||||
@@ -421,6 +421,44 @@ namespace JobsMedical.Web.Migrations
|
||||
b.ToTable("FacilityDocuments");
|
||||
});
|
||||
|
||||
// Model snapshot entry for the IngestionRun entity, mapped to the "IngestionRuns" table.
modelBuilder.Entity("JobsMedical.Web.Models.IngestionRun", b =>
{
    b.Property<int>("Id")
        .ValueGeneratedOnAdd()
        .HasColumnType("integer");

    NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));

    b.Property<string>("Detail")
        .HasMaxLength(2000)
        .HasColumnType("character varying(2000)");

    b.Property<int>("Duplicates")
        .HasColumnType("integer");

    b.Property<int>("Fetched")
        .HasColumnType("integer");

    b.Property<int>("Flagged")
        .HasColumnType("integer");

    b.Property<int>("Published")
        .HasColumnType("integer");

    b.Property<int>("Queued")
        .HasColumnType("integer");

    b.Property<DateTime>("RunAt")
        .HasColumnType("timestamp with time zone");

    b.Property<int>("Spam")
        .HasColumnType("integer");

    b.HasKey("Id");

    b.ToTable("IngestionRuns");
});
|
||||
|
||||
modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
|
||||
{
|
||||
b.Property<long>("Id")
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
|
||||
namespace JobsMedical.Web.Models;
|
||||
|
||||
/// <summary>
/// Outcome of a single ingestion run. Rows are kept so admins can see a history of what was
/// crawled: how much was found, queued, published, flagged, discarded as spam or skipped as
/// duplicate, plus a human-readable per-source breakdown.
/// </summary>
public class IngestionRun
{
    /// <summary>Primary key.</summary>
    public int Id { get; set; }

    /// <summary>Timestamp of the run; defaults to the UTC time at which the object is constructed.</summary>
    public DateTime RunAt { get; set; } = DateTime.UtcNow;

    /// <summary>Total items pulled from all sources.</summary>
    public int Fetched { get; set; }

    /// <summary>Items sent to the review queue.</summary>
    public int Queued { get; set; }

    /// <summary>Items that were auto-published.</summary>
    public int Published { get; set; }

    /// <summary>Items marked as needing review.</summary>
    public int Flagged { get; set; }

    /// <summary>Items discarded as spam or irrelevant.</summary>
    public int Spam { get; set; }

    /// <summary>Items skipped because they were already seen.</summary>
    public int Duplicates { get; set; }

    /// <summary>
    /// Human-readable per-source breakdown (source name followed by its counters), e.g.
    /// "دیوار: یافت ۱۲…؛ مدجابز: یافت ۴۰…". Optional; limited to 2000 characters.
    /// </summary>
    [MaxLength(2000)]
    public string? Detail { get; set; }
}
|
||||
@@ -62,6 +62,40 @@
|
||||
</aside>
|
||||
|
||||
<div>
|
||||
@* Crawl history: one table row per ingestion run in Model.Runs; each row's tooltip (title) shows that run's per-source breakdown. Hidden entirely when there are no runs. *@
@* NOTE(review): RunAt is rendered as stored, with no time-zone conversion in this view — presumably UTC; confirm that is what admins should see. *@
@if (Model.Runs.Count > 0)
{
    <h2 style="font-size:20px; margin-top:0;">تاریخچه جمع‌آوری</h2>
    <div class="card card-pad" style="margin-bottom:18px; overflow-x:auto;">
        <table style="width:100%; border-collapse:collapse; font-size:13px; white-space:nowrap;">
            <thead>
                <tr style="text-align:start; color:var(--muted);">
                    <th style="padding:6px 8px;">زمان</th>
                    <th style="padding:6px 8px;">یافت‌شده</th>
                    <th style="padding:6px 8px;">صف</th>
                    <th style="padding:6px 8px;">منتشر</th>
                    <th style="padding:6px 8px;">پرچم</th>
                    <th style="padding:6px 8px;">اسپم</th>
                    <th style="padding:6px 8px;">تکراری</th>
                </tr>
            </thead>
            <tbody>
                @foreach (var run in Model.Runs)
                {
                    <tr style="border-top:1px solid var(--line);" title="@run.Detail">
                        <td style="padding:6px 8px;">@JalaliDate.ToLongDate(DateOnly.FromDateTime(run.RunAt)) @run.RunAt.ToString("HH:mm")</td>
                        <td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Fetched.ToString())</td>
                        <td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Queued.ToString())</td>
                        <td style="padding:6px 8px; color:var(--primary-dark); font-weight:700;">@JalaliDate.ToPersianDigits(run.Published.ToString())</td>
                        <td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Flagged.ToString())</td>
                        <td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Spam.ToString())</td>
                        <td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Duplicates.ToString())</td>
                    </tr>
                }
            </tbody>
        </table>
        <p class="muted" style="font-size:11px; margin:8px 0 0;">جزئیات هر منبع را با نگهداشتن نشانگر روی هر ردیف ببین. لاگ کامل: <code dir="ltr">docker logs hamkadr_api</code></p>
    </div>
}
|
||||
<h2 style="font-size:20px; margin-top:0;">صف بررسی</h2>
|
||||
@if (Model.Queue.Count == 0)
|
||||
{
|
||||
|
||||
@@ -25,6 +25,7 @@ public class IndexModel : PageModel
|
||||
public IReadOnlyList<string> SourceNames { get; private set; } = new List<string>();
|
||||
public int PublishedShifts { get; private set; }
|
||||
public int PublishedJobs { get; private set; }
|
||||
public List<IngestionRun> Runs { get; private set; } = new();
|
||||
|
||||
[BindProperty] public string? SourceChannel { get; set; }
|
||||
[BindProperty] public string? RawText { get; set; }
|
||||
@@ -67,5 +68,6 @@ public class IndexModel : PageModel
|
||||
SourceNames = _ingest.SourceNames;
|
||||
PublishedShifts = await _db.Shifts.CountAsync(s => s.Source != ShiftSource.Direct);
|
||||
PublishedJobs = await _db.JobOpenings.CountAsync();
|
||||
Runs = await _db.IngestionRuns.OrderByDescending(r => r.RunAt).Take(15).ToListAsync();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ public record SourceResult(string Source, int Fetched, int Queued, int Published
|
||||
|
||||
public record IngestionSummary(List<SourceResult> Sources)
|
||||
{
|
||||
public int TotalFetched => Sources.Sum(s => s.Fetched);
|
||||
public int TotalQueued => Sources.Sum(s => s.Queued);
|
||||
public int TotalPublished => Sources.Sum(s => s.Published);
|
||||
public int TotalFlagged => Sources.Sum(s => s.Flagged);
|
||||
@@ -108,7 +109,27 @@ public class IngestionService
|
||||
source.Name, fetched, queued, published, flagged, spam, dupes);
|
||||
}
|
||||
|
||||
return new IngestionSummary(results);
|
||||
var summary = new IngestionSummary(results);
|
||||
|
||||
// Persist a run-log row so admins get a crawl history (with a per-source breakdown).
|
||||
if (results.Count > 0)
|
||||
{
|
||||
var detail = string.Join("؛ ", results.Select(r =>
|
||||
$"{r.Source}: یافت {r.Fetched}، صف {r.Queued}، منتشر {r.Published}، پرچم {r.Flagged}، اسپم {r.Spam}، تکراری {r.Duplicates}"));
|
||||
_db.IngestionRuns.Add(new IngestionRun
|
||||
{
|
||||
Fetched = summary.TotalFetched,
|
||||
Queued = summary.TotalQueued,
|
||||
Published = summary.TotalPublished,
|
||||
Flagged = summary.TotalFlagged,
|
||||
Spam = summary.TotalSpam,
|
||||
Duplicates = summary.TotalDuplicates,
|
||||
Detail = detail.Length > 2000 ? detail[..2000] : detail,
|
||||
});
|
||||
await _db.SaveChangesAsync(ct);
|
||||
}
|
||||
|
||||
return summary;
|
||||
}
|
||||
|
||||
private static (RawListingStatus status, string? reason, int confidence) Decide(
|
||||
|
||||
Reference in New Issue
Block a user