[Ingest] Persistent crawl run-log + per-source breakdown on admin queue
Each ingestion run now records an IngestionRun row holding the totals (found/queued/published/flagged/spam/duplicates) plus a per-source detail string.
Admin → صف آگهیها now shows a «تاریخچه جمعآوری» table of the last 15 runs (hover a row to see its per-source breakdown), so admins can compare over time how much each source found versus how much was actually added.
IngestionSummary gains a TotalFetched property.
Migration: IngestionRunLog creates the IngestionRuns table.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -30,6 +30,7 @@ public class AppDbContext : DbContext, IDataProtectionKeyContext
|
|||||||
public DbSet<Report> Reports => Set<Report>();
|
public DbSet<Report> Reports => Set<Report>();
|
||||||
public DbSet<FacilityDocument> FacilityDocuments => Set<FacilityDocument>();
|
public DbSet<FacilityDocument> FacilityDocuments => Set<FacilityDocument>();
|
||||||
public DbSet<JobAlert> JobAlerts => Set<JobAlert>();
|
public DbSet<JobAlert> JobAlerts => Set<JobAlert>();
|
||||||
|
public DbSet<IngestionRun> IngestionRuns => Set<IngestionRun>();
|
||||||
public DbSet<Review> Reviews => Set<Review>();
|
public DbSet<Review> Reviews => Set<Review>();
|
||||||
|
|
||||||
protected override void OnModelCreating(ModelBuilder b)
|
protected override void OnModelCreating(ModelBuilder b)
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,43 @@
|
|||||||
|
using System;
|
||||||
|
using Microsoft.EntityFrameworkCore.Migrations;
|
||||||
|
using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
|
||||||
|
|
||||||
|
#nullable disable
|
||||||
|
|
||||||
|
namespace JobsMedical.Web.Migrations;

/// <summary>
/// Schema migration that introduces the <c>IngestionRuns</c> table: an identity key, the run
/// timestamp, six non-nullable integer counters and an optional detail string of up to 2000 characters.
/// </summary>
public partial class IngestionRunLog : Migration
{
    /// <inheritdoc />
    protected override void Up(MigrationBuilder migrationBuilder)
    {
        // Column names are taken from the anonymous-type property names below, so they
        // must stay exactly as written (and in this order) to match the model snapshot.
        migrationBuilder.CreateTable(
            name: "IngestionRuns",
            columns: t => new
            {
                // Integer key generated by the database (identity-by-default column).
                Id = t.Column<int>(type: "integer", nullable: false)
                    .Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn),

                RunAt = t.Column<DateTime>(type: "timestamp with time zone", nullable: false),

                // Per-run counters; all required.
                Fetched = t.Column<int>(type: "integer", nullable: false),
                Queued = t.Column<int>(type: "integer", nullable: false),
                Published = t.Column<int>(type: "integer", nullable: false),
                Flagged = t.Column<int>(type: "integer", nullable: false),
                Spam = t.Column<int>(type: "integer", nullable: false),
                Duplicates = t.Column<int>(type: "integer", nullable: false),

                // Optional free-text column, capped at 2000 characters.
                Detail = t.Column<string>(type: "character varying(2000)", maxLength: 2000, nullable: true)
            },
            constraints: t => t.PrimaryKey("PK_IngestionRuns", run => run.Id));
    }

    /// <inheritdoc />
    protected override void Down(MigrationBuilder migrationBuilder)
        => migrationBuilder.DropTable(name: "IngestionRuns");
}
|
||||||
@@ -421,6 +421,44 @@ namespace JobsMedical.Web.Migrations
|
|||||||
b.ToTable("FacilityDocuments");
|
b.ToTable("FacilityDocuments");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
modelBuilder.Entity("JobsMedical.Web.Models.IngestionRun", b =>
|
||||||
|
{
|
||||||
|
b.Property<int>("Id")
|
||||||
|
.ValueGeneratedOnAdd()
|
||||||
|
.HasColumnType("integer");
|
||||||
|
|
||||||
|
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
|
||||||
|
|
||||||
|
b.Property<string>("Detail")
|
||||||
|
.HasMaxLength(2000)
|
||||||
|
.HasColumnType("character varying(2000)");
|
||||||
|
|
||||||
|
b.Property<int>("Duplicates")
|
||||||
|
.HasColumnType("integer");
|
||||||
|
|
||||||
|
b.Property<int>("Fetched")
|
||||||
|
.HasColumnType("integer");
|
||||||
|
|
||||||
|
b.Property<int>("Flagged")
|
||||||
|
.HasColumnType("integer");
|
||||||
|
|
||||||
|
b.Property<int>("Published")
|
||||||
|
.HasColumnType("integer");
|
||||||
|
|
||||||
|
b.Property<int>("Queued")
|
||||||
|
.HasColumnType("integer");
|
||||||
|
|
||||||
|
b.Property<DateTime>("RunAt")
|
||||||
|
.HasColumnType("timestamp with time zone");
|
||||||
|
|
||||||
|
b.Property<int>("Spam")
|
||||||
|
.HasColumnType("integer");
|
||||||
|
|
||||||
|
b.HasKey("Id");
|
||||||
|
|
||||||
|
b.ToTable("IngestionRuns");
|
||||||
|
});
|
||||||
|
|
||||||
modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
|
modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
|
||||||
{
|
{
|
||||||
b.Property<long>("Id")
|
b.Property<long>("Id")
|
||||||
|
|||||||
@@ -0,0 +1,21 @@
|
|||||||
|
using System.ComponentModel.DataAnnotations;
|
||||||
|
|
||||||
|
namespace JobsMedical.Web.Models;
|
||||||
|
|
||||||
|
/// <summary>
/// Outcome of a single ingestion run. Rows are kept so admins can browse a history of what was
/// crawled: how much was found, queued, published, flagged and so on, plus a per-source breakdown.
/// </summary>
public class IngestionRun
{
    /// <summary>Entity key.</summary>
    public int Id { get; set; }

    /// <summary>Timestamp of the run; defaults to the UTC time at which the instance is created.</summary>
    public DateTime RunAt { get; set; } = DateTime.UtcNow;

    /// <summary>Total items pulled from all sources.</summary>
    public int Fetched { get; set; }

    /// <summary>Items sent to the review queue.</summary>
    public int Queued { get; set; }

    /// <summary>Items that were auto-published.</summary>
    public int Published { get; set; }

    /// <summary>Items marked as needing review.</summary>
    public int Flagged { get; set; }

    /// <summary>Items discarded as spam or irrelevant.</summary>
    public int Spam { get; set; }

    /// <summary>Items skipped because they had already been seen.</summary>
    public int Duplicates { get; set; }

    /// <summary>
    /// Human-readable per-source breakdown: one segment per source listing that source's
    /// found/queued/… counts. Optional; limited to 2000 characters.
    /// </summary>
    [MaxLength(2000)]
    public string? Detail { get; set; }
}
|
||||||
@@ -62,6 +62,40 @@
|
|||||||
</aside>
|
</aside>
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
|
@if (Model.Runs.Count > 0)
{
    @* Crawl history: one table row per entry in Model.Runs. The section is omitted entirely when there are no runs. *@
    <h2 style="font-size:20px; margin-top:0;">تاریخچه جمعآوری</h2>
    <div class="card card-pad" style="margin-bottom:18px; overflow-x:auto;">
        <table style="width:100%; border-collapse:collapse; font-size:13px; white-space:nowrap;">
            <thead>
                <tr style="text-align:start; color:var(--muted);">
                    <th style="padding:6px 8px;">زمان</th>
                    <th style="padding:6px 8px;">یافتشده</th>
                    <th style="padding:6px 8px;">صف</th>
                    <th style="padding:6px 8px;">منتشر</th>
                    <th style="padding:6px 8px;">پرچم</th>
                    <th style="padding:6px 8px;">اسپم</th>
                    <th style="padding:6px 8px;">تکراری</th>
                </tr>
            </thead>
            <tbody>
                @foreach (var run in Model.Runs)
                {
                    @* The per-source breakdown (run.Detail) is exposed as the row's hover tooltip. *@
                    <tr style="border-top:1px solid var(--line);" title="@run.Detail">
                        @* The time-of-day goes through ToPersianDigits like every counter in this row, and is
                           formatted with the invariant culture so the helper always receives plain "HH:mm" text
                           regardless of the request culture.
                           NOTE(review): IngestionRun.RunAt defaults to DateTime.UtcNow, so the date and time shown
                           here are presumably UTC rather than local time; confirm whether a conversion is wanted. *@
                        <td style="padding:6px 8px;">@JalaliDate.ToLongDate(DateOnly.FromDateTime(run.RunAt)) @JalaliDate.ToPersianDigits(run.RunAt.ToString("HH:mm", System.Globalization.CultureInfo.InvariantCulture))</td>
                        <td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Fetched.ToString())</td>
                        <td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Queued.ToString())</td>
                        <td style="padding:6px 8px; color:var(--primary-dark); font-weight:700;">@JalaliDate.ToPersianDigits(run.Published.ToString())</td>
                        <td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Flagged.ToString())</td>
                        <td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Spam.ToString())</td>
                        <td style="padding:6px 8px;">@JalaliDate.ToPersianDigits(run.Duplicates.ToString())</td>
                    </tr>
                }
            </tbody>
        </table>
        <p class="muted" style="font-size:11px; margin:8px 0 0;">جزئیات هر منبع را با نگهداشتن نشانگر روی هر ردیف ببین. لاگ کامل: <code dir="ltr">docker logs hamkadr_api</code></p>
    </div>
}
|
||||||
<h2 style="font-size:20px; margin-top:0;">صف بررسی</h2>
|
<h2 style="font-size:20px; margin-top:0;">صف بررسی</h2>
|
||||||
@if (Model.Queue.Count == 0)
|
@if (Model.Queue.Count == 0)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ public class IndexModel : PageModel
|
|||||||
public IReadOnlyList<string> SourceNames { get; private set; } = new List<string>();
|
public IReadOnlyList<string> SourceNames { get; private set; } = new List<string>();
|
||||||
public int PublishedShifts { get; private set; }
|
public int PublishedShifts { get; private set; }
|
||||||
public int PublishedJobs { get; private set; }
|
public int PublishedJobs { get; private set; }
|
||||||
|
public List<IngestionRun> Runs { get; private set; } = new();
|
||||||
|
|
||||||
[BindProperty] public string? SourceChannel { get; set; }
|
[BindProperty] public string? SourceChannel { get; set; }
|
||||||
[BindProperty] public string? RawText { get; set; }
|
[BindProperty] public string? RawText { get; set; }
|
||||||
@@ -67,5 +68,6 @@ public class IndexModel : PageModel
|
|||||||
SourceNames = _ingest.SourceNames;
|
SourceNames = _ingest.SourceNames;
|
||||||
PublishedShifts = await _db.Shifts.CountAsync(s => s.Source != ShiftSource.Direct);
|
PublishedShifts = await _db.Shifts.CountAsync(s => s.Source != ShiftSource.Direct);
|
||||||
PublishedJobs = await _db.JobOpenings.CountAsync();
|
PublishedJobs = await _db.JobOpenings.CountAsync();
|
||||||
|
Runs = await _db.IngestionRuns.OrderByDescending(r => r.RunAt).Take(15).ToListAsync();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ public record SourceResult(string Source, int Fetched, int Queued, int Published
|
|||||||
|
|
||||||
public record IngestionSummary(List<SourceResult> Sources)
|
public record IngestionSummary(List<SourceResult> Sources)
|
||||||
{
|
{
|
||||||
|
public int TotalFetched => Sources.Sum(s => s.Fetched);
|
||||||
public int TotalQueued => Sources.Sum(s => s.Queued);
|
public int TotalQueued => Sources.Sum(s => s.Queued);
|
||||||
public int TotalPublished => Sources.Sum(s => s.Published);
|
public int TotalPublished => Sources.Sum(s => s.Published);
|
||||||
public int TotalFlagged => Sources.Sum(s => s.Flagged);
|
public int TotalFlagged => Sources.Sum(s => s.Flagged);
|
||||||
@@ -108,7 +109,27 @@ public class IngestionService
|
|||||||
source.Name, fetched, queued, published, flagged, spam, dupes);
|
source.Name, fetched, queued, published, flagged, spam, dupes);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new IngestionSummary(results);
|
var summary = new IngestionSummary(results);
|
||||||
|
|
||||||
|
// Persist a run-log row so admins get a crawl history (with a per-source breakdown).
|
||||||
|
if (results.Count > 0)
|
||||||
|
{
|
||||||
|
var detail = string.Join("؛ ", results.Select(r =>
|
||||||
|
$"{r.Source}: یافت {r.Fetched}، صف {r.Queued}، منتشر {r.Published}، پرچم {r.Flagged}، اسپم {r.Spam}، تکراری {r.Duplicates}"));
|
||||||
|
_db.IngestionRuns.Add(new IngestionRun
|
||||||
|
{
|
||||||
|
Fetched = summary.TotalFetched,
|
||||||
|
Queued = summary.TotalQueued,
|
||||||
|
Published = summary.TotalPublished,
|
||||||
|
Flagged = summary.TotalFlagged,
|
||||||
|
Spam = summary.TotalSpam,
|
||||||
|
Duplicates = summary.TotalDuplicates,
|
||||||
|
Detail = detail.Length > 2000 ? detail[..2000] : detail,
|
||||||
|
});
|
||||||
|
await _db.SaveChangesAsync(ct);
|
||||||
|
}
|
||||||
|
|
||||||
|
return summary;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static (RawListingStatus status, string? reason, int confidence) Decide(
|
private static (RawListingStatus status, string? reason, int confidence) Decide(
|
||||||
|
|||||||
Reference in New Issue
Block a user