Add scrape/ingestion engine + validation, and 24h shift hour-range visualization

Scrape engine (Services/Scraping/): pluggable IListingSource (working sample + Telegram/Divar credential-ready stubs) → IngestionService (content-hash dedupe → parse → validate → review queue) → ListingValidator (completeness score + spam screen) → IngestionWorker (config-gated hosted service). RawListing gains ContentHash/Confidence/ValidationNotes; RawListingStatus.Flagged. Admin /Admin gets run-now, source list, confidence + flagged queue.

Hour-range viz: _HourBar 24h timeline bar (colored by type, overnight wrap) on shift cards, recommendation cards, and detail.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-03 08:18:19 +03:30
parent 69fa921fbd
commit 931b7b6ffb
24 changed files with 1439 additions and 26 deletions
@@ -1,5 +1,6 @@
using JobsMedical.Web.Data;
using JobsMedical.Web.Models;
using JobsMedical.Web.Services.Scraping;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.Mvc.RazorPages;
@@ -7,19 +8,29 @@ using Microsoft.EntityFrameworkCore;
namespace JobsMedical.Web.Pages.Admin;
[Authorize(Roles = "Admin")] // secured by the OTP-auth Admin role
[Authorize(Roles = "Admin")]
public class IndexModel : PageModel
{
// Database context used by this page for the listing queues and the published counts.
private readonly AppDbContext _db;
// NOTE(review): this one-argument constructor never assigns _ingest, yet LoadAsync and
// OnPostRunIngestionAsync both dereference it. It looks like the pre-change constructor
// carried over from a diff rendering — confirm whether it should still exist.
public IndexModel(AppDbContext db) => _db = db;
// Ingestion service used to trigger a run on demand and to list its sources.
private readonly IngestionService _ingest;
/// <summary>Creates the page model with the database context and the ingestion service.</summary>
/// <param name="db">Database context stored in <c>_db</c>.</param>
/// <param name="ingest">Ingestion service stored in <c>_ingest</c>.</param>
public IndexModel(AppDbContext db, IngestionService ingest)
{
_db = db;
_ingest = ingest;
}
/// <summary>Raw listings with status New, filled by LoadAsync: highest confidence first, then most recently fetched.</summary>
public List<RawListing> Queue { get; private set; } = new();
/// <summary>Raw listings with status Flagged, filled by LoadAsync: most recently fetched first.</summary>
public List<RawListing> Flagged { get; private set; } = new();
/// <summary>(Name, Enabled) pairs copied from the ingestion service's Sources by LoadAsync.</summary>
public IReadOnlyList<(string Name, bool Enabled)> Sources { get; private set; } = new List<(string, bool)>();
/// <summary>Number of shifts whose Source is not ShiftSource.Direct, as counted by LoadAsync.</summary>
public int PublishedShifts { get; private set; }
/// <summary>Number of rows in JobOpenings, as counted by LoadAsync.</summary>
public int PublishedJobs { get; private set; }
// Form-bound inputs. NOTE(review): presumably consumed by OnPostAddAsync, whose body is
// not visible in this view — confirm.
[BindProperty] public string? SourceChannel { get; set; }
[BindProperty] public string? RawText { get; set; }
/// <summary>TempData-backed status message; OnPostRunIngestionAsync sets it to the run summary before redirecting.</summary>
[TempData] public string? IngestMessage { get; set; }
/// <summary>Handles GET requests by loading the page's display data through LoadAsync.</summary>
public async Task OnGetAsync()
{
    await LoadAsync();
}
public async Task<IActionResult> OnPostAddAsync()
@@ -37,11 +48,23 @@ public class IndexModel : PageModel
return RedirectToPage();
}
/// <summary>
/// Runs the ingestion service once, stores a summary of the run's counters in
/// <see cref="IngestMessage"/>, and redirects back to this page.
/// </summary>
/// <returns>A redirect to the current page.</returns>
public async Task<IActionResult> OnPostRunIngestionAsync()
{
    var summary = await _ingest.RunAsync();

    // The message is assembled from two halves: queued/flagged counts, then spam/duplicate counts.
    var queuedAndFlagged = $"جمع‌آوری انجام شد — {summary.TotalQueued} در صف، {summary.TotalFlagged} پرچم‌خورده، ";
    var spamAndDuplicates = $"{summary.TotalSpam} اسپم، {summary.TotalDuplicates} تکراری.";
    IngestMessage = queuedAndFlagged + spamAndDuplicates;

    return RedirectToPage();
}
/// <summary>
/// Populates the page's display properties: the New-status queue, the Flagged list,
/// the ingestion sources, and the published shift and job counts.
/// </summary>
private async Task LoadAsync()
{
    // Queries run one after another; each is awaited before the next starts.

    // New listings: highest confidence first, most recently fetched first within equal confidence.
    var pendingQuery = _db.RawListings
        .Where(listing => listing.Status == RawListingStatus.New)
        .OrderByDescending(listing => listing.Confidence)
        .ThenByDescending(listing => listing.FetchedAt);
    Queue = await pendingQuery.ToListAsync();

    // Flagged listings: most recently fetched first.
    var flaggedQuery = _db.RawListings
        .Where(listing => listing.Status == RawListingStatus.Flagged)
        .OrderByDescending(listing => listing.FetchedAt);
    Flagged = await flaggedQuery.ToListAsync();

    Sources = _ingest.Sources;

    // Counts every shift whose Source is anything other than Direct.
    PublishedShifts = await _db.Shifts.CountAsync(shift => shift.Source != ShiftSource.Direct);
    PublishedJobs = await _db.JobOpenings.CountAsync();
}