Add scrape/ingestion engine + validation, and 24h shift hour-range visualization

Scrape engine (Services/Scraping/): pluggable IListingSource (working sample + Telegram/Divar credential-ready stubs) → IngestionService (content-hash dedupe → parse → validate → review queue) → ListingValidator (completeness score + spam screen) → IngestionWorker (config-gated hosted service). RawListing gains ContentHash/Confidence/ValidationNotes; RawListingStatus.Flagged. Admin /Admin gets run-now, source list, confidence + flagged queue.

Hour-range viz: _HourBar 24h timeline bar (colored by type, overnight wrap) on shift cards, recommendation cards, and detail.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-03 08:18:19 +03:30
parent 69fa921fbd
commit 931b7b6ffb
24 changed files with 1439 additions and 26 deletions
@@ -0,0 +1,59 @@
using Microsoft.Extensions.Options;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Settings consumed by the scheduled ingestion worker.
/// </summary>
public class IngestionOptions
{
    /// <summary>
    /// Whether the scheduled worker runs at all. Defaults to <c>false</c> (off by default —
    /// opt in via config).
    /// </summary>
    public bool Enabled { get; set; }

    /// <summary>
    /// Minutes to wait between scheduled ingestion runs. Defaults to 30.
    /// </summary>
    public int IntervalMinutes { get; set; } = 30;
}
/// <summary>
/// Periodically runs the ingestion engine when enabled (Ingestion:Enabled=true). Off by default
/// so nothing scrapes uninvited; admins can also trigger a run on demand from the admin UI.
/// </summary>
/// <remarks>
/// Every run resolves <see cref="IngestionService"/> from a fresh DI scope. A failed run is
/// logged and the loop carries on after the usual delay; only cancellation of the host's
/// stopping token ends the loop.
/// </remarks>
public class IngestionWorker : BackgroundService
{
    // Upper bound for the delay between runs. Task.Delay throws ArgumentOutOfRangeException for
    // very large delays (anything above int.MaxValue milliseconds, about 35,791 minutes, on
    // older runtimes), so the configured value is clamped to a limit that is always accepted.
    private const int MaxIntervalMinutes = 35_791;

    private readonly IServiceScopeFactory _scopes;
    private readonly IngestionOptions _opts;
    private readonly ILogger<IngestionWorker> _log;

    /// <summary>Creates the worker.</summary>
    /// <param name="scopes">Factory used to open one DI scope per ingestion run.</param>
    /// <param name="opts">Worker settings; the value is read once, at construction.</param>
    /// <param name="log">Logger for start-up, per-run summaries and failures.</param>
    public IngestionWorker(IServiceScopeFactory scopes, IOptions<IngestionOptions> opts,
        ILogger<IngestionWorker> log)
    {
        _scopes = scopes;
        _opts = opts.Value;
        _log = log;
    }

    /// <summary>
    /// Runs ingestion immediately and then once per interval until <paramref name="stoppingToken"/>
    /// is cancelled. Returns straight away when the worker is disabled.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host when the application is shutting down.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        if (!_opts.Enabled)
        {
            _log.LogInformation("Ingestion worker disabled (Ingestion:Enabled=false).");
            return;
        }

        // Clamp once and use the same value for both the delay and the log line, so the log
        // reports the interval that is actually applied (e.g. 0 or negative becomes 1).
        var minutes = Math.Clamp(_opts.IntervalMinutes, 1, MaxIntervalMinutes);
        var interval = TimeSpan.FromMinutes(minutes);
        _log.LogInformation("Ingestion worker on; every {Min} min.", minutes);

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                using var scope = _scopes.CreateScope();
                var svc = scope.ServiceProvider.GetRequiredService<IngestionService>();
                var summary = await svc.RunAsync(stoppingToken);
                _log.LogInformation("Scheduled ingestion: queued={Q} flagged={F} spam={S} dupes={D}",
                    summary.TotalQueued, summary.TotalFlagged, summary.TotalSpam, summary.TotalDuplicates);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Shutdown was requested while a run was in flight: leave quietly.
                break;
            }
            catch (Exception ex)
            {
                // Includes cancellations that are NOT a shutdown (for example a timed-out
                // request surfacing as TaskCanceledException). Letting those escape would
                // fault the background service and end all future scheduled runs.
                _log.LogError(ex, "Scheduled ingestion run failed");
            }

            try { await Task.Delay(interval, stoppingToken); }
            catch (OperationCanceledException) { break; }
        }
    }
}