Add scrape/ingestion engine + validation, and 24h shift hour-range visualization

Scrape engine (Services/Scraping/): pluggable IListingSource (working sample + Telegram/Divar credential-ready stubs) → IngestionService (content-hash dedupe → parse → validate → review queue) → ListingValidator (completeness score + spam screen) → IngestionWorker (config-gated hosted service). RawListing gains ContentHash/Confidence/ValidationNotes; RawListingStatus.Flagged. Admin /Admin gets run-now, source list, confidence + flagged queue.

Hour-range viz: _HourBar 24h timeline bar (colored by type, overnight wrap) on shift cards, recommendation cards, and detail.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-03 08:18:19 +03:30
parent 69fa921fbd
commit 931b7b6ffb
24 changed files with 1439 additions and 26 deletions
@@ -0,0 +1,42 @@
using Microsoft.Extensions.Options;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Settings for the Divar listing source. The source's own doc comment names the
/// configuration section as <c>Ingestion:Divar</c>; the binding itself is not shown in this file.
/// </summary>
public class DivarOptions
{
    /// <summary>Master switch for the source; defaults to <c>false</c>.</summary>
    public bool Enabled { get; set; }

    /// <summary>City to search in, e.g. "tehran". Optional; <c>null</c> when not set.</summary>
    public string? City { get; set; }

    /// <summary>Search terms to query, e.g. "استخدام پزشک". Starts out as an empty list.</summary>
    public List<string> Queries { get; set; } = new List<string>();
}
/// <summary>
/// Listing source for Divar. This is a placeholder: the real fetch against Divar's
/// listing API/HTML is not implemented yet, so <see cref="FetchAsync"/> always yields an
/// empty list. Settings come from <see cref="DivarOptions"/> (configuration section
/// <c>Ingestion:Divar</c>), and the source stays dormant until it is enabled there.
/// </summary>
public class DivarListingSource : IListingSource
{
    private readonly DivarOptions _opts;
    private readonly ILogger<DivarListingSource> _log;

    /// <summary>Captures the current options snapshot and the logger.</summary>
    /// <param name="opts">Options wrapper; its <c>Value</c> is read once, here.</param>
    /// <param name="log">Logger used to report why nothing was fetched.</param>
    public DivarListingSource(IOptions<DivarOptions> opts, ILogger<DivarListingSource> log)
        => (_opts, _log) = (opts.Value, log);

    /// <summary>Display name of this source.</summary>
    public string Name => "دیوار";

    /// <summary>
    /// True only when the options flag is on AND at least one search query is configured.
    /// The configured city is not consulted here.
    /// </summary>
    public bool Enabled
    {
        get
        {
            if (!_opts.Enabled)
            {
                return false;
            }

            return _opts.Queries.Count > 0;
        }
    }

    /// <summary>
    /// Returns the scraped items for this source. Currently always an already-completed
    /// task holding an empty list; only the log line differs between the disabled and
    /// the enabled-but-unimplemented paths.
    /// </summary>
    /// <param name="ct">Cancellation token; not observed by the current stub.</param>
    /// <returns>A completed task wrapping an empty read-only list.</returns>
    public Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
    {
        IReadOnlyList<ScrapedItem> nothing = Array.Empty<ScrapedItem>();

        if (Enabled)
        {
            // TODO(prod): query Divar for each term in the configured city, map each ad's
            // title+description to new ScrapedItem(Name, text, adUrl).
            _log.LogWarning("Divar fetch not yet implemented; returning empty.");
        }
        else
        {
            _log.LogInformation("Divar source not configured — skipping.");
        }

        return Task.FromResult(nothing);
    }
}