Add scrape/ingestion engine + validation, and 24h shift hour-range visualization
Scrape engine (Services/Scraping/): pluggable IListingSource (working sample + Telegram/Divar credential-ready stubs) → IngestionService (content-hash dedupe → parse → validate → review queue) → ListingValidator (completeness score + spam screen) → IngestionWorker (config-gated hosted service). RawListing gains ContentHash/Confidence/ValidationNotes; RawListingStatus.Flagged. Admin /Admin gets run-now, source list, confidence + flagged queue. Hour-range viz: _HourBar 24h timeline bar (colored by type, overnight wrap) on shift cards, recommendation cards, and detail. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,42 @@
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace JobsMedical.Web.Services.Scraping;
|
||||
|
||||
public class DivarOptions
|
||||
{
|
||||
public bool Enabled { get; set; }
|
||||
public string? City { get; set; } // e.g. "tehran"
|
||||
public List<string> Queries { get; set; } = new(); // search terms, e.g. "استخدام پزشک"
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Divar source. Credential-ready: configure city + queries in (Ingestion:Divar) and implement
|
||||
/// the fetch against Divar's listing API/HTML. Dormant until enabled.
|
||||
/// </summary>
|
||||
public class DivarListingSource : IListingSource
|
||||
{
|
||||
private readonly DivarOptions _opts;
|
||||
private readonly ILogger<DivarListingSource> _log;
|
||||
|
||||
public DivarListingSource(IOptions<DivarOptions> opts, ILogger<DivarListingSource> log)
|
||||
{
|
||||
_opts = opts.Value;
|
||||
_log = log;
|
||||
}
|
||||
|
||||
public string Name => "دیوار";
|
||||
public bool Enabled => _opts.Enabled && _opts.Queries.Count > 0;
|
||||
|
||||
public Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
|
||||
{
|
||||
if (!Enabled)
|
||||
{
|
||||
_log.LogInformation("Divar source not configured — skipping.");
|
||||
return Task.FromResult<IReadOnlyList<ScrapedItem>>(Array.Empty<ScrapedItem>());
|
||||
}
|
||||
// TODO(prod): query Divar for each term in the configured city, map each ad's
|
||||
// title+description to new ScrapedItem(Name, text, adUrl).
|
||||
_log.LogWarning("Divar fetch not yet implemented; returning empty.");
|
||||
return Task.FromResult<IReadOnlyList<ScrapedItem>>(Array.Empty<ScrapedItem>());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user