Add scrape/ingestion engine + validation, and 24h shift hour-range visualization

Scrape engine (Services/Scraping/): pluggable IListingSource (working sample + Telegram/Divar credential-ready stubs) → IngestionService (content-hash dedupe → parse → validate → review queue) → ListingValidator (completeness score + spam screen) → IngestionWorker (config-gated hosted service). RawListing gains ContentHash/Confidence/ValidationNotes; RawListingStatus.Flagged. Admin /Admin gets run-now, source list, confidence + flagged queue.

Hour-range viz: _HourBar 24h timeline bar (colored by type, overnight wrap) on shift cards, recommendation cards, and detail.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-03 08:18:19 +03:30
parent 69fa921fbd
commit 931b7b6ffb
24 changed files with 1439 additions and 26 deletions
+4 -3
View File
@@ -55,9 +55,10 @@ public enum ApplicationStatus
/// <summary>Status values assigned to a raw listing.</summary>
// NOTE(review): New/Normalized/Discarded each appear twice in this view — this looks like
// the removed and added lines of a diff shown together without +/- markers; confirm
// against the real file before treating this span as compilable code.
public enum RawListingStatus
{
New = 0, // new
Normalized = 1, // converted to a shift
Discarded = 2 // discarded
New = 0, // new (ready for review)
Normalized = 1, // converted to a shift/employment
Discarded = 2, // discarded (or spam)
Flagged = 3 // incomplete/suspicious — needs further manual review
}
public enum EmploymentType
+11
View File
@@ -27,5 +27,16 @@ public class RawListing
[MaxLength(500)]
public string? SourceUrl { get; set; }
/// <summary>SHA-256 of the normalized text — used to dedupe across ingestion runs.</summary>
[MaxLength(64)]
public string? ContentHash { get; set; }
/// <summary>Parser+validator confidence 0–100 (how complete/usable the listing looks).</summary>
public int Confidence { get; set; }
/// <summary>Human-readable validation findings (missing fields, spam flags, etc.).</summary>
[MaxLength(1000)]
public string? ValidationNotes { get; set; }
public DateTime FetchedAt { get; set; } = DateTime.UtcNow;
}