Add scrape/ingestion engine + validation, and 24h shift hour-range visualization

Scrape engine (Services/Scraping/): pluggable IListingSource (working sample + Telegram/Divar credential-ready stubs) → IngestionService (content-hash dedupe → parse → validate → review queue) → ListingValidator (completeness score + spam screen) → IngestionWorker (config-gated hosted service). RawListing gains ContentHash/Confidence/ValidationNotes; RawListingStatus.Flagged. Admin /Admin gets run-now, source list, confidence + flagged queue.

Hour-range viz: _HourBar 24h timeline bar (colored by type, overnight wrap) on shift cards, recommendation cards, and detail.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-03 08:18:19 +03:30
parent 69fa921fbd
commit 931b7b6ffb
24 changed files with 1439 additions and 26 deletions
+15 -5
View File
@@ -75,11 +75,21 @@ Shifts support fixed (مقطوع), hourly (ساعتی), **profit-share (درصد
centralizes the display; `Shift.SharePercent` holds the percentage; the listing-parser detects centralizes the display; `Shift.SharePercent` holds the percentage; the listing-parser detects
"۵۰٪ / درصد / سهم" from raw posts; and `/Shifts` has a "سهم درآمد" filter. "۵۰٪ / درصد / سهم" from raw posts; and `/Shifts` has a "سهم درآمد" filter.
### Listing parser (Stage 1) ### Scrape / ingestion engine
`IListingParser` / `HeuristicListingParser` extracts kind (shift vs hire), role, shift type, Pluggable `IListingSource`s (working `SampleListingSource`; credential-ready `Telegram`/`Divar`
employment type, pay, city/district, and phone from a raw Persian post via keyword + regex stubs) → `IngestionService` **dedupes by content hash → parses → validates → enqueues** as
heuristics — **no AI dependency** (LLM APIs are blocked from Iran). Admin reviews the prefilled `RawListing` (status New / Flagged / Discarded-spam) with a confidence score. `ListingValidator`
form and publishes. Swap in an `LlmListingParser` later behind the same interface. scores completeness (role, location, pay, phone, length) and screens spam. `IngestionWorker`
(hosted, config-gated `Ingestion:Enabled`) runs it on a timer; admins can also run it on demand
from `/Admin`. `IListingParser` / `HeuristicListingParser` does the field extraction (kind, role,
shift type, employment, pay, **profit-share %**, city/district, phone) — **no AI dependency** (LLM
APIs are blocked from Iran). Admin reviews the prefilled form and publishes. Swap in an
`LlmListingParser` or real sources behind the same interfaces later.
### Hour-range visualization
Every shift card, recommendation card, and detail page shows a **24-hour timeline bar**
(`_HourBar`) with the shift's hours filled and colored by type; overnight shifts wrap past
midnight into two segments.
### Auth ### Auth
Phone OTP via `OtpService` (in-memory codes; dev shows the code on screen — wire Kavenegar/SMS.ir Phone OTP via `OtpService` (in-memory codes; dev shows the code on screen — wire Kavenegar/SMS.ir
+4
View File
@@ -108,5 +108,9 @@ public class AppDbContext : DbContext
.HasForeignKey(j => j.FacilityId).OnDelete(DeleteBehavior.Cascade); .HasForeignKey(j => j.FacilityId).OnDelete(DeleteBehavior.Cascade);
b.Entity<JobOpening>().HasIndex(j => j.Status); b.Entity<JobOpening>().HasIndex(j => j.Status);
b.Entity<JobOpening>().HasIndex(j => j.FacilityId); b.Entity<JobOpening>().HasIndex(j => j.FacilityId);
// Non-unique lookup indexes: ContentHash backs ingestion dedupe checks; Status backs the admin queue filters.
b.Entity<RawListing>().HasIndex(r => r.ContentHash);
b.Entity<RawListing>().HasIndex(r => r.Status);
} }
} }
@@ -0,0 +1,788 @@
// <auto-generated />
using System;
using JobsMedical.Web.Data;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
#nullable disable
namespace JobsMedical.Web.Migrations
{
[DbContext(typeof(AppDbContext))]
[Migration("20260603044159_IngestionFields")]
partial class IngestionFields
{
/// <inheritdoc />
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasAnnotation("ProductVersion", "10.0.0")
.HasAnnotation("Relational:MaxIdentifierLength", 63);
NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder);
modelBuilder.Entity("JobsMedical.Web.Models.Application", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<int>("DoctorId")
.HasColumnType("integer");
b.Property<string>("Message")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<int>("ShiftId")
.HasColumnType("integer");
b.Property<int>("Status")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("DoctorId");
b.HasIndex("ShiftId", "DoctorId")
.IsUnique();
b.ToTable("Applications");
});
modelBuilder.Entity("JobsMedical.Web.Models.City", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<bool>("IsActive")
.HasColumnType("boolean");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<string>("Province")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.HasKey("Id");
b.ToTable("Cities");
});
modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int>("CityId")
.HasColumnType("integer");
b.Property<bool>("IsActive")
.HasColumnType("boolean");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(120)
.HasColumnType("character varying(120)");
b.HasKey("Id");
b.HasIndex("CityId");
b.ToTable("Districts");
});
modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Bio")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.Property<int?>("CityId")
.HasColumnType("integer");
b.Property<bool>("IsVerified")
.HasColumnType("boolean");
b.Property<string>("LicenseNo")
.HasMaxLength(20)
.HasColumnType("character varying(20)");
b.Property<int?>("RoleId")
.HasColumnType("integer");
b.Property<string>("Specialty")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<int>("UserId")
.HasColumnType("integer");
b.Property<int>("YearsExperience")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("CityId");
b.HasIndex("RoleId");
b.HasIndex("UserId")
.IsUnique();
b.ToTable("DoctorProfiles");
});
modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Address")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<string>("BaleId")
.HasMaxLength(50)
.HasColumnType("character varying(50)");
b.Property<int>("CityId")
.HasColumnType("integer");
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<int?>("DistrictId")
.HasColumnType("integer");
b.Property<bool>("IsVerified")
.HasColumnType("boolean");
b.Property<double?>("Lat")
.HasColumnType("double precision");
b.Property<double?>("Lng")
.HasColumnType("double precision");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<int?>("OwnerUserId")
.HasColumnType("integer");
b.Property<string>("Phone")
.HasMaxLength(20)
.HasColumnType("character varying(20)");
b.Property<int>("Type")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("CityId");
b.HasIndex("DistrictId");
b.HasIndex("OwnerUserId");
b.ToTable("Facilities");
});
modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
{
b.Property<long>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("bigint");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<long>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<int>("EventType")
.HasColumnType("integer");
b.Property<int?>("JobOpeningId")
.HasColumnType("integer");
b.Property<int?>("ShiftId")
.HasColumnType("integer");
b.Property<string>("VisitorId")
.IsRequired()
.HasColumnType("character varying(36)");
b.HasKey("Id");
b.HasIndex("JobOpeningId");
b.HasIndex("ShiftId");
b.HasIndex("VisitorId", "CreatedAt");
b.ToTable("InterestEvents");
});
modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<string>("Description")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
b.Property<int>("EmploymentType")
.HasColumnType("integer");
b.Property<int>("FacilityId")
.HasColumnType("integer");
b.Property<string>("Requirements")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.Property<int>("RoleId")
.HasColumnType("integer");
b.Property<long?>("SalaryMax")
.HasColumnType("bigint");
b.Property<long?>("SalaryMin")
.HasColumnType("bigint");
b.Property<int>("Source")
.HasColumnType("integer");
b.Property<string>("SourceUrl")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<int>("Status")
.HasColumnType("integer");
b.Property<string>("Title")
.IsRequired()
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.HasKey("Id");
b.HasIndex("FacilityId");
b.HasIndex("RoleId");
b.HasIndex("Status");
b.ToTable("JobOpenings");
});
// Target-model snapshot of the RawListing entity (table "RawListings") as of the
// IngestionFields migration. Confidence, ContentHash and ValidationNotes, plus the
// ContentHash and Status indexes, are the members this migration introduces.
modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
// Non-nullable integer score; the migration backfills existing rows with 0.
b.Property<int>("Confidence")
.HasColumnType("integer");
// Optional, at most 64 characters — presumably a hex-encoded SHA-256 digest; verify against the hashing code.
b.Property<string>("ContentHash")
.HasMaxLength(64)
.HasColumnType("character varying(64)");
b.Property<DateTime>("FetchedAt")
.HasColumnType("timestamp with time zone");
b.Property<int?>("LinkedShiftId")
.HasColumnType("integer");
b.Property<string>("ParsedJson")
.HasColumnType("text");
b.Property<string>("RawText")
.IsRequired()
.HasColumnType("text");
b.Property<string>("SourceChannel")
.IsRequired()
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<string>("SourceUrl")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<int>("Status")
.HasColumnType("integer");
// Optional free-text validation findings, capped at 1000 characters.
b.Property<string>("ValidationNotes")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.HasKey("Id");
// Plain (non-unique) index: it speeds up hash lookups but does not itself forbid duplicate hashes.
b.HasIndex("ContentHash");
b.HasIndex("LinkedShiftId");
b.HasIndex("Status");
b.ToTable("RawListings");
});
modelBuilder.Entity("JobsMedical.Web.Models.Role", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Category")
.IsRequired()
.HasMaxLength(50)
.HasColumnType("character varying(50)");
b.Property<bool>("IsActive")
.HasColumnType("boolean");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<int>("SortOrder")
.HasColumnType("integer");
b.HasKey("Id");
b.ToTable("Roles");
});
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<DateOnly>("Date")
.HasColumnType("date");
b.Property<string>("Description")
.HasMaxLength(1500)
.HasColumnType("character varying(1500)");
b.Property<TimeOnly>("EndTime")
.HasColumnType("time without time zone");
b.Property<int>("FacilityId")
.HasColumnType("integer");
b.Property<long?>("PayAmount")
.HasColumnType("bigint");
b.Property<int>("PayType")
.HasColumnType("integer");
b.Property<int>("RoleId")
.HasColumnType("integer");
b.Property<int?>("SharePercent")
.HasColumnType("integer");
b.Property<int>("ShiftType")
.HasColumnType("integer");
b.Property<int>("Source")
.HasColumnType("integer");
b.Property<string>("SourceUrl")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<string>("SpecialtyRequired")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<TimeOnly>("StartTime")
.HasColumnType("time without time zone");
b.Property<int>("Status")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("FacilityId");
b.HasIndex("RoleId");
b.HasIndex("Date", "Status");
b.ToTable("Shifts");
});
modelBuilder.Entity("JobsMedical.Web.Models.User", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<string>("FullName")
.HasMaxLength(150)
.HasColumnType("character varying(150)");
b.Property<bool>("IsPhoneVerified")
.HasColumnType("boolean");
b.Property<string>("Phone")
.IsRequired()
.HasMaxLength(20)
.HasColumnType("character varying(20)");
b.Property<int>("Role")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("Phone")
.IsUnique();
b.ToTable("Users");
});
modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int?>("CityId")
.HasColumnType("integer");
b.Property<long?>("MinPay")
.HasColumnType("bigint");
b.Property<int?>("PreferredShiftType")
.HasColumnType("integer");
b.Property<int?>("RoleId")
.HasColumnType("integer");
b.Property<DateTime>("UpdatedAt")
.HasColumnType("timestamp with time zone");
b.Property<string>("VisitorId")
.IsRequired()
.HasColumnType("character varying(36)");
b.HasKey("Id");
b.HasIndex("CityId");
b.HasIndex("RoleId");
b.HasIndex("VisitorId")
.IsUnique();
b.ToTable("UserPreferences");
});
modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
{
b.Property<string>("Id")
.HasMaxLength(36)
.HasColumnType("character varying(36)");
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<DateTime>("LastSeenAt")
.HasColumnType("timestamp with time zone");
b.Property<int?>("UserId")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("UserId");
b.ToTable("Visitors");
});
modelBuilder.Entity("JobsMedical.Web.Models.Application", b =>
{
b.HasOne("JobsMedical.Web.Models.User", "Doctor")
.WithMany("Applications")
.HasForeignKey("DoctorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.Shift", "Shift")
.WithMany("Applications")
.HasForeignKey("ShiftId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("Doctor");
b.Navigation("Shift");
});
modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany()
.HasForeignKey("CityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("City");
});
modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany()
.HasForeignKey("CityId");
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany()
.HasForeignKey("RoleId");
b.HasOne("JobsMedical.Web.Models.User", "User")
.WithOne("DoctorProfile")
.HasForeignKey("JobsMedical.Web.Models.DoctorProfile", "UserId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("City");
b.Navigation("Role");
b.Navigation("User");
});
modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany("Facilities")
.HasForeignKey("CityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.District", "District")
.WithMany("Facilities")
.HasForeignKey("DistrictId")
.OnDelete(DeleteBehavior.SetNull);
b.HasOne("JobsMedical.Web.Models.User", "OwnerUser")
.WithMany()
.HasForeignKey("OwnerUserId")
.OnDelete(DeleteBehavior.SetNull);
b.Navigation("City");
b.Navigation("District");
b.Navigation("OwnerUser");
});
modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
{
b.HasOne("JobsMedical.Web.Models.JobOpening", "JobOpening")
.WithMany()
.HasForeignKey("JobOpeningId")
.OnDelete(DeleteBehavior.Cascade);
b.HasOne("JobsMedical.Web.Models.Shift", "Shift")
.WithMany()
.HasForeignKey("ShiftId")
.OnDelete(DeleteBehavior.Cascade);
b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor")
.WithMany("Events")
.HasForeignKey("VisitorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("JobOpening");
b.Navigation("Shift");
b.Navigation("Visitor");
});
modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b =>
{
b.HasOne("JobsMedical.Web.Models.Facility", "Facility")
.WithMany()
.HasForeignKey("FacilityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany()
.HasForeignKey("RoleId")
.OnDelete(DeleteBehavior.Restrict)
.IsRequired();
b.Navigation("Facility");
b.Navigation("Role");
});
modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b =>
{
b.HasOne("JobsMedical.Web.Models.Shift", "LinkedShift")
.WithMany()
.HasForeignKey("LinkedShiftId");
b.Navigation("LinkedShift");
});
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{
b.HasOne("JobsMedical.Web.Models.Facility", "Facility")
.WithMany("Shifts")
.HasForeignKey("FacilityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany("Shifts")
.HasForeignKey("RoleId")
.OnDelete(DeleteBehavior.Restrict)
.IsRequired();
b.Navigation("Facility");
b.Navigation("Role");
});
modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany()
.HasForeignKey("CityId");
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany()
.HasForeignKey("RoleId");
b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor")
.WithOne("Preferences")
.HasForeignKey("JobsMedical.Web.Models.UserPreferences", "VisitorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("City");
b.Navigation("Role");
b.Navigation("Visitor");
});
modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
{
b.HasOne("JobsMedical.Web.Models.User", "User")
.WithMany()
.HasForeignKey("UserId")
.OnDelete(DeleteBehavior.SetNull);
b.Navigation("User");
});
modelBuilder.Entity("JobsMedical.Web.Models.City", b =>
{
b.Navigation("Facilities");
});
modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
{
b.Navigation("Facilities");
});
modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
{
b.Navigation("Shifts");
});
modelBuilder.Entity("JobsMedical.Web.Models.Role", b =>
{
b.Navigation("Shifts");
});
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{
b.Navigation("Applications");
});
modelBuilder.Entity("JobsMedical.Web.Models.User", b =>
{
b.Navigation("Applications");
b.Navigation("DoctorProfile");
});
modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
{
b.Navigation("Events");
b.Navigation("Preferences");
});
#pragma warning restore 612, 618
}
}
}
@@ -0,0 +1,69 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace JobsMedical.Web.Migrations
{
/// <summary>
/// Adds the ingestion bookkeeping columns (Confidence, ContentHash, ValidationNotes)
/// to the RawListings table, plus non-unique indexes on ContentHash and Status.
/// </summary>
public partial class IngestionFields : Migration
{
/// <inheritdoc />
protected override void Up(MigrationBuilder migrationBuilder)
{
// Non-nullable column: rows that already exist are backfilled with 0.
migrationBuilder.AddColumn<int>(
name: "Confidence",
table: "RawListings",
type: "integer",
nullable: false,
defaultValue: 0);
// Nullable column: rows that already exist keep a NULL hash.
migrationBuilder.AddColumn<string>(
name: "ContentHash",
table: "RawListings",
type: "character varying(64)",
maxLength: 64,
nullable: true);
// Nullable free-text column, capped at 1000 characters.
migrationBuilder.AddColumn<string>(
name: "ValidationNotes",
table: "RawListings",
type: "character varying(1000)",
maxLength: 1000,
nullable: true);
// Both indexes are plain (non-unique) lookup indexes.
migrationBuilder.CreateIndex(
name: "IX_RawListings_ContentHash",
table: "RawListings",
column: "ContentHash");
migrationBuilder.CreateIndex(
name: "IX_RawListings_Status",
table: "RawListings",
column: "Status");
}
/// <inheritdoc />
protected override void Down(MigrationBuilder migrationBuilder)
{
// Reverse of Up: drop the indexes first, then the columns they were built on.
migrationBuilder.DropIndex(
name: "IX_RawListings_ContentHash",
table: "RawListings");
migrationBuilder.DropIndex(
name: "IX_RawListings_Status",
table: "RawListings");
migrationBuilder.DropColumn(
name: "Confidence",
table: "RawListings");
migrationBuilder.DropColumn(
name: "ContentHash",
table: "RawListings");
migrationBuilder.DropColumn(
name: "ValidationNotes",
table: "RawListings");
}
}
}
@@ -319,6 +319,13 @@ namespace JobsMedical.Web.Migrations
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id")); NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int>("Confidence")
.HasColumnType("integer");
b.Property<string>("ContentHash")
.HasMaxLength(64)
.HasColumnType("character varying(64)");
b.Property<DateTime>("FetchedAt") b.Property<DateTime>("FetchedAt")
.HasColumnType("timestamp with time zone"); .HasColumnType("timestamp with time zone");
@@ -344,10 +351,18 @@ namespace JobsMedical.Web.Migrations
b.Property<int>("Status") b.Property<int>("Status")
.HasColumnType("integer"); .HasColumnType("integer");
b.Property<string>("ValidationNotes")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.HasKey("Id"); b.HasKey("Id");
b.HasIndex("ContentHash");
b.HasIndex("LinkedShiftId"); b.HasIndex("LinkedShiftId");
b.HasIndex("Status");
b.ToTable("RawListings"); b.ToTable("RawListings");
}); });
+4 -3
View File
@@ -55,9 +55,10 @@ public enum ApplicationStatus
public enum RawListingStatus public enum RawListingStatus
{ {
New = 0, // جدید New = 0, // جدید (آماده بررسی)
Normalized = 1, // تبدیل شده به شیفت Normalized = 1, // تبدیل شده به شیفت/استخدام
Discarded = 2 // کنار گذاشته شده Discarded = 2, // کنار گذاشته شده (یا اسپم)
Flagged = 3 // ناقص/مشکوک — نیازمند بررسی دستی بیشتر
} }
public enum EmploymentType public enum EmploymentType
+11
View File
@@ -27,5 +27,16 @@ public class RawListing
[MaxLength(500)] [MaxLength(500)]
public string? SourceUrl { get; set; } public string? SourceUrl { get; set; }
/// <summary>SHA-256 of the normalized text — used to dedupe across ingestion runs.</summary>
[MaxLength(64)]
public string? ContentHash { get; set; }
/// <summary>Parser+validator confidence 0–100 (how complete/usable the listing looks).</summary>
public int Confidence { get; set; }
/// <summary>Human-readable validation findings (missing fields, spam flags, etc.).</summary>
[MaxLength(1000)]
public string? ValidationNotes { get; set; }
public DateTime FetchedAt { get; set; } = DateTime.UtcNow; public DateTime FetchedAt { get; set; } = DateTime.UtcNow;
} }
+46 -16
View File
@@ -6,29 +6,55 @@
<div class="page-head"> <div class="page-head">
<div class="container"> <div class="container">
<h1>پنل مدیریت — صف آگهی‌های خام</h1> <h1>پنل مدیریت — جمع‌آوری و صف آگهی‌ها</h1>
<p class="muted"> <p class="muted">
آگهی‌های جمع‌آوری‌شده از کانال‌ها را اینجا بررسی، ساختارمند و منتشر کن. آگهی‌های جمع‌آوری‌شده از منابع را بررسی، ساختارمند و منتشر کن.
(@JalaliDate.ToPersianDigits(Model.Queue.Count.ToString()) در انتظار بررسی) (@JalaliDate.ToPersianDigits(Model.Queue.Count.ToString()) در صف،
@JalaliDate.ToPersianDigits(Model.Flagged.Count.ToString()) پرچم‌خورده)
· <a asp-page="/Admin/Facilities">تأیید مراکز درمانی</a> · <a asp-page="/Admin/Facilities">تأیید مراکز درمانی</a>
</p> </p>
</div> </div>
</div> </div>
<div class="container section"> <div class="container section">
@if (Model.IngestMessage is not null)
{
<div class="alert alert-success">✓ @Model.IngestMessage</div>
}
<div class="layout-2"> <div class="layout-2">
<aside class="card card-pad filter-card"> <aside class="card card-pad filter-card">
<h3>افزودن آگهی خام</h3> <h3>موتور جمع‌آوری</h3>
<p class="muted" style="font-size:13px;">منابع متصل:</p>
<ul style="margin:0 0 12px; padding-inline-start:18px; font-size:13.5px;">
@foreach (var src in Model.Sources)
{
<li>@src.Name —
@if (src.Enabled) { <span style="color:var(--primary-dark);">فعال</span> }
else { <span class="muted">غیرفعال (نیازمند تنظیمات)</span> }
</li>
}
</ul>
<form method="post">
<button type="submit" asp-page-handler="RunIngestion" class="btn btn-accent btn-block">اجرای جمع‌آوری اکنون</button>
</form>
<p class="muted" style="font-size:11px; margin:8px 0 0;">
موتور: واکشی ← حذف تکراری ← تجزیه ← اعتبارسنجی ← صف بررسی.
</p>
<hr style="border:none; border-top:1px solid var(--line); margin:16px 0;" />
<h3>افزودن دستی</h3>
<form method="post"> <form method="post">
<div class="filter-group"> <div class="filter-group">
<label>منبع (کانال/سایت)</label> <label>منبع</label>
<input type="text" name="SourceChannel" placeholder="مثلاً کانال شیفت تهران" /> <input type="text" name="SourceChannel" placeholder="مثلاً کانال شیفت تهران" />
</div> </div>
<div class="filter-group"> <div class="filter-group">
<label>متن آگهی</label> <label>متن آگهی</label>
<textarea name="RawText" rows="6" placeholder="متن کپی‌شده از تلگرام/بله/دیوار را اینجا بچسبان..."></textarea> <textarea name="RawText" rows="5" placeholder="متن کپی‌شده را بچسبان..."></textarea>
</div> </div>
<button type="submit" asp-page-handler="Add" class="btn btn-primary btn-block">افزودن به صف</button> <button type="submit" asp-page-handler="Add" class="btn btn-outline btn-block">افزودن به صف</button>
</form> </form>
<p class="muted" style="font-size:12px; margin-bottom:0;"> <p class="muted" style="font-size:12px; margin-bottom:0;">
منتشرشده: @JalaliDate.ToPersianDigits(Model.PublishedShifts.ToString()) شیفت، منتشرشده: @JalaliDate.ToPersianDigits(Model.PublishedShifts.ToString()) شیفت،
@@ -37,22 +63,26 @@
</aside> </aside>
<div> <div>
<h2 style="font-size:20px; margin-top:0;">صف بررسی</h2>
@if (Model.Queue.Count == 0) @if (Model.Queue.Count == 0)
{ {
<div class="card empty-state">صف خالی است. آگهی جدیدی برای بررسی وجود ندارد.</div> <div class="card empty-state">صف خالی است. «اجرای جمع‌آوری» را بزن یا آگهی اضافه کن.</div>
} }
else else
{ {
foreach (var r in Model.Queue) foreach (var r in Model.Queue)
{ {
<div class="card card-pad" style="margin-bottom:14px;"> <partial name="_RawListingRow" model="r" />
<div class="row" style="display:flex; justify-content:space-between;"> }
<strong>@r.SourceChannel</strong> }
<span class="muted" style="font-size:12px;">@JalaliDate.ToLongDate(DateOnly.FromDateTime(r.FetchedAt))</span>
</div> @if (Model.Flagged.Count > 0)
<p style="margin:10px 0; white-space:pre-wrap;">@r.RawText</p> {
<a class="btn btn-accent" asp-page="/Admin/Review" asp-route-id="@r.Id">بررسی و انتشار ←</a> <h2 style="font-size:20px; margin-top:28px;">پرچم‌خورده (ناقص/مشکوک)</h2>
</div> <p class="muted" style="font-size:13px;">اعتبارسنجی این‌ها را کامل ندانست؛ در صورت صحت می‌توانی منتشرشان کنی.</p>
foreach (var r in Model.Flagged)
{
<partial name="_RawListingRow" model="r" />
} }
} }
</div> </div>
@@ -1,5 +1,6 @@
using JobsMedical.Web.Data; using JobsMedical.Web.Data;
using JobsMedical.Web.Models; using JobsMedical.Web.Models;
using JobsMedical.Web.Services.Scraping;
using Microsoft.AspNetCore.Authorization; using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc; using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.Mvc.RazorPages; using Microsoft.AspNetCore.Mvc.RazorPages;
@@ -7,19 +8,29 @@ using Microsoft.EntityFrameworkCore;
namespace JobsMedical.Web.Pages.Admin; namespace JobsMedical.Web.Pages.Admin;
[Authorize(Roles = "Admin")] // secured by the OTP-auth Admin role [Authorize(Roles = "Admin")]
public class IndexModel : PageModel public class IndexModel : PageModel
{ {
private readonly AppDbContext _db; private readonly AppDbContext _db;
public IndexModel(AppDbContext db) => _db = db; private readonly IngestionService _ingest;
public IndexModel(AppDbContext db, IngestionService ingest)
{
_db = db;
_ingest = ingest;
}
public List<RawListing> Queue { get; private set; } = new(); public List<RawListing> Queue { get; private set; } = new();
public List<RawListing> Flagged { get; private set; } = new();
public IReadOnlyList<(string Name, bool Enabled)> Sources { get; private set; } = new List<(string, bool)>();
public int PublishedShifts { get; private set; } public int PublishedShifts { get; private set; }
public int PublishedJobs { get; private set; } public int PublishedJobs { get; private set; }
[BindProperty] public string? SourceChannel { get; set; } [BindProperty] public string? SourceChannel { get; set; }
[BindProperty] public string? RawText { get; set; } [BindProperty] public string? RawText { get; set; }
[TempData] public string? IngestMessage { get; set; }
public async Task OnGetAsync() => await LoadAsync(); public async Task OnGetAsync() => await LoadAsync();
public async Task<IActionResult> OnPostAddAsync() public async Task<IActionResult> OnPostAddAsync()
@@ -37,11 +48,23 @@ public class IndexModel : PageModel
return RedirectToPage(); return RedirectToPage();
} }
/// <summary>
/// POST handler for the "run ingestion now" button: runs one ingestion pass, stores a
/// summary of its counters in <see cref="IngestMessage"/>, and redirects back to the page.
/// </summary>
/// <returns>A redirect to this page.</returns>
public async Task<IActionResult> OnPostRunIngestionAsync()
{
    var stats = await _ingest.RunAsync();

    // Assemble the summary in two halves: what entered the queues, then what was rejected.
    var acceptedPart = $"جمع‌آوری انجام شد — {stats.TotalQueued} در صف، {stats.TotalFlagged} پرچم‌خورده، ";
    var rejectedPart = $"{stats.TotalSpam} اسپم، {stats.TotalDuplicates} تکراری.";
    IngestMessage = acceptedPart + rejectedPart;

    return RedirectToPage();
}
private async Task LoadAsync() private async Task LoadAsync()
{ {
Queue = await _db.RawListings Queue = await _db.RawListings
.Where(r => r.Status == RawListingStatus.New) .Where(r => r.Status == RawListingStatus.New)
.OrderByDescending(r => r.Confidence).ThenByDescending(r => r.FetchedAt).ToListAsync();
Flagged = await _db.RawListings
.Where(r => r.Status == RawListingStatus.Flagged)
.OrderByDescending(r => r.FetchedAt).ToListAsync(); .OrderByDescending(r => r.FetchedAt).ToListAsync();
Sources = _ingest.Sources;
PublishedShifts = await _db.Shifts.CountAsync(s => s.Source != ShiftSource.Direct); PublishedShifts = await _db.Shifts.CountAsync(s => s.Source != ShiftSource.Direct);
PublishedJobs = await _db.JobOpenings.CountAsync(); PublishedJobs = await _db.JobOpenings.CountAsync();
} }
@@ -0,0 +1,42 @@
@model JobsMedical.Web.Models.Shift
@using System.Globalization
@*
    24-hour timeline bar for a single shift.
    The filled part runs from StartTime to EndTime as a percentage of a 1440-minute day.
    Overnight shifts (end before start) are drawn as two segments: start → midnight and
    midnight → end. On-call shifts, and shifts whose start equals their end, fill the whole bar.
*@
@{
    var s = Model;
    // Percentages go into inline CSS, so they must use '.' as the decimal separator.
    var ci = CultureInfo.InvariantCulture;

    // Minutes since midnight for both ends of the shift.
    int sm = s.StartTime.Hour * 60 + s.StartTime.Minute;
    int em = s.EndTime.Hour * 60 + s.EndTime.Minute;

    // CSS modifier class for the fill; any type other than Day/Evening/Night falls back to "oncall".
    var typeClass = s.ShiftType switch
    {
        ShiftType.Day => "day",
        ShiftType.Evening => "evening",
        ShiftType.Night => "night",
        _ => "oncall",
    };

    // Build one or two segments (overnight shifts wrap past midnight). On-call = whole day.
    var segs = new List<(double left, double width)>();
    if (s.ShiftType == ShiftType.OnCall || em == sm)
    {
        segs.Add((0, 100));
    }
    else if (em > sm)
    {
        segs.Add((sm / 1440.0 * 100, (em - sm) / 1440.0 * 100));
    }
    else
    {
        segs.Add((sm / 1440.0 * 100, (1440 - sm) / 1440.0 * 100));
        // A shift ending exactly at 00:00 has nothing after midnight; skip the
        // second segment rather than emit an empty zero-width span.
        if (em > 0)
        {
            segs.Add((0, em / 1440.0 * 100));
        }
    }

    // Formats a percentage with at most two decimals, culture-invariant.
    string Pct(double v) => v.ToString("0.##", ci);
}
<div class="hourbar-wrap" title="@JalaliDate.Time(s.StartTime) تا @JalaliDate.Time(s.EndTime)">
    <div class="hourbar">
        <span class="hourbar-grid" style="left:25%"></span>
        <span class="hourbar-grid" style="left:50%"></span>
        <span class="hourbar-grid" style="left:75%"></span>
        @foreach (var seg in segs)
        {
            <span class="hourbar-fill @typeClass" style="left:@Pct(seg.left)%; width:@Pct(seg.width)%"></span>
        }
    </div>
    <div class="hourbar-axis">
        <span>۰</span><span>۶</span><span>۱۲</span><span>۱۸</span><span>۲۴</span>
    </div>
</div>
@@ -0,0 +1,20 @@
@model JobsMedical.Web.Models.RawListing
@*
    One row of the admin review queue: source, confidence badge, fetch date, raw text,
    optional validation notes, and a link to the review page for this listing.
*@
@{
    var confidence = Model.Confidence;

    // Badge class by score band: 70 and above, 50–69, and everything below 50.
    var badgeClass = confidence switch
    {
        >= 70 => "badge-verified",
        >= 50 => "badge-day",
        _ => "badge-type",
    };

    // The notes line is rendered only when the validator recorded something.
    var hasNotes = !string.IsNullOrEmpty(Model.ValidationNotes);
}
<div class="card card-pad" style="margin-bottom:12px;">
    <div class="row" style="display:flex; justify-content:space-between; align-items:center; gap:8px; flex-wrap:wrap;">
        <strong>@Model.SourceChannel</strong>
        <span style="display:flex; gap:8px; align-items:center;">
            <span class="badge @badgeClass">اطمینان @JalaliDate.ToPersianDigits(confidence.ToString())٪</span>
            <span class="muted" style="font-size:12px;">@JalaliDate.ToLongDate(DateOnly.FromDateTime(Model.FetchedAt))</span>
        </span>
    </div>
    <p style="margin:10px 0; white-space:pre-wrap;">@Model.RawText</p>
    @if (hasNotes)
    {
        <p class="muted" style="font-size:12.5px; margin:0 0 10px;">⚠ @Model.ValidationNotes</p>
    }
    <a class="btn btn-accent" asp-page="/Admin/Review" asp-route-id="@Model.Id">بررسی و انتشار ←</a>
</div>
@@ -22,6 +22,7 @@
<span>📍 @s.Facility?.City?.Name</span> <span>📍 @s.Facility?.City?.Name</span>
</div> </div>
<div class="row">📅 @JalaliDate.WeekDayName(s.Date)، @JalaliDate.ToLongDate(s.Date) — 🕐 @JalaliDate.Time(s.StartTime)</div> <div class="row">📅 @JalaliDate.WeekDayName(s.Date)، @JalaliDate.ToLongDate(s.Date) — 🕐 @JalaliDate.Time(s.StartTime)</div>
<partial name="_HourBar" model="s" />
@* The "why" — what makes a pattern engine trustworthy: every pick is explained. *@ @* The "why" — what makes a pattern engine trustworthy: every pick is explained. *@
<div class="rec-reasons"> <div class="rec-reasons">
@@ -30,6 +30,7 @@
} }
<div class="row">📅 @JalaliDate.WeekDayName(Model.Date)، @JalaliDate.ToLongDate(Model.Date)</div> <div class="row">📅 @JalaliDate.WeekDayName(Model.Date)، @JalaliDate.ToLongDate(Model.Date)</div>
<div class="row">🕐 @JalaliDate.Time(Model.StartTime) تا @JalaliDate.Time(Model.EndTime)</div> <div class="row">🕐 @JalaliDate.Time(Model.StartTime) تا @JalaliDate.Time(Model.EndTime)</div>
<partial name="_HourBar" model="Model" />
<div class="foot"> <div class="foot">
<span class="pay">@JalaliDate.PayLabel(Model.PayType, Model.PayAmount, Model.SharePercent)</span> <span class="pay">@JalaliDate.PayLabel(Model.PayType, Model.PayAmount, Model.SharePercent)</span>
<span class="btn btn-outline" style="padding: 6px 14px;">جزئیات</span> <span class="btn btn-outline" style="padding: 6px 14px;">جزئیات</span>
@@ -50,6 +50,10 @@
<div class="info-row"><span class="k">مدت</span><span class="v">@JalaliDate.ToPersianDigits(s.DurationHours.ToString("0.#")) ساعت</span></div> <div class="info-row"><span class="k">مدت</span><span class="v">@JalaliDate.ToPersianDigits(s.DurationHours.ToString("0.#")) ساعت</span></div>
<div class="info-row"><span class="k">نقش مورد نیاز</span><span class="v">@(s.Role?.Name ?? s.SpecialtyRequired)</span></div> <div class="info-row"><span class="k">نقش مورد نیاز</span><span class="v">@(s.Role?.Name ?? s.SpecialtyRequired)</span></div>
<div class="info-row"><span class="k">پرداخت</span><span class="v" style="color:var(--primary-dark)">@JalaliDate.PayLabel(s.PayType, s.PayAmount, s.SharePercent)</span></div> <div class="info-row"><span class="k">پرداخت</span><span class="v" style="color:var(--primary-dark)">@JalaliDate.PayLabel(s.PayType, s.PayAmount, s.SharePercent)</span></div>
<div style="padding-top:12px;">
<span class="k" style="font-size:13px; color:var(--muted);">بازه ساعت کاری در شبانه‌روز</span>
<partial name="_HourBar" model="s" />
</div>
</div> </div>
@if (!string.IsNullOrEmpty(s.Description)) @if (!string.IsNullOrEmpty(s.Description))
+17
View File
@@ -21,6 +21,23 @@ builder.Services.AddScoped<OtpService>();
// Listing parser: heuristic now; swap for an LLM-backed IListingParser later. // Listing parser: heuristic now; swap for an LLM-backed IListingParser later.
builder.Services.AddSingleton<IListingParser, HeuristicListingParser>(); builder.Services.AddSingleton<IListingParser, HeuristicListingParser>();
// Scrape/ingestion engine: pluggable sources → dedupe → parse → validate → review queue.
// All engine settings live under the "Ingestion" configuration section; each external
// source binds its own child section ("Ingestion:Telegram", "Ingestion:Divar").
var ingestionSection = builder.Configuration.GetSection("Ingestion");
builder.Services.Configure<JobsMedical.Web.Services.Scraping.IngestionOptions>(ingestionSection);
builder.Services.Configure<JobsMedical.Web.Services.Scraping.TelegramOptions>(
    ingestionSection.GetSection("Telegram"));
builder.Services.Configure<JobsMedical.Web.Services.Scraping.DivarOptions>(
    ingestionSection.GetSection("Divar"));

// Validator and sources are registered as singletons; the service that uses the DbContext is scoped.
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.ListingValidator>();

// Every IListingSource registered here is injected into IngestionService as a collection,
// in registration order.
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.IListingSource, JobsMedical.Web.Services.Scraping.SampleListingSource>();
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.IListingSource, JobsMedical.Web.Services.Scraping.TelegramListingSource>();
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.IListingSource, JobsMedical.Web.Services.Scraping.DivarListingSource>();

builder.Services.AddScoped<JobsMedical.Web.Services.Scraping.IngestionService>();
// Periodic runner; it exits immediately unless Ingestion:Enabled is true.
builder.Services.AddHostedService<JobsMedical.Web.Services.Scraping.IngestionWorker>();
// Phone-OTP cookie auth. // Phone-OTP cookie auth.
builder.Services.AddAuthentication(CookieAuthenticationDefaults.AuthenticationScheme) builder.Services.AddAuthentication(CookieAuthenticationDefaults.AuthenticationScheme)
.AddCookie(o => .AddCookie(o =>
@@ -0,0 +1,42 @@
using Microsoft.Extensions.Options;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>Settings for the Divar source, bound from the "Ingestion:Divar" configuration section.</summary>
public class DivarOptions
{
    /// <summary>Switch for the source; it also needs at least one query to count as enabled.</summary>
    public bool Enabled { get; set; }

    /// <summary>City to search in, e.g. "tehran".</summary>
    public string? City { get; set; }

    /// <summary>Search terms, e.g. "استخدام پزشک".</summary>
    public List<string> Queries { get; set; } = new List<string>();
}
/// <summary>
/// Listing source for Divar. The fetch itself is still a stub: set the city and queries under
/// Ingestion:Divar and implement the call against Divar's listing API/HTML. Until the source
/// is enabled and has at least one query it reports itself as disabled.
/// </summary>
public class DivarListingSource : IListingSource
{
    private static readonly IReadOnlyList<ScrapedItem> NoItems = Array.Empty<ScrapedItem>();

    private readonly DivarOptions _opts;
    private readonly ILogger<DivarListingSource> _log;

    public DivarListingSource(IOptions<DivarOptions> opts, ILogger<DivarListingSource> log)
    {
        _log = log;
        _opts = opts.Value;
    }

    /// <summary>Display name of the source.</summary>
    public string Name => "دیوار";

    /// <summary>True only when the switch is on and at least one query is configured.</summary>
    public bool Enabled
    {
        get
        {
            if (!_opts.Enabled)
            {
                return false;
            }

            return _opts.Queries.Count > 0;
        }
    }

    /// <summary>Currently always yields an empty list; logs why (not configured vs. not implemented).</summary>
    public Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
    {
        if (Enabled)
        {
            // TODO(prod): query Divar for each term in the configured city, map each ad's
            // title+description to new ScrapedItem(Name, text, adUrl).
            _log.LogWarning("Divar fetch not yet implemented; returning empty.");
        }
        else
        {
            _log.LogInformation("Divar source not configured — skipping.");
        }

        return Task.FromResult(NoItems);
    }
}
@@ -0,0 +1,15 @@
namespace JobsMedical.Web.Services.Scraping;
/// <summary>One raw post pulled from a source (a Telegram message, a Divar ad, etc.).</summary>
/// <param name="Source">Label of the originating source; the ingestion service stores it as the listing's source channel.</param>
/// <param name="RawText">The post text as fetched; it is hashed for dedupe, parsed and validated downstream.</param>
/// <param name="SourceUrl">Optional link back to the original post; null when not supplied.</param>
public record ScrapedItem(string Source, string RawText, string? SourceUrl = null);
/// <summary>
/// A pluggable source the ingestion engine pulls from. Implement once per channel/site.
/// `Enabled` lets a source be present but dormant until it's configured with credentials.
/// </summary>
public interface IListingSource
{
    /// <summary>Name of the source, used in logs, per-source results and the source list.</summary>
    string Name { get; }
    /// <summary>Whether the ingestion engine should fetch from this source; disabled sources are skipped.</summary>
    bool Enabled { get; }
    /// <summary>
    /// Fetches the source's current posts. The ingestion engine catches and logs an exception
    /// thrown here and moves on to the next source.
    /// </summary>
    Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default);
}
@@ -0,0 +1,107 @@
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using JobsMedical.Web.Data;
using JobsMedical.Web.Models;
using Microsoft.EntityFrameworkCore;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>Counters for one source in one ingestion run.</summary>
/// <param name="Source">Name of the source.</param>
/// <param name="Fetched">Items returned by the source.</param>
/// <param name="Queued">Items stored with status New.</param>
/// <param name="Flagged">Items stored with status Flagged.</param>
/// <param name="Spam">Items stored with status Discarded.</param>
/// <param name="Duplicates">Items skipped because their content hash already existed.</param>
public record SourceResult(
    string Source,
    int Fetched,
    int Queued,
    int Flagged,
    int Spam,
    int Duplicates);

/// <summary>Result of one ingestion run: the per-source counters plus their totals.</summary>
public record IngestionSummary(List<SourceResult> Sources)
{
    /// <summary>Sum of <see cref="SourceResult.Queued"/> over all sources.</summary>
    public int TotalQueued => Total(r => r.Queued);

    /// <summary>Sum of <see cref="SourceResult.Flagged"/> over all sources.</summary>
    public int TotalFlagged => Total(r => r.Flagged);

    /// <summary>Sum of <see cref="SourceResult.Spam"/> over all sources.</summary>
    public int TotalSpam => Total(r => r.Spam);

    /// <summary>Sum of <see cref="SourceResult.Duplicates"/> over all sources.</summary>
    public int TotalDuplicates => Total(r => r.Duplicates);

    // Adds up one counter across every source result.
    private int Total(Func<SourceResult, int> pick) => Sources.Sum(pick);
}
/// <summary>
/// The scrape engine. Pulls from every enabled <see cref="IListingSource"/>, dedupes by content
/// hash, parses with <see cref="IListingParser"/>, validates with <see cref="ListingValidator"/>,
/// and stores each as a <see cref="RawListing"/> with a status: New (queued for review),
/// Flagged (incomplete/suspicious), or Discarded (spam). Source-agnostic — add a source and it
/// flows through unchanged.
/// </summary>
public class IngestionService
{
    // Same pattern the hash has always used; cached so it is not re-resolved per item.
    private static readonly Regex WhitespaceRun = new(@"\s+", RegexOptions.Compiled);

    private readonly AppDbContext _db;
    private readonly IEnumerable<IListingSource> _sources;
    private readonly IListingParser _parser;
    private readonly ListingValidator _validator;
    private readonly ILogger<IngestionService> _log;

    public IngestionService(AppDbContext db, IEnumerable<IListingSource> sources,
        IListingParser parser, ListingValidator validator, ILogger<IngestionService> log)
    {
        _db = db;
        _sources = sources;
        _parser = parser;
        _validator = validator;
        _log = log;
    }

    /// <summary>Every registered source with its current enabled state (for the admin source list).</summary>
    public IReadOnlyList<(string Name, bool Enabled)> Sources =>
        _sources.Select(s => (s.Name, s.Enabled)).ToList();

    /// <summary>
    /// Runs one ingestion pass over all enabled sources and returns per-source counters.
    /// A source whose fetch throws is logged and skipped; it does not abort the run.
    /// </summary>
    /// <param name="ct">Cancels the run; cancellation is propagated, not swallowed.</param>
    /// <returns>One <see cref="SourceResult"/> per source that was fetched successfully.</returns>
    public async Task<IngestionSummary> RunAsync(CancellationToken ct = default)
    {
        var roles = await _db.Roles.Select(r => r.Name).ToListAsync(ct);
        var cities = await _db.Cities.Select(c => c.Name).ToListAsync(ct);
        var districts = await _db.Districts.Select(d => d.Name).ToListAsync(ct);

        var results = new List<SourceResult>();

        // Hashes already accepted in this run. The database check below cannot see rows that
        // were added but not saved yet, so identical posts inside one batch need this set.
        var seenThisRun = new HashSet<string>(StringComparer.Ordinal);

        foreach (var source in _sources.Where(s => s.Enabled))
        {
            int fetched = 0, queued = 0, flagged = 0, spam = 0, dupes = 0;
            IReadOnlyList<ScrapedItem> items;
            try
            {
                items = await source.FetchAsync(ct);
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // The caller asked us to stop: do not treat it as a failed source.
                throw;
            }
            catch (Exception ex)
            {
                _log.LogError(ex, "Source {Source} fetch failed", source.Name);
                continue;
            }

            foreach (var item in items)
            {
                fetched++;

                // A source may hand over a post without text (e.g. a media-only message).
                var rawText = item.RawText ?? "";
                var hash = Hash(rawText);

                if (!seenThisRun.Add(hash)
                    || await _db.RawListings.AnyAsync(r => r.ContentHash == hash, ct))
                {
                    dupes++;
                    continue;
                }

                var parsed = _parser.Parse(rawText, roles, cities, districts);
                var val = _validator.Validate(rawText, parsed);

                var status = val.IsSpam ? RawListingStatus.Discarded
                    : val.IsValid ? RawListingStatus.New
                    : RawListingStatus.Flagged;

                if (status == RawListingStatus.New) queued++;
                else if (status == RawListingStatus.Flagged) flagged++;
                else spam++;

                _db.RawListings.Add(new RawListing
                {
                    SourceChannel = item.Source,
                    SourceUrl = item.SourceUrl,
                    RawText = rawText.Trim(),
                    ContentHash = hash,
                    Confidence = val.Confidence,
                    ValidationNotes = val.Issues.Count > 0 ? string.Join("؛ ", val.Issues) : null,
                    Status = status,
                });
            }

            // One save per source, so a later source's failure does not lose this batch.
            await _db.SaveChangesAsync(ct);
            results.Add(new SourceResult(source.Name, fetched, queued, flagged, spam, dupes));
            _log.LogInformation("Ingestion {Source}: fetched={F} queued={Q} flagged={Fl} spam={S} dupes={D}",
                source.Name, fetched, queued, flagged, spam, dupes);
        }

        return new IngestionSummary(results);
    }

    /// <summary>SHA-256 hex of the whitespace-normalized text (for cross-run dedupe).</summary>
    private static string Hash(string text)
    {
        var normalized = WhitespaceRun.Replace((text ?? "").Trim(), " ");
        var bytes = SHA256.HashData(Encoding.UTF8.GetBytes(normalized));
        return Convert.ToHexString(bytes).ToLowerInvariant();
    }
}
@@ -0,0 +1,59 @@
using Microsoft.Extensions.Options;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>Settings for the scheduled ingestion worker, bound from the "Ingestion" configuration section.</summary>
public class IngestionOptions
{
    /// <summary>Whether the scheduled worker runs. Off by default — opt in via config.</summary>
    public bool Enabled { get; set; }

    /// <summary>Minutes between scheduled runs; defaults to 30.</summary>
    public int IntervalMinutes { get; set; } = 30;
}
/// <summary>
/// Periodically runs the ingestion engine when enabled (Ingestion:Enabled=true). Off by default
/// so nothing scrapes uninvited; admins can also trigger a run on demand from the admin UI.
/// </summary>
public class IngestionWorker : BackgroundService
{
    private readonly IServiceScopeFactory _scopes;
    private readonly IngestionOptions _opts;
    private readonly ILogger<IngestionWorker> _log;

    public IngestionWorker(IServiceScopeFactory scopes, IOptions<IngestionOptions> opts,
        ILogger<IngestionWorker> log)
    {
        _scopes = scopes;
        _opts = opts.Value;
        _log = log;
    }

    /// <summary>
    /// Runs an ingestion pass, waits for the configured interval, and repeats until the host
    /// stops. Returns immediately when the worker is disabled. A failed run is logged and the
    /// loop carries on with the next interval.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host on shutdown.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        if (!_opts.Enabled)
        {
            _log.LogInformation("Ingestion worker disabled (Ingestion:Enabled=false).");
            return;
        }

        // Clamp to at least one minute, and log the value that is actually used.
        var minutes = Math.Max(1, _opts.IntervalMinutes);
        var interval = TimeSpan.FromMinutes(minutes);
        _log.LogInformation("Ingestion worker on; every {Min} min.", minutes);

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                // IngestionService is scoped, so each run gets its own scope.
                using var scope = _scopes.CreateScope();
                var svc = scope.ServiceProvider.GetRequiredService<IngestionService>();
                var summary = await svc.RunAsync(stoppingToken);
                _log.LogInformation("Scheduled ingestion: queued={Q} flagged={F} spam={S} dupes={D}",
                    summary.TotalQueued, summary.TotalFlagged, summary.TotalSpam, summary.TotalDuplicates);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Host shutdown: leave quietly.
                break;
            }
            catch (Exception ex)
            {
                // Includes cancellations NOT caused by shutdown (e.g. a timeout inside a run).
                // Letting those escape ExecuteAsync would fault the background service.
                _log.LogError(ex, "Scheduled ingestion run failed");
            }

            try { await Task.Delay(interval, stoppingToken); }
            catch (OperationCanceledException) { break; }
        }
    }
}
@@ -0,0 +1,63 @@
using System.Text.RegularExpressions;
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>Outcome of validating one parsed listing.</summary>
/// <param name="IsValid">True when the listing is not spam, looks medical, and scored at least 50.</param>
/// <param name="IsSpam">True when the text contains a spam marker and no medical marker.</param>
/// <param name="Confidence">Completeness score, clamped to 0–100.</param>
/// <param name="Issues">Human-readable (Persian) notes on what was missing or suspicious.</param>
public record ValidationResult(bool IsValid, bool IsSpam, int Confidence, List<string> Issues);
/// <summary>
/// Scores a parsed listing for completeness and screens out spam. A listing must look like a
/// real medical shift/job (role + a location or pay signal, plausible length, contact) to pass.
/// The confidence drives whether it lands in the review queue (New), gets Flagged for a closer
/// look, or is auto-discarded as spam.
/// </summary>
public class ListingValidator
{
    // Posts that smell like ads/scams rather than medical shifts.
    private static readonly string[] SpamMarkers =
    {
        "سرمایه گذاری", "سرمایه‌گذاری", "وام", "ارز دیجیتال", "رمز ارز", "فروش فالوور",
        "بک لینک", "تبلیغات", "قرعه کشی", "جایزه", "کازینو", "شرط بندی", "بیت کوین"
    };

    private static readonly string[] MedicalMarkers =
    {
        "شیفت", "درمانگاه", "بیمارستان", "کلینیک", "پزشک", "پرستار", "ماما", "تکنسین",
        "اورژانس", "استخدام", "کادر درمان", "مطب", "آنکال", "کشیک"
    };

    // Cached so the pattern is not re-resolved on every validation.
    private static readonly Regex LinkPattern = new(@"https?://", RegexOptions.Compiled);

    /// <summary>Validates one listing and returns its verdict, score and issue notes.</summary>
    /// <param name="rawText">The post text as scraped; null is treated as empty.</param>
    /// <param name="parsed">What the parser extracted from <paramref name="rawText"/>.</param>
    /// <returns>Spam flag, validity flag, a 0–100 confidence score and the list of issues found.</returns>
    public ValidationResult Validate(string rawText, ParsedListing parsed)
    {
        var issues = new List<string>();
        var text = rawText ?? "";

        // Marker matching runs on a copy with Arabic yeh/kaf folded to their Persian forms,
        // so posts typed on an Arabic keyboard ("پزشك", "كلينيك") are still recognised.
        var matchText = FoldArabicLetters(text);
        bool looksMedical = ContainsAny(matchText, MedicalMarkers);

        // Spam only when a spam marker appears with no medical marker at all.
        bool isSpam = !looksMedical && ContainsAny(matchText, SpamMarkers);
        if (isSpam) issues.Add("به‌نظر اسپم/تبلیغاتی است");
        if (!looksMedical) issues.Add("نشانه‌ای از حوزه درمان یافت نشد");

        int score = 0;
        if (parsed.RoleName is not null) score += 30; else issues.Add("نقش مشخص نیست");
        if (parsed.CityName is not null || parsed.DistrictName is not null) score += 20;
        else issues.Add("شهر/محل مشخص نیست");
        if (parsed.PayAmount is not null || parsed.SharePercent is not null || parsed.PayNegotiable)
            score += 20; else issues.Add("اطلاعات پرداخت یافت نشد");
        if (parsed.Phone is not null) score += 15; else issues.Add("شماره تماس یافت نشد");
        if (parsed.Kind == ListingKind.Shift && parsed.ShiftType is not null) score += 10;
        if (looksMedical) score += 5;

        // Sanity on length — a few words isn't a real listing; a wall of text is suspicious.
        var len = text.Trim().Length;
        if (len < 25) { score -= 20; issues.Add("متن خیلی کوتاه است"); }
        if (len > 1500) { score -= 10; issues.Add("متن غیرعادی بلند است"); }
        if (LinkPattern.Matches(text).Count >= 3) { score -= 15; issues.Add("لینک‌های متعدد"); }

        score = Math.Clamp(score, 0, 100);

        // Valid enough for the queue if it's medical, not spam, and reasonably complete.
        bool isValid = !isSpam && looksMedical && score >= 50;
        return new ValidationResult(isValid, isSpam, score, issues);
    }

    // True when the (already folded) text contains any of the markers, compared ordinally.
    private static bool ContainsAny(string foldedText, string[] markers) =>
        markers.Any(m => foldedText.Contains(FoldArabicLetters(m), StringComparison.Ordinal));

    // Maps Arabic yeh (U+064A) and kaf (U+0643) to Persian yeh (U+06CC) and keheh (U+06A9).
    private static string FoldArabicLetters(string value) =>
        value.Replace('\u064A', '\u06CC').Replace('\u0643', '\u06A9');
}
@@ -0,0 +1,27 @@
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Built-in source that serves a fixed set of representative Persian posts, of the kind seen
/// in shift channels. It needs no credentials and is always enabled, so the full
/// ingestion → validation → review pipeline can run and be demoed without any external source.
/// The set deliberately mixes good, incomplete and spam posts.
/// </summary>
public class SampleListingSource : IListingSource
{
    private static readonly string[] Posts =
    {
        "درمانگاه شبانه‌روزی در سعادت‌آباد نیازمند پزشک عمومی برای شیفت شب، کارانه ۳ میلیون تومان. تماس ۰۹۱۲۳۴۵۶۷۸۹",
        "کلینیک تخصصی در تهران به پرستار برای شیفت عصر نیازمند است، ۵۰٪ سهم درآمد. ۰۹۳۵۱۱۱۲۲۳۳",
        "استخدام ماما تمام‌وقت در بیمارستان خصوصی، حقوق توافقی. منطقه شهرک غرب.",
        "نیازمند تکنسین اتاق عمل جهت همکاری در نارمک، شیفت صبح. ۰۹۱۲۰۰۰۰۰۰۰",
        "فروش فالوور و بک لینک ارزان، سرمایه گذاری در ارز دیجیتال با سود تضمینی!", // spam
        "پزشک", // too short / incomplete
        "بیمارستان آتیه جهت تکمیل کادر درمان به پزشک عمومی مقیم نیازمند است. قرارداد یک‌ساله، حقوق ۴۵ میلیون ماهانه. تهرانپارس.",
    };

    /// <summary>Display name; also used as the source label of every item returned.</summary>
    public string Name => "نمونه (کانال آزمایشی)";

    /// <summary>Always on.</summary>
    public bool Enabled => true;

    /// <summary>Returns one <see cref="ScrapedItem"/> per sample post, without a source URL.</summary>
    public Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
    {
        var items = new List<ScrapedItem>(Posts.Length);
        foreach (var post in Posts)
        {
            items.Add(new ScrapedItem(Name, post));
        }

        return Task.FromResult<IReadOnlyList<ScrapedItem>>(items);
    }
}
@@ -0,0 +1,44 @@
using Microsoft.Extensions.Options;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>Settings for the Telegram/Bale source, bound from the "Ingestion:Telegram" configuration section.</summary>
public class TelegramOptions
{
    /// <summary>Switch for the source; it also needs a bot token and at least one channel to count as enabled.</summary>
    public bool Enabled { get; set; }

    /// <summary>Bot token used to authenticate; null until configured.</summary>
    public string? BotToken { get; set; }

    /// <summary>@channel handles to read.</summary>
    public List<string> Channels { get; set; } = new List<string>();
}
/// <summary>
/// Listing source for Telegram/Bale channels. The fetch itself is still a stub: set a bot token
/// and channel list under Ingestion:Telegram and implement the call against the Bot API
/// (getUpdates / channel posts) or a userbot. Until it is fully configured the source reports
/// itself as disabled, so the engine runs without it.
/// </summary>
public class TelegramListingSource : IListingSource
{
    private static readonly IReadOnlyList<ScrapedItem> NoItems = Array.Empty<ScrapedItem>();

    private readonly TelegramOptions _opts;
    private readonly ILogger<TelegramListingSource> _log;

    public TelegramListingSource(IOptions<TelegramOptions> opts, ILogger<TelegramListingSource> log)
    {
        _log = log;
        _opts = opts.Value;
    }

    /// <summary>Display name of the source.</summary>
    public string Name => "تلگرام/بله";

    /// <summary>True only when the switch is on, a bot token is set and at least one channel is listed.</summary>
    public bool Enabled
    {
        get
        {
            if (!_opts.Enabled)
            {
                return false;
            }

            if (string.IsNullOrWhiteSpace(_opts.BotToken))
            {
                return false;
            }

            return _opts.Channels.Count > 0;
        }
    }

    /// <summary>Currently always yields an empty list; logs why (not configured vs. not implemented).</summary>
    public Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
    {
        if (Enabled)
        {
            // TODO(prod): call https://api.telegram.org/bot{token}/getUpdates (or channel history),
            // map each message to new ScrapedItem(Name, message.Text, messageLink). The validation +
            // dedupe pipeline downstream is already source-agnostic.
            _log.LogWarning("Telegram fetch not yet implemented; returning empty.");
        }
        else
        {
            _log.LogInformation("Telegram source not configured — skipping.");
        }

        return Task.FromResult(NoItems);
    }
}
+6
View File
@@ -11,5 +11,11 @@
}, },
"Auth": { "Auth": {
"AdminPhone": "09120000000" "AdminPhone": "09120000000"
},
"Ingestion": {
"Enabled": false,
"IntervalMinutes": 30,
"Telegram": { "Enabled": false, "BotToken": "", "Channels": [] },
"Divar": { "Enabled": false, "City": "tehran", "Queries": [] }
} }
} }
+14
View File
@@ -188,6 +188,20 @@ label { font-size: 13px; }
.alert { padding: 12px 16px; border-radius: 10px; margin-bottom: 16px; font-weight: 600; } .alert { padding: 12px 16px; border-radius: 10px; margin-bottom: 16px; font-weight: 600; }
.alert-success { background: var(--primary-soft); color: var(--primary-dark); } .alert-success { background: var(--primary-soft); color: var(--primary-dark); }
/* hour-range timeline bar: a 24h track with one or two coloured segments per shift */

/* Always laid out left-to-right, so 0h is on the left even on RTL pages. */
.hourbar-wrap {
  direction: ltr;
  margin: 6px 0 2px;
}

/* The track; fills are absolutely positioned inside it and clipped to its rounded ends. */
.hourbar {
  position: relative;
  height: 9px;
  background: #eef3f6;
  border-radius: 999px;
  overflow: hidden;
}

/* Faint vertical guide lines. */
.hourbar-grid {
  position: absolute;
  top: 0;
  bottom: 0;
  width: 1px;
  background: rgba(0,0,0,.06);
}

/* One filled segment; left/width come from inline styles. */
.hourbar-fill {
  position: absolute;
  top: 0;
  bottom: 0;
  border-radius: 999px;
}

/* Segment colour per shift type. */
.hourbar-fill.day {
  background: #f0a052;
}
.hourbar-fill.evening {
  background: #e07b3a;
}
.hourbar-fill.night {
  background: #5566c4;
}
.hourbar-fill.oncall {
  background: linear-gradient(90deg, #8a5cc0 25%, #b79be0 50%, #8a5cc0 75%);
}

/* Hour labels spread evenly under the track. */
.hourbar-axis {
  display: flex;
  justify-content: space-between;
  font-size: 10px;
  color: var(--muted);
  margin-top: 3px;
}
/* recommendation reason chips */ /* recommendation reason chips */
.rec-reasons { display: flex; flex-direction: column; gap: 4px; margin: 2px 0; } .rec-reasons { display: flex; flex-direction: column; gap: 4px; margin: 2px 0; }
.rec-reason { font-size: 12px; color: var(--primary-dark); font-weight: 600; } .rec-reason { font-size: 12px; color: var(--primary-dark); font-weight: 600; }