From 36bb165438a2c55f9f2276cfe9e927097406a829 Mon Sep 17 00:00:00 2001
From: "soroush.asadi"
Date: Wed, 3 Jun 2026 17:41:02 +0330
Subject: [PATCH] Real channel fetch (Telegram/Bale/Divar) + AI-audited
automation engine + CI/CD
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- Fetch: Telegram via t.me/s, Bale via Bot API, Divar via web-search (HttpClient, config-gated, graceful)
- AI layer: DB-backed AppSetting (mode auto/manual, thresholds, AI endpoint/model/key/prompt/framework, auto-approve); OpenAI-compatible IAiAuditor (self-host/Iranian endpoints; fails safe to manual)
- Pipeline: fetch → dedupe(hash) → parse → validate → AI audit → Discard/Flag/Queue/auto-publish (resolve-or-create facility)
- Admin: /Admin/Settings automation+AI panel; queue shows confidence + AI verdict; flagged section
- CI/CD: Dockerfile, docker-compose.prod.yml, .gitea/workflows/ci-cd.yml, nginx vhost, DEPLOY.md; forwarded headers + /healthz + prod reference-only seed; ports 22/80/443 only
Co-Authored-By: Claude Opus 4.8
---
DEPLOY.md | 19 +
src/JobsMedical.Web/Data/AppDbContext.cs | 1 +
...140343_AiSettingsAndAutomation.Designer.cs | 833 ++++++++++++++++++
.../20260603140343_AiSettingsAndAutomation.cs | 44 +
.../Migrations/AppDbContextModelSnapshot.cs | 45 +
src/JobsMedical.Web/Models/AppSetting.cs | 50 ++
src/JobsMedical.Web/Models/Enums.cs | 7 +
src/JobsMedical.Web/Pages/Admin/Index.cshtml | 1 +
.../Pages/Admin/Settings.cshtml | 67 ++
.../Pages/Admin/Settings.cshtml.cs | 54 ++
src/JobsMedical.Web/Program.cs | 15 +-
.../Services/Scraping/AiAuditor.cs | 108 +++
.../Services/Scraping/BaleListingSource.cs | 68 ++
.../Services/Scraping/DivarListingSource.cs | 70 +-
.../Services/Scraping/IngestionService.cs | 188 +++-
.../Services/Scraping/SettingsService.cs | 40 +
.../Scraping/TelegramListingSource.cs | 67 +-
src/JobsMedical.Web/appsettings.json | 5 +-
18 files changed, 1614 insertions(+), 68 deletions(-)
create mode 100644 src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.Designer.cs
create mode 100644 src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.cs
create mode 100644 src/JobsMedical.Web/Models/AppSetting.cs
create mode 100644 src/JobsMedical.Web/Pages/Admin/Settings.cshtml
create mode 100644 src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs
create mode 100644 src/JobsMedical.Web/Services/Scraping/AiAuditor.cs
create mode 100644 src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs
create mode 100644 src/JobsMedical.Web/Services/Scraping/SettingsService.cs
diff --git a/DEPLOY.md b/DEPLOY.md
index 9b6dba8..cca40f5 100644
--- a/DEPLOY.md
+++ b/DEPLOY.md
@@ -69,7 +69,26 @@ ConnectionStrings__Default=Host=db;Port=5432;Database=hamkadr;Username=hamkadr;P
Auth__AdminPhone=09XXXXXXXXX
# Future: Kavenegar / SMS.ir keys for real OTP delivery
+
+# --- Channel scraping (optional; off by default) ---
+# Enable the background worker plus each source you want; enabled sources are then fetched on a timer (see Ingestion__IntervalMinutes).
+# Ingestion__Enabled=true
+# Ingestion__IntervalMinutes=30
+# Telegram (public channels via t.me/s — no token needed):
+# Ingestion__Telegram__Enabled=true
+# Ingestion__Telegram__Channels__0=shift_channel_username
+# Ingestion__Telegram__Channels__1=another_channel
+# Bale (bot must be a member of the channel; Telegram-style Bot API):
+# Ingestion__Bale__Enabled=true
+# Ingestion__Bale__BotToken=__BALE_BOT_TOKEN__
+# Divar (best-effort web-search):
+# Ingestion__Divar__Enabled=true
+# Ingestion__Divar__Queries__0=استخدام پزشک
+# Ingestion__Divar__Queries__1=پرستار
```
+> The **AI audit layer** is configured at runtime in the admin panel (`/Admin/Settings`) — endpoint,
+> model, API key, prompt/framework, and auto-approve — not via environment variables. Defaults: AI off
+> and mode = Manual, so every ingested listing waits in the review queue until an admin publishes it.
> `POSTGRES_PASSWORD` and the password in `ConnectionStrings__Default` must be identical.
> `ASPNETCORE_ENVIRONMENT=Production` ⇒ only **reference data** (roles/cities/districts) is seeded —
> no demo facilities/shifts. Real employers add listings via the employer panel.
diff --git a/src/JobsMedical.Web/Data/AppDbContext.cs b/src/JobsMedical.Web/Data/AppDbContext.cs
index 4550f75..f62769e 100644
--- a/src/JobsMedical.Web/Data/AppDbContext.cs
+++ b/src/JobsMedical.Web/Data/AppDbContext.cs
@@ -20,6 +20,7 @@ public class AppDbContext : DbContext
public DbSet Visitors => Set();
public DbSet UserPreferences => Set();
public DbSet InterestEvents => Set();
+ public DbSet AppSettings => Set();
protected override void OnModelCreating(ModelBuilder b)
{
diff --git a/src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.Designer.cs b/src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.Designer.cs
new file mode 100644
index 0000000..c9e65bc
--- /dev/null
+++ b/src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.Designer.cs
@@ -0,0 +1,833 @@
+//
+using System;
+using JobsMedical.Web.Data;
+using Microsoft.EntityFrameworkCore;
+using Microsoft.EntityFrameworkCore.Infrastructure;
+using Microsoft.EntityFrameworkCore.Migrations;
+using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
+using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
+
+#nullable disable
+
+namespace JobsMedical.Web.Migrations
+{
+ // Design-time companion of the AiSettingsAndAutomation migration. BuildTargetModel declares the
+ // complete EF Core model (every entity, not only the new one) as it stands once this migration
+ // has been applied. NOTE(review): this file appears to be tool-generated — regenerate it with
+ // the EF Core tooling rather than editing the model calls by hand.
+ [DbContext(typeof(AppDbContext))]
+ [Migration("20260603140343_AiSettingsAndAutomation")]
+ partial class AiSettingsAndAutomation
+ {
+ /// <summary>
+ /// Describes the target model for this migration: first every entity's columns, keys and
+ /// indexes, then the relationships between them, then the inverse navigations.
+ /// </summary>
+ protected override void BuildTargetModel(ModelBuilder modelBuilder)
+ {
+#pragma warning disable 612, 618
+ modelBuilder
+ .HasAnnotation("ProductVersion", "10.0.0")
+ .HasAnnotation("Relational:MaxIdentifierLength", 63);
+
+ NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder);
+
+ // AppSetting maps to "AppSettings" — the only table the companion migration's Up() creates.
+ modelBuilder.Entity("JobsMedical.Web.Models.AppSetting", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("AiApiKey")
+ .HasMaxLength(200)
+ .HasColumnType("character varying(200)");
+
+ b.Property("AiAutoApprove")
+ .HasColumnType("boolean");
+
+ b.Property("AiEnabled")
+ .HasColumnType("boolean");
+
+ b.Property("AiEndpoint")
+ .HasMaxLength(500)
+ .HasColumnType("character varying(500)");
+
+ b.Property("AiModel")
+ .HasMaxLength(120)
+ .HasColumnType("character varying(120)");
+
+ b.Property("AiSystemPrompt")
+ .IsRequired()
+ .HasMaxLength(4000)
+ .HasColumnType("character varying(4000)");
+
+ b.Property("AutoPublishMinConfidence")
+ .HasColumnType("integer");
+
+ b.Property("Mode")
+ .HasColumnType("integer");
+
+ b.Property("UpdatedAt")
+ .HasColumnType("timestamp with time zone");
+
+ b.HasKey("Id");
+
+ b.ToTable("AppSettings");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.Application", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("CreatedAt")
+ .HasColumnType("timestamp with time zone");
+
+ b.Property("DoctorId")
+ .HasColumnType("integer");
+
+ b.Property("Message")
+ .HasMaxLength(500)
+ .HasColumnType("character varying(500)");
+
+ b.Property("ShiftId")
+ .HasColumnType("integer");
+
+ b.Property("Status")
+ .HasColumnType("integer");
+
+ b.HasKey("Id");
+
+ b.HasIndex("DoctorId");
+
+ b.HasIndex("ShiftId", "DoctorId")
+ .IsUnique();
+
+ b.ToTable("Applications");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.City", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("IsActive")
+ .HasColumnType("boolean");
+
+ b.Property("Name")
+ .IsRequired()
+ .HasMaxLength(100)
+ .HasColumnType("character varying(100)");
+
+ b.Property("Province")
+ .IsRequired()
+ .HasMaxLength(100)
+ .HasColumnType("character varying(100)");
+
+ b.HasKey("Id");
+
+ b.ToTable("Cities");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("CityId")
+ .HasColumnType("integer");
+
+ b.Property("IsActive")
+ .HasColumnType("boolean");
+
+ b.Property("Name")
+ .IsRequired()
+ .HasMaxLength(120)
+ .HasColumnType("character varying(120)");
+
+ b.HasKey("Id");
+
+ b.HasIndex("CityId");
+
+ b.ToTable("Districts");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("Bio")
+ .HasMaxLength(1000)
+ .HasColumnType("character varying(1000)");
+
+ b.Property("CityId")
+ .HasColumnType("integer");
+
+ b.Property("IsVerified")
+ .HasColumnType("boolean");
+
+ b.Property("LicenseNo")
+ .HasMaxLength(20)
+ .HasColumnType("character varying(20)");
+
+ b.Property("RoleId")
+ .HasColumnType("integer");
+
+ b.Property("Specialty")
+ .IsRequired()
+ .HasMaxLength(100)
+ .HasColumnType("character varying(100)");
+
+ b.Property("UserId")
+ .HasColumnType("integer");
+
+ b.Property("YearsExperience")
+ .HasColumnType("integer");
+
+ b.HasKey("Id");
+
+ b.HasIndex("CityId");
+
+ b.HasIndex("RoleId");
+
+ b.HasIndex("UserId")
+ .IsUnique();
+
+ b.ToTable("DoctorProfiles");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("Address")
+ .HasMaxLength(500)
+ .HasColumnType("character varying(500)");
+
+ b.Property("BaleId")
+ .HasMaxLength(50)
+ .HasColumnType("character varying(50)");
+
+ b.Property("CityId")
+ .HasColumnType("integer");
+
+ b.Property("CreatedAt")
+ .HasColumnType("timestamp with time zone");
+
+ b.Property("DistrictId")
+ .HasColumnType("integer");
+
+ b.Property("IsVerified")
+ .HasColumnType("boolean");
+
+ b.Property("Lat")
+ .HasColumnType("double precision");
+
+ b.Property("Lng")
+ .HasColumnType("double precision");
+
+ b.Property("Name")
+ .IsRequired()
+ .HasMaxLength(200)
+ .HasColumnType("character varying(200)");
+
+ b.Property("OwnerUserId")
+ .HasColumnType("integer");
+
+ b.Property("Phone")
+ .HasMaxLength(20)
+ .HasColumnType("character varying(20)");
+
+ b.Property("Type")
+ .HasColumnType("integer");
+
+ b.HasKey("Id");
+
+ b.HasIndex("CityId");
+
+ b.HasIndex("DistrictId");
+
+ b.HasIndex("OwnerUserId");
+
+ b.ToTable("Facilities");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("bigint");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("CreatedAt")
+ .HasColumnType("timestamp with time zone");
+
+ b.Property("EventType")
+ .HasColumnType("integer");
+
+ b.Property("JobOpeningId")
+ .HasColumnType("integer");
+
+ b.Property("ShiftId")
+ .HasColumnType("integer");
+
+ b.Property("VisitorId")
+ .IsRequired()
+ .HasColumnType("character varying(36)");
+
+ b.HasKey("Id");
+
+ b.HasIndex("JobOpeningId");
+
+ b.HasIndex("ShiftId");
+
+ b.HasIndex("VisitorId", "CreatedAt");
+
+ b.ToTable("InterestEvents");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("CreatedAt")
+ .HasColumnType("timestamp with time zone");
+
+ b.Property("Description")
+ .HasMaxLength(2000)
+ .HasColumnType("character varying(2000)");
+
+ b.Property("EmploymentType")
+ .HasColumnType("integer");
+
+ b.Property("FacilityId")
+ .HasColumnType("integer");
+
+ b.Property("Requirements")
+ .HasMaxLength(1000)
+ .HasColumnType("character varying(1000)");
+
+ b.Property("RoleId")
+ .HasColumnType("integer");
+
+ b.Property("SalaryMax")
+ .HasColumnType("bigint");
+
+ b.Property("SalaryMin")
+ .HasColumnType("bigint");
+
+ b.Property("Source")
+ .HasColumnType("integer");
+
+ b.Property("SourceUrl")
+ .HasMaxLength(500)
+ .HasColumnType("character varying(500)");
+
+ b.Property("Status")
+ .HasColumnType("integer");
+
+ b.Property("Title")
+ .IsRequired()
+ .HasMaxLength(200)
+ .HasColumnType("character varying(200)");
+
+ b.HasKey("Id");
+
+ b.HasIndex("FacilityId");
+
+ b.HasIndex("RoleId");
+
+ b.HasIndex("Status");
+
+ b.ToTable("JobOpenings");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("Confidence")
+ .HasColumnType("integer");
+
+ b.Property("ContentHash")
+ .HasMaxLength(64)
+ .HasColumnType("character varying(64)");
+
+ b.Property("FetchedAt")
+ .HasColumnType("timestamp with time zone");
+
+ b.Property("LinkedShiftId")
+ .HasColumnType("integer");
+
+ b.Property("ParsedJson")
+ .HasColumnType("text");
+
+ b.Property("RawText")
+ .IsRequired()
+ .HasColumnType("text");
+
+ b.Property("SourceChannel")
+ .IsRequired()
+ .HasMaxLength(200)
+ .HasColumnType("character varying(200)");
+
+ b.Property("SourceUrl")
+ .HasMaxLength(500)
+ .HasColumnType("character varying(500)");
+
+ b.Property("Status")
+ .HasColumnType("integer");
+
+ b.Property("ValidationNotes")
+ .HasMaxLength(1000)
+ .HasColumnType("character varying(1000)");
+
+ b.HasKey("Id");
+
+ b.HasIndex("ContentHash");
+
+ b.HasIndex("LinkedShiftId");
+
+ b.HasIndex("Status");
+
+ b.ToTable("RawListings");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.Role", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("Category")
+ .IsRequired()
+ .HasMaxLength(50)
+ .HasColumnType("character varying(50)");
+
+ b.Property("IsActive")
+ .HasColumnType("boolean");
+
+ b.Property("Name")
+ .IsRequired()
+ .HasMaxLength(100)
+ .HasColumnType("character varying(100)");
+
+ b.Property("SortOrder")
+ .HasColumnType("integer");
+
+ b.HasKey("Id");
+
+ b.ToTable("Roles");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("CreatedAt")
+ .HasColumnType("timestamp with time zone");
+
+ b.Property("Date")
+ .HasColumnType("date");
+
+ b.Property("Description")
+ .HasMaxLength(1500)
+ .HasColumnType("character varying(1500)");
+
+ b.Property("EndTime")
+ .HasColumnType("time without time zone");
+
+ b.Property("FacilityId")
+ .HasColumnType("integer");
+
+ b.Property("PayAmount")
+ .HasColumnType("bigint");
+
+ b.Property("PayType")
+ .HasColumnType("integer");
+
+ b.Property("RoleId")
+ .HasColumnType("integer");
+
+ b.Property("SharePercent")
+ .HasColumnType("integer");
+
+ b.Property("ShiftType")
+ .HasColumnType("integer");
+
+ b.Property("Source")
+ .HasColumnType("integer");
+
+ b.Property("SourceUrl")
+ .HasMaxLength(500)
+ .HasColumnType("character varying(500)");
+
+ b.Property("SpecialtyRequired")
+ .IsRequired()
+ .HasMaxLength(100)
+ .HasColumnType("character varying(100)");
+
+ b.Property("StartTime")
+ .HasColumnType("time without time zone");
+
+ b.Property("Status")
+ .HasColumnType("integer");
+
+ b.HasKey("Id");
+
+ b.HasIndex("FacilityId");
+
+ b.HasIndex("RoleId");
+
+ b.HasIndex("Date", "Status");
+
+ b.ToTable("Shifts");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.User", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("CreatedAt")
+ .HasColumnType("timestamp with time zone");
+
+ b.Property("FullName")
+ .HasMaxLength(150)
+ .HasColumnType("character varying(150)");
+
+ b.Property("IsPhoneVerified")
+ .HasColumnType("boolean");
+
+ b.Property("Phone")
+ .IsRequired()
+ .HasMaxLength(20)
+ .HasColumnType("character varying(20)");
+
+ b.Property("Role")
+ .HasColumnType("integer");
+
+ b.HasKey("Id");
+
+ b.HasIndex("Phone")
+ .IsUnique();
+
+ b.ToTable("Users");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("CityId")
+ .HasColumnType("integer");
+
+ b.Property("MinPay")
+ .HasColumnType("bigint");
+
+ b.Property("PreferredShiftType")
+ .HasColumnType("integer");
+
+ b.Property("RoleId")
+ .HasColumnType("integer");
+
+ b.Property("UpdatedAt")
+ .HasColumnType("timestamp with time zone");
+
+ b.Property("VisitorId")
+ .IsRequired()
+ .HasColumnType("character varying(36)");
+
+ b.HasKey("Id");
+
+ b.HasIndex("CityId");
+
+ b.HasIndex("RoleId");
+
+ b.HasIndex("VisitorId")
+ .IsUnique();
+
+ b.ToTable("UserPreferences");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
+ {
+ b.Property("Id")
+ .HasMaxLength(36)
+ .HasColumnType("character varying(36)");
+
+ b.Property("CreatedAt")
+ .HasColumnType("timestamp with time zone");
+
+ b.Property("LastSeenAt")
+ .HasColumnType("timestamp with time zone");
+
+ b.Property("UserId")
+ .HasColumnType("integer");
+
+ b.HasKey("Id");
+
+ b.HasIndex("UserId");
+
+ b.ToTable("Visitors");
+ });
+
+ // Second pass: relationships (foreign keys and delete behaviors) between the entities above.
+ modelBuilder.Entity("JobsMedical.Web.Models.Application", b =>
+ {
+ b.HasOne("JobsMedical.Web.Models.User", "Doctor")
+ .WithMany("Applications")
+ .HasForeignKey("DoctorId")
+ .OnDelete(DeleteBehavior.Cascade)
+ .IsRequired();
+
+ b.HasOne("JobsMedical.Web.Models.Shift", "Shift")
+ .WithMany("Applications")
+ .HasForeignKey("ShiftId")
+ .OnDelete(DeleteBehavior.Cascade)
+ .IsRequired();
+
+ b.Navigation("Doctor");
+
+ b.Navigation("Shift");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
+ {
+ b.HasOne("JobsMedical.Web.Models.City", "City")
+ .WithMany()
+ .HasForeignKey("CityId")
+ .OnDelete(DeleteBehavior.Cascade)
+ .IsRequired();
+
+ b.Navigation("City");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b =>
+ {
+ b.HasOne("JobsMedical.Web.Models.City", "City")
+ .WithMany()
+ .HasForeignKey("CityId");
+
+ b.HasOne("JobsMedical.Web.Models.Role", "Role")
+ .WithMany()
+ .HasForeignKey("RoleId");
+
+ b.HasOne("JobsMedical.Web.Models.User", "User")
+ .WithOne("DoctorProfile")
+ .HasForeignKey("JobsMedical.Web.Models.DoctorProfile", "UserId")
+ .OnDelete(DeleteBehavior.Cascade)
+ .IsRequired();
+
+ b.Navigation("City");
+
+ b.Navigation("Role");
+
+ b.Navigation("User");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
+ {
+ b.HasOne("JobsMedical.Web.Models.City", "City")
+ .WithMany("Facilities")
+ .HasForeignKey("CityId")
+ .OnDelete(DeleteBehavior.Cascade)
+ .IsRequired();
+
+ b.HasOne("JobsMedical.Web.Models.District", "District")
+ .WithMany("Facilities")
+ .HasForeignKey("DistrictId")
+ .OnDelete(DeleteBehavior.SetNull);
+
+ b.HasOne("JobsMedical.Web.Models.User", "OwnerUser")
+ .WithMany()
+ .HasForeignKey("OwnerUserId")
+ .OnDelete(DeleteBehavior.SetNull);
+
+ b.Navigation("City");
+
+ b.Navigation("District");
+
+ b.Navigation("OwnerUser");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
+ {
+ b.HasOne("JobsMedical.Web.Models.JobOpening", "JobOpening")
+ .WithMany()
+ .HasForeignKey("JobOpeningId")
+ .OnDelete(DeleteBehavior.Cascade);
+
+ b.HasOne("JobsMedical.Web.Models.Shift", "Shift")
+ .WithMany()
+ .HasForeignKey("ShiftId")
+ .OnDelete(DeleteBehavior.Cascade);
+
+ b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor")
+ .WithMany("Events")
+ .HasForeignKey("VisitorId")
+ .OnDelete(DeleteBehavior.Cascade)
+ .IsRequired();
+
+ b.Navigation("JobOpening");
+
+ b.Navigation("Shift");
+
+ b.Navigation("Visitor");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b =>
+ {
+ b.HasOne("JobsMedical.Web.Models.Facility", "Facility")
+ .WithMany()
+ .HasForeignKey("FacilityId")
+ .OnDelete(DeleteBehavior.Cascade)
+ .IsRequired();
+
+ b.HasOne("JobsMedical.Web.Models.Role", "Role")
+ .WithMany()
+ .HasForeignKey("RoleId")
+ .OnDelete(DeleteBehavior.Restrict)
+ .IsRequired();
+
+ b.Navigation("Facility");
+
+ b.Navigation("Role");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b =>
+ {
+ b.HasOne("JobsMedical.Web.Models.Shift", "LinkedShift")
+ .WithMany()
+ .HasForeignKey("LinkedShiftId");
+
+ b.Navigation("LinkedShift");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
+ {
+ b.HasOne("JobsMedical.Web.Models.Facility", "Facility")
+ .WithMany("Shifts")
+ .HasForeignKey("FacilityId")
+ .OnDelete(DeleteBehavior.Cascade)
+ .IsRequired();
+
+ b.HasOne("JobsMedical.Web.Models.Role", "Role")
+ .WithMany("Shifts")
+ .HasForeignKey("RoleId")
+ .OnDelete(DeleteBehavior.Restrict)
+ .IsRequired();
+
+ b.Navigation("Facility");
+
+ b.Navigation("Role");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b =>
+ {
+ b.HasOne("JobsMedical.Web.Models.City", "City")
+ .WithMany()
+ .HasForeignKey("CityId");
+
+ b.HasOne("JobsMedical.Web.Models.Role", "Role")
+ .WithMany()
+ .HasForeignKey("RoleId");
+
+ b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor")
+ .WithOne("Preferences")
+ .HasForeignKey("JobsMedical.Web.Models.UserPreferences", "VisitorId")
+ .OnDelete(DeleteBehavior.Cascade)
+ .IsRequired();
+
+ b.Navigation("City");
+
+ b.Navigation("Role");
+
+ b.Navigation("Visitor");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
+ {
+ b.HasOne("JobsMedical.Web.Models.User", "User")
+ .WithMany()
+ .HasForeignKey("UserId")
+ .OnDelete(DeleteBehavior.SetNull);
+
+ b.Navigation("User");
+ });
+
+ // Third pass: navigations on the other end of the relationships declared above.
+ modelBuilder.Entity("JobsMedical.Web.Models.City", b =>
+ {
+ b.Navigation("Facilities");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
+ {
+ b.Navigation("Facilities");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
+ {
+ b.Navigation("Shifts");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.Role", b =>
+ {
+ b.Navigation("Shifts");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
+ {
+ b.Navigation("Applications");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.User", b =>
+ {
+ b.Navigation("Applications");
+
+ b.Navigation("DoctorProfile");
+ });
+
+ modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
+ {
+ b.Navigation("Events");
+
+ b.Navigation("Preferences");
+ });
+#pragma warning restore 612, 618
+ }
+ }
+}
diff --git a/src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.cs b/src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.cs
new file mode 100644
index 0000000..4e8e93c
--- /dev/null
+++ b/src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.cs
@@ -0,0 +1,44 @@
+using System;
+using Microsoft.EntityFrameworkCore.Migrations;
+using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
+
+#nullable disable
+
+namespace JobsMedical.Web.Migrations
+{
+ /// <summary>
+ /// Schema migration that adds the <c>AppSettings</c> table, which backs the runtime-editable
+ /// ingestion-automation and AI-audit settings. No existing table is altered.
+ /// </summary>
+ public partial class AiSettingsAndAutomation : Migration
+ {
+ /// <summary>
+ /// Creates <c>AppSettings</c> with an identity <c>Id</c> primary key and one column per setting.
+ /// </summary>
+ protected override void Up(MigrationBuilder migrationBuilder)
+ {
+ migrationBuilder.CreateTable(
+ name: "AppSettings",
+ columns: table => new
+ {
+ // Identity-by-default still accepts an explicitly supplied Id value on insert.
+ Id = table.Column(type: "integer", nullable: false)
+ .Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn),
+ // Enum-typed setting persisted as its integer value.
+ Mode = table.Column(type: "integer", nullable: false),
+ AutoPublishMinConfidence = table.Column(type: "integer", nullable: false),
+ AiEnabled = table.Column(type: "boolean", nullable: false),
+ // Endpoint, key and model are optional (nullable); the prompt is the only required text column.
+ AiEndpoint = table.Column(type: "character varying(500)", maxLength: 500, nullable: true),
+ AiApiKey = table.Column(type: "character varying(200)", maxLength: 200, nullable: true),
+ AiModel = table.Column(type: "character varying(120)", maxLength: 120, nullable: true),
+ AiSystemPrompt = table.Column(type: "character varying(4000)", maxLength: 4000, nullable: false),
+ AiAutoApprove = table.Column(type: "boolean", nullable: false),
+ UpdatedAt = table.Column(type: "timestamp with time zone", nullable: false)
+ },
+ constraints: table =>
+ {
+ table.PrimaryKey("PK_AppSettings", x => x.Id);
+ });
+ }
+
+ /// <summary>
+ /// Reverts the migration by dropping <c>AppSettings</c>; any saved settings row is lost.
+ /// </summary>
+ protected override void Down(MigrationBuilder migrationBuilder)
+ {
+ migrationBuilder.DropTable(
+ name: "AppSettings");
+ }
+ }
+}
diff --git a/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs b/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs
index c2e27ac..6d15fd1 100644
--- a/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs
+++ b/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs
@@ -22,6 +22,51 @@ namespace JobsMedical.Web.Migrations
NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder);
+ modelBuilder.Entity("JobsMedical.Web.Models.AppSetting", b =>
+ {
+ b.Property("Id")
+ .ValueGeneratedOnAdd()
+ .HasColumnType("integer");
+
+ NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id"));
+
+ b.Property("AiApiKey")
+ .HasMaxLength(200)
+ .HasColumnType("character varying(200)");
+
+ b.Property("AiAutoApprove")
+ .HasColumnType("boolean");
+
+ b.Property("AiEnabled")
+ .HasColumnType("boolean");
+
+ b.Property("AiEndpoint")
+ .HasMaxLength(500)
+ .HasColumnType("character varying(500)");
+
+ b.Property("AiModel")
+ .HasMaxLength(120)
+ .HasColumnType("character varying(120)");
+
+ b.Property("AiSystemPrompt")
+ .IsRequired()
+ .HasMaxLength(4000)
+ .HasColumnType("character varying(4000)");
+
+ b.Property("AutoPublishMinConfidence")
+ .HasColumnType("integer");
+
+ b.Property("Mode")
+ .HasColumnType("integer");
+
+ b.Property("UpdatedAt")
+ .HasColumnType("timestamp with time zone");
+
+ b.HasKey("Id");
+
+ b.ToTable("AppSettings");
+ });
+
modelBuilder.Entity("JobsMedical.Web.Models.Application", b =>
{
b.Property("Id")
diff --git a/src/JobsMedical.Web/Models/AppSetting.cs b/src/JobsMedical.Web/Models/AppSetting.cs
new file mode 100644
index 0000000..4515f35
--- /dev/null
+++ b/src/JobsMedical.Web/Models/AppSetting.cs
@@ -0,0 +1,50 @@
+using System.ComponentModel.DataAnnotations;
+
+namespace JobsMedical.Web.Models;
+
+/// <summary>
+/// Single-row (Id=1) platform settings the admin controls at runtime — chiefly the ingestion
+/// automation policy and the optional AI audit layer. Kept in the DB (not appsettings) so it's
+/// editable from the admin panel without a redeploy.
+/// </summary>
+public class AppSetting
+{
+ // Defaults to 1. NOTE(review): nothing in this class enforces the single-row convention —
+ // presumably the code that loads/saves settings does; confirm there.
+ public int Id { get; set; } = 1;
+
+ // --- Ingestion automation ---
+ /// <summary>How ingested listings reach the site; defaults to <see cref="IngestionMode.Manual"/>.</summary>
+ public IngestionMode Mode { get; set; } = IngestionMode.Manual;
+
+ /// <summary>In Automatic mode WITHOUT AI, listings at/above this confidence auto-publish.</summary>
+ // NOTE(review): presumably a 0-100 score (the default prompt asks the AI for 0-100); no range is enforced here.
+ public int AutoPublishMinConfidence { get; set; } = 85;
+
+ // --- AI audit layer (optional) ---
+ /// <summary>Master switch for the AI audit layer; off by default.</summary>
+ public bool AiEnabled { get; set; } = false;
+
+ /// <summary>OpenAI-compatible chat-completions endpoint (self-hosted or Iranian provider).</summary>
+ [MaxLength(500)] public string? AiEndpoint { get; set; }
+ // NOTE(review): the key is a plain string property/column; no encryption is visible in this class.
+ [MaxLength(200)] public string? AiApiKey { get; set; }
+ // Model identifier passed to the endpoint; defaults to "gpt-4o-mini".
+ [MaxLength(120)] public string? AiModel { get; set; } = "gpt-4o-mini";
+
+ /// <summary>The prompt + "framework" the AI follows to approve / reject / structure a listing.</summary>
+ [MaxLength(4000)]
+ public string AiSystemPrompt { get; set; } = DefaultPrompt;
+
+ /// <summary>If AI approves AND Mode is Automatic, publish without human review.</summary>
+ public bool AiAutoApprove { get; set; } = false;
+
+ // Initialised to the UTC time at which the instance is constructed.
+ public DateTime UpdatedAt { get; set; } = DateTime.UtcNow;
+
+ // Built-in Persian system prompt used as the initial value of AiSystemPrompt. It tells the model
+ // to classify each raw listing as approve / reject / review and to reply with a single JSON
+ // object containing: decision, confidence (0-100), reason, kind (shift|job), role, city,
+ // district, shiftType, employmentType, payAmount, sharePercent, title, facilityName.
+ public const string DefaultPrompt = """
+ تو دستیار بررسی آگهیهای کاری حوزه درمان برای پلتفرم «همکادر» هستی.
+ هر آگهی خام را بخوان و تصمیم بگیر:
+ - approve: آگهی واقعی و مرتبط با شیفت/استخدام کادر درمان است و اطلاعات کافی دارد.
+ - reject: تبلیغ، اسپم، نامرتبط، یا فاقد اطلاعات حداقلی است.
+ - review: مرتبط است اما ناقص/مبهم و نیاز به بررسی انسانی دارد.
+ نقش، شهر/محله، نوع شیفت، نوع همکاری، مبلغ یا درصد سهم، و عنوان را در صورت وجود استخراج کن.
+ فقط با یک شیء JSON پاسخ بده با کلیدهای:
+ decision (approve|reject|review)، confidence (0-100)، reason (فارسی کوتاه)،
+ kind (shift|job)، role، city، district، shiftType (day|evening|night|oncall)،
+ employmentType (fulltime|parttime|contract|plan)، payAmount (عدد تومان یا null)،
+ sharePercent (0-100 یا null)، title، facilityName.
+ """;
+}
diff --git a/src/JobsMedical.Web/Models/Enums.cs b/src/JobsMedical.Web/Models/Enums.cs
index 6bc7fb0..6326faf 100644
--- a/src/JobsMedical.Web/Models/Enums.cs
+++ b/src/JobsMedical.Web/Models/Enums.cs
@@ -75,3 +75,10 @@ public enum ListingKind
Shift = 0,
Job = 1
}
+
+/// <summary>
+/// How ingested listings get onto the site. The numeric values are explicit because the setting
+/// is stored in an integer column — do not renumber existing members.
+/// </summary>
+public enum IngestionMode
+{
+ Manual = 0, // Everything goes to the review queue; an admin approves it.
+ Automatic = 1 // Approved items (per the confidence threshold / AI verdict) are published automatically.
+}
diff --git a/src/JobsMedical.Web/Pages/Admin/Index.cshtml b/src/JobsMedical.Web/Pages/Admin/Index.cshtml
index bbbee0a..e40c974 100644
--- a/src/JobsMedical.Web/Pages/Admin/Index.cshtml
+++ b/src/JobsMedical.Web/Pages/Admin/Index.cshtml
@@ -12,6 +12,7 @@
(@JalaliDate.ToPersianDigits(Model.Queue.Count.ToString()) در صف،
@JalaliDate.ToPersianDigits(Model.Flagged.Count.ToString()) پرچمخورده)
· تأیید مراکز درمانی
+ · تنظیمات جمعآوری و AI
diff --git a/src/JobsMedical.Web/Pages/Admin/Settings.cshtml b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml
new file mode 100644
index 0000000..ff7fa04
--- /dev/null
+++ b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml
@@ -0,0 +1,67 @@
+@page
+@model JobsMedical.Web.Pages.Admin.SettingsModel
+@{
+ ViewData["Title"] = "تنظیمات جمعآوری و هوش مصنوعی";
+}
+
+
+
+
+ @if (Model.Saved is not null)
+ {
+
✓ @Model.Saved
+ }
+
+
diff --git a/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs
new file mode 100644
index 0000000..2f38445
--- /dev/null
+++ b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs
@@ -0,0 +1,54 @@
+using System.ComponentModel.DataAnnotations;
+using JobsMedical.Web.Models;
+using JobsMedical.Web.Services.Scraping;
+using Microsoft.AspNetCore.Authorization;
+using Microsoft.AspNetCore.Mvc;
+using Microsoft.AspNetCore.Mvc.RazorPages;
+
+namespace JobsMedical.Web.Pages.Admin;
+
+/// <summary>
+/// Admin-only page model for the settings page: on GET it copies the persisted
+/// <see cref="AppSetting"/> values into the bound properties, on POST it validates the submitted
+/// values and writes them back through <see cref="SettingsService"/>.
+/// </summary>
+[Authorize(Roles = "Admin")]
+public class SettingsModel : PageModel
+{
+    private readonly SettingsService _settings;
+
+    public SettingsModel(SettingsService settings) => _settings = settings;
+
+    // The length limits mirror the [MaxLength] values declared on AppSetting, so over-long input is
+    // rejected by model validation instead of surfacing as a database error on save. The confidence
+    // threshold is treated as a 0-100 score (the scale the default AI prompt uses).
+    [BindProperty] public IngestionMode Mode { get; set; }
+    [BindProperty, Range(0, 100)] public int AutoPublishMinConfidence { get; set; }
+    [BindProperty] public bool AiEnabled { get; set; }
+    [BindProperty, MaxLength(500)] public string? AiEndpoint { get; set; }
+    [BindProperty, MaxLength(200)] public string? AiApiKey { get; set; }
+    [BindProperty, MaxLength(120)] public string? AiModel { get; set; }
+    [BindProperty, MaxLength(4000)] public string AiSystemPrompt { get; set; } = "";
+    [BindProperty] public bool AiAutoApprove { get; set; }
+
+    /// <summary>One-shot confirmation message carried across the post-redirect-get round trip.</summary>
+    [TempData] public string? Saved { get; set; }
+
+    /// <summary>Loads the current settings into the bound properties for display.</summary>
+    public async Task OnGetAsync()
+    {
+        var current = await _settings.GetAsync();
+        Mode = current.Mode;
+        AutoPublishMinConfidence = current.AutoPublishMinConfidence;
+        AiEnabled = current.AiEnabled;
+        AiEndpoint = current.AiEndpoint;
+        AiApiKey = current.AiApiKey;
+        AiModel = current.AiModel;
+        AiSystemPrompt = current.AiSystemPrompt;
+        AiAutoApprove = current.AiAutoApprove;
+    }
+
+    /// <summary>
+    /// Validates and saves the submitted settings.
+    /// </summary>
+    /// <returns>
+    /// The page itself (nothing saved) when binding or validation failed; otherwise a redirect back
+    /// to this page with <see cref="Saved"/> set.
+    /// </returns>
+    public async Task<IActionResult> OnPostAsync()
+    {
+        // A blank prompt means "use the built-in one": the prompt is a required (non-null) value,
+        // so fall back to the default instead of rejecting the form or saving an empty prompt.
+        if (string.IsNullOrWhiteSpace(AiSystemPrompt))
+        {
+            AiSystemPrompt = AppSetting.DefaultPrompt;
+            ModelState.Remove(nameof(AiSystemPrompt));
+        }
+
+        // Never persist a half-bound form: a value that failed to bind keeps its type default
+        // (e.g. a threshold of 0), which must not silently become the saved policy.
+        if (!ModelState.IsValid)
+        {
+            return Page();
+        }
+
+        await _settings.SaveAsync(new AppSetting
+        {
+            Mode = Mode,
+            AutoPublishMinConfidence = AutoPublishMinConfidence,
+            AiEnabled = AiEnabled,
+            AiEndpoint = AiEndpoint,
+            AiApiKey = AiApiKey,
+            AiModel = AiModel,
+            AiSystemPrompt = AiSystemPrompt,
+            AiAutoApprove = AiAutoApprove,
+        });
+        Saved = "تنظیمات ذخیره شد.";
+        return RedirectToPage();
+    }
+}
diff --git a/src/JobsMedical.Web/Program.cs b/src/JobsMedical.Web/Program.cs
index 0c8c847..4eb294c 100644
--- a/src/JobsMedical.Web/Program.cs
+++ b/src/JobsMedical.Web/Program.cs
@@ -21,18 +21,31 @@ builder.Services.AddScoped();
// Listing parser: heuristic now; swap for an LLM-backed IListingParser later.
builder.Services.AddSingleton();
-// Scrape/ingestion engine: pluggable sources → dedupe → parse → validate → review queue.
+// Scrape/ingestion engine: pluggable sources → dedupe → parse → validate → (AI audit) → publish/queue.
+builder.Services.AddHttpClient("scrape", c => // named client requested by the Telegram, Bale and Divar sources
+{
+ c.Timeout = TimeSpan.FromSeconds(20);
+ c.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; HamkadrBot/1.0)");
+});
+builder.Services.AddHttpClient("ai"); // named client for OpenAiCompatibleAuditor, which sets its own 30s timeout per call
builder.Services.Configure(
builder.Configuration.GetSection("Ingestion"));
builder.Services.Configure(
builder.Configuration.GetSection("Ingestion:Telegram"));
+builder.Services.Configure(
+ builder.Configuration.GetSection("Ingestion:Bale"));
builder.Services.Configure(
builder.Configuration.GetSection("Ingestion:Divar"));
builder.Services.AddSingleton();
+builder.Services.AddSingleton();
+builder.Services.AddScoped();
builder.Services.AddSingleton();
builder.Services.AddSingleton();
+builder.Services.AddSingleton();
builder.Services.AddSingleton();
builder.Services.AddScoped();
diff --git a/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs b/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs
new file mode 100644
index 0000000..65990b7
--- /dev/null
+++ b/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs
@@ -0,0 +1,120 @@
+using System.Net.Http.Headers;
+using System.Text;
+using System.Text.Json;
+using JobsMedical.Web.Models;
+
+namespace JobsMedical.Web.Services.Scraping;
+
+/// <summary>Optional structured fields parsed from the model's JSON reply; each is null when absent.</summary>
+public record AiStructured(
+    string? Kind, string? Role, string? City, string? District, string? ShiftType,
+    string? EmploymentType, long? PayAmount, int? SharePercent, string? Title, string? FacilityName);
+
+/// <summary>An AI verdict on a raw listing.</summary>
+public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data)
+{
+    public bool Approve => Decision.Equals("approve", StringComparison.OrdinalIgnoreCase);
+    public bool Reject => Decision.Equals("reject", StringComparison.OrdinalIgnoreCase);
+}
+
+public interface IAiAuditor
+{
+    /// <summary>Audit a raw post. Returns null when AI is off or the call fails (fail safe → manual).</summary>
+    Task<AiAuditResult?> AuditAsync(string rawText, AppSetting settings, CancellationToken ct = default);
+}
+
+/// <summary>
+/// Calls any OpenAI-compatible chat-completions endpoint (self-hosted vLLM/Ollama, or an Iranian
+/// provider — OpenAI/Anthropic are blocked from Iran). The admin-set system prompt is the
+/// "framework" that tells the model how to approve/reject/structure. We ask for strict JSON and
+/// parse it. Any failure returns null so ingestion falls back to the rule-based path.
+/// </summary>
+public class OpenAiCompatibleAuditor : IAiAuditor
+{
+    private readonly IHttpClientFactory _http;
+    private readonly ILogger<OpenAiCompatibleAuditor> _log;
+
+    public OpenAiCompatibleAuditor(IHttpClientFactory http, ILogger<OpenAiCompatibleAuditor> log)
+    {
+        _http = http;
+        _log = log;
+    }
+
+    /// <summary>
+    /// Posts the raw listing and the configured system prompt to the configured endpoint and parses
+    /// the first choice's message content. Returns null when AI is disabled, no endpoint is set,
+    /// the reply is empty, or anything throws (the exception is logged as a warning).
+    /// </summary>
+    public async Task<AiAuditResult?> AuditAsync(string rawText, AppSetting s, CancellationToken ct = default)
+    {
+        if (!s.AiEnabled || string.IsNullOrWhiteSpace(s.AiEndpoint)) return null;
+
+        try
+        {
+            var payload = new
+            {
+                model = string.IsNullOrWhiteSpace(s.AiModel) ? "gpt-4o-mini" : s.AiModel,
+                temperature = 0,
+                response_format = new { type = "json_object" },
+                messages = new object[]
+                {
+                    new { role = "system", content = s.AiSystemPrompt },
+                    new { role = "user", content = "آگهی خام:\n" + rawText + "\n\nفقط با JSON پاسخ بده." },
+                },
+            };
+
+            var client = _http.CreateClient("ai");
+            client.Timeout = TimeSpan.FromSeconds(30);
+            using var req = new HttpRequestMessage(HttpMethod.Post, s.AiEndpoint)
+            {
+                Content = new StringContent(JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json"),
+            };
+            if (!string.IsNullOrWhiteSpace(s.AiApiKey))
+                req.Headers.Authorization = new AuthenticationHeaderValue("Bearer", s.AiApiKey);
+
+            using var resp = await client.SendAsync(req, ct);
+            resp.EnsureSuccessStatusCode();
+            var body = await resp.Content.ReadAsStringAsync(ct);
+
+            using var doc = JsonDocument.Parse(body);
+            var content = doc.RootElement
+                .GetProperty("choices")[0].GetProperty("message").GetProperty("content").GetString();
+            if (string.IsNullOrWhiteSpace(content)) return null;
+
+            return ParseVerdict(content);
+        }
+        catch (Exception ex)
+        {
+            _log.LogWarning(ex, "AI audit failed — falling back to rule-based decision.");
+            return null;
+        }
+    }
+
+    /// <summary>
+    /// Parses the model's reply into a verdict. Returns null when no {...} span is found. A missing
+    /// or wrongly-typed field falls back to its default ("review", confidence 50, null) instead of
+    /// invalidating the whole verdict; confidence is clamped to 0..100.
+    /// </summary>
+    private static AiAuditResult? ParseVerdict(string json)
+    {
+        // The content itself should be a JSON object; tolerate code fences.
+        json = json.Trim().Trim('`');
+        var start = json.IndexOf('{');
+        var end = json.LastIndexOf('}');
+        if (start < 0 || end <= start) return null;
+        json = json.Substring(start, end - start + 1);
+
+        using var doc = JsonDocument.Parse(json);
+        var r = doc.RootElement;
+        // JsonElement.TryGetInt32/TryGetInt64 throw InvalidOperationException when the value is not
+        // a JSON number (e.g. "payAmount": null or "confidence": "85"), so check ValueKind first;
+        // otherwise one odd optional field would discard an otherwise usable verdict.
+        string? S(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.String ? v.GetString() : null;
+        int? NI(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : null;
+        long? L(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt64(out var n) ? n : null;
+        int I(string k, int d) => NI(k) ?? d;
+
+        var decision = (S("decision") ?? "review").ToLowerInvariant();
+        var data = new AiStructured(S("kind"), S("role"), S("city"), S("district"), S("shiftType"),
+            S("employmentType"), L("payAmount"), NI("sharePercent"), S("title"), S("facilityName"));
+        return new AiAuditResult(decision, Math.Clamp(I("confidence", 50), 0, 100), S("reason"), data);
+    }
+}
diff --git a/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs b/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs
new file mode 100644
index 0000000..21ef6b0
--- /dev/null
+++ b/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs
@@ -0,0 +1,79 @@
+using System.Text.Json;
+using Microsoft.Extensions.Options;
+
+namespace JobsMedical.Web.Services.Scraping;
+
+public class BaleOptions
+{
+    public bool Enabled { get; set; }
+    public string? BotToken { get; set; }
+    public string BaseUrl { get; set; } = "https://tapi.bale.ai"; // Bale Bot API host
+}
+
+/// <summary>
+/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. The bot must
+/// be a member/admin of the channels it should read. Pulls text from messages and channel posts.
+/// </summary>
+public class BaleListingSource : IListingSource
+{
+    private readonly BaleOptions _opts;
+    private readonly IHttpClientFactory _http;
+    private readonly ILogger<BaleListingSource> _log;
+
+    public BaleListingSource(IOptions<BaleOptions> opts, IHttpClientFactory http,
+        ILogger<BaleListingSource> log)
+    {
+        _opts = opts.Value;
+        _http = http;
+        _log = log;
+    }
+
+    public string Name => "بله";
+    public bool Enabled => _opts.Enabled && !string.IsNullOrWhiteSpace(_opts.BotToken);
+
+    /// <summary>
+    /// Calls getUpdates once and returns the text of every channel post / message whose trimmed
+    /// length is at least 15 characters. Returns an empty list when disabled or on any failure.
+    /// </summary>
+    public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
+    {
+        if (!Enabled) { _log.LogInformation("Bale source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); }
+
+        try
+        {
+            var client = _http.CreateClient("scrape");
+            // NOTE(review): the bot token is part of the request URL; make sure HTTP client
+            // request logging does not write it to the logs.
+            // NOTE(review): getUpdates is called without an offset, so presumably the same pending
+            // updates come back on every run (hash dedupe absorbs repeats) — confirm against the API.
+            var url = $"{_opts.BaseUrl.TrimEnd('/')}/bot{_opts.BotToken}/getUpdates";
+            var body = await client.GetStringAsync(url, ct);
+
+            using var doc = JsonDocument.Parse(body);
+            if (!doc.RootElement.TryGetProperty("result", out var result) || result.ValueKind != JsonValueKind.Array)
+                return Array.Empty<ScrapedItem>();
+
+            var items = new List<ScrapedItem>();
+            foreach (var update in result.EnumerateArray())
+            {
+                var text = (TextOf(update, "channel_post") ?? TextOf(update, "message"))?.Trim();
+                if (text is { Length: >= 15 })
+                    items.Add(new ScrapedItem("بله", text));
+            }
+            return items;
+        }
+        catch (Exception ex)
+        {
+            _log.LogWarning(ex, "Bale fetch failed.");
+            return Array.Empty<ScrapedItem>();
+        }
+    }
+
+    /// <summary>Returns update[key].text when it is present and a JSON string; otherwise null.</summary>
+    // TryGetProperty throws on non-object elements, so the ValueKind checks keep one malformed
+    // update from failing the whole batch.
+    private static string? TextOf(JsonElement update, string key)
+        => update.ValueKind == JsonValueKind.Object
+            && update.TryGetProperty(key, out var m) && m.ValueKind == JsonValueKind.Object
+            && m.TryGetProperty("text", out var t) && t.ValueKind == JsonValueKind.String
+            ? t.GetString() : null;
+}
diff --git a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs
index ce537bf..cdb60c9 100644
--- a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs
@@ -1,3 +1,5 @@
+using System.Text;
+using System.Text.Json;
using Microsoft.Extensions.Options;
namespace JobsMedical.Web.Services.Scraping;
@@ -5,38 +7,80 @@ namespace JobsMedical.Web.Services.Scraping;
public class DivarOptions
{
public bool Enabled { get; set; }
- public string? City { get; set; } // e.g. "tehran"
- public List Queries { get; set; } = new(); // search terms, e.g. "استخدام پزشک"
+ public string City { get; set; } = "tehran"; // path segment after BaseUrl
+ public string Category { get; set; } = "jobs"; // path segment after City
+ public List<string> Queries { get; set; } = new(); // e.g. "پرستار", "پزشک عمومی", "درمانگاه"
+ public string BaseUrl { get; set; } = "https://api.divar.ir/v8/web-search";
+ public int PerQuery { get; set; } = 25; // max harvested texts kept per query
}
///
-/// Divar source. Credential-ready: configure city + queries in (Ingestion:Divar) and implement
-/// the fetch against Divar's listing API/HTML. Dormant until enabled.
+/// Best-effort Divar fetch: queries Divar's web-search JSON for each term and harvests post
+/// titles + descriptions. Divar's private API shifts shape over time, so we walk the JSON
+/// tolerantly for any object carrying a "title" plus a nearby description field, and fail soft.
///
public class DivarListingSource : IListingSource
{
private readonly DivarOptions _opts;
+ private readonly IHttpClientFactory _http;
private readonly ILogger _log;
- public DivarListingSource(IOptions opts, ILogger log)
+ public DivarListingSource(IOptions<DivarOptions> opts, IHttpClientFactory http,
+ ILogger<DivarListingSource> log)
{
_opts = opts.Value;
+ _http = http;
_log = log;
}
public string Name => "دیوار";
public bool Enabled => _opts.Enabled && _opts.Queries.Count > 0;
- public Task> FetchAsync(CancellationToken ct = default)
+ public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
{
- if (!Enabled)
+ if (!Enabled) { _log.LogInformation("Divar source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); }
+
+ var client = _http.CreateClient("scrape");
+ var items = new List<ScrapedItem>();
+ foreach (var q in _opts.Queries.Where(q => q.Trim().Length > 0))
{
- _log.LogInformation("Divar source not configured — skipping.");
- return Task.FromResult>(Array.Empty());
+ try
+ {
+ var url = $"{_opts.BaseUrl.TrimEnd('/')}/{_opts.City}/{_opts.Category}?q={Uri.EscapeDataString(q)}";
+ var body = await client.GetStringAsync(url, ct);
+ using var doc = JsonDocument.Parse(body);
+ foreach (var text in Harvest(doc.RootElement).Take(_opts.PerQuery))
+ items.Add(new ScrapedItem("دیوار", text, "https://divar.ir"));
+ }
+ catch (Exception ex) { _log.LogWarning(ex, "Divar fetch failed for query {Query}", q); } // one failed query does not stop the others
+ }
+ return items;
+ }
+
+ private static readonly string[] DescKeys =
+ { "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" };
+
+ /// <summary>Walk the JSON; for each object with a string "title", emit title + first description.</summary>
+ private static IEnumerable<string> Harvest(JsonElement el)
+ {
+ if (el.ValueKind == JsonValueKind.Object)
+ {
+ if (el.TryGetProperty("title", out var t) && t.ValueKind == JsonValueKind.String)
+ {
+ var sb = new StringBuilder(t.GetString());
+ foreach (var k in DescKeys)
+ if (el.TryGetProperty(k, out var d) && d.ValueKind == JsonValueKind.String)
+ { sb.Append(" — ").Append(d.GetString()); break; }
+ var text = sb.ToString().Trim();
+ if (text.Length >= 15) yield return text;
+ }
+ foreach (var p in el.EnumerateObject())
+ foreach (var s in Harvest(p.Value)) yield return s;
+ }
+ else if (el.ValueKind == JsonValueKind.Array)
+ {
+ foreach (var item in el.EnumerateArray())
+ foreach (var s in Harvest(item)) yield return s;
}
- // TODO(prod): query Divar for each term in the configured city, map each ad's
- // title+description to new ScrapedItem(Name, text, adUrl).
- _log.LogWarning("Divar fetch not yet implemented; returning empty.");
- return Task.FromResult>(Array.Empty());
}
}
diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
index 65356d5..a9c21b5 100644
--- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
+++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
@@ -7,22 +7,24 @@ using Microsoft.EntityFrameworkCore;
namespace JobsMedical.Web.Services.Scraping;
-public record SourceResult(string Source, int Fetched, int Queued, int Flagged, int Spam, int Duplicates);
+public record SourceResult(string Source, int Fetched, int Queued, int Published, int Flagged, int Spam, int Duplicates); // NOTE(review): Published was inserted before Flagged — check positional constructions/deconstructions elsewhere
public record IngestionSummary(List Sources)
{
public int TotalQueued => Sources.Sum(s => s.Queued);
+ public int TotalPublished => Sources.Sum(s => s.Published);
public int TotalFlagged => Sources.Sum(s => s.Flagged);
public int TotalSpam => Sources.Sum(s => s.Spam);
public int TotalDuplicates => Sources.Sum(s => s.Duplicates);
}
///
-/// The scrape engine. Pulls from every enabled , dedupes by content
-/// hash, parses with , validates with ,
-/// and stores each as a with a status: New (queued for review),
-/// Flagged (incomplete/suspicious), or Discarded (spam). Source-agnostic — add a source and it
-/// flows through unchanged.
+/// The scrape engine. For every enabled source: dedupe by content hash → parse → rule-validate →
+/// (optional) AI audit → decide. Decision depends on admin settings:
+/// • rule-based spam → Discarded (the AI is not consulted)
+/// • AI verdict available: reject → Discarded; approve → publish if Automatic + AiAutoApprove, else queue; anything else → Flagged
+/// • AI off or the AI call failed: invalid → Flagged; Automatic + confidence ≥ threshold → publish; else queue
+/// "Publish" resolves-or-creates an (unverified) facility and creates the Shift/JobOpening.
///
public class IngestionService
{
@@ -30,16 +32,15 @@ public class IngestionService
private readonly IEnumerable _sources;
private readonly IListingParser _parser;
private readonly ListingValidator _validator;
+ private readonly IAiAuditor _ai;
+ private readonly SettingsService _settings;
private readonly ILogger _log;
- public IngestionService(AppDbContext db, IEnumerable sources,
- IListingParser parser, ListingValidator validator, ILogger log)
+ public IngestionService(AppDbContext db, IEnumerable<IListingSource> sources, IListingParser parser,
+ ListingValidator validator, IAiAuditor ai, SettingsService settings, ILogger<IngestionService> log)
{
- _db = db;
- _sources = sources;
- _parser = parser;
- _validator = validator;
- _log = log;
+ _db = db; _sources = sources; _parser = parser; _validator = validator;
+ _ai = ai; _settings = settings; _log = log;
}
public IReadOnlyList<(string Name, bool Enabled)> Sources =>
@@ -47,18 +48,22 @@ public class IngestionService
public async Task RunAsync(CancellationToken ct = default)
{
- var roles = await _db.Roles.Select(r => r.Name).ToListAsync(ct);
- var cities = await _db.Cities.Select(c => c.Name).ToListAsync(ct);
- var districts = await _db.Districts.Select(d => d.Name).ToListAsync(ct);
+ var settings = await _settings.GetAsync();
+ var roles = await _db.Roles.ToListAsync(ct);
+ var cities = await _db.Cities.ToListAsync(ct);
+ var districts = await _db.Districts.ToListAsync(ct);
+ var roleNames = roles.Select(r => r.Name).ToList();
+ var cityNames = cities.Select(c => c.Name).ToList();
+ var districtNames = districts.Select(d => d.Name).ToList();
var results = new List();
foreach (var source in _sources.Where(s => s.Enabled))
{
- int fetched = 0, queued = 0, flagged = 0, spam = 0, dupes = 0;
+ int fetched = 0, queued = 0, published = 0, flagged = 0, spam = 0, dupes = 0;
IReadOnlyList items;
try { items = await source.FetchAsync(ct); }
- catch (Exception ex) { _log.LogError(ex, "Source {Source} fetch failed", source.Name); continue; }
+ catch (Exception ex) { _log.LogError(ex, "Source {Source} failed", source.Name); continue; }
foreach (var item in items)
{
@@ -66,42 +71,155 @@ public class IngestionService
var hash = Hash(item.RawText);
- if (await _db.RawListings.AnyAsync(r => r.ContentHash == hash, ct)) { dupes++; continue; }
+ if (_db.RawListings.Local.Any(r => r.ContentHash == hash) || await _db.RawListings.AnyAsync(r => r.ContentHash == hash, ct)) { dupes++; continue; } // Local also covers rows added earlier in this still-unsaved batch
- var parsed = _parser.Parse(item.RawText, roles, cities, districts);
+ var parsed = _parser.Parse(item.RawText, roleNames, cityNames, districtNames);
var val = _validator.Validate(item.RawText, parsed);
- var status = val.IsSpam ? RawListingStatus.Discarded
- : val.IsValid ? RawListingStatus.New
- : RawListingStatus.Flagged;
- if (status == RawListingStatus.New) queued++;
- else if (status == RawListingStatus.Flagged) flagged++;
- else spam++;
+ AiAuditResult? ai = null;
+ if (settings.AiEnabled && !val.IsSpam)
+ ai = await _ai.AuditAsync(item.RawText, settings, ct);
- _db.RawListings.Add(new RawListing
+ var (status, reason, confidence) = Decide(settings, val, ai);
+
+ var raw = new RawListing
{
SourceChannel = item.Source,
SourceUrl = item.SourceUrl,
RawText = item.RawText.Trim(),
ContentHash = hash,
- Confidence = val.Confidence,
- ValidationNotes = val.Issues.Count > 0 ? string.Join("؛ ", val.Issues) : null,
+ Confidence = confidence,
+ ValidationNotes = reason,
Status = status,
- });
+ };
+ _db.RawListings.Add(raw);
+
+ if (status == RawListingStatus.Normalized)
+ {
+ try { Publish(parsed, ai, raw, roles, cities, districts); published++; }
+ catch (Exception ex) { _log.LogWarning(ex, "Auto-publish failed; queueing instead"); raw.Status = RawListingStatus.New; queued++; }
+ }
+ else if (status == RawListingStatus.New) queued++;
+ else if (status == RawListingStatus.Flagged) flagged++;
+ else spam++;
}
await _db.SaveChangesAsync(ct);
- results.Add(new SourceResult(source.Name, fetched, queued, flagged, spam, dupes));
- _log.LogInformation("Ingestion {Source}: fetched={F} queued={Q} flagged={Fl} spam={S} dupes={D}",
- source.Name, fetched, queued, flagged, spam, dupes);
+ results.Add(new SourceResult(source.Name, fetched, queued, published, flagged, spam, dupes));
+ _log.LogInformation("Ingest {S}: fetched={F} queued={Q} published={P} flagged={Fl} spam={Sp} dupes={D}",
+ source.Name, fetched, queued, published, flagged, spam, dupes);
}
return new IngestionSummary(results);
}
- /// SHA-256 hex of the whitespace-normalized text (for cross-run dedupe).
+ private static (RawListingStatus status, string? reason, int confidence) Decide( // maps validation + optional AI verdict to (status, note, confidence)
+ AppSetting s, ValidationResult val, AiAuditResult? ai)
+ {
+ var notes = val.Issues.Count > 0 ? string.Join("؛ ", val.Issues) : null;
+ // Rule-based spam is discarded first, with the validator's confidence.
+ if (val.IsSpam)
+ return (RawListingStatus.Discarded, Join("اسپم", notes), val.Confidence);
+ // When an AI verdict exists it decides, and its confidence is the one returned.
+ if (ai is not null)
+ {
+ var aiNote = Join($"AI: {ai.Decision} ({ai.Confidence}٪)" + (ai.Reason is null ? "" : $" — {ai.Reason}"), notes);
+ if (ai.Reject) return (RawListingStatus.Discarded, aiNote, ai.Confidence);
+ if (ai.Approve)
+ return (s.Mode == IngestionMode.Automatic && s.AiAutoApprove
+ ? RawListingStatus.Normalized : RawListingStatus.New, aiNote, ai.Confidence);
+ return (RawListingStatus.Flagged, aiNote, ai.Confidence); // review
+ }
+ // No AI verdict (AI off or the audit returned null): rule-based path.
+ if (!val.IsValid) return (RawListingStatus.Flagged, notes, val.Confidence);
+ if (s.Mode == IngestionMode.Automatic && val.Confidence >= s.AutoPublishMinConfidence)
+ return (RawListingStatus.Normalized, notes, val.Confidence);
+ return (RawListingStatus.New, notes, val.Confidence);
+ }
+
+ private void Publish(ParsedListing parsed, AiAuditResult? ai, RawListing raw,
+ List<Role> roles, List<City> cities, List<District> districts) // NOTE(review): element types restored from usage — confirm the entity class names
+ {
+ var d = ai?.Data;
+ var roleName = d?.Role ?? parsed.RoleName; // AI-extracted fields win over the heuristic parser
+ var cityName = d?.City ?? parsed.CityName;
+ var districtName = d?.District ?? parsed.DistrictName;
+
+ var role = roles.FirstOrDefault(r => r.Name == roleName) ?? roles.First(); // unknown role name → first role
+ var city = cities.FirstOrDefault(c => c.Name == cityName)
+ ?? cities.FirstOrDefault(c => c.IsActive) ?? cities.First(); // unknown city → first active city, else first city
+ var district = districts.FirstOrDefault(x => x.Name == districtName && x.CityId == city.Id);
+
+ var facilityName = !string.IsNullOrWhiteSpace(d?.FacilityName) ? d!.FacilityName!.Trim()
+ : $"مرکز درمانی (از {raw.SourceChannel})";
+ var facility = _db.Facilities.Local.FirstOrDefault(f => f.Name == facilityName && f.CityId == city.Id)
+ ?? _db.Facilities.FirstOrDefault(f => f.Name == facilityName && f.CityId == city.Id);
+ if (facility is null)
+ {
+ facility = new Facility
+ {
+ Name = facilityName, Type = FacilityType.Clinic, City = city, DistrictId = district?.Id,
+ Phone = parsed.Phone, IsVerified = false,
+ };
+ _db.Facilities.Add(facility);
+ }
+
+ var kind = (d?.Kind ?? parsed.Kind.ToString()).ToLowerInvariant();
+ if (kind.Contains("job") || kind.Contains("استخدام"))
+ {
+ _db.JobOpenings.Add(new JobOpening
+ {
+ Facility = facility, Role = role,
+ Title = !string.IsNullOrWhiteSpace(d?.Title) ? d!.Title!.Trim() : $"استخدام {role.Name}",
+ EmploymentType = MapEmployment(d?.EmploymentType, parsed.EmploymentType),
+ SalaryMin = parsed.PayAmount,
+ Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated,
+ SourceUrl = raw.SourceUrl,
+ });
+ }
+ else
+ {
+ var st = MapShiftType(d?.ShiftType, parsed.ShiftType);
+ var (start, end) = DefaultTimes(st);
+ _db.Shifts.Add(new Shift
+ {
+ Facility = facility, Role = role,
+ Date = DateOnly.FromDateTime(DateTime.UtcNow).AddDays(1), // placeholder date: tomorrow (UTC)
+ StartTime = start, EndTime = end, ShiftType = st,
+ SpecialtyRequired = role.Name, Description = raw.RawText,
+ PayType = parsed.SharePercent is not null && parsed.PayAmount is null ? PayType.Percentage
+ : parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift,
+ PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent,
+ Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl,
+ });
+ }
+ raw.Status = RawListingStatus.Normalized;
+ }
+
+ private static ShiftType MapShiftType(string? ai, ShiftType? parsed) => (ai?.ToLowerInvariant()) switch // AI label first
+ {
+ "day" => ShiftType.Day, "evening" => ShiftType.Evening, "night" => ShiftType.Night, "oncall" => ShiftType.OnCall,
+ _ => parsed ?? ShiftType.Day, // unrecognised/missing AI label → parser value, else Day
+ };
+ // Same precedence as MapShiftType: AI label, then parser value, then FullTime.
+ private static EmploymentType MapEmployment(string? ai, EmploymentType? parsed) => (ai?.ToLowerInvariant()) switch
+ {
+ "parttime" => EmploymentType.PartTime, "contract" => EmploymentType.Contract,
+ "plan" => EmploymentType.Plan, "fulltime" => EmploymentType.FullTime,
+ _ => parsed ?? EmploymentType.FullTime,
+ };
+ // Default (start, end) times used for a published shift of the given type.
+ private static (TimeOnly, TimeOnly) DefaultTimes(ShiftType t) => t switch
+ {
+ ShiftType.Day => (new TimeOnly(8, 0), new TimeOnly(14, 0)),
+ ShiftType.Evening => (new TimeOnly(14, 0), new TimeOnly(20, 0)),
+ ShiftType.Night => (new TimeOnly(20, 0), new TimeOnly(8, 0)),
+ _ => (new TimeOnly(8, 0), new TimeOnly(8, 0)), // any other type: 08:00 to 08:00
+ };
+ // Joins two notes with " | "; returns a alone when b is null or empty.
+ private static string? Join(string a, string? b) => string.IsNullOrEmpty(b) ? a : $"{a} | {b}";
+
private static string Hash(string text)
{
var normalized = Regex.Replace((text ?? "").Trim(), @"\s+", " ");
- var bytes = SHA256.HashData(Encoding.UTF8.GetBytes(normalized));
- return Convert.ToHexString(bytes).ToLowerInvariant();
+ return Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(normalized))).ToLowerInvariant(); // lowercase SHA-256 hex of the whitespace-normalized text (the dedupe key)
}
}
diff --git a/src/JobsMedical.Web/Services/Scraping/SettingsService.cs b/src/JobsMedical.Web/Services/Scraping/SettingsService.cs
new file mode 100644
index 0000000..ff24df0
--- /dev/null
+++ b/src/JobsMedical.Web/Services/Scraping/SettingsService.cs
@@ -0,0 +1,45 @@
+using JobsMedical.Web.Data;
+using JobsMedical.Web.Models;
+using Microsoft.EntityFrameworkCore;
+
+namespace JobsMedical.Web.Services.Scraping;
+
+/// <summary>Loads/creates the single platform-settings row (Id=1).</summary>
+public class SettingsService
+{
+    private readonly AppDbContext _db;
+    public SettingsService(AppDbContext db) => _db = db;
+
+    /// <summary>Returns the settings row with Id 1, inserting a default row first when none exists.</summary>
+    public async Task<AppSetting> GetAsync(CancellationToken ct = default)
+    {
+        var s = await _db.AppSettings.FirstOrDefaultAsync(x => x.Id == 1, ct);
+        if (s is null)
+        {
+            s = new AppSetting { Id = 1 };
+            _db.AppSettings.Add(s);
+            await _db.SaveChangesAsync(ct);
+        }
+        return s;
+    }
+
+    /// <summary>
+    /// Copies the editable fields from <paramref name="incoming"/> onto the stored row and saves.
+    /// The confidence threshold is clamped to 0..100, string fields are trimmed, and a blank
+    /// system prompt is replaced by <see cref="AppSetting.DefaultPrompt"/>.
+    /// </summary>
+    public async Task SaveAsync(AppSetting incoming, CancellationToken ct = default)
+    {
+        var s = await GetAsync(ct);
+        s.Mode = incoming.Mode;
+        s.AutoPublishMinConfidence = Math.Clamp(incoming.AutoPublishMinConfidence, 0, 100);
+        s.AiEnabled = incoming.AiEnabled;
+        s.AiEndpoint = incoming.AiEndpoint?.Trim();
+        s.AiApiKey = incoming.AiApiKey?.Trim();
+        s.AiModel = incoming.AiModel?.Trim();
+        s.AiSystemPrompt = string.IsNullOrWhiteSpace(incoming.AiSystemPrompt)
+            ? AppSetting.DefaultPrompt : incoming.AiSystemPrompt;
+        s.AiAutoApprove = incoming.AiAutoApprove;
+        s.UpdatedAt = DateTime.UtcNow;
+        await _db.SaveChangesAsync(ct);
+    }
+}
diff --git a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs
index 5977caa..b655819 100644
--- a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs
@@ -1,3 +1,5 @@
+using System.Net;
+using System.Text.RegularExpressions;
using Microsoft.Extensions.Options;
namespace JobsMedical.Web.Services.Scraping;
@@ -5,40 +7,71 @@ namespace JobsMedical.Web.Services.Scraping;
public class TelegramOptions
{
public bool Enabled { get; set; }
- public string? BotToken { get; set; }
- public List Channels { get; set; } = new(); // @channel handles to read
+ public string? BotToken { get; set; } // optional (for private channels later)
+ public List<string> Channels { get; set; } = new(); // public channel usernames (a leading @ is stripped)
+ public int PerChannel { get; set; } = 20; // max messages kept per channel per fetch
}
///
-/// Telegram/Bale channel source. Credential-ready: wire a bot token + channel list in config
-/// (Ingestion:Telegram) and implement the fetch against the Bot API (getUpdates / channel posts)
-/// or a userbot. Dormant until enabled, so the engine runs without it.
+/// Reads public Telegram channels via the web preview (https://t.me/s/&lt;channel&gt;) — no bot
+/// token or login needed for public channels. Each message's text becomes a ScrapedItem.
///
public class TelegramListingSource : IListingSource
{
private readonly TelegramOptions _opts;
+ private readonly IHttpClientFactory _http;
private readonly ILogger _log;
- public TelegramListingSource(IOptions opts, ILogger log)
+ public TelegramListingSource(IOptions<TelegramOptions> opts, IHttpClientFactory http,
+ ILogger<TelegramListingSource> log)
{
_opts = opts.Value;
+ _http = http;
_log = log;
}
- public string Name => "تلگرام/بله";
- public bool Enabled => _opts.Enabled && !string.IsNullOrWhiteSpace(_opts.BotToken) && _opts.Channels.Count > 0;
+ public string Name => "تلگرام";
+ public bool Enabled => _opts.Enabled && _opts.Channels.Count > 0;
- public Task> FetchAsync(CancellationToken ct = default)
+ public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
{
- if (!Enabled)
+ if (!Enabled) { _log.LogInformation("Telegram source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); }
+
+ var client = _http.CreateClient("scrape");
+ var items = new List<ScrapedItem>();
+ foreach (var ch in _opts.Channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0))
{
- _log.LogInformation("Telegram source not configured — skipping.");
- return Task.FromResult>(Array.Empty());
+ try
+ {
+ var html = await client.GetStringAsync($"https://t.me/s/{ch}", ct);
+ foreach (var text in ExtractMessages(html).Take(_opts.PerChannel)) // NOTE(review): keeps the first matches in page order — confirm those are the posts wanted when PerChannel is small
+ items.Add(new ScrapedItem($"تلگرام/{ch}", text, $"https://t.me/{ch}"));
+ }
+ catch (Exception ex) { _log.LogWarning(ex, "Telegram fetch failed for {Channel}", ch); }
+ }
+ return items;
+ }
+
+ // Message bodies live in <div class="tgme_widget_message_text ...">...</div>.
+ private static IEnumerable<string> ExtractMessages(string html)
+ {
+ foreach (Match m in Regex.Matches(html,
+ "<div class=\"tgme_widget_message_text[^\"]*\"[^>]*>(.*?)</div>", RegexOptions.Singleline))
+ {
+ var text = HtmlUtil.ToPlainText(m.Groups[1].Value);
+ if (text.Length >= 15) yield return text;
}
- // TODO(prod): call https://api.telegram.org/bot{token}/getUpdates (or channel history),
- // map each message to new ScrapedItem(Name, message.Text, messageLink). The validation +
- // dedupe pipeline downstream is already source-agnostic.
- _log.LogWarning("Telegram fetch not yet implemented; returning empty.");
- return Task.FromResult>(Array.Empty());
+ }
+}
+
+internal static class HtmlUtil
+{
+ public static string ToPlainText(string html) // HTML fragment → trimmed plain text
+ {
+ var s = Regex.Replace(html, "<br\\s*/?>", "\n", RegexOptions.IgnoreCase); // line-break tags become newlines
+ s = Regex.Replace(s, "<[^>]+>", ""); // strip remaining tags
+ s = WebUtility.HtmlDecode(s);
+ s = Regex.Replace(s, "[ \\t]+", " "); // collapse runs of spaces/tabs; newlines are kept
+ return s.Trim();
}
}
diff --git a/src/JobsMedical.Web/appsettings.json b/src/JobsMedical.Web/appsettings.json
index 8fe568b..53d6c0d 100644
--- a/src/JobsMedical.Web/appsettings.json
+++ b/src/JobsMedical.Web/appsettings.json
@@ -15,7 +15,8 @@
"Ingestion": {
"Enabled": false,
"IntervalMinutes": 30,
- "Telegram": { "Enabled": false, "BotToken": "", "Channels": [] },
- "Divar": { "Enabled": false, "City": "tehran", "Queries": [] }
+ "Telegram": { "Enabled": false, "BotToken": "", "Channels": [], "PerChannel": 20 },
+ "Bale": { "Enabled": false, "BotToken": "", "BaseUrl": "https://tapi.bale.ai" },
+ "Divar": { "Enabled": false, "City": "tehran", "Category": "jobs", "Queries": [], "BaseUrl": "https://api.divar.ir/v8/web-search", "PerQuery": 25 }
}
}