diff --git a/DEPLOY.md b/DEPLOY.md index 9b6dba8..cca40f5 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -69,7 +69,26 @@ ConnectionStrings__Default=Host=db;Port=5432;Database=hamkadr;Username=hamkadr;P Auth__AdminPhone=09XXXXXXXXX # Future: Kavenegar / SMS.ir keys for real OTP delivery + +# --- Channel scraping (optional; off by default) --- +# Enable the background worker and the sources you want; each enabled source is then fetched on a timer. +# Ingestion__Enabled=true +# Ingestion__IntervalMinutes=30 +# Telegram (public channels via t.me/s — no token needed): +# Ingestion__Telegram__Enabled=true +# Ingestion__Telegram__Channels__0=shift_channel_username +# Ingestion__Telegram__Channels__1=another_channel +# Bale (bot must be a member of the channel; Telegram-style Bot API): +# Ingestion__Bale__Enabled=true +# Ingestion__Bale__BotToken=__BALE_BOT_TOKEN__ +# Divar (best-effort web search): +# Ingestion__Divar__Enabled=true +# Ingestion__Divar__Queries__0=استخدام پزشک +# Ingestion__Divar__Queries__1=پرستار ``` +> The **AI audit layer** is configured at runtime in the admin panel (`/Admin/Settings`) — endpoint, +> model, API key, prompt/framework, and auto-approve — not via environment variables. Default: AI off, mode = Manual, +> so every ingested listing waits in the review queue until an admin publishes it. > `POSTGRES_PASSWORD` and the password in `ConnectionStrings__Default` must be identical. > `ASPNETCORE_ENVIRONMENT=Production` ⇒ only **reference data** (roles/cities/districts) is seeded — > no demo facilities/shifts. Real employers add listings via the employer panel. 
diff --git a/src/JobsMedical.Web/Data/AppDbContext.cs b/src/JobsMedical.Web/Data/AppDbContext.cs index 4550f75..f62769e 100644 --- a/src/JobsMedical.Web/Data/AppDbContext.cs +++ b/src/JobsMedical.Web/Data/AppDbContext.cs @@ -20,6 +20,7 @@ public class AppDbContext : DbContext public DbSet Visitors => Set(); public DbSet UserPreferences => Set(); public DbSet InterestEvents => Set(); + public DbSet AppSettings => Set(); protected override void OnModelCreating(ModelBuilder b) { diff --git a/src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.Designer.cs b/src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.Designer.cs new file mode 100644 index 0000000..c9e65bc --- /dev/null +++ b/src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.Designer.cs @@ -0,0 +1,833 @@ +// +using System; +using JobsMedical.Web.Data; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; +using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata; + +#nullable disable + +namespace JobsMedical.Web.Migrations +{ + [DbContext(typeof(AppDbContext))] + [Migration("20260603140343_AiSettingsAndAutomation")] + partial class AiSettingsAndAutomation + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasAnnotation("ProductVersion", "10.0.0") + .HasAnnotation("Relational:MaxIdentifierLength", 63); + + NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder); + + modelBuilder.Entity("JobsMedical.Web.Models.AppSetting", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("AiApiKey") + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("AiAutoApprove") + 
.HasColumnType("boolean"); + + b.Property("AiEnabled") + .HasColumnType("boolean"); + + b.Property("AiEndpoint") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("AiModel") + .HasMaxLength(120) + .HasColumnType("character varying(120)"); + + b.Property("AiSystemPrompt") + .IsRequired() + .HasMaxLength(4000) + .HasColumnType("character varying(4000)"); + + b.Property("AutoPublishMinConfidence") + .HasColumnType("integer"); + + b.Property("Mode") + .HasColumnType("integer"); + + b.Property("UpdatedAt") + .HasColumnType("timestamp with time zone"); + + b.HasKey("Id"); + + b.ToTable("AppSettings"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Application", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("DoctorId") + .HasColumnType("integer"); + + b.Property("Message") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("ShiftId") + .HasColumnType("integer"); + + b.Property("Status") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("DoctorId"); + + b.HasIndex("ShiftId", "DoctorId") + .IsUnique(); + + b.ToTable("Applications"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.City", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("IsActive") + .HasColumnType("boolean"); + + b.Property("Name") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("Province") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.HasKey("Id"); + + b.ToTable("Cities"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.District", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + 
.HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CityId") + .HasColumnType("integer"); + + b.Property("IsActive") + .HasColumnType("boolean"); + + b.Property("Name") + .IsRequired() + .HasMaxLength(120) + .HasColumnType("character varying(120)"); + + b.HasKey("Id"); + + b.HasIndex("CityId"); + + b.ToTable("Districts"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Bio") + .HasMaxLength(1000) + .HasColumnType("character varying(1000)"); + + b.Property("CityId") + .HasColumnType("integer"); + + b.Property("IsVerified") + .HasColumnType("boolean"); + + b.Property("LicenseNo") + .HasMaxLength(20) + .HasColumnType("character varying(20)"); + + b.Property("RoleId") + .HasColumnType("integer"); + + b.Property("Specialty") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("UserId") + .HasColumnType("integer"); + + b.Property("YearsExperience") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("CityId"); + + b.HasIndex("RoleId"); + + b.HasIndex("UserId") + .IsUnique(); + + b.ToTable("DoctorProfiles"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Facility", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Address") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("BaleId") + .HasMaxLength(50) + .HasColumnType("character varying(50)"); + + b.Property("CityId") + .HasColumnType("integer"); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("DistrictId") + .HasColumnType("integer"); + + b.Property("IsVerified") + .HasColumnType("boolean"); 
+ + b.Property("Lat") + .HasColumnType("double precision"); + + b.Property("Lng") + .HasColumnType("double precision"); + + b.Property("Name") + .IsRequired() + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("OwnerUserId") + .HasColumnType("integer"); + + b.Property("Phone") + .HasMaxLength(20) + .HasColumnType("character varying(20)"); + + b.Property("Type") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("CityId"); + + b.HasIndex("DistrictId"); + + b.HasIndex("OwnerUserId"); + + b.ToTable("Facilities"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("bigint"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("EventType") + .HasColumnType("integer"); + + b.Property("JobOpeningId") + .HasColumnType("integer"); + + b.Property("ShiftId") + .HasColumnType("integer"); + + b.Property("VisitorId") + .IsRequired() + .HasColumnType("character varying(36)"); + + b.HasKey("Id"); + + b.HasIndex("JobOpeningId"); + + b.HasIndex("ShiftId"); + + b.HasIndex("VisitorId", "CreatedAt"); + + b.ToTable("InterestEvents"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("Description") + .HasMaxLength(2000) + .HasColumnType("character varying(2000)"); + + b.Property("EmploymentType") + .HasColumnType("integer"); + + b.Property("FacilityId") + .HasColumnType("integer"); + + b.Property("Requirements") + .HasMaxLength(1000) + .HasColumnType("character varying(1000)"); + + b.Property("RoleId") + .HasColumnType("integer"); + + b.Property("SalaryMax") + 
.HasColumnType("bigint"); + + b.Property("SalaryMin") + .HasColumnType("bigint"); + + b.Property("Source") + .HasColumnType("integer"); + + b.Property("SourceUrl") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("Status") + .HasColumnType("integer"); + + b.Property("Title") + .IsRequired() + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.HasKey("Id"); + + b.HasIndex("FacilityId"); + + b.HasIndex("RoleId"); + + b.HasIndex("Status"); + + b.ToTable("JobOpenings"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Confidence") + .HasColumnType("integer"); + + b.Property("ContentHash") + .HasMaxLength(64) + .HasColumnType("character varying(64)"); + + b.Property("FetchedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("LinkedShiftId") + .HasColumnType("integer"); + + b.Property("ParsedJson") + .HasColumnType("text"); + + b.Property("RawText") + .IsRequired() + .HasColumnType("text"); + + b.Property("SourceChannel") + .IsRequired() + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("SourceUrl") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("Status") + .HasColumnType("integer"); + + b.Property("ValidationNotes") + .HasMaxLength(1000) + .HasColumnType("character varying(1000)"); + + b.HasKey("Id"); + + b.HasIndex("ContentHash"); + + b.HasIndex("LinkedShiftId"); + + b.HasIndex("Status"); + + b.ToTable("RawListings"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Role", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Category") + .IsRequired() + .HasMaxLength(50) + .HasColumnType("character varying(50)"); + + 
b.Property("IsActive") + .HasColumnType("boolean"); + + b.Property("Name") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("SortOrder") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.ToTable("Roles"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Shift", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("Date") + .HasColumnType("date"); + + b.Property("Description") + .HasMaxLength(1500) + .HasColumnType("character varying(1500)"); + + b.Property("EndTime") + .HasColumnType("time without time zone"); + + b.Property("FacilityId") + .HasColumnType("integer"); + + b.Property("PayAmount") + .HasColumnType("bigint"); + + b.Property("PayType") + .HasColumnType("integer"); + + b.Property("RoleId") + .HasColumnType("integer"); + + b.Property("SharePercent") + .HasColumnType("integer"); + + b.Property("ShiftType") + .HasColumnType("integer"); + + b.Property("Source") + .HasColumnType("integer"); + + b.Property("SourceUrl") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("SpecialtyRequired") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("StartTime") + .HasColumnType("time without time zone"); + + b.Property("Status") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("FacilityId"); + + b.HasIndex("RoleId"); + + b.HasIndex("Date", "Status"); + + b.ToTable("Shifts"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.User", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("FullName") + .HasMaxLength(150) + 
.HasColumnType("character varying(150)"); + + b.Property("IsPhoneVerified") + .HasColumnType("boolean"); + + b.Property("Phone") + .IsRequired() + .HasMaxLength(20) + .HasColumnType("character varying(20)"); + + b.Property("Role") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("Phone") + .IsUnique(); + + b.ToTable("Users"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CityId") + .HasColumnType("integer"); + + b.Property("MinPay") + .HasColumnType("bigint"); + + b.Property("PreferredShiftType") + .HasColumnType("integer"); + + b.Property("RoleId") + .HasColumnType("integer"); + + b.Property("UpdatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("VisitorId") + .IsRequired() + .HasColumnType("character varying(36)"); + + b.HasKey("Id"); + + b.HasIndex("CityId"); + + b.HasIndex("RoleId"); + + b.HasIndex("VisitorId") + .IsUnique(); + + b.ToTable("UserPreferences"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b => + { + b.Property("Id") + .HasMaxLength(36) + .HasColumnType("character varying(36)"); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("LastSeenAt") + .HasColumnType("timestamp with time zone"); + + b.Property("UserId") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("UserId"); + + b.ToTable("Visitors"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Application", b => + { + b.HasOne("JobsMedical.Web.Models.User", "Doctor") + .WithMany("Applications") + .HasForeignKey("DoctorId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("JobsMedical.Web.Models.Shift", "Shift") + .WithMany("Applications") + .HasForeignKey("ShiftId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("Doctor"); + + 
b.Navigation("Shift"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.District", b => + { + b.HasOne("JobsMedical.Web.Models.City", "City") + .WithMany() + .HasForeignKey("CityId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("City"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b => + { + b.HasOne("JobsMedical.Web.Models.City", "City") + .WithMany() + .HasForeignKey("CityId"); + + b.HasOne("JobsMedical.Web.Models.Role", "Role") + .WithMany() + .HasForeignKey("RoleId"); + + b.HasOne("JobsMedical.Web.Models.User", "User") + .WithOne("DoctorProfile") + .HasForeignKey("JobsMedical.Web.Models.DoctorProfile", "UserId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("City"); + + b.Navigation("Role"); + + b.Navigation("User"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Facility", b => + { + b.HasOne("JobsMedical.Web.Models.City", "City") + .WithMany("Facilities") + .HasForeignKey("CityId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("JobsMedical.Web.Models.District", "District") + .WithMany("Facilities") + .HasForeignKey("DistrictId") + .OnDelete(DeleteBehavior.SetNull); + + b.HasOne("JobsMedical.Web.Models.User", "OwnerUser") + .WithMany() + .HasForeignKey("OwnerUserId") + .OnDelete(DeleteBehavior.SetNull); + + b.Navigation("City"); + + b.Navigation("District"); + + b.Navigation("OwnerUser"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b => + { + b.HasOne("JobsMedical.Web.Models.JobOpening", "JobOpening") + .WithMany() + .HasForeignKey("JobOpeningId") + .OnDelete(DeleteBehavior.Cascade); + + b.HasOne("JobsMedical.Web.Models.Shift", "Shift") + .WithMany() + .HasForeignKey("ShiftId") + .OnDelete(DeleteBehavior.Cascade); + + b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor") + .WithMany("Events") + .HasForeignKey("VisitorId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("JobOpening"); + + 
b.Navigation("Shift"); + + b.Navigation("Visitor"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b => + { + b.HasOne("JobsMedical.Web.Models.Facility", "Facility") + .WithMany() + .HasForeignKey("FacilityId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("JobsMedical.Web.Models.Role", "Role") + .WithMany() + .HasForeignKey("RoleId") + .OnDelete(DeleteBehavior.Restrict) + .IsRequired(); + + b.Navigation("Facility"); + + b.Navigation("Role"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b => + { + b.HasOne("JobsMedical.Web.Models.Shift", "LinkedShift") + .WithMany() + .HasForeignKey("LinkedShiftId"); + + b.Navigation("LinkedShift"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Shift", b => + { + b.HasOne("JobsMedical.Web.Models.Facility", "Facility") + .WithMany("Shifts") + .HasForeignKey("FacilityId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("JobsMedical.Web.Models.Role", "Role") + .WithMany("Shifts") + .HasForeignKey("RoleId") + .OnDelete(DeleteBehavior.Restrict) + .IsRequired(); + + b.Navigation("Facility"); + + b.Navigation("Role"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b => + { + b.HasOne("JobsMedical.Web.Models.City", "City") + .WithMany() + .HasForeignKey("CityId"); + + b.HasOne("JobsMedical.Web.Models.Role", "Role") + .WithMany() + .HasForeignKey("RoleId"); + + b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor") + .WithOne("Preferences") + .HasForeignKey("JobsMedical.Web.Models.UserPreferences", "VisitorId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("City"); + + b.Navigation("Role"); + + b.Navigation("Visitor"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b => + { + b.HasOne("JobsMedical.Web.Models.User", "User") + .WithMany() + .HasForeignKey("UserId") + .OnDelete(DeleteBehavior.SetNull); + + b.Navigation("User"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.City", b => 
+ { + b.Navigation("Facilities"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.District", b => + { + b.Navigation("Facilities"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Facility", b => + { + b.Navigation("Shifts"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Role", b => + { + b.Navigation("Shifts"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Shift", b => + { + b.Navigation("Applications"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.User", b => + { + b.Navigation("Applications"); + + b.Navigation("DoctorProfile"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b => + { + b.Navigation("Events"); + + b.Navigation("Preferences"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.cs b/src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.cs new file mode 100644 index 0000000..4e8e93c --- /dev/null +++ b/src/JobsMedical.Web/Migrations/20260603140343_AiSettingsAndAutomation.cs @@ -0,0 +1,44 @@ +using System; +using Microsoft.EntityFrameworkCore.Migrations; +using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata; + +#nullable disable + +namespace JobsMedical.Web.Migrations +{ + /// + public partial class AiSettingsAndAutomation : Migration + { + /// + protected override void Up(MigrationBuilder migrationBuilder) + { + migrationBuilder.CreateTable( + name: "AppSettings", + columns: table => new + { + Id = table.Column(type: "integer", nullable: false) + .Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn), + Mode = table.Column(type: "integer", nullable: false), + AutoPublishMinConfidence = table.Column(type: "integer", nullable: false), + AiEnabled = table.Column(type: "boolean", nullable: false), + AiEndpoint = table.Column(type: "character varying(500)", maxLength: 500, nullable: true), + AiApiKey = table.Column(type: "character varying(200)", maxLength: 
200, nullable: true), + AiModel = table.Column(type: "character varying(120)", maxLength: 120, nullable: true), + AiSystemPrompt = table.Column(type: "character varying(4000)", maxLength: 4000, nullable: false), + AiAutoApprove = table.Column(type: "boolean", nullable: false), + UpdatedAt = table.Column(type: "timestamp with time zone", nullable: false) + }, + constraints: table => + { + table.PrimaryKey("PK_AppSettings", x => x.Id); + }); + } + + /// + protected override void Down(MigrationBuilder migrationBuilder) + { + migrationBuilder.DropTable( + name: "AppSettings"); + } + } +} diff --git a/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs b/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs index c2e27ac..6d15fd1 100644 --- a/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs +++ b/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs @@ -22,6 +22,51 @@ namespace JobsMedical.Web.Migrations NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder); + modelBuilder.Entity("JobsMedical.Web.Models.AppSetting", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("AiApiKey") + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("AiAutoApprove") + .HasColumnType("boolean"); + + b.Property("AiEnabled") + .HasColumnType("boolean"); + + b.Property("AiEndpoint") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("AiModel") + .HasMaxLength(120) + .HasColumnType("character varying(120)"); + + b.Property("AiSystemPrompt") + .IsRequired() + .HasMaxLength(4000) + .HasColumnType("character varying(4000)"); + + b.Property("AutoPublishMinConfidence") + .HasColumnType("integer"); + + b.Property("Mode") + .HasColumnType("integer"); + + b.Property("UpdatedAt") + .HasColumnType("timestamp with time zone"); + + b.HasKey("Id"); + + b.ToTable("AppSettings"); + 
}); + modelBuilder.Entity("JobsMedical.Web.Models.Application", b => { b.Property("Id") diff --git a/src/JobsMedical.Web/Models/AppSetting.cs b/src/JobsMedical.Web/Models/AppSetting.cs new file mode 100644 index 0000000..4515f35 --- /dev/null +++ b/src/JobsMedical.Web/Models/AppSetting.cs @@ -0,0 +1,50 @@ +using System.ComponentModel.DataAnnotations; + +namespace JobsMedical.Web.Models; + +/// +/// Single-row (Id=1) platform settings the admin controls at runtime — chiefly the ingestion +/// automation policy and the optional AI audit layer. Kept in the DB (not appsettings) so it's +/// editable from the admin panel without a redeploy. +/// +public class AppSetting +{ + public int Id { get; set; } = 1; + + // --- Ingestion automation --- + public IngestionMode Mode { get; set; } = IngestionMode.Manual; + + /// In Automatic mode WITHOUT AI, listings at/above this confidence auto-publish. + public int AutoPublishMinConfidence { get; set; } = 85; + + // --- AI audit layer (optional) --- + public bool AiEnabled { get; set; } = false; + + /// OpenAI-compatible chat-completions endpoint (self-hosted or Iranian provider). + [MaxLength(500)] public string? AiEndpoint { get; set; } + [MaxLength(200)] public string? AiApiKey { get; set; } + [MaxLength(120)] public string? AiModel { get; set; } = "gpt-4o-mini"; + + /// The prompt + "framework" the AI follows to approve / reject / structure a listing. + [MaxLength(4000)] + public string AiSystemPrompt { get; set; } = DefaultPrompt; + + /// If AI approves AND Mode is Automatic, publish without human review. + public bool AiAutoApprove { get; set; } = false; + + public DateTime UpdatedAt { get; set; } = DateTime.UtcNow; + + public const string DefaultPrompt = """ + تو دستیار بررسی آگهی‌های کاری حوزه درمان برای پلتفرم «همکادر» هستی. + هر آگهی خام را بخوان و تصمیم بگیر: + - approve: آگهی واقعی و مرتبط با شیفت/استخدام کادر درمان است و اطلاعات کافی دارد. + - reject: تبلیغ، اسپم، نامرتبط، یا فاقد اطلاعات حداقلی است. 
+ - review: مرتبط است اما ناقص/مبهم و نیاز به بررسی انسانی دارد. + نقش، شهر/محله، نوع شیفت، نوع همکاری، مبلغ یا درصد سهم، و عنوان را در صورت وجود استخراج کن. + فقط با یک شیء JSON پاسخ بده با کلیدهای: + decision (approve|reject|review)، confidence (0-100)، reason (فارسی کوتاه)، + kind (shift|job)، role، city، district، shiftType (day|evening|night|oncall)، + employmentType (fulltime|parttime|contract|plan)، payAmount (عدد تومان یا null)، + sharePercent (0-100 یا null)، title، facilityName. + """; +} diff --git a/src/JobsMedical.Web/Models/Enums.cs b/src/JobsMedical.Web/Models/Enums.cs index 6bc7fb0..6326faf 100644 --- a/src/JobsMedical.Web/Models/Enums.cs +++ b/src/JobsMedical.Web/Models/Enums.cs @@ -75,3 +75,10 @@ public enum ListingKind Shift = 0, Job = 1 } + +/// How ingested listings get onto the site. +public enum IngestionMode +{ + Manual = 0, // همه‌چیز به صف بررسی می‌رود؛ ادمین تأیید می‌کند + Automatic = 1 // موارد تأییدشده (طبق آستانه/هوش مصنوعی) خودکار منتشر می‌شوند +} diff --git a/src/JobsMedical.Web/Pages/Admin/Index.cshtml b/src/JobsMedical.Web/Pages/Admin/Index.cshtml index bbbee0a..e40c974 100644 --- a/src/JobsMedical.Web/Pages/Admin/Index.cshtml +++ b/src/JobsMedical.Web/Pages/Admin/Index.cshtml @@ -12,6 +12,7 @@ (@JalaliDate.ToPersianDigits(Model.Queue.Count.ToString()) در صف، @JalaliDate.ToPersianDigits(Model.Flagged.Count.ToString()) پرچم‌خورده) · تأیید مراکز درمانی + · تنظیمات جمع‌آوری و AI

diff --git a/src/JobsMedical.Web/Pages/Admin/Settings.cshtml b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml new file mode 100644 index 0000000..ff7fa04 --- /dev/null +++ b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml @@ -0,0 +1,67 @@ +@page +@model JobsMedical.Web.Pages.Admin.SettingsModel +@{ + ViewData["Title"] = "تنظیمات جمع‌آوری و هوش مصنوعی"; +} + +
+
+

تنظیمات جمع‌آوری و هوش مصنوعی

+

← بازگشت به صف

+
+
+ +
+ @if (Model.Saved is not null) + { +
✓ @Model.Saved
+ } +
+

حالت انتشار

+
+ + +
+
+ + +

در حالت خودکار و بدون AI، آگهی‌هایی با اطمینان بالاتر از این مقدار خودکار منتشر می‌شوند.

+
+ +
+ +

لایه هوش مصنوعی (اختیاری)

+
+ +

در صورت فعال بودن، هر آگهی پیش از انتشار توسط مدل بررسی و تأیید/رد/ساختارمند می‌شود.

+
+
+ + +

می‌تواند یک مدل self-hosted یا سرویس داخلی باشد (OpenAI/Anthropic در ایران مسدودند).

+
+
+
+
+
+
+ + +

به مدل بگو چطور تأیید/رد کند و چه فیلدهایی را استخراج کند. خروجی باید JSON باشد.

+
+
+ +
+ + +
+
diff --git a/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs
new file mode 100644
index 0000000..2f38445
--- /dev/null
+++ b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs
@@ -0,0 +1,71 @@
+using JobsMedical.Web.Models;
+using JobsMedical.Web.Services.Scraping;
+using Microsoft.AspNetCore.Authorization;
+using Microsoft.AspNetCore.Mvc;
+using Microsoft.AspNetCore.Mvc.RazorPages;
+
+namespace JobsMedical.Web.Pages.Admin;
+
+/// <summary>
+/// Admin-only Razor page model that loads the platform settings into bound form fields
+/// and writes the posted values back through <see cref="SettingsService"/>.
+/// </summary>
+[Authorize(Roles = "Admin")]
+public class SettingsModel : PageModel
+{
+    private readonly SettingsService _settings;
+    public SettingsModel(SettingsService settings) => _settings = settings;
+
+    [BindProperty] public IngestionMode Mode { get; set; }
+    [BindProperty] public int AutoPublishMinConfidence { get; set; }
+    [BindProperty] public bool AiEnabled { get; set; }
+    [BindProperty] public string? AiEndpoint { get; set; }
+    [BindProperty] public string? AiApiKey { get; set; }
+    [BindProperty] public string? AiModel { get; set; }
+    [BindProperty] public string AiSystemPrompt { get; set; } = "";
+    [BindProperty] public bool AiAutoApprove { get; set; }
+
+    /// <summary>One-shot confirmation message shown after a successful save.</summary>
+    [TempData] public string? Saved { get; set; }
+
+    /// <summary>Copies the stored settings into the bound properties for display.</summary>
+    public async Task OnGetAsync()
+    {
+        var s = await _settings.GetAsync();
+        Mode = s.Mode;
+        AutoPublishMinConfidence = s.AutoPublishMinConfidence;
+        AiEnabled = s.AiEnabled;
+        AiEndpoint = s.AiEndpoint;
+        AiApiKey = s.AiApiKey;
+        AiModel = s.AiModel;
+        AiSystemPrompt = s.AiSystemPrompt;
+        AiAutoApprove = s.AiAutoApprove;
+    }
+
+    /// <summary>
+    /// Normalizes the posted values, saves them, and redirects back to this page (PRG).
+    /// </summary>
+    public async Task<IActionResult> OnPostAsync()
+    {
+        await _settings.SaveAsync(new AppSetting
+        {
+            // Fall back to Manual if the posted value is not a declared mode.
+            Mode = Enum.IsDefined(Mode) ? Mode : IngestionMode.Manual,
+            // Confidence is a 0-100 score; clamp so out-of-range input is never stored.
+            AutoPublishMinConfidence = Math.Clamp(AutoPublishMinConfidence, 0, 100),
+            AiEnabled = AiEnabled,
+            AiEndpoint = AiEndpoint?.Trim(),
+            AiApiKey = AiApiKey?.Trim(),
+            AiModel = AiModel?.Trim(),
+            // The AiSystemPrompt column is NOT NULL and an empty form field binds as null
+            // by default, so a cleared textarea falls back to the built-in prompt.
+            // NOTE(review): input longer than the 4000-char column limit is not checked here.
+            AiSystemPrompt = string.IsNullOrWhiteSpace(AiSystemPrompt)
+                ? AppSetting.DefaultPrompt
+                : AiSystemPrompt,
+            AiAutoApprove = AiAutoApprove,
+        });
+        Saved = "تنظیمات ذخیره شد.";
+        return RedirectToPage();
+    }
+}
diff --git a/src/JobsMedical.Web/Program.cs b/src/JobsMedical.Web/Program.cs index 0c8c847..4eb294c 100644 --- a/src/JobsMedical.Web/Program.cs +++ b/src/JobsMedical.Web/Program.cs @@ -21,18 +21,31 @@ builder.Services.AddScoped(); // Listing parser: heuristic now; swap for an LLM-backed IListingParser later. builder.Services.AddSingleton(); -// Scrape/ingestion engine: pluggable sources → dedupe → parse → validate → review queue. +// Scrape/ingestion engine: pluggable sources → dedupe → parse → validate → (AI audit) → publish/queue. 
+builder.Services.AddHttpClient("scrape", c => +{ + c.Timeout = TimeSpan.FromSeconds(20); + c.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; HamkadrBot/1.0)"); +}); +builder.Services.AddHttpClient("ai"); builder.Services.Configure( builder.Configuration.GetSection("Ingestion")); builder.Services.Configure( builder.Configuration.GetSection("Ingestion:Telegram")); +builder.Services.Configure( + builder.Configuration.GetSection("Ingestion:Bale")); builder.Services.Configure( builder.Configuration.GetSection("Ingestion:Divar")); builder.Services.AddSingleton(); +builder.Services.AddSingleton(); +builder.Services.AddScoped(); builder.Services.AddSingleton(); builder.Services.AddSingleton(); +builder.Services.AddSingleton(); builder.Services.AddSingleton(); builder.Services.AddScoped(); diff --git a/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs b/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs new file mode 100644 index 0000000..65990b7 --- /dev/null +++ b/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs @@ -0,0 +1,108 @@ +using System.Net.Http.Headers; +using System.Text; +using System.Text.Json; +using JobsMedical.Web.Models; + +namespace JobsMedical.Web.Services.Scraping; + +public record AiStructured( + string? Kind, string? Role, string? City, string? District, string? ShiftType, + string? EmploymentType, long? PayAmount, int? SharePercent, string? Title, string? FacilityName); + +/// An AI verdict on a raw listing. +public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data) +{ + public bool Approve => Decision.Equals("approve", StringComparison.OrdinalIgnoreCase); + public bool Reject => Decision.Equals("reject", StringComparison.OrdinalIgnoreCase); +} + +public interface IAiAuditor +{ + /// Audit a raw post. Returns null when AI is off or the call fails (fail safe → manual). 
+ Task AuditAsync(string rawText, AppSetting settings, CancellationToken ct = default); +} + +/// +/// Calls any OpenAI-compatible chat-completions endpoint (self-hosted vLLM/Ollama, or an Iranian +/// provider — OpenAI/Anthropic are blocked from Iran). The admin-set system prompt is the +/// "framework" that tells the model how to approve/reject/structure. We ask for strict JSON and +/// parse it. Any failure returns null so ingestion falls back to the rule-based path. +/// +public class OpenAiCompatibleAuditor : IAiAuditor +{ + private readonly IHttpClientFactory _http; + private readonly ILogger _log; + + public OpenAiCompatibleAuditor(IHttpClientFactory http, ILogger log) + { + _http = http; + _log = log; + } + + public async Task AuditAsync(string rawText, AppSetting s, CancellationToken ct = default) + { + if (!s.AiEnabled || string.IsNullOrWhiteSpace(s.AiEndpoint)) return null; + + try + { + var payload = new + { + model = string.IsNullOrWhiteSpace(s.AiModel) ? "gpt-4o-mini" : s.AiModel, + temperature = 0, + response_format = new { type = "json_object" }, + messages = new object[] + { + new { role = "system", content = s.AiSystemPrompt }, + new { role = "user", content = "آگهی خام:\n" + rawText + "\n\nفقط با JSON پاسخ بده." 
}, + }, + }; + + var client = _http.CreateClient("ai"); + client.Timeout = TimeSpan.FromSeconds(30); + using var req = new HttpRequestMessage(HttpMethod.Post, s.AiEndpoint) + { + Content = new StringContent(JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json"), + }; + if (!string.IsNullOrWhiteSpace(s.AiApiKey)) + req.Headers.Authorization = new AuthenticationHeaderValue("Bearer", s.AiApiKey); + + using var resp = await client.SendAsync(req, ct); + resp.EnsureSuccessStatusCode(); + var body = await resp.Content.ReadAsStringAsync(ct); + + using var doc = JsonDocument.Parse(body); + var content = doc.RootElement + .GetProperty("choices")[0].GetProperty("message").GetProperty("content").GetString(); + if (string.IsNullOrWhiteSpace(content)) return null; + + return ParseVerdict(content); + } + catch (Exception ex) + { + _log.LogWarning(ex, "AI audit failed — falling back to rule-based decision."); + return null; + } + } + + private static AiAuditResult? ParseVerdict(string json) + { + // The content itself should be a JSON object; tolerate code fences. + json = json.Trim().Trim('`'); + var start = json.IndexOf('{'); + var end = json.LastIndexOf('}'); + if (start < 0 || end <= start) return null; + json = json.Substring(start, end - start + 1); + + using var doc = JsonDocument.Parse(json); + var r = doc.RootElement; + string? S(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.String ? v.GetString() : null; + int I(string k, int d) => r.TryGetProperty(k, out var v) && v.TryGetInt32(out var n) ? n : d; + long? L(string k) => r.TryGetProperty(k, out var v) && v.TryGetInt64(out var n) ? n : null; + int? NI(string k) => r.TryGetProperty(k, out var v) && v.TryGetInt32(out var n) ? n : null; + + var decision = (S("decision") ?? 
"review").ToLowerInvariant(); + var data = new AiStructured(S("kind"), S("role"), S("city"), S("district"), S("shiftType"), + S("employmentType"), L("payAmount"), NI("sharePercent"), S("title"), S("facilityName")); + return new AiAuditResult(decision, Math.Clamp(I("confidence", 50), 0, 100), S("reason"), data); + } +} diff --git a/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs b/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs new file mode 100644 index 0000000..21ef6b0 --- /dev/null +++ b/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs @@ -0,0 +1,68 @@ +using System.Text.Json; +using Microsoft.Extensions.Options; + +namespace JobsMedical.Web.Services.Scraping; + +public class BaleOptions +{ + public bool Enabled { get; set; } + public string? BotToken { get; set; } + public string BaseUrl { get; set; } = "https://tapi.bale.ai"; // Bale Bot API host +} + +/// +/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. The bot must +/// be a member/admin of the channels it should read. Pulls text from messages and channel posts. 
+/// +public class BaleListingSource : IListingSource +{ + private readonly BaleOptions _opts; + private readonly IHttpClientFactory _http; + private readonly ILogger _log; + + public BaleListingSource(IOptions opts, IHttpClientFactory http, + ILogger log) + { + _opts = opts.Value; + _http = http; + _log = log; + } + + public string Name => "بله"; + public bool Enabled => _opts.Enabled && !string.IsNullOrWhiteSpace(_opts.BotToken); + + public async Task> FetchAsync(CancellationToken ct = default) + { + if (!Enabled) { _log.LogInformation("Bale source disabled/unconfigured."); return Array.Empty(); } + + try + { + var client = _http.CreateClient("scrape"); + var url = $"{_opts.BaseUrl.TrimEnd('/')}/bot{_opts.BotToken}/getUpdates"; + var body = await client.GetStringAsync(url, ct); + + using var doc = JsonDocument.Parse(body); + if (!doc.RootElement.TryGetProperty("result", out var result) || result.ValueKind != JsonValueKind.Array) + return Array.Empty(); + + var items = new List(); + foreach (var update in result.EnumerateArray()) + { + var text = TextOf(update, "channel_post") ?? TextOf(update, "message"); + if (!string.IsNullOrWhiteSpace(text) && text!.Trim().Length >= 15) + items.Add(new ScrapedItem("بله", text.Trim())); + } + return items; + } + catch (Exception ex) + { + _log.LogWarning(ex, "Bale fetch failed."); + return Array.Empty(); + } + } + + private static string? TextOf(JsonElement update, string key) + => update.TryGetProperty(key, out var m) + && m.TryGetProperty("text", out var t) && t.ValueKind == JsonValueKind.String + ? 
t.GetString() : null; +} diff --git a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs index ce537bf..cdb60c9 100644 --- a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs +++ b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs @@ -1,3 +1,5 @@ +using System.Text; +using System.Text.Json; using Microsoft.Extensions.Options; namespace JobsMedical.Web.Services.Scraping; @@ -5,38 +7,80 @@ namespace JobsMedical.Web.Services.Scraping; public class DivarOptions { public bool Enabled { get; set; } - public string? City { get; set; } // e.g. "tehran" - public List Queries { get; set; } = new(); // search terms, e.g. "استخدام پزشک" + public string City { get; set; } = "tehran"; + public string Category { get; set; } = "jobs"; + public List Queries { get; set; } = new(); // e.g. "پرستار", "پزشک عمومی", "درمانگاه" + public string BaseUrl { get; set; } = "https://api.divar.ir/v8/web-search"; + public int PerQuery { get; set; } = 25; } /// -/// Divar source. Credential-ready: configure city + queries in (Ingestion:Divar) and implement -/// the fetch against Divar's listing API/HTML. Dormant until enabled. +/// Best-effort Divar fetch: queries Divar's web-search JSON for each term and harvests post +/// titles + descriptions. Divar's private API shifts shape over time, so we walk the JSON +/// tolerantly for any object carrying a "title" plus a nearby description field, and fail soft. 
/// public class DivarListingSource : IListingSource { private readonly DivarOptions _opts; + private readonly IHttpClientFactory _http; private readonly ILogger _log; - public DivarListingSource(IOptions opts, ILogger log) + public DivarListingSource(IOptions opts, IHttpClientFactory http, + ILogger log) { _opts = opts.Value; + _http = http; _log = log; } public string Name => "دیوار"; public bool Enabled => _opts.Enabled && _opts.Queries.Count > 0; - public Task> FetchAsync(CancellationToken ct = default) + public async Task> FetchAsync(CancellationToken ct = default) { - if (!Enabled) + if (!Enabled) { _log.LogInformation("Divar source disabled/unconfigured."); return Array.Empty(); } + + var client = _http.CreateClient("scrape"); + var items = new List(); + foreach (var q in _opts.Queries.Where(q => q.Trim().Length > 0)) { - _log.LogInformation("Divar source not configured — skipping."); - return Task.FromResult>(Array.Empty()); + try + { + var url = $"{_opts.BaseUrl.TrimEnd('/')}/{_opts.City}/{_opts.Category}?q={Uri.EscapeDataString(q)}"; + var body = await client.GetStringAsync(url, ct); + using var doc = JsonDocument.Parse(body); + foreach (var text in Harvest(doc.RootElement).Take(_opts.PerQuery)) + items.Add(new ScrapedItem("دیوار", text, "https://divar.ir")); + } + catch (Exception ex) { _log.LogWarning(ex, "Divar fetch failed for query {Query}", q); } + } + return items; + } + + private static readonly string[] DescKeys = + { "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" }; + + /// Walk the JSON; for each object with a string "title", emit title + first description. 
+ private static IEnumerable Harvest(JsonElement el) + { + if (el.ValueKind == JsonValueKind.Object) + { + if (el.TryGetProperty("title", out var t) && t.ValueKind == JsonValueKind.String) + { + var sb = new StringBuilder(t.GetString()); + foreach (var k in DescKeys) + if (el.TryGetProperty(k, out var d) && d.ValueKind == JsonValueKind.String) + { sb.Append(" — ").Append(d.GetString()); break; } + var text = sb.ToString().Trim(); + if (text.Length >= 15) yield return text; + } + foreach (var p in el.EnumerateObject()) + foreach (var s in Harvest(p.Value)) yield return s; + } + else if (el.ValueKind == JsonValueKind.Array) + { + foreach (var item in el.EnumerateArray()) + foreach (var s in Harvest(item)) yield return s; } - // TODO(prod): query Divar for each term in the configured city, map each ad's - // title+description to new ScrapedItem(Name, text, adUrl). - _log.LogWarning("Divar fetch not yet implemented; returning empty."); - return Task.FromResult>(Array.Empty()); } } diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs index 65356d5..a9c21b5 100644 --- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs +++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs @@ -7,22 +7,24 @@ using Microsoft.EntityFrameworkCore; namespace JobsMedical.Web.Services.Scraping; -public record SourceResult(string Source, int Fetched, int Queued, int Flagged, int Spam, int Duplicates); +public record SourceResult(string Source, int Fetched, int Queued, int Published, int Flagged, int Spam, int Duplicates); public record IngestionSummary(List Sources) { public int TotalQueued => Sources.Sum(s => s.Queued); + public int TotalPublished => Sources.Sum(s => s.Published); public int TotalFlagged => Sources.Sum(s => s.Flagged); public int TotalSpam => Sources.Sum(s => s.Spam); public int TotalDuplicates => Sources.Sum(s => s.Duplicates); } /// -/// The scrape engine. 
Pulls from every enabled , dedupes by content -/// hash, parses with , validates with , -/// and stores each as a with a status: New (queued for review), -/// Flagged (incomplete/suspicious), or Discarded (spam). Source-agnostic — add a source and it -/// flows through unchanged. +/// The scrape engine. For every enabled source: dedupe by content hash → parse → rule-validate → +/// (optional) AI audit → decide. Decision depends on admin settings: +/// • spam → Discarded +/// • AI on: AI verdict drives approve/reject/review; approve + Automatic + AiAutoApprove → publish +/// • AI off: Automatic + confidence ≥ threshold → publish; else queue/flag +/// "Publish" resolves-or-creates an (unverified) facility and creates the Shift/JobOpening. /// public class IngestionService { @@ -30,16 +32,15 @@ public class IngestionService private readonly IEnumerable _sources; private readonly IListingParser _parser; private readonly ListingValidator _validator; + private readonly IAiAuditor _ai; + private readonly SettingsService _settings; private readonly ILogger _log; - public IngestionService(AppDbContext db, IEnumerable sources, - IListingParser parser, ListingValidator validator, ILogger log) + public IngestionService(AppDbContext db, IEnumerable sources, IListingParser parser, + ListingValidator validator, IAiAuditor ai, SettingsService settings, ILogger log) { - _db = db; - _sources = sources; - _parser = parser; - _validator = validator; - _log = log; + _db = db; _sources = sources; _parser = parser; _validator = validator; + _ai = ai; _settings = settings; _log = log; } public IReadOnlyList<(string Name, bool Enabled)> Sources => @@ -47,18 +48,22 @@ public class IngestionService public async Task RunAsync(CancellationToken ct = default) { - var roles = await _db.Roles.Select(r => r.Name).ToListAsync(ct); - var cities = await _db.Cities.Select(c => c.Name).ToListAsync(ct); - var districts = await _db.Districts.Select(d => d.Name).ToListAsync(ct); + var settings = await 
_settings.GetAsync(); + var roles = await _db.Roles.ToListAsync(ct); + var cities = await _db.Cities.ToListAsync(ct); + var districts = await _db.Districts.ToListAsync(ct); + var roleNames = roles.Select(r => r.Name).ToList(); + var cityNames = cities.Select(c => c.Name).ToList(); + var districtNames = districts.Select(d => d.Name).ToList(); var results = new List(); foreach (var source in _sources.Where(s => s.Enabled)) { - int fetched = 0, queued = 0, flagged = 0, spam = 0, dupes = 0; + int fetched = 0, queued = 0, published = 0, flagged = 0, spam = 0, dupes = 0; IReadOnlyList items; try { items = await source.FetchAsync(ct); } - catch (Exception ex) { _log.LogError(ex, "Source {Source} fetch failed", source.Name); continue; } + catch (Exception ex) { _log.LogError(ex, "Source {Source} failed", source.Name); continue; } foreach (var item in items) { @@ -66,42 +71,155 @@ public class IngestionService var hash = Hash(item.RawText); if (await _db.RawListings.AnyAsync(r => r.ContentHash == hash, ct)) { dupes++; continue; } - var parsed = _parser.Parse(item.RawText, roles, cities, districts); + var parsed = _parser.Parse(item.RawText, roleNames, cityNames, districtNames); var val = _validator.Validate(item.RawText, parsed); - var status = val.IsSpam ? RawListingStatus.Discarded - : val.IsValid ? RawListingStatus.New - : RawListingStatus.Flagged; - if (status == RawListingStatus.New) queued++; - else if (status == RawListingStatus.Flagged) flagged++; - else spam++; + AiAuditResult? ai = null; + if (settings.AiEnabled && !val.IsSpam) + ai = await _ai.AuditAsync(item.RawText, settings, ct); - _db.RawListings.Add(new RawListing + var (status, reason, confidence) = Decide(settings, val, ai); + + var raw = new RawListing { SourceChannel = item.Source, SourceUrl = item.SourceUrl, RawText = item.RawText.Trim(), ContentHash = hash, - Confidence = val.Confidence, - ValidationNotes = val.Issues.Count > 0 ? 
string.Join("؛ ", val.Issues) : null, + Confidence = confidence, + ValidationNotes = reason, Status = status, - }); + }; + _db.RawListings.Add(raw); + + if (status == RawListingStatus.Normalized) + { + try { Publish(parsed, ai, raw, roles, cities, districts); published++; } + catch (Exception ex) { _log.LogWarning(ex, "Auto-publish failed; queueing instead"); raw.Status = RawListingStatus.New; queued++; } + } + else if (status == RawListingStatus.New) queued++; + else if (status == RawListingStatus.Flagged) flagged++; + else spam++; } await _db.SaveChangesAsync(ct); - results.Add(new SourceResult(source.Name, fetched, queued, flagged, spam, dupes)); - _log.LogInformation("Ingestion {Source}: fetched={F} queued={Q} flagged={Fl} spam={S} dupes={D}", - source.Name, fetched, queued, flagged, spam, dupes); + results.Add(new SourceResult(source.Name, fetched, queued, published, flagged, spam, dupes)); + _log.LogInformation("Ingest {S}: fetched={F} queued={Q} published={P} flagged={Fl} spam={Sp} dupes={D}", + source.Name, fetched, queued, published, flagged, spam, dupes); } return new IngestionSummary(results); } - /// SHA-256 hex of the whitespace-normalized text (for cross-run dedupe). + private static (RawListingStatus status, string? reason, int confidence) Decide( + AppSetting s, ValidationResult val, AiAuditResult? ai) + { + var notes = val.Issues.Count > 0 ? string.Join("؛ ", val.Issues) : null; + + if (val.IsSpam) + return (RawListingStatus.Discarded, Join("اسپم", notes), val.Confidence); + + if (ai is not null) + { + var aiNote = Join($"AI: {ai.Decision} ({ai.Confidence}٪)" + (ai.Reason is null ? "" : $" — {ai.Reason}"), notes); + if (ai.Reject) return (RawListingStatus.Discarded, aiNote, ai.Confidence); + if (ai.Approve) + return (s.Mode == IngestionMode.Automatic && s.AiAutoApprove + ? 
RawListingStatus.Normalized : RawListingStatus.New, aiNote, ai.Confidence); + return (RawListingStatus.Flagged, aiNote, ai.Confidence); // review + } + + if (!val.IsValid) return (RawListingStatus.Flagged, notes, val.Confidence); + if (s.Mode == IngestionMode.Automatic && val.Confidence >= s.AutoPublishMinConfidence) + return (RawListingStatus.Normalized, notes, val.Confidence); + return (RawListingStatus.New, notes, val.Confidence); + } + + private void Publish(ParsedListing parsed, AiAuditResult? ai, RawListing raw, + List roles, List cities, List districts) + { + var d = ai?.Data; + var roleName = d?.Role ?? parsed.RoleName; + var cityName = d?.City ?? parsed.CityName; + var districtName = d?.District ?? parsed.DistrictName; + + var role = roles.FirstOrDefault(r => r.Name == roleName) ?? roles.First(); + var city = cities.FirstOrDefault(c => c.Name == cityName) + ?? cities.FirstOrDefault(c => c.IsActive) ?? cities.First(); + var district = districts.FirstOrDefault(x => x.Name == districtName && x.CityId == city.Id); + + var facilityName = !string.IsNullOrWhiteSpace(d?.FacilityName) ? d!.FacilityName!.Trim() + : $"مرکز درمانی (از {raw.SourceChannel})"; + var facility = _db.Facilities.Local.FirstOrDefault(f => f.Name == facilityName && f.CityId == city.Id) + ?? _db.Facilities.FirstOrDefault(f => f.Name == facilityName && f.CityId == city.Id); + if (facility is null) + { + facility = new Facility + { + Name = facilityName, Type = FacilityType.Clinic, City = city, DistrictId = district?.Id, + Phone = parsed.Phone, IsVerified = false, + }; + _db.Facilities.Add(facility); + } + + var kind = (d?.Kind ?? parsed.Kind.ToString()).ToLowerInvariant(); + if (kind.Contains("job") || kind.Contains("استخدام")) + { + _db.JobOpenings.Add(new JobOpening + { + Facility = facility, Role = role, + Title = !string.IsNullOrWhiteSpace(d?.Title) ? 
d!.Title!.Trim() : $"استخدام {role.Name}", + EmploymentType = MapEmployment(d?.EmploymentType, parsed.EmploymentType), + SalaryMin = parsed.PayAmount, + Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, + SourceUrl = raw.SourceUrl, + }); + } + else + { + var st = MapShiftType(d?.ShiftType, parsed.ShiftType); + var (start, end) = DefaultTimes(st); + _db.Shifts.Add(new Shift + { + Facility = facility, Role = role, + Date = DateOnly.FromDateTime(DateTime.UtcNow).AddDays(1), + StartTime = start, EndTime = end, ShiftType = st, + SpecialtyRequired = role.Name, Description = raw.RawText, + PayType = parsed.SharePercent is not null && parsed.PayAmount is null ? PayType.Percentage + : parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift, + PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent, + Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl, + }); + } + raw.Status = RawListingStatus.Normalized; + } + + private static ShiftType MapShiftType(string? ai, ShiftType? parsed) => (ai?.ToLowerInvariant()) switch + { + "day" => ShiftType.Day, "evening" => ShiftType.Evening, "night" => ShiftType.Night, "oncall" => ShiftType.OnCall, + _ => parsed ?? ShiftType.Day, + }; + + private static EmploymentType MapEmployment(string? ai, EmploymentType? parsed) => (ai?.ToLowerInvariant()) switch + { + "parttime" => EmploymentType.PartTime, "contract" => EmploymentType.Contract, + "plan" => EmploymentType.Plan, "fulltime" => EmploymentType.FullTime, + _ => parsed ?? EmploymentType.FullTime, + }; + + private static (TimeOnly, TimeOnly) DefaultTimes(ShiftType t) => t switch + { + ShiftType.Day => (new TimeOnly(8, 0), new TimeOnly(14, 0)), + ShiftType.Evening => (new TimeOnly(14, 0), new TimeOnly(20, 0)), + ShiftType.Night => (new TimeOnly(20, 0), new TimeOnly(8, 0)), + _ => (new TimeOnly(8, 0), new TimeOnly(8, 0)), + }; + + private static string? Join(string a, string? 
b) => string.IsNullOrEmpty(b) ? a : $"{a} | {b}"; + private static string Hash(string text) { var normalized = Regex.Replace((text ?? "").Trim(), @"\s+", " "); - var bytes = SHA256.HashData(Encoding.UTF8.GetBytes(normalized)); - return Convert.ToHexString(bytes).ToLowerInvariant(); + return Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(normalized))).ToLowerInvariant(); } } diff --git a/src/JobsMedical.Web/Services/Scraping/SettingsService.cs b/src/JobsMedical.Web/Services/Scraping/SettingsService.cs new file mode 100644 index 0000000..ff24df0 --- /dev/null +++ b/src/JobsMedical.Web/Services/Scraping/SettingsService.cs @@ -0,0 +1,40 @@ +using JobsMedical.Web.Data; +using JobsMedical.Web.Models; +using Microsoft.EntityFrameworkCore; + +namespace JobsMedical.Web.Services.Scraping; + +/// Loads/creates the single platform-settings row (Id=1). +public class SettingsService +{ + private readonly AppDbContext _db; + public SettingsService(AppDbContext db) => _db = db; + + public async Task GetAsync() + { + var s = await _db.AppSettings.FirstOrDefaultAsync(x => x.Id == 1); + if (s is null) + { + s = new AppSetting { Id = 1 }; + _db.AppSettings.Add(s); + await _db.SaveChangesAsync(); + } + return s; + } + + public async Task SaveAsync(AppSetting incoming) + { + var s = await GetAsync(); + s.Mode = incoming.Mode; + s.AutoPublishMinConfidence = Math.Clamp(incoming.AutoPublishMinConfidence, 0, 100); + s.AiEnabled = incoming.AiEnabled; + s.AiEndpoint = incoming.AiEndpoint?.Trim(); + s.AiApiKey = incoming.AiApiKey?.Trim(); + s.AiModel = incoming.AiModel?.Trim(); + s.AiSystemPrompt = string.IsNullOrWhiteSpace(incoming.AiSystemPrompt) + ? 
AppSetting.DefaultPrompt : incoming.AiSystemPrompt; + s.AiAutoApprove = incoming.AiAutoApprove; + s.UpdatedAt = DateTime.UtcNow; + await _db.SaveChangesAsync(); + } +} diff --git a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs index 5977caa..b655819 100644 --- a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs +++ b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs @@ -1,3 +1,5 @@ +using System.Net; +using System.Text.RegularExpressions; using Microsoft.Extensions.Options; namespace JobsMedical.Web.Services.Scraping; @@ -5,40 +7,71 @@ namespace JobsMedical.Web.Services.Scraping; public class TelegramOptions { public bool Enabled { get; set; } - public string? BotToken { get; set; } - public List Channels { get; set; } = new(); // @channel handles to read + public string? BotToken { get; set; } // optional (for private channels later) + public List Channels { get; set; } = new(); // public channel usernames (no @) + public int PerChannel { get; set; } = 20; } /// -/// Telegram/Bale channel source. Credential-ready: wire a bot token + channel list in config -/// (Ingestion:Telegram) and implement the fetch against the Bot API (getUpdates / channel posts) -/// or a userbot. Dormant until enabled, so the engine runs without it. +/// Reads public Telegram channels via the web preview (https://t.me/s/<channel>) — no bot +/// token or login needed for public channels. Each message's text becomes a ScrapedItem. 
/// public class TelegramListingSource : IListingSource { private readonly TelegramOptions _opts; + private readonly IHttpClientFactory _http; private readonly ILogger _log; - public TelegramListingSource(IOptions opts, ILogger log) + public TelegramListingSource(IOptions opts, IHttpClientFactory http, + ILogger log) { _opts = opts.Value; + _http = http; _log = log; } - public string Name => "تلگرام/بله"; - public bool Enabled => _opts.Enabled && !string.IsNullOrWhiteSpace(_opts.BotToken) && _opts.Channels.Count > 0; + public string Name => "تلگرام"; + public bool Enabled => _opts.Enabled && _opts.Channels.Count > 0; - public Task> FetchAsync(CancellationToken ct = default) + public async Task> FetchAsync(CancellationToken ct = default) { - if (!Enabled) + if (!Enabled) { _log.LogInformation("Telegram source disabled/unconfigured."); return Array.Empty(); } + + var client = _http.CreateClient("scrape"); + var items = new List(); + foreach (var ch in _opts.Channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0)) { - _log.LogInformation("Telegram source not configured — skipping."); - return Task.FromResult>(Array.Empty()); + try + { + var html = await client.GetStringAsync($"https://t.me/s/{ch}", ct); + foreach (var text in ExtractMessages(html).Take(_opts.PerChannel)) + items.Add(new ScrapedItem($"تلگرام/{ch}", text, $"https://t.me/{ch}")); + } + catch (Exception ex) { _log.LogWarning(ex, "Telegram fetch failed for {Channel}", ch); } + } + return items; + } + + // Message bodies live in
...
. + private static IEnumerable ExtractMessages(string html) + { + foreach (Match m in Regex.Matches(html, + "
]*>(.*?)
", RegexOptions.Singleline)) + { + var text = HtmlUtil.ToPlainText(m.Groups[1].Value); + if (text.Length >= 15) yield return text; } - // TODO(prod): call https://api.telegram.org/bot{token}/getUpdates (or channel history), - // map each message to new ScrapedItem(Name, message.Text, messageLink). The validation + - // dedupe pipeline downstream is already source-agnostic. - _log.LogWarning("Telegram fetch not yet implemented; returning empty."); - return Task.FromResult>(Array.Empty()); + } +} + +internal static class HtmlUtil +{ + public static string ToPlainText(string html) + { + var s = Regex.Replace(html, "", "\n", RegexOptions.IgnoreCase); + s = Regex.Replace(s, "<[^>]+>", ""); // strip remaining tags + s = WebUtility.HtmlDecode(s); + s = Regex.Replace(s, "[ \\t]+", " "); + return s.Trim(); } } diff --git a/src/JobsMedical.Web/appsettings.json b/src/JobsMedical.Web/appsettings.json index 8fe568b..53d6c0d 100644 --- a/src/JobsMedical.Web/appsettings.json +++ b/src/JobsMedical.Web/appsettings.json @@ -15,7 +15,8 @@ "Ingestion": { "Enabled": false, "IntervalMinutes": 30, - "Telegram": { "Enabled": false, "BotToken": "", "Channels": [] }, - "Divar": { "Enabled": false, "City": "tehran", "Queries": [] } + "Telegram": { "Enabled": false, "BotToken": "", "Channels": [], "PerChannel": 20 }, + "Bale": { "Enabled": false, "BotToken": "", "BaseUrl": "https://tapi.bale.ai" }, + "Divar": { "Enabled": false, "City": "tehran", "Category": "jobs", "Queries": [], "BaseUrl": "https://api.divar.ir/v8/web-search", "PerQuery": 25 } } }