diff --git a/DEPLOY.md b/DEPLOY.md index a3fe777..794bb90 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -65,21 +65,12 @@ POSTGRES_PASSWORD=__CHANGE_ME__ # Platform admin phone (gets the Admin role on login) ADMIN_PHONE=09XXXXXXXXX - -# --- Channel scraping (optional; off by default) — toggles --- -# INGESTION_ENABLED=true -# INGESTION_INTERVAL_MINUTES=30 -# TELEGRAM_ENABLED=true -# TELEGRAM_BOT_TOKEN=__TELEGRAM_BOT_TOKEN__ -# BALE_ENABLED=true -# BALE_BOT_TOKEN=__BALE_BOT_TOKEN__ -# DIVAR_ENABLED=true ``` -> Channel **lists** (`Telegram.Channels`, `Divar.Queries`) live in `appsettings.json` (or add -> `Ingestion__Telegram__Channels__0=...` keys). The toggles above gate each source on/off. -> The **AI audit layer** is configured at runtime in the admin panel (`/Admin/Settings`) — endpoint, -> model, API key, prompt/framework, auto-approve — not via env. Default: AI off, mode = Manual, -> so every ingested listing waits in the review queue until an admin publishes it. +> **That's the entire secrets file.** Everything else — the **AI audit layer** *and* the **channel +> sources** (Telegram channels, Bale bot token, Divar city + queries, auto-ingest on/off + interval) — is +> configured at runtime in the admin panel (`/Admin/Settings`) and stored in the DB, so changing it needs no +> redeploy. Defaults: AI off, mode = Manual, all sources off ⇒ nothing publishes without admin +> review. > `ASPNETCORE_ENVIRONMENT=Production` is set by the compose file ⇒ only **reference data** > (roles/cities/districts) is seeded — no demo facilities/shifts. 
diff --git a/docker-compose.yml b/docker-compose.yml index db08d83..4228721 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,14 +23,7 @@ services: ASPNETCORE_URLS: "http://+:8080" ConnectionStrings__Default: "Host=db;Port=5432;Database=${POSTGRES_DB:-hamkadr};Username=${POSTGRES_USER:-hamkadr};Password=${POSTGRES_PASSWORD}" Auth__AdminPhone: "${ADMIN_PHONE:-}" - # Channel scraping (optional; enable + configure via ENV_FILE) - Ingestion__Enabled: "${INGESTION_ENABLED:-false}" - Ingestion__IntervalMinutes: "${INGESTION_INTERVAL_MINUTES:-30}" - Ingestion__Telegram__Enabled: "${TELEGRAM_ENABLED:-false}" - Ingestion__Telegram__BotToken: "${TELEGRAM_BOT_TOKEN:-}" - Ingestion__Bale__Enabled: "${BALE_ENABLED:-false}" - Ingestion__Bale__BotToken: "${BALE_BOT_TOKEN:-}" - Ingestion__Divar__Enabled: "${DIVAR_ENABLED:-false}" + # Ingestion + AI + channel sources are configured at runtime in /Admin/Settings (DB), not here. # healthcheck is defined in the Dockerfile (bash /dev/tcp probe) so the deploy # job's `docker inspect Health.Status` wait works. 
diff --git a/src/JobsMedical.Web/Migrations/20260603210638_IngestionSourcesInSettings.Designer.cs b/src/JobsMedical.Web/Migrations/20260603210638_IngestionSourcesInSettings.Designer.cs new file mode 100644 index 0000000..bf62dc7 --- /dev/null +++ b/src/JobsMedical.Web/Migrations/20260603210638_IngestionSourcesInSettings.Designer.cs @@ -0,0 +1,873 @@ +// +using System; +using JobsMedical.Web.Data; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; +using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata; + +#nullable disable + +namespace JobsMedical.Web.Migrations +{ + [DbContext(typeof(AppDbContext))] + [Migration("20260603210638_IngestionSourcesInSettings")] + partial class IngestionSourcesInSettings + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasAnnotation("ProductVersion", "10.0.0") + .HasAnnotation("Relational:MaxIdentifierLength", 63); + + NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder); + + modelBuilder.Entity("JobsMedical.Web.Models.AppSetting", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("AiApiKey") + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("AiAutoApprove") + .HasColumnType("boolean"); + + b.Property("AiEnabled") + .HasColumnType("boolean"); + + b.Property("AiEndpoint") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("AiModel") + .HasMaxLength(120) + .HasColumnType("character varying(120)"); + + b.Property("AiSystemPrompt") + .IsRequired() + .HasMaxLength(4000) + .HasColumnType("character varying(4000)"); + + b.Property("AutoIngestEnabled") + .HasColumnType("boolean"); + + 
b.Property("AutoPublishMinConfidence") + .HasColumnType("integer"); + + b.Property("BaleBotToken") + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("BaleEnabled") + .HasColumnType("boolean"); + + b.Property("DivarCity") + .HasMaxLength(60) + .HasColumnType("character varying(60)"); + + b.Property("DivarEnabled") + .HasColumnType("boolean"); + + b.Property("DivarQueries") + .HasMaxLength(2000) + .HasColumnType("character varying(2000)"); + + b.Property("IngestIntervalMinutes") + .HasColumnType("integer"); + + b.Property("Mode") + .HasColumnType("integer"); + + b.Property("TelegramChannels") + .HasMaxLength(2000) + .HasColumnType("character varying(2000)"); + + b.Property("TelegramEnabled") + .HasColumnType("boolean"); + + b.Property("UpdatedAt") + .HasColumnType("timestamp with time zone"); + + b.HasKey("Id"); + + b.ToTable("AppSettings"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Application", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("DoctorId") + .HasColumnType("integer"); + + b.Property("Message") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("ShiftId") + .HasColumnType("integer"); + + b.Property("Status") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("DoctorId"); + + b.HasIndex("ShiftId", "DoctorId") + .IsUnique(); + + b.ToTable("Applications"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.City", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("IsActive") + .HasColumnType("boolean"); + + b.Property("Name") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("Province") + 
.IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.HasKey("Id"); + + b.ToTable("Cities"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.District", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CityId") + .HasColumnType("integer"); + + b.Property("IsActive") + .HasColumnType("boolean"); + + b.Property("Name") + .IsRequired() + .HasMaxLength(120) + .HasColumnType("character varying(120)"); + + b.HasKey("Id"); + + b.HasIndex("CityId"); + + b.ToTable("Districts"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Bio") + .HasMaxLength(1000) + .HasColumnType("character varying(1000)"); + + b.Property("CityId") + .HasColumnType("integer"); + + b.Property("IsVerified") + .HasColumnType("boolean"); + + b.Property("LicenseNo") + .HasMaxLength(20) + .HasColumnType("character varying(20)"); + + b.Property("RoleId") + .HasColumnType("integer"); + + b.Property("Specialty") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("UserId") + .HasColumnType("integer"); + + b.Property("YearsExperience") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("CityId"); + + b.HasIndex("RoleId"); + + b.HasIndex("UserId") + .IsUnique(); + + b.ToTable("DoctorProfiles"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Facility", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Address") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("BaleId") + .HasMaxLength(50) + .HasColumnType("character varying(50)"); 
+ + b.Property("CityId") + .HasColumnType("integer"); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("DistrictId") + .HasColumnType("integer"); + + b.Property("IsVerified") + .HasColumnType("boolean"); + + b.Property("Lat") + .HasColumnType("double precision"); + + b.Property("Lng") + .HasColumnType("double precision"); + + b.Property("Name") + .IsRequired() + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("OwnerUserId") + .HasColumnType("integer"); + + b.Property("Phone") + .HasMaxLength(20) + .HasColumnType("character varying(20)"); + + b.Property("Type") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("CityId"); + + b.HasIndex("DistrictId"); + + b.HasIndex("OwnerUserId"); + + b.ToTable("Facilities"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("bigint"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("EventType") + .HasColumnType("integer"); + + b.Property("JobOpeningId") + .HasColumnType("integer"); + + b.Property("ShiftId") + .HasColumnType("integer"); + + b.Property("VisitorId") + .IsRequired() + .HasColumnType("character varying(36)"); + + b.HasKey("Id"); + + b.HasIndex("JobOpeningId"); + + b.HasIndex("ShiftId"); + + b.HasIndex("VisitorId", "CreatedAt"); + + b.ToTable("InterestEvents"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("Description") + .HasMaxLength(2000) + .HasColumnType("character varying(2000)"); + + b.Property("EmploymentType") + .HasColumnType("integer"); + + 
b.Property("FacilityId") + .HasColumnType("integer"); + + b.Property("GenderRequirement") + .HasColumnType("integer"); + + b.Property("Requirements") + .HasMaxLength(1000) + .HasColumnType("character varying(1000)"); + + b.Property("RoleId") + .HasColumnType("integer"); + + b.Property("SalaryMax") + .HasColumnType("bigint"); + + b.Property("SalaryMin") + .HasColumnType("bigint"); + + b.Property("Source") + .HasColumnType("integer"); + + b.Property("SourceUrl") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("Status") + .HasColumnType("integer"); + + b.Property("Title") + .IsRequired() + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.HasKey("Id"); + + b.HasIndex("FacilityId"); + + b.HasIndex("RoleId"); + + b.HasIndex("Status"); + + b.ToTable("JobOpenings"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Confidence") + .HasColumnType("integer"); + + b.Property("ContentHash") + .HasMaxLength(64) + .HasColumnType("character varying(64)"); + + b.Property("FetchedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("LinkedShiftId") + .HasColumnType("integer"); + + b.Property("ParsedJson") + .HasColumnType("text"); + + b.Property("RawText") + .IsRequired() + .HasColumnType("text"); + + b.Property("SourceChannel") + .IsRequired() + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("SourceUrl") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("Status") + .HasColumnType("integer"); + + b.Property("ValidationNotes") + .HasMaxLength(1000) + .HasColumnType("character varying(1000)"); + + b.HasKey("Id"); + + b.HasIndex("ContentHash"); + + b.HasIndex("LinkedShiftId"); + + b.HasIndex("Status"); + + b.ToTable("RawListings"); + }); + + 
modelBuilder.Entity("JobsMedical.Web.Models.Role", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Category") + .IsRequired() + .HasMaxLength(50) + .HasColumnType("character varying(50)"); + + b.Property("IsActive") + .HasColumnType("boolean"); + + b.Property("Name") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("SortOrder") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.ToTable("Roles"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Shift", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("Date") + .HasColumnType("date"); + + b.Property("Description") + .HasMaxLength(1500) + .HasColumnType("character varying(1500)"); + + b.Property("EndTime") + .HasColumnType("time without time zone"); + + b.Property("FacilityId") + .HasColumnType("integer"); + + b.Property("GenderRequirement") + .HasColumnType("integer"); + + b.Property("PayAmount") + .HasColumnType("bigint"); + + b.Property("PayType") + .HasColumnType("integer"); + + b.Property("RoleId") + .HasColumnType("integer"); + + b.Property("SharePercent") + .HasColumnType("integer"); + + b.Property("ShiftType") + .HasColumnType("integer"); + + b.Property("Source") + .HasColumnType("integer"); + + b.Property("SourceUrl") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("SpecialtyRequired") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("StartTime") + .HasColumnType("time without time zone"); + + b.Property("Status") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("FacilityId"); + + b.HasIndex("RoleId"); + + b.HasIndex("Date", 
"Status"); + + b.ToTable("Shifts"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.User", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("FullName") + .HasMaxLength(150) + .HasColumnType("character varying(150)"); + + b.Property("IsPhoneVerified") + .HasColumnType("boolean"); + + b.Property("Phone") + .IsRequired() + .HasMaxLength(20) + .HasColumnType("character varying(20)"); + + b.Property("Role") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("Phone") + .IsUnique(); + + b.ToTable("Users"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CityId") + .HasColumnType("integer"); + + b.Property("Gender") + .HasColumnType("integer"); + + b.Property("MinPay") + .HasColumnType("bigint"); + + b.Property("PreferredShiftType") + .HasColumnType("integer"); + + b.Property("RoleId") + .HasColumnType("integer"); + + b.Property("UpdatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("VisitorId") + .IsRequired() + .HasColumnType("character varying(36)"); + + b.HasKey("Id"); + + b.HasIndex("CityId"); + + b.HasIndex("RoleId"); + + b.HasIndex("VisitorId") + .IsUnique(); + + b.ToTable("UserPreferences"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b => + { + b.Property("Id") + .HasMaxLength(36) + .HasColumnType("character varying(36)"); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("LastSeenAt") + .HasColumnType("timestamp with time zone"); + + b.Property("UserId") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("UserId"); + + b.ToTable("Visitors"); + }); + + 
modelBuilder.Entity("JobsMedical.Web.Models.Application", b => + { + b.HasOne("JobsMedical.Web.Models.User", "Doctor") + .WithMany("Applications") + .HasForeignKey("DoctorId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("JobsMedical.Web.Models.Shift", "Shift") + .WithMany("Applications") + .HasForeignKey("ShiftId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("Doctor"); + + b.Navigation("Shift"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.District", b => + { + b.HasOne("JobsMedical.Web.Models.City", "City") + .WithMany() + .HasForeignKey("CityId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("City"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b => + { + b.HasOne("JobsMedical.Web.Models.City", "City") + .WithMany() + .HasForeignKey("CityId"); + + b.HasOne("JobsMedical.Web.Models.Role", "Role") + .WithMany() + .HasForeignKey("RoleId"); + + b.HasOne("JobsMedical.Web.Models.User", "User") + .WithOne("DoctorProfile") + .HasForeignKey("JobsMedical.Web.Models.DoctorProfile", "UserId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("City"); + + b.Navigation("Role"); + + b.Navigation("User"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Facility", b => + { + b.HasOne("JobsMedical.Web.Models.City", "City") + .WithMany("Facilities") + .HasForeignKey("CityId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("JobsMedical.Web.Models.District", "District") + .WithMany("Facilities") + .HasForeignKey("DistrictId") + .OnDelete(DeleteBehavior.SetNull); + + b.HasOne("JobsMedical.Web.Models.User", "OwnerUser") + .WithMany() + .HasForeignKey("OwnerUserId") + .OnDelete(DeleteBehavior.SetNull); + + b.Navigation("City"); + + b.Navigation("District"); + + b.Navigation("OwnerUser"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b => + { + b.HasOne("JobsMedical.Web.Models.JobOpening", "JobOpening") + .WithMany() 
+ .HasForeignKey("JobOpeningId") + .OnDelete(DeleteBehavior.Cascade); + + b.HasOne("JobsMedical.Web.Models.Shift", "Shift") + .WithMany() + .HasForeignKey("ShiftId") + .OnDelete(DeleteBehavior.Cascade); + + b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor") + .WithMany("Events") + .HasForeignKey("VisitorId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("JobOpening"); + + b.Navigation("Shift"); + + b.Navigation("Visitor"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b => + { + b.HasOne("JobsMedical.Web.Models.Facility", "Facility") + .WithMany() + .HasForeignKey("FacilityId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("JobsMedical.Web.Models.Role", "Role") + .WithMany() + .HasForeignKey("RoleId") + .OnDelete(DeleteBehavior.Restrict) + .IsRequired(); + + b.Navigation("Facility"); + + b.Navigation("Role"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b => + { + b.HasOne("JobsMedical.Web.Models.Shift", "LinkedShift") + .WithMany() + .HasForeignKey("LinkedShiftId"); + + b.Navigation("LinkedShift"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Shift", b => + { + b.HasOne("JobsMedical.Web.Models.Facility", "Facility") + .WithMany("Shifts") + .HasForeignKey("FacilityId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("JobsMedical.Web.Models.Role", "Role") + .WithMany("Shifts") + .HasForeignKey("RoleId") + .OnDelete(DeleteBehavior.Restrict) + .IsRequired(); + + b.Navigation("Facility"); + + b.Navigation("Role"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b => + { + b.HasOne("JobsMedical.Web.Models.City", "City") + .WithMany() + .HasForeignKey("CityId"); + + b.HasOne("JobsMedical.Web.Models.Role", "Role") + .WithMany() + .HasForeignKey("RoleId"); + + b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor") + .WithOne("Preferences") + .HasForeignKey("JobsMedical.Web.Models.UserPreferences", "VisitorId") + 
.OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("City"); + + b.Navigation("Role"); + + b.Navigation("Visitor"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b => + { + b.HasOne("JobsMedical.Web.Models.User", "User") + .WithMany() + .HasForeignKey("UserId") + .OnDelete(DeleteBehavior.SetNull); + + b.Navigation("User"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.City", b => + { + b.Navigation("Facilities"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.District", b => + { + b.Navigation("Facilities"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Facility", b => + { + b.Navigation("Shifts"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Role", b => + { + b.Navigation("Shifts"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Shift", b => + { + b.Navigation("Applications"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.User", b => + { + b.Navigation("Applications"); + + b.Navigation("DoctorProfile"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b => + { + b.Navigation("Events"); + + b.Navigation("Preferences"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/src/JobsMedical.Web/Migrations/20260603210638_IngestionSourcesInSettings.cs b/src/JobsMedical.Web/Migrations/20260603210638_IngestionSourcesInSettings.cs new file mode 100644 index 0000000..4bc55b8 --- /dev/null +++ b/src/JobsMedical.Web/Migrations/20260603210638_IngestionSourcesInSettings.cs @@ -0,0 +1,117 @@ +using Microsoft.EntityFrameworkCore.Migrations; + +#nullable disable + +namespace JobsMedical.Web.Migrations +{ + /// + public partial class IngestionSourcesInSettings : Migration + { + /// + protected override void Up(MigrationBuilder migrationBuilder) + { + migrationBuilder.AddColumn( + name: "AutoIngestEnabled", + table: "AppSettings", + type: "boolean", + nullable: false, + defaultValue: false); + + migrationBuilder.AddColumn( + name: "BaleBotToken", + table: "AppSettings", + type:
"character varying(200)", + maxLength: 200, + nullable: true); + + migrationBuilder.AddColumn( + name: "BaleEnabled", + table: "AppSettings", + type: "boolean", + nullable: false, + defaultValue: false); + + migrationBuilder.AddColumn( + name: "DivarCity", + table: "AppSettings", + type: "character varying(60)", + maxLength: 60, + nullable: true); + + migrationBuilder.AddColumn( + name: "DivarEnabled", + table: "AppSettings", + type: "boolean", + nullable: false, + defaultValue: false); + + migrationBuilder.AddColumn( + name: "DivarQueries", + table: "AppSettings", + type: "character varying(2000)", + maxLength: 2000, + nullable: true); + + migrationBuilder.AddColumn( + name: "IngestIntervalMinutes", + table: "AppSettings", + type: "integer", + nullable: false, + defaultValue: 30); /* same value as the AppSetting.IngestIntervalMinutes initializer (30); this default is what any already-existing AppSettings row receives, and 0 would mean a zero-minute interval */ + + migrationBuilder.AddColumn( + name: "TelegramChannels", + table: "AppSettings", + type: "character varying(2000)", + maxLength: 2000, + nullable: true); + + migrationBuilder.AddColumn( + name: "TelegramEnabled", + table: "AppSettings", + type: "boolean", + nullable: false, + defaultValue: false); + } + + /// + protected override void Down(MigrationBuilder migrationBuilder) + { + migrationBuilder.DropColumn( + name: "AutoIngestEnabled", + table: "AppSettings"); + + migrationBuilder.DropColumn( + name: "BaleBotToken", + table: "AppSettings"); + + migrationBuilder.DropColumn( + name: "BaleEnabled", + table: "AppSettings"); + + migrationBuilder.DropColumn( + name: "DivarCity", + table: "AppSettings"); + + migrationBuilder.DropColumn( + name: "DivarEnabled", + table: "AppSettings"); + + migrationBuilder.DropColumn( + name: "DivarQueries", + table: "AppSettings"); + + migrationBuilder.DropColumn( + name: "IngestIntervalMinutes", + table: "AppSettings"); + + migrationBuilder.DropColumn( + name: "TelegramChannels", + table: "AppSettings"); + + migrationBuilder.DropColumn( + name: "TelegramEnabled", + table: "AppSettings"); + } + } +} diff --git 
a/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs b/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs index 721e58e..2e44bea 100644 --- a/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs +++ b/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs @@ -53,12 +53,43 @@ namespace JobsMedical.Web.Migrations .HasMaxLength(4000) .HasColumnType("character varying(4000)"); + b.Property("AutoIngestEnabled") + .HasColumnType("boolean"); + b.Property("AutoPublishMinConfidence") .HasColumnType("integer"); + b.Property("BaleBotToken") + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("BaleEnabled") + .HasColumnType("boolean"); + + b.Property("DivarCity") + .HasMaxLength(60) + .HasColumnType("character varying(60)"); + + b.Property("DivarEnabled") + .HasColumnType("boolean"); + + b.Property("DivarQueries") + .HasMaxLength(2000) + .HasColumnType("character varying(2000)"); + + b.Property("IngestIntervalMinutes") + .HasColumnType("integer"); + b.Property("Mode") .HasColumnType("integer"); + b.Property("TelegramChannels") + .HasMaxLength(2000) + .HasColumnType("character varying(2000)"); + + b.Property("TelegramEnabled") + .HasColumnType("boolean"); + b.Property("UpdatedAt") .HasColumnType("timestamp with time zone"); diff --git a/src/JobsMedical.Web/Models/AppSetting.cs b/src/JobsMedical.Web/Models/AppSetting.cs index 4515f35..3317b21 100644 --- a/src/JobsMedical.Web/Models/AppSetting.cs +++ b/src/JobsMedical.Web/Models/AppSetting.cs @@ -32,8 +32,31 @@ public class AppSetting /// If AI approves AND Mode is Automatic, publish without human review. public bool AiAutoApprove { get; set; } = false; + // --- Channel scraping sources (configured here, NOT in env) --- + /// Run the ingestion worker on a timer. 
+ public bool AutoIngestEnabled { get; set; } = false; + public int IngestIntervalMinutes { get; set; } = 30; + + public bool TelegramEnabled { get; set; } = false; + /// Public Telegram channel usernames, one per line or comma-separated. + [MaxLength(2000)] public string? TelegramChannels { get; set; } + + public bool BaleEnabled { get; set; } = false; + [MaxLength(200)] public string? BaleBotToken { get; set; } + + public bool DivarEnabled { get; set; } = false; + [MaxLength(60)] public string? DivarCity { get; set; } = "tehran"; + /// Divar search terms, one per line or comma-separated. + [MaxLength(2000)] public string? DivarQueries { get; set; } + public DateTime UpdatedAt { get; set; } = DateTime.UtcNow; + /// Split a textarea (newline/comma separated) into trimmed non-empty items. + public static List SplitList(string? s) => string.IsNullOrWhiteSpace(s) + ? new() + : s.Split(new[] { '\n', '\r', ',', '،' }, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries) + .ToList(); + public const string DefaultPrompt = """ تو دستیار بررسی آگهی‌های کاری حوزه درمان برای پلتفرم «همکادر» هستی. هر آگهی خام را بخوان و تصمیم بگیر: diff --git a/src/JobsMedical.Web/Pages/Admin/Index.cshtml b/src/JobsMedical.Web/Pages/Admin/Index.cshtml index e40c974..440d9bb 100644 --- a/src/JobsMedical.Web/Pages/Admin/Index.cshtml +++ b/src/JobsMedical.Web/Pages/Admin/Index.cshtml @@ -26,16 +26,10 @@
+
+ +

منابع جمع‌آوری (اسکرپ کانال‌ها)

+
+ +
+
+ + +
+ +
+ + + +
+ +
+ + + +
+ +
+ +
+
+
+
+
+ diff --git a/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs index 2f38445..4a975cc 100644 --- a/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs +++ b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs @@ -20,6 +20,16 @@ public class SettingsModel : PageModel [BindProperty] public string? AiModel { get; set; } [BindProperty] public string AiSystemPrompt { get; set; } = ""; [BindProperty] public bool AiAutoApprove { get; set; } + // Channel scraping sources + [BindProperty] public bool AutoIngestEnabled { get; set; } + [BindProperty] public int IngestIntervalMinutes { get; set; } = 30; + [BindProperty] public bool TelegramEnabled { get; set; } + [BindProperty] public string? TelegramChannels { get; set; } + [BindProperty] public bool BaleEnabled { get; set; } + [BindProperty] public string? BaleBotToken { get; set; } + [BindProperty] public bool DivarEnabled { get; set; } + [BindProperty] public string? DivarCity { get; set; } + [BindProperty] public string? DivarQueries { get; set; } [TempData] public string? 
Saved { get; set; } public async Task OnGetAsync() @@ -33,6 +43,15 @@ public class SettingsModel : PageModel AiModel = s.AiModel; AiSystemPrompt = s.AiSystemPrompt; AiAutoApprove = s.AiAutoApprove; + AutoIngestEnabled = s.AutoIngestEnabled; + IngestIntervalMinutes = s.IngestIntervalMinutes; + TelegramEnabled = s.TelegramEnabled; + TelegramChannels = s.TelegramChannels; + BaleEnabled = s.BaleEnabled; + BaleBotToken = s.BaleBotToken; + DivarEnabled = s.DivarEnabled; + DivarCity = s.DivarCity; + DivarQueries = s.DivarQueries; } public async Task OnPostAsync() @@ -47,6 +66,15 @@ public class SettingsModel : PageModel AiModel = AiModel, AiSystemPrompt = AiSystemPrompt, AiAutoApprove = AiAutoApprove, + AutoIngestEnabled = AutoIngestEnabled, + IngestIntervalMinutes = IngestIntervalMinutes, + TelegramEnabled = TelegramEnabled, + TelegramChannels = TelegramChannels, + BaleEnabled = BaleEnabled, + BaleBotToken = BaleBotToken, + DivarEnabled = DivarEnabled, + DivarCity = DivarCity, + DivarQueries = DivarQueries, }); Saved = "تنظیمات ذخیره شد."; return RedirectToPage(); diff --git a/src/JobsMedical.Web/Program.cs b/src/JobsMedical.Web/Program.cs index 4eb294c..6b2a554 100644 --- a/src/JobsMedical.Web/Program.cs +++ b/src/JobsMedical.Web/Program.cs @@ -28,14 +28,6 @@ builder.Services.AddHttpClient("scrape", c => c.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; HamkadrBot/1.0)"); }); builder.Services.AddHttpClient("ai"); -builder.Services.Configure( - builder.Configuration.GetSection("Ingestion")); -builder.Services.Configure( - builder.Configuration.GetSection("Ingestion:Telegram")); -builder.Services.Configure( - builder.Configuration.GetSection("Ingestion:Bale")); -builder.Services.Configure( - builder.Configuration.GetSection("Ingestion:Divar")); builder.Services.AddSingleton(); builder.Services.AddSingleton(); diff --git a/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs b/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs index 
21ef6b0..279c1da 100644 --- a/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs +++ b/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs @@ -1,46 +1,34 @@ using System.Text.Json; -using Microsoft.Extensions.Options; +using JobsMedical.Web.Models; namespace JobsMedical.Web.Services.Scraping; -public class BaleOptions -{ - public bool Enabled { get; set; } - public string? BotToken { get; set; } - public string BaseUrl { get; set; } = "https://tapi.bale.ai"; // Bale Bot API host -} - /// -/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. The bot must -/// be a member/admin of the channels it should read. Pulls text from messages and channel posts. +/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. Enabled + +/// bot token come from admin settings (DB). The bot must be a member of the channels it reads. /// public class BaleListingSource : IListingSource { - private readonly BaleOptions _opts; + private const string BaseUrl = "https://tapi.bale.ai"; private readonly IHttpClientFactory _http; private readonly ILogger _log; - public BaleListingSource(IOptions opts, IHttpClientFactory http, - ILogger log) + public BaleListingSource(IHttpClientFactory http, ILogger log) { - _opts = opts.Value; _http = http; _log = log; } public string Name => "بله"; - public bool Enabled => _opts.Enabled && !string.IsNullOrWhiteSpace(_opts.BotToken); - public async Task> FetchAsync(CancellationToken ct = default) + public async Task> FetchAsync(AppSetting s, CancellationToken ct = default) { - if (!Enabled) { _log.LogInformation("Bale source disabled/unconfigured."); return Array.Empty(); } + if (!s.BaleEnabled || string.IsNullOrWhiteSpace(s.BaleBotToken)) return Array.Empty(); try { var client = _http.CreateClient("scrape"); - var url = $"{_opts.BaseUrl.TrimEnd('/')}/bot{_opts.BotToken}/getUpdates"; - var body = await client.GetStringAsync(url, ct); - + var body = await 
client.GetStringAsync($"{BaseUrl}/bot{s.BaleBotToken}/getUpdates", ct); using var doc = JsonDocument.Parse(body); if (!doc.RootElement.TryGetProperty("result", out var result) || result.ValueKind != JsonValueKind.Array) return Array.Empty(); @@ -54,11 +42,7 @@ public class BaleListingSource : IListingSource } return items; } - catch (Exception ex) - { - _log.LogWarning(ex, "Bale fetch failed."); - return Array.Empty(); - } + catch (Exception ex) { _log.LogWarning(ex, "Bale fetch failed."); return Array.Empty(); } } private static string? TextOf(JsonElement update, string key) diff --git a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs index cdb60c9..c70bfdb 100644 --- a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs +++ b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs @@ -1,55 +1,44 @@ using System.Text; using System.Text.Json; -using Microsoft.Extensions.Options; +using JobsMedical.Web.Models; namespace JobsMedical.Web.Services.Scraping; -public class DivarOptions -{ - public bool Enabled { get; set; } - public string City { get; set; } = "tehran"; - public string Category { get; set; } = "jobs"; - public List Queries { get; set; } = new(); // e.g. "پرستار", "پزشک عمومی", "درمانگاه" - public string BaseUrl { get; set; } = "https://api.divar.ir/v8/web-search"; - public int PerQuery { get; set; } = 25; -} - /// /// Best-effort Divar fetch: queries Divar's web-search JSON for each term and harvests post -/// titles + descriptions. Divar's private API shifts shape over time, so we walk the JSON -/// tolerantly for any object carrying a "title" plus a nearby description field, and fail soft. +/// titles + descriptions. Enabled + city + queries come from admin settings (DB). Divar's +/// private API shifts shape, so we walk JSON tolerantly and fail soft. 
/// public class DivarListingSource : IListingSource { - private readonly DivarOptions _opts; + private const string BaseUrl = "https://api.divar.ir/v8/web-search"; private readonly IHttpClientFactory _http; private readonly ILogger _log; - public DivarListingSource(IOptions opts, IHttpClientFactory http, - ILogger log) + public DivarListingSource(IHttpClientFactory http, ILogger log) { - _opts = opts.Value; _http = http; _log = log; } public string Name => "دیوار"; - public bool Enabled => _opts.Enabled && _opts.Queries.Count > 0; - public async Task> FetchAsync(CancellationToken ct = default) + public async Task> FetchAsync(AppSetting s, CancellationToken ct = default) { - if (!Enabled) { _log.LogInformation("Divar source disabled/unconfigured."); return Array.Empty(); } + var queries = AppSetting.SplitList(s.DivarQueries); + if (!s.DivarEnabled || queries.Count == 0) return Array.Empty(); + var city = string.IsNullOrWhiteSpace(s.DivarCity) ? "tehran" : s.DivarCity.Trim(); var client = _http.CreateClient("scrape"); var items = new List(); - foreach (var q in _opts.Queries.Where(q => q.Trim().Length > 0)) + foreach (var q in queries) { try { - var url = $"{_opts.BaseUrl.TrimEnd('/')}/{_opts.City}/{_opts.Category}?q={Uri.EscapeDataString(q)}"; + var url = $"{BaseUrl}/{city}/jobs?q={Uri.EscapeDataString(q)}"; var body = await client.GetStringAsync(url, ct); using var doc = JsonDocument.Parse(body); - foreach (var text in Harvest(doc.RootElement).Take(_opts.PerQuery)) + foreach (var text in Harvest(doc.RootElement).Take(25)) items.Add(new ScrapedItem("دیوار", text, "https://divar.ir")); } catch (Exception ex) { _log.LogWarning(ex, "Divar fetch failed for query {Query}", q); } @@ -60,7 +49,6 @@ public class DivarListingSource : IListingSource private static readonly string[] DescKeys = { "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" }; - /// Walk the JSON; for each object with a string "title", emit title + first 
description. private static IEnumerable Harvest(JsonElement el) { if (el.ValueKind == JsonValueKind.Object) @@ -75,12 +63,12 @@ public class DivarListingSource : IListingSource if (text.Length >= 15) yield return text; } foreach (var p in el.EnumerateObject()) - foreach (var s in Harvest(p.Value)) yield return s; + foreach (var x in Harvest(p.Value)) yield return x; } else if (el.ValueKind == JsonValueKind.Array) { foreach (var item in el.EnumerateArray()) - foreach (var s in Harvest(item)) yield return s; + foreach (var x in Harvest(item)) yield return x; } } } diff --git a/src/JobsMedical.Web/Services/Scraping/IListingSource.cs b/src/JobsMedical.Web/Services/Scraping/IListingSource.cs index 8079fa2..a7fe8f9 100644 --- a/src/JobsMedical.Web/Services/Scraping/IListingSource.cs +++ b/src/JobsMedical.Web/Services/Scraping/IListingSource.cs @@ -1,15 +1,17 @@ +using JobsMedical.Web.Models; + namespace JobsMedical.Web.Services.Scraping; /// One raw post pulled from a source (a Telegram message, a Divar ad, etc.). public record ScrapedItem(string Source, string RawText, string? SourceUrl = null); /// -/// A pluggable source the ingestion engine pulls from. Implement once per channel/site. -/// `Enabled` lets a source be present but dormant until it's configured with credentials. +/// A pluggable source the ingestion engine pulls from. Configuration (enabled, channels, tokens) +/// comes from the DB-backed passed in — set in the admin panel, not env. +/// A disabled/unconfigured source returns an empty list. 
/// public interface IListingSource { string Name { get; } - bool Enabled { get; } - Task> FetchAsync(CancellationToken ct = default); + Task> FetchAsync(AppSetting settings, CancellationToken ct = default); } diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs index a9c21b5..857d08b 100644 --- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs +++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs @@ -43,8 +43,7 @@ public class IngestionService _ai = ai; _settings = settings; _log = log; } - public IReadOnlyList<(string Name, bool Enabled)> Sources => - _sources.Select(s => (s.Name, s.Enabled)).ToList(); + public IReadOnlyList SourceNames => _sources.Select(s => s.Name).ToList(); public async Task RunAsync(CancellationToken ct = default) { @@ -58,12 +57,13 @@ public class IngestionService var results = new List(); - foreach (var source in _sources.Where(s => s.Enabled)) + foreach (var source in _sources) { int fetched = 0, queued = 0, published = 0, flagged = 0, spam = 0, dupes = 0; IReadOnlyList items; - try { items = await source.FetchAsync(ct); } + try { items = await source.FetchAsync(settings, ct); } catch (Exception ex) { _log.LogError(ex, "Source {Source} failed", source.Name); continue; } + if (items.Count == 0) continue; // disabled/unconfigured source foreach (var item in items) { diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionWorker.cs b/src/JobsMedical.Web/Services/Scraping/IngestionWorker.cs index 3ed6173..ef3b920 100644 --- a/src/JobsMedical.Web/Services/Scraping/IngestionWorker.cs +++ b/src/JobsMedical.Web/Services/Scraping/IngestionWorker.cs @@ -1,58 +1,52 @@ -using Microsoft.Extensions.Options; - namespace JobsMedical.Web.Services.Scraping; -public class IngestionOptions -{ - public bool Enabled { get; set; } = false; // off by default — opt in via config - public int IntervalMinutes { get; set; } = 30; -} - /// -/// Periodically runs the 
ingestion engine when enabled (Ingestion:Enabled=true). Off by default -/// so nothing scrapes uninvited; admins can also trigger a run on demand from the admin UI. +/// Periodically runs the ingestion engine when the admin has turned auto-ingest ON +/// (AppSetting.AutoIngestEnabled) — read fresh from the DB each cycle, so it can be toggled at +/// runtime from the admin panel with no redeploy. When off, it idles and re-checks. /// public class IngestionWorker : BackgroundService { private readonly IServiceScopeFactory _scopes; - private readonly IngestionOptions _opts; private readonly ILogger _log; - public IngestionWorker(IServiceScopeFactory scopes, IOptions opts, - ILogger log) + public IngestionWorker(IServiceScopeFactory scopes, ILogger log) { _scopes = scopes; - _opts = opts.Value; _log = log; } protected override async Task ExecuteAsync(CancellationToken stoppingToken) { - if (!_opts.Enabled) - { - _log.LogInformation("Ingestion worker disabled (Ingestion:Enabled=false)."); - return; - } - - var interval = TimeSpan.FromMinutes(Math.Max(1, _opts.IntervalMinutes)); - _log.LogInformation("Ingestion worker on; every {Min} min.", _opts.IntervalMinutes); + // Small startup delay so the DB/migrations are ready. 
+ try { await Task.Delay(TimeSpan.FromSeconds(20), stoppingToken); } + catch (OperationCanceledException) { return; } while (!stoppingToken.IsCancellationRequested) { + var idleMinutes = 10; try { using var scope = _scopes.CreateScope(); - var svc = scope.ServiceProvider.GetRequiredService(); - var summary = await svc.RunAsync(stoppingToken); - _log.LogInformation("Scheduled ingestion: queued={Q} flagged={F} spam={S} dupes={D}", - summary.TotalQueued, summary.TotalFlagged, summary.TotalSpam, summary.TotalDuplicates); + var settings = await scope.ServiceProvider + .GetRequiredService().GetAsync(); + + if (settings.AutoIngestEnabled) + { + var svc = scope.ServiceProvider.GetRequiredService(); + var summary = await svc.RunAsync(stoppingToken); + _log.LogInformation("Auto-ingest: queued={Q} published={P} flagged={F} spam={S} dupes={D}", + summary.TotalQueued, summary.TotalPublished, summary.TotalFlagged, + summary.TotalSpam, summary.TotalDuplicates); + idleMinutes = Math.Max(1, settings.IngestIntervalMinutes); + } } catch (Exception ex) when (ex is not OperationCanceledException) { - _log.LogError(ex, "Scheduled ingestion run failed"); + _log.LogError(ex, "Auto-ingest cycle failed"); } - try { await Task.Delay(interval, stoppingToken); } + try { await Task.Delay(TimeSpan.FromMinutes(idleMinutes), stoppingToken); } catch (OperationCanceledException) { break; } } } diff --git a/src/JobsMedical.Web/Services/Scraping/SampleListingSource.cs b/src/JobsMedical.Web/Services/Scraping/SampleListingSource.cs index 7484e6f..246fa1f 100644 --- a/src/JobsMedical.Web/Services/Scraping/SampleListingSource.cs +++ b/src/JobsMedical.Web/Services/Scraping/SampleListingSource.cs @@ -1,27 +1,33 @@ +using JobsMedical.Web.Models; +using Microsoft.Extensions.Hosting; + namespace JobsMedical.Web.Services.Scraping; /// -/// A built-in source of representative Persian posts (the kind found in shift channels). 
Always -/// available, needs no credentials — it lets the whole ingestion → validation → review pipeline -/// run and be demoed today, and doubles as a fixture mix of good, incomplete, and spam posts. +/// Built-in representative Persian posts (good, incomplete, and spam) so the whole pipeline can be +/// demoed. Only active in Development — never injects sample data into production. /// public class SampleListingSource : IListingSource { + private readonly IHostEnvironment _env; + public SampleListingSource(IHostEnvironment env) => _env = env; + public string Name => "نمونه (کانال آزمایشی)"; - public bool Enabled => true; private static readonly string[] Posts = { "درمانگاه شبانه‌روزی در سعادت‌آباد نیازمند پزشک عمومی برای شیفت شب، کارانه ۳ میلیون تومان. تماس ۰۹۱۲۳۴۵۶۷۸۹", - "کلینیک تخصصی در تهران به پرستار برای شیفت عصر نیازمند است، ۵۰٪ سهم درآمد. ۰۹۳۵۱۱۱۲۲۳۳", + "کلینیک تخصصی در تهران به پرستار خانم برای شیفت عصر نیازمند است، ۵۰٪ سهم درآمد. ۰۹۳۵۱۱۱۲۲۳۳", "استخدام ماما تمام‌وقت در بیمارستان خصوصی، حقوق توافقی. منطقه شهرک غرب.", - "نیازمند تکنسین اتاق عمل جهت همکاری در نارمک، شیفت صبح. ۰۹۱۲۰۰۰۰۰۰۰", - "فروش فالوور و بک لینک ارزان، سرمایه گذاری در ارز دیجیتال با سود تضمینی!", // spam - "پزشک", // too short / incomplete + "نیازمند تکنسین اتاق عمل آقا جهت همکاری در نارمک، شیفت صبح. ۰۹۱۲۰۰۰۰۰۰۰", + "فروش فالوور و بک لینک ارزان، سرمایه گذاری در ارز دیجیتال با سود تضمینی!", + "پزشک", "بیمارستان آتیه جهت تکمیل کادر درمان به پزشک عمومی مقیم نیازمند است. قرارداد یک‌ساله، حقوق ۴۵ میلیون ماهانه. تهرانپارس.", }; - public Task> FetchAsync(CancellationToken ct = default) + public Task> FetchAsync(AppSetting settings, CancellationToken ct = default) => Task.FromResult>( - Posts.Select(p => new ScrapedItem(Name, p)).ToList()); + _env.IsDevelopment() + ? 
Posts.Select(p => new ScrapedItem(Name, p)).ToList() + : Array.Empty()); } diff --git a/src/JobsMedical.Web/Services/Scraping/SettingsService.cs b/src/JobsMedical.Web/Services/Scraping/SettingsService.cs index ff24df0..ef1ed6e 100644 --- a/src/JobsMedical.Web/Services/Scraping/SettingsService.cs +++ b/src/JobsMedical.Web/Services/Scraping/SettingsService.cs @@ -34,6 +34,16 @@ public class SettingsService s.AiSystemPrompt = string.IsNullOrWhiteSpace(incoming.AiSystemPrompt) ? AppSetting.DefaultPrompt : incoming.AiSystemPrompt; s.AiAutoApprove = incoming.AiAutoApprove; + // Channel scraping sources + s.AutoIngestEnabled = incoming.AutoIngestEnabled; + s.IngestIntervalMinutes = Math.Max(1, incoming.IngestIntervalMinutes); + s.TelegramEnabled = incoming.TelegramEnabled; + s.TelegramChannels = incoming.TelegramChannels?.Trim(); + s.BaleEnabled = incoming.BaleEnabled; + s.BaleBotToken = incoming.BaleBotToken?.Trim(); + s.DivarEnabled = incoming.DivarEnabled; + s.DivarCity = string.IsNullOrWhiteSpace(incoming.DivarCity) ? "tehran" : incoming.DivarCity.Trim(); + s.DivarQueries = incoming.DivarQueries?.Trim(); s.UpdatedAt = DateTime.UtcNow; await _db.SaveChangesAsync(); } diff --git a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs index b655819..85b0169 100644 --- a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs +++ b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs @@ -1,50 +1,39 @@ using System.Net; using System.Text.RegularExpressions; -using Microsoft.Extensions.Options; +using JobsMedical.Web.Models; namespace JobsMedical.Web.Services.Scraping; -public class TelegramOptions -{ - public bool Enabled { get; set; } - public string? 
BotToken { get; set; } // optional (for private channels later) - public List Channels { get; set; } = new(); // public channel usernames (no @) - public int PerChannel { get; set; } = 20; -} - /// /// Reads public Telegram channels via the web preview (https://t.me/s/<channel>) — no bot -/// token or login needed for public channels. Each message's text becomes a ScrapedItem. +/// token needed for public channels. Enabled + channel list come from the admin settings (DB). /// public class TelegramListingSource : IListingSource { - private readonly TelegramOptions _opts; private readonly IHttpClientFactory _http; private readonly ILogger _log; - public TelegramListingSource(IOptions opts, IHttpClientFactory http, - ILogger log) + public TelegramListingSource(IHttpClientFactory http, ILogger log) { - _opts = opts.Value; _http = http; _log = log; } public string Name => "تلگرام"; - public bool Enabled => _opts.Enabled && _opts.Channels.Count > 0; - public async Task> FetchAsync(CancellationToken ct = default) + public async Task> FetchAsync(AppSetting s, CancellationToken ct = default) { - if (!Enabled) { _log.LogInformation("Telegram source disabled/unconfigured."); return Array.Empty(); } + var channels = AppSetting.SplitList(s.TelegramChannels); + if (!s.TelegramEnabled || channels.Count == 0) return Array.Empty(); var client = _http.CreateClient("scrape"); var items = new List(); - foreach (var ch in _opts.Channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0)) + foreach (var ch in channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0)) { try { var html = await client.GetStringAsync($"https://t.me/s/{ch}", ct); - foreach (var text in ExtractMessages(html).Take(_opts.PerChannel)) + foreach (var text in ExtractMessages(html).Take(20)) items.Add(new ScrapedItem($"تلگرام/{ch}", text, $"https://t.me/{ch}")); } catch (Exception ex) { _log.LogWarning(ex, "Telegram fetch failed for {Channel}", ch); } @@ -52,7 +41,6 @@ public class TelegramListingSource : 
IListingSource return items; } - // Message bodies live in
...
. private static IEnumerable ExtractMessages(string html) { foreach (Match m in Regex.Matches(html, @@ -69,7 +57,7 @@ internal static class HtmlUtil public static string ToPlainText(string html) { var s = Regex.Replace(html, "", "\n", RegexOptions.IgnoreCase); - s = Regex.Replace(s, "<[^>]+>", ""); // strip remaining tags + s = Regex.Replace(s, "<[^>]+>", ""); s = WebUtility.HtmlDecode(s); s = Regex.Replace(s, "[ \\t]+", " "); return s.Trim(); diff --git a/src/JobsMedical.Web/appsettings.json b/src/JobsMedical.Web/appsettings.json index 53d6c0d..7956808 100644 --- a/src/JobsMedical.Web/appsettings.json +++ b/src/JobsMedical.Web/appsettings.json @@ -11,12 +11,5 @@ }, "Auth": { "AdminPhone": "09120000000" - }, - "Ingestion": { - "Enabled": false, - "IntervalMinutes": 30, - "Telegram": { "Enabled": false, "BotToken": "", "Channels": [], "PerChannel": 20 }, - "Bale": { "Enabled": false, "BotToken": "", "BaseUrl": "https://tapi.bale.ai" }, - "Divar": { "Enabled": false, "City": "tehran", "Category": "jobs", "Queries": [], "BaseUrl": "https://api.divar.ir/v8/web-search", "PerQuery": 25 } } }