Move ingestion + Telegram/Bale/Divar config to DB-backed admin settings
CI/CD / CI · dotnet build (push) Successful in 6m22s
CI/CD / Deploy · hamkadr (push) Failing after 3s

- AppSetting gains source config: AutoIngestEnabled, IngestIntervalMinutes, Telegram/Bale/Divar enabled+channels/token/queries
- IListingSource.FetchAsync(AppSetting) — sources read config from DB, not IOptions/appsettings; sample source dev-only
- IngestionWorker reads AutoIngest+interval from DB each cycle (toggle at runtime, no redeploy)
- /Admin/Settings gets a 'منابع جمع‌آوری' section; removed Ingestion env/appsettings + compose env vars
- ENV_FILE shrinks to HOST_PORT + POSTGRES_* + ADMIN_PHONE (AI + sources are all in-admin); migration

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-04 00:44:11 +03:30
parent 6cfdd16c42
commit 3c08c1a265
20 changed files with 1217 additions and 167 deletions
+5 -14
View File
@@ -65,21 +65,12 @@ POSTGRES_PASSWORD=__CHANGE_ME__
# Platform admin phone (gets the Admin role on login) # Platform admin phone (gets the Admin role on login)
ADMIN_PHONE=09XXXXXXXXX ADMIN_PHONE=09XXXXXXXXX
# --- Channel scraping (optional; off by default) — toggles ---
# INGESTION_ENABLED=true
# INGESTION_INTERVAL_MINUTES=30
# TELEGRAM_ENABLED=true
# TELEGRAM_BOT_TOKEN=__TELEGRAM_BOT_TOKEN__
# BALE_ENABLED=true
# BALE_BOT_TOKEN=__BALE_BOT_TOKEN__
# DIVAR_ENABLED=true
``` ```
> Channel **lists** (`Telegram.Channels`, `Divar.Queries`) live in `appsettings.json` (or add > **That's the whole secret.** Everything else — the **AI audit layer** *and* the **channel
> `Ingestion__Telegram__Channels__0=...` keys). The toggles above gate each source on/off. > sources** (Telegram channels, Bale bot token, Divar queries, auto-ingest on/off + interval) — is
> The **AI audit layer** is configured at runtime in the admin panel (`/Admin/Settings`) — endpoint, > configured at runtime in the admin panel (`/Admin/Settings`), stored in the DB. No redeploy to
> model, API key, prompt/framework, auto-approve — not via env. Default: AI off, mode = Manual, > change them. Defaults: AI off, mode = Manual, all sources off ⇒ nothing publishes without admin
> so every ingested listing waits in the review queue until an admin publishes it. > review.
> `ASPNETCORE_ENVIRONMENT=Production` is set by the compose file ⇒ only **reference data** > `ASPNETCORE_ENVIRONMENT=Production` is set by the compose file ⇒ only **reference data**
> (roles/cities/districts) is seeded — no demo facilities/shifts. > (roles/cities/districts) is seeded — no demo facilities/shifts.
+1 -8
View File
@@ -23,14 +23,7 @@ services:
ASPNETCORE_URLS: "http://+:8080" ASPNETCORE_URLS: "http://+:8080"
ConnectionStrings__Default: "Host=db;Port=5432;Database=${POSTGRES_DB:-hamkadr};Username=${POSTGRES_USER:-hamkadr};Password=${POSTGRES_PASSWORD}" ConnectionStrings__Default: "Host=db;Port=5432;Database=${POSTGRES_DB:-hamkadr};Username=${POSTGRES_USER:-hamkadr};Password=${POSTGRES_PASSWORD}"
Auth__AdminPhone: "${ADMIN_PHONE:-}" Auth__AdminPhone: "${ADMIN_PHONE:-}"
# Channel scraping (optional; enable + configure via ENV_FILE) # Ingestion + AI + channel sources are configured at runtime in /Admin/Settings (DB), not here.
Ingestion__Enabled: "${INGESTION_ENABLED:-false}"
Ingestion__IntervalMinutes: "${INGESTION_INTERVAL_MINUTES:-30}"
Ingestion__Telegram__Enabled: "${TELEGRAM_ENABLED:-false}"
Ingestion__Telegram__BotToken: "${TELEGRAM_BOT_TOKEN:-}"
Ingestion__Bale__Enabled: "${BALE_ENABLED:-false}"
Ingestion__Bale__BotToken: "${BALE_BOT_TOKEN:-}"
Ingestion__Divar__Enabled: "${DIVAR_ENABLED:-false}"
# healthcheck is defined in the Dockerfile (bash /dev/tcp probe) so the deploy # healthcheck is defined in the Dockerfile (bash /dev/tcp probe) so the deploy
# job's `docker inspect Health.Status` wait works. # job's `docker inspect Health.Status` wait works.
@@ -0,0 +1,873 @@
// <auto-generated />
using System;
using JobsMedical.Web.Data;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
#nullable disable
namespace JobsMedical.Web.Migrations
{
// Designer half of the "IngestionSourcesInSettings" migration (tool-generated; do not hand-edit the model calls).
// It binds the migration id to AppDbContext and records the full target model as it looks AFTER this
// migration is applied. The schema operations themselves live in the other partial class (Up/Down).
[DbContext(typeof(AppDbContext))]
[Migration("20260603210638_IngestionSourcesInSettings")]
partial class IngestionSourcesInSettings
{
/// <inheritdoc />
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasAnnotation("ProductVersion", "10.0.0")
.HasAnnotation("Relational:MaxIdentifierLength", 63);
NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder);
// ---- Entity shapes (columns, keys, indexes, table names) ----
// AppSetting: AI audit settings plus the ingestion-source columns this migration introduces
// (AutoIngestEnabled, IngestIntervalMinutes, Telegram*/Bale*/Divar*).
modelBuilder.Entity("JobsMedical.Web.Models.AppSetting", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("AiApiKey")
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<bool>("AiAutoApprove")
.HasColumnType("boolean");
b.Property<bool>("AiEnabled")
.HasColumnType("boolean");
b.Property<string>("AiEndpoint")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<string>("AiModel")
.HasMaxLength(120)
.HasColumnType("character varying(120)");
b.Property<string>("AiSystemPrompt")
.IsRequired()
.HasMaxLength(4000)
.HasColumnType("character varying(4000)");
b.Property<bool>("AutoIngestEnabled")
.HasColumnType("boolean");
b.Property<int>("AutoPublishMinConfidence")
.HasColumnType("integer");
b.Property<string>("BaleBotToken")
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<bool>("BaleEnabled")
.HasColumnType("boolean");
b.Property<string>("DivarCity")
.HasMaxLength(60)
.HasColumnType("character varying(60)");
b.Property<bool>("DivarEnabled")
.HasColumnType("boolean");
b.Property<string>("DivarQueries")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
// No default value is recorded in the model for this column; only the migration's Up()
// decides what existing rows receive.
b.Property<int>("IngestIntervalMinutes")
.HasColumnType("integer");
b.Property<int>("Mode")
.HasColumnType("integer");
b.Property<string>("TelegramChannels")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
b.Property<bool>("TelegramEnabled")
.HasColumnType("boolean");
b.Property<DateTime>("UpdatedAt")
.HasColumnType("timestamp with time zone");
b.HasKey("Id");
b.ToTable("AppSettings");
});
// Application: unique per (ShiftId, DoctorId) pair.
modelBuilder.Entity("JobsMedical.Web.Models.Application", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<int>("DoctorId")
.HasColumnType("integer");
b.Property<string>("Message")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<int>("ShiftId")
.HasColumnType("integer");
b.Property<int>("Status")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("DoctorId");
b.HasIndex("ShiftId", "DoctorId")
.IsUnique();
b.ToTable("Applications");
});
modelBuilder.Entity("JobsMedical.Web.Models.City", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<bool>("IsActive")
.HasColumnType("boolean");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<string>("Province")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.HasKey("Id");
b.ToTable("Cities");
});
modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int>("CityId")
.HasColumnType("integer");
b.Property<bool>("IsActive")
.HasColumnType("boolean");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(120)
.HasColumnType("character varying(120)");
b.HasKey("Id");
b.HasIndex("CityId");
b.ToTable("Districts");
});
// DoctorProfile: at most one profile per user (unique index on UserId).
modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Bio")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.Property<int?>("CityId")
.HasColumnType("integer");
b.Property<bool>("IsVerified")
.HasColumnType("boolean");
b.Property<string>("LicenseNo")
.HasMaxLength(20)
.HasColumnType("character varying(20)");
b.Property<int?>("RoleId")
.HasColumnType("integer");
b.Property<string>("Specialty")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<int>("UserId")
.HasColumnType("integer");
b.Property<int>("YearsExperience")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("CityId");
b.HasIndex("RoleId");
b.HasIndex("UserId")
.IsUnique();
b.ToTable("DoctorProfiles");
});
modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Address")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<string>("BaleId")
.HasMaxLength(50)
.HasColumnType("character varying(50)");
b.Property<int>("CityId")
.HasColumnType("integer");
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<int?>("DistrictId")
.HasColumnType("integer");
b.Property<bool>("IsVerified")
.HasColumnType("boolean");
b.Property<double?>("Lat")
.HasColumnType("double precision");
b.Property<double?>("Lng")
.HasColumnType("double precision");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<int?>("OwnerUserId")
.HasColumnType("integer");
b.Property<string>("Phone")
.HasMaxLength(20)
.HasColumnType("character varying(20)");
b.Property<int>("Type")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("CityId");
b.HasIndex("DistrictId");
b.HasIndex("OwnerUserId");
b.ToTable("Facilities");
});
// InterestEvent: the only entity here with a bigint identity key; VisitorId is the
// varchar(36) foreign key to Visitor.Id (see the relationship section below).
modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
{
b.Property<long>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("bigint");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<long>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<int>("EventType")
.HasColumnType("integer");
b.Property<int?>("JobOpeningId")
.HasColumnType("integer");
b.Property<int?>("ShiftId")
.HasColumnType("integer");
b.Property<string>("VisitorId")
.IsRequired()
.HasColumnType("character varying(36)");
b.HasKey("Id");
b.HasIndex("JobOpeningId");
b.HasIndex("ShiftId");
b.HasIndex("VisitorId", "CreatedAt");
b.ToTable("InterestEvents");
});
modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<string>("Description")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
b.Property<int>("EmploymentType")
.HasColumnType("integer");
b.Property<int>("FacilityId")
.HasColumnType("integer");
b.Property<int>("GenderRequirement")
.HasColumnType("integer");
b.Property<string>("Requirements")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.Property<int>("RoleId")
.HasColumnType("integer");
b.Property<long?>("SalaryMax")
.HasColumnType("bigint");
b.Property<long?>("SalaryMin")
.HasColumnType("bigint");
b.Property<int>("Source")
.HasColumnType("integer");
b.Property<string>("SourceUrl")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<int>("Status")
.HasColumnType("integer");
b.Property<string>("Title")
.IsRequired()
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.HasKey("Id");
b.HasIndex("FacilityId");
b.HasIndex("RoleId");
b.HasIndex("Status");
b.ToTable("JobOpenings");
});
// RawListing: indexed by ContentHash and Status; optionally linked to a Shift via LinkedShiftId.
modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int>("Confidence")
.HasColumnType("integer");
b.Property<string>("ContentHash")
.HasMaxLength(64)
.HasColumnType("character varying(64)");
b.Property<DateTime>("FetchedAt")
.HasColumnType("timestamp with time zone");
b.Property<int?>("LinkedShiftId")
.HasColumnType("integer");
b.Property<string>("ParsedJson")
.HasColumnType("text");
b.Property<string>("RawText")
.IsRequired()
.HasColumnType("text");
b.Property<string>("SourceChannel")
.IsRequired()
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<string>("SourceUrl")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<int>("Status")
.HasColumnType("integer");
b.Property<string>("ValidationNotes")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.HasKey("Id");
b.HasIndex("ContentHash");
b.HasIndex("LinkedShiftId");
b.HasIndex("Status");
b.ToTable("RawListings");
});
modelBuilder.Entity("JobsMedical.Web.Models.Role", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Category")
.IsRequired()
.HasMaxLength(50)
.HasColumnType("character varying(50)");
b.Property<bool>("IsActive")
.HasColumnType("boolean");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<int>("SortOrder")
.HasColumnType("integer");
b.HasKey("Id");
b.ToTable("Roles");
});
// Shift: composite index on (Date, Status) in addition to the FK indexes.
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<DateOnly>("Date")
.HasColumnType("date");
b.Property<string>("Description")
.HasMaxLength(1500)
.HasColumnType("character varying(1500)");
b.Property<TimeOnly>("EndTime")
.HasColumnType("time without time zone");
b.Property<int>("FacilityId")
.HasColumnType("integer");
b.Property<int>("GenderRequirement")
.HasColumnType("integer");
b.Property<long?>("PayAmount")
.HasColumnType("bigint");
b.Property<int>("PayType")
.HasColumnType("integer");
b.Property<int>("RoleId")
.HasColumnType("integer");
b.Property<int?>("SharePercent")
.HasColumnType("integer");
b.Property<int>("ShiftType")
.HasColumnType("integer");
b.Property<int>("Source")
.HasColumnType("integer");
b.Property<string>("SourceUrl")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<string>("SpecialtyRequired")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<TimeOnly>("StartTime")
.HasColumnType("time without time zone");
b.Property<int>("Status")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("FacilityId");
b.HasIndex("RoleId");
b.HasIndex("Date", "Status");
b.ToTable("Shifts");
});
// User: Phone is required and unique.
modelBuilder.Entity("JobsMedical.Web.Models.User", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<string>("FullName")
.HasMaxLength(150)
.HasColumnType("character varying(150)");
b.Property<bool>("IsPhoneVerified")
.HasColumnType("boolean");
b.Property<string>("Phone")
.IsRequired()
.HasMaxLength(20)
.HasColumnType("character varying(20)");
b.Property<int>("Role")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("Phone")
.IsUnique();
b.ToTable("Users");
});
// UserPreferences: one row per visitor (unique index on VisitorId).
modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int?>("CityId")
.HasColumnType("integer");
b.Property<int>("Gender")
.HasColumnType("integer");
b.Property<long?>("MinPay")
.HasColumnType("bigint");
b.Property<int?>("PreferredShiftType")
.HasColumnType("integer");
b.Property<int?>("RoleId")
.HasColumnType("integer");
b.Property<DateTime>("UpdatedAt")
.HasColumnType("timestamp with time zone");
b.Property<string>("VisitorId")
.IsRequired()
.HasColumnType("character varying(36)");
b.HasKey("Id");
b.HasIndex("CityId");
b.HasIndex("RoleId");
b.HasIndex("VisitorId")
.IsUnique();
b.ToTable("UserPreferences");
});
// Visitor: string primary key of at most 36 characters (not an identity column).
modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
{
b.Property<string>("Id")
.HasMaxLength(36)
.HasColumnType("character varying(36)");
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<DateTime>("LastSeenAt")
.HasColumnType("timestamp with time zone");
b.Property<int?>("UserId")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("UserId");
b.ToTable("Visitors");
});
// ---- Relationships (foreign keys, required-ness and delete behaviors) ----
modelBuilder.Entity("JobsMedical.Web.Models.Application", b =>
{
b.HasOne("JobsMedical.Web.Models.User", "Doctor")
.WithMany("Applications")
.HasForeignKey("DoctorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.Shift", "Shift")
.WithMany("Applications")
.HasForeignKey("ShiftId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("Doctor");
b.Navigation("Shift");
});
modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany()
.HasForeignKey("CityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("City");
});
modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany()
.HasForeignKey("CityId");
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany()
.HasForeignKey("RoleId");
b.HasOne("JobsMedical.Web.Models.User", "User")
.WithOne("DoctorProfile")
.HasForeignKey("JobsMedical.Web.Models.DoctorProfile", "UserId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("City");
b.Navigation("Role");
b.Navigation("User");
});
// Facility: deleting a District or the owning User nulls the reference instead of deleting the facility.
modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany("Facilities")
.HasForeignKey("CityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.District", "District")
.WithMany("Facilities")
.HasForeignKey("DistrictId")
.OnDelete(DeleteBehavior.SetNull);
b.HasOne("JobsMedical.Web.Models.User", "OwnerUser")
.WithMany()
.HasForeignKey("OwnerUserId")
.OnDelete(DeleteBehavior.SetNull);
b.Navigation("City");
b.Navigation("District");
b.Navigation("OwnerUser");
});
modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
{
b.HasOne("JobsMedical.Web.Models.JobOpening", "JobOpening")
.WithMany()
.HasForeignKey("JobOpeningId")
.OnDelete(DeleteBehavior.Cascade);
b.HasOne("JobsMedical.Web.Models.Shift", "Shift")
.WithMany()
.HasForeignKey("ShiftId")
.OnDelete(DeleteBehavior.Cascade);
b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor")
.WithMany("Events")
.HasForeignKey("VisitorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("JobOpening");
b.Navigation("Shift");
b.Navigation("Visitor");
});
// JobOpening: Role uses Restrict, unlike the Cascade on Facility.
modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b =>
{
b.HasOne("JobsMedical.Web.Models.Facility", "Facility")
.WithMany()
.HasForeignKey("FacilityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany()
.HasForeignKey("RoleId")
.OnDelete(DeleteBehavior.Restrict)
.IsRequired();
b.Navigation("Facility");
b.Navigation("Role");
});
// RawListing -> Shift: optional link with no explicit OnDelete configured here.
modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b =>
{
b.HasOne("JobsMedical.Web.Models.Shift", "LinkedShift")
.WithMany()
.HasForeignKey("LinkedShiftId");
b.Navigation("LinkedShift");
});
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{
b.HasOne("JobsMedical.Web.Models.Facility", "Facility")
.WithMany("Shifts")
.HasForeignKey("FacilityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany("Shifts")
.HasForeignKey("RoleId")
.OnDelete(DeleteBehavior.Restrict)
.IsRequired();
b.Navigation("Facility");
b.Navigation("Role");
});
modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany()
.HasForeignKey("CityId");
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany()
.HasForeignKey("RoleId");
b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor")
.WithOne("Preferences")
.HasForeignKey("JobsMedical.Web.Models.UserPreferences", "VisitorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("City");
b.Navigation("Role");
b.Navigation("Visitor");
});
modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
{
b.HasOne("JobsMedical.Web.Models.User", "User")
.WithMany()
.HasForeignKey("UserId")
.OnDelete(DeleteBehavior.SetNull);
b.Navigation("User");
});
// ---- Inverse navigations declared on the principal entities ----
modelBuilder.Entity("JobsMedical.Web.Models.City", b =>
{
b.Navigation("Facilities");
});
modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
{
b.Navigation("Facilities");
});
modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
{
b.Navigation("Shifts");
});
modelBuilder.Entity("JobsMedical.Web.Models.Role", b =>
{
b.Navigation("Shifts");
});
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{
b.Navigation("Applications");
});
modelBuilder.Entity("JobsMedical.Web.Models.User", b =>
{
b.Navigation("Applications");
b.Navigation("DoctorProfile");
});
modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
{
b.Navigation("Events");
b.Navigation("Preferences");
});
#pragma warning restore 612, 618
}
}
}
@@ -0,0 +1,117 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace JobsMedical.Web.Migrations
{
    /// <summary>
    /// Adds the ingestion-source configuration columns to <c>AppSettings</c>:
    /// the auto-ingest toggle and interval, plus the Telegram, Bale and Divar settings.
    /// Every boolean toggle is back-filled as <c>false</c>, so no source is enabled on
    /// an existing row until it is switched on explicitly.
    /// </summary>
    public partial class IngestionSourcesInSettings : Migration
    {
        /// <summary>
        /// Applies the migration by adding the nine new columns to <c>AppSettings</c>.
        /// </summary>
        /// <param name="migrationBuilder">Builder that collects the schema operations.</param>
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.AddColumn<bool>(
                name: "AutoIngestEnabled",
                table: "AppSettings",
                type: "boolean",
                nullable: false,
                defaultValue: false);

            migrationBuilder.AddColumn<string>(
                name: "BaleBotToken",
                table: "AppSettings",
                type: "character varying(200)",
                maxLength: 200,
                nullable: true);

            migrationBuilder.AddColumn<bool>(
                name: "BaleEnabled",
                table: "AppSettings",
                type: "boolean",
                nullable: false,
                defaultValue: false);

            migrationBuilder.AddColumn<string>(
                name: "DivarCity",
                table: "AppSettings",
                type: "character varying(60)",
                maxLength: 60,
                nullable: true);

            migrationBuilder.AddColumn<bool>(
                name: "DivarEnabled",
                table: "AppSettings",
                type: "boolean",
                nullable: false,
                defaultValue: false);

            migrationBuilder.AddColumn<string>(
                name: "DivarQueries",
                table: "AppSettings",
                type: "character varying(2000)",
                maxLength: 2000,
                nullable: true);

            // Back-fill existing rows with 30, not the scaffolded 0: AppSetting.IngestIntervalMinutes
            // defaults to 30 and the settings form enforces min="1", so a 0-minute interval on a
            // pre-existing settings row would be an invalid value the admin never chose.
            migrationBuilder.AddColumn<int>(
                name: "IngestIntervalMinutes",
                table: "AppSettings",
                type: "integer",
                nullable: false,
                defaultValue: 30);

            migrationBuilder.AddColumn<string>(
                name: "TelegramChannels",
                table: "AppSettings",
                type: "character varying(2000)",
                maxLength: 2000,
                nullable: true);

            migrationBuilder.AddColumn<bool>(
                name: "TelegramEnabled",
                table: "AppSettings",
                type: "boolean",
                nullable: false,
                defaultValue: false);
        }

        /// <summary>
        /// Reverts the migration by dropping every column added in <see cref="Up"/>.
        /// Any values stored in those columns are lost.
        /// </summary>
        /// <param name="migrationBuilder">Builder that collects the schema operations.</param>
        protected override void Down(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.DropColumn(
                name: "AutoIngestEnabled",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "BaleBotToken",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "BaleEnabled",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "DivarCity",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "DivarEnabled",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "DivarQueries",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "IngestIntervalMinutes",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "TelegramChannels",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "TelegramEnabled",
                table: "AppSettings");
        }
    }
}
@@ -53,12 +53,43 @@ namespace JobsMedical.Web.Migrations
.HasMaxLength(4000) .HasMaxLength(4000)
.HasColumnType("character varying(4000)"); .HasColumnType("character varying(4000)");
b.Property<bool>("AutoIngestEnabled")
.HasColumnType("boolean");
b.Property<int>("AutoPublishMinConfidence") b.Property<int>("AutoPublishMinConfidence")
.HasColumnType("integer"); .HasColumnType("integer");
b.Property<string>("BaleBotToken")
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<bool>("BaleEnabled")
.HasColumnType("boolean");
b.Property<string>("DivarCity")
.HasMaxLength(60)
.HasColumnType("character varying(60)");
b.Property<bool>("DivarEnabled")
.HasColumnType("boolean");
b.Property<string>("DivarQueries")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
b.Property<int>("IngestIntervalMinutes")
.HasColumnType("integer");
b.Property<int>("Mode") b.Property<int>("Mode")
.HasColumnType("integer"); .HasColumnType("integer");
b.Property<string>("TelegramChannels")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
b.Property<bool>("TelegramEnabled")
.HasColumnType("boolean");
b.Property<DateTime>("UpdatedAt") b.Property<DateTime>("UpdatedAt")
.HasColumnType("timestamp with time zone"); .HasColumnType("timestamp with time zone");
+23
View File
@@ -32,8 +32,31 @@ public class AppSetting
/// <summary>If AI approves AND Mode is Automatic, publish without human review.</summary> /// <summary>If AI approves AND Mode is Automatic, publish without human review.</summary>
public bool AiAutoApprove { get; set; } = false; public bool AiAutoApprove { get; set; } = false;
// --- Channel scraping sources (configured here, NOT in env) ---
/// <summary>Run the ingestion worker on a timer.</summary>
public bool AutoIngestEnabled { get; set; } = false;
public int IngestIntervalMinutes { get; set; } = 30;
public bool TelegramEnabled { get; set; } = false;
/// <summary>Public Telegram channel usernames, one per line or comma-separated.</summary>
[MaxLength(2000)] public string? TelegramChannels { get; set; }
public bool BaleEnabled { get; set; } = false;
[MaxLength(200)] public string? BaleBotToken { get; set; }
public bool DivarEnabled { get; set; } = false;
[MaxLength(60)] public string? DivarCity { get; set; } = "tehran";
/// <summary>Divar search terms, one per line or comma-separated.</summary>
[MaxLength(2000)] public string? DivarQueries { get; set; }
public DateTime UpdatedAt { get; set; } = DateTime.UtcNow; public DateTime UpdatedAt { get; set; } = DateTime.UtcNow;
/// <summary>
/// Breaks a free-form list (as typed into a textarea) into its individual items.
/// Items may be separated by line breaks, a Latin comma or an Arabic/Persian comma;
/// each item is trimmed and blank items are dropped.
/// </summary>
/// <param name="s">Raw text; may be <c>null</c>, empty or whitespace-only.</param>
/// <returns>A new list of the non-empty, trimmed items (empty when there is no input).</returns>
public static List<string> SplitList(string? s)
{
    // Nothing usable was entered: hand back a fresh, empty list.
    if (string.IsNullOrWhiteSpace(s))
    {
        return new List<string>();
    }

    char[] separators = { '\n', '\r', ',', '،' };
    const StringSplitOptions options =
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;

    string[] items = s.Split(separators, options);
    return new List<string>(items);
}
public const string DefaultPrompt = """ public const string DefaultPrompt = """
تو دستیار بررسی آگهیهای کاری حوزه درمان برای پلتفرم «همکادر» هستی. تو دستیار بررسی آگهیهای کاری حوزه درمان برای پلتفرم «همکادر» هستی.
هر آگهی خام را بخوان و تصمیم بگیر: هر آگهی خام را بخوان و تصمیم بگیر:
+4 -10
View File
@@ -26,16 +26,10 @@
<div class="layout-2"> <div class="layout-2">
<aside class="card card-pad filter-card"> <aside class="card card-pad filter-card">
<h3>موتور جمع‌آوری</h3> <h3>موتور جمع‌آوری</h3>
<p class="muted" style="font-size:13px;">منابع متصل:</p> <p class="muted" style="font-size:13px;">منابع: @string.Join("، ", Model.SourceNames)</p>
<ul style="margin:0 0 12px; padding-inline-start:18px; font-size:13.5px;"> <p class="muted" style="font-size:12px; margin:0 0 12px;">
@foreach (var src in Model.Sources) فعال/غیرفعال‌سازی و تنظیم کانال‌ها در <a asp-page="/Admin/Settings">تنظیمات</a>.
{ </p>
<li>@src.Name —
@if (src.Enabled) { <span style="color:var(--primary-dark);">فعال</span> }
else { <span class="muted">غیرفعال (نیازمند تنظیمات)</span> }
</li>
}
</ul>
<form method="post"> <form method="post">
<button type="submit" asp-page-handler="RunIngestion" class="btn btn-accent btn-block">اجرای جمع‌آوری اکنون</button> <button type="submit" asp-page-handler="RunIngestion" class="btn btn-accent btn-block">اجرای جمع‌آوری اکنون</button>
</form> </form>
@@ -22,7 +22,7 @@ public class IndexModel : PageModel
public List<RawListing> Queue { get; private set; } = new(); public List<RawListing> Queue { get; private set; } = new();
public List<RawListing> Flagged { get; private set; } = new(); public List<RawListing> Flagged { get; private set; } = new();
public IReadOnlyList<(string Name, bool Enabled)> Sources { get; private set; } = new List<(string, bool)>(); public IReadOnlyList<string> SourceNames { get; private set; } = new List<string>();
public int PublishedShifts { get; private set; } public int PublishedShifts { get; private set; }
public int PublishedJobs { get; private set; } public int PublishedJobs { get; private set; }
@@ -64,7 +64,7 @@ public class IndexModel : PageModel
Flagged = await _db.RawListings Flagged = await _db.RawListings
.Where(r => r.Status == RawListingStatus.Flagged) .Where(r => r.Status == RawListingStatus.Flagged)
.OrderByDescending(r => r.FetchedAt).ToListAsync(); .OrderByDescending(r => r.FetchedAt).ToListAsync();
Sources = _ingest.Sources; SourceNames = _ingest.SourceNames;
PublishedShifts = await _db.Shifts.CountAsync(s => s.Source != ShiftSource.Direct); PublishedShifts = await _db.Shifts.CountAsync(s => s.Source != ShiftSource.Direct);
PublishedJobs = await _db.JobOpenings.CountAsync(); PublishedJobs = await _db.JobOpenings.CountAsync();
} }
@@ -62,6 +62,49 @@
</label> </label>
</div> </div>
<hr style="border:none; border-top:1px solid var(--line); margin:18px 0;" />
<h3 style="margin-top:0;">منابع جمع‌آوری (اسکرپ کانال‌ها)</h3>
<div class="filter-group">
<label style="display:flex; align-items:center; gap:8px; font-weight:700;">
<input type="checkbox" name="AutoIngestEnabled" value="true" style="width:auto;" checked="@Model.AutoIngestEnabled" />
اجرای خودکار جمع‌آوری روی زمان‌بند
</label>
</div>
<div class="filter-group">
<label>فاصله اجرای خودکار (دقیقه)</label>
<input type="number" name="IngestIntervalMinutes" min="1" value="@Model.IngestIntervalMinutes" dir="ltr" />
</div>
<div class="filter-group">
<label style="display:flex; align-items:center; gap:8px; font-weight:700;">
<input type="checkbox" name="TelegramEnabled" value="true" style="width:auto;" checked="@Model.TelegramEnabled" />
تلگرام (کانال‌های عمومی — بدون توکن)
</label>
<label style="margin-top:6px;">یوزرنیم کانال‌ها (هر خط یک کانال)</label>
<textarea name="TelegramChannels" rows="3" dir="ltr" placeholder="shift_channel&#10;another_channel">@Model.TelegramChannels</textarea>
</div>
<div class="filter-group">
<label style="display:flex; align-items:center; gap:8px; font-weight:700;">
<input type="checkbox" name="BaleEnabled" value="true" style="width:auto;" checked="@Model.BaleEnabled" />
بله (بات باید عضو کانال باشد)
</label>
<label style="margin-top:6px;">توکن بات بله</label>
<input type="password" name="BaleBotToken" value="@Model.BaleBotToken" dir="ltr" />
</div>
<div class="filter-group">
<label style="display:flex; align-items:center; gap:8px; font-weight:700;">
<input type="checkbox" name="DivarEnabled" value="true" style="width:auto;" checked="@Model.DivarEnabled" />
دیوار
</label>
<div style="display:flex; gap:8px; margin-top:6px;">
<div style="flex:0 0 120px;"><label>شهر (slug)</label><input type="text" name="DivarCity" value="@Model.DivarCity" dir="ltr" placeholder="tehran" /></div>
<div style="flex:1;"><label>عبارت‌های جستجو (هر خط یکی)</label><textarea name="DivarQueries" rows="3">@Model.DivarQueries</textarea></div>
</div>
</div>
<button type="submit" class="btn btn-accent btn-block btn-lg">ذخیره تنظیمات</button> <button type="submit" class="btn btn-accent btn-block btn-lg">ذخیره تنظیمات</button>
</form> </form>
</div> </div>
@@ -20,6 +20,16 @@ public class SettingsModel : PageModel
[BindProperty] public string? AiModel { get; set; } [BindProperty] public string? AiModel { get; set; }
[BindProperty] public string AiSystemPrompt { get; set; } = ""; [BindProperty] public string AiSystemPrompt { get; set; } = "";
[BindProperty] public bool AiAutoApprove { get; set; } [BindProperty] public bool AiAutoApprove { get; set; }
// Channel scraping sources
[BindProperty] public bool AutoIngestEnabled { get; set; }
[BindProperty] public int IngestIntervalMinutes { get; set; } = 30;
[BindProperty] public bool TelegramEnabled { get; set; }
[BindProperty] public string? TelegramChannels { get; set; }
[BindProperty] public bool BaleEnabled { get; set; }
[BindProperty] public string? BaleBotToken { get; set; }
[BindProperty] public bool DivarEnabled { get; set; }
[BindProperty] public string? DivarCity { get; set; }
[BindProperty] public string? DivarQueries { get; set; }
[TempData] public string? Saved { get; set; } [TempData] public string? Saved { get; set; }
public async Task OnGetAsync() public async Task OnGetAsync()
@@ -33,6 +43,15 @@ public class SettingsModel : PageModel
AiModel = s.AiModel; AiModel = s.AiModel;
AiSystemPrompt = s.AiSystemPrompt; AiSystemPrompt = s.AiSystemPrompt;
AiAutoApprove = s.AiAutoApprove; AiAutoApprove = s.AiAutoApprove;
AutoIngestEnabled = s.AutoIngestEnabled;
IngestIntervalMinutes = s.IngestIntervalMinutes;
TelegramEnabled = s.TelegramEnabled;
TelegramChannels = s.TelegramChannels;
BaleEnabled = s.BaleEnabled;
BaleBotToken = s.BaleBotToken;
DivarEnabled = s.DivarEnabled;
DivarCity = s.DivarCity;
DivarQueries = s.DivarQueries;
} }
public async Task<IActionResult> OnPostAsync() public async Task<IActionResult> OnPostAsync()
@@ -47,6 +66,15 @@ public class SettingsModel : PageModel
AiModel = AiModel, AiModel = AiModel,
AiSystemPrompt = AiSystemPrompt, AiSystemPrompt = AiSystemPrompt,
AiAutoApprove = AiAutoApprove, AiAutoApprove = AiAutoApprove,
AutoIngestEnabled = AutoIngestEnabled,
IngestIntervalMinutes = IngestIntervalMinutes,
TelegramEnabled = TelegramEnabled,
TelegramChannels = TelegramChannels,
BaleEnabled = BaleEnabled,
BaleBotToken = BaleBotToken,
DivarEnabled = DivarEnabled,
DivarCity = DivarCity,
DivarQueries = DivarQueries,
}); });
Saved = "تنظیمات ذخیره شد."; Saved = "تنظیمات ذخیره شد.";
return RedirectToPage(); return RedirectToPage();
-8
View File
@@ -28,14 +28,6 @@ builder.Services.AddHttpClient("scrape", c =>
c.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; HamkadrBot/1.0)"); c.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; HamkadrBot/1.0)");
}); });
builder.Services.AddHttpClient("ai"); builder.Services.AddHttpClient("ai");
builder.Services.Configure<JobsMedical.Web.Services.Scraping.IngestionOptions>(
builder.Configuration.GetSection("Ingestion"));
builder.Services.Configure<JobsMedical.Web.Services.Scraping.TelegramOptions>(
builder.Configuration.GetSection("Ingestion:Telegram"));
builder.Services.Configure<JobsMedical.Web.Services.Scraping.BaleOptions>(
builder.Configuration.GetSection("Ingestion:Bale"));
builder.Services.Configure<JobsMedical.Web.Services.Scraping.DivarOptions>(
builder.Configuration.GetSection("Ingestion:Divar"));
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.ListingValidator>(); builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.ListingValidator>();
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.IAiAuditor, builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.IAiAuditor,
JobsMedical.Web.Services.Scraping.OpenAiCompatibleAuditor>(); JobsMedical.Web.Services.Scraping.OpenAiCompatibleAuditor>();
@@ -1,46 +1,34 @@
using System.Text.Json; using System.Text.Json;
using Microsoft.Extensions.Options; using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping; namespace JobsMedical.Web.Services.Scraping;
public class BaleOptions
{
public bool Enabled { get; set; }
public string? BotToken { get; set; }
public string BaseUrl { get; set; } = "https://tapi.bale.ai"; // Bale Bot API host
}
/// <summary> /// <summary>
/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. The bot must /// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. Enabled +
/// be a member/admin of the channels it should read. Pulls text from messages and channel posts. /// bot token come from admin settings (DB). The bot must be a member of the channels it reads.
/// </summary> /// </summary>
public class BaleListingSource : IListingSource public class BaleListingSource : IListingSource
{ {
private readonly BaleOptions _opts; private const string BaseUrl = "https://tapi.bale.ai";
private readonly IHttpClientFactory _http; private readonly IHttpClientFactory _http;
private readonly ILogger<BaleListingSource> _log; private readonly ILogger<BaleListingSource> _log;
/// <summary>
/// Creates the Bale source. No options object is injected: the enabled flag and bot token are
/// read from the <see cref="AppSetting"/> handed to <c>FetchAsync</c> on every call.
/// </summary>
/// <param name="http">Factory used to obtain the named "scrape" HTTP client.</param>
/// <param name="log">Logger for fetch failures.</param>
public BaleListingSource(IHttpClientFactory http, ILogger<BaleListingSource> log)
    => (_http, _log) = (http, log);
public string Name => "بله"; public string Name => "بله";
public bool Enabled => _opts.Enabled && !string.IsNullOrWhiteSpace(_opts.BotToken);
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default) public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
{ {
if (!Enabled) { _log.LogInformation("Bale source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); } if (!s.BaleEnabled || string.IsNullOrWhiteSpace(s.BaleBotToken)) return Array.Empty<ScrapedItem>();
try try
{ {
var client = _http.CreateClient("scrape"); var client = _http.CreateClient("scrape");
var url = $"{_opts.BaseUrl.TrimEnd('/')}/bot{_opts.BotToken}/getUpdates"; var body = await client.GetStringAsync($"{BaseUrl}/bot{s.BaleBotToken}/getUpdates", ct);
var body = await client.GetStringAsync(url, ct);
using var doc = JsonDocument.Parse(body); using var doc = JsonDocument.Parse(body);
if (!doc.RootElement.TryGetProperty("result", out var result) || result.ValueKind != JsonValueKind.Array) if (!doc.RootElement.TryGetProperty("result", out var result) || result.ValueKind != JsonValueKind.Array)
return Array.Empty<ScrapedItem>(); return Array.Empty<ScrapedItem>();
@@ -54,11 +42,7 @@ public class BaleListingSource : IListingSource
} }
return items; return items;
} }
catch (Exception ex) catch (Exception ex) { _log.LogWarning(ex, "Bale fetch failed."); return Array.Empty<ScrapedItem>(); }
{
_log.LogWarning(ex, "Bale fetch failed.");
return Array.Empty<ScrapedItem>();
}
} }
private static string? TextOf(JsonElement update, string key) private static string? TextOf(JsonElement update, string key)
@@ -1,55 +1,44 @@
using System.Text; using System.Text;
using System.Text.Json; using System.Text.Json;
using Microsoft.Extensions.Options; using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping; namespace JobsMedical.Web.Services.Scraping;
public class DivarOptions
{
public bool Enabled { get; set; }
public string City { get; set; } = "tehran";
public string Category { get; set; } = "jobs";
public List<string> Queries { get; set; } = new(); // e.g. "پرستار", "پزشک عمومی", "درمانگاه"
public string BaseUrl { get; set; } = "https://api.divar.ir/v8/web-search";
public int PerQuery { get; set; } = 25;
}
/// <summary> /// <summary>
/// Best-effort Divar fetch: queries Divar's web-search JSON for each term and harvests post /// Best-effort Divar fetch: queries Divar's web-search JSON for each term and harvests post
/// titles + descriptions. Divar's private API shifts shape over time, so we walk the JSON /// titles + descriptions. Enabled + city + queries come from admin settings (DB). Divar's
/// tolerantly for any object carrying a "title" plus a nearby description field, and fail soft. /// private API shifts shape, so we walk JSON tolerantly and fail soft.
/// </summary> /// </summary>
public class DivarListingSource : IListingSource public class DivarListingSource : IListingSource
{ {
private readonly DivarOptions _opts; private const string BaseUrl = "https://api.divar.ir/v8/web-search";
private readonly IHttpClientFactory _http; private readonly IHttpClientFactory _http;
private readonly ILogger<DivarListingSource> _log; private readonly ILogger<DivarListingSource> _log;
/// <summary>
/// Creates the Divar source. City and search queries are not injected here; they are read from
/// the <see cref="AppSetting"/> handed to <c>FetchAsync</c> on every call.
/// </summary>
/// <param name="http">Factory used to obtain the named "scrape" HTTP client.</param>
/// <param name="log">Logger for per-query fetch failures.</param>
public DivarListingSource(IHttpClientFactory http, ILogger<DivarListingSource> log)
    => (_http, _log) = (http, log);
public string Name => "دیوار"; public string Name => "دیوار";
public bool Enabled => _opts.Enabled && _opts.Queries.Count > 0;
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default) public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
{ {
if (!Enabled) { _log.LogInformation("Divar source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); } var queries = AppSetting.SplitList(s.DivarQueries);
if (!s.DivarEnabled || queries.Count == 0) return Array.Empty<ScrapedItem>();
var city = string.IsNullOrWhiteSpace(s.DivarCity) ? "tehran" : s.DivarCity.Trim();
var client = _http.CreateClient("scrape"); var client = _http.CreateClient("scrape");
var items = new List<ScrapedItem>(); var items = new List<ScrapedItem>();
foreach (var q in _opts.Queries.Where(q => q.Trim().Length > 0)) foreach (var q in queries)
{ {
try try
{ {
var url = $"{_opts.BaseUrl.TrimEnd('/')}/{_opts.City}/{_opts.Category}?q={Uri.EscapeDataString(q)}"; var url = $"{BaseUrl}/{city}/jobs?q={Uri.EscapeDataString(q)}";
var body = await client.GetStringAsync(url, ct); var body = await client.GetStringAsync(url, ct);
using var doc = JsonDocument.Parse(body); using var doc = JsonDocument.Parse(body);
foreach (var text in Harvest(doc.RootElement).Take(_opts.PerQuery)) foreach (var text in Harvest(doc.RootElement).Take(25))
items.Add(new ScrapedItem("دیوار", text, "https://divar.ir")); items.Add(new ScrapedItem("دیوار", text, "https://divar.ir"));
} }
catch (Exception ex) { _log.LogWarning(ex, "Divar fetch failed for query {Query}", q); } catch (Exception ex) { _log.LogWarning(ex, "Divar fetch failed for query {Query}", q); }
@@ -60,7 +49,6 @@ public class DivarListingSource : IListingSource
private static readonly string[] DescKeys = private static readonly string[] DescKeys =
{ "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" }; { "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" };
/// <summary>Walk the JSON; for each object with a string "title", emit title + first description.</summary>
private static IEnumerable<string> Harvest(JsonElement el) private static IEnumerable<string> Harvest(JsonElement el)
{ {
if (el.ValueKind == JsonValueKind.Object) if (el.ValueKind == JsonValueKind.Object)
@@ -75,12 +63,12 @@ public class DivarListingSource : IListingSource
if (text.Length >= 15) yield return text; if (text.Length >= 15) yield return text;
} }
foreach (var p in el.EnumerateObject()) foreach (var p in el.EnumerateObject())
foreach (var s in Harvest(p.Value)) yield return s; foreach (var x in Harvest(p.Value)) yield return x;
} }
else if (el.ValueKind == JsonValueKind.Array) else if (el.ValueKind == JsonValueKind.Array)
{ {
foreach (var item in el.EnumerateArray()) foreach (var item in el.EnumerateArray())
foreach (var s in Harvest(item)) yield return s; foreach (var x in Harvest(item)) yield return x;
} }
} }
} }
@@ -1,15 +1,17 @@
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping; namespace JobsMedical.Web.Services.Scraping;
/// <summary>One raw post pulled from a source (a Telegram message, a Divar ad, etc.).</summary> /// <summary>One raw post pulled from a source (a Telegram message, a Divar ad, etc.).</summary>
public record ScrapedItem(string Source, string RawText, string? SourceUrl = null); public record ScrapedItem(string Source, string RawText, string? SourceUrl = null);
/// <summary>
/// A pluggable source the ingestion engine pulls from. Implement once per channel/site.
/// Configuration (enabled flag, channels, tokens) comes from the DB-backed
/// <see cref="AppSetting"/> passed to <see cref="FetchAsync"/> — set in the admin panel, not env.
/// A disabled/unconfigured source returns an empty list.
/// </summary>
public interface IListingSource
{
    /// <summary>Human-readable name of the source; the ingestion engine uses it in log messages.</summary>
    string Name { get; }

    /// <summary>Pulls the raw posts currently available from this source.</summary>
    /// <param name="settings">Current admin settings holding this source's on/off flag and configuration.</param>
    /// <param name="ct">Token that cancels the fetch.</param>
    /// <returns>The scraped items, or an empty list when the source is disabled or unconfigured.</returns>
    Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting settings, CancellationToken ct = default);
}
@@ -43,8 +43,7 @@ public class IngestionService
_ai = ai; _settings = settings; _log = log; _ai = ai; _settings = settings; _log = log;
} }
/// <summary>Names of all registered sources, materialized into a new list on each access.</summary>
public IReadOnlyList<string> SourceNames
{
    get { return _sources.Select(source => source.Name).ToList(); }
}
public async Task<IngestionSummary> RunAsync(CancellationToken ct = default) public async Task<IngestionSummary> RunAsync(CancellationToken ct = default)
{ {
@@ -58,12 +57,13 @@ public class IngestionService
var results = new List<SourceResult>(); var results = new List<SourceResult>();
foreach (var source in _sources.Where(s => s.Enabled)) foreach (var source in _sources)
{ {
int fetched = 0, queued = 0, published = 0, flagged = 0, spam = 0, dupes = 0; int fetched = 0, queued = 0, published = 0, flagged = 0, spam = 0, dupes = 0;
IReadOnlyList<ScrapedItem> items; IReadOnlyList<ScrapedItem> items;
try { items = await source.FetchAsync(ct); } try { items = await source.FetchAsync(settings, ct); }
catch (Exception ex) { _log.LogError(ex, "Source {Source} failed", source.Name); continue; } catch (Exception ex) { _log.LogError(ex, "Source {Source} failed", source.Name); continue; }
if (items.Count == 0) continue; // disabled/unconfigured source
foreach (var item in items) foreach (var item in items)
{ {
@@ -1,58 +1,52 @@
using Microsoft.Extensions.Options;
namespace JobsMedical.Web.Services.Scraping; namespace JobsMedical.Web.Services.Scraping;
public class IngestionOptions
{
public bool Enabled { get; set; } = false; // off by default — opt in via config
public int IntervalMinutes { get; set; } = 30;
}
/// <summary> /// <summary>
/// Periodically runs the ingestion engine when enabled (Ingestion:Enabled=true). Off by default /// Periodically runs the ingestion engine when the admin has turned auto-ingest ON
/// so nothing scrapes uninvited; admins can also trigger a run on demand from the admin UI. /// (AppSetting.AutoIngestEnabled) — read fresh from the DB each cycle, so it can be toggled at
/// runtime from the admin panel with no redeploy. When off, it idles and re-checks.
/// </summary> /// </summary>
public class IngestionWorker : BackgroundService public class IngestionWorker : BackgroundService
{ {
private readonly IServiceScopeFactory _scopes; private readonly IServiceScopeFactory _scopes;
private readonly IngestionOptions _opts;
private readonly ILogger<IngestionWorker> _log; private readonly ILogger<IngestionWorker> _log;
/// <summary>
/// Creates the worker. It keeps only the scope factory and logger; the auto-ingest flag and
/// interval are read from the settings service inside the loop rather than injected here.
/// </summary>
/// <param name="scopes">Factory for the per-cycle DI scope the scoped services are resolved from.</param>
/// <param name="log">Logger for cycle summaries and failures.</param>
public IngestionWorker(IServiceScopeFactory scopes, ILogger<IngestionWorker> log)
    => (_scopes, _log) = (scopes, log);
/// <summary>
/// Worker loop. After a short startup delay it re-reads the admin settings on every cycle and
/// runs the ingestion engine only while <c>AutoIngestEnabled</c> is on; otherwise it idles and
/// re-checks. A failed cycle is logged and the loop carries on.
/// </summary>
/// <param name="stoppingToken">Signalled when the host shuts down; ends the loop.</param>
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
    // Small startup delay so the DB/migrations are ready.
    try { await Task.Delay(TimeSpan.FromSeconds(20), stoppingToken); }
    catch (OperationCanceledException) { return; }

    while (!stoppingToken.IsCancellationRequested)
    {
        // Re-check cadence used while auto-ingest is off or when the cycle failed.
        var idleMinutes = 10;
        try
        {
            // Fresh scope per cycle: the settings and ingestion services are resolved from it.
            using var scope = _scopes.CreateScope();
            var settings = await scope.ServiceProvider
                .GetRequiredService<SettingsService>().GetAsync();

            if (settings.AutoIngestEnabled)
            {
                var svc = scope.ServiceProvider.GetRequiredService<IngestionService>();
                var summary = await svc.RunAsync(stoppingToken);
                _log.LogInformation("Auto-ingest: queued={Q} published={P} flagged={F} spam={S} dupes={D}",
                    summary.TotalQueued, summary.TotalPublished, summary.TotalFlagged,
                    summary.TotalSpam, summary.TotalDuplicates);
                // Never wait less than one minute, whatever value is stored.
                idleMinutes = Math.Max(1, settings.IngestIntervalMinutes);
            }
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            // Genuine shutdown: leave the loop quietly.
            break;
        }
        catch (Exception ex)
        {
            // Also covers cancellation exceptions NOT caused by shutdown (e.g. an HTTP timeout
            // surfacing as TaskCanceledException). Letting those escape would end this loop for
            // good, so they are logged like any other failed cycle.
            _log.LogError(ex, "Auto-ingest cycle failed");
        }

        try { await Task.Delay(TimeSpan.FromMinutes(idleMinutes), stoppingToken); }
        catch (OperationCanceledException) { break; }
    }
}
@@ -1,27 +1,33 @@
using JobsMedical.Web.Models;
using Microsoft.Extensions.Hosting;
namespace JobsMedical.Web.Services.Scraping; namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Built-in set of representative Persian posts (good, incomplete, and spam) so the whole
/// pipeline can be demoed. Only active in Development — outside Development it returns an
/// empty list, so sample data is never injected into production.
/// </summary>
public class SampleListingSource : IListingSource
{
    private readonly IHostEnvironment _env;

    /// <summary>Captures the host environment used to decide whether samples are emitted.</summary>
    public SampleListingSource(IHostEnvironment env)
    {
        _env = env;
    }

    /// <summary>Source label attached to every sample item.</summary>
    public string Name
    {
        get { return "نمونه (کانال آزمایشی)"; }
    }

    // Fixture mix: mostly well-formed listings, plus one spam post and one too-short post.
    private static readonly string[] Posts =
    {
        "درمانگاه شبانه‌روزی در سعادت‌آباد نیازمند پزشک عمومی برای شیفت شب، کارانه ۳ میلیون تومان. تماس ۰۹۱۲۳۴۵۶۷۸۹",
        "کلینیک تخصصی در تهران به پرستار خانم برای شیفت عصر نیازمند است، ۵۰٪ سهم درآمد. ۰۹۳۵۱۱۱۲۲۳۳",
        "استخدام ماما تمام‌وقت در بیمارستان خصوصی، حقوق توافقی. منطقه شهرک غرب.",
        "نیازمند تکنسین اتاق عمل آقا جهت همکاری در نارمک، شیفت صبح. ۰۹۱۲۰۰۰۰۰۰۰",
        "فروش فالوور و بک لینک ارزان، سرمایه گذاری در ارز دیجیتال با سود تضمینی!", // spam
        "پزشک", // too short / incomplete
        "بیمارستان آتیه جهت تکمیل کادر درمان به پزشک عمومی مقیم نیازمند است. قرارداد یک‌ساله، حقوق ۴۵ میلیون ماهانه. تهرانپارس.",
    };

    /// <summary>Returns the sample posts in Development and nothing anywhere else.</summary>
    /// <param name="settings">Admin settings; not consulted by this source.</param>
    /// <param name="ct">Cancellation token; unused because the result is produced synchronously.</param>
    /// <returns>An already-completed task holding the sample items or an empty list.</returns>
    public Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting settings, CancellationToken ct = default)
    {
        if (!_env.IsDevelopment())
        {
            return Task.FromResult<IReadOnlyList<ScrapedItem>>(Array.Empty<ScrapedItem>());
        }

        var samples = new List<ScrapedItem>(Posts.Length);
        foreach (var post in Posts)
        {
            samples.Add(new ScrapedItem(Name, post));
        }

        return Task.FromResult<IReadOnlyList<ScrapedItem>>(samples);
    }
}
@@ -34,6 +34,16 @@ public class SettingsService
s.AiSystemPrompt = string.IsNullOrWhiteSpace(incoming.AiSystemPrompt) s.AiSystemPrompt = string.IsNullOrWhiteSpace(incoming.AiSystemPrompt)
? AppSetting.DefaultPrompt : incoming.AiSystemPrompt; ? AppSetting.DefaultPrompt : incoming.AiSystemPrompt;
s.AiAutoApprove = incoming.AiAutoApprove; s.AiAutoApprove = incoming.AiAutoApprove;
// Channel scraping sources
s.AutoIngestEnabled = incoming.AutoIngestEnabled;
s.IngestIntervalMinutes = Math.Max(1, incoming.IngestIntervalMinutes);
s.TelegramEnabled = incoming.TelegramEnabled;
s.TelegramChannels = incoming.TelegramChannels?.Trim();
s.BaleEnabled = incoming.BaleEnabled;
s.BaleBotToken = incoming.BaleBotToken?.Trim();
s.DivarEnabled = incoming.DivarEnabled;
s.DivarCity = string.IsNullOrWhiteSpace(incoming.DivarCity) ? "tehran" : incoming.DivarCity.Trim();
s.DivarQueries = incoming.DivarQueries?.Trim();
s.UpdatedAt = DateTime.UtcNow; s.UpdatedAt = DateTime.UtcNow;
await _db.SaveChangesAsync(); await _db.SaveChangesAsync();
} }
@@ -1,50 +1,39 @@
using System.Net; using System.Net;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using Microsoft.Extensions.Options; using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping; namespace JobsMedical.Web.Services.Scraping;
public class TelegramOptions
{
public bool Enabled { get; set; }
public string? BotToken { get; set; } // optional (for private channels later)
public List<string> Channels { get; set; } = new(); // public channel usernames (no @)
public int PerChannel { get; set; } = 20;
}
/// <summary> /// <summary>
/// Reads public Telegram channels via the web preview (https://t.me/s/&lt;channel&gt;) — no bot /// Reads public Telegram channels via the web preview (https://t.me/s/&lt;channel&gt;) — no bot
/// token or login needed for public channels. Each message's text becomes a ScrapedItem. /// token needed for public channels. Enabled + channel list come from the admin settings (DB).
/// </summary> /// </summary>
public class TelegramListingSource : IListingSource public class TelegramListingSource : IListingSource
{ {
private readonly TelegramOptions _opts;
private readonly IHttpClientFactory _http; private readonly IHttpClientFactory _http;
private readonly ILogger<TelegramListingSource> _log; private readonly ILogger<TelegramListingSource> _log;
/// <summary>
/// Creates the Telegram source. The enabled flag and channel list are not injected here; they
/// are read from the <see cref="AppSetting"/> handed to <c>FetchAsync</c> on every call.
/// </summary>
/// <param name="http">Factory used to obtain the named "scrape" HTTP client.</param>
/// <param name="log">Logger for per-channel fetch failures.</param>
public TelegramListingSource(IHttpClientFactory http, ILogger<TelegramListingSource> log)
    => (_http, _log) = (http, log);
public string Name => "تلگرام"; public string Name => "تلگرام";
public bool Enabled => _opts.Enabled && _opts.Channels.Count > 0;
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default) public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
{ {
if (!Enabled) { _log.LogInformation("Telegram source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); } var channels = AppSetting.SplitList(s.TelegramChannels);
if (!s.TelegramEnabled || channels.Count == 0) return Array.Empty<ScrapedItem>();
var client = _http.CreateClient("scrape"); var client = _http.CreateClient("scrape");
var items = new List<ScrapedItem>(); var items = new List<ScrapedItem>();
foreach (var ch in _opts.Channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0)) foreach (var ch in channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0))
{ {
try try
{ {
var html = await client.GetStringAsync($"https://t.me/s/{ch}", ct); var html = await client.GetStringAsync($"https://t.me/s/{ch}", ct);
foreach (var text in ExtractMessages(html).Take(_opts.PerChannel)) foreach (var text in ExtractMessages(html).Take(20))
items.Add(new ScrapedItem($"تلگرام/{ch}", text, $"https://t.me/{ch}")); items.Add(new ScrapedItem($"تلگرام/{ch}", text, $"https://t.me/{ch}"));
} }
catch (Exception ex) { _log.LogWarning(ex, "Telegram fetch failed for {Channel}", ch); } catch (Exception ex) { _log.LogWarning(ex, "Telegram fetch failed for {Channel}", ch); }
@@ -52,7 +41,6 @@ public class TelegramListingSource : IListingSource
return items; return items;
} }
// Message bodies live in <div class="tgme_widget_message_text ...">...</div>.
private static IEnumerable<string> ExtractMessages(string html) private static IEnumerable<string> ExtractMessages(string html)
{ {
foreach (Match m in Regex.Matches(html, foreach (Match m in Regex.Matches(html,
@@ -69,7 +57,7 @@ internal static class HtmlUtil
public static string ToPlainText(string html) public static string ToPlainText(string html)
{ {
var s = Regex.Replace(html, "<br\\s*/?>", "\n", RegexOptions.IgnoreCase); var s = Regex.Replace(html, "<br\\s*/?>", "\n", RegexOptions.IgnoreCase);
s = Regex.Replace(s, "<[^>]+>", ""); // strip remaining tags s = Regex.Replace(s, "<[^>]+>", "");
s = WebUtility.HtmlDecode(s); s = WebUtility.HtmlDecode(s);
s = Regex.Replace(s, "[ \\t]+", " "); s = Regex.Replace(s, "[ \\t]+", " ");
return s.Trim(); return s.Trim();
-7
View File
@@ -11,12 +11,5 @@
}, },
"Auth": { "Auth": {
"AdminPhone": "09120000000" "AdminPhone": "09120000000"
},
"Ingestion": {
"Enabled": false,
"IntervalMinutes": 30,
"Telegram": { "Enabled": false, "BotToken": "", "Channels": [], "PerChannel": 20 },
"Bale": { "Enabled": false, "BotToken": "", "BaseUrl": "https://tapi.bale.ai" },
"Divar": { "Enabled": false, "City": "tehran", "Category": "jobs", "Queries": [], "BaseUrl": "https://api.divar.ir/v8/web-search", "PerQuery": 25 }
} }
} }