Move ingestion + Telegram/Bale/Divar config to DB-backed admin settings
CI/CD / CI · dotnet build (push) Successful in 6m22s
CI/CD / Deploy · hamkadr (push) Failing after 3s

- AppSetting gains source config: AutoIngestEnabled, IngestIntervalMinutes, Telegram/Bale/Divar enabled+channels/token/queries
- IListingSource.FetchAsync(AppSetting) — sources read config from DB, not IOptions/appsettings; sample source dev-only
- IngestionWorker reads AutoIngest+interval from DB each cycle (toggle at runtime, no redeploy)
- /Admin/Settings gets a 'منابع جمع‌آوری' section; removed Ingestion env/appsettings + compose env vars
- ENV_FILE shrinks to HOST_PORT + POSTGRES_* + ADMIN_PHONE (AI + sources are all in-admin); migration

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-04 00:44:11 +03:30
parent 6cfdd16c42
commit 3c08c1a265
20 changed files with 1217 additions and 167 deletions
+5 -14
View File
@@ -65,21 +65,12 @@ POSTGRES_PASSWORD=__CHANGE_ME__
# Platform admin phone (gets the Admin role on login)
ADMIN_PHONE=09XXXXXXXXX
# --- Channel scraping (optional; off by default) — toggles ---
# INGESTION_ENABLED=true
# INGESTION_INTERVAL_MINUTES=30
# TELEGRAM_ENABLED=true
# TELEGRAM_BOT_TOKEN=__TELEGRAM_BOT_TOKEN__
# BALE_ENABLED=true
# BALE_BOT_TOKEN=__BALE_BOT_TOKEN__
# DIVAR_ENABLED=true
```
> Channel **lists** (`Telegram.Channels`, `Divar.Queries`) live in `appsettings.json` (or add
> `Ingestion__Telegram__Channels__0=...` keys). The toggles above gate each source on/off.
> The **AI audit layer** is configured at runtime in the admin panel (`/Admin/Settings`) — endpoint,
> model, API key, prompt/framework, auto-approve — not via env. Default: AI off, mode = Manual,
> so every ingested listing waits in the review queue until an admin publishes it.
> **That's the whole secret.** Everything else — the **AI audit layer** *and* the **channel
> sources** (Telegram channels, Bale bot token, Divar queries, auto-ingest on/off + interval) — is
> configured at runtime in the admin panel (`/Admin/Settings`), stored in the DB. No redeploy to
> change them. Defaults: AI off, mode = Manual, all sources off ⇒ nothing publishes without admin
> review.
> `ASPNETCORE_ENVIRONMENT=Production` is set by the compose file ⇒ only **reference data**
> (roles/cities/districts) is seeded — no demo facilities/shifts.
+1 -8
View File
@@ -23,14 +23,7 @@ services:
ASPNETCORE_URLS: "http://+:8080"
ConnectionStrings__Default: "Host=db;Port=5432;Database=${POSTGRES_DB:-hamkadr};Username=${POSTGRES_USER:-hamkadr};Password=${POSTGRES_PASSWORD}"
Auth__AdminPhone: "${ADMIN_PHONE:-}"
# Channel scraping (optional; enable + configure via ENV_FILE)
Ingestion__Enabled: "${INGESTION_ENABLED:-false}"
Ingestion__IntervalMinutes: "${INGESTION_INTERVAL_MINUTES:-30}"
Ingestion__Telegram__Enabled: "${TELEGRAM_ENABLED:-false}"
Ingestion__Telegram__BotToken: "${TELEGRAM_BOT_TOKEN:-}"
Ingestion__Bale__Enabled: "${BALE_ENABLED:-false}"
Ingestion__Bale__BotToken: "${BALE_BOT_TOKEN:-}"
Ingestion__Divar__Enabled: "${DIVAR_ENABLED:-false}"
# Ingestion + AI + channel sources are configured at runtime in /Admin/Settings (DB), not here.
# healthcheck is defined in the Dockerfile (bash /dev/tcp probe) so the deploy
# job's `docker inspect Health.Status` wait works.
@@ -0,0 +1,873 @@
// <auto-generated />
using System;
using JobsMedical.Web.Data;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
#nullable disable
namespace JobsMedical.Web.Migrations
{
// Designer half of the "IngestionSourcesInSettings" migration (id 20260603210638).
// BuildTargetModel records the complete EF Core model as it stands once this migration
// has been applied. The file is tool-generated (see the <auto-generated /> marker at the
// top of the file); regenerate it with the EF tooling rather than editing it by hand.
[DbContext(typeof(AppDbContext))]
[Migration("20260603210638_IngestionSourcesInSettings")]
partial class IngestionSourcesInSettings
{
/// <inheritdoc />
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasAnnotation("ProductVersion", "10.0.0")
.HasAnnotation("Relational:MaxIdentifierLength", 63);
NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder);
// ----- Entity column definitions (one block per table) -----
// AppSettings: the columns added by this migration's Up() are AutoIngestEnabled,
// BaleBotToken, BaleEnabled, DivarCity, DivarEnabled, DivarQueries,
// IngestIntervalMinutes, TelegramChannels and TelegramEnabled.
modelBuilder.Entity("JobsMedical.Web.Models.AppSetting", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("AiApiKey")
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<bool>("AiAutoApprove")
.HasColumnType("boolean");
b.Property<bool>("AiEnabled")
.HasColumnType("boolean");
b.Property<string>("AiEndpoint")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<string>("AiModel")
.HasMaxLength(120)
.HasColumnType("character varying(120)");
b.Property<string>("AiSystemPrompt")
.IsRequired()
.HasMaxLength(4000)
.HasColumnType("character varying(4000)");
b.Property<bool>("AutoIngestEnabled")
.HasColumnType("boolean");
b.Property<int>("AutoPublishMinConfidence")
.HasColumnType("integer");
b.Property<string>("BaleBotToken")
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<bool>("BaleEnabled")
.HasColumnType("boolean");
b.Property<string>("DivarCity")
.HasMaxLength(60)
.HasColumnType("character varying(60)");
b.Property<bool>("DivarEnabled")
.HasColumnType("boolean");
b.Property<string>("DivarQueries")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
b.Property<int>("IngestIntervalMinutes")
.HasColumnType("integer");
b.Property<int>("Mode")
.HasColumnType("integer");
b.Property<string>("TelegramChannels")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
b.Property<bool>("TelegramEnabled")
.HasColumnType("boolean");
b.Property<DateTime>("UpdatedAt")
.HasColumnType("timestamp with time zone");
b.HasKey("Id");
b.ToTable("AppSettings");
});
modelBuilder.Entity("JobsMedical.Web.Models.Application", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<int>("DoctorId")
.HasColumnType("integer");
b.Property<string>("Message")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<int>("ShiftId")
.HasColumnType("integer");
b.Property<int>("Status")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("DoctorId");
// Unique pair: at most one application row per (shift, doctor).
b.HasIndex("ShiftId", "DoctorId")
.IsUnique();
b.ToTable("Applications");
});
modelBuilder.Entity("JobsMedical.Web.Models.City", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<bool>("IsActive")
.HasColumnType("boolean");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<string>("Province")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.HasKey("Id");
b.ToTable("Cities");
});
modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int>("CityId")
.HasColumnType("integer");
b.Property<bool>("IsActive")
.HasColumnType("boolean");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(120)
.HasColumnType("character varying(120)");
b.HasKey("Id");
b.HasIndex("CityId");
b.ToTable("Districts");
});
modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Bio")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.Property<int?>("CityId")
.HasColumnType("integer");
b.Property<bool>("IsVerified")
.HasColumnType("boolean");
b.Property<string>("LicenseNo")
.HasMaxLength(20)
.HasColumnType("character varying(20)");
b.Property<int?>("RoleId")
.HasColumnType("integer");
b.Property<string>("Specialty")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<int>("UserId")
.HasColumnType("integer");
b.Property<int>("YearsExperience")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("CityId");
b.HasIndex("RoleId");
// Unique: a user has at most one doctor profile.
b.HasIndex("UserId")
.IsUnique();
b.ToTable("DoctorProfiles");
});
modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Address")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<string>("BaleId")
.HasMaxLength(50)
.HasColumnType("character varying(50)");
b.Property<int>("CityId")
.HasColumnType("integer");
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<int?>("DistrictId")
.HasColumnType("integer");
b.Property<bool>("IsVerified")
.HasColumnType("boolean");
b.Property<double?>("Lat")
.HasColumnType("double precision");
b.Property<double?>("Lng")
.HasColumnType("double precision");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<int?>("OwnerUserId")
.HasColumnType("integer");
b.Property<string>("Phone")
.HasMaxLength(20)
.HasColumnType("character varying(20)");
b.Property<int>("Type")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("CityId");
b.HasIndex("DistrictId");
b.HasIndex("OwnerUserId");
b.ToTable("Facilities");
});
modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
{
b.Property<long>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("bigint");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<long>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<int>("EventType")
.HasColumnType("integer");
b.Property<int?>("JobOpeningId")
.HasColumnType("integer");
b.Property<int?>("ShiftId")
.HasColumnType("integer");
b.Property<string>("VisitorId")
.IsRequired()
.HasColumnType("character varying(36)");
b.HasKey("Id");
b.HasIndex("JobOpeningId");
b.HasIndex("ShiftId");
b.HasIndex("VisitorId", "CreatedAt");
b.ToTable("InterestEvents");
});
modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<string>("Description")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
b.Property<int>("EmploymentType")
.HasColumnType("integer");
b.Property<int>("FacilityId")
.HasColumnType("integer");
b.Property<int>("GenderRequirement")
.HasColumnType("integer");
b.Property<string>("Requirements")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.Property<int>("RoleId")
.HasColumnType("integer");
b.Property<long?>("SalaryMax")
.HasColumnType("bigint");
b.Property<long?>("SalaryMin")
.HasColumnType("bigint");
b.Property<int>("Source")
.HasColumnType("integer");
b.Property<string>("SourceUrl")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<int>("Status")
.HasColumnType("integer");
b.Property<string>("Title")
.IsRequired()
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.HasKey("Id");
b.HasIndex("FacilityId");
b.HasIndex("RoleId");
b.HasIndex("Status");
b.ToTable("JobOpenings");
});
modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int>("Confidence")
.HasColumnType("integer");
b.Property<string>("ContentHash")
.HasMaxLength(64)
.HasColumnType("character varying(64)");
b.Property<DateTime>("FetchedAt")
.HasColumnType("timestamp with time zone");
b.Property<int?>("LinkedShiftId")
.HasColumnType("integer");
b.Property<string>("ParsedJson")
.HasColumnType("text");
b.Property<string>("RawText")
.IsRequired()
.HasColumnType("text");
b.Property<string>("SourceChannel")
.IsRequired()
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<string>("SourceUrl")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<int>("Status")
.HasColumnType("integer");
b.Property<string>("ValidationNotes")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.HasKey("Id");
// NOTE(review): ContentHash is indexed but not unique here — confirm de-duplication is enforced in code.
b.HasIndex("ContentHash");
b.HasIndex("LinkedShiftId");
b.HasIndex("Status");
b.ToTable("RawListings");
});
modelBuilder.Entity("JobsMedical.Web.Models.Role", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Category")
.IsRequired()
.HasMaxLength(50)
.HasColumnType("character varying(50)");
b.Property<bool>("IsActive")
.HasColumnType("boolean");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<int>("SortOrder")
.HasColumnType("integer");
b.HasKey("Id");
b.ToTable("Roles");
});
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<DateOnly>("Date")
.HasColumnType("date");
b.Property<string>("Description")
.HasMaxLength(1500)
.HasColumnType("character varying(1500)");
b.Property<TimeOnly>("EndTime")
.HasColumnType("time without time zone");
b.Property<int>("FacilityId")
.HasColumnType("integer");
b.Property<int>("GenderRequirement")
.HasColumnType("integer");
b.Property<long?>("PayAmount")
.HasColumnType("bigint");
b.Property<int>("PayType")
.HasColumnType("integer");
b.Property<int>("RoleId")
.HasColumnType("integer");
b.Property<int?>("SharePercent")
.HasColumnType("integer");
b.Property<int>("ShiftType")
.HasColumnType("integer");
b.Property<int>("Source")
.HasColumnType("integer");
b.Property<string>("SourceUrl")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<string>("SpecialtyRequired")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<TimeOnly>("StartTime")
.HasColumnType("time without time zone");
b.Property<int>("Status")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("FacilityId");
b.HasIndex("RoleId");
b.HasIndex("Date", "Status");
b.ToTable("Shifts");
});
modelBuilder.Entity("JobsMedical.Web.Models.User", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<string>("FullName")
.HasMaxLength(150)
.HasColumnType("character varying(150)");
b.Property<bool>("IsPhoneVerified")
.HasColumnType("boolean");
b.Property<string>("Phone")
.IsRequired()
.HasMaxLength(20)
.HasColumnType("character varying(20)");
b.Property<int>("Role")
.HasColumnType("integer");
b.HasKey("Id");
// Unique: phone numbers identify users.
b.HasIndex("Phone")
.IsUnique();
b.ToTable("Users");
});
modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int?>("CityId")
.HasColumnType("integer");
b.Property<int>("Gender")
.HasColumnType("integer");
b.Property<long?>("MinPay")
.HasColumnType("bigint");
b.Property<int?>("PreferredShiftType")
.HasColumnType("integer");
b.Property<int?>("RoleId")
.HasColumnType("integer");
b.Property<DateTime>("UpdatedAt")
.HasColumnType("timestamp with time zone");
b.Property<string>("VisitorId")
.IsRequired()
.HasColumnType("character varying(36)");
b.HasKey("Id");
b.HasIndex("CityId");
b.HasIndex("RoleId");
// Unique: one preferences row per visitor.
b.HasIndex("VisitorId")
.IsUnique();
b.ToTable("UserPreferences");
});
modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
{
// String key capped at 36 characters (not a database-generated identity column).
b.Property<string>("Id")
.HasMaxLength(36)
.HasColumnType("character varying(36)");
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<DateTime>("LastSeenAt")
.HasColumnType("timestamp with time zone");
b.Property<int?>("UserId")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("UserId");
b.ToTable("Visitors");
});
// ----- Relationships: foreign keys, delete behaviour and reference navigations -----
modelBuilder.Entity("JobsMedical.Web.Models.Application", b =>
{
b.HasOne("JobsMedical.Web.Models.User", "Doctor")
.WithMany("Applications")
.HasForeignKey("DoctorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.Shift", "Shift")
.WithMany("Applications")
.HasForeignKey("ShiftId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("Doctor");
b.Navigation("Shift");
});
modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany()
.HasForeignKey("CityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("City");
});
modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany()
.HasForeignKey("CityId");
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany()
.HasForeignKey("RoleId");
b.HasOne("JobsMedical.Web.Models.User", "User")
.WithOne("DoctorProfile")
.HasForeignKey("JobsMedical.Web.Models.DoctorProfile", "UserId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("City");
b.Navigation("Role");
b.Navigation("User");
});
modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany("Facilities")
.HasForeignKey("CityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.District", "District")
.WithMany("Facilities")
.HasForeignKey("DistrictId")
.OnDelete(DeleteBehavior.SetNull);
b.HasOne("JobsMedical.Web.Models.User", "OwnerUser")
.WithMany()
.HasForeignKey("OwnerUserId")
.OnDelete(DeleteBehavior.SetNull);
b.Navigation("City");
b.Navigation("District");
b.Navigation("OwnerUser");
});
modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
{
b.HasOne("JobsMedical.Web.Models.JobOpening", "JobOpening")
.WithMany()
.HasForeignKey("JobOpeningId")
.OnDelete(DeleteBehavior.Cascade);
b.HasOne("JobsMedical.Web.Models.Shift", "Shift")
.WithMany()
.HasForeignKey("ShiftId")
.OnDelete(DeleteBehavior.Cascade);
b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor")
.WithMany("Events")
.HasForeignKey("VisitorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("JobOpening");
b.Navigation("Shift");
b.Navigation("Visitor");
});
modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b =>
{
b.HasOne("JobsMedical.Web.Models.Facility", "Facility")
.WithMany()
.HasForeignKey("FacilityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
// Restrict: the Role foreign key does not cascade deletes to job openings.
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany()
.HasForeignKey("RoleId")
.OnDelete(DeleteBehavior.Restrict)
.IsRequired();
b.Navigation("Facility");
b.Navigation("Role");
});
modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b =>
{
b.HasOne("JobsMedical.Web.Models.Shift", "LinkedShift")
.WithMany()
.HasForeignKey("LinkedShiftId");
b.Navigation("LinkedShift");
});
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{
b.HasOne("JobsMedical.Web.Models.Facility", "Facility")
.WithMany("Shifts")
.HasForeignKey("FacilityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
// Restrict: the Role foreign key does not cascade deletes to shifts.
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany("Shifts")
.HasForeignKey("RoleId")
.OnDelete(DeleteBehavior.Restrict)
.IsRequired();
b.Navigation("Facility");
b.Navigation("Role");
});
modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany()
.HasForeignKey("CityId");
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany()
.HasForeignKey("RoleId");
b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor")
.WithOne("Preferences")
.HasForeignKey("JobsMedical.Web.Models.UserPreferences", "VisitorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("City");
b.Navigation("Role");
b.Navigation("Visitor");
});
modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
{
b.HasOne("JobsMedical.Web.Models.User", "User")
.WithMany()
.HasForeignKey("UserId")
.OnDelete(DeleteBehavior.SetNull);
b.Navigation("User");
});
// ----- Navigations declared on the other end of the relationships above -----
modelBuilder.Entity("JobsMedical.Web.Models.City", b =>
{
b.Navigation("Facilities");
});
modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
{
b.Navigation("Facilities");
});
modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
{
b.Navigation("Shifts");
});
modelBuilder.Entity("JobsMedical.Web.Models.Role", b =>
{
b.Navigation("Shifts");
});
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{
b.Navigation("Applications");
});
modelBuilder.Entity("JobsMedical.Web.Models.User", b =>
{
b.Navigation("Applications");
b.Navigation("DoctorProfile");
});
modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
{
b.Navigation("Events");
b.Navigation("Preferences");
});
#pragma warning restore 612, 618
}
}
}
@@ -0,0 +1,117 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace JobsMedical.Web.Migrations
{
    /// <summary>
    /// Adds the ingestion/source configuration columns to <c>AppSettings</c> so that
    /// auto-ingest, its interval, and the Telegram/Bale/Divar sources are configured from
    /// the admin settings page (stored in the DB) instead of env/appsettings.
    /// </summary>
    public partial class IngestionSourcesInSettings : Migration
    {
        /// <summary>
        /// Adds the nine new columns. Non-nullable columns carry a default that is used to
        /// backfill rows that already exist when the migration runs.
        /// </summary>
        /// <param name="migrationBuilder">Builder that collects the schema operations.</param>
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.AddColumn<bool>(
                name: "AutoIngestEnabled",
                table: "AppSettings",
                type: "boolean",
                nullable: false,
                defaultValue: false);

            migrationBuilder.AddColumn<string>(
                name: "BaleBotToken",
                table: "AppSettings",
                type: "character varying(200)",
                maxLength: 200,
                nullable: true);

            migrationBuilder.AddColumn<bool>(
                name: "BaleEnabled",
                table: "AppSettings",
                type: "boolean",
                nullable: false,
                defaultValue: false);

            migrationBuilder.AddColumn<string>(
                name: "DivarCity",
                table: "AppSettings",
                type: "character varying(60)",
                maxLength: 60,
                nullable: true);

            migrationBuilder.AddColumn<bool>(
                name: "DivarEnabled",
                table: "AppSettings",
                type: "boolean",
                nullable: false,
                defaultValue: false);

            migrationBuilder.AddColumn<string>(
                name: "DivarQueries",
                table: "AppSettings",
                type: "character varying(2000)",
                maxLength: 2000,
                nullable: true);

            // The scaffolded default was 0 (the CLR default for int), which would leave any
            // pre-existing settings row with a 0-minute interval. The AppSetting model defaults
            // this property to 30 and the admin form requires a value of at least 1, so backfill
            // existing rows with 30 to match the model.
            migrationBuilder.AddColumn<int>(
                name: "IngestIntervalMinutes",
                table: "AppSettings",
                type: "integer",
                nullable: false,
                defaultValue: 30);

            migrationBuilder.AddColumn<string>(
                name: "TelegramChannels",
                table: "AppSettings",
                type: "character varying(2000)",
                maxLength: 2000,
                nullable: true);

            migrationBuilder.AddColumn<bool>(
                name: "TelegramEnabled",
                table: "AppSettings",
                type: "boolean",
                nullable: false,
                defaultValue: false);
        }

        /// <summary>
        /// Reverts <see cref="Up"/> by dropping every column it added. Values stored in those
        /// columns are lost.
        /// </summary>
        /// <param name="migrationBuilder">Builder that collects the schema operations.</param>
        protected override void Down(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.DropColumn(
                name: "AutoIngestEnabled",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "BaleBotToken",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "BaleEnabled",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "DivarCity",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "DivarEnabled",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "DivarQueries",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "IngestIntervalMinutes",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "TelegramChannels",
                table: "AppSettings");

            migrationBuilder.DropColumn(
                name: "TelegramEnabled",
                table: "AppSettings");
        }
    }
}
@@ -53,12 +53,43 @@ namespace JobsMedical.Web.Migrations
.HasMaxLength(4000)
.HasColumnType("character varying(4000)");
b.Property<bool>("AutoIngestEnabled")
.HasColumnType("boolean");
b.Property<int>("AutoPublishMinConfidence")
.HasColumnType("integer");
b.Property<string>("BaleBotToken")
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<bool>("BaleEnabled")
.HasColumnType("boolean");
b.Property<string>("DivarCity")
.HasMaxLength(60)
.HasColumnType("character varying(60)");
b.Property<bool>("DivarEnabled")
.HasColumnType("boolean");
b.Property<string>("DivarQueries")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
b.Property<int>("IngestIntervalMinutes")
.HasColumnType("integer");
b.Property<int>("Mode")
.HasColumnType("integer");
b.Property<string>("TelegramChannels")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
b.Property<bool>("TelegramEnabled")
.HasColumnType("boolean");
b.Property<DateTime>("UpdatedAt")
.HasColumnType("timestamp with time zone");
+23
View File
@@ -32,8 +32,31 @@ public class AppSetting
/// <summary>If AI approves AND Mode is Automatic, publish without human review.</summary>
public bool AiAutoApprove { get; set; } = false;
// --- Channel scraping sources (configured here, NOT in env) ---
/// <summary>Run the ingestion worker on a timer.</summary>
public bool AutoIngestEnabled { get; set; } = false;
/// <summary>Minutes between automatic ingestion runs; defaults to 30.</summary>
public int IngestIntervalMinutes { get; set; } = 30;
/// <summary>Turns the Telegram source on or off.</summary>
public bool TelegramEnabled { get; set; } = false;
/// <summary>Public Telegram channel usernames, one per line or comma-separated.</summary>
[MaxLength(2000)] public string? TelegramChannels { get; set; }
/// <summary>Turns the Bale source on or off.</summary>
public bool BaleEnabled { get; set; } = false;
/// <summary>Bale bot token (a secret); stored at most 200 characters long.</summary>
[MaxLength(200)] public string? BaleBotToken { get; set; }
/// <summary>Turns the Divar source on or off.</summary>
public bool DivarEnabled { get; set; } = false;
/// <summary>Divar city slug; defaults to "tehran".</summary>
[MaxLength(60)] public string? DivarCity { get; set; } = "tehran";
/// <summary>Divar search terms, one per line or comma-separated.</summary>
[MaxLength(2000)] public string? DivarQueries { get; set; }
/// <summary>Initialised to the current UTC time when the object is constructed.</summary>
// NOTE(review): presumably refreshed whenever settings are saved — confirm in the settings service.
public DateTime UpdatedAt { get; set; } = DateTime.UtcNow;
/// <summary>
/// Breaks list-style text into its individual entries. Entries may be separated by line
/// breaks, an ASCII comma or the Arabic/Persian comma; each entry is trimmed and blank
/// entries are discarded.
/// </summary>
/// <param name="s">Raw text; may be null, empty or whitespace.</param>
/// <returns>A new list of the entries in their original order; empty when there is no content.</returns>
public static List<string> SplitList(string? s)
{
    // Nothing usable typed in: hand back a fresh empty list rather than null.
    if (string.IsNullOrWhiteSpace(s))
    {
        return new List<string>();
    }

    char[] separators = { '\n', '\r', ',', '،' };
    StringSplitOptions cleanup = StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;

    string[] pieces = s.Split(separators, cleanup);
    return new List<string>(pieces);
}
public const string DefaultPrompt = """
تو دستیار بررسی آگهیهای کاری حوزه درمان برای پلتفرم «همکادر» هستی.
هر آگهی خام را بخوان و تصمیم بگیر:
+4 -10
View File
@@ -26,16 +26,10 @@
<div class="layout-2">
<aside class="card card-pad filter-card">
<h3>موتور جمع‌آوری</h3>
<p class="muted" style="font-size:13px;">منابع متصل:</p>
<ul style="margin:0 0 12px; padding-inline-start:18px; font-size:13.5px;">
@foreach (var src in Model.Sources)
{
<li>@src.Name —
@if (src.Enabled) { <span style="color:var(--primary-dark);">فعال</span> }
else { <span class="muted">غیرفعال (نیازمند تنظیمات)</span> }
</li>
}
</ul>
<p class="muted" style="font-size:13px;">منابع: @string.Join("، ", Model.SourceNames)</p>
<p class="muted" style="font-size:12px; margin:0 0 12px;">
فعال/غیرفعال‌سازی و تنظیم کانال‌ها در <a asp-page="/Admin/Settings">تنظیمات</a>.
</p>
<form method="post">
<button type="submit" asp-page-handler="RunIngestion" class="btn btn-accent btn-block">اجرای جمع‌آوری اکنون</button>
</form>
@@ -22,7 +22,7 @@ public class IndexModel : PageModel
public List<RawListing> Queue { get; private set; } = new();
public List<RawListing> Flagged { get; private set; } = new();
public IReadOnlyList<(string Name, bool Enabled)> Sources { get; private set; } = new List<(string, bool)>();
public IReadOnlyList<string> SourceNames { get; private set; } = new List<string>();
public int PublishedShifts { get; private set; }
public int PublishedJobs { get; private set; }
@@ -64,7 +64,7 @@ public class IndexModel : PageModel
Flagged = await _db.RawListings
.Where(r => r.Status == RawListingStatus.Flagged)
.OrderByDescending(r => r.FetchedAt).ToListAsync();
Sources = _ingest.Sources;
SourceNames = _ingest.SourceNames;
PublishedShifts = await _db.Shifts.CountAsync(s => s.Source != ShiftSource.Direct);
PublishedJobs = await _db.JobOpenings.CountAsync();
}
@@ -62,6 +62,49 @@
</label>
</div>
<hr style="border:none; border-top:1px solid var(--line); margin:18px 0;" />
<h3 style="margin-top:0;">منابع جمع‌آوری (اسکرپ کانال‌ها)</h3>
<div class="filter-group">
<label style="display:flex; align-items:center; gap:8px; font-weight:700;">
<input type="checkbox" name="AutoIngestEnabled" value="true" style="width:auto;" checked="@Model.AutoIngestEnabled" />
اجرای خودکار جمع‌آوری روی زمان‌بند
</label>
</div>
<div class="filter-group">
<label>فاصله اجرای خودکار (دقیقه)</label>
<input type="number" name="IngestIntervalMinutes" min="1" value="@Model.IngestIntervalMinutes" dir="ltr" />
</div>
<div class="filter-group">
<label style="display:flex; align-items:center; gap:8px; font-weight:700;">
<input type="checkbox" name="TelegramEnabled" value="true" style="width:auto;" checked="@Model.TelegramEnabled" />
تلگرام (کانال‌های عمومی — بدون توکن)
</label>
<label style="margin-top:6px;">یوزرنیم کانال‌ها (هر خط یک کانال)</label>
<textarea name="TelegramChannels" rows="3" dir="ltr" placeholder="shift_channel&#10;another_channel">@Model.TelegramChannels</textarea>
</div>
<div class="filter-group">
<label style="display:flex; align-items:center; gap:8px; font-weight:700;">
<input type="checkbox" name="BaleEnabled" value="true" style="width:auto;" checked="@Model.BaleEnabled" />
بله (بات باید عضو کانال باشد)
</label>
<label style="margin-top:6px;">توکن بات بله</label>
<input type="password" name="BaleBotToken" value="@Model.BaleBotToken" dir="ltr" />
</div>
<div class="filter-group">
<label style="display:flex; align-items:center; gap:8px; font-weight:700;">
<input type="checkbox" name="DivarEnabled" value="true" style="width:auto;" checked="@Model.DivarEnabled" />
دیوار
</label>
<div style="display:flex; gap:8px; margin-top:6px;">
<div style="flex:0 0 120px;"><label>شهر (slug)</label><input type="text" name="DivarCity" value="@Model.DivarCity" dir="ltr" placeholder="tehran" /></div>
<div style="flex:1;"><label>عبارت‌های جستجو (هر خط یکی)</label><textarea name="DivarQueries" rows="3">@Model.DivarQueries</textarea></div>
</div>
</div>
<button type="submit" class="btn btn-accent btn-block btn-lg">ذخیره تنظیمات</button>
</form>
</div>
@@ -20,6 +20,16 @@ public class SettingsModel : PageModel
[BindProperty] public string? AiModel { get; set; }
[BindProperty] public string AiSystemPrompt { get; set; } = "";
[BindProperty] public bool AiAutoApprove { get; set; }
// Channel scraping sources
[BindProperty] public bool AutoIngestEnabled { get; set; }
[BindProperty] public int IngestIntervalMinutes { get; set; } = 30;
[BindProperty] public bool TelegramEnabled { get; set; }
[BindProperty] public string? TelegramChannels { get; set; }
[BindProperty] public bool BaleEnabled { get; set; }
[BindProperty] public string? BaleBotToken { get; set; }
[BindProperty] public bool DivarEnabled { get; set; }
[BindProperty] public string? DivarCity { get; set; }
[BindProperty] public string? DivarQueries { get; set; }
[TempData] public string? Saved { get; set; }
public async Task OnGetAsync()
@@ -33,6 +43,15 @@ public class SettingsModel : PageModel
AiModel = s.AiModel;
AiSystemPrompt = s.AiSystemPrompt;
AiAutoApprove = s.AiAutoApprove;
AutoIngestEnabled = s.AutoIngestEnabled;
IngestIntervalMinutes = s.IngestIntervalMinutes;
TelegramEnabled = s.TelegramEnabled;
TelegramChannels = s.TelegramChannels;
BaleEnabled = s.BaleEnabled;
BaleBotToken = s.BaleBotToken;
DivarEnabled = s.DivarEnabled;
DivarCity = s.DivarCity;
DivarQueries = s.DivarQueries;
}
public async Task<IActionResult> OnPostAsync()
@@ -47,6 +66,15 @@ public class SettingsModel : PageModel
AiModel = AiModel,
AiSystemPrompt = AiSystemPrompt,
AiAutoApprove = AiAutoApprove,
AutoIngestEnabled = AutoIngestEnabled,
IngestIntervalMinutes = IngestIntervalMinutes,
TelegramEnabled = TelegramEnabled,
TelegramChannels = TelegramChannels,
BaleEnabled = BaleEnabled,
BaleBotToken = BaleBotToken,
DivarEnabled = DivarEnabled,
DivarCity = DivarCity,
DivarQueries = DivarQueries,
});
Saved = "تنظیمات ذخیره شد.";
return RedirectToPage();
-8
View File
@@ -28,14 +28,6 @@ builder.Services.AddHttpClient("scrape", c =>
c.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; HamkadrBot/1.0)");
});
builder.Services.AddHttpClient("ai");
builder.Services.Configure<JobsMedical.Web.Services.Scraping.IngestionOptions>(
builder.Configuration.GetSection("Ingestion"));
builder.Services.Configure<JobsMedical.Web.Services.Scraping.TelegramOptions>(
builder.Configuration.GetSection("Ingestion:Telegram"));
builder.Services.Configure<JobsMedical.Web.Services.Scraping.BaleOptions>(
builder.Configuration.GetSection("Ingestion:Bale"));
builder.Services.Configure<JobsMedical.Web.Services.Scraping.DivarOptions>(
builder.Configuration.GetSection("Ingestion:Divar"));
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.ListingValidator>();
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.IAiAuditor,
JobsMedical.Web.Services.Scraping.OpenAiCompatibleAuditor>();
@@ -1,46 +1,34 @@
using System.Text.Json;
using Microsoft.Extensions.Options;
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
public class BaleOptions
{
public bool Enabled { get; set; }
public string? BotToken { get; set; }
public string BaseUrl { get; set; } = "https://tapi.bale.ai"; // Bale Bot API host
}
/// <summary>
/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. The bot must
/// be a member/admin of the channels it should read. Pulls text from messages and channel posts.
/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. Enabled +
/// bot token come from admin settings (DB). The bot must be a member of the channels it reads.
/// </summary>
public class BaleListingSource : IListingSource
{
private readonly BaleOptions _opts;
private const string BaseUrl = "https://tapi.bale.ai";
private readonly IHttpClientFactory _http;
private readonly ILogger<BaleListingSource> _log;
public BaleListingSource(IOptions<BaleOptions> opts, IHttpClientFactory http,
ILogger<BaleListingSource> log)
public BaleListingSource(IHttpClientFactory http, ILogger<BaleListingSource> log)
{
_opts = opts.Value;
_http = http;
_log = log;
}
public string Name => "بله";
public bool Enabled => _opts.Enabled && !string.IsNullOrWhiteSpace(_opts.BotToken);
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
{
if (!Enabled) { _log.LogInformation("Bale source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); }
if (!s.BaleEnabled || string.IsNullOrWhiteSpace(s.BaleBotToken)) return Array.Empty<ScrapedItem>();
try
{
var client = _http.CreateClient("scrape");
var url = $"{_opts.BaseUrl.TrimEnd('/')}/bot{_opts.BotToken}/getUpdates";
var body = await client.GetStringAsync(url, ct);
var body = await client.GetStringAsync($"{BaseUrl}/bot{s.BaleBotToken}/getUpdates", ct);
using var doc = JsonDocument.Parse(body);
if (!doc.RootElement.TryGetProperty("result", out var result) || result.ValueKind != JsonValueKind.Array)
return Array.Empty<ScrapedItem>();
@@ -54,11 +42,7 @@ public class BaleListingSource : IListingSource
}
return items;
}
catch (Exception ex)
{
_log.LogWarning(ex, "Bale fetch failed.");
return Array.Empty<ScrapedItem>();
}
catch (Exception ex) { _log.LogWarning(ex, "Bale fetch failed."); return Array.Empty<ScrapedItem>(); }
}
private static string? TextOf(JsonElement update, string key)
@@ -1,55 +1,44 @@
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Options;
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
public class DivarOptions
{
public bool Enabled { get; set; }
public string City { get; set; } = "tehran";
public string Category { get; set; } = "jobs";
public List<string> Queries { get; set; } = new(); // e.g. "پرستار", "پزشک عمومی", "درمانگاه"
public string BaseUrl { get; set; } = "https://api.divar.ir/v8/web-search";
public int PerQuery { get; set; } = 25;
}
/// <summary>
/// Best-effort Divar fetch: queries Divar's web-search JSON for each term and harvests post
/// titles + descriptions. Divar's private API shifts shape over time, so we walk the JSON
/// tolerantly for any object carrying a "title" plus a nearby description field, and fail soft.
/// titles + descriptions. Enabled + city + queries come from admin settings (DB). Divar's
/// private API shifts shape, so we walk JSON tolerantly and fail soft.
/// </summary>
public class DivarListingSource : IListingSource
{
private readonly DivarOptions _opts;
private const string BaseUrl = "https://api.divar.ir/v8/web-search";
private readonly IHttpClientFactory _http;
private readonly ILogger<DivarListingSource> _log;
public DivarListingSource(IOptions<DivarOptions> opts, IHttpClientFactory http,
ILogger<DivarListingSource> log)
public DivarListingSource(IHttpClientFactory http, ILogger<DivarListingSource> log)
{
_opts = opts.Value;
_http = http;
_log = log;
}
public string Name => "دیوار";
public bool Enabled => _opts.Enabled && _opts.Queries.Count > 0;
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
{
if (!Enabled) { _log.LogInformation("Divar source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); }
var queries = AppSetting.SplitList(s.DivarQueries);
if (!s.DivarEnabled || queries.Count == 0) return Array.Empty<ScrapedItem>();
var city = string.IsNullOrWhiteSpace(s.DivarCity) ? "tehran" : s.DivarCity.Trim();
var client = _http.CreateClient("scrape");
var items = new List<ScrapedItem>();
foreach (var q in _opts.Queries.Where(q => q.Trim().Length > 0))
foreach (var q in queries)
{
try
{
var url = $"{_opts.BaseUrl.TrimEnd('/')}/{_opts.City}/{_opts.Category}?q={Uri.EscapeDataString(q)}";
var url = $"{BaseUrl}/{city}/jobs?q={Uri.EscapeDataString(q)}";
var body = await client.GetStringAsync(url, ct);
using var doc = JsonDocument.Parse(body);
foreach (var text in Harvest(doc.RootElement).Take(_opts.PerQuery))
foreach (var text in Harvest(doc.RootElement).Take(25))
items.Add(new ScrapedItem("دیوار", text, "https://divar.ir"));
}
catch (Exception ex) { _log.LogWarning(ex, "Divar fetch failed for query {Query}", q); }
@@ -60,7 +49,6 @@ public class DivarListingSource : IListingSource
// JSON property names under which a Divar search result may carry its description text.
// NOTE(review): presumably probed in this order to pick the first description found next to
// a "title" — the lookup itself falls outside the visible part of this file; confirm there.
private static readonly string[] DescKeys =
{ "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" };
/// <summary>Walk the JSON; for each object with a string "title", emit title + first description.</summary>
private static IEnumerable<string> Harvest(JsonElement el)
{
if (el.ValueKind == JsonValueKind.Object)
@@ -75,12 +63,12 @@ public class DivarListingSource : IListingSource
if (text.Length >= 15) yield return text;
}
foreach (var p in el.EnumerateObject())
foreach (var s in Harvest(p.Value)) yield return s;
foreach (var x in Harvest(p.Value)) yield return x;
}
else if (el.ValueKind == JsonValueKind.Array)
{
foreach (var item in el.EnumerateArray())
foreach (var s in Harvest(item)) yield return s;
foreach (var x in Harvest(item)) yield return x;
}
}
}
@@ -1,15 +1,17 @@
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>One raw post pulled from a source (a Telegram message, a Divar ad, etc.).</summary>
/// <param name="Source">Label of where the post came from: the source's display name, or a per-channel label for Telegram.</param>
/// <param name="RawText">The post's text as harvested by the source, before validation or AI audit.</param>
/// <param name="SourceUrl">Optional link back to the origin (channel or site); null when the source supplies none.</param>
public record ScrapedItem(string Source, string RawText, string? SourceUrl = null);
/// <summary>
/// A pluggable source the ingestion engine pulls from. Implement once per channel/site.
/// `Enabled` lets a source be present but dormant until it's configured with credentials.
/// A pluggable source the ingestion engine pulls from. Configuration (enabled, channels, tokens)
/// comes from the DB-backed <see cref="AppSetting"/> passed in — set in the admin panel, not env.
/// A disabled/unconfigured source returns an empty list.
/// </summary>
public interface IListingSource
{
string Name { get; }
bool Enabled { get; }
Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default);
Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting settings, CancellationToken ct = default);
}
@@ -43,8 +43,7 @@ public class IngestionService
_ai = ai; _settings = settings; _log = log;
}
public IReadOnlyList<(string Name, bool Enabled)> Sources =>
_sources.Select(s => (s.Name, s.Enabled)).ToList();
public IReadOnlyList<string> SourceNames => _sources.Select(s => s.Name).ToList();
public async Task<IngestionSummary> RunAsync(CancellationToken ct = default)
{
@@ -58,12 +57,13 @@ public class IngestionService
var results = new List<SourceResult>();
foreach (var source in _sources.Where(s => s.Enabled))
foreach (var source in _sources)
{
int fetched = 0, queued = 0, published = 0, flagged = 0, spam = 0, dupes = 0;
IReadOnlyList<ScrapedItem> items;
try { items = await source.FetchAsync(ct); }
try { items = await source.FetchAsync(settings, ct); }
catch (Exception ex) { _log.LogError(ex, "Source {Source} failed", source.Name); continue; }
if (items.Count == 0) continue; // disabled/unconfigured source
foreach (var item in items)
{
@@ -1,58 +1,52 @@
using Microsoft.Extensions.Options;
namespace JobsMedical.Web.Services.Scraping;
public class IngestionOptions
{
public bool Enabled { get; set; } = false; // off by default — opt in via config
public int IntervalMinutes { get; set; } = 30;
}
/// <summary>
/// Periodically runs the ingestion engine when enabled (Ingestion:Enabled=true). Off by default
/// so nothing scrapes uninvited; admins can also trigger a run on demand from the admin UI.
/// Periodically runs the ingestion engine when the admin has turned auto-ingest ON
/// (AppSetting.AutoIngestEnabled) — read fresh from the DB each cycle, so it can be toggled at
/// runtime from the admin panel with no redeploy. When off, it idles and re-checks.
/// </summary>
public class IngestionWorker : BackgroundService
{
private readonly IServiceScopeFactory _scopes;
private readonly IngestionOptions _opts;
private readonly ILogger<IngestionWorker> _log;
public IngestionWorker(IServiceScopeFactory scopes, IOptions<IngestionOptions> opts,
ILogger<IngestionWorker> log)
public IngestionWorker(IServiceScopeFactory scopes, ILogger<IngestionWorker> log)
{
_scopes = scopes;
_opts = opts.Value;
_log = log;
}
/// <summary>
/// Worker loop. After a short startup delay it repeatedly loads the admin settings from the DB
/// and, when auto-ingest is switched on, runs one ingestion pass and then waits the configured
/// interval. When auto-ingest is off (or a cycle fails) it idles for ten minutes and re-checks,
/// so the feature can be toggled at runtime without a redeploy.
/// </summary>
/// <param name="stoppingToken">Signalled by the host on shutdown; ends the loop promptly.</param>
/// <returns>A task that completes when the host requests shutdown.</returns>
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
    // Wait used when auto-ingest is off or the cycle failed before an interval was read.
    const int DefaultIdleMinutes = 10;
    // Task.Delay throws ArgumentOutOfRangeException for over-long delays; cap the admin-supplied
    // interval at the largest whole-minute value that still fits in int.MaxValue milliseconds
    // (about 24.8 days) so a typo in the settings form cannot terminate the worker.
    const int MaxIdleMinutes = int.MaxValue / 60_000;

    // Small startup delay so the DB/migrations are ready.
    try { await Task.Delay(TimeSpan.FromSeconds(20), stoppingToken); }
    catch (OperationCanceledException) { return; }

    while (!stoppingToken.IsCancellationRequested)
    {
        var idleMinutes = DefaultIdleMinutes;
        try
        {
            // Fresh scope per cycle: settings and the ingestion service are resolved from it,
            // so each pass sees the current DB state.
            using var scope = _scopes.CreateScope();
            var settings = await scope.ServiceProvider
                .GetRequiredService<SettingsService>().GetAsync();
            if (settings.AutoIngestEnabled)
            {
                var svc = scope.ServiceProvider.GetRequiredService<IngestionService>();
                var summary = await svc.RunAsync(stoppingToken);
                _log.LogInformation("Auto-ingest: queued={Q} published={P} flagged={F} spam={S} dupes={D}",
                    summary.TotalQueued, summary.TotalPublished, summary.TotalFlagged,
                    summary.TotalSpam, summary.TotalDuplicates);
                idleMinutes = Math.Clamp(settings.IngestIntervalMinutes, 1, MaxIdleMinutes);
            }
        }
        catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
        {
            // Host is shutting down: leave quietly.
            break;
        }
        catch (Exception ex)
        {
            // Includes cancellations that are NOT a shutdown (e.g. an HTTP timeout surfacing as
            // TaskCanceledException): log and keep the worker alive instead of letting the
            // exception escape ExecuteAsync and end the background service.
            _log.LogError(ex, "Auto-ingest cycle failed");
        }

        try { await Task.Delay(TimeSpan.FromMinutes(idleMinutes), stoppingToken); }
        catch (OperationCanceledException) { break; }
    }
}
@@ -1,27 +1,33 @@
using JobsMedical.Web.Models;
using Microsoft.Extensions.Hosting;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// A built-in source of representative Persian posts (the kind found in shift channels). Always
/// available, needs no credentials — it lets the whole ingestion → validation → review pipeline
/// run and be demoed today, and doubles as a fixture mix of good, incomplete, and spam posts.
/// Built-in representative Persian posts (good, incomplete, and spam) so the whole pipeline can be
/// demoed. Only active in Development — never injects sample data into production.
/// </summary>
public class SampleListingSource : IListingSource
{
private readonly IHostEnvironment _env;
public SampleListingSource(IHostEnvironment env) => _env = env;
public string Name => "نمونه (کانال آزمایشی)";
public bool Enabled => true;
private static readonly string[] Posts =
{
"درمانگاه شبانه‌روزی در سعادت‌آباد نیازمند پزشک عمومی برای شیفت شب، کارانه ۳ میلیون تومان. تماس ۰۹۱۲۳۴۵۶۷۸۹",
"کلینیک تخصصی در تهران به پرستار برای شیفت عصر نیازمند است، ۵۰٪ سهم درآمد. ۰۹۳۵۱۱۱۲۲۳۳",
"کلینیک تخصصی در تهران به پرستار خانم برای شیفت عصر نیازمند است، ۵۰٪ سهم درآمد. ۰۹۳۵۱۱۱۲۲۳۳",
"استخدام ماما تمام‌وقت در بیمارستان خصوصی، حقوق توافقی. منطقه شهرک غرب.",
"نیازمند تکنسین اتاق عمل جهت همکاری در نارمک، شیفت صبح. ۰۹۱۲۰۰۰۰۰۰۰",
"فروش فالوور و بک لینک ارزان، سرمایه گذاری در ارز دیجیتال با سود تضمینی!", // spam
"پزشک", // too short / incomplete
"نیازمند تکنسین اتاق عمل آقا جهت همکاری در نارمک، شیفت صبح. ۰۹۱۲۰۰۰۰۰۰۰",
"فروش فالوور و بک لینک ارزان، سرمایه گذاری در ارز دیجیتال با سود تضمینی!",
"پزشک",
"بیمارستان آتیه جهت تکمیل کادر درمان به پزشک عمومی مقیم نیازمند است. قرارداد یک‌ساله، حقوق ۴۵ میلیون ماهانه. تهرانپارس.",
};
public Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
public Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting settings, CancellationToken ct = default)
=> Task.FromResult<IReadOnlyList<ScrapedItem>>(
Posts.Select(p => new ScrapedItem(Name, p)).ToList());
_env.IsDevelopment()
? Posts.Select(p => new ScrapedItem(Name, p)).ToList()
: Array.Empty<ScrapedItem>());
}
@@ -34,6 +34,16 @@ public class SettingsService
s.AiSystemPrompt = string.IsNullOrWhiteSpace(incoming.AiSystemPrompt)
? AppSetting.DefaultPrompt : incoming.AiSystemPrompt;
s.AiAutoApprove = incoming.AiAutoApprove;
// Channel scraping sources
s.AutoIngestEnabled = incoming.AutoIngestEnabled;
s.IngestIntervalMinutes = Math.Max(1, incoming.IngestIntervalMinutes);
s.TelegramEnabled = incoming.TelegramEnabled;
s.TelegramChannels = incoming.TelegramChannels?.Trim();
s.BaleEnabled = incoming.BaleEnabled;
s.BaleBotToken = incoming.BaleBotToken?.Trim();
s.DivarEnabled = incoming.DivarEnabled;
s.DivarCity = string.IsNullOrWhiteSpace(incoming.DivarCity) ? "tehran" : incoming.DivarCity.Trim();
s.DivarQueries = incoming.DivarQueries?.Trim();
s.UpdatedAt = DateTime.UtcNow;
await _db.SaveChangesAsync();
}
@@ -1,50 +1,39 @@
using System.Net;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Options;
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
public class TelegramOptions
{
public bool Enabled { get; set; }
public string? BotToken { get; set; } // optional (for private channels later)
public List<string> Channels { get; set; } = new(); // public channel usernames (no @)
public int PerChannel { get; set; } = 20;
}
/// <summary>
/// Reads public Telegram channels via the web preview (https://t.me/s/&lt;channel&gt;) — no bot
/// token or login needed for public channels. Each message's text becomes a ScrapedItem.
/// token needed for public channels. Enabled + channel list come from the admin settings (DB).
/// </summary>
public class TelegramListingSource : IListingSource
{
private readonly TelegramOptions _opts;
private readonly IHttpClientFactory _http;
private readonly ILogger<TelegramListingSource> _log;
public TelegramListingSource(IOptions<TelegramOptions> opts, IHttpClientFactory http,
ILogger<TelegramListingSource> log)
public TelegramListingSource(IHttpClientFactory http, ILogger<TelegramListingSource> log)
{
_opts = opts.Value;
_http = http;
_log = log;
}
public string Name => "تلگرام";
public bool Enabled => _opts.Enabled && _opts.Channels.Count > 0;
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
{
if (!Enabled) { _log.LogInformation("Telegram source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); }
var channels = AppSetting.SplitList(s.TelegramChannels);
if (!s.TelegramEnabled || channels.Count == 0) return Array.Empty<ScrapedItem>();
var client = _http.CreateClient("scrape");
var items = new List<ScrapedItem>();
foreach (var ch in _opts.Channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0))
foreach (var ch in channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0))
{
try
{
var html = await client.GetStringAsync($"https://t.me/s/{ch}", ct);
foreach (var text in ExtractMessages(html).Take(_opts.PerChannel))
foreach (var text in ExtractMessages(html).Take(20))
items.Add(new ScrapedItem($"تلگرام/{ch}", text, $"https://t.me/{ch}"));
}
catch (Exception ex) { _log.LogWarning(ex, "Telegram fetch failed for {Channel}", ch); }
@@ -52,7 +41,6 @@ public class TelegramListingSource : IListingSource
return items;
}
// Message bodies live in <div class="tgme_widget_message_text ...">...</div>.
private static IEnumerable<string> ExtractMessages(string html)
{
foreach (Match m in Regex.Matches(html,
@@ -69,7 +57,7 @@ internal static class HtmlUtil
public static string ToPlainText(string html)
{
var s = Regex.Replace(html, "<br\\s*/?>", "\n", RegexOptions.IgnoreCase);
s = Regex.Replace(s, "<[^>]+>", ""); // strip remaining tags
s = Regex.Replace(s, "<[^>]+>", "");
s = WebUtility.HtmlDecode(s);
s = Regex.Replace(s, "[ \\t]+", " ");
return s.Trim();
-7
View File
@@ -11,12 +11,5 @@
},
"Auth": {
"AdminPhone": "09120000000"
},
"Ingestion": {
"Enabled": false,
"IntervalMinutes": 30,
"Telegram": { "Enabled": false, "BotToken": "", "Channels": [], "PerChannel": 20 },
"Bale": { "Enabled": false, "BotToken": "", "BaseUrl": "https://tapi.bale.ai" },
"Divar": { "Enabled": false, "City": "tehran", "Category": "jobs", "Queries": [], "BaseUrl": "https://api.divar.ir/v8/web-search", "PerQuery": 25 }
}
}