Real channel fetch (Telegram/Bale/Divar) + AI-audited automation engine + CI/CD

- Fetch: Telegram via t.me/s, Bale via Bot API, Divar via web-search (HttpClient, config-gated, graceful)
- AI layer: DB-backed AppSetting (mode auto/manual, thresholds, AI endpoint/model/key/prompt/framework, auto-approve); OpenAI-compatible IAiAuditor (self-host/Iranian endpoints; fails safe to manual)
- Pipeline: fetch → dedupe(hash) → parse → validate → AI audit → Discard/Flag/Queue/auto-publish (resolve-or-create facility)
- Admin: /Admin/Settings automation+AI panel; queue shows confidence + AI verdict; flagged section
- CI/CD: Dockerfile, docker-compose.prod.yml, .gitea/workflows/ci-cd.yml, nginx vhost, DEPLOY.md; forwarded headers + /healthz + prod reference-only seed; ports 22/80/443 only

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-03 17:41:02 +03:30
parent 931b7b6ffb
commit 36bb165438
18 changed files with 1614 additions and 68 deletions
+19
View File
@@ -69,7 +69,26 @@ ConnectionStrings__Default=Host=db;Port=5432;Database=hamkadr;Username=hamkadr;P
Auth__AdminPhone=09XXXXXXXXX
# Future: Kavenegar / SMS.ir keys for real OTP delivery
# --- Channel scraping (optional; off by default) ---
# Enable the background worker and the sources you want; each enabled source is then fetched on a timer.
# Ingestion__Enabled=true
# Ingestion__IntervalMinutes=30
# Telegram (public channels via t.me/s — no token needed):
# Ingestion__Telegram__Enabled=true
# Ingestion__Telegram__Channels__0=shift_channel_username
# Ingestion__Telegram__Channels__1=another_channel
# Bale (bot must be a member of the channel; Telegram-style Bot API):
# Ingestion__Bale__Enabled=true
# Ingestion__Bale__BotToken=__BALE_BOT_TOKEN__
# Divar (best-effort web-search):
# Ingestion__Divar__Enabled=true
# Ingestion__Divar__Queries__0=استخدام پزشک
# Ingestion__Divar__Queries__1=پرستار
```
> The **AI audit layer** is configured at runtime in the admin panel (`/Admin/Settings`) — endpoint,
> model, API key, prompt/framework, and auto-approve — not via env. Default: AI off, mode = Manual,
> so every ingested listing waits in the review queue until an admin publishes it.
> `POSTGRES_PASSWORD` and the password in `ConnectionStrings__Default` must be identical.
> `ASPNETCORE_ENVIRONMENT=Production` ⇒ only **reference data** (roles/cities/districts) is seeded —
> no demo facilities/shifts. Real employers add listings via the employer panel.
+1
View File
@@ -20,6 +20,7 @@ public class AppDbContext : DbContext
public DbSet<Visitor> Visitors => Set<Visitor>();
public DbSet<UserPreferences> UserPreferences => Set<UserPreferences>();
public DbSet<InterestEvent> InterestEvents => Set<InterestEvent>();
public DbSet<AppSetting> AppSettings => Set<AppSetting>();
protected override void OnModelCreating(ModelBuilder b)
{
@@ -0,0 +1,833 @@
// <auto-generated />
using System;
using JobsMedical.Web.Data;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
#nullable disable
namespace JobsMedical.Web.Migrations
{
/// <summary>
/// Designer half of the <c>20260603140343_AiSettingsAndAutomation</c> migration for
/// <c>AppDbContext</c>. <c>BuildTargetModel</c> records the complete EF Core model as it
/// stands after this migration. The file is tool-generated (see the auto-generated header);
/// do not hand-edit the model definitions below.
/// </summary>
[DbContext(typeof(AppDbContext))]
[Migration("20260603140343_AiSettingsAndAutomation")]
partial class AiSettingsAndAutomation
{
/// <inheritdoc />
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasAnnotation("ProductVersion", "10.0.0")
.HasAnnotation("Relational:MaxIdentifierLength", 63);
NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder);
// --- Entity shapes (columns, keys, indexes) ---
// AppSetting maps to the "AppSettings" table, which this migration's Up() creates.
modelBuilder.Entity("JobsMedical.Web.Models.AppSetting", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("AiApiKey")
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<bool>("AiAutoApprove")
.HasColumnType("boolean");
b.Property<bool>("AiEnabled")
.HasColumnType("boolean");
b.Property<string>("AiEndpoint")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<string>("AiModel")
.HasMaxLength(120)
.HasColumnType("character varying(120)");
b.Property<string>("AiSystemPrompt")
.IsRequired()
.HasMaxLength(4000)
.HasColumnType("character varying(4000)");
b.Property<int>("AutoPublishMinConfidence")
.HasColumnType("integer");
b.Property<int>("Mode")
.HasColumnType("integer");
b.Property<DateTime>("UpdatedAt")
.HasColumnType("timestamp with time zone");
b.HasKey("Id");
b.ToTable("AppSettings");
});
modelBuilder.Entity("JobsMedical.Web.Models.Application", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<int>("DoctorId")
.HasColumnType("integer");
b.Property<string>("Message")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<int>("ShiftId")
.HasColumnType("integer");
b.Property<int>("Status")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("DoctorId");
b.HasIndex("ShiftId", "DoctorId")
.IsUnique();
b.ToTable("Applications");
});
modelBuilder.Entity("JobsMedical.Web.Models.City", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<bool>("IsActive")
.HasColumnType("boolean");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<string>("Province")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.HasKey("Id");
b.ToTable("Cities");
});
modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int>("CityId")
.HasColumnType("integer");
b.Property<bool>("IsActive")
.HasColumnType("boolean");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(120)
.HasColumnType("character varying(120)");
b.HasKey("Id");
b.HasIndex("CityId");
b.ToTable("Districts");
});
modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Bio")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.Property<int?>("CityId")
.HasColumnType("integer");
b.Property<bool>("IsVerified")
.HasColumnType("boolean");
b.Property<string>("LicenseNo")
.HasMaxLength(20)
.HasColumnType("character varying(20)");
b.Property<int?>("RoleId")
.HasColumnType("integer");
b.Property<string>("Specialty")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<int>("UserId")
.HasColumnType("integer");
b.Property<int>("YearsExperience")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("CityId");
b.HasIndex("RoleId");
b.HasIndex("UserId")
.IsUnique();
b.ToTable("DoctorProfiles");
});
modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Address")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<string>("BaleId")
.HasMaxLength(50)
.HasColumnType("character varying(50)");
b.Property<int>("CityId")
.HasColumnType("integer");
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<int?>("DistrictId")
.HasColumnType("integer");
b.Property<bool>("IsVerified")
.HasColumnType("boolean");
b.Property<double?>("Lat")
.HasColumnType("double precision");
b.Property<double?>("Lng")
.HasColumnType("double precision");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<int?>("OwnerUserId")
.HasColumnType("integer");
b.Property<string>("Phone")
.HasMaxLength(20)
.HasColumnType("character varying(20)");
b.Property<int>("Type")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("CityId");
b.HasIndex("DistrictId");
b.HasIndex("OwnerUserId");
b.ToTable("Facilities");
});
modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
{
b.Property<long>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("bigint");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<long>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<int>("EventType")
.HasColumnType("integer");
b.Property<int?>("JobOpeningId")
.HasColumnType("integer");
b.Property<int?>("ShiftId")
.HasColumnType("integer");
b.Property<string>("VisitorId")
.IsRequired()
.HasColumnType("character varying(36)");
b.HasKey("Id");
b.HasIndex("JobOpeningId");
b.HasIndex("ShiftId");
b.HasIndex("VisitorId", "CreatedAt");
b.ToTable("InterestEvents");
});
modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<string>("Description")
.HasMaxLength(2000)
.HasColumnType("character varying(2000)");
b.Property<int>("EmploymentType")
.HasColumnType("integer");
b.Property<int>("FacilityId")
.HasColumnType("integer");
b.Property<string>("Requirements")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.Property<int>("RoleId")
.HasColumnType("integer");
b.Property<long?>("SalaryMax")
.HasColumnType("bigint");
b.Property<long?>("SalaryMin")
.HasColumnType("bigint");
b.Property<int>("Source")
.HasColumnType("integer");
b.Property<string>("SourceUrl")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<int>("Status")
.HasColumnType("integer");
b.Property<string>("Title")
.IsRequired()
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.HasKey("Id");
b.HasIndex("FacilityId");
b.HasIndex("RoleId");
b.HasIndex("Status");
b.ToTable("JobOpenings");
});
modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int>("Confidence")
.HasColumnType("integer");
b.Property<string>("ContentHash")
.HasMaxLength(64)
.HasColumnType("character varying(64)");
b.Property<DateTime>("FetchedAt")
.HasColumnType("timestamp with time zone");
b.Property<int?>("LinkedShiftId")
.HasColumnType("integer");
b.Property<string>("ParsedJson")
.HasColumnType("text");
b.Property<string>("RawText")
.IsRequired()
.HasColumnType("text");
b.Property<string>("SourceChannel")
.IsRequired()
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<string>("SourceUrl")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<int>("Status")
.HasColumnType("integer");
b.Property<string>("ValidationNotes")
.HasMaxLength(1000)
.HasColumnType("character varying(1000)");
b.HasKey("Id");
b.HasIndex("ContentHash");
b.HasIndex("LinkedShiftId");
b.HasIndex("Status");
b.ToTable("RawListings");
});
modelBuilder.Entity("JobsMedical.Web.Models.Role", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("Category")
.IsRequired()
.HasMaxLength(50)
.HasColumnType("character varying(50)");
b.Property<bool>("IsActive")
.HasColumnType("boolean");
b.Property<string>("Name")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<int>("SortOrder")
.HasColumnType("integer");
b.HasKey("Id");
b.ToTable("Roles");
});
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<DateOnly>("Date")
.HasColumnType("date");
b.Property<string>("Description")
.HasMaxLength(1500)
.HasColumnType("character varying(1500)");
b.Property<TimeOnly>("EndTime")
.HasColumnType("time without time zone");
b.Property<int>("FacilityId")
.HasColumnType("integer");
b.Property<long?>("PayAmount")
.HasColumnType("bigint");
b.Property<int>("PayType")
.HasColumnType("integer");
b.Property<int>("RoleId")
.HasColumnType("integer");
b.Property<int?>("SharePercent")
.HasColumnType("integer");
b.Property<int>("ShiftType")
.HasColumnType("integer");
b.Property<int>("Source")
.HasColumnType("integer");
b.Property<string>("SourceUrl")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<string>("SpecialtyRequired")
.IsRequired()
.HasMaxLength(100)
.HasColumnType("character varying(100)");
b.Property<TimeOnly>("StartTime")
.HasColumnType("time without time zone");
b.Property<int>("Status")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("FacilityId");
b.HasIndex("RoleId");
b.HasIndex("Date", "Status");
b.ToTable("Shifts");
});
modelBuilder.Entity("JobsMedical.Web.Models.User", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<string>("FullName")
.HasMaxLength(150)
.HasColumnType("character varying(150)");
b.Property<bool>("IsPhoneVerified")
.HasColumnType("boolean");
b.Property<string>("Phone")
.IsRequired()
.HasMaxLength(20)
.HasColumnType("character varying(20)");
b.Property<int>("Role")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("Phone")
.IsUnique();
b.ToTable("Users");
});
modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int?>("CityId")
.HasColumnType("integer");
b.Property<long?>("MinPay")
.HasColumnType("bigint");
b.Property<int?>("PreferredShiftType")
.HasColumnType("integer");
b.Property<int?>("RoleId")
.HasColumnType("integer");
b.Property<DateTime>("UpdatedAt")
.HasColumnType("timestamp with time zone");
b.Property<string>("VisitorId")
.IsRequired()
.HasColumnType("character varying(36)");
b.HasKey("Id");
b.HasIndex("CityId");
b.HasIndex("RoleId");
b.HasIndex("VisitorId")
.IsUnique();
b.ToTable("UserPreferences");
});
modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
{
b.Property<string>("Id")
.HasMaxLength(36)
.HasColumnType("character varying(36)");
b.Property<DateTime>("CreatedAt")
.HasColumnType("timestamp with time zone");
b.Property<DateTime>("LastSeenAt")
.HasColumnType("timestamp with time zone");
b.Property<int?>("UserId")
.HasColumnType("integer");
b.HasKey("Id");
b.HasIndex("UserId");
b.ToTable("Visitors");
});
// --- Relationships (foreign keys, delete behavior, reference navigations) ---
// AppSetting has no relationships, so it does not appear below.
modelBuilder.Entity("JobsMedical.Web.Models.Application", b =>
{
b.HasOne("JobsMedical.Web.Models.User", "Doctor")
.WithMany("Applications")
.HasForeignKey("DoctorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.Shift", "Shift")
.WithMany("Applications")
.HasForeignKey("ShiftId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("Doctor");
b.Navigation("Shift");
});
modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany()
.HasForeignKey("CityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("City");
});
modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany()
.HasForeignKey("CityId");
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany()
.HasForeignKey("RoleId");
b.HasOne("JobsMedical.Web.Models.User", "User")
.WithOne("DoctorProfile")
.HasForeignKey("JobsMedical.Web.Models.DoctorProfile", "UserId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("City");
b.Navigation("Role");
b.Navigation("User");
});
modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany("Facilities")
.HasForeignKey("CityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.District", "District")
.WithMany("Facilities")
.HasForeignKey("DistrictId")
.OnDelete(DeleteBehavior.SetNull);
b.HasOne("JobsMedical.Web.Models.User", "OwnerUser")
.WithMany()
.HasForeignKey("OwnerUserId")
.OnDelete(DeleteBehavior.SetNull);
b.Navigation("City");
b.Navigation("District");
b.Navigation("OwnerUser");
});
modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b =>
{
b.HasOne("JobsMedical.Web.Models.JobOpening", "JobOpening")
.WithMany()
.HasForeignKey("JobOpeningId")
.OnDelete(DeleteBehavior.Cascade);
b.HasOne("JobsMedical.Web.Models.Shift", "Shift")
.WithMany()
.HasForeignKey("ShiftId")
.OnDelete(DeleteBehavior.Cascade);
b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor")
.WithMany("Events")
.HasForeignKey("VisitorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("JobOpening");
b.Navigation("Shift");
b.Navigation("Visitor");
});
modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b =>
{
b.HasOne("JobsMedical.Web.Models.Facility", "Facility")
.WithMany()
.HasForeignKey("FacilityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany()
.HasForeignKey("RoleId")
.OnDelete(DeleteBehavior.Restrict)
.IsRequired();
b.Navigation("Facility");
b.Navigation("Role");
});
modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b =>
{
b.HasOne("JobsMedical.Web.Models.Shift", "LinkedShift")
.WithMany()
.HasForeignKey("LinkedShiftId");
b.Navigation("LinkedShift");
});
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{
b.HasOne("JobsMedical.Web.Models.Facility", "Facility")
.WithMany("Shifts")
.HasForeignKey("FacilityId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany("Shifts")
.HasForeignKey("RoleId")
.OnDelete(DeleteBehavior.Restrict)
.IsRequired();
b.Navigation("Facility");
b.Navigation("Role");
});
modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b =>
{
b.HasOne("JobsMedical.Web.Models.City", "City")
.WithMany()
.HasForeignKey("CityId");
b.HasOne("JobsMedical.Web.Models.Role", "Role")
.WithMany()
.HasForeignKey("RoleId");
b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor")
.WithOne("Preferences")
.HasForeignKey("JobsMedical.Web.Models.UserPreferences", "VisitorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.Navigation("City");
b.Navigation("Role");
b.Navigation("Visitor");
});
modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
{
b.HasOne("JobsMedical.Web.Models.User", "User")
.WithMany()
.HasForeignKey("UserId")
.OnDelete(DeleteBehavior.SetNull);
b.Navigation("User");
});
// --- Inverse navigations declared on the principal side of the relationships above ---
modelBuilder.Entity("JobsMedical.Web.Models.City", b =>
{
b.Navigation("Facilities");
});
modelBuilder.Entity("JobsMedical.Web.Models.District", b =>
{
b.Navigation("Facilities");
});
modelBuilder.Entity("JobsMedical.Web.Models.Facility", b =>
{
b.Navigation("Shifts");
});
modelBuilder.Entity("JobsMedical.Web.Models.Role", b =>
{
b.Navigation("Shifts");
});
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{
b.Navigation("Applications");
});
modelBuilder.Entity("JobsMedical.Web.Models.User", b =>
{
b.Navigation("Applications");
b.Navigation("DoctorProfile");
});
modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b =>
{
b.Navigation("Events");
b.Navigation("Preferences");
});
#pragma warning restore 612, 618
}
}
}
@@ -0,0 +1,44 @@
using System;
using Microsoft.EntityFrameworkCore.Migrations;
using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
#nullable disable
namespace JobsMedical.Web.Migrations
{
/// <summary>
/// Schema migration that adds the <c>AppSettings</c> table, which backs the runtime
/// ingestion/AI configuration. It touches no other table.
/// </summary>
public partial class AiSettingsAndAutomation : Migration
{
/// <summary>Creates the <c>AppSettings</c> table with an identity primary key.</summary>
/// <param name="migrationBuilder">Builder that collects the schema operations to apply.</param>
protected override void Up(MigrationBuilder migrationBuilder)
{
migrationBuilder.CreateTable(
name: "AppSettings",
columns: table => new
{
Id = table.Column<int>(type: "integer", nullable: false)
.Annotation("Npgsql:ValueGenerationStrategy", NpgsqlValueGenerationStrategy.IdentityByDefaultColumn),
Mode = table.Column<int>(type: "integer", nullable: false),
AutoPublishMinConfidence = table.Column<int>(type: "integer", nullable: false),
AiEnabled = table.Column<bool>(type: "boolean", nullable: false),
// Endpoint, key and model are optional (nullable); the system prompt is NOT NULL.
AiEndpoint = table.Column<string>(type: "character varying(500)", maxLength: 500, nullable: true),
AiApiKey = table.Column<string>(type: "character varying(200)", maxLength: 200, nullable: true),
AiModel = table.Column<string>(type: "character varying(120)", maxLength: 120, nullable: true),
AiSystemPrompt = table.Column<string>(type: "character varying(4000)", maxLength: 4000, nullable: false),
AiAutoApprove = table.Column<bool>(type: "boolean", nullable: false),
UpdatedAt = table.Column<DateTime>(type: "timestamp with time zone", nullable: false)
},
constraints: table =>
{
table.PrimaryKey("PK_AppSettings", x => x.Id);
});
}
/// <summary>Reverts the migration by dropping <c>AppSettings</c>; any stored settings row is lost.</summary>
/// <param name="migrationBuilder">Builder that collects the schema operations to apply.</param>
protected override void Down(MigrationBuilder migrationBuilder)
{
migrationBuilder.DropTable(
name: "AppSettings");
}
}
}
@@ -22,6 +22,51 @@ namespace JobsMedical.Web.Migrations
NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder);
modelBuilder.Entity("JobsMedical.Web.Models.AppSetting", b =>
{
b.Property<int>("Id")
.ValueGeneratedOnAdd()
.HasColumnType("integer");
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<string>("AiApiKey")
.HasMaxLength(200)
.HasColumnType("character varying(200)");
b.Property<bool>("AiAutoApprove")
.HasColumnType("boolean");
b.Property<bool>("AiEnabled")
.HasColumnType("boolean");
b.Property<string>("AiEndpoint")
.HasMaxLength(500)
.HasColumnType("character varying(500)");
b.Property<string>("AiModel")
.HasMaxLength(120)
.HasColumnType("character varying(120)");
b.Property<string>("AiSystemPrompt")
.IsRequired()
.HasMaxLength(4000)
.HasColumnType("character varying(4000)");
b.Property<int>("AutoPublishMinConfidence")
.HasColumnType("integer");
b.Property<int>("Mode")
.HasColumnType("integer");
b.Property<DateTime>("UpdatedAt")
.HasColumnType("timestamp with time zone");
b.HasKey("Id");
b.ToTable("AppSettings");
});
modelBuilder.Entity("JobsMedical.Web.Models.Application", b =>
{
b.Property<int>("Id")
+50
View File
@@ -0,0 +1,50 @@
using System.ComponentModel.DataAnnotations;
namespace JobsMedical.Web.Models;
/// <summary>
/// Runtime-editable platform configuration, persisted as a single database row (Id = 1).
/// Holds the ingestion automation policy and the optional AI audit layer; it lives in the
/// database rather than appsettings so the admin panel can change it without a redeploy.
/// </summary>
public class AppSetting
{
    /// <summary>Built-in system prompt; the initial value of <see cref="AiSystemPrompt"/>.</summary>
    public const string DefaultPrompt = """
تو دستیار بررسی آگهیهای کاری حوزه درمان برای پلتفرم «همکادر» هستی.
هر آگهی خام را بخوان و تصمیم بگیر:
- approve: آگهی واقعی و مرتبط با شیفت/استخدام کادر درمان است و اطلاعات کافی دارد.
- reject: تبلیغ، اسپم، نامرتبط، یا فاقد اطلاعات حداقلی است.
- review: مرتبط است اما ناقص/مبهم و نیاز به بررسی انسانی دارد.
نقش، شهر/محله، نوع شیفت، نوع همکاری، مبلغ یا درصد سهم، و عنوان را در صورت وجود استخراج کن.
فقط با یک شیء JSON پاسخ بده با کلیدهای:
decision (approve|reject|review)، confidence (0-100)، reason (فارسی کوتاه)،
kind (shift|job)، role، city، district، shiftType (day|evening|night|oncall)،
employmentType (fulltime|parttime|contract|plan)، payAmount (عدد تومان یا null)،
sharePercent (0-100 یا null)، title، facilityName.
""";

    /// <summary>Row key; initialised to 1 because only one settings row is expected.</summary>
    public int Id { get; set; } = 1;

    // ----- Ingestion automation -----

    /// <summary>Whether ingested listings wait for review or may be published automatically.</summary>
    public IngestionMode Mode { get; set; } = IngestionMode.Manual;

    /// <summary>
    /// Confidence at or above which a listing auto-publishes when Mode is Automatic and the
    /// AI layer is not used.
    /// </summary>
    public int AutoPublishMinConfidence { get; set; } = 85;

    // ----- AI audit layer (optional) -----

    /// <summary>Turns the AI audit step on; off by default.</summary>
    public bool AiEnabled { get; set; }

    /// <summary>OpenAI-compatible chat-completions endpoint (self-hosted or an Iranian provider).</summary>
    [MaxLength(500)]
    public string? AiEndpoint { get; set; }

    /// <summary>API key for the configured endpoint, if one is set.</summary>
    [MaxLength(200)]
    public string? AiApiKey { get; set; }

    /// <summary>Model name for the endpoint; initialised to "gpt-4o-mini".</summary>
    [MaxLength(120)]
    public string? AiModel { get; set; } = "gpt-4o-mini";

    /// <summary>The prompt + "framework" the AI follows to approve / reject / structure a listing.</summary>
    [MaxLength(4000)]
    public string AiSystemPrompt { get; set; } = DefaultPrompt;

    /// <summary>When the AI approves AND Mode is Automatic, publish without human review.</summary>
    public bool AiAutoApprove { get; set; }

    /// <summary>Timestamp on the row, initialised to the UTC time the object was created.</summary>
    public DateTime UpdatedAt { get; set; } = DateTime.UtcNow;
}
+7
View File
@@ -75,3 +75,10 @@ public enum ListingKind
Shift = 0,
Job = 1
}
/// <summary>How ingested listings get onto the site.</summary>
public enum IngestionMode
{
Manual = 0, // Everything goes to the review queue; an admin approves it.
Automatic = 1 // Approved items (per the confidence threshold / AI) are published automatically.
}
@@ -12,6 +12,7 @@
(@JalaliDate.ToPersianDigits(Model.Queue.Count.ToString()) در صف،
@JalaliDate.ToPersianDigits(Model.Flagged.Count.ToString()) پرچم‌خورده)
· <a asp-page="/Admin/Facilities">تأیید مراکز درمانی</a>
· <a asp-page="/Admin/Settings">تنظیمات جمع‌آوری و AI</a>
</p>
</div>
</div>
@@ -0,0 +1,67 @@
@page
@model JobsMedical.Web.Pages.Admin.SettingsModel
@{
    ViewData["Title"] = "تنظیمات جمع‌آوری و هوش مصنوعی";
}
@*
    Admin form for the ingestion automation policy and the optional AI audit layer.
    Field names match the [BindProperty] members of SettingsModel. The maxlength values
    mirror the column caps on AppSetting (endpoint 500, key 200, model 120, prompt 4000)
    so over-long input is stopped in the browser. This is a convenience guard only, not a
    substitute for server-side validation.
*@
<div class="page-head">
    <div class="container">
        <h1>تنظیمات جمع‌آوری و هوش مصنوعی</h1>
        <p class="muted"><a asp-page="/Admin/Index">← بازگشت به صف</a></p>
    </div>
</div>
<div class="container section" style="max-width:680px;">
    @if (Model.Saved is not null)
    {
        <div class="alert alert-success">✓ @Model.Saved</div>
    }
    @* Only rendered when the page is re-displayed with model-state errors. *@
    @if (!ViewData.ModelState.IsValid)
    {
        <div asp-validation-summary="All" class="alert alert-danger"></div>
    }
    <form method="post" class="card card-pad">
        <h3 style="margin-top:0;">حالت انتشار</h3>
        <div class="filter-group">
            <label>نحوه افزودن آگهی‌ها به سایت</label>
            <select name="Mode">
                <option value="0" selected="@(Model.Mode == JobsMedical.Web.Models.IngestionMode.Manual)">دستی — همه به صف بررسی می‌روند</option>
                <option value="1" selected="@(Model.Mode == JobsMedical.Web.Models.IngestionMode.Automatic)">خودکار — موارد تأییدشده مستقیم منتشر می‌شوند</option>
            </select>
        </div>
        <div class="filter-group">
            <label>حداقل درصد اطمینان برای انتشار خودکار (بدون هوش مصنوعی)</label>
            @* required: a blank value would otherwise reach the server as "no number". *@
            <input type="number" name="AutoPublishMinConfidence" min="0" max="100" step="1" required value="@Model.AutoPublishMinConfidence" dir="ltr" />
            <p class="muted" style="font-size:12px; margin:4px 0 0;">در حالت خودکار و بدون AI، آگهی‌هایی با اطمینان بالاتر از این مقدار خودکار منتشر می‌شوند.</p>
        </div>
        <hr style="border:none; border-top:1px solid var(--line); margin:18px 0;" />
        <h3 style="margin-top:0;">لایه هوش مصنوعی (اختیاری)</h3>
        <div class="filter-group">
            <label style="display:flex; align-items:center; gap:8px; font-weight:700;">
                <input type="checkbox" name="AiEnabled" value="true" style="width:auto;" checked="@Model.AiEnabled" />
                فعال‌سازی بررسی با هوش مصنوعی قبل از انتشار
            </label>
            <p class="muted" style="font-size:12px; margin:4px 0 0;">در صورت فعال بودن، هر آگهی پیش از انتشار توسط مدل بررسی و تأیید/رد/ساختارمند می‌شود.</p>
        </div>
        <div class="filter-group">
            <label>آدرس سرویس (سازگار با OpenAI)</label>
            <input type="text" name="AiEndpoint" value="@Model.AiEndpoint" maxlength="500" placeholder="https://host/v1/chat/completions" dir="ltr" />
            <p class="muted" style="font-size:12px; margin:4px 0 0;">می‌تواند یک مدل self-hosted یا سرویس داخلی باشد (OpenAI/Anthropic در ایران مسدودند).</p>
        </div>
        <div class="filter-group" style="display:flex; gap:8px;">
            @* autocomplete="off": keep the browser from offering to store or autofill the secret. *@
            <div style="flex:1;"><label>کلید API</label><input type="password" name="AiApiKey" value="@Model.AiApiKey" maxlength="200" autocomplete="off" dir="ltr" /></div>
            <div style="flex:1;"><label>نام مدل</label><input type="text" name="AiModel" value="@Model.AiModel" maxlength="120" dir="ltr" /></div>
        </div>
        <div class="filter-group">
            <label>دستور و چارچوب هوش مصنوعی (Prompt / Framework)</label>
            <textarea name="AiSystemPrompt" rows="10" maxlength="4000" dir="rtl">@Model.AiSystemPrompt</textarea>
            <p class="muted" style="font-size:12px; margin:4px 0 0;">به مدل بگو چطور تأیید/رد کند و چه فیلدهایی را استخراج کند. خروجی باید JSON باشد.</p>
        </div>
        <div class="filter-group">
            <label style="display:flex; align-items:center; gap:8px; font-weight:700;">
                <input type="checkbox" name="AiAutoApprove" value="true" style="width:auto;" checked="@Model.AiAutoApprove" />
                در حالت خودکار، آگهی‌هایی که AI تأیید می‌کند مستقیم منتشر شوند
            </label>
        </div>
        <button type="submit" class="btn btn-accent btn-block btn-lg">ذخیره تنظیمات</button>
    </form>
</div>
@@ -0,0 +1,54 @@
using System.ComponentModel.DataAnnotations;
using JobsMedical.Web.Models;
using JobsMedical.Web.Services.Scraping;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.Mvc.RazorPages;
namespace JobsMedical.Web.Pages.Admin;
/// <summary>
/// Admin-only page model for the runtime ingestion / AI settings form. GET loads the
/// stored settings into the bound properties; POST validates the posted values and saves
/// them through <see cref="SettingsService"/>.
/// </summary>
[Authorize(Roles = "Admin")]
public class SettingsModel : PageModel
{
    private readonly SettingsService _settings;

    public SettingsModel(SettingsService settings) => _settings = settings;

    // The length/range limits mirror the caps declared on AppSetting so that bad input is
    // rejected here instead of surfacing as a database error on save.
    [BindProperty] public IngestionMode Mode { get; set; }
    [BindProperty, Range(0, 100)] public int AutoPublishMinConfidence { get; set; }
    [BindProperty] public bool AiEnabled { get; set; }
    [BindProperty, MaxLength(500)] public string? AiEndpoint { get; set; }
    [BindProperty, MaxLength(200)] public string? AiApiKey { get; set; }
    [BindProperty, MaxLength(120)] public string? AiModel { get; set; }
    [BindProperty, MaxLength(4000)] public string AiSystemPrompt { get; set; } = "";
    [BindProperty] public bool AiAutoApprove { get; set; }

    /// <summary>One-shot confirmation message shown after a successful save (survives the redirect).</summary>
    [TempData] public string? Saved { get; set; }

    /// <summary>Copies the stored settings into the bound properties for display.</summary>
    public async Task OnGetAsync()
    {
        var s = await _settings.GetAsync();
        Mode = s.Mode;
        AutoPublishMinConfidence = s.AutoPublishMinConfidence;
        AiEnabled = s.AiEnabled;
        AiEndpoint = s.AiEndpoint;
        // NOTE(review): the stored key is echoed into the page's HTML via the password
        // input's value attribute. Consider not round-tripping the secret ("leave blank
        // to keep") once SaveAsync's update semantics are confirmed.
        AiApiKey = s.AiApiKey;
        AiModel = s.AiModel;
        AiSystemPrompt = s.AiSystemPrompt;
        AiAutoApprove = s.AiAutoApprove;
    }

    /// <summary>
    /// Validates the posted form and saves it. Invalid input re-renders the page without
    /// saving; a successful save redirects back to this page (post/redirect/get).
    /// </summary>
    /// <returns>The page itself when validation fails, otherwise a redirect to this page.</returns>
    public async Task<IActionResult> OnPostAsync()
    {
        // An empty textarea binds as null and fails the implicit "required" check on the
        // non-nullable string. Treat it as "reset to the built-in prompt" rather than
        // letting a null reach the NOT NULL column.
        if (string.IsNullOrWhiteSpace(AiSystemPrompt))
        {
            AiSystemPrompt = AppSetting.DefaultPrompt;
            ModelState.Remove(nameof(AiSystemPrompt));
        }

        if (!Enum.IsDefined(Mode))
        {
            ModelState.AddModelError(nameof(Mode), "Invalid ingestion mode.");
        }

        // Never persist values that failed binding or validation. In particular a blank or
        // non-numeric confidence leaves the property at 0, which would auto-publish every
        // listing in Automatic mode.
        if (!ModelState.IsValid)
        {
            return Page();
        }

        await _settings.SaveAsync(new AppSetting
        {
            Mode = Mode,
            AutoPublishMinConfidence = AutoPublishMinConfidence,
            AiEnabled = AiEnabled,
            // Stray whitespace from copy/paste would break the URL or the auth header.
            AiEndpoint = AiEndpoint?.Trim(),
            AiApiKey = AiApiKey?.Trim(),
            AiModel = AiModel?.Trim(),
            AiSystemPrompt = AiSystemPrompt,
            AiAutoApprove = AiAutoApprove,
        });
        Saved = "تنظیمات ذخیره شد.";
        return RedirectToPage();
    }
}
+14 -1
View File
@@ -21,18 +21,31 @@ builder.Services.AddScoped<OtpService>();
// Listing parser: heuristic now; swap for an LLM-backed IListingParser later.
builder.Services.AddSingleton<IListingParser, HeuristicListingParser>();
// Scrape/ingestion engine: pluggable sources → dedupe → parse → validate → (AI audit) → publish/queue.
// Named HttpClient used for channel fetching: 20 s timeout and a bot-identifying User-Agent.
builder.Services.AddHttpClient("scrape", c =>
{
c.Timeout = TimeSpan.FromSeconds(20);
c.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; HamkadrBot/1.0)");
});
// Named HttpClient for the AI auditor. No options are set here, so HttpClient defaults apply.
// NOTE(review): the default HttpClient timeout is 100 s — confirm the auditor bounds its own
// calls, otherwise a hung endpoint can stall ingestion for that long per listing.
builder.Services.AddHttpClient("ai");
// Bind the ingestion options from the "Ingestion" configuration section and its per-source subsections.
builder.Services.Configure<JobsMedical.Web.Services.Scraping.IngestionOptions>(
builder.Configuration.GetSection("Ingestion"));
builder.Services.Configure<JobsMedical.Web.Services.Scraping.TelegramOptions>(
builder.Configuration.GetSection("Ingestion:Telegram"));
builder.Services.Configure<JobsMedical.Web.Services.Scraping.BaleOptions>(
builder.Configuration.GetSection("Ingestion:Bale"));
builder.Services.Configure<JobsMedical.Web.Services.Scraping.DivarOptions>(
builder.Configuration.GetSection("Ingestion:Divar"));
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.ListingValidator>();
// The auditor is a singleton; IAiAuditor.AuditAsync takes the AppSetting as a parameter,
// while SettingsService below is registered per scope.
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.IAiAuditor,
JobsMedical.Web.Services.Scraping.OpenAiCompatibleAuditor>();
builder.Services.AddScoped<JobsMedical.Web.Services.Scraping.SettingsService>();
// Several implementations are registered under the same IListingSource service type.
// NOTE(review): presumably the ingestion service consumes them as IEnumerable<IListingSource>,
// in which case registration order is resolution order — verify before reordering.
// NOTE(review): SampleListingSource is registered unconditionally here; confirm it is gated
// so sample data cannot be ingested in production.
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.IListingSource,
JobsMedical.Web.Services.Scraping.SampleListingSource>();
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.IListingSource,
JobsMedical.Web.Services.Scraping.TelegramListingSource>();
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.IListingSource,
JobsMedical.Web.Services.Scraping.BaleListingSource>();
builder.Services.AddSingleton<JobsMedical.Web.Services.Scraping.IListingSource,
JobsMedical.Web.Services.Scraping.DivarListingSource>();
builder.Services.AddScoped<JobsMedical.Web.Services.Scraping.IngestionService>();
@@ -0,0 +1,108 @@
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Structured fields the AI extracted from a raw listing. Every member is optional: the verdict
/// parser leaves a member null when the corresponding JSON key ("kind", "role", "city", "district",
/// "shiftType", "employmentType", "payAmount", "sharePercent", "title", "facilityName") is missing
/// or not of the expected JSON type.
/// </summary>
public record AiStructured(
string? Kind, string? Role, string? City, string? District, string? ShiftType,
string? EmploymentType, long? PayAmount, int? SharePercent, string? Title, string? FacilityName);
/// <summary>
/// The AI's verdict on one raw listing: the decision keyword, a confidence score, an optional
/// free-text reason and the optional structured fields extracted from the post.
/// </summary>
public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data)
{
    /// <summary>True when <see cref="Decision"/> is "approve", ignoring case.</summary>
    public bool Approve
    {
        get { return DecisionIs("approve"); }
    }

    /// <summary>True when <see cref="Decision"/> is "reject", ignoring case.</summary>
    public bool Reject
    {
        get { return DecisionIs("reject"); }
    }

    // Ordinal, case-insensitive match of the decision keyword.
    private bool DecisionIs(string keyword)
    {
        return Decision.Equals(keyword, StringComparison.OrdinalIgnoreCase);
    }
}
/// <summary>Asks an AI model for a verdict on a raw listing.</summary>
public interface IAiAuditor
{
/// <summary>Audit a raw post. Returns null when AI is off or the call fails (fail safe → manual).</summary>
/// <param name="rawText">The unparsed text of the scraped post.</param>
/// <param name="settings">Platform settings carrying the AI switch, endpoint, model, key and system prompt.</param>
/// <param name="ct">Token used to cancel the request.</param>
/// <returns>The parsed verdict, or null when no verdict could be obtained.</returns>
Task<AiAuditResult?> AuditAsync(string rawText, AppSetting settings, CancellationToken ct = default);
}
/// <summary>
/// Calls any OpenAI-compatible chat-completions endpoint (self-hosted vLLM/Ollama, or an Iranian
/// provider — OpenAI/Anthropic are blocked from Iran). The admin-set system prompt is the
/// "framework" that tells the model how to approve/reject/structure. We ask for strict JSON and
/// parse it. Any failure returns null so ingestion falls back to the rule-based path.
/// </summary>
public class OpenAiCompatibleAuditor : IAiAuditor
{
    private readonly IHttpClientFactory _http;
    private readonly ILogger<OpenAiCompatibleAuditor> _log;

    public OpenAiCompatibleAuditor(IHttpClientFactory http, ILogger<OpenAiCompatibleAuditor> log)
    {
        _http = http;
        _log = log;
    }

    /// <summary>
    /// Sends <paramref name="rawText"/> to the configured endpoint and parses the reply.
    /// </summary>
    /// <param name="rawText">The unparsed text of the scraped post.</param>
    /// <param name="s">Settings holding the AI switch, endpoint, model, API key and system prompt.</param>
    /// <param name="ct">Token used to cancel the HTTP call.</param>
    /// <returns>
    /// The parsed verdict; null when AI is disabled, no endpoint is set, the reply is empty or
    /// unparseable, or the call throws (the exception is logged as a warning).
    /// </returns>
    public async Task<AiAuditResult?> AuditAsync(string rawText, AppSetting s, CancellationToken ct = default)
    {
        if (!s.AiEnabled || string.IsNullOrWhiteSpace(s.AiEndpoint)) return null;

        try
        {
            var payload = new
            {
                model = string.IsNullOrWhiteSpace(s.AiModel) ? "gpt-4o-mini" : s.AiModel,
                temperature = 0,
                response_format = new { type = "json_object" },
                messages = new object[]
                {
                    new { role = "system", content = s.AiSystemPrompt },
                    new { role = "user", content = "آگهی خام:\n" + rawText + "\n\nفقط با JSON پاسخ بده." },
                },
            };

            var client = _http.CreateClient("ai");
            client.Timeout = TimeSpan.FromSeconds(30);

            using var req = new HttpRequestMessage(HttpMethod.Post, s.AiEndpoint)
            {
                Content = new StringContent(JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json"),
            };
            if (!string.IsNullOrWhiteSpace(s.AiApiKey))
                req.Headers.Authorization = new AuthenticationHeaderValue("Bearer", s.AiApiKey);

            using var resp = await client.SendAsync(req, ct);
            resp.EnsureSuccessStatusCode();

            var body = await resp.Content.ReadAsStringAsync(ct);
            using var doc = JsonDocument.Parse(body);
            // Chat-completions shape: choices[0].message.content holds the model's text.
            var content = doc.RootElement
                .GetProperty("choices")[0].GetProperty("message").GetProperty("content").GetString();
            if (string.IsNullOrWhiteSpace(content)) return null;

            return ParseVerdict(content);
        }
        catch (Exception ex)
        {
            // Deliberate best-effort: an unavailable or misbehaving model must never block ingestion.
            _log.LogWarning(ex, "AI audit failed — falling back to rule-based decision.");
            return null;
        }
    }

    /// <summary>
    /// Parses the model's reply into a verdict. Tolerates code fences and surrounding prose by
    /// cutting from the first '{' to the last '}'. Returns null when no JSON object is present.
    /// </summary>
    private static AiAuditResult? ParseVerdict(string json)
    {
        // The content itself should be a JSON object; tolerate code fences.
        json = json.Trim().Trim('`');
        var start = json.IndexOf('{');
        var end = json.LastIndexOf('}');
        if (start < 0 || end <= start) return null;
        json = json.Substring(start, end - start + 1);

        using var doc = JsonDocument.Parse(json);
        var r = doc.RootElement;

        string? S(string k) =>
            r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.String ? v.GetString() : null;

        // JsonElement.TryGetInt32/TryGetInt64 THROW when the value is not a JSON number, so a
        // reply with "payAmount": null or "confidence": "85" used to discard the whole verdict.
        // Check the kind first; also accept integral doubles (85.0, 1.5e6) and numeric strings.
        long? Whole(string k)
        {
            if (!r.TryGetProperty(k, out var v)) return null;

            if (v.ValueKind == JsonValueKind.String)
            {
                return long.TryParse(v.GetString(), System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture, out var fromText) ? fromText : null;
            }

            if (v.ValueKind != JsonValueKind.Number) return null;
            if (v.TryGetInt64(out var whole)) return whole;

            return v.TryGetDouble(out var real) && Math.Abs(real) < 9e18 && real == Math.Floor(real)
                ? (long)real
                : null;
        }

        int? Int(string k) =>
            Whole(k) is long n && n >= int.MinValue && n <= int.MaxValue ? (int)n : null;

        // Missing or non-string decision means "review"; trim so "approve " still matches.
        var decision = (S("decision") ?? "review").Trim().ToLowerInvariant();
        var confidence = (int)Math.Clamp(Whole("confidence") ?? 50L, 0L, 100L);

        var data = new AiStructured(S("kind"), S("role"), S("city"), S("district"), S("shiftType"),
            S("employmentType"), Whole("payAmount"), Int("sharePercent"), S("title"), S("facilityName"));

        return new AiAuditResult(decision, confidence, S("reason"), data);
    }
}
@@ -0,0 +1,68 @@
using System.Text.Json;
using Microsoft.Extensions.Options;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>Configuration for the Bale source, bound from the "Ingestion:Bale" section.</summary>
public class BaleOptions
{
// Master switch; the source also requires a non-empty BotToken before it reports itself enabled.
public bool Enabled { get; set; }
// Bot API token; inserted into the request path as /bot{token}/getUpdates.
public string? BotToken { get; set; }
public string BaseUrl { get; set; } = "https://tapi.bale.ai"; // Bale Bot API host
}
/// <summary>
/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. The bot must
/// be a member/admin of the channels it should read. Pulls text from messages and channel posts.
/// </summary>
public class BaleListingSource : IListingSource
{
    private readonly BaleOptions _opts;
    private readonly IHttpClientFactory _http;
    private readonly ILogger<BaleListingSource> _log;

    // update_id + 1 of the newest update seen so far; 0 means "no offset yet".
    // Kept in memory only, so after a restart the still-unconfirmed updates are read again.
    private long _nextOffset;

    public BaleListingSource(IOptions<BaleOptions> opts, IHttpClientFactory http,
        ILogger<BaleListingSource> log)
    {
        _opts = opts.Value;
        _http = http;
        _log = log;
    }

    public string Name => "بله";

    /// <summary>Enabled only when switched on and a bot token is configured.</summary>
    public bool Enabled => _opts.Enabled && !string.IsNullOrWhiteSpace(_opts.BotToken);

    /// <summary>
    /// Calls getUpdates and returns the text of every message/channel post that is at least
    /// 15 characters long after trimming. Never throws: any failure is logged and yields an empty list.
    /// </summary>
    public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
    {
        if (!Enabled) { _log.LogInformation("Bale source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); }

        try
        {
            var client = _http.CreateClient("scrape");
            var url = $"{_opts.BaseUrl.TrimEnd('/')}/bot{_opts.BotToken}/getUpdates";

            // Telegram-style getUpdates keeps returning the earliest unconfirmed updates until a
            // call passes an offset above them; without it every run re-reads the same batch.
            // NOTE(review): assumes Bale mirrors Telegram's offset semantics — confirm against Bale docs.
            // Trade-off: updates fetched here are confirmed by the NEXT call even if the caller
            // failed to persist them.
            var offset = Interlocked.Read(ref _nextOffset);
            if (offset > 0)
                url += "?offset=" + offset.ToString(System.Globalization.CultureInfo.InvariantCulture);

            var body = await client.GetStringAsync(url, ct);
            using var doc = JsonDocument.Parse(body);
            if (!doc.RootElement.TryGetProperty("result", out var result) || result.ValueKind != JsonValueKind.Array)
                return Array.Empty<ScrapedItem>();

            var items = new List<ScrapedItem>();
            long highestId = -1;
            foreach (var update in result.EnumerateArray())
            {
                // Skip malformed entries instead of letting one bad element abort the whole batch.
                if (update.ValueKind != JsonValueKind.Object) continue;

                if (update.TryGetProperty("update_id", out var id) && id.ValueKind == JsonValueKind.Number
                    && id.TryGetInt64(out var n) && n > highestId)
                {
                    highestId = n;
                }

                var text = (TextOf(update, "channel_post") ?? TextOf(update, "message"))?.Trim();
                if (text is { Length: >= 15 })
                    items.Add(new ScrapedItem("بله", text));
            }

            if (highestId >= 0) Interlocked.Exchange(ref _nextOffset, highestId + 1);
            return items;
        }
        catch (Exception ex)
        {
            _log.LogWarning(ex, "Bale fetch failed.");
            return Array.Empty<ScrapedItem>();
        }
    }

    // Returns update[key].text when both levels exist with the expected JSON kinds; otherwise null.
    private static string? TextOf(JsonElement update, string key)
        => update.TryGetProperty(key, out var m) && m.ValueKind == JsonValueKind.Object
           && m.TryGetProperty("text", out var t) && t.ValueKind == JsonValueKind.String
            ? t.GetString() : null;
}
@@ -1,3 +1,5 @@
using System.Text;
using System.Text.Json;
using Microsoft.Extensions.Options;
namespace JobsMedical.Web.Services.Scraping;
@@ -5,38 +7,80 @@ namespace JobsMedical.Web.Services.Scraping;
/// <summary>Configuration for the Divar source, bound from the "Ingestion:Divar" section.</summary>
public class DivarOptions
{
    // Master switch; the source also needs at least one query before it reports itself enabled.
    public bool Enabled { get; set; }
    // City path segment of the search URL.
    public string City { get; set; } = "tehran";
    // Category path segment of the search URL.
    public string Category { get; set; } = "jobs";
    public List<string> Queries { get; set; } = new(); // e.g. "پرستار", "پزشک عمومی", "درمانگاه"
    public string BaseUrl { get; set; } = "https://api.divar.ir/v8/web-search";
    // Maximum number of harvested texts kept per query.
    public int PerQuery { get; set; } = 25;
}
/// <summary>
/// Best-effort Divar fetch: queries Divar's web-search JSON for each term and harvests post
/// titles + descriptions. Divar's private API shifts shape over time, so we walk the JSON
/// tolerantly for any object carrying a "title" plus a nearby description field, and fail soft.
/// </summary>
public class DivarListingSource : IListingSource
{
    // Candidate description fields, in priority order; the first one present is appended to the title.
    private static readonly string[] DescKeys =
        { "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" };

    private readonly DivarOptions _opts;
    private readonly IHttpClientFactory _http;
    private readonly ILogger<DivarListingSource> _log;

    public DivarListingSource(IOptions<DivarOptions> opts, IHttpClientFactory http,
        ILogger<DivarListingSource> log)
    {
        _opts = opts.Value;
        _http = http;
        _log = log;
    }

    public string Name => "دیوار";

    /// <summary>Enabled only when switched on and at least one query is configured.</summary>
    public bool Enabled => _opts.Enabled && _opts.Queries.Count > 0;

    /// <summary>
    /// Runs every configured query and returns up to <see cref="DivarOptions.PerQuery"/> harvested
    /// texts per query. A failing query is logged and skipped; the others still run.
    /// </summary>
    public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
    {
        if (!Enabled) { _log.LogInformation("Divar source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); }

        var client = _http.CreateClient("scrape");
        var items = new List<ScrapedItem>();
        // The same ad can match several queries; emit each distinct text once per fetch so the
        // batch handed to the ingestion pipeline does not contain identical texts.
        var seen = new HashSet<string>(StringComparer.Ordinal);

        // Null/blank entries (e.g. a gap in the bound config array) are ignored instead of throwing.
        foreach (var q in _opts.Queries.Where(q => !string.IsNullOrWhiteSpace(q)).Select(q => q.Trim()))
        {
            try
            {
                var url = $"{_opts.BaseUrl.TrimEnd('/')}/{_opts.City}/{_opts.Category}?q={Uri.EscapeDataString(q)}";
                var body = await client.GetStringAsync(url, ct);
                using var doc = JsonDocument.Parse(body);
                foreach (var text in Harvest(doc.RootElement).Where(seen.Add).Take(_opts.PerQuery))
                    items.Add(new ScrapedItem("دیوار", text, "https://divar.ir"));
            }
            catch (Exception ex) { _log.LogWarning(ex, "Divar fetch failed for query {Query}", q); }
        }

        return items;
    }

    /// <summary>Walk the JSON; for each object with a string "title", emit title + first description.</summary>
    private static IEnumerable<string> Harvest(JsonElement el)
    {
        if (el.ValueKind == JsonValueKind.Object)
        {
            if (el.TryGetProperty("title", out var t) && t.ValueKind == JsonValueKind.String)
            {
                var sb = new StringBuilder(t.GetString());
                foreach (var k in DescKeys)
                {
                    if (el.TryGetProperty(k, out var d) && d.ValueKind == JsonValueKind.String)
                    {
                        sb.Append(" — ").Append(d.GetString());
                        break;
                    }
                }

                // Very short strings are skipped (same 15-character floor the other sources use).
                var text = sb.ToString().Trim();
                if (text.Length >= 15) yield return text;
            }

            // Recurse into every property: listings may be nested at any depth.
            foreach (var p in el.EnumerateObject())
                foreach (var s in Harvest(p.Value)) yield return s;
        }
        else if (el.ValueKind == JsonValueKind.Array)
        {
            foreach (var item in el.EnumerateArray())
                foreach (var s in Harvest(item)) yield return s;
        }
    }
}
@@ -7,22 +7,24 @@ using Microsoft.EntityFrameworkCore;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Per-source counters for one ingestion run: items fetched, queued for review, auto-published,
/// flagged, discarded as spam, and skipped as duplicates.
/// </summary>
public record SourceResult(string Source, int Fetched, int Queued, int Published, int Flagged, int Spam, int Duplicates);
/// <summary>Aggregate of one ingestion run: the per-source results plus totals summed across them.</summary>
public record IngestionSummary(List<SourceResult> Sources)
{
    /// <summary>Sum of <c>Queued</c> over all sources.</summary>
    public int TotalQueued { get { return Total(r => r.Queued); } }

    /// <summary>Sum of <c>Published</c> over all sources.</summary>
    public int TotalPublished { get { return Total(r => r.Published); } }

    /// <summary>Sum of <c>Flagged</c> over all sources.</summary>
    public int TotalFlagged { get { return Total(r => r.Flagged); } }

    /// <summary>Sum of <c>Spam</c> over all sources.</summary>
    public int TotalSpam { get { return Total(r => r.Spam); } }

    /// <summary>Sum of <c>Duplicates</c> over all sources.</summary>
    public int TotalDuplicates { get { return Total(r => r.Duplicates); } }

    // Adds up one counter across every source result.
    private int Total(Func<SourceResult, int> counter)
    {
        return Sources.Sum(counter);
    }
}
/// <summary>
/// The scrape engine. Source-agnostic: for every enabled <see cref="IListingSource"/> it runs
/// dedupe by content hash → parse (<see cref="IListingParser"/>) → rule-validate
/// (<see cref="ListingValidator"/>) → (optional) AI audit → decide, and stores each item as a
/// <see cref="RawListing"/>. The decision depends on admin settings:
/// • spam → Discarded
/// • AI on: AI verdict drives approve/reject/review; approve + Automatic + AiAutoApprove → publish
/// • AI off: Automatic + confidence ≥ threshold → publish; else queue/flag
/// "Publish" resolves-or-creates an (unverified) facility and creates the Shift/JobOpening.
/// </summary>
public class IngestionService
{
@@ -30,16 +32,15 @@ public class IngestionService
private readonly IEnumerable<IListingSource> _sources;
private readonly IListingParser _parser;
private readonly ListingValidator _validator;
private readonly IAiAuditor _ai;
private readonly SettingsService _settings;
private readonly ILogger<IngestionService> _log;
/// <summary>Stores the injected collaborators; performs no work.</summary>
/// <param name="db">EF Core context used for dedupe lookups and for persisting results.</param>
/// <param name="sources">Every registered listing source; disabled ones are skipped at run time.</param>
/// <param name="parser">Extracts structured fields from raw text.</param>
/// <param name="validator">Rule-based spam/validity/confidence check.</param>
/// <param name="ai">Optional AI audit step.</param>
/// <param name="settings">Loads the admin-configured automation/AI settings.</param>
/// <param name="log">Logger for per-source summaries and failures.</param>
public IngestionService(AppDbContext db, IEnumerable<IListingSource> sources, IListingParser parser,
    ListingValidator validator, IAiAuditor ai, SettingsService settings, ILogger<IngestionService> log)
{
    _db = db;
    _sources = sources;
    _parser = parser;
    _validator = validator;
    _ai = ai;
    _settings = settings;
    _log = log;
}
public IReadOnlyList<(string Name, bool Enabled)> Sources =>
@@ -47,18 +48,22 @@ public class IngestionService
public async Task<IngestionSummary> RunAsync(CancellationToken ct = default)
{
var roles = await _db.Roles.Select(r => r.Name).ToListAsync(ct);
var cities = await _db.Cities.Select(c => c.Name).ToListAsync(ct);
var districts = await _db.Districts.Select(d => d.Name).ToListAsync(ct);
var settings = await _settings.GetAsync();
var roles = await _db.Roles.ToListAsync(ct);
var cities = await _db.Cities.ToListAsync(ct);
var districts = await _db.Districts.ToListAsync(ct);
var roleNames = roles.Select(r => r.Name).ToList();
var cityNames = cities.Select(c => c.Name).ToList();
var districtNames = districts.Select(d => d.Name).ToList();
var results = new List<SourceResult>();
foreach (var source in _sources.Where(s => s.Enabled))
{
int fetched = 0, queued = 0, flagged = 0, spam = 0, dupes = 0;
int fetched = 0, queued = 0, published = 0, flagged = 0, spam = 0, dupes = 0;
IReadOnlyList<ScrapedItem> items;
try { items = await source.FetchAsync(ct); }
catch (Exception ex) { _log.LogError(ex, "Source {Source} fetch failed", source.Name); continue; }
catch (Exception ex) { _log.LogError(ex, "Source {Source} failed", source.Name); continue; }
foreach (var item in items)
{
@@ -66,42 +71,155 @@ public class IngestionService
var hash = Hash(item.RawText);
if (await _db.RawListings.AnyAsync(r => r.ContentHash == hash, ct)) { dupes++; continue; }
var parsed = _parser.Parse(item.RawText, roles, cities, districts);
var parsed = _parser.Parse(item.RawText, roleNames, cityNames, districtNames);
var val = _validator.Validate(item.RawText, parsed);
var status = val.IsSpam ? RawListingStatus.Discarded
: val.IsValid ? RawListingStatus.New
: RawListingStatus.Flagged;
if (status == RawListingStatus.New) queued++;
else if (status == RawListingStatus.Flagged) flagged++;
else spam++;
AiAuditResult? ai = null;
if (settings.AiEnabled && !val.IsSpam)
ai = await _ai.AuditAsync(item.RawText, settings, ct);
_db.RawListings.Add(new RawListing
var (status, reason, confidence) = Decide(settings, val, ai);
var raw = new RawListing
{
SourceChannel = item.Source,
SourceUrl = item.SourceUrl,
RawText = item.RawText.Trim(),
ContentHash = hash,
Confidence = val.Confidence,
ValidationNotes = val.Issues.Count > 0 ? string.Join("؛ ", val.Issues) : null,
Confidence = confidence,
ValidationNotes = reason,
Status = status,
});
};
_db.RawListings.Add(raw);
if (status == RawListingStatus.Normalized)
{
try { Publish(parsed, ai, raw, roles, cities, districts); published++; }
catch (Exception ex) { _log.LogWarning(ex, "Auto-publish failed; queueing instead"); raw.Status = RawListingStatus.New; queued++; }
}
else if (status == RawListingStatus.New) queued++;
else if (status == RawListingStatus.Flagged) flagged++;
else spam++;
}
await _db.SaveChangesAsync(ct);
results.Add(new SourceResult(source.Name, fetched, queued, flagged, spam, dupes));
_log.LogInformation("Ingestion {Source}: fetched={F} queued={Q} flagged={Fl} spam={S} dupes={D}",
source.Name, fetched, queued, flagged, spam, dupes);
results.Add(new SourceResult(source.Name, fetched, queued, published, flagged, spam, dupes));
_log.LogInformation("Ingest {S}: fetched={F} queued={Q} published={P} flagged={Fl} spam={Sp} dupes={D}",
source.Name, fetched, queued, published, flagged, spam, dupes);
}
return new IngestionSummary(results);
}
/// <summary>
/// Chooses the status, the note text and the confidence to store for one listing.
/// Order of precedence: rule-based spam → AI verdict (when one is available) → rule-based result.
/// </summary>
/// <param name="s">Admin settings (mode, auto-publish threshold, AI auto-approve switch).</param>
/// <param name="val">Result of the rule-based validator.</param>
/// <param name="ai">AI verdict, or null when the AI step was skipped or failed.</param>
/// <returns>The status to store, the note text (may be null) and the confidence to record.</returns>
private static (RawListingStatus status, string? reason, int confidence) Decide(
    AppSetting s, ValidationResult val, AiAuditResult? ai)
{
    string? ruleNotes = null;
    if (val.Issues.Count > 0)
    {
        ruleNotes = string.Join("؛ ", val.Issues);
    }

    // Rule-based spam is final regardless of any AI verdict.
    if (val.IsSpam)
    {
        return (RawListingStatus.Discarded, Join("اسپم", ruleNotes), val.Confidence);
    }

    // No AI verdict: fall back to the validator's own result and confidence.
    if (ai is null)
    {
        if (!val.IsValid)
        {
            return (RawListingStatus.Flagged, ruleNotes, val.Confidence);
        }

        var autoPublish = s.Mode == IngestionMode.Automatic && val.Confidence >= s.AutoPublishMinConfidence;
        return (autoPublish ? RawListingStatus.Normalized : RawListingStatus.New, ruleNotes, val.Confidence);
    }

    // AI verdict present: it drives the status, and its confidence is the one recorded.
    var verdictText = $"AI: {ai.Decision} ({ai.Confidence}٪)";
    if (ai.Reason is not null)
    {
        verdictText += $" — {ai.Reason}";
    }
    var combinedNotes = Join(verdictText, ruleNotes);

    RawListingStatus outcome;
    if (ai.Reject)
    {
        outcome = RawListingStatus.Discarded;
    }
    else if (!ai.Approve)
    {
        outcome = RawListingStatus.Flagged; // anything other than approve/reject means "review"
    }
    else if (s.Mode == IngestionMode.Automatic && s.AiAutoApprove)
    {
        outcome = RawListingStatus.Normalized;
    }
    else
    {
        outcome = RawListingStatus.New;
    }

    return (outcome, combinedNotes, ai.Confidence);
}
/// <summary>
/// Turns an approved listing into a published <see cref="JobOpening"/> or <see cref="Shift"/>,
/// resolving or creating the (unverified) facility it belongs to, and marks the raw listing as
/// Normalized. Entities are only added to the context; the caller saves.
/// </summary>
/// <param name="parsed">Fields extracted by the rule-based parser.</param>
/// <param name="ai">AI verdict whose structured data, when present and non-blank, takes precedence.</param>
/// <param name="raw">The raw listing being published.</param>
/// <param name="roles">All roles; the first one is the fallback when no name matches.</param>
/// <param name="cities">All cities; the first active (else first) one is the fallback.</param>
/// <param name="districts">All districts; matched by name within the resolved city.</param>
/// <exception cref="InvalidOperationException">When <paramref name="roles"/> or <paramref name="cities"/> is empty.</exception>
private void Publish(ParsedListing parsed, AiAuditResult? ai, RawListing raw,
    List<Role> roles, List<City> cities, List<District> districts)
{
    // Models often emit "" for unknown fields. `??` only falls back on null, so a blank AI value
    // used to override a good parser value. Treat blank as missing and trim what is kept.
    static string? Prefer(string? fromAi, string? fromParser)
        => string.IsNullOrWhiteSpace(fromAi) ? fromParser : fromAi.Trim();

    var d = ai?.Data;
    var roleName = Prefer(d?.Role, parsed.RoleName);
    var cityName = Prefer(d?.City, parsed.CityName);
    var districtName = Prefer(d?.District, parsed.DistrictName);

    var role = roles.FirstOrDefault(r => r.Name == roleName) ?? roles.First();
    var city = cities.FirstOrDefault(c => c.Name == cityName)
        ?? cities.FirstOrDefault(c => c.IsActive) ?? cities.First();
    var district = districts.FirstOrDefault(x => x.Name == districtName && x.CityId == city.Id);

    var facilityName = !string.IsNullOrWhiteSpace(d?.FacilityName) ? d!.FacilityName!.Trim()
        : $"مرکز درمانی (از {raw.SourceChannel})";

    // Check entities already tracked by the context first (including ones added earlier in this
    // run and not yet saved), then the database.
    var facility = _db.Facilities.Local.FirstOrDefault(f => f.Name == facilityName && f.CityId == city.Id)
        ?? _db.Facilities.FirstOrDefault(f => f.Name == facilityName && f.CityId == city.Id);
    if (facility is null)
    {
        facility = new Facility
        {
            Name = facilityName, Type = FacilityType.Clinic, City = city, DistrictId = district?.Id,
            Phone = parsed.Phone, IsVerified = false,
        };
        _db.Facilities.Add(facility);
    }

    // NOTE(review): pay fields come from the parser only; AiStructured.PayAmount/SharePercent
    // are not consulted here — confirm whether that is intended.
    var kind = (Prefer(d?.Kind, null) ?? parsed.Kind.ToString()).ToLowerInvariant();
    if (kind.Contains("job") || kind.Contains("استخدام"))
    {
        _db.JobOpenings.Add(new JobOpening
        {
            Facility = facility, Role = role,
            Title = !string.IsNullOrWhiteSpace(d?.Title) ? d!.Title!.Trim() : $"استخدام {role.Name}",
            EmploymentType = MapEmployment(d?.EmploymentType, parsed.EmploymentType),
            SalaryMin = parsed.PayAmount,
            Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated,
            SourceUrl = raw.SourceUrl,
        });
    }
    else
    {
        var st = MapShiftType(d?.ShiftType, parsed.ShiftType);
        var (start, end) = DefaultTimes(st);
        _db.Shifts.Add(new Shift
        {
            Facility = facility, Role = role,
            // No date is extracted from the text; the shift is dated tomorrow (UTC).
            Date = DateOnly.FromDateTime(DateTime.UtcNow).AddDays(1),
            StartTime = start, EndTime = end, ShiftType = st,
            SpecialtyRequired = role.Name, Description = raw.RawText,
            PayType = parsed.SharePercent is not null && parsed.PayAmount is null ? PayType.Percentage
                : parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift,
            PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent,
            Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl,
        });
    }

    raw.Status = RawListingStatus.Normalized;
}
/// <summary>
/// Maps the AI's shift-type keyword to <see cref="ShiftType"/>. Matching ignores case, surrounding
/// whitespace and the separators '-', '_' and ' ' (so "on-call" and "On_Call" both match "oncall").
/// Unknown or missing keywords fall back to the parser's value, then to <see cref="ShiftType.Day"/>.
/// </summary>
private static ShiftType MapShiftType(string? ai, ShiftType? parsed)
{
    var key = ai?.Trim().Replace("-", "").Replace("_", "").Replace(" ", "").ToLowerInvariant();
    return key switch
    {
        "day" => ShiftType.Day,
        "evening" => ShiftType.Evening,
        "night" => ShiftType.Night,
        "oncall" => ShiftType.OnCall,
        _ => parsed ?? ShiftType.Day,
    };
}
/// <summary>
/// Maps the AI's employment-type keyword to <see cref="EmploymentType"/>. Matching ignores case,
/// surrounding whitespace and the separators '-', '_' and ' ' (so "part-time" and "Full_Time" match).
/// Unknown or missing keywords fall back to the parser's value, then to <see cref="EmploymentType.FullTime"/>.
/// </summary>
private static EmploymentType MapEmployment(string? ai, EmploymentType? parsed)
{
    var key = ai?.Trim().Replace("-", "").Replace("_", "").Replace(" ", "").ToLowerInvariant();
    return key switch
    {
        "parttime" => EmploymentType.PartTime,
        "contract" => EmploymentType.Contract,
        "plan" => EmploymentType.Plan,
        "fulltime" => EmploymentType.FullTime,
        _ => parsed ?? EmploymentType.FullTime,
    };
}
/// <summary>
/// Default start/end clock times for a shift type: Day 08–14, Evening 14–20, Night 20–08,
/// anything else 08–08.
/// </summary>
private static (TimeOnly, TimeOnly) DefaultTimes(ShiftType t)
{
    static TimeOnly At(int hour) => new TimeOnly(hour, 0);

    switch (t)
    {
        case ShiftType.Day:
            return (At(8), At(14));
        case ShiftType.Evening:
            return (At(14), At(20));
        case ShiftType.Night:
            return (At(20), At(8));
        default:
            return (At(8), At(8));
    }
}
/// <summary>Returns <paramref name="a"/> alone when <paramref name="b"/> is null or empty, otherwise "a | b".</summary>
private static string? Join(string a, string? b)
{
    if (string.IsNullOrEmpty(b))
    {
        return a;
    }

    return string.Concat(a, " | ", b);
}
/// <summary>
/// SHA-256 (lower-case hex) of the text after trimming and collapsing every whitespace run to a
/// single space, so reposts that differ only in spacing hash identically (cross-run dedupe).
/// A null input is hashed as the empty string.
/// </summary>
private static string Hash(string text)
{
    var normalized = Regex.Replace((text ?? "").Trim(), @"\s+", " ");
    return Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(normalized))).ToLowerInvariant();
}
}
@@ -0,0 +1,40 @@
using JobsMedical.Web.Data;
using JobsMedical.Web.Models;
using Microsoft.EntityFrameworkCore;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>Loads/creates the single platform-settings row (Id=1).</summary>
public class SettingsService
{
    private readonly AppDbContext _db;

    public SettingsService(AppDbContext db) => _db = db;

    /// <summary>
    /// Returns the settings row with Id 1, inserting and saving a new <see cref="AppSetting"/>
    /// first when none exists. The returned entity is tracked by the context.
    /// </summary>
    /// <param name="ct">Token used to cancel the database calls.</param>
    public async Task<AppSetting> GetAsync(CancellationToken ct = default)
    {
        var s = await _db.AppSettings.FirstOrDefaultAsync(x => x.Id == 1, ct);
        if (s is null)
        {
            s = new AppSetting { Id = 1 };
            _db.AppSettings.Add(s);
            await _db.SaveChangesAsync(ct);
        }
        return s;
    }

    /// <summary>
    /// Copies the editable fields from <paramref name="incoming"/> onto the stored row and saves.
    /// The confidence threshold is clamped to 0–100, text fields are trimmed, and a blank system
    /// prompt is replaced by <see cref="AppSetting.DefaultPrompt"/>.
    /// </summary>
    /// <param name="incoming">Values submitted by the admin; its Id is ignored.</param>
    /// <param name="ct">Token used to cancel the database calls.</param>
    /// <exception cref="ArgumentNullException">When <paramref name="incoming"/> is null.</exception>
    public async Task SaveAsync(AppSetting incoming, CancellationToken ct = default)
    {
        // Fail before touching the database rather than with a NullReferenceException after it.
        ArgumentNullException.ThrowIfNull(incoming);

        var s = await GetAsync(ct);
        s.Mode = incoming.Mode;
        s.AutoPublishMinConfidence = Math.Clamp(incoming.AutoPublishMinConfidence, 0, 100);
        s.AiEnabled = incoming.AiEnabled;
        s.AiEndpoint = incoming.AiEndpoint?.Trim();
        s.AiApiKey = incoming.AiApiKey?.Trim();
        s.AiModel = incoming.AiModel?.Trim();
        s.AiSystemPrompt = string.IsNullOrWhiteSpace(incoming.AiSystemPrompt)
            ? AppSetting.DefaultPrompt : incoming.AiSystemPrompt;
        s.AiAutoApprove = incoming.AiAutoApprove;
        s.UpdatedAt = DateTime.UtcNow;
        await _db.SaveChangesAsync(ct);
    }
}
@@ -1,3 +1,5 @@
using System.Net;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Options;
namespace JobsMedical.Web.Services.Scraping;
@@ -5,40 +7,71 @@ namespace JobsMedical.Web.Services.Scraping;
/// <summary>Configuration for the Telegram source, bound from the "Ingestion:Telegram" section.</summary>
public class TelegramOptions
{
    // Master switch; the source also needs at least one channel before it reports itself enabled.
    public bool Enabled { get; set; }
    public string? BotToken { get; set; } // optional (for private channels later)
    public List<string> Channels { get; set; } = new(); // public channel usernames (no @)
    // Maximum number of messages kept per channel on each fetch.
    public int PerChannel { get; set; } = 20;
}
/// <summary>
/// Reads public Telegram channels via the web preview (https://t.me/s/&lt;channel&gt;) — no bot
/// token or login needed for public channels. Each message's text becomes a ScrapedItem.
/// </summary>
public class TelegramListingSource : IListingSource
{
    // Message bodies live in <div class="tgme_widget_message_text ...">...</div>.
    private static readonly Regex MessageBody = new(
        "<div class=\"tgme_widget_message_text[^\"]*\"[^>]*>(.*?)</div>", RegexOptions.Singleline);

    private readonly TelegramOptions _opts;
    private readonly IHttpClientFactory _http;
    private readonly ILogger<TelegramListingSource> _log;

    public TelegramListingSource(IOptions<TelegramOptions> opts, IHttpClientFactory http,
        ILogger<TelegramListingSource> log)
    {
        _opts = opts.Value;
        _http = http;
        _log = log;
    }

    public string Name => "تلگرام";

    /// <summary>Enabled only when switched on and at least one channel is configured.</summary>
    public bool Enabled => _opts.Enabled && _opts.Channels.Count > 0;

    /// <summary>
    /// Downloads the preview page of every configured channel and returns up to
    /// <see cref="TelegramOptions.PerChannel"/> message texts per channel. A failing channel is
    /// logged and skipped; the others still run.
    /// </summary>
    public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
    {
        if (!Enabled) { _log.LogInformation("Telegram source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); }

        var client = _http.CreateClient("scrape");
        var items = new List<ScrapedItem>();

        // Null/blank entries (e.g. a gap in the bound config array) are ignored instead of
        // throwing before the per-channel try/catch; a leading '@' and stray spaces are removed.
        var channels = _opts.Channels
            .Where(c => !string.IsNullOrWhiteSpace(c))
            .Select(c => c.Trim().TrimStart('@'))
            .Where(c => c.Length > 0);

        foreach (var ch in channels)
        {
            try
            {
                var html = await client.GetStringAsync($"https://t.me/s/{ch}", ct);
                // Keep the LAST PerChannel matches: with a small PerChannel, Take() would return
                // the same oldest posts on every run and never reach the newest ones.
                // NOTE(review): assumes the preview page lists posts oldest-first — confirm against live markup.
                foreach (var text in ExtractMessages(html).TakeLast(_opts.PerChannel))
                    items.Add(new ScrapedItem($"تلگرام/{ch}", text, $"https://t.me/{ch}"));
            }
            catch (Exception ex) { _log.LogWarning(ex, "Telegram fetch failed for {Channel}", ch); }
        }

        return items;
    }

    // Yields the plain text of each message body in page order, skipping texts shorter than 15 characters.
    private static IEnumerable<string> ExtractMessages(string html)
    {
        foreach (Match m in MessageBody.Matches(html))
        {
            var text = HtmlUtil.ToPlainText(m.Groups[1].Value);
            if (text.Length >= 15) yield return text;
        }
    }
}
/// <summary>Small HTML helpers for the scraping sources.</summary>
internal static class HtmlUtil
{
    /// <summary>
    /// Converts an HTML fragment to plain text: &lt;br&gt; tags become newlines, every other tag is
    /// removed, entities are decoded, runs of spaces/tabs collapse to one space, and the result is trimmed.
    /// </summary>
    /// <param name="html">HTML fragment to convert.</param>
    /// <returns>The plain-text rendering of <paramref name="html"/>.</returns>
    public static string ToPlainText(string html)
    {
        // Line breaks first, so they survive the generic tag strip below.
        var withNewlines = Regex.Replace(html, "<br\\s*/?>", "\n", RegexOptions.IgnoreCase);

        // Strip tags BEFORE decoding entities, so an escaped "&lt;b&gt;" stays visible as text.
        var withoutTags = Regex.Replace(withNewlines, "<[^>]+>", "");
        var decoded = WebUtility.HtmlDecode(withoutTags);

        // Only spaces and tabs are collapsed; newlines are kept.
        var collapsed = Regex.Replace(decoded, "[ \\t]+", " ");
        return collapsed.Trim();
    }
}
+3 -2
View File
@@ -15,7 +15,8 @@
"Ingestion": {
"Enabled": false,
"IntervalMinutes": 30,
"Telegram": { "Enabled": false, "BotToken": "", "Channels": [], "PerChannel": 20 },
"Bale": { "Enabled": false, "BotToken": "", "BaseUrl": "https://tapi.bale.ai" },
"Divar": { "Enabled": false, "City": "tehran", "Category": "jobs", "Queries": [], "BaseUrl": "https://api.divar.ir/v8/web-search", "PerQuery": 25 }
}
}