From cea27c868492705fc9a829e363ab1c40a4e99f6e Mon Sep 17 00:00:00 2001 From: "soroush.asadi" Date: Thu, 4 Jun 2026 17:53:17 +0330 Subject: [PATCH] [Ingest] Route scraping through an optional V2Ray/Xray proxy (Telegram in Iran) Telegram and some sources are filtered in Iran. .NET cannot speak vmess/vless/trojan, so add an Xray sidecar (compose service 'xray', behind the 'proxy' profile) that converts the admin's config into a local SOCKS5 proxy (xray:10808). New ScrapeHttpClients provider builds a proxied or direct HttpClient (WebProxy supports socks5/socks4/http) cached per proxy URL; all five ingestion sources (Telegram/Bale/Divar/Medjobs/Websites) now use it. Admin settings gain IngestProxyEnabled + IngestProxyUrl (migration; UI under sources). Added deploy/xray/config.json template + README with vmess/vless/trojan examples. Co-Authored-By: Claude Opus 4.8 --- deploy/xray/README.md | 63 + deploy/xray/config.json | 53 + docker-compose.yml | 13 + .../20260604142204_IngestProxy.Designer.cs | 1133 +++++++++++++++++ .../Migrations/20260604142204_IngestProxy.cs | 40 + .../Migrations/AppDbContextModelSnapshot.cs | 7 + src/JobsMedical.Web/Models/AppSetting.cs | 7 + .../Pages/Admin/Settings.cshtml | 10 + .../Pages/Admin/Settings.cshtml.cs | 6 + src/JobsMedical.Web/Program.cs | 2 + .../Services/Scraping/BaleListingSource.cs | 8 +- .../Services/Scraping/DivarListingSource.cs | 8 +- .../Services/Scraping/MedjobsListingSource.cs | 8 +- .../Services/Scraping/ScrapeHttpClients.cs | 55 + .../Services/Scraping/SettingsService.cs | 2 + .../Scraping/TelegramListingSource.cs | 8 +- .../Services/Scraping/WebsiteListingSource.cs | 8 +- 17 files changed, 1411 insertions(+), 20 deletions(-) create mode 100644 deploy/xray/README.md create mode 100644 deploy/xray/config.json create mode 100644 src/JobsMedical.Web/Migrations/20260604142204_IngestProxy.Designer.cs create mode 100644 src/JobsMedical.Web/Migrations/20260604142204_IngestProxy.cs create mode 100644 
src/JobsMedical.Web/Services/Scraping/ScrapeHttpClients.cs diff --git a/deploy/xray/README.md b/deploy/xray/README.md new file mode 100644 index 0000000..7c98f55 --- /dev/null +++ b/deploy/xray/README.md @@ -0,0 +1,63 @@ +# Ingestion proxy (Xray / V2Ray) — for scanning Telegram etc. from Iran + +The app's HttpClient can't speak `vmess` / `vless` / `trojan` directly. Instead, the **Xray +sidecar** (compose service `xray`) reads your config and exposes a plain **SOCKS5 proxy at +`xray:10808`** (and HTTP at `xray:10809`) on the internal compose network. The app is then +pointed at that proxy from the admin panel, and only ingestion traffic goes through it. + +``` +[app ingestion] → socks5://xray:10808 → [Xray client] → vmess/vless/trojan → server → Telegram +``` + +## Setup + +1. **Put your config** in `deploy/xray/config.json`. Replace the `proxy` outbound with your + own vmess / vless / trojan outbound (templates below). Keep the `inbounds` and `routing` + sections as-is so the local SOCKS/HTTP ports stay the same. + +2. **Start the sidecar** (it's behind a compose profile so normal deploys don't run it): + ```bash + docker compose --profile proxy up -d xray + docker logs hamkadr_xray --tail 30 # should show it listening, no errors + ``` + +3. **Point the app at it**: open `/Admin/Settings` → «کانال‌ها/منابع» → + - tick **«ارسال جمع‌آوری از طریق پروکسی»** + - set the proxy URL to **`socks5://xray:10808`** + - Save, then run ingestion (Telegram source enabled). + +4. **Quick test** that the proxy reaches Telegram: + ```bash + docker exec hamkadr_api sh -c "wget -q -O- --timeout=15 -e use_proxy=yes -e https_proxy=http://xray:10809 https://t.me/s/telegram | head -c 200" || true + ``` + +## Where to get the config values +If you have a share link (`vmess://…`, `vless://…`, `trojan://…`), import it into the Xray/v2rayN +client and **export the JSON config**, or decode it and fill in the templates below. 
+ +### vless + ws + tls (matches the default template in config.json) +```json +{ "tag":"proxy","protocol":"vless","settings":{"vnext":[{"address":"HOST","port":443, + "users":[{"id":"UUID","encryption":"none"}]}]}, + "streamSettings":{"network":"ws","security":"tls","tlsSettings":{"serverName":"SNI"}, + "wsSettings":{"path":"/PATH","headers":{"Host":"SNI"}}} } +``` + +### vmess + ws + tls +```json +{ "tag":"proxy","protocol":"vmess","settings":{"vnext":[{"address":"HOST","port":443, + "users":[{"id":"UUID","alterId":0,"security":"auto"}]}]}, + "streamSettings":{"network":"ws","security":"tls","tlsSettings":{"serverName":"SNI"}, + "wsSettings":{"path":"/PATH","headers":{"Host":"SNI"}}} } +``` + +### trojan + tls +```json +{ "tag":"proxy","protocol":"trojan","settings":{"servers":[{"address":"HOST","port":443, + "password":"PASSWORD"}]}, + "streamSettings":{"network":"tcp","security":"tls","tlsSettings":{"serverName":"SNI"}} } +``` + +> Security note: `config.json` contains your VPN credentials. It's mounted read-only into the +> container. Do **not** commit a real config — keep the committed file as a placeholder and +> drop the real one on the server only. `.gitignore` does not apply to this already-tracked file; if you keep a real config locally, run `git update-index --skip-worktree deploy/xray/config.json` so it is never staged. 
diff --git a/deploy/xray/config.json b/deploy/xray/config.json new file mode 100644 index 0000000..ca57cc5 --- /dev/null +++ b/deploy/xray/config.json @@ -0,0 +1,53 @@ +{ + "log": { "loglevel": "warning" }, + "inbounds": [ + { + "tag": "socks-in", + "listen": "0.0.0.0", + "port": 10808, + "protocol": "socks", + "settings": { "udp": true, "auth": "noauth" } + }, + { + "tag": "http-in", + "listen": "0.0.0.0", + "port": 10809, + "protocol": "http" + } + ], + "outbounds": [ + { + "//": "REPLACE this whole outbound with YOUR vmess / vless / trojan config.", + "//vmess-example": "see deploy/xray/README.md for vmess & trojan templates", + "tag": "proxy", + "protocol": "vless", + "settings": { + "vnext": [ + { + "address": "YOUR_SERVER_ADDRESS", + "port": 443, + "users": [ + { + "id": "YOUR_UUID", + "encryption": "none", + "flow": "" + } + ] + } + ] + }, + "streamSettings": { + "network": "ws", + "security": "tls", + "tlsSettings": { "serverName": "YOUR_SNI" }, + "wsSettings": { "path": "/", "headers": { "Host": "YOUR_SNI" } } + } + }, + { "tag": "direct", "protocol": "freedom" } + ], + "routing": { + "rules": [ + { "type": "field", "inboundTag": ["socks-in", "http-in"], "outboundTag": "proxy" } + ] + } +} diff --git a/docker-compose.yml b/docker-compose.yml index e219eed..c5b08b1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,6 +29,19 @@ services: # healthcheck is defined in the Dockerfile (bash /dev/tcp probe) so the deploy # job's `docker inspect Health.Status` wait works. + # ── Xray/V2Ray client — converts a vmess/vless/trojan config into a local SOCKS proxy ── + # so ingestion can reach Telegram etc. from inside Iran. Behind the "proxy" profile, so it + # only runs when you opt in: `docker compose --profile proxy up -d xray`. Put YOUR config at + # deploy/xray/config.json (see deploy/xray/README.md), then in /Admin/Settings set the + # ingestion proxy to socks5://xray:10808. Not published — only the app reaches it. 
+ xray: + image: mirror.soroushasadi.com/teddysun/xray:latest + container_name: hamkadr_xray + restart: unless-stopped + profiles: ["proxy"] + volumes: + - ./deploy/xray/config.json:/etc/xray/config.json:ro + # ── PostgreSQL (internal only — never published) ───────────────────────────── db: image: mirror.soroushasadi.com/postgres:16-alpine diff --git a/src/JobsMedical.Web/Migrations/20260604142204_IngestProxy.Designer.cs b/src/JobsMedical.Web/Migrations/20260604142204_IngestProxy.Designer.cs new file mode 100644 index 0000000..497a4f3 --- /dev/null +++ b/src/JobsMedical.Web/Migrations/20260604142204_IngestProxy.Designer.cs @@ -0,0 +1,1133 @@ +// +using System; +using JobsMedical.Web.Data; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; +using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata; + +#nullable disable + +namespace JobsMedical.Web.Migrations +{ + [DbContext(typeof(AppDbContext))] + [Migration("20260604142204_IngestProxy")] + partial class IngestProxy + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasAnnotation("ProductVersion", "10.0.0") + .HasAnnotation("Relational:MaxIdentifierLength", 63); + + NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder); + + modelBuilder.Entity("JobsMedical.Web.Models.AppSetting", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("AiApiKey") + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("AiAutoApprove") + .HasColumnType("boolean"); + + b.Property("AiEnabled") + .HasColumnType("boolean"); + + b.Property("AiEndpoint") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("AiModel") + 
.HasMaxLength(120) + .HasColumnType("character varying(120)"); + + b.Property("AiSystemPrompt") + .IsRequired() + .HasMaxLength(4000) + .HasColumnType("character varying(4000)"); + + b.Property("AutoIngestEnabled") + .HasColumnType("boolean"); + + b.Property("AutoPublishMinConfidence") + .HasColumnType("integer"); + + b.Property("BaleBotToken") + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("BaleEnabled") + .HasColumnType("boolean"); + + b.Property("DemoMode") + .HasColumnType("boolean"); + + b.Property("DivarCity") + .HasMaxLength(60) + .HasColumnType("character varying(60)"); + + b.Property("DivarEnabled") + .HasColumnType("boolean"); + + b.Property("DivarQueries") + .HasMaxLength(2000) + .HasColumnType("character varying(2000)"); + + b.Property("IngestIntervalMinutes") + .HasColumnType("integer"); + + b.Property("IngestProxyEnabled") + .HasColumnType("boolean"); + + b.Property("IngestProxyUrl") + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("MedjobsEnabled") + .HasColumnType("boolean"); + + b.Property("MedjobsMaxAds") + .HasColumnType("integer"); + + b.Property("Mode") + .HasColumnType("integer"); + + b.Property("NeshanMapKey") + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("PushEnabled") + .HasColumnType("boolean"); + + b.Property("SmsApiKey") + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("SmsEnabled") + .HasColumnType("boolean"); + + b.Property("SmsSender") + .HasMaxLength(30) + .HasColumnType("character varying(30)"); + + b.Property("SmsTemplate") + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("TelegramChannels") + .HasMaxLength(2000) + .HasColumnType("character varying(2000)"); + + b.Property("TelegramEnabled") + .HasColumnType("boolean"); + + b.Property("UpdatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("VapidPrivateKey") + .HasMaxLength(200) + .HasColumnType("character 
varying(200)"); + + b.Property("VapidPublicKey") + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("VapidSubject") + .HasMaxLength(120) + .HasColumnType("character varying(120)"); + + b.Property("WebNotificationsEnabled") + .HasColumnType("boolean"); + + b.Property("WebsiteUrls") + .HasMaxLength(4000) + .HasColumnType("character varying(4000)"); + + b.Property("WebsitesEnabled") + .HasColumnType("boolean"); + + b.HasKey("Id"); + + b.ToTable("AppSettings"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Application", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("DoctorId") + .HasColumnType("integer"); + + b.Property("Message") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("ShiftId") + .HasColumnType("integer"); + + b.Property("Status") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("DoctorId"); + + b.HasIndex("ShiftId", "DoctorId") + .IsUnique(); + + b.ToTable("Applications"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.City", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("IsActive") + .HasColumnType("boolean"); + + b.Property("Name") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("Province") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.HasKey("Id"); + + b.ToTable("Cities"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.District", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CityId") + 
.HasColumnType("integer"); + + b.Property("IsActive") + .HasColumnType("boolean"); + + b.Property("Name") + .IsRequired() + .HasMaxLength(120) + .HasColumnType("character varying(120)"); + + b.HasKey("Id"); + + b.HasIndex("CityId"); + + b.ToTable("Districts"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Bio") + .HasMaxLength(1000) + .HasColumnType("character varying(1000)"); + + b.Property("CityId") + .HasColumnType("integer"); + + b.Property("IsVerified") + .HasColumnType("boolean"); + + b.Property("LicenseNo") + .HasMaxLength(20) + .HasColumnType("character varying(20)"); + + b.Property("RoleId") + .HasColumnType("integer"); + + b.Property("Specialty") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("UserId") + .HasColumnType("integer"); + + b.Property("YearsExperience") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("CityId"); + + b.HasIndex("RoleId"); + + b.HasIndex("UserId") + .IsUnique(); + + b.ToTable("DoctorProfiles"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Facility", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Address") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("BaleId") + .HasMaxLength(50) + .HasColumnType("character varying(50)"); + + b.Property("CityId") + .HasColumnType("integer"); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("DistrictId") + .HasColumnType("integer"); + + b.Property("IsDemo") + .HasColumnType("boolean"); + + b.Property("IsVerified") + .HasColumnType("boolean"); + + b.Property("Lat") + .HasColumnType("double precision"); + + b.Property("Lng") 
+ .HasColumnType("double precision"); + + b.Property("Name") + .IsRequired() + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("OwnerUserId") + .HasColumnType("integer"); + + b.Property("Phone") + .HasMaxLength(20) + .HasColumnType("character varying(20)"); + + b.Property("Type") + .HasColumnType("integer"); + + b.Property("Verification") + .HasColumnType("integer"); + + b.Property("VerificationNote") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("VerificationRequestedAt") + .HasColumnType("timestamp with time zone"); + + b.HasKey("Id"); + + b.HasIndex("CityId"); + + b.HasIndex("DistrictId"); + + b.HasIndex("OwnerUserId"); + + b.ToTable("Facilities"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.FacilityDocument", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("ContentType") + .IsRequired() + .HasMaxLength(120) + .HasColumnType("character varying(120)"); + + b.Property("Data") + .IsRequired() + .HasColumnType("bytea"); + + b.Property("FacilityId") + .HasColumnType("integer"); + + b.Property("FileName") + .IsRequired() + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("Size") + .HasColumnType("bigint"); + + b.Property("UploadedAt") + .HasColumnType("timestamp with time zone"); + + b.HasKey("Id"); + + b.HasIndex("FacilityId"); + + b.ToTable("FacilityDocuments"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("bigint"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("EventType") + .HasColumnType("integer"); + + b.Property("JobOpeningId") + .HasColumnType("integer"); + + b.Property("ShiftId") + .HasColumnType("integer"); + + 
b.Property("VisitorId") + .IsRequired() + .HasColumnType("character varying(36)"); + + b.HasKey("Id"); + + b.HasIndex("JobOpeningId"); + + b.HasIndex("ShiftId"); + + b.HasIndex("VisitorId", "CreatedAt"); + + b.ToTable("InterestEvents"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("Description") + .HasMaxLength(2000) + .HasColumnType("character varying(2000)"); + + b.Property("EmploymentType") + .HasColumnType("integer"); + + b.Property("FacilityId") + .HasColumnType("integer"); + + b.Property("GenderRequirement") + .HasColumnType("integer"); + + b.Property("Requirements") + .HasMaxLength(1000) + .HasColumnType("character varying(1000)"); + + b.Property("RoleId") + .HasColumnType("integer"); + + b.Property("SalaryMax") + .HasColumnType("bigint"); + + b.Property("SalaryMin") + .HasColumnType("bigint"); + + b.Property("Source") + .HasColumnType("integer"); + + b.Property("SourceUrl") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("Status") + .HasColumnType("integer"); + + b.Property("Title") + .IsRequired() + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.HasKey("Id"); + + b.HasIndex("FacilityId"); + + b.HasIndex("RoleId"); + + b.HasIndex("Status"); + + b.ToTable("JobOpenings"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Notification", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("bigint"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Body") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("IsRead") + .HasColumnType("boolean"); + + b.Property("Title") + 
.IsRequired() + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("Url") + .HasMaxLength(300) + .HasColumnType("character varying(300)"); + + b.Property("UserId") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("UserId", "IsRead", "CreatedAt"); + + b.ToTable("Notifications"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Confidence") + .HasColumnType("integer"); + + b.Property("ContentHash") + .HasMaxLength(64) + .HasColumnType("character varying(64)"); + + b.Property("FetchedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("LinkedShiftId") + .HasColumnType("integer"); + + b.Property("ParsedJson") + .HasColumnType("text"); + + b.Property("RawText") + .IsRequired() + .HasColumnType("text"); + + b.Property("SourceChannel") + .IsRequired() + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("SourceUrl") + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("Status") + .HasColumnType("integer"); + + b.Property("ValidationNotes") + .HasMaxLength(1000) + .HasColumnType("character varying(1000)"); + + b.HasKey("Id"); + + b.HasIndex("ContentHash"); + + b.HasIndex("LinkedShiftId"); + + b.HasIndex("Status"); + + b.ToTable("RawListings"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Report", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("Reason") + .IsRequired() + .HasMaxLength(500) + .HasColumnType("character varying(500)"); + + b.Property("ReporterUserId") + .HasColumnType("integer"); + + b.Property("ReporterVisitorId") + .HasMaxLength(36) + 
.HasColumnType("character varying(36)"); + + b.Property("Status") + .HasColumnType("integer"); + + b.Property("TargetId") + .HasColumnType("integer"); + + b.Property("TargetLabel") + .HasMaxLength(160) + .HasColumnType("character varying(160)"); + + b.Property("TargetType") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("Status"); + + b.ToTable("Reports"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Role", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Category") + .IsRequired() + .HasMaxLength(50) + .HasColumnType("character varying(50)"); + + b.Property("IsActive") + .HasColumnType("boolean"); + + b.Property("Name") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("SortOrder") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.ToTable("Roles"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Shift", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("Date") + .HasColumnType("date"); + + b.Property("Description") + .HasMaxLength(1500) + .HasColumnType("character varying(1500)"); + + b.Property("EndTime") + .HasColumnType("time without time zone"); + + b.Property("FacilityId") + .HasColumnType("integer"); + + b.Property("GenderRequirement") + .HasColumnType("integer"); + + b.Property("PayAmount") + .HasColumnType("bigint"); + + b.Property("PayType") + .HasColumnType("integer"); + + b.Property("RoleId") + .HasColumnType("integer"); + + b.Property("SharePercent") + .HasColumnType("integer"); + + b.Property("ShiftType") + .HasColumnType("integer"); + + b.Property("Source") + .HasColumnType("integer"); + + b.Property("SourceUrl") + .HasMaxLength(500) + 
.HasColumnType("character varying(500)"); + + b.Property("SpecialtyRequired") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("StartTime") + .HasColumnType("time without time zone"); + + b.Property("Status") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("FacilityId"); + + b.HasIndex("RoleId"); + + b.HasIndex("Date", "Status"); + + b.ToTable("Shifts"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.User", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("BanReason") + .HasMaxLength(300) + .HasColumnType("character varying(300)"); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("FullName") + .HasMaxLength(150) + .HasColumnType("character varying(150)"); + + b.Property("IsBanned") + .HasColumnType("boolean"); + + b.Property("IsPhoneVerified") + .HasColumnType("boolean"); + + b.Property("Phone") + .IsRequired() + .HasMaxLength(20) + .HasColumnType("character varying(20)"); + + b.Property("Role") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("Phone") + .IsUnique(); + + b.ToTable("Users"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("CityId") + .HasColumnType("integer"); + + b.Property("Gender") + .HasColumnType("integer"); + + b.Property("MinPay") + .HasColumnType("bigint"); + + b.Property("PreferredShiftType") + .HasColumnType("integer"); + + b.Property("RoleId") + .HasColumnType("integer"); + + b.Property("UpdatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("VisitorId") + .IsRequired() + .HasColumnType("character varying(36)"); + + b.HasKey("Id"); + + b.HasIndex("CityId"); + + 
b.HasIndex("RoleId"); + + b.HasIndex("VisitorId") + .IsUnique(); + + b.ToTable("UserPreferences"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b => + { + b.Property("Id") + .HasMaxLength(36) + .HasColumnType("character varying(36)"); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("LastSeenAt") + .HasColumnType("timestamp with time zone"); + + b.Property("UserId") + .HasColumnType("integer"); + + b.HasKey("Id"); + + b.HasIndex("UserId"); + + b.ToTable("Visitors"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.WebPushSubscription", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("integer"); + + NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("Id")); + + b.Property("Auth") + .IsRequired() + .HasMaxLength(100) + .HasColumnType("character varying(100)"); + + b.Property("CreatedAt") + .HasColumnType("timestamp with time zone"); + + b.Property("Endpoint") + .IsRequired() + .HasMaxLength(600) + .HasColumnType("character varying(600)"); + + b.Property("P256dh") + .IsRequired() + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + + b.Property("VisitorId") + .HasMaxLength(36) + .HasColumnType("character varying(36)"); + + b.HasKey("Id"); + + b.HasIndex("Endpoint") + .IsUnique(); + + b.ToTable("WebPushSubscriptions"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Application", b => + { + b.HasOne("JobsMedical.Web.Models.User", "Doctor") + .WithMany("Applications") + .HasForeignKey("DoctorId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("JobsMedical.Web.Models.Shift", "Shift") + .WithMany("Applications") + .HasForeignKey("ShiftId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("Doctor"); + + b.Navigation("Shift"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.District", b => + { + b.HasOne("JobsMedical.Web.Models.City", "City") + .WithMany() + .HasForeignKey("CityId") + 
.OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("City"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.DoctorProfile", b => + { + b.HasOne("JobsMedical.Web.Models.City", "City") + .WithMany() + .HasForeignKey("CityId"); + + b.HasOne("JobsMedical.Web.Models.Role", "Role") + .WithMany() + .HasForeignKey("RoleId"); + + b.HasOne("JobsMedical.Web.Models.User", "User") + .WithOne("DoctorProfile") + .HasForeignKey("JobsMedical.Web.Models.DoctorProfile", "UserId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("City"); + + b.Navigation("Role"); + + b.Navigation("User"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Facility", b => + { + b.HasOne("JobsMedical.Web.Models.City", "City") + .WithMany("Facilities") + .HasForeignKey("CityId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("JobsMedical.Web.Models.District", "District") + .WithMany("Facilities") + .HasForeignKey("DistrictId") + .OnDelete(DeleteBehavior.SetNull); + + b.HasOne("JobsMedical.Web.Models.User", "OwnerUser") + .WithMany() + .HasForeignKey("OwnerUserId") + .OnDelete(DeleteBehavior.SetNull); + + b.Navigation("City"); + + b.Navigation("District"); + + b.Navigation("OwnerUser"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.FacilityDocument", b => + { + b.HasOne("JobsMedical.Web.Models.Facility", "Facility") + .WithMany("Documents") + .HasForeignKey("FacilityId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("Facility"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.InterestEvent", b => + { + b.HasOne("JobsMedical.Web.Models.JobOpening", "JobOpening") + .WithMany() + .HasForeignKey("JobOpeningId") + .OnDelete(DeleteBehavior.Cascade); + + b.HasOne("JobsMedical.Web.Models.Shift", "Shift") + .WithMany() + .HasForeignKey("ShiftId") + .OnDelete(DeleteBehavior.Cascade); + + b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor") + .WithMany("Events") + .HasForeignKey("VisitorId") + 
.OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("JobOpening"); + + b.Navigation("Shift"); + + b.Navigation("Visitor"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b => + { + b.HasOne("JobsMedical.Web.Models.Facility", "Facility") + .WithMany() + .HasForeignKey("FacilityId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("JobsMedical.Web.Models.Role", "Role") + .WithMany() + .HasForeignKey("RoleId") + .OnDelete(DeleteBehavior.Restrict) + .IsRequired(); + + b.Navigation("Facility"); + + b.Navigation("Role"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Notification", b => + { + b.HasOne("JobsMedical.Web.Models.User", "User") + .WithMany() + .HasForeignKey("UserId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("User"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.RawListing", b => + { + b.HasOne("JobsMedical.Web.Models.Shift", "LinkedShift") + .WithMany() + .HasForeignKey("LinkedShiftId"); + + b.Navigation("LinkedShift"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Shift", b => + { + b.HasOne("JobsMedical.Web.Models.Facility", "Facility") + .WithMany("Shifts") + .HasForeignKey("FacilityId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("JobsMedical.Web.Models.Role", "Role") + .WithMany("Shifts") + .HasForeignKey("RoleId") + .OnDelete(DeleteBehavior.Restrict) + .IsRequired(); + + b.Navigation("Facility"); + + b.Navigation("Role"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.UserPreferences", b => + { + b.HasOne("JobsMedical.Web.Models.City", "City") + .WithMany() + .HasForeignKey("CityId"); + + b.HasOne("JobsMedical.Web.Models.Role", "Role") + .WithMany() + .HasForeignKey("RoleId"); + + b.HasOne("JobsMedical.Web.Models.Visitor", "Visitor") + .WithOne("Preferences") + .HasForeignKey("JobsMedical.Web.Models.UserPreferences", "VisitorId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("City"); + + 
b.Navigation("Role"); + + b.Navigation("Visitor"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b => + { + b.HasOne("JobsMedical.Web.Models.User", "User") + .WithMany() + .HasForeignKey("UserId") + .OnDelete(DeleteBehavior.SetNull); + + b.Navigation("User"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.City", b => + { + b.Navigation("Facilities"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.District", b => + { + b.Navigation("Facilities"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Facility", b => + { + b.Navigation("Documents"); + + b.Navigation("Shifts"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Role", b => + { + b.Navigation("Shifts"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Shift", b => + { + b.Navigation("Applications"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.User", b => + { + b.Navigation("Applications"); + + b.Navigation("DoctorProfile"); + }); + + modelBuilder.Entity("JobsMedical.Web.Models.Visitor", b => + { + b.Navigation("Events"); + + b.Navigation("Preferences"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/src/JobsMedical.Web/Migrations/20260604142204_IngestProxy.cs b/src/JobsMedical.Web/Migrations/20260604142204_IngestProxy.cs new file mode 100644 index 0000000..1fbfb12 --- /dev/null +++ b/src/JobsMedical.Web/Migrations/20260604142204_IngestProxy.cs @@ -0,0 +1,40 @@ +using Microsoft.EntityFrameworkCore.Migrations; + +#nullable disable + +namespace JobsMedical.Web.Migrations +{ + /// <inheritdoc /> + public partial class IngestProxy : Migration + { + /// <inheritdoc /> + protected override void Up(MigrationBuilder migrationBuilder) + { + migrationBuilder.AddColumn<bool>( + name: "IngestProxyEnabled", + table: "AppSettings", + type: "boolean", + nullable: false, + defaultValue: false); + + migrationBuilder.AddColumn<string>( + name: "IngestProxyUrl", + table: "AppSettings", + type: "character varying(200)", + maxLength: 200, + nullable: true); + } + + /// <inheritdoc /> + protected override void 
Down(MigrationBuilder migrationBuilder) + { + migrationBuilder.DropColumn( + name: "IngestProxyEnabled", + table: "AppSettings"); + + migrationBuilder.DropColumn( + name: "IngestProxyUrl", + table: "AppSettings"); + } + } +} diff --git a/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs b/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs index cdbd1c5..7deb678 100644 --- a/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs +++ b/src/JobsMedical.Web/Migrations/AppDbContextModelSnapshot.cs @@ -83,6 +83,13 @@ namespace JobsMedical.Web.Migrations b.Property("IngestIntervalMinutes") .HasColumnType("integer"); + b.Property("IngestProxyEnabled") + .HasColumnType("boolean"); + + b.Property("IngestProxyUrl") + .HasMaxLength(200) + .HasColumnType("character varying(200)"); + b.Property("MedjobsEnabled") .HasColumnType("boolean"); diff --git a/src/JobsMedical.Web/Models/AppSetting.cs b/src/JobsMedical.Web/Models/AppSetting.cs index 42ac734..5872a45 100644 --- a/src/JobsMedical.Web/Models/AppSetting.cs +++ b/src/JobsMedical.Web/Models/AppSetting.cs @@ -51,6 +51,13 @@ public class AppSetting /// Generic web pages to scrape, one URL per line. [MaxLength(4000)] public string? WebsiteUrls { get; set; } + /// Route ingestion fetches through a proxy (needed in Iran for Telegram etc.). + public bool IngestProxyEnabled { get; set; } = false; + /// Local proxy an Xray/V2Ray client sidecar exposes, e.g. socks5://xray:10808 + /// (also accepts socks4:// or http://). The app cannot read vmess/vless/trojan directly; + /// the sidecar converts that config into this local proxy. + [MaxLength(200)] public string? IngestProxyUrl { get; set; } + public bool DivarEnabled { get; set; } = false; [MaxLength(60)] public string? DivarCity { get; set; } = "tehran"; /// Divar search terms, one per line or comma-separated. 
diff --git a/src/JobsMedical.Web/Pages/Admin/Settings.cshtml b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml index 7b59942..17610b3 100644 --- a/src/JobsMedical.Web/Pages/Admin/Settings.cshtml +++ b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml @@ -168,6 +168,16 @@

موتور هر آدرس را می‌خواند و متن آگهی را استخراج می‌کند (عنوان og + بدنه محتوا). برای هر صفحه شغلی، آرشیو کانال یا آگهی طبقه‌بندی.

+
+ + + +

یک کلاینت Xray/V2Ray (سرویس جانبی) کانفیگ vmess/vless/trojan تو را به یک پروکسی محلی SOCKS تبدیل می‌کند؛ آدرس همان را اینجا بگذار (socks5:// یا socks4:// یا http://).

+
+

حالت نمایشی (Demo)

diff --git a/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs index 88007e7..5ec03de 100644 --- a/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs +++ b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs @@ -55,6 +55,8 @@ public class SettingsModel : PageModel [BindProperty] public bool DemoMode { get; set; } [BindProperty] public bool WebsitesEnabled { get; set; } [BindProperty] public string? WebsiteUrls { get; set; } + [BindProperty] public bool IngestProxyEnabled { get; set; } + [BindProperty] public string? IngestProxyUrl { get; set; } [TempData] public string? Saved { get; set; } [TempData] public string? SmsTest { get; set; } [TempData] public string? DemoMsg { get; set; } @@ -89,6 +91,8 @@ public class SettingsModel : PageModel DemoMode = s.DemoMode; WebsitesEnabled = s.WebsitesEnabled; WebsiteUrls = s.WebsiteUrls; + IngestProxyEnabled = s.IngestProxyEnabled; + IngestProxyUrl = s.IngestProxyUrl; WebNotificationsEnabled = s.WebNotificationsEnabled; PushEnabled = s.PushEnabled; VapidPublicKey = s.VapidPublicKey; @@ -127,6 +131,8 @@ public class SettingsModel : PageModel DemoMode = DemoMode, WebsitesEnabled = WebsitesEnabled, WebsiteUrls = WebsiteUrls, + IngestProxyEnabled = IngestProxyEnabled, + IngestProxyUrl = IngestProxyUrl, WebNotificationsEnabled = WebNotificationsEnabled, PushEnabled = PushEnabled, VapidPublicKey = VapidPublicKey, diff --git a/src/JobsMedical.Web/Program.cs b/src/JobsMedical.Web/Program.cs index 13dcaed..d7799f0 100644 --- a/src/JobsMedical.Web/Program.cs +++ b/src/JobsMedical.Web/Program.cs @@ -37,6 +37,8 @@ builder.Services.AddHttpClient("scrape", c => c.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; HamkadrBot/1.0)"); }); builder.Services.AddHttpClient("ai"); +// Proxy-aware client provider for ingestion (routes through Xray/V2Ray SOCKS proxy when set). 
+builder.Services.AddSingleton(); builder.Services.AddSingleton(); builder.Services.AddSingleton(); diff --git a/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs b/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs index 279c1da..e890584 100644 --- a/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs +++ b/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs @@ -10,12 +10,12 @@ namespace JobsMedical.Web.Services.Scraping; public class BaleListingSource : IListingSource { private const string BaseUrl = "https://tapi.bale.ai"; - private readonly IHttpClientFactory _http; + private readonly ScrapeHttpClients _clients; private readonly ILogger _log; - public BaleListingSource(IHttpClientFactory http, ILogger log) + public BaleListingSource(ScrapeHttpClients clients, ILogger log) { - _http = http; + _clients = clients; _log = log; } @@ -27,7 +27,7 @@ public class BaleListingSource : IListingSource try { - var client = _http.CreateClient("scrape"); + var client = _clients.For(s); var body = await client.GetStringAsync($"{BaseUrl}/bot{s.BaleBotToken}/getUpdates", ct); using var doc = JsonDocument.Parse(body); if (!doc.RootElement.TryGetProperty("result", out var result) || result.ValueKind != JsonValueKind.Array) diff --git a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs index c70bfdb..aa995be 100644 --- a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs +++ b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs @@ -12,12 +12,12 @@ namespace JobsMedical.Web.Services.Scraping; public class DivarListingSource : IListingSource { private const string BaseUrl = "https://api.divar.ir/v8/web-search"; - private readonly IHttpClientFactory _http; + private readonly ScrapeHttpClients _clients; private readonly ILogger _log; - public DivarListingSource(IHttpClientFactory http, ILogger log) + public DivarListingSource(ScrapeHttpClients clients, ILogger log) { 
- _http = http; + _clients = clients; _log = log; } @@ -29,7 +29,7 @@ public class DivarListingSource : IListingSource if (!s.DivarEnabled || queries.Count == 0) return Array.Empty(); var city = string.IsNullOrWhiteSpace(s.DivarCity) ? "tehran" : s.DivarCity.Trim(); - var client = _http.CreateClient("scrape"); + var client = _clients.For(s); var items = new List(); foreach (var q in queries) { diff --git a/src/JobsMedical.Web/Services/Scraping/MedjobsListingSource.cs b/src/JobsMedical.Web/Services/Scraping/MedjobsListingSource.cs index 14fa36d..62ad96b 100644 --- a/src/JobsMedical.Web/Services/Scraping/MedjobsListingSource.cs +++ b/src/JobsMedical.Web/Services/Scraping/MedjobsListingSource.cs @@ -13,12 +13,12 @@ namespace JobsMedical.Web.Services.Scraping; public class MedjobsListingSource : IListingSource { private const string SitemapIndex = "https://medjobs.ir/sitemap_index.xml"; - private readonly IHttpClientFactory _http; + private readonly ScrapeHttpClients _clients; private readonly ILogger _log; - public MedjobsListingSource(IHttpClientFactory http, ILogger log) + public MedjobsListingSource(ScrapeHttpClients clients, ILogger log) { - _http = http; + _clients = clients; _log = log; } @@ -28,7 +28,7 @@ public class MedjobsListingSource : IListingSource { if (!s.MedjobsEnabled) return Array.Empty(); var max = Math.Clamp(s.MedjobsMaxAds, 1, 500); - var client = _http.CreateClient("scrape"); + var client = _clients.For(s); try {
diff --git a/src/JobsMedical.Web/Services/Scraping/ScrapeHttpClients.cs b/src/JobsMedical.Web/Services/Scraping/ScrapeHttpClients.cs
new file mode 100644
index 0000000..9d1863c
--- /dev/null
+++ b/src/JobsMedical.Web/Services/Scraping/ScrapeHttpClients.cs
@@ -0,0 +1,93 @@
+using System.Collections.Concurrent;
+using System.Net;
+using JobsMedical.Web.Models;
+
+namespace JobsMedical.Web.Services.Scraping;
+
+/// <summary>
+/// Supplies the HttpClient used by ingestion sources, optionally routed through a proxy.
+/// <para>
+/// Telegram (t.me) and some other sources are filtered in Iran, so the admin can point
+/// ingestion at a local proxy that an Xray/V2Ray client sidecar exposes (e.g.
+/// <c>socks5://xray:10808</c>). .NET's <see cref="WebProxy"/> understands socks5://, socks4://
+/// and http:// schemes, so the same code path covers all of them.
+/// </para>
+/// Clients are cached per proxy descriptor (singleton). Changing the proxy in admin settings
+/// makes the next run pick up a new client; the old one is disposed.
+/// </summary>
+public sealed class ScrapeHttpClients : IDisposable
+{
+    /// <summary>Cache key of the un-proxied client; it is never evicted when the proxy URL changes.</summary>
+    private const string DirectKey = "direct";
+
+    /// <summary>
+    /// Upper bound on how long a pooled connection is reused. Cached clients are long-lived, so
+    /// connections are recycled periodically to pick up DNS changes (e.g. the proxy host moving).
+    /// </summary>
+    private static readonly TimeSpan ConnectionLifetime = TimeSpan.FromMinutes(5);
+
+    // Lazy values make Build run once per key even when two callers race inside GetOrAdd;
+    // otherwise the losing HttpClient (and its handler) would be dropped without being disposed.
+    private readonly ConcurrentDictionary<string, Lazy<HttpClient>> _cache = new();
+
+    /// <summary>The HttpClient for the given settings — proxied when enabled, direct otherwise.</summary>
+    /// <param name="s">Current settings; only IngestProxyEnabled and IngestProxyUrl are read.</param>
+    /// <returns>A shared, cached client. Callers must not dispose it.</returns>
+    public HttpClient For(AppSetting s)
+    {
+        var key = (s.IngestProxyEnabled && !string.IsNullOrWhiteSpace(s.IngestProxyUrl))
+            ? s.IngestProxyUrl.Trim()
+            : DirectKey;
+
+        // Drop stale clients if the proxy URL changed (keep only "direct" + the current proxy).
+        // NOTE(review): a caller still holding a stale client will see its requests fail once it
+        // is disposed here — confirm ingestion runs never overlap a settings change.
+        foreach (var k in _cache.Keys)
+        {
+            if (k != DirectKey && k != key && _cache.TryRemove(k, out var stale))
+            {
+                DisposeIfBuilt(stale);
+            }
+        }
+
+        return _cache.GetOrAdd(key, k => new Lazy<HttpClient>(() => Build(k))).Value;
+    }
+
+    /// <summary>Builds the client for <paramref name="key"/>: "direct" or a proxy URL.</summary>
+    private static HttpClient Build(string key)
+    {
+        var handler = new SocketsHttpHandler
+        {
+            AutomaticDecompression = DecompressionMethods.All,
+            PooledConnectionLifetime = ConnectionLifetime,
+        };
+        if (key != DirectKey)
+        {
+            handler.Proxy = new WebProxy(key);   // socks5:// | socks4:// | http://
+            handler.UseProxy = true;
+        }
+
+        var client = new HttpClient(handler) { Timeout = TimeSpan.FromSeconds(20) };
+        client.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; HamkadrBot/1.0)");
+        return client;
+    }
+
+    /// <summary>Disposes every client built so far and empties the cache.</summary>
+    public void Dispose()
+    {
+        foreach (var entry in _cache.Values)
+        {
+            DisposeIfBuilt(entry);
+        }
+        _cache.Clear();
+    }
+
+    /// <summary>Disposes the client only if it was actually created (never forces construction).</summary>
+    private static void DisposeIfBuilt(Lazy<HttpClient> entry)
+    {
+        if (entry.IsValueCreated)
+        {
+            entry.Value.Dispose();
+        }
+    }
+}
diff --git a/src/JobsMedical.Web/Services/Scraping/SettingsService.cs b/src/JobsMedical.Web/Services/Scraping/SettingsService.cs index 01d1f5c..caa141c 100644 --- a/src/JobsMedical.Web/Services/Scraping/SettingsService.cs +++ b/src/JobsMedical.Web/Services/Scraping/SettingsService.cs @@ -44,6 +44,8 @@ public class
SettingsService s.DemoMode = incoming.DemoMode; s.WebsitesEnabled = incoming.WebsitesEnabled; s.WebsiteUrls = incoming.WebsiteUrls?.Trim(); + s.IngestProxyEnabled = incoming.IngestProxyEnabled; + s.IngestProxyUrl = incoming.IngestProxyUrl?.Trim(); s.DivarEnabled = incoming.DivarEnabled; s.DivarCity = string.IsNullOrWhiteSpace(incoming.DivarCity) ? "tehran" : incoming.DivarCity.Trim(); s.DivarQueries = incoming.DivarQueries?.Trim(); diff --git a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs index 85b0169..0bc403a 100644 --- a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs +++ b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs @@ -10,12 +10,12 @@ namespace JobsMedical.Web.Services.Scraping; /// public class TelegramListingSource : IListingSource { - private readonly IHttpClientFactory _http; + private readonly ScrapeHttpClients _clients; private readonly ILogger _log; - public TelegramListingSource(IHttpClientFactory http, ILogger log) + public TelegramListingSource(ScrapeHttpClients clients, ILogger log) { - _http = http; + _clients = clients; _log = log; } @@ -26,7 +26,7 @@ public class TelegramListingSource : IListingSource var channels = AppSetting.SplitList(s.TelegramChannels); if (!s.TelegramEnabled || channels.Count == 0) return Array.Empty(); - var client = _http.CreateClient("scrape"); + var client = _clients.For(s); var items = new List(); foreach (var ch in channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0)) { diff --git a/src/JobsMedical.Web/Services/Scraping/WebsiteListingSource.cs b/src/JobsMedical.Web/Services/Scraping/WebsiteListingSource.cs index 2e424ac..47fd144 100644 --- a/src/JobsMedical.Web/Services/Scraping/WebsiteListingSource.cs +++ b/src/JobsMedical.Web/Services/Scraping/WebsiteListingSource.cs @@ -11,12 +11,12 @@ namespace JobsMedical.Web.Services.Scraping; /// public class WebsiteListingSource : IListingSource { - 
private readonly IHttpClientFactory _http; + private readonly ScrapeHttpClients _clients; private readonly ILogger _log; - public WebsiteListingSource(IHttpClientFactory http, ILogger log) + public WebsiteListingSource(ScrapeHttpClients clients, ILogger log) { - _http = http; + _clients = clients; _log = log; } @@ -27,7 +27,7 @@ public class WebsiteListingSource : IListingSource var urls = AppSetting.SplitList(s.WebsiteUrls); if (!s.WebsitesEnabled || urls.Count == 0) return Array.Empty(); - var client = _http.CreateClient("scrape"); + var client = _clients.For(s); var items = new List(); foreach (var url in urls.Where(u => u.StartsWith("http"))) {