b1e474ba33
Each ingestion source now decides independently whether to route through the proxy: added TelegramUseProxy/BaleUseProxy/DivarUseProxy/MedjobsUseProxy/WebsitesUseProxy flags (migration). ScrapeHttpClients.For(s, useProxy) takes the source's own flag; a source is proxied only when its flag is on AND a proxy URL is set. Settings 'sources' tab: removed the global enable checkbox, kept the proxy address field, and added an «از پروکسی استفاده شود» checkbox under each source. Old IngestProxyEnabled column kept for compatibility but no longer gates routing. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
57 lines
2.2 KiB
C#
57 lines
2.2 KiB
C#
using System.Collections.Concurrent;
|
|
using System.Net;
|
|
using JobsMedical.Web.Models;
|
|
|
|
namespace JobsMedical.Web.Services.Scraping;
|
|
|
|
/// <summary>
|
|
/// Supplies the HttpClient used by ingestion sources, optionally routed through a proxy.
|
|
///
|
|
/// Telegram (t.me) and some other sources are filtered in Iran, so the admin can point
|
|
/// ingestion at a local proxy that an Xray/V2Ray client sidecar exposes (e.g.
|
|
/// <c>socks5://xray:10808</c>). .NET's WebProxy understands <c>socks5://</c>, <c>socks4://</c>
|
|
/// and <c>http://</c> schemes, so the same code path covers all of them.
|
|
///
|
|
/// Clients are cached per proxy descriptor (singleton). Changing the proxy in admin settings
|
|
/// makes the next run pick up a new client; the old one is disposed.
|
|
/// </summary>
|
|
public sealed class ScrapeHttpClients : IDisposable
{
    /// <summary>Cache key of the client that has no explicit proxy assigned.</summary>
    private const string DirectKey = "direct";

    /// <summary>Clients keyed by trimmed proxy URL, plus <see cref="DirectKey"/> for the non-proxied one.</summary>
    private readonly ConcurrentDictionary<string, HttpClient> _cache = new();

    /// <summary>
    /// Returns the HttpClient for a source: proxied only when that source opts in AND a proxy
    /// URL is configured; otherwise the direct client. Pass the source's own per-source flag.
    /// </summary>
    /// <param name="s">Settings carrying the configured proxy URL (<c>IngestProxyUrl</c>).</param>
    /// <param name="useProxy">The calling source's own "use proxy" flag.</param>
    /// <returns>A cached client owned by this instance; callers must not dispose it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    public HttpClient For(AppSetting s, bool useProxy)
    {
        ArgumentNullException.ThrowIfNull(s);

        // The configured proxy, independent of whether THIS source wants to use it.
        var proxyUrl = string.IsNullOrWhiteSpace(s.IngestProxyUrl) ? null : s.IngestProxyUrl.Trim();
        var key = useProxy && proxyUrl is not null ? proxyUrl : DirectKey;

        // Drop stale clients only when the configured proxy URL itself changed (or was cleared).
        // Comparing against the per-call key instead would make every non-proxied source dispose
        // the proxy client that proxied sources are still sharing.
        foreach (var cachedKey in _cache.Keys)
        {
            if (cachedKey != DirectKey && cachedKey != proxyUrl && _cache.TryRemove(cachedKey, out var stale))
            {
                stale.Dispose();
            }
        }

        if (_cache.TryGetValue(key, out var cached))
        {
            return cached;
        }

        // GetOrAdd with a factory may run the factory more than once under contention and would
        // leak the losing client, so build explicitly and dispose our instance if another won.
        var created = Build(key);
        var winner = _cache.GetOrAdd(key, created);
        if (!ReferenceEquals(winner, created))
        {
            created.Dispose();
        }

        return winner;
    }

    /// <summary>
    /// Creates a client for the given cache key: <see cref="DirectKey"/> gets no explicit proxy,
    /// any other key is treated as a proxy URL and handed to <see cref="WebProxy"/>.
    /// </summary>
    private static HttpClient Build(string key)
    {
        // Parse the proxy URL before allocating the handler so a malformed URL does not
        // leave an undisposed handler behind.
        var proxy = key != DirectKey ? new WebProxy(key) : null; // socks5:// | socks4:// | http://

        var handler = new HttpClientHandler { AutomaticDecompression = DecompressionMethods.All };
        if (proxy is not null)
        {
            handler.Proxy = proxy;
            handler.UseProxy = true;
        }

        var client = new HttpClient(handler) { Timeout = TimeSpan.FromSeconds(20) };
        client.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; HamkadrBot/1.0)");
        return client;
    }

    /// <summary>Disposes every cached client and empties the cache.</summary>
    public void Dispose()
    {
        // Remove-then-dispose so each client is disposed exactly once.
        foreach (var key in _cache.Keys)
        {
            if (_cache.TryRemove(key, out var client))
            {
                client.Dispose();
            }
        }
    }
}
|