using System.Collections.Concurrent;
using System.Net;
using JobsMedical.Web.Models;

namespace JobsMedical.Web.Services.Scraping;

/// <summary>
/// Supplies the <see cref="HttpClient"/> used by ingestion sources, optionally routed through a proxy.
/// </summary>
/// <remarks>
/// <para>
/// Telegram (t.me) and some other sources are filtered in Iran, so the admin can point
/// ingestion at a local proxy that an Xray/V2Ray client sidecar exposes (e.g.
/// <c>socks5://xray:10808</c>). .NET's <see cref="WebProxy"/> understands <c>socks5://</c>,
/// <c>socks4://</c> and <c>http://</c> schemes, so the same code path covers all of them.
/// </para>
/// <para>
/// Clients are cached per proxy descriptor (singleton). Changing the proxy in admin settings
/// makes the next ingestion run pick up a new client; the old ingestion client is disposed.
/// AI clients are never evicted by a settings change: they stay cached until
/// <see cref="Dispose"/> is called.
/// </para>
/// </remarks>
public sealed class ScrapeHttpClients : IDisposable
{
    /// <summary>Cache key of the un-proxied ingestion client.</summary>
    private const string DirectKey = "direct";

    /// <summary>Prefix that separates AI client keys from ingestion client keys.</summary>
    private const string AiKeyPrefix = "ai:";

    /// <summary>User-Agent sent by ingestion clients (AI clients send none).</summary>
    private const string IngestUserAgent = "Mozilla/5.0 (compatible; HamkadrBot/1.0)";

    private static readonly TimeSpan IngestTimeout = TimeSpan.FromSeconds(20);

    // LLMs can be slow, so AI calls get a much longer timeout than scraping.
    private static readonly TimeSpan AiTimeout = TimeSpan.FromSeconds(100);

    /// <summary>
    /// Cached clients. Ingestion clients are keyed by <c>"direct"</c> or the trimmed proxy URL;
    /// AI clients are keyed by <c>"ai:"</c> followed by <c>"direct"</c> or the trimmed proxy URL.
    /// </summary>
    private readonly ConcurrentDictionary<string, HttpClient> _cache = new();

    /// <summary>
    /// The HttpClient for a source — proxied only when that source opts in AND a proxy
    /// URL is configured; otherwise a direct client.
    /// </summary>
    /// <param name="s">Settings carrying the configured <c>IngestProxyUrl</c>.</param>
    /// <param name="useProxy">The source's own per-source proxy flag.</param>
    /// <returns>A cached client; callers must not dispose it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    public HttpClient For(AppSetting s, bool useProxy)
    {
        ArgumentNullException.ThrowIfNull(s);

        var key = useProxy && !string.IsNullOrWhiteSpace(s.IngestProxyUrl)
            ? s.IngestProxyUrl.Trim()
            : DirectKey;

        // Drop stale ingestion clients if the proxy URL changed
        // (keep "direct", the current proxy, and every AI client).
        foreach (var cachedKey in _cache.Keys)
        {
            var isStale = cachedKey != DirectKey
                && cachedKey != key
                && !cachedKey.StartsWith(AiKeyPrefix, StringComparison.Ordinal);

            if (isStale && _cache.TryRemove(cachedKey, out var stale))
            {
                stale.Dispose();
            }
        }

        return GetOrCreate(key, Build);
    }

    /// <summary>
    /// HttpClient for AI calls — routed through the proxy when <c>AiUseProxy</c> is on (e.g. to
    /// reach api.openai.com from Iran). Longer timeout; cached per proxy URL.
    /// </summary>
    /// <param name="s">Settings carrying <c>AiUseProxy</c> and <c>IngestProxyUrl</c>.</param>
    /// <returns>A cached client; callers must not dispose it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    public HttpClient ForAi(AppSetting s)
    {
        ArgumentNullException.ThrowIfNull(s);

        var proxyUrl = s.AiUseProxy && !string.IsNullOrWhiteSpace(s.IngestProxyUrl)
            ? s.IngestProxyUrl.Trim()
            : null;
        var key = AiKeyPrefix + (proxyUrl ?? DirectKey);

        return GetOrCreate(key, _ => CreateClient(proxyUrl, AiTimeout));
    }

    /// <summary>Disposes every cached client and empties the cache.</summary>
    public void Dispose()
    {
        // Remove-then-dispose per key, so a client added concurrently is either disposed here
        // or remains in the cache — never dropped without disposal as with Values + Clear().
        foreach (var key in _cache.Keys)
        {
            if (_cache.TryRemove(key, out var client))
            {
                client.Dispose();
            }
        }
    }

    /// <summary>
    /// Returns the cached client for <paramref name="key"/>, creating it on a miss.
    /// </summary>
    /// <remarks>
    /// <c>GetOrAdd(key, factory)</c> may run the factory on several threads for the same key and
    /// silently discard the losers, which would leak undisposed clients. Here the client is
    /// built first, published atomically, and disposed if another thread won the race.
    /// </remarks>
    private HttpClient GetOrCreate(string key, Func<string, HttpClient> factory)
    {
        if (_cache.TryGetValue(key, out var cached))
        {
            return cached;
        }

        var created = factory(key);
        var published = _cache.GetOrAdd(key, created);
        if (!ReferenceEquals(published, created))
        {
            created.Dispose();
        }

        return published;
    }

    /// <summary>
    /// Builds an ingestion client: direct when <paramref name="key"/> is <c>"direct"</c>,
    /// otherwise proxied through the URL held in <paramref name="key"/>.
    /// </summary>
    private static HttpClient Build(string key)
    {
        var client = CreateClient(key == DirectKey ? null : key, IngestTimeout);
        client.DefaultRequestHeaders.UserAgent.ParseAdd(IngestUserAgent);
        return client;
    }

    /// <summary>
    /// Creates a client with automatic decompression and the given timeout, routed through
    /// <paramref name="proxyUrl"/> when it is not null.
    /// </summary>
    private static HttpClient CreateClient(string? proxyUrl, TimeSpan timeout)
    {
        // Parse the proxy URL before allocating the handler, so a malformed URL
        // cannot leave an undisposed handler behind.
        var proxy = proxyUrl is null ? null : new WebProxy(proxyUrl); // socks5:// | socks4:// | http://

        var handler = new HttpClientHandler { AutomaticDecompression = DecompressionMethods.All };
        if (proxy is not null)
        {
            handler.Proxy = proxy;
            handler.UseProxy = true;
        }

        return new HttpClient(handler) { Timeout = timeout };
    }
}