using System.Collections.Concurrent;
using System.Net;
using JobsMedical.Web.Models;

namespace JobsMedical.Web.Services.Scraping;

/// <summary>
/// Supplies the HttpClient used by ingestion sources, optionally routed through a proxy.
/// </summary>
/// <remarks>
/// <para>
/// Telegram (t.me) and some other sources are filtered in Iran, so the admin can point
/// ingestion at a local proxy that an Xray/V2Ray client sidecar exposes (e.g.
/// <c>socks5://xray:10808</c>). .NET's WebProxy understands <c>socks5://</c>, <c>socks4://</c>
/// and <c>http://</c> schemes, so the same code path covers all of them.
/// </para>
/// <para>
/// Clients are cached per proxy descriptor (singleton). Changing the proxy in admin settings
/// makes the next run pick up a new client; the old one is disposed.
/// </para>
/// </remarks>
public sealed class ScrapeHttpClients : IDisposable
{
    /// <summary>Cache key of the client that talks to sources without any proxy.</summary>
    private const string DirectKey = "direct";

    /// <summary>Cached clients, keyed by trimmed proxy URL (or <see cref="DirectKey"/>).</summary>
    private readonly ConcurrentDictionary<string, HttpClient> _cache = new();

    /// <summary>
    /// Serializes "evict stale clients, then get-or-create" so that a client is built at most
    /// once per key. <c>GetOrAdd</c> on its own may run the factory several times under
    /// contention, and the losing clients would never be disposed.
    /// </summary>
    private readonly object _gate = new();

    /// <summary>
    /// The HttpClient for a source — proxied only when that source opts in AND a proxy
    /// URL is configured; otherwise a direct client. Pass the source's own per-source flag.
    /// </summary>
    /// <param name="s">Application settings; <c>IngestProxyUrl</c> is read only when <paramref name="useProxy"/> is true.</param>
    /// <param name="useProxy">Whether the calling source has opted in to using the proxy.</param>
    /// <returns>A cached client owned by this instance; callers must not dispose it.</returns>
    public HttpClient For(AppSetting s, bool useProxy)
    {
        var key = (useProxy && !string.IsNullOrWhiteSpace(s.IngestProxyUrl))
            ? s.IngestProxyUrl.Trim()
            : DirectKey;

        lock (_gate)
        {
            // Drop stale clients if the proxy URL changed (keep only "direct" + the current proxy).
            foreach (var k in _cache.Keys)
            {
                if (k != DirectKey && k != key && _cache.TryRemove(k, out var stale))
                {
                    stale.Dispose();
                }
            }

            // Under the lock the factory runs at most once per key.
            return _cache.GetOrAdd(key, Build);
        }
    }

    /// <summary>
    /// Creates the client for a cache key: a direct client for <see cref="DirectKey"/>,
    /// otherwise one whose handler is routed through the proxy URL given by the key.
    /// </summary>
    /// <param name="key">Either <see cref="DirectKey"/> or a proxy URL.</param>
    /// <returns>A new client with a 20 second timeout and the bot User-Agent header.</returns>
    private static HttpClient Build(string key)
    {
        // Construct the proxy before the handler: if the URL is malformed the WebProxy
        // constructor throws, and nothing disposable has been allocated yet.
        WebProxy? proxy = key != DirectKey
            ? new WebProxy(key) // socks5:// | socks4:// | http://
            : null;

        var handler = new HttpClientHandler { AutomaticDecompression = DecompressionMethods.All };
        if (proxy is not null)
        {
            handler.Proxy = proxy;
            handler.UseProxy = true;
        }

        // The client owns the handler and disposes it together with itself.
        var client = new HttpClient(handler) { Timeout = TimeSpan.FromSeconds(20) };
        client.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; HamkadrBot/1.0)");
        return client;
    }

    /// <summary>Disposes every cached client and empties the cache.</summary>
    public void Dispose()
    {
        lock (_gate)
        {
            foreach (var client in _cache.Values)
            {
                client.Dispose();
            }

            _cache.Clear();
        }
    }
}