using System.Collections.Concurrent;
using System.Net;
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Supplies the <see cref="HttpClient"/> used by ingestion sources, optionally routed through a proxy.
/// </summary>
/// <remarks>
/// <para>
/// Telegram (t.me) and some other sources are filtered in Iran, so the admin can point
/// ingestion at a local proxy that an Xray/V2Ray client sidecar exposes (e.g.
/// <c>socks5://xray:10808</c>). .NET's <see cref="WebProxy"/> understands <c>socks5://</c>,
/// <c>socks4://</c> and <c>http://</c> schemes, so the same code path covers all of them.
/// </para>
/// <para>
/// Clients are cached per proxy descriptor (the service is a singleton). Changing the proxy
/// in admin settings makes the next run pick up a new client; the old one is disposed.
/// </para>
/// </remarks>
public sealed class ScrapeHttpClients : IDisposable
{
    /// <summary>Cache key of the un-proxied ingestion client.</summary>
    private const string DirectKey = "direct";

    /// <summary>Prefix that keeps AI clients apart from ingestion clients in the shared cache.</summary>
    private const string AiKeyPrefix = "ai:";

    /// <summary>User-Agent header sent by ingestion clients; AI clients do not set one.</summary>
    private const string IngestUserAgent = "Mozilla/5.0 (compatible; HamkadrBot/1.0)";

    private static readonly TimeSpan IngestTimeout = TimeSpan.FromSeconds(20);
    private static readonly TimeSpan AiTimeout = TimeSpan.FromSeconds(100); // LLMs can be slow

    // Keys: "direct", "<proxy url>", "ai:direct", "ai:<proxy url>".
    private readonly ConcurrentDictionary<string, HttpClient> _cache = new();

    /// <summary>
    /// Returns the cached <see cref="HttpClient"/> for an ingestion source. The client is proxied
    /// only when the source opts in AND a proxy URL is configured; otherwise it is a direct client.
    /// </summary>
    /// <param name="s">Current settings; <c>IngestProxyUrl</c> is read from it.</param>
    /// <param name="useProxy">The source's own per-source opt-in flag.</param>
    /// <returns>A shared client with a 20 second timeout. Callers must not dispose it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    /// <exception cref="UriFormatException">The configured proxy URL is not a valid URI.</exception>
    public HttpClient For(AppSetting s, bool useProxy)
    {
        ArgumentNullException.ThrowIfNull(s);

        var configuredProxy = NormalizeProxyUrl(s.IngestProxyUrl);
        var key = useProxy && configuredProxy is not null ? configuredProxy : DirectKey;

        // Drop ingestion clients whose proxy URL is no longer the configured one. Staleness is
        // judged against the configured URL, not against the key requested by this call, so a
        // source that does not opt in to the proxy never disposes the live proxy client that
        // other sources are still using. AI clients are left alone here.
        foreach (var cachedKey in _cache.Keys)
        {
            var isCurrent = cachedKey == DirectKey || cachedKey == configuredProxy;
            if (isCurrent || cachedKey.StartsWith(AiKeyPrefix, StringComparison.Ordinal))
            {
                continue;
            }

            if (_cache.TryRemove(cachedKey, out var stale))
            {
                stale.Dispose();
            }
        }

        // NOTE: GetOrAdd may invoke the factory more than once under contention; only one
        // result is stored and returned to all callers.
        return _cache.GetOrAdd(
            key,
            static k => CreateClient(k == DirectKey ? null : k, IngestTimeout, IngestUserAgent));
    }

    /// <summary>
    /// Returns the cached <see cref="HttpClient"/> for AI calls. It is routed through the proxy
    /// when <c>AiUseProxy</c> is on and a proxy URL is configured (e.g. to reach api.openai.com
    /// from Iran). Uses a longer timeout than ingestion clients and is cached per proxy URL.
    /// </summary>
    /// <param name="s">Current settings; <c>AiUseProxy</c> and <c>IngestProxyUrl</c> are read from it.</param>
    /// <returns>A shared client with a 100 second timeout. Callers must not dispose it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    /// <exception cref="UriFormatException">The configured proxy URL is not a valid URI.</exception>
    public HttpClient ForAi(AppSetting s)
    {
        ArgumentNullException.ThrowIfNull(s);

        var proxyUrl = s.AiUseProxy ? NormalizeProxyUrl(s.IngestProxyUrl) : null;
        var key = AiKeyPrefix + (proxyUrl ?? DirectKey);

        return _cache.GetOrAdd(key, _ => CreateClient(proxyUrl, AiTimeout, userAgent: null));
    }

    /// <summary>Disposes every cached client and empties the cache.</summary>
    public void Dispose()
    {
        // Remove-then-dispose per key so a client is never dropped from the cache undisposed.
        foreach (var key in _cache.Keys)
        {
            if (_cache.TryRemove(key, out var client))
            {
                client.Dispose();
            }
        }
    }

    /// <summary>Returns the trimmed proxy URL, or null when none is configured.</summary>
    private static string? NormalizeProxyUrl(string? raw) =>
        string.IsNullOrWhiteSpace(raw) ? null : raw.Trim();

    /// <summary>
    /// Builds a client with automatic decompression, the given timeout, an optional proxy
    /// (socks5:// | socks4:// | http://) and an optional User-Agent header.
    /// </summary>
    private static HttpClient CreateClient(string? proxyUrl, TimeSpan timeout, string? userAgent)
    {
        // Parse the proxy first: an invalid URL throws before any handler has been allocated.
        var proxy = proxyUrl is null ? null : new WebProxy(proxyUrl);

        var handler = new HttpClientHandler { AutomaticDecompression = DecompressionMethods.All };
        if (proxy is not null)
        {
            handler.Proxy = proxy;
            handler.UseProxy = true;
        }

        var client = new HttpClient(handler) { Timeout = timeout };
        if (userAgent is not null)
        {
            client.DefaultRequestHeaders.UserAgent.ParseAdd(userAgent);
        }

        return client;
    }
}