Files
hamkadr/src/JobsMedical.Web/Services/Scraping/ScrapeHttpClients.cs
T
soroush.asadi 0c49b89891
CI/CD / CI · dotnet build (push) Successful in 1m46s
CI/CD / Deploy · hamkadr (push) Failing after 1m58s
[AI] Route AI calls through the Xray/V2Ray proxy (reach OpenAI from Iran)
Add AiUseProxy setting + a toggle in the AI settings section. ScrapeHttpClients.ForAi(settings) returns a proxied HttpClient (reusing IngestProxyUrl, 100s timeout) when AiUseProxy is on, otherwise direct; AI-cache keys are protected from the scrape-client cleanup. OpenAiCompatibleAuditor now uses it, so the AI auditor (e.g. api.openai.com) is reachable through the same Xray sidecar that serves Telegram. Migration adds the column.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 22:55:07 +03:30

72 lines
3.0 KiB
C#

using System.Collections.Concurrent;
using System.Net;
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Supplies the cached <see cref="HttpClient"/> instances used by ingestion sources and by AI
/// calls, each optionally routed through a proxy.
///
/// Telegram (t.me) and some other sources are filtered in Iran, so the admin can point
/// ingestion at a local proxy that an Xray/V2Ray client sidecar exposes (e.g.
/// <c>socks5://xray:10808</c>). .NET's WebProxy understands <c>socks5://</c>, <c>socks4://</c>
/// and <c>http://</c> schemes, so the same code path covers all of them.
///
/// Clients are cached per descriptor: <c>"direct"</c> or the proxy URL for scraping, and the
/// same descriptor prefixed with <c>"ai:"</c> for AI calls. When the configured proxy URL
/// changes, the next <see cref="For"/> call disposes scrape clients built for older URLs.
/// AI clients are never evicted; they live until <see cref="Dispose"/>.
/// </summary>
/// <remarks>
/// Eviction disposes a retired client immediately, so a request still running on it may fail.
/// </remarks>
public sealed class ScrapeHttpClients : IDisposable
{
    /// <summary>Cache key of the un-proxied scrape client; also the suffix of the direct AI key.</summary>
    private const string DirectKey = "direct";

    /// <summary>Prefix that marks a cache entry as an AI client (exempt from scrape eviction).</summary>
    private const string AiKeyPrefix = "ai:";

    private const string ScrapeUserAgent = "Mozilla/5.0 (compatible; HamkadrBot/1.0)";

    private static readonly TimeSpan ScrapeTimeout = TimeSpan.FromSeconds(20);

    /// <summary>AI calls get a longer timeout because LLM responses can be slow.</summary>
    private static readonly TimeSpan AiTimeout = TimeSpan.FromSeconds(100);

    private readonly ConcurrentDictionary<string, HttpClient> _cache = new();

    /// <summary>The HttpClient for a source — proxied only when that source opts in AND a proxy
    /// URL is configured; otherwise a direct client. Pass the source's own per-source flag.</summary>
    /// <param name="s">Application settings; <c>IngestProxyUrl</c> is read from it.</param>
    /// <param name="useProxy">Whether the calling source wants to go through the proxy.</param>
    /// <returns>A cached client with a 20 second timeout and the bot User-Agent.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    public HttpClient For(AppSetting s, bool useProxy)
    {
        ArgumentNullException.ThrowIfNull(s);

        var configuredProxy = NormalizeProxyUrl(s.IngestProxyUrl);
        var key = useProxy && configuredProxy is not null ? configuredProxy : DirectKey;

        // Drop scrape clients built for a proxy URL that is no longer configured. The check is
        // against the configured URL rather than this call's key, so a source that does not
        // opt in to the proxy cannot evict the live proxied client other sources are sharing.
        foreach (var cachedKey in _cache.Keys)
        {
            var isStale = cachedKey != DirectKey
                && cachedKey != configuredProxy
                && !cachedKey.StartsWith(AiKeyPrefix, StringComparison.Ordinal);

            if (isStale && _cache.TryRemove(cachedKey, out var stale))
            {
                stale.Dispose();
            }
        }

        return GetOrCreate(key, key == DirectKey ? null : key, ScrapeTimeout, ScrapeUserAgent);
    }

    /// <summary>HttpClient for AI calls — routed through the proxy when AiUseProxy is on (e.g. to
    /// reach api.openai.com from Iran). Longer timeout; cached per proxy URL.</summary>
    /// <param name="s">Application settings; <c>AiUseProxy</c> and <c>IngestProxyUrl</c> are read.</param>
    /// <returns>A cached client with a 100 second timeout and no default User-Agent.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    public HttpClient ForAi(AppSetting s)
    {
        ArgumentNullException.ThrowIfNull(s);

        var proxyUrl = s.AiUseProxy ? NormalizeProxyUrl(s.IngestProxyUrl) : null;
        var key = AiKeyPrefix + (proxyUrl ?? DirectKey);

        return GetOrCreate(key, proxyUrl, AiTimeout, userAgent: null);
    }

    /// <summary>Disposes every cached client and empties the cache.</summary>
    public void Dispose()
    {
        // Remove-then-dispose per key, so an entry added while this runs is either disposed
        // here or left in the cache, never dropped without being disposed.
        foreach (var key in _cache.Keys)
        {
            if (_cache.TryRemove(key, out var client))
            {
                client.Dispose();
            }
        }
    }

    /// <summary>Trims a configured proxy URL; returns null when it is missing or blank.</summary>
    private static string? NormalizeProxyUrl(string? raw) =>
        string.IsNullOrWhiteSpace(raw) ? null : raw.Trim();

    /// <summary>Returns the client cached under <paramref name="key"/>, creating it on a miss.</summary>
    private HttpClient GetOrCreate(string key, string? proxyUrl, TimeSpan timeout, string? userAgent)
    {
        if (_cache.TryGetValue(key, out var cached))
        {
            return cached;
        }

        // GetOrAdd with a factory delegate may run the factory more than once under contention
        // and silently drop the extra client. Creating first and publishing by value lets the
        // caller that loses the race dispose its own instance.
        var created = CreateClient(proxyUrl, timeout, userAgent);
        var winner = _cache.GetOrAdd(key, created);
        if (!ReferenceEquals(winner, created))
        {
            created.Dispose();
        }

        return winner;
    }

    /// <summary>Builds a client with automatic decompression, an optional proxy and an optional
    /// default User-Agent header.</summary>
    private static HttpClient CreateClient(string? proxyUrl, TimeSpan timeout, string? userAgent)
    {
        // Build the proxy before the handler: if the URL is rejected, no handler has been
        // allocated yet, so nothing is left undisposed.
        var proxy = proxyUrl is null ? null : new WebProxy(proxyUrl); // socks5:// | socks4:// | http://

        var handler = new HttpClientHandler { AutomaticDecompression = DecompressionMethods.All };
        if (proxy is not null)
        {
            handler.Proxy = proxy;
            handler.UseProxy = true;
        }

        var client = new HttpClient(handler) { Timeout = timeout };
        if (userAgent is not null)
        {
            client.DefaultRequestHeaders.UserAgent.ParseAdd(userAgent);
        }

        return client;
    }
}