0c49b89891
Add AiUseProxy setting + a toggle in the AI settings section. ScrapeHttpClients.ForAi(settings) returns a proxied HttpClient (reusing IngestProxyUrl, 100s timeout) when AiUseProxy is on, otherwise direct; AI-cache keys are protected from the scrape-client cleanup. OpenAiCompatibleAuditor now uses it, so the AI auditor (e.g. api.openai.com) is reachable through the same Xray sidecar that serves Telegram. Migration adds the column. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
72 lines
3.0 KiB
C#
72 lines
3.0 KiB
C#
using System.Collections.Concurrent;
|
|
using System.Net;
|
|
using JobsMedical.Web.Models;
|
|
|
|
namespace JobsMedical.Web.Services.Scraping;
|
|
|
|
/// <summary>
/// Supplies the HttpClient instances used by ingestion sources and by the AI auditor,
/// optionally routed through a proxy.
///
/// Telegram (t.me) and some other sources are filtered in Iran, so the admin can point
/// ingestion at a local proxy that an Xray/V2Ray client sidecar exposes (e.g.
/// <c>socks5://xray:10808</c>). .NET's WebProxy understands <c>socks5://</c>, <c>socks4://</c>
/// and <c>http://</c> schemes, so the same code path covers all of them.
///
/// Clients are cached per proxy descriptor (this type is meant to be a singleton). Scrape
/// clients and AI clients live in the same cache under separate key spaces (AI keys carry an
/// <c>ai:</c> prefix). Changing the proxy in admin settings makes the next call pick up a new
/// client; the superseded proxied client of the same kind is disposed. The direct
/// (non-proxied) client of each kind is always kept.
/// </summary>
public sealed class ScrapeHttpClients : IDisposable
{
    /// <summary>Cache key of the non-proxied scrape client.</summary>
    private const string DirectKey = "direct";

    /// <summary>Prefix that separates AI client keys from scrape client keys.</summary>
    private const string AiPrefix = "ai:";

    /// <summary>Cache key of the non-proxied AI client.</summary>
    private const string AiDirectKey = AiPrefix + DirectKey;

    private const string ScrapeUserAgent = "Mozilla/5.0 (compatible; HamkadrBot/1.0)";

    private static readonly TimeSpan ScrapeTimeout = TimeSpan.FromSeconds(20);

    // LLM endpoints can be slow, so AI calls get a much longer timeout than scraping.
    private static readonly TimeSpan AiTimeout = TimeSpan.FromSeconds(100);

    private readonly ConcurrentDictionary<string, HttpClient> _cache = new();

    /// <summary>The HttpClient for a source — proxied only when that source opts in AND a proxy
    /// URL is configured; otherwise a direct client. Pass the source's own per-source flag.</summary>
    /// <param name="s">Application settings; only <c>IngestProxyUrl</c> is read.</param>
    /// <param name="useProxy">The calling source's own "use proxy" flag.</param>
    /// <returns>A cached, shared client. Callers must not dispose it.</returns>
    public HttpClient For(AppSetting s, bool useProxy)
    {
        var key = useProxy && !string.IsNullOrWhiteSpace(s.IngestProxyUrl)
            ? s.IngestProxyUrl.Trim()
            : DirectKey;

        // Drop scrape clients bound to a previous proxy URL (AI clients are left alone).
        EvictStale(key, aiClients: false);

        return GetOrCreate(
            key,
            () => Create(key == DirectKey ? null : key, ScrapeTimeout, ScrapeUserAgent));
    }

    /// <summary>HttpClient for AI calls — routed through the proxy when AiUseProxy is on (e.g. to
    /// reach api.openai.com from Iran). Longer timeout; cached per proxy URL.</summary>
    /// <param name="s">Application settings; <c>AiUseProxy</c> and <c>IngestProxyUrl</c> are read.</param>
    /// <returns>A cached, shared client. Callers must not dispose it.</returns>
    public HttpClient ForAi(AppSetting s)
    {
        var proxyUrl = s.AiUseProxy && !string.IsNullOrWhiteSpace(s.IngestProxyUrl)
            ? s.IngestProxyUrl.Trim()
            : null;
        var key = AiPrefix + (proxyUrl ?? DirectKey);

        // AI keys are skipped by the scrape-side cleanup, so stale AI clients must be evicted
        // here; otherwise every proxy URL change would leak a client and its handler.
        EvictStale(key, aiClients: true);

        return GetOrCreate(key, () => Create(proxyUrl, AiTimeout, userAgent: null));
    }

    /// <summary>Disposes every cached client and empties the cache.</summary>
    public void Dispose()
    {
        // Remove-then-dispose, so a client is disposed only by whoever took it out of the cache.
        foreach (var key in _cache.Keys)
        {
            if (_cache.TryRemove(key, out var client))
            {
                client.Dispose();
            }
        }
    }

    /// <summary>
    /// Removes and disposes cached clients of one kind (scrape or AI) that are neither the
    /// direct client of that kind nor the client for <paramref name="currentKey"/>.
    /// </summary>
    private void EvictStale(string currentKey, bool aiClients)
    {
        var directKey = aiClients ? AiDirectKey : DirectKey;

        // ConcurrentDictionary.Keys is a snapshot, so removing while iterating is safe.
        foreach (var candidate in _cache.Keys)
        {
            if (candidate == currentKey || candidate == directKey)
            {
                continue;
            }

            // Ordinal: cache keys are identifiers, not linguistic text.
            if (candidate.StartsWith(AiPrefix, StringComparison.Ordinal) != aiClients)
            {
                continue;
            }

            if (_cache.TryRemove(candidate, out var stale))
            {
                stale.Dispose();
            }
        }
    }

    /// <summary>
    /// Returns the cached client for <paramref name="key"/>, creating it on a miss. If another
    /// thread wins the race to publish a client for the same key, the one built here is
    /// disposed instead of being silently dropped.
    /// </summary>
    private HttpClient GetOrCreate(string key, Func<HttpClient> factory)
    {
        if (_cache.TryGetValue(key, out var existing))
        {
            return existing;
        }

        var created = factory();
        var published = _cache.GetOrAdd(key, created);
        if (!ReferenceEquals(published, created))
        {
            created.Dispose();
        }

        return published;
    }

    /// <summary>
    /// Builds a client with automatic decompression, an optional proxy, the given timeout and
    /// an optional User-Agent header.
    /// </summary>
    /// <exception cref="UriFormatException">The proxy URL is not a valid URI.</exception>
    private static HttpClient Create(string? proxyUrl, TimeSpan timeout, string? userAgent)
    {
        // Parse the proxy first so a malformed URL fails before any handler is allocated.
        var proxy = proxyUrl is null ? null : new WebProxy(proxyUrl); // socks5:// | socks4:// | http://

        var handler = new HttpClientHandler { AutomaticDecompression = DecompressionMethods.All };
        if (proxy is not null)
        {
            handler.Proxy = proxy;
            handler.UseProxy = true;
        }

        var client = new HttpClient(handler) { Timeout = timeout };
        if (userAgent is not null)
        {
            client.DefaultRequestHeaders.UserAgent.ParseAdd(userAgent);
        }

        return client;
    }
}
|