Real channel fetch (Telegram/Bale/Divar) + AI-audited automation engine + CI/CD
- Fetch: Telegram via t.me/s, Bale via Bot API, Divar via web-search (HttpClient, config-gated, graceful)
- AI layer: DB-backed AppSetting (mode auto/manual, thresholds, AI endpoint/model/key/prompt/framework, auto-approve); OpenAI-compatible IAiAuditor (self-host/Iranian endpoints; fails safe to manual)
- Pipeline: fetch → dedupe(hash) → parse → validate → AI audit → Discard/Flag/Queue/auto-publish (resolve-or-create facility)
- Admin: /Admin/Settings automation+AI panel; queue shows confidence + AI verdict; flagged section
- CI/CD: Dockerfile, docker-compose.prod.yml, .gitea/workflows/ci-cd.yml, nginx vhost, DEPLOY.md; forwarded headers + /healthz + prod reference-only seed; ports 22/80/443 only

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,68 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace JobsMedical.Web.Services.Scraping;
|
||||
|
||||
/// <summary>
/// Settings consumed by <see cref="BaleListingSource"/>.
/// </summary>
public class BaleOptions
{
    // Bale Bot API host used when no other value is assigned to BaseUrl.
    private const string DefaultBaseUrl = "https://tapi.bale.ai";

    /// <summary>
    /// Turns the source on. <see cref="BaleListingSource"/> additionally requires a
    /// non-blank <see cref="BotToken"/> before it reports itself as enabled.
    /// </summary>
    public bool Enabled { get; set; }

    /// <summary>Bot token inserted into the request path; <c>null</c> until assigned.</summary>
    public string? BotToken { get; set; }

    /// <summary>Bale Bot API host; trailing slashes are trimmed by the consumer.</summary>
    public string BaseUrl { get; set; } = DefaultBaseUrl;
}
|
||||
|
||||
/// <summary>
/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. The bot must
/// be a member/admin of the channels it should read. Pulls text from messages and channel posts.
/// </summary>
public class BaleListingSource : IListingSource
{
    // Updates whose trimmed text is shorter than this many characters are skipped.
    private const int MinTextLength = 15;

    private readonly BaleOptions _opts;
    private readonly IHttpClientFactory _http;
    private readonly ILogger<BaleListingSource> _log;

    /// <summary>Captures the bound options, the HTTP client factory and the logger.</summary>
    /// <param name="opts">Options wrapper; its <c>Value</c> is read once here.</param>
    /// <param name="http">Factory used to create the named "scrape" client.</param>
    /// <param name="log">Logger for disabled/failed fetch diagnostics.</param>
    public BaleListingSource(IOptions<BaleOptions> opts, IHttpClientFactory http,
        ILogger<BaleListingSource> log)
    {
        _opts = opts.Value;
        _http = http;
        _log = log;
    }

    /// <summary>Label of this source; also attached to every item it produces.</summary>
    public string Name => "بله";

    /// <summary>True only when the options enable the source and a non-blank bot token is set.</summary>
    public bool Enabled => _opts.Enabled && !string.IsNullOrWhiteSpace(_opts.BotToken);

    /// <summary>
    /// Calls the Bot API <c>getUpdates</c> endpoint and converts each update's text into a
    /// <see cref="ScrapedItem"/>.
    /// </summary>
    /// <param name="ct">Token passed to the HTTP request.</param>
    /// <returns>
    /// One item per update whose trimmed text has at least <see cref="MinTextLength"/> characters.
    /// An empty list when the source is disabled, the response has no <c>result</c> array, or the
    /// fetch fails. Failures are logged as warnings and never thrown; this includes cancellation,
    /// because every <see cref="Exception"/> is caught.
    /// </returns>
    public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
    {
        if (!Enabled)
        {
            _log.LogInformation("Bale source disabled/unconfigured.");
            return Array.Empty<ScrapedItem>();
        }

        try
        {
            var client = _http.CreateClient("scrape");

            // NOTE(review): no offset parameter is sent, so updates are never acknowledged;
            // presumably the server keeps returning the same pending updates — confirm whether
            // an offset should be tracked.
            var url = $"{_opts.BaseUrl.TrimEnd('/')}/bot{_opts.BotToken}/getUpdates";
            var body = await client.GetStringAsync(url, ct);

            using var doc = JsonDocument.Parse(body);
            var root = doc.RootElement;

            // TryGetProperty throws on non-object elements, so check the kind first.
            if (root.ValueKind != JsonValueKind.Object
                || !root.TryGetProperty("result", out var result)
                || result.ValueKind != JsonValueKind.Array)
            {
                return Array.Empty<ScrapedItem>();
            }

            var items = new List<ScrapedItem>();
            foreach (var update in result.EnumerateArray())
            {
                // Channel posts take precedence over direct messages.
                var text = (TextOf(update, "channel_post") ?? TextOf(update, "message"))?.Trim();
                if (text is { Length: >= MinTextLength })
                {
                    items.Add(new ScrapedItem(Name, text));
                }
            }

            return items;
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: one broken source must not fail the caller.
            _log.LogWarning(ex, "Bale fetch failed.");
            return Array.Empty<ScrapedItem>();
        }
    }

    /// <summary>
    /// Reads <c>update[key].text</c> when it is a JSON string.
    /// </summary>
    /// <param name="update">A single element of the <c>result</c> array.</param>
    /// <param name="key">Property holding the message object, e.g. <c>"channel_post"</c>.</param>
    /// <returns>
    /// The text, or <c>null</c> when any step of the path is missing or has an unexpected JSON kind.
    /// </returns>
    /// <remarks>
    /// The <c>ValueKind</c> guards matter: <see cref="JsonElement.TryGetProperty(string, out JsonElement)"/>
    /// throws <see cref="InvalidOperationException"/> on non-object elements (for example
    /// <c>"message": null</c>), which would otherwise abort the whole batch.
    /// </remarks>
    private static string? TextOf(JsonElement update, string key)
        => update.ValueKind == JsonValueKind.Object
           && update.TryGetProperty(key, out var message)
           && message.ValueKind == JsonValueKind.Object
           && message.TryGetProperty("text", out var text)
           && text.ValueKind == JsonValueKind.String
            ? text.GetString()
            : null;
}
|
||||
Reference in New Issue
Block a user