SplitList(string? s) => string.IsNullOrWhiteSpace(s)
+ ? new()
+ : s.Split(new[] { '\n', '\r', ',', '،' }, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
+ .ToList();
+
public const string DefaultPrompt = """
تو دستیار بررسی آگهیهای کاری حوزه درمان برای پلتفرم «همکادر» هستی.
هر آگهی خام را بخوان و تصمیم بگیر:
diff --git a/src/JobsMedical.Web/Pages/Admin/Index.cshtml b/src/JobsMedical.Web/Pages/Admin/Index.cshtml
index e40c974..440d9bb 100644
--- a/src/JobsMedical.Web/Pages/Admin/Index.cshtml
+++ b/src/JobsMedical.Web/Pages/Admin/Index.cshtml
@@ -26,16 +26,10 @@
+
+
+ منابع جمعآوری (اسکرپ کانالها)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs
index 2f38445..4a975cc 100644
--- a/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs
+++ b/src/JobsMedical.Web/Pages/Admin/Settings.cshtml.cs
@@ -20,6 +20,16 @@ public class SettingsModel : PageModel
[BindProperty] public string? AiModel { get; set; }
[BindProperty] public string AiSystemPrompt { get; set; } = "";
[BindProperty] public bool AiAutoApprove { get; set; }
+ // Channel scraping sources
+ [BindProperty] public bool AutoIngestEnabled { get; set; }
+ [BindProperty] public int IngestIntervalMinutes { get; set; } = 30;
+ [BindProperty] public bool TelegramEnabled { get; set; }
+ [BindProperty] public string? TelegramChannels { get; set; }
+ [BindProperty] public bool BaleEnabled { get; set; }
+ [BindProperty] public string? BaleBotToken { get; set; }
+ [BindProperty] public bool DivarEnabled { get; set; }
+ [BindProperty] public string? DivarCity { get; set; }
+ [BindProperty] public string? DivarQueries { get; set; }
[TempData] public string? Saved { get; set; }
public async Task OnGetAsync()
@@ -33,6 +43,15 @@ public class SettingsModel : PageModel
AiModel = s.AiModel;
AiSystemPrompt = s.AiSystemPrompt;
AiAutoApprove = s.AiAutoApprove;
+ AutoIngestEnabled = s.AutoIngestEnabled;
+ IngestIntervalMinutes = s.IngestIntervalMinutes;
+ TelegramEnabled = s.TelegramEnabled;
+ TelegramChannels = s.TelegramChannels;
+ BaleEnabled = s.BaleEnabled;
+ BaleBotToken = s.BaleBotToken;
+ DivarEnabled = s.DivarEnabled;
+ DivarCity = s.DivarCity;
+ DivarQueries = s.DivarQueries;
}
public async Task OnPostAsync()
@@ -47,6 +66,15 @@ public class SettingsModel : PageModel
AiModel = AiModel,
AiSystemPrompt = AiSystemPrompt,
AiAutoApprove = AiAutoApprove,
+ AutoIngestEnabled = AutoIngestEnabled,
+ IngestIntervalMinutes = IngestIntervalMinutes,
+ TelegramEnabled = TelegramEnabled,
+ TelegramChannels = TelegramChannels,
+ BaleEnabled = BaleEnabled,
+ BaleBotToken = BaleBotToken,
+ DivarEnabled = DivarEnabled,
+ DivarCity = DivarCity,
+ DivarQueries = DivarQueries,
});
Saved = "تنظیمات ذخیره شد.";
return RedirectToPage();
diff --git a/src/JobsMedical.Web/Program.cs b/src/JobsMedical.Web/Program.cs
index 4eb294c..6b2a554 100644
--- a/src/JobsMedical.Web/Program.cs
+++ b/src/JobsMedical.Web/Program.cs
@@ -28,14 +28,6 @@ builder.Services.AddHttpClient("scrape", c =>
c.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; HamkadrBot/1.0)");
});
builder.Services.AddHttpClient("ai");
-builder.Services.Configure(
- builder.Configuration.GetSection("Ingestion"));
-builder.Services.Configure(
- builder.Configuration.GetSection("Ingestion:Telegram"));
-builder.Services.Configure(
- builder.Configuration.GetSection("Ingestion:Bale"));
-builder.Services.Configure(
- builder.Configuration.GetSection("Ingestion:Divar"));
builder.Services.AddSingleton();
builder.Services.AddSingleton();
diff --git a/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs b/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs
index 21ef6b0..279c1da 100644
--- a/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs
@@ -1,46 +1,34 @@
using System.Text.Json;
-using Microsoft.Extensions.Options;
+using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
-public class BaleOptions
-{
- public bool Enabled { get; set; }
- public string? BotToken { get; set; }
- public string BaseUrl { get; set; } = "https://tapi.bale.ai"; // Bale Bot API host
-}
-
/// <summary>
-/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. The bot must
-/// be a member/admin of the channels it should read. Pulls text from messages and channel posts.
+/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. Enabled +
+/// bot token come from admin settings (DB). The bot must be a member of the channels it reads.
/// </summary>
public class BaleListingSource : IListingSource
{
- private readonly BaleOptions _opts;
+ private const string BaseUrl = "https://tapi.bale.ai";
private readonly IHttpClientFactory _http;
private readonly ILogger _log;
- public BaleListingSource(IOptions opts, IHttpClientFactory http,
- ILogger log)
+ public BaleListingSource(IHttpClientFactory http, ILogger log)
{
- _opts = opts.Value;
_http = http;
_log = log;
}
public string Name => "بله";
- public bool Enabled => _opts.Enabled && !string.IsNullOrWhiteSpace(_opts.BotToken);
- public async Task> FetchAsync(CancellationToken ct = default)
+ public async Task> FetchAsync(AppSetting s, CancellationToken ct = default)
{
- if (!Enabled) { _log.LogInformation("Bale source disabled/unconfigured."); return Array.Empty(); }
+ if (!s.BaleEnabled || string.IsNullOrWhiteSpace(s.BaleBotToken)) return Array.Empty();
try
{
var client = _http.CreateClient("scrape");
- var url = $"{_opts.BaseUrl.TrimEnd('/')}/bot{_opts.BotToken}/getUpdates";
- var body = await client.GetStringAsync(url, ct);
-
+ var body = await client.GetStringAsync($"{BaseUrl}/bot{s.BaleBotToken}/getUpdates", ct);
using var doc = JsonDocument.Parse(body);
if (!doc.RootElement.TryGetProperty("result", out var result) || result.ValueKind != JsonValueKind.Array)
return Array.Empty();
@@ -54,11 +42,7 @@ public class BaleListingSource : IListingSource
}
return items;
}
- catch (Exception ex)
- {
- _log.LogWarning(ex, "Bale fetch failed.");
- return Array.Empty();
- }
+ catch (Exception ex) { _log.LogWarning(ex, "Bale fetch failed."); return Array.Empty(); }
}
private static string? TextOf(JsonElement update, string key)
diff --git a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs
index cdb60c9..c70bfdb 100644
--- a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs
@@ -1,55 +1,44 @@
using System.Text;
using System.Text.Json;
-using Microsoft.Extensions.Options;
+using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
-public class DivarOptions
-{
- public bool Enabled { get; set; }
- public string City { get; set; } = "tehran";
- public string Category { get; set; } = "jobs";
- public List Queries { get; set; } = new(); // e.g. "پرستار", "پزشک عمومی", "درمانگاه"
- public string BaseUrl { get; set; } = "https://api.divar.ir/v8/web-search";
- public int PerQuery { get; set; } = 25;
-}
-
/// <summary>
/// Best-effort Divar fetch: queries Divar's web-search JSON for each term and harvests post
-/// titles + descriptions. Divar's private API shifts shape over time, so we walk the JSON
-/// tolerantly for any object carrying a "title" plus a nearby description field, and fail soft.
+/// titles + descriptions. Enabled + city + queries come from admin settings (DB). Divar's
+/// private API shifts shape, so we walk JSON tolerantly and fail soft.
/// </summary>
public class DivarListingSource : IListingSource
{
- private readonly DivarOptions _opts;
+ private const string BaseUrl = "https://api.divar.ir/v8/web-search";
private readonly IHttpClientFactory _http;
private readonly ILogger _log;
- public DivarListingSource(IOptions opts, IHttpClientFactory http,
- ILogger log)
+ public DivarListingSource(IHttpClientFactory http, ILogger log)
{
- _opts = opts.Value;
_http = http;
_log = log;
}
public string Name => "دیوار";
- public bool Enabled => _opts.Enabled && _opts.Queries.Count > 0;
- public async Task> FetchAsync(CancellationToken ct = default)
+ public async Task> FetchAsync(AppSetting s, CancellationToken ct = default)
{
- if (!Enabled) { _log.LogInformation("Divar source disabled/unconfigured."); return Array.Empty(); }
+ var queries = AppSetting.SplitList(s.DivarQueries);
+ if (!s.DivarEnabled || queries.Count == 0) return Array.Empty();
+ var city = string.IsNullOrWhiteSpace(s.DivarCity) ? "tehran" : s.DivarCity.Trim();
var client = _http.CreateClient("scrape");
var items = new List();
- foreach (var q in _opts.Queries.Where(q => q.Trim().Length > 0))
+ foreach (var q in queries)
{
try
{
- var url = $"{_opts.BaseUrl.TrimEnd('/')}/{_opts.City}/{_opts.Category}?q={Uri.EscapeDataString(q)}";
+ var url = $"{BaseUrl}/{city}/jobs?q={Uri.EscapeDataString(q)}";
var body = await client.GetStringAsync(url, ct);
using var doc = JsonDocument.Parse(body);
- foreach (var text in Harvest(doc.RootElement).Take(_opts.PerQuery))
+ foreach (var text in Harvest(doc.RootElement).Take(25))
items.Add(new ScrapedItem("دیوار", text, "https://divar.ir"));
}
catch (Exception ex) { _log.LogWarning(ex, "Divar fetch failed for query {Query}", q); }
@@ -60,7 +49,6 @@ public class DivarListingSource : IListingSource
private static readonly string[] DescKeys =
{ "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" };
- /// Walk the JSON; for each object with a string "title", emit title + first description.
private static IEnumerable Harvest(JsonElement el)
{
if (el.ValueKind == JsonValueKind.Object)
@@ -75,12 +63,12 @@ public class DivarListingSource : IListingSource
if (text.Length >= 15) yield return text;
}
foreach (var p in el.EnumerateObject())
- foreach (var s in Harvest(p.Value)) yield return s;
+ foreach (var x in Harvest(p.Value)) yield return x;
}
else if (el.ValueKind == JsonValueKind.Array)
{
foreach (var item in el.EnumerateArray())
- foreach (var s in Harvest(item)) yield return s;
+ foreach (var x in Harvest(item)) yield return x;
}
}
}
diff --git a/src/JobsMedical.Web/Services/Scraping/IListingSource.cs b/src/JobsMedical.Web/Services/Scraping/IListingSource.cs
index 8079fa2..a7fe8f9 100644
--- a/src/JobsMedical.Web/Services/Scraping/IListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/IListingSource.cs
@@ -1,15 +1,17 @@
+using JobsMedical.Web.Models;
+
namespace JobsMedical.Web.Services.Scraping;
/// <summary>One raw post pulled from a source (a Telegram message, a Divar ad, etc.).</summary>
public record ScrapedItem(string Source, string RawText, string? SourceUrl = null);
/// <summary>
-/// A pluggable source the ingestion engine pulls from. Implement once per channel/site.
-/// `Enabled` lets a source be present but dormant until it's configured with credentials.
+/// A pluggable source the ingestion engine pulls from. Configuration (enabled, channels, tokens)
+/// comes from the DB-backed <see cref="AppSetting"/> passed in — set in the admin panel, not env.
+/// A disabled/unconfigured source returns an empty list.
/// </summary>
public interface IListingSource
{
string Name { get; }
- bool Enabled { get; }
- Task> FetchAsync(CancellationToken ct = default);
+ Task> FetchAsync(AppSetting settings, CancellationToken ct = default);
}
diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
index a9c21b5..857d08b 100644
--- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
+++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
@@ -43,8 +43,7 @@ public class IngestionService
_ai = ai; _settings = settings; _log = log;
}
- public IReadOnlyList<(string Name, bool Enabled)> Sources =>
- _sources.Select(s => (s.Name, s.Enabled)).ToList();
+ public IReadOnlyList SourceNames => _sources.Select(s => s.Name).ToList();
public async Task RunAsync(CancellationToken ct = default)
{
@@ -58,12 +57,13 @@ public class IngestionService
var results = new List();
- foreach (var source in _sources.Where(s => s.Enabled))
+ foreach (var source in _sources)
{
int fetched = 0, queued = 0, published = 0, flagged = 0, spam = 0, dupes = 0;
IReadOnlyList items;
- try { items = await source.FetchAsync(ct); }
+ try { items = await source.FetchAsync(settings, ct); }
catch (Exception ex) { _log.LogError(ex, "Source {Source} failed", source.Name); continue; }
+ if (items.Count == 0) continue; // nothing fetched: source disabled, unconfigured, or returned no posts
foreach (var item in items)
{
diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionWorker.cs b/src/JobsMedical.Web/Services/Scraping/IngestionWorker.cs
index 3ed6173..ef3b920 100644
--- a/src/JobsMedical.Web/Services/Scraping/IngestionWorker.cs
+++ b/src/JobsMedical.Web/Services/Scraping/IngestionWorker.cs
@@ -1,58 +1,52 @@
-using Microsoft.Extensions.Options;
-
namespace JobsMedical.Web.Services.Scraping;
-public class IngestionOptions
-{
- public bool Enabled { get; set; } = false; // off by default — opt in via config
- public int IntervalMinutes { get; set; } = 30;
-}
-
/// <summary>
-/// Periodically runs the ingestion engine when enabled (Ingestion:Enabled=true). Off by default
-/// so nothing scrapes uninvited; admins can also trigger a run on demand from the admin UI.
+/// Periodically runs the ingestion engine when the admin has turned auto-ingest ON
+/// (AppSetting.AutoIngestEnabled) — read fresh from the DB each cycle, so it can be toggled at
+/// runtime from the admin panel with no redeploy. When off, it idles and re-checks.
/// </summary>
public class IngestionWorker : BackgroundService
{
private readonly IServiceScopeFactory _scopes;
- private readonly IngestionOptions _opts;
private readonly ILogger _log;
- public IngestionWorker(IServiceScopeFactory scopes, IOptions opts,
- ILogger log)
+ public IngestionWorker(IServiceScopeFactory scopes, ILogger log)
{
_scopes = scopes;
- _opts = opts.Value;
_log = log;
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
- if (!_opts.Enabled)
- {
- _log.LogInformation("Ingestion worker disabled (Ingestion:Enabled=false).");
- return;
- }
-
- var interval = TimeSpan.FromMinutes(Math.Max(1, _opts.IntervalMinutes));
- _log.LogInformation("Ingestion worker on; every {Min} min.", _opts.IntervalMinutes);
+ // Small startup delay so the DB/migrations are ready.
+ try { await Task.Delay(TimeSpan.FromSeconds(20), stoppingToken); }
+ catch (OperationCanceledException) { return; }
while (!stoppingToken.IsCancellationRequested)
{
+ var idleMinutes = 10;
try
{
using var scope = _scopes.CreateScope();
- var svc = scope.ServiceProvider.GetRequiredService();
- var summary = await svc.RunAsync(stoppingToken);
- _log.LogInformation("Scheduled ingestion: queued={Q} flagged={F} spam={S} dupes={D}",
- summary.TotalQueued, summary.TotalFlagged, summary.TotalSpam, summary.TotalDuplicates);
+ var settings = await scope.ServiceProvider
+ .GetRequiredService().GetAsync();
+
+ if (settings.AutoIngestEnabled)
+ {
+ var svc = scope.ServiceProvider.GetRequiredService();
+ var summary = await svc.RunAsync(stoppingToken);
+ _log.LogInformation("Auto-ingest: queued={Q} published={P} flagged={F} spam={S} dupes={D}",
+ summary.TotalQueued, summary.TotalPublished, summary.TotalFlagged,
+ summary.TotalSpam, summary.TotalDuplicates);
+ idleMinutes = Math.Max(1, settings.IngestIntervalMinutes);
+ }
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
- _log.LogError(ex, "Scheduled ingestion run failed");
+ _log.LogError(ex, "Auto-ingest cycle failed");
}
- try { await Task.Delay(interval, stoppingToken); }
+ try { await Task.Delay(TimeSpan.FromMinutes(idleMinutes), stoppingToken); }
catch (OperationCanceledException) { break; }
}
}
diff --git a/src/JobsMedical.Web/Services/Scraping/SampleListingSource.cs b/src/JobsMedical.Web/Services/Scraping/SampleListingSource.cs
index 7484e6f..246fa1f 100644
--- a/src/JobsMedical.Web/Services/Scraping/SampleListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/SampleListingSource.cs
@@ -1,27 +1,33 @@
+using JobsMedical.Web.Models;
+using Microsoft.Extensions.Hosting;
+
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
-/// A built-in source of representative Persian posts (the kind found in shift channels). Always
-/// available, needs no credentials — it lets the whole ingestion → validation → review pipeline
-/// run and be demoed today, and doubles as a fixture mix of good, incomplete, and spam posts.
+/// Built-in representative Persian posts (good, incomplete, and spam) so the whole pipeline can be
+/// demoed. Only active in Development — never injects sample data into production.
/// </summary>
public class SampleListingSource : IListingSource
{
+ private readonly IHostEnvironment _env;
+ public SampleListingSource(IHostEnvironment env) => _env = env;
+
public string Name => "نمونه (کانال آزمایشی)";
- public bool Enabled => true;
private static readonly string[] Posts =
{
"درمانگاه شبانهروزی در سعادتآباد نیازمند پزشک عمومی برای شیفت شب، کارانه ۳ میلیون تومان. تماس ۰۹۱۲۳۴۵۶۷۸۹",
- "کلینیک تخصصی در تهران به پرستار برای شیفت عصر نیازمند است، ۵۰٪ سهم درآمد. ۰۹۳۵۱۱۱۲۲۳۳",
+ "کلینیک تخصصی در تهران به پرستار خانم برای شیفت عصر نیازمند است، ۵۰٪ سهم درآمد. ۰۹۳۵۱۱۱۲۲۳۳",
"استخدام ماما تماموقت در بیمارستان خصوصی، حقوق توافقی. منطقه شهرک غرب.",
- "نیازمند تکنسین اتاق عمل جهت همکاری در نارمک، شیفت صبح. ۰۹۱۲۰۰۰۰۰۰۰",
- "فروش فالوور و بک لینک ارزان، سرمایه گذاری در ارز دیجیتال با سود تضمینی!", // spam
- "پزشک", // too short / incomplete
+ "نیازمند تکنسین اتاق عمل آقا جهت همکاری در نارمک، شیفت صبح. ۰۹۱۲۰۰۰۰۰۰۰",
+ "فروش فالوور و بک لینک ارزان، سرمایه گذاری در ارز دیجیتال با سود تضمینی!",
+ "پزشک",
"بیمارستان آتیه جهت تکمیل کادر درمان به پزشک عمومی مقیم نیازمند است. قرارداد یکساله، حقوق ۴۵ میلیون ماهانه. تهرانپارس.",
};
- public Task> FetchAsync(CancellationToken ct = default)
+ public Task> FetchAsync(AppSetting settings, CancellationToken ct = default)
=> Task.FromResult>(
- Posts.Select(p => new ScrapedItem(Name, p)).ToList());
+ _env.IsDevelopment()
+ ? Posts.Select(p => new ScrapedItem(Name, p)).ToList()
+ : Array.Empty());
}
diff --git a/src/JobsMedical.Web/Services/Scraping/SettingsService.cs b/src/JobsMedical.Web/Services/Scraping/SettingsService.cs
index ff24df0..ef1ed6e 100644
--- a/src/JobsMedical.Web/Services/Scraping/SettingsService.cs
+++ b/src/JobsMedical.Web/Services/Scraping/SettingsService.cs
@@ -34,6 +34,16 @@ public class SettingsService
s.AiSystemPrompt = string.IsNullOrWhiteSpace(incoming.AiSystemPrompt)
? AppSetting.DefaultPrompt : incoming.AiSystemPrompt;
s.AiAutoApprove = incoming.AiAutoApprove;
+ // Channel scraping sources
+ s.AutoIngestEnabled = incoming.AutoIngestEnabled;
+ s.IngestIntervalMinutes = Math.Max(1, incoming.IngestIntervalMinutes);
+ s.TelegramEnabled = incoming.TelegramEnabled;
+ s.TelegramChannels = incoming.TelegramChannels?.Trim();
+ s.BaleEnabled = incoming.BaleEnabled;
+ s.BaleBotToken = incoming.BaleBotToken?.Trim();
+ s.DivarEnabled = incoming.DivarEnabled;
+ s.DivarCity = string.IsNullOrWhiteSpace(incoming.DivarCity) ? "tehran" : incoming.DivarCity.Trim();
+ s.DivarQueries = incoming.DivarQueries?.Trim();
s.UpdatedAt = DateTime.UtcNow;
await _db.SaveChangesAsync();
}
diff --git a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs
index b655819..85b0169 100644
--- a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs
@@ -1,50 +1,39 @@
using System.Net;
using System.Text.RegularExpressions;
-using Microsoft.Extensions.Options;
+using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
-public class TelegramOptions
-{
- public bool Enabled { get; set; }
- public string? BotToken { get; set; } // optional (for private channels later)
- public List Channels { get; set; } = new(); // public channel usernames (no @)
- public int PerChannel { get; set; } = 20;
-}
-
///
/// Reads public Telegram channels via the web preview (https://t.me/s/<channel>) — no bot
-/// token or login needed for public channels. Each message's text becomes a ScrapedItem.
+/// token needed for public channels. Enabled + channel list come from the admin settings (DB).
///
public class TelegramListingSource : IListingSource
{
- private readonly TelegramOptions _opts;
private readonly IHttpClientFactory _http;
private readonly ILogger _log;
- public TelegramListingSource(IOptions opts, IHttpClientFactory http,
- ILogger log)
+ public TelegramListingSource(IHttpClientFactory http, ILogger log)
{
- _opts = opts.Value;
_http = http;
_log = log;
}
public string Name => "تلگرام";
- public bool Enabled => _opts.Enabled && _opts.Channels.Count > 0;
- public async Task> FetchAsync(CancellationToken ct = default)
+ public async Task> FetchAsync(AppSetting s, CancellationToken ct = default)
{
- if (!Enabled) { _log.LogInformation("Telegram source disabled/unconfigured."); return Array.Empty(); }
+ var channels = AppSetting.SplitList(s.TelegramChannels);
+ if (!s.TelegramEnabled || channels.Count == 0) return Array.Empty();
var client = _http.CreateClient("scrape");
var items = new List();
- foreach (var ch in _opts.Channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0))
+ foreach (var ch in channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0))
{
try
{
var html = await client.GetStringAsync($"https://t.me/s/{ch}", ct);
- foreach (var text in ExtractMessages(html).Take(_opts.PerChannel))
+ foreach (var text in ExtractMessages(html).Take(20))
items.Add(new ScrapedItem($"تلگرام/{ch}", text, $"https://t.me/{ch}"));
}
catch (Exception ex) { _log.LogWarning(ex, "Telegram fetch failed for {Channel}", ch); }
@@ -52,7 +41,6 @@ public class TelegramListingSource : IListingSource
return items;
}
- // Message bodies live in ...
.
private static IEnumerable ExtractMessages(string html)
{
foreach (Match m in Regex.Matches(html,
@@ -69,7 +57,7 @@ internal static class HtmlUtil
public static string ToPlainText(string html)
{
var s = Regex.Replace(html, "
", "\n", RegexOptions.IgnoreCase);
- s = Regex.Replace(s, "<[^>]+>", ""); // strip remaining tags
+ s = Regex.Replace(s, "<[^>]+>", "");
s = WebUtility.HtmlDecode(s);
s = Regex.Replace(s, "[ \\t]+", " ");
return s.Trim();
diff --git a/src/JobsMedical.Web/appsettings.json b/src/JobsMedical.Web/appsettings.json
index 53d6c0d..7956808 100644
--- a/src/JobsMedical.Web/appsettings.json
+++ b/src/JobsMedical.Web/appsettings.json
@@ -11,12 +11,5 @@
},
"Auth": {
"AdminPhone": "09120000000"
- },
- "Ingestion": {
- "Enabled": false,
- "IntervalMinutes": 30,
- "Telegram": { "Enabled": false, "BotToken": "", "Channels": [], "PerChannel": 20 },
- "Bale": { "Enabled": false, "BotToken": "", "BaseUrl": "https://tapi.bale.ai" },
- "Divar": { "Enabled": false, "City": "tehran", "Category": "jobs", "Queries": [], "BaseUrl": "https://api.divar.ir/v8/web-search", "PerQuery": 25 }
}
}