Per-ad contacts for shifts/jobs, stale-applicant filter, review source link
CI/CD / CI · dotnet build (push) Successful in 1m3s
CI/CD / Deploy · hamkadr (push) Successful in 1m18s

Phone fix: shifts/jobs showed Facility.Phone, but unnamed ads all share one
placeholder facility, so every such listing displayed the same stale number
while the ad's real phone sat unused in the description. ContactMethod is now
attachable to a Shift/JobOpening (not just talent); ingestion stores the ad's
own number(s) on each listing and the detail pages render them (new
_ContactList partial), falling back to the facility phone only when the ad had
none. Migration ShiftJobContacts (nullable owner FKs) — auto-applies on deploy.

Stale applicants: skip «آماده به کار» posts older than 7 days at ingest, by the
source's real timestamp (Telegram <time>, Bale date) or a Persian time-ago
phrase in the text (Divar «۲ هفته پیش»). Recorded as Discarded; shifts/jobs
are not aged out.

Admin: Review page now shows a «مشاهده آگهی در منبع» link (RawListing.SourceUrl)
so the source post can be checked before publishing.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-10 21:28:12 +03:30
parent b71d8b362b
commit 38031cb189
18 changed files with 1943 additions and 47 deletions
+13
View File
@@ -0,0 +1,13 @@
{
"version": 1,
"isRoot": true,
"tools": {
"dotnet-ef": {
"version": "10.0.0",
"commands": [
"dotnet-ef"
],
"rollForward": false
}
}
}
+7
View File
@@ -156,9 +156,16 @@ public class AppDbContext : DbContext, IDataProtectionKeyContext
.HasForeignKey(t => t.DistrictId).OnDelete(DeleteBehavior.SetNull); .HasForeignKey(t => t.DistrictId).OnDelete(DeleteBehavior.SetNull);
b.Entity<TalentListing>().HasIndex(t => t.Status); b.Entity<TalentListing>().HasIndex(t => t.Status);
b.Entity<TalentListing>().HasIndex(t => new { t.CityId, t.RoleId }); b.Entity<TalentListing>().HasIndex(t => new { t.CityId, t.RoleId });
// A ContactMethod belongs to exactly one of talent / shift / job (all optional FKs).
b.Entity<ContactMethod>() b.Entity<ContactMethod>()
.HasOne(c => c.TalentListing).WithMany(t => t.Contacts) .HasOne(c => c.TalentListing).WithMany(t => t.Contacts)
.HasForeignKey(c => c.TalentListingId).OnDelete(DeleteBehavior.Cascade); .HasForeignKey(c => c.TalentListingId).OnDelete(DeleteBehavior.Cascade);
b.Entity<ContactMethod>()
.HasOne(c => c.Shift).WithMany(s => s.Contacts)
.HasForeignKey(c => c.ShiftId).OnDelete(DeleteBehavior.Cascade);
b.Entity<ContactMethod>()
.HasOne(c => c.JobOpening).WithMany(j => j.Contacts)
.HasForeignKey(c => c.JobOpeningId).OnDelete(DeleteBehavior.Cascade);
b.Entity<WebPushSubscription>().HasIndex(s => s.Endpoint).IsUnique(); b.Entity<WebPushSubscription>().HasIndex(s => s.Endpoint).IsUnique();
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,98 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace JobsMedical.Web.Migrations
{
    /// <summary>
    /// Lets a <c>ContactMethods</c> row be owned by a shift or a job opening in addition to a talent
    /// listing: <c>TalentListingId</c> becomes nullable and two new nullable owner columns
    /// (<c>JobOpeningId</c>, <c>ShiftId</c>) are added, each indexed and cascading on delete.
    /// </summary>
    public partial class ShiftJobContacts : Migration
    {
        /// <inheritdoc />
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            // Existing rows keep their TalentListingId; only the NOT NULL constraint is relaxed.
            migrationBuilder.AlterColumn<int>(
                name: "TalentListingId",
                table: "ContactMethods",
                type: "integer",
                nullable: true,
                oldClrType: typeof(int),
                oldType: "integer");

            migrationBuilder.AddColumn<int>(
                name: "JobOpeningId",
                table: "ContactMethods",
                type: "integer",
                nullable: true);

            migrationBuilder.AddColumn<int>(
                name: "ShiftId",
                table: "ContactMethods",
                type: "integer",
                nullable: true);

            migrationBuilder.CreateIndex(
                name: "IX_ContactMethods_JobOpeningId",
                table: "ContactMethods",
                column: "JobOpeningId");

            migrationBuilder.CreateIndex(
                name: "IX_ContactMethods_ShiftId",
                table: "ContactMethods",
                column: "ShiftId");

            migrationBuilder.AddForeignKey(
                name: "FK_ContactMethods_JobOpenings_JobOpeningId",
                table: "ContactMethods",
                column: "JobOpeningId",
                principalTable: "JobOpenings",
                principalColumn: "Id",
                onDelete: ReferentialAction.Cascade);

            migrationBuilder.AddForeignKey(
                name: "FK_ContactMethods_Shifts_ShiftId",
                table: "ContactMethods",
                column: "ShiftId",
                principalTable: "Shifts",
                principalColumn: "Id",
                onDelete: ReferentialAction.Cascade);
        }

        /// <inheritdoc />
        protected override void Down(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.DropForeignKey(
                name: "FK_ContactMethods_JobOpenings_JobOpeningId",
                table: "ContactMethods");

            migrationBuilder.DropForeignKey(
                name: "FK_ContactMethods_Shifts_ShiftId",
                table: "ContactMethods");

            migrationBuilder.DropIndex(
                name: "IX_ContactMethods_JobOpeningId",
                table: "ContactMethods");

            migrationBuilder.DropIndex(
                name: "IX_ContactMethods_ShiftId",
                table: "ContactMethods");

            migrationBuilder.DropColumn(
                name: "JobOpeningId",
                table: "ContactMethods");

            migrationBuilder.DropColumn(
                name: "ShiftId",
                table: "ContactMethods");

            // Rows that belonged to a shift / job opening have no talent listing. Once the owner
            // columns are gone they are orphans, and they would make the NOT NULL change below fail
            // (a backfilled 0 does not satisfy the foreign key to TalentListings either). Remove
            // them so the rollback can complete.
            migrationBuilder.Sql(
                "DELETE FROM \"ContactMethods\" WHERE \"TalentListingId\" IS NULL;");

            migrationBuilder.AlterColumn<int>(
                name: "TalentListingId",
                table: "ContactMethods",
                type: "integer",
                nullable: false,
                defaultValue: 0,
                oldClrType: typeof(int),
                oldType: "integer",
                oldNullable: true);
        }
    }
}
@@ -293,10 +293,16 @@ namespace JobsMedical.Web.Migrations
NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id")); NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property<int>("Id"));
b.Property<int?>("JobOpeningId")
.HasColumnType("integer");
b.Property<int?>("ShiftId")
.HasColumnType("integer");
b.Property<int>("SortOrder") b.Property<int>("SortOrder")
.HasColumnType("integer"); .HasColumnType("integer");
b.Property<int>("TalentListingId") b.Property<int?>("TalentListingId")
.HasColumnType("integer"); .HasColumnType("integer");
b.Property<int>("Type") b.Property<int>("Type")
@@ -309,6 +315,10 @@ namespace JobsMedical.Web.Migrations
b.HasKey("Id"); b.HasKey("Id");
b.HasIndex("JobOpeningId");
b.HasIndex("ShiftId");
b.HasIndex("TalentListingId"); b.HasIndex("TalentListingId");
b.ToTable("ContactMethods"); b.ToTable("ContactMethods");
@@ -1261,11 +1271,24 @@ namespace JobsMedical.Web.Migrations
modelBuilder.Entity("JobsMedical.Web.Models.ContactMethod", b => modelBuilder.Entity("JobsMedical.Web.Models.ContactMethod", b =>
{ {
b.HasOne("JobsMedical.Web.Models.JobOpening", "JobOpening")
.WithMany("Contacts")
.HasForeignKey("JobOpeningId")
.OnDelete(DeleteBehavior.Cascade);
b.HasOne("JobsMedical.Web.Models.Shift", "Shift")
.WithMany("Contacts")
.HasForeignKey("ShiftId")
.OnDelete(DeleteBehavior.Cascade);
b.HasOne("JobsMedical.Web.Models.TalentListing", "TalentListing") b.HasOne("JobsMedical.Web.Models.TalentListing", "TalentListing")
.WithMany("Contacts") .WithMany("Contacts")
.HasForeignKey("TalentListingId") .HasForeignKey("TalentListingId")
.OnDelete(DeleteBehavior.Cascade) .OnDelete(DeleteBehavior.Cascade);
.IsRequired();
b.Navigation("JobOpening");
b.Navigation("Shift");
b.Navigation("TalentListing"); b.Navigation("TalentListing");
}); });
@@ -1550,6 +1573,11 @@ namespace JobsMedical.Web.Migrations
b.Navigation("Shifts"); b.Navigation("Shifts");
}); });
modelBuilder.Entity("JobsMedical.Web.Models.JobOpening", b =>
{
b.Navigation("Contacts");
});
modelBuilder.Entity("JobsMedical.Web.Models.Role", b => modelBuilder.Entity("JobsMedical.Web.Models.Role", b =>
{ {
b.Navigation("Shifts"); b.Navigation("Shifts");
@@ -1558,6 +1586,8 @@ namespace JobsMedical.Web.Migrations
modelBuilder.Entity("JobsMedical.Web.Models.Shift", b => modelBuilder.Entity("JobsMedical.Web.Models.Shift", b =>
{ {
b.Navigation("Applications"); b.Navigation("Applications");
b.Navigation("Contacts");
}); });
modelBuilder.Entity("JobsMedical.Web.Models.TalentListing", b => modelBuilder.Entity("JobsMedical.Web.Models.TalentListing", b =>
+13 -5
View File
@@ -3,16 +3,24 @@ using System.ComponentModel.DataAnnotations;
namespace JobsMedical.Web.Models; namespace JobsMedical.Web.Models;
/// <summary> /// <summary>
/// One contact channel for an applicant («آماده به کار») listing. A listing can carry several — /// One contact channel for a listing — an applicant («آماده به کار»), a <see cref="Shift"/>, or a
/// e.g. three phones + an email + an Instagram page. <see cref="Value"/> holds the raw handle / /// <see cref="JobOpening"/>. A listing can carry several — e.g. three phones + an email + an
/// number / address; <see cref="Type"/> decides how it's linked (tel:, mailto:, t.me/…, etc.). /// Instagram page. <see cref="Value"/> holds the raw handle / number / address; <see cref="Type"/>
/// decides how it's linked (tel:, mailto:, t.me/…, etc.). Exactly one owner FK is set.
/// </summary> /// </summary>
public class ContactMethod public class ContactMethod
{ {
public int Id { get; set; } public int Id { get; set; }
public int TalentListingId { get; set; } // Owner — exactly one of these is non-null.
public TalentListing TalentListing { get; set; } = null!; public int? TalentListingId { get; set; }
public TalentListing? TalentListing { get; set; }
public int? ShiftId { get; set; }
public Shift? Shift { get; set; }
public int? JobOpeningId { get; set; }
public JobOpening? JobOpening { get; set; }
public ContactType Type { get; set; } public ContactType Type { get; set; }
+4
View File
@@ -42,6 +42,10 @@ public class JobOpening
public DateTime CreatedAt { get; set; } = DateTime.UtcNow; public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
/// <summary>Contact channels harvested from the source ad (aggregated openings). When empty, the
/// detail page falls back to the facility's phone.</summary>
public ICollection<ContactMethod> Contacts { get; set; } = new List<ContactMethod>();
// Transient: distance (km) when "near me" is active. Not persisted. // Transient: distance (km) when "near me" is active. Not persisted.
[NotMapped] public double? DistanceKm { get; set; } [NotMapped] public double? DistanceKm { get; set; }
} }
+4
View File
@@ -44,6 +44,10 @@ public class Shift
public ICollection<Application> Applications { get; set; } = new List<Application>(); public ICollection<Application> Applications { get; set; } = new List<Application>();
/// <summary>Contact channels harvested from the source ad (aggregated shifts). When empty, the
/// detail page falls back to the facility's phone.</summary>
public ICollection<ContactMethod> Contacts { get; set; } = new List<ContactMethod>();
// Transient: distance (km) from the visitor when "near me" is active. Not persisted. // Transient: distance (km) from the visitor when "near me" is active. Not persisted.
[System.ComponentModel.DataAnnotations.Schema.NotMapped] [System.ComponentModel.DataAnnotations.Schema.NotMapped]
public double? DistanceKm { get; set; } public double? DistanceKm { get; set; }
@@ -19,6 +19,16 @@
<div class="card card-pad"> <div class="card card-pad">
<h3 style="margin-top:0;">متن خام</h3> <h3 style="margin-top:0;">متن خام</h3>
<p style="white-space:pre-wrap; margin:0;">@r.RawText</p> <p style="white-space:pre-wrap; margin:0;">@r.RawText</p>
@if (!string.IsNullOrWhiteSpace(r.SourceUrl))
{
<p style="margin:12px 0 0;">
<a class="btn btn-outline" href="@r.SourceUrl" target="_blank" rel="noopener noreferrer">🔗 مشاهده آگهی در منبع (@r.SourceChannel)</a>
</p>
}
else
{
<p class="muted" style="font-size:12px; margin:12px 0 0;">لینک منبع برای این آگهی ثبت نشده است.</p>
}
</div> </div>
@if (Model.Parsed is not null) @if (Model.Parsed is not null)
+13 -4
View File
@@ -3,6 +3,7 @@
@{ @{
var j = Model.Job!; var j = Model.Job!;
var f = j.Facility!; var f = j.Facility!;
var jobContacts = (j.Contacts ?? new List<JobsMedical.Web.Models.ContactMethod>()).ToList();
ViewData["Title"] = j.Title; ViewData["Title"] = j.Title;
ViewData["Description"] = $"{j.Title} در {f.Name}، {f.City?.Name}. موقعیت استخدامی برای {j.Role?.Name}."; ViewData["Description"] = $"{j.Title} در {f.Name}، {f.City?.Name}. موقعیت استخدامی برای {j.Role?.Name}.";
// Don't let Google index filled/expired openings (avoids dead "Job for jobs" results). // Don't let Google index filled/expired openings (avoids dead "Job for jobs" results).
@@ -38,11 +39,18 @@
@if (Model.ShowContact) @if (Model.ShowContact)
{ {
<div class="contact-reveal" style="margin-bottom:16px;"> <div class="contact-reveal" style="margin-bottom:16px;">
<h4>✓ راه‌های ارتباطی مرکز</h4> <h4>✓ راه‌های ارتباطی</h4>
@if (jobContacts.Count > 0)
{
@* Numbers from THIS ad (aggregated) — the correct, per-listing contacts. *@
<partial name="_ContactList" model="jobContacts" />
}
else if (!string.IsNullOrEmpty(f.Phone) || !string.IsNullOrEmpty(f.BaleId))
{
@if (!string.IsNullOrEmpty(f.Phone)) @if (!string.IsNullOrEmpty(f.Phone))
{ {
<div class="contact-row"> <div class="contact-row">
<span class="c-meta"><span class="c-type">📞 تلفن</span><span class="c-val" dir="ltr">@f.Phone</span></span> <span class="c-meta"><span class="c-type">📞 تلفن مرکز</span><span class="c-val" dir="ltr">@f.Phone</span></span>
<a class="btn btn-accent" href="tel:@f.Phone">تماس</a> <a class="btn btn-accent" href="tel:@f.Phone">تماس</a>
</div> </div>
} }
@@ -53,9 +61,10 @@
<a class="btn btn-outline" href="https://ble.ir/@f.BaleId" target="_blank" rel="noopener">باز کردن</a> <a class="btn btn-outline" href="https://ble.ir/@f.BaleId" target="_blank" rel="noopener">باز کردن</a>
</div> </div>
} }
@if (string.IsNullOrEmpty(f.Phone) && string.IsNullOrEmpty(f.BaleId)) }
else
{ {
<p class="muted" style="margin:0;">شماره‌ای برای این مرکز ثبت نشده است.</p> <p class="muted" style="margin:0;">شماره‌ای ثبت نشده است.</p>
} }
</div> </div>
} }
@@ -67,6 +67,7 @@ public class DetailsModel : PageModel
.Include(j => j.Facility).ThenInclude(f => f.City) .Include(j => j.Facility).ThenInclude(f => f.City)
.Include(j => j.Facility).ThenInclude(f => f.District) .Include(j => j.Facility).ThenInclude(f => f.District)
.Include(j => j.Role) .Include(j => j.Role)
.Include(j => j.Contacts)
.FirstOrDefaultAsync(j => j.Id == id); .FirstOrDefaultAsync(j => j.Id == id);
} }
} }
@@ -0,0 +1,17 @@
@model IReadOnlyList<JobsMedical.Web.Models.ContactMethod>
@* Renders one row per contact channel (phone/Bale/Telegram/email/…) with a clickable action,
   ordered by SortOrder. Shared by the shift, job, and applicant detail pages.
   A row whose type yields no link (Href returns null) shows the value only, without a button. *@
@foreach (var c in Model.OrderBy(c => c.SortOrder))
{
    var href = JobsMedical.Web.Services.ContactInfo.Href(c.Type, c.Value);
    var label = JobsMedical.Web.Services.ContactInfo.Label(c.Type);
    var icon = JobsMedical.Web.Services.ContactInfo.Icon(c.Type);
    // Phone-type channels get the accent "call" button; everything else gets the outline "open" button.
    var isCall = c.Type is JobsMedical.Web.Models.ContactType.Mobile or JobsMedical.Web.Models.ContactType.Phone;
    var cls = isCall ? "btn-accent" : "btn-outline";
    // Call links are handed to the dialer, so they must not open a new (blank) tab — same as the
    // facility-phone fallback rows on the detail pages. Razor omits an attribute whose value is null.
    // NOTE(review): assumes ContactInfo.Href returns a tel: link for Mobile/Phone — confirm.
    var target = isCall ? null : "_blank";
    var rel = isCall ? null : "noopener";
    <div class="contact-row">
        <span class="c-meta"><span class="c-type">@icon @label</span><span class="c-val" dir="ltr">@c.Value</span></span>
        @if (href is not null)
        {
            <a class="btn @cls" href="@href" target="@target" rel="@rel">@(isCall ? "تماس" : "باز کردن")</a>
        }
    </div>
}
@@ -3,6 +3,7 @@
@{ @{
var s = Model.Shift!; var s = Model.Shift!;
var f = s.Facility!; var f = s.Facility!;
var shiftContacts = (s.Contacts ?? new List<JobsMedical.Web.Models.ContactMethod>()).ToList();
ViewData["Title"] = $"شیفت {s.SpecialtyRequired} - {f.Name}"; ViewData["Title"] = $"شیفت {s.SpecialtyRequired} - {f.Name}";
ViewData["Description"] = $"شیفت {s.SpecialtyRequired} در {f.Name}، {f.City?.Name}، تاریخ {JalaliDate.ToLongDate(s.Date)} از ساعت {JalaliDate.Time(s.StartTime)}."; ViewData["Description"] = $"شیفت {s.SpecialtyRequired} در {f.Name}، {f.City?.Name}، تاریخ {JalaliDate.ToLongDate(s.Date)} از ساعت {JalaliDate.Time(s.StartTime)}.";
// Past/filled shifts shouldn't stay in the index as dead pages. // Past/filled shifts shouldn't stay in the index as dead pages.
@@ -37,11 +38,18 @@
@if (Model.ShowContact) @if (Model.ShowContact)
{ {
<div class="contact-reveal" style="margin-bottom:16px;"> <div class="contact-reveal" style="margin-bottom:16px;">
<h4>✓ راه‌های ارتباطی مرکز</h4> <h4>✓ راه‌های ارتباطی</h4>
@if (shiftContacts.Count > 0)
{
@* Numbers from THIS ad (aggregated) — the correct, per-listing contacts. *@
<partial name="_ContactList" model="shiftContacts" />
}
else if (!string.IsNullOrEmpty(f.Phone) || !string.IsNullOrEmpty(f.BaleId))
{
@if (!string.IsNullOrEmpty(f.Phone)) @if (!string.IsNullOrEmpty(f.Phone))
{ {
<div class="contact-row"> <div class="contact-row">
<span class="c-meta"><span class="c-type">📞 تلفن</span><span class="c-val" dir="ltr">@f.Phone</span></span> <span class="c-meta"><span class="c-type">📞 تلفن مرکز</span><span class="c-val" dir="ltr">@f.Phone</span></span>
<a class="btn btn-accent" href="tel:@f.Phone">تماس</a> <a class="btn btn-accent" href="tel:@f.Phone">تماس</a>
</div> </div>
} }
@@ -52,9 +60,10 @@
<a class="btn btn-outline" href="https://ble.ir/@f.BaleId" target="_blank" rel="noopener">باز کردن</a> <a class="btn btn-outline" href="https://ble.ir/@f.BaleId" target="_blank" rel="noopener">باز کردن</a>
</div> </div>
} }
@if (string.IsNullOrEmpty(f.Phone) && string.IsNullOrEmpty(f.BaleId)) }
else
{ {
<p class="muted" style="margin:0;">شماره‌ای برای این مرکز ثبت نشده است.</p> <p class="muted" style="margin:0;">شماره‌ای ثبت نشده است.</p>
} }
</div> </div>
} }
@@ -69,6 +69,7 @@ public class DetailsModel : PageModel
Shift = await _db.Shifts Shift = await _db.Shifts
.Include(s => s.Facility).ThenInclude(f => f.City) .Include(s => s.Facility).ThenInclude(f => f.City)
.Include(s => s.Role) .Include(s => s.Role)
.Include(s => s.Contacts)
.FirstOrDefaultAsync(s => s.Id == id); .FirstOrDefaultAsync(s => s.Id == id);
if (Shift is not null) if (Shift is not null)
@@ -36,17 +36,20 @@ public class BaleListingSource : IListingSource
var items = new List<ScrapedItem>(); var items = new List<ScrapedItem>();
foreach (var update in result.EnumerateArray()) foreach (var update in result.EnumerateArray())
{ {
var text = TextOf(update, "channel_post") ?? TextOf(update, "message"); var post = Msg(update, "channel_post") ?? Msg(update, "message");
if (!string.IsNullOrWhiteSpace(text) && text!.Trim().Length >= 15) if (post is not { } p) continue;
items.Add(new ScrapedItem("بله", text.Trim())); var text = p.TryGetProperty("text", out var t) && t.ValueKind == JsonValueKind.String ? t.GetString() : null;
if (string.IsNullOrWhiteSpace(text) || text!.Trim().Length < 15) continue;
// Bot API messages carry a unix `date` — keep it so stale posts can be aged out.
DateTime? postedAt = p.TryGetProperty("date", out var d) && d.ValueKind == JsonValueKind.Number && d.TryGetInt64(out var epoch)
? DateTimeOffset.FromUnixTimeSeconds(epoch).UtcDateTime : null;
items.Add(new ScrapedItem("بله", text.Trim(), PostedAt: postedAt));
} }
return items; return items;
} }
catch (Exception ex) { _log.LogWarning(ex, "Bale fetch failed."); return Array.Empty<ScrapedItem>(); } catch (Exception ex) { _log.LogWarning(ex, "Bale fetch failed."); return Array.Empty<ScrapedItem>(); }
} }
private static string? TextOf(JsonElement update, string key) private static JsonElement? Msg(JsonElement update, string key)
=> update.TryGetProperty(key, out var m) => update.TryGetProperty(key, out var m) && m.ValueKind == JsonValueKind.Object ? m : null;
&& m.TryGetProperty("text", out var t) && t.ValueKind == JsonValueKind.String
? t.GetString() : null;
} }
@@ -4,9 +4,11 @@ namespace JobsMedical.Web.Services.Scraping;
/// <summary>One raw post pulled from a source (a Telegram message, a Divar ad, etc.). /// <summary>One raw post pulled from a source (a Telegram message, a Divar ad, etc.).
/// Lat/Lng are an APPROXIMATE location when the source exposes one (e.g. Divar's privacy-fuzzed /// Lat/Lng are an APPROXIMATE location when the source exposes one (e.g. Divar's privacy-fuzzed
/// map center) — used to place an aggregated facility on the map / enable «near me».</summary> /// map center) — used to place an aggregated facility on the map / enable «near me».
/// PostedAt is the post's ORIGINAL publish time when the source exposes it (Telegram &lt;time&gt;,
/// Bale message date…) — used to drop stale applicant ads at ingest. Null when unknown.</summary>
public record ScrapedItem(string Source, string RawText, string? SourceUrl = null, public record ScrapedItem(string Source, string RawText, string? SourceUrl = null,
double? Lat = null, double? Lng = null); double? Lat = null, double? Lng = null, DateTime? PostedAt = null);
/// <summary> /// <summary>
/// A pluggable source the ingestion engine pulls from. Configuration (enabled, channels, tokens) /// A pluggable source the ingestion engine pulls from. Configuration (enabled, channels, tokens)
@@ -29,6 +29,10 @@ public record IngestionSummary(List<SourceResult> Sources)
/// </summary> /// </summary>
public class IngestionService public class IngestionService
{ {
/// <summary>Applicant posts older than this (by the source's date, or a Persian "time ago"
/// phrase in the text) are skipped at ingest — availability goes stale fast.</summary>
private const int TalentMaxAgeDays = 7;
private readonly AppDbContext _db; private readonly AppDbContext _db;
private readonly IEnumerable<IListingSource> _sources; private readonly IEnumerable<IListingSource> _sources;
private readonly IListingParser _parser; private readonly IListingParser _parser;
@@ -90,6 +94,22 @@ public class IngestionService
var parsed = _parser.Parse(item.RawText, roleNames, cityNames, districtNames); var parsed = _parser.Parse(item.RawText, roleNames, cityNames, districtNames);
var val = _validator.Validate(item.RawText, parsed); var val = _validator.Validate(item.RawText, parsed);
// Drop STALE applicant («آماده به کار») posts — a person's availability goes cold fast.
// Age = the source's real timestamp, else a Persian "time ago" phrase in the text
// (Divar embeds «۲ هفته پیش»…). Recorded as Discarded (keeps the dedupe hash + audit
// trail; no AI spend). Shifts/jobs are NOT aged out — their dates are in the future.
if (parsed.Kind == ListingKind.Talent && PostAgeDays(item) is int age && age > TalentMaxAgeDays)
{
_db.RawListings.Add(new RawListing
{
SourceChannel = item.Source, SourceUrl = item.SourceUrl, RawText = item.RawText.Trim(),
ContentHash = hash, Confidence = 0, Status = RawListingStatus.Discarded,
ValidationNotes = $"آماده‌به‌کارِ قدیمی ({age} روز) — نادیده گرفته شد",
Lat = item.Lat, Lng = item.Lng,
});
spam++; continue;
}
AiAuditResult? ai = null; AiAuditResult? ai = null;
if (settings.AiEnabled && !val.IsSpam) if (settings.AiEnabled && !val.IsSpam)
ai = await _ai.AuditAsync(item.RawText, settings, ct); ai = await _ai.AuditAsync(item.RawText, settings, ct);
@@ -280,6 +300,7 @@ public class IngestionService
SalaryMin = parsed.PayAmount, SalaryMin = parsed.PayAmount,
Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated,
SourceUrl = raw.SourceUrl, SourceUrl = raw.SourceUrl,
Contacts = BuildContacts(d, parsed), // the ad's OWN number(s) — fresh per listing
}); });
} }
else else
@@ -297,6 +318,7 @@ public class IngestionService
: parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift, : parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift,
PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent, PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent,
Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl,
Contacts = BuildContacts(d, parsed), // the ad's OWN number(s) — fresh per listing
}); });
} }
raw.Status = RawListingStatus.Normalized; raw.Status = RawListingStatus.Normalized;
@@ -449,4 +471,14 @@ public class IngestionService
var normalized = Regex.Replace((text ?? "").Trim(), @"\s+", " "); var normalized = Regex.Replace((text ?? "").Trim(), @"\s+", " ");
return Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(normalized))).ToLowerInvariant(); return Convert.ToHexString(SHA256.HashData(Encoding.UTF8.GetBytes(normalized))).ToLowerInvariant();
} }
/// <summary>Whole-day age of a scraped post. Uses <c>PostedAt</c> when the source supplied it
/// (never negative: a timestamp in the future counts as 0 days); otherwise defers to
/// <c>HtmlUtil.AgeDaysFromPersianText</c> on the raw text, which yields null when the text
/// carries no recognizable "time ago" phrase — i.e. the age is unknown.</summary>
/// <param name="item">The scraped post to age.</param>
/// <returns>Age in whole days, or null when it cannot be determined.</returns>
private static int? PostAgeDays(ScrapedItem item)
{
    // No timestamp from the source: fall back to the textual "time ago" heuristic.
    if (item.PostedAt is not { } publishedAt)
        return HtmlUtil.AgeDaysFromPersianText(item.RawText);

    var elapsedDays = (DateTime.UtcNow - publishedAt).TotalDays;
    var wholeDays = (int)Math.Floor(elapsedDays);
    return wholeDays < 0 ? 0 : wholeDays;
}
} }
@@ -33,21 +33,28 @@ public class TelegramListingSource : IListingSource
try try
{ {
var html = await client.GetStringAsync($"https://t.me/s/{ch}", ct); var html = await client.GetStringAsync($"https://t.me/s/{ch}", ct);
foreach (var text in ExtractMessages(html).Take(20)) foreach (var (text, postedAt) in ExtractMessages(html).Take(20))
items.Add(new ScrapedItem($"تلگرام/{ch}", text, $"https://t.me/{ch}")); items.Add(new ScrapedItem($"تلگرام/{ch}", text, $"https://t.me/{ch}", PostedAt: postedAt));
} }
catch (Exception ex) { _log.LogWarning(ex, "Telegram fetch failed for {Channel}", ch); } catch (Exception ex) { _log.LogWarning(ex, "Telegram fetch failed for {Channel}", ch); }
} }
return items; return items;
} }
/// <summary>Pulls message bubbles out of a Telegram channel preview page.</summary>
/// <param name="html">Raw HTML of the <c>t.me/s/…</c> page.</param>
/// <returns>For each message whose plain text is at least 15 characters: the text, and the UTC
/// publish time taken from the first <c>datetime="…"</c> attribute found within the 2000
/// characters after the message text (null when none is found or it cannot be parsed).</returns>
private static IEnumerable<(string text, DateTime? postedAt)> ExtractMessages(string html)
{
    foreach (Match m in Regex.Matches(html,
        "<div class=\"tgme_widget_message_text[^\"]*\"[^>]*>(.*?)</div>", RegexOptions.Singleline))
    {
        var text = HtmlUtil.ToPlainText(m.Groups[1].Value);
        if (text.Length < 15) continue;

        // The message's date link (<time datetime="…">) follows its text in the same bubble —
        // grab the nearest one after this match.
        DateTime? postedAt = null;
        var tailStart = m.Index + m.Length;
        var tail = html.Substring(tailStart, Math.Min(2000, html.Length - tailStart));
        var dm = Regex.Match(tail, "datetime=\"([^\"]+)\"");

        // The attribute is machine-readable, so parse it with the invariant culture: the
        // provider-less overload uses the current culture, and a culture with a non-Gregorian
        // default calendar could misread the year or reject the value.
        if (dm.Success && DateTimeOffset.TryParse(
                dm.Groups[1].Value,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.AssumeUniversal,
                out var dto))
            postedAt = dto.UtcDateTime;

        yield return (text, postedAt);
    }
}
} }
@@ -63,6 +70,30 @@ internal static class HtmlUtil
return s.Trim(); return s.Trim();
} }
/// <summary>Best-effort age (in days) of a post from a Persian "time ago" phrase in its text
/// («دیروز»، «۳ روز پیش»، «هفته پیش»، «۲ هفته پیش»، «ماه پیش»…). Divar embeds this in the row
/// text, so we can age-filter it without a real timestamp.</summary>
/// <param name="text">Raw post text; digits may be Persian/Arabic-Indic (they are converted to
/// Latin before matching).</param>
/// <returns>0 for "just now" / minutes / hours phrases; 2 for «پریروز»; 1 for «دیروز»; N days,
/// N×7, N×30 or N×365 for a counted day/week/month/year phrase (clamped to
/// <see cref="int.MaxValue"/>); 7/30/365 for the bare week/month/year phrases; null when no such
/// phrase is present or the count is not a usable number (caller then treats age as unknown).</returns>
public static int? AgeDaysFromPersianText(string? text)
{
    if (string.IsNullOrEmpty(text)) return null;
    var t = ToLatinDigits(text);

    // Checked first, so a "minutes/hours ago" phrase wins over any other phrase in the text.
    if (Regex.IsMatch(t, "لحظات|هم[‌ ]?اکنون|چند لحظه|دقیقه پیش|دقایقی پیش|ساعت پیش|ساعتی پیش")) return 0;
    if (t.Contains("پریروز")) return 2;
    if (t.Contains("دیروز")) return 1;

    // ASCII digits only: \d would also match digits of other scripts, which int parsing rejects.
    var m = Regex.Match(t, @"([0-9]+)\s*(روز|هفته|ماه|سال)\s*پیش");
    if (m.Success)
    {
        // A digit run too long for an int is not a plausible age — report "unknown" instead of throwing.
        if (!int.TryParse(m.Groups[1].Value, System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture, out var n))
            return null;

        // The regex admits exactly four units, so the default arm is the year case («سال»).
        long daysPerUnit = m.Groups[2].Value switch { "روز" => 1, "هفته" => 7, "ماه" => 30, _ => 365 };

        // Multiply in 64-bit and clamp so a huge count cannot wrap around to a small/negative age.
        return (int)Math.Min(n * daysPerUnit, int.MaxValue);
    }

    if (Regex.IsMatch(t, @"هفته\s*پیش")) return 7; // bare «هفته پیش» = one week
    if (Regex.IsMatch(t, @"ماه\s*پیش")) return 30;
    if (Regex.IsMatch(t, @"سال\s*پیش") || t.Contains("پارسال")) return 365;
    return null;
}
/// <summary>Convert Persian/Arabic-Indic digits to Latin.</summary> /// <summary>Convert Persian/Arabic-Indic digits to Latin.</summary>
public static string ToLatinDigits(string s) public static string ToLatinDigits(string s)
{ {