diff --git a/src/JobsMedical.Web/Models/AppSetting.cs b/src/JobsMedical.Web/Models/AppSetting.cs
index bdbfcea..cb4aa1f 100644
--- a/src/JobsMedical.Web/Models/AppSetting.cs
+++ b/src/JobsMedical.Web/Models/AppSetting.cs
@@ -115,14 +115,20 @@ public class AppSetting
public const string DefaultPrompt = """
تو دستیار بررسی آگهیهای کاری حوزه درمان برای پلتفرم «همکادر» هستی.
هر آگهی خام را بخوان و تصمیم بگیر:
- - approve: آگهی واقعی و مرتبط با شیفت/استخدام کادر درمان است و اطلاعات کافی دارد.
+ - approve: آگهی واقعی و مرتبط با کادر درمان است و اطلاعات کافی دارد.
- reject: تبلیغ، اسپم، نامرتبط، یا فاقد اطلاعات حداقلی است.
- review: مرتبط است اما ناقص/مبهم و نیاز به بررسی انسانی دارد.
- نقش، شهر/محله، نوع شیفت، نوع همکاری، مبلغ یا درصد سهم، و عنوان را در صورت وجود استخراج کن.
+ سه نوع آگهی داریم:
+ - shift: مرکز درمانی برای یک شیفت نیرو میخواهد.
+ - job: مرکز درمانی برای استخدام دائم نیرو میخواهد.
+ - talent: خودِ کادر درمان اعلام «آماده به کار / آماده همکاری» کرده است.
+ نقش، شهر/محله، نوع شیفت/همکاری، مبلغ یا درصد سهم، عنوان، نام مرکز، و شماره تماس را در صورت وجود استخراج کن.
+ برای talent: نام فرد، سال سابقه و پروانهدار بودن را هم استخراج کن.
فقط با یک شیء JSON پاسخ بده با کلیدهای:
decision (approve|reject|review)، confidence (0-100)، reason (فارسی کوتاه)،
- kind (shift|job)، role، city، district، shiftType (day|evening|night|oncall)،
+ kind (shift|job|talent)، role، city، district، shiftType (day|evening|night|oncall)،
employmentType (fulltime|parttime|contract|plan)، payAmount (عدد تومان یا null)،
- sharePercent (0-100 یا null)، title، facilityName.
+ sharePercent (0-100 یا null)، title، facilityName، phone،
+ personName، yearsExperience (عدد یا null)، isLicensed (true|false).
""";
}
diff --git a/src/JobsMedical.Web/Services/ListingParser.cs b/src/JobsMedical.Web/Services/ListingParser.cs
index 68172d7..98096af 100644
--- a/src/JobsMedical.Web/Services/ListingParser.cs
+++ b/src/JobsMedical.Web/Services/ListingParser.cs
@@ -154,9 +154,21 @@ public class HeuristicListingParser : IListingParser
if (p.FacilityName is not null) p.Notes.Add($"مرکز: {p.FacilityName}");
}
- // --- Phone ---
- var phone = Regex.Match(ToLatinDigits(text), @"0?9\d{9}");
- if (phone.Success) p.Phone = phone.Value;
+ // --- Phone (mobile preferred, landline as fallback) ---
+ var latinPhone = ToLatinDigits(text);
+ var mobile = Regex.Match(latinPhone, @"(?:\+?98|0)?9\d{9}");
+ if (mobile.Success)
+ {
+ var d = Regex.Replace(mobile.Value, @"\D", "");
+ if (d.StartsWith("98")) d = "0" + d[2..];
+ if (d.Length == 10 && d.StartsWith("9")) d = "0" + d;
+ p.Phone = d;
+ }
+ else
+ {
+ var land = Regex.Match(latinPhone, @"0\d{2,3}[\s-]?\d{7,8}");
+ if (land.Success) p.Phone = Regex.Replace(land.Value, @"\D", "");
+ }
return p;
}
diff --git a/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs b/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs
index 41c271e..6a5aed7 100644
--- a/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs
+++ b/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs
@@ -7,7 +7,8 @@ namespace JobsMedical.Web.Services.Scraping;
public record AiStructured(
string? Kind, string? Role, string? City, string? District, string? ShiftType,
- string? EmploymentType, long? PayAmount, int? SharePercent, string? Title, string? FacilityName);
+ string? EmploymentType, long? PayAmount, int? SharePercent, string? Title, string? FacilityName,
+ string? Phone = null, string? PersonName = null, int? YearsExperience = null, bool? IsLicensed = null);
/// An AI verdict on a raw listing.
public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data)
@@ -30,6 +31,24 @@ public interface IAiAuditor
///
public class OpenAiCompatibleAuditor : IAiAuditor
{
+ // Output contract appended after the admin-editable system prompt. It pins the JSON key names and allowed
+ // values (including the talent / "ready to work" kind and the contact phone) even when the stored prompt is older.
+ private const string OutputSchema = """
+ فقط یک شیء JSON با این کلیدها برگردان (هر فیلد نامشخص = null):
+ decision: approve|reject|review
+ confidence: عدد ۰ تا ۱۰۰
+ reason: توضیح کوتاه فارسی
+ kind: shift (شیفت توسط مرکز) | job (استخدام توسط مرکز) | talent (کادر درمان که خودش «آماده به کار» است)
+ role: عنوان دقیق نقش درمانی (مثل پرستار، پزشک عمومی، دندانپزشک، تکنسین اتاق عمل، ماما، کارشناس آزمایشگاه)
+ city, district: نام شهر و محله/منطقه در صورت ذکر
+ shiftType: day|evening|night|oncall (فقط برای shift)
+ employmentType: fulltime|parttime|contract|plan
+ payAmount: عدد تومان یا null ، sharePercent: عدد ۰ تا ۱۰۰ یا null (مثل «۵۰٪ تسویه»)
+ title: عنوان کوتاه ، facilityName: نام مرکز درمانی (فقط برای shift/job)
+ phone: شماره تماس (موبایل یا ثابت) بهصورت رقم لاتین، یا null
+ personName: نام فرد (فقط برای talent) ، yearsExperience: سال سابقه عدد یا null ، isLicensed: true/false (پروانهدار)
+ """;
+
private readonly ScrapeHttpClients _clients;
private readonly ILogger _log;
@@ -52,7 +71,9 @@ public class OpenAiCompatibleAuditor : IAiAuditor
response_format = new { type = "json_object" },
messages = new object[]
{
- new { role = "system", content = s.AiSystemPrompt },
+ // Admin prompt + an authoritative output schema, so classification/tags stay
+ // correct even if the stored prompt predates the talent/phone fields.
+ new { role = "system", content = s.AiSystemPrompt + "\n\n" + OutputSchema },
new { role = "user", content = "آگهی خام:\n" + rawText + "\n\nفقط با JSON پاسخ بده." },
},
};
@@ -100,10 +121,12 @@ public class OpenAiCompatibleAuditor : IAiAuditor
int I(string k, int d) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : d;
long? L(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt64(out var n) ? n : null;
int? NI(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : null;
+ bool? B(string k) => r.TryGetProperty(k, out var v) && (v.ValueKind == JsonValueKind.True || v.ValueKind == JsonValueKind.False) ? v.GetBoolean() : null;
var decision = (S("decision") ?? "review").ToLowerInvariant();
var data = new AiStructured(S("kind"), S("role"), S("city"), S("district"), S("shiftType"),
- S("employmentType"), L("payAmount"), NI("sharePercent"), S("title"), S("facilityName"));
+ S("employmentType"), L("payAmount"), NI("sharePercent"), S("title"), S("facilityName"),
+ Phone: S("phone"), PersonName: S("personName"), YearsExperience: NI("yearsExperience"), IsLicensed: B("isLicensed"));
return new AiAuditResult(decision, Math.Clamp(I("confidence", 50), 0, 100), S("reason"), data);
}
}
diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
index fdcbe33..e45dd52 100644
--- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
+++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs
@@ -175,16 +175,23 @@ public class IngestionService
// «آماده به کار» — a worker offering themselves. No facility involved.
if (parsed.Kind == ListingKind.Talent || kindStr.Contains("talent") || kindStr.Contains("آماده"))
{
+ // Prefer the AI's tags when present, else the heuristic parser.
+ var tPay = d?.PayAmount ?? parsed.PayAmount;
+ var tShare = d?.SharePercent ?? parsed.SharePercent;
_db.TalentListings.Add(new TalentListing
{
Role = role, City = city, DistrictId = district?.Id,
- PersonName = parsed.PersonName, YearsExperience = parsed.YearsExperience,
- IsLicensed = parsed.IsLicensed, AreaNote = parsed.AreaNote,
- Availability = parsed.EmploymentType, Gender = parsed.Gender,
- PayType = parsed.SharePercent is not null && parsed.PayAmount is null ? PayType.Percentage
- : parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift,
- PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent,
- Phone = parsed.Phone, Description = raw.RawText,
+ PersonName = !string.IsNullOrWhiteSpace(d?.PersonName) ? d!.PersonName!.Trim() : parsed.PersonName,
+ YearsExperience = d?.YearsExperience ?? parsed.YearsExperience,
+ IsLicensed = d?.IsLicensed ?? parsed.IsLicensed,
+ AreaNote = parsed.AreaNote,
+ Availability = MapEmployment(d?.EmploymentType, parsed.EmploymentType),
+ Gender = parsed.Gender,
+ PayType = tShare is not null && tPay is null ? PayType.Percentage
+ : tPay is null ? PayType.Negotiable : PayType.PerShift,
+ PayAmount = tPay, SharePercent = tShare,
+ Phone = !string.IsNullOrWhiteSpace(d?.Phone) ? d!.Phone!.Trim() : parsed.Phone,
+ Description = raw.RawText,
Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl,
});
raw.Status = RawListingStatus.Normalized;
@@ -201,7 +208,7 @@ public class IngestionService
facility = new Facility
{
Name = facilityName, Type = FacilityType.Clinic, City = city, DistrictId = district?.Id,
- Phone = parsed.Phone, IsVerified = false,
+ Phone = !string.IsNullOrWhiteSpace(d?.Phone) ? d!.Phone!.Trim() : parsed.Phone, IsVerified = false,
};
_db.Facilities.Add(facility);
facilities.Add(facility); // so later listings in this run match it too
diff --git a/src/JobsMedical.Web/Services/Scraping/MedjobsListingSource.cs b/src/JobsMedical.Web/Services/Scraping/MedjobsListingSource.cs
index bba29fd..592b8a0 100644
--- a/src/JobsMedical.Web/Services/Scraping/MedjobsListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/MedjobsListingSource.cs
@@ -93,7 +93,14 @@ public class MedjobsListingSource : IListingSource
var parts = new[] { title, body }.Where(p => !string.IsNullOrWhiteSpace(p));
var text = HtmlUtil.ToPlainText(string.Join("\n", parts));
- return text.Length > 1800 ? text[..1800] : text;
+ if (text.Length > 1800) text = text[..1800];
+
+ // The contact number is often outside the description (in a tel: link / data attribute the
+ // page reveals on click). Harvest it from the full HTML and append so the parser/AI see it.
+ var phones = HtmlUtil.HarvestPhones(html);
+ if (phones.Count > 0 && !phones.Any(text.Contains))
+ text += "\nشماره تماس: " + string.Join("، ", phones);
+ return text;
}
private static string? Meta(string html, string prop)
diff --git a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs
index 8c8e1e4..91b2a6b 100644
--- a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs
@@ -62,4 +62,49 @@ internal static class HtmlUtil
s = Regex.Replace(s, "[ \\t]+", " ");
return s.Trim();
}
+
+    /// Rewrites Persian (U+06F0–U+06F9) and Arabic-Indic (U+0660–U+0669) digits as ASCII 0–9; other characters pass through.
+    public static string ToLatinDigits(string s)
+    {
+        static char Fold(char c) =>
+            c is >= '۰' and <= '۹' ? (char)('0' + (c - '۰'))
+            : c is >= '٠' and <= '٩' ? (char)('0' + (c - '٠')) : c;
+
+        var chars = s.ToCharArray();
+        for (var k = 0; k < chars.Length; k++) chars[k] = Fold(chars[k]);
+        return new string(chars);
+    }
+
+    /// <summary>
+    /// Pulls Iranian phone numbers out of a page's HTML, including click-to-reveal ones that remain in the
+    /// markup as a tel: link, JSON-LD "telephone" or data-*phone* attribute; bare digit runs are scanned last.
+    /// Returns up to three distinct 11-digit numbers with a leading 0, in that source order (not mobiles-first).
+    /// </summary>
+    public static List<string> HarvestPhones(string html)
+    {
+        if (string.IsNullOrEmpty(html)) return new();
+        var latin = ToLatinDigits(html);
+        var found = new List<string>();
+
+        void Add(string raw)
+        {
+            var d = Regex.Replace(raw, @"\D", "");
+            if (d.StartsWith("0098")) d = "0" + d[4..];
+            else if (d.StartsWith("98") && d.Length >= 12) d = "0" + d[2..];
+            if (Regex.IsMatch(d, @"^9\d{9}$")) d = "0" + d; // 9xxxxxxxxx → 09xxxxxxxxx
+            // A leading 0 plus ten digits covers both mobiles (09…) and landlines with their area code.
+            bool ok = Regex.IsMatch(d, @"^0\d{10}$");
+            if (ok && !found.Contains(d)) found.Add(d);
+        }
+
+        // Highest-signal sources first.
+        foreach (Match m in Regex.Matches(latin, @"tel:\+?([\d\s\-]{7,})")) Add(m.Groups[1].Value);
+        foreach (Match m in Regex.Matches(latin, "\"telephone\"\\s*:\\s*\"([^\"]+)\"")) Add(m.Groups[1].Value);
+        foreach (Match m in Regex.Matches(latin, "data-[\\w-]*phone[\\w-]*=[\"']([^\"']+)[\"']", RegexOptions.IgnoreCase)) Add(m.Groups[1].Value);
+        // Then bare numbers (mobiles, then landlines); the lookarounds stop matches inside longer digit runs (IDs, timestamps).
+        foreach (Match m in Regex.Matches(latin, @"(?<!\d)(?:\+98|0098|98|0)?9\d{9}(?!\d)")) Add(m.Value);
+        foreach (Match m in Regex.Matches(latin, @"(?<!\d)0\d{2,3}[\s-]?\d{7,8}(?!\d)")) Add(m.Value);
+
+        return found.Take(3).ToList();
+    }
}
diff --git a/src/JobsMedical.Web/Services/Scraping/WebsiteListingSource.cs b/src/JobsMedical.Web/Services/Scraping/WebsiteListingSource.cs
index 65d0257..c50e8df 100644
--- a/src/JobsMedical.Web/Services/Scraping/WebsiteListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/WebsiteListingSource.cs
@@ -51,7 +51,13 @@ public class WebsiteListingSource : IListingSource
string? body = Between(html, "rtcl-description") ?? Between(html, "entry-content")
?? Between(html, "job-description") ?? Meta(html, "og:description");
var text = HtmlUtil.ToPlainText(string.Join("\n", new[] { title, body }.Where(x => !string.IsNullOrWhiteSpace(x))));
- return text.Length > 1800 ? text[..1800] : text;
+ if (text.Length > 1800) text = text[..1800];
+
+ // Append any contact number found in the full markup (tel:/data-phone/JSON-LD/inline).
+ var phones = HtmlUtil.HarvestPhones(html);
+ if (phones.Count > 0 && !phones.Any(text.Contains))
+ text += "\nشماره تماس: " + string.Join("، ", phones);
+ return text;
}
private static string? Meta(string html, string prop)