Multi-role ads: parse all roles + fan-out publish one listing per role
An ad like «استخدام پرستار سالمند و کودک و همراه بیمار» names several roles; previously we kept only the first. Now:

- Parser collects ALL roles (ParsedListing.RoleNames): exact taxonomy matches (substring-deduped, so پرستار is dropped when پرستار سالمندان also matches) plus synonyms (سالمند→پرستار سالمندان, کودک/همراه بیمار→پرستار, اتاق عمل→تکنسین اتاق عمل…), capped at 4.
- Ingestion publishes one Shift/Job/Talent per resolved role (AI role + parser roles, distinct, capped), so each role is independently browsable and filterable. RawListing dedupe is unchanged (one raw → N posts).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -7,7 +7,8 @@ namespace JobsMedical.Web.Services;
|
||||
public class ParsedListing
|
||||
{
|
||||
public ListingKind Kind { get; set; } = ListingKind.Shift;
|
||||
public string? RoleName { get; set; }
|
||||
public string? RoleName { get; set; } // primary role (first match)
|
||||
public List<string> RoleNames { get; set; } = new(); // all roles in the ad (e.g. سالمند + کودک)
|
||||
public ShiftType? ShiftType { get; set; }
|
||||
public EmploymentType? EmploymentType { get; set; }
|
||||
public long? PayAmount { get; set; } // shift pay or single salary figure
|
||||
@@ -67,25 +68,36 @@ public class HeuristicListingParser : IListingParser
|
||||
p.Notes.Add(p.Kind == ListingKind.Job ? "نوع: استخدام (تشخیص خودکار)" : "نوع: شیفت (تشخیص خودکار)");
|
||||
}
|
||||
|
||||
// --- Role (longest match first so «پزشک متخصص» beats «پزشک») ---
|
||||
foreach (var role in knownRoles.OrderByDescending(r => r.Length))
|
||||
// --- Roles (an ad can name several at once: «پرستار سالمند و کودک و همراه بیمار») ---
|
||||
var known = knownRoles.ToList();
|
||||
var hits = new List<string>();
|
||||
// Exact taxonomy matches (longest first so «پزشک متخصص» beats «پزشک»).
|
||||
foreach (var role in known.OrderByDescending(r => r.Length))
|
||||
if (text.Contains(Normalize(role))) hits.Add(role);
|
||||
// Drop a role that's a substring of a longer matched role (پرستار ⊂ پرستار سالمندان).
|
||||
hits = hits.Where(r => !hits.Any(o => o != r && o.Contains(r))).Distinct().ToList();
|
||||
|
||||
// Synonyms → canonical role names (covers terms not written verbatim). Only add a canonical
|
||||
// that actually exists in the taxonomy, and isn't already a hit.
|
||||
void AddSyn(string canonical, params string[] needles)
|
||||
{
|
||||
if (text.Contains(Normalize(role))) { p.RoleName = role; break; }
|
||||
if (ContainsAny(text, needles) && known.Contains(canonical) && !hits.Contains(canonical))
|
||||
hits.Add(canonical);
|
||||
}
|
||||
// Synonyms common on Divar/Medjobs → canonical seeded role names.
|
||||
if (p.RoleName is null)
|
||||
{
|
||||
p.RoleName =
|
||||
ContainsAny(text, "اتاق عمل", "اسکراب") ? "تکنسین اتاق عمل"
|
||||
: ContainsAny(text, "فوریت", "اورژانس پیش بیمارستانی", "آمبولانس") ? "تکنسین فوریتهای پزشکی"
|
||||
: ContainsAny(text, "آزمایشگاه", "علوم آزمایشگاهی", "نمونه گیر") ? "کارشناس آزمایشگاه"
|
||||
: ContainsAny(text, "بهیار", "کمک بهیار", "کمک پرستار", "بیماربر", "مراقب", "سالمند", "همراه بیمار", "تزریقات", "پانسمان") ? "پرستار"
|
||||
: ContainsAny(text, "ماما", "مامایی") ? "ماما"
|
||||
: ContainsAny(text, "فوق تخصص", "متخصص") ? "پزشک متخصص"
|
||||
: ContainsAny(text, "پزشک", "دکتر", "طبیب") ? "پزشک عمومی"
|
||||
: null;
|
||||
}
|
||||
p.Notes.Add(p.RoleName is null ? "نقش: تشخیص داده نشد" : $"نقش: {p.RoleName}");
|
||||
AddSyn("پرستار سالمندان", "سالمند", "سالمندان", "نگهداری سالمند");
|
||||
AddSyn("دندانپزشک", "دندان", "دندانپزشک", "دندانپزشک");
|
||||
AddSyn("تکنسین اتاق عمل", "اتاق عمل", "اسکراب");
|
||||
AddSyn("تکنسین فوریتهای پزشکی", "فوریت", "اورژانس پیش بیمارستانی", "آمبولانس");
|
||||
AddSyn("کارشناس آزمایشگاه", "آزمایشگاه", "علوم آزمایشگاهی", "نمونه گیر");
|
||||
AddSyn("ماما", "مامایی");
|
||||
AddSyn("پرستار", "بهیار", "کمک بهیار", "کمک پرستار", "بیماربر", "مراقب", "همراه بیمار",
|
||||
"کودک", "اطفال", "نوزاد", "تزریقات", "پانسمان");
|
||||
AddSyn("پزشک متخصص", "فوق تخصص", "متخصص");
|
||||
AddSyn("پزشک عمومی", "پزشک", "دکتر", "طبیب");
|
||||
|
||||
p.RoleNames = hits.Distinct().Take(4).ToList(); // cap fan-out
|
||||
p.RoleName = p.RoleNames.FirstOrDefault();
|
||||
p.Notes.Add(p.RoleNames.Count == 0 ? "نقش: تشخیص داده نشد" : $"نقشها: {string.Join("، ", p.RoleNames)}");
|
||||
|
||||
// --- Shift type ---
|
||||
if (ContainsAny(text, "آنکال", "انکال")) p.ShiftType = Models.ShiftType.OnCall;
|
||||
|
||||
@@ -161,11 +161,21 @@ public class IngestionService
|
||||
List<Role> roles, List<City> cities, List<District> districts, List<Facility> facilities)
|
||||
{
|
||||
var d = ai?.Data;
|
||||
var roleName = d?.Role ?? parsed.RoleName;
|
||||
var cityName = d?.City ?? parsed.CityName;
|
||||
var districtName = d?.District ?? parsed.DistrictName;
|
||||
|
||||
var role = roles.FirstOrDefault(r => r.Name == roleName) ?? roles.First();
|
||||
// One ad can name several roles («پرستار سالمند و کودک و همراه بیمار») — resolve them all
|
||||
// and publish one listing per role so each is browsable/filterable. Capped to avoid spam.
|
||||
var roleNames = new List<string>();
|
||||
if (!string.IsNullOrWhiteSpace(d?.Role)) roleNames.Add(d!.Role!.Trim());
|
||||
roleNames.AddRange(parsed.RoleNames);
|
||||
if (parsed.RoleName is not null) roleNames.Add(parsed.RoleName);
|
||||
var pubRoles = roleNames
|
||||
.Select(n => roles.FirstOrDefault(r => r.Name == n))
|
||||
.Where(r => r is not null).Cast<Role>()
|
||||
.Distinct().Take(4).ToList();
|
||||
if (pubRoles.Count == 0) pubRoles.Add(roles.First());
|
||||
|
||||
var city = cities.FirstOrDefault(c => c.Name == cityName)
|
||||
?? cities.FirstOrDefault(c => c.IsActive) ?? cities.First();
|
||||
var district = districts.FirstOrDefault(x => x.Name == districtName && x.CityId == city.Id);
|
||||
@@ -178,6 +188,7 @@ public class IngestionService
|
||||
// Prefer the AI's tags when present, else the heuristic parser.
|
||||
var tPay = d?.PayAmount ?? parsed.PayAmount;
|
||||
var tShare = d?.SharePercent ?? parsed.SharePercent;
|
||||
foreach (var role in pubRoles)
|
||||
_db.TalentListings.Add(new TalentListing
|
||||
{
|
||||
Role = role, City = city, DistrictId = district?.Id,
|
||||
@@ -217,10 +228,11 @@ public class IngestionService
|
||||
|
||||
if (kindStr.Contains("job") || kindStr.Contains("استخدام"))
|
||||
{
|
||||
foreach (var role in pubRoles)
|
||||
_db.JobOpenings.Add(new JobOpening
|
||||
{
|
||||
Facility = facility, Role = role,
|
||||
Title = !string.IsNullOrWhiteSpace(d?.Title) ? d!.Title!.Trim() : $"استخدام {role.Name}",
|
||||
Title = !string.IsNullOrWhiteSpace(d?.Title) && pubRoles.Count == 1 ? d!.Title!.Trim() : $"استخدام {role.Name}",
|
||||
EmploymentType = MapEmployment(d?.EmploymentType, parsed.EmploymentType),
|
||||
SalaryMin = parsed.PayAmount,
|
||||
Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated,
|
||||
@@ -231,6 +243,7 @@ public class IngestionService
|
||||
{
|
||||
var st = MapShiftType(d?.ShiftType, parsed.ShiftType);
|
||||
var (start, end) = DefaultTimes(st);
|
||||
foreach (var role in pubRoles)
|
||||
_db.Shifts.Add(new Shift
|
||||
{
|
||||
Facility = facility, Role = role,
|
||||
|
||||
Reference in New Issue
Block a user