From 48760c4e831d0e3602bd6d18e8ad6a5a19081563 Mon Sep 17 00:00:00 2001 From: "soroush.asadi" Date: Mon, 8 Jun 2026 10:58:29 +0330 Subject: [PATCH] Multi-role ads: parse all roles + fan-out publish one listing per role MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An ad like «استخدام پرستار سالمند و کودک و همراه بیمار» names several roles; we kept only the first. Now: - Parser collects ALL roles (ParsedListing.RoleNames): exact taxonomy matches (substring-deduped so پرستار⊂پرستار سالمندان) plus synonyms (سالمند→پرستار سالمندان, کودک/همراه بیمار→پرستار, اتاق عمل→تکنسین اتاق عمل…), capped at 4. - Ingestion publishes one Shift/Job/Talent per resolved role (AI role + parser roles, distinct, capped), so each role is independently browsable and filterable. RawListing dedupe is unchanged (one raw → N posts). Co-Authored-By: Claude Opus 4.8 --- src/JobsMedical.Web/Services/ListingParser.cs | 48 ++++++---- .../Services/Scraping/IngestionService.cs | 89 +++++++++++-------- 2 files changed, 81 insertions(+), 56 deletions(-) diff --git a/src/JobsMedical.Web/Services/ListingParser.cs b/src/JobsMedical.Web/Services/ListingParser.cs index 244739f..5eb03ce 100644 --- a/src/JobsMedical.Web/Services/ListingParser.cs +++ b/src/JobsMedical.Web/Services/ListingParser.cs @@ -7,7 +7,8 @@ namespace JobsMedical.Web.Services; public class ParsedListing { public ListingKind Kind { get; set; } = ListingKind.Shift; - public string? RoleName { get; set; } + public string? RoleName { get; set; } // primary role (first match) + public List RoleNames { get; set; } = new(); // all roles in the ad (e.g. سالمند + کودک) public ShiftType? ShiftType { get; set; } public EmploymentType? EmploymentType { get; set; } public long? PayAmount { get; set; } // shift pay or single salary figure @@ -67,25 +68,36 @@ public class HeuristicListingParser : IListingParser p.Notes.Add(p.Kind == ListingKind.Job ? "نوع: استخدام (تشخیص خودکار)" : "نوع: شیفت (تشخیص خودکار)"); } - // --- Role (longest match first so «پزشک متخصص» beats «پزشک») --- - foreach (var role in knownRoles.OrderByDescending(r => r.Length)) + // --- Roles (an ad can name several at once: «پرستار سالمند و کودک و همراه بیمار») --- + var known = knownRoles.ToList(); + var hits = new List(); + // Exact taxonomy matches (longest first so «پزشک متخصص» beats «پزشک»). + foreach (var role in known.OrderByDescending(r => r.Length)) + if (text.Contains(Normalize(role))) hits.Add(role); + // Drop a role that's a substring of a longer matched role (پرستار ⊂ پرستار سالمندان). + hits = hits.Where(r => !hits.Any(o => o != r && o.Contains(r))).Distinct().ToList(); + + // Synonyms → canonical role names (covers terms not written verbatim). Only add a canonical + // that actually exists in the taxonomy, and isn't already a hit. + void AddSyn(string canonical, params string[] needles) { - if (text.Contains(Normalize(role))) { p.RoleName = role; break; } + if (ContainsAny(text, needles) && known.Contains(canonical) && !hits.Contains(canonical)) + hits.Add(canonical); } - // Synonyms common on Divar/Medjobs → canonical seeded role names. - if (p.RoleName is null) - { - p.RoleName = - ContainsAny(text, "اتاق عمل", "اسکراب") ? "تکنسین اتاق عمل" - : ContainsAny(text, "فوریت", "اورژانس پیش بیمارستانی", "آمبولانس") ? "تکنسین فوریت‌های پزشکی" - : ContainsAny(text, "آزمایشگاه", "علوم آزمایشگاهی", "نمونه گیر") ? "کارشناس آزمایشگاه" - : ContainsAny(text, "بهیار", "کمک بهیار", "کمک پرستار", "بیماربر", "مراقب", "سالمند", "همراه بیمار", "تزریقات", "پانسمان") ? "پرستار" - : ContainsAny(text, "ماما", "مامایی") ? "ماما" - : ContainsAny(text, "فوق تخصص", "متخصص") ? "پزشک متخصص" - : ContainsAny(text, "پزشک", "دکتر", "طبیب") ? "پزشک عمومی" - : null; - } - p.Notes.Add(p.RoleName is null ? "نقش: تشخیص داده نشد" : $"نقش: {p.RoleName}"); + AddSyn("پرستار سالمندان", "سالمند", "سالمندان", "نگهداری سالمند"); + AddSyn("دندانپزشک", "دندان", "دندانپزشک", "دندان‌پزشک"); + AddSyn("تکنسین اتاق عمل", "اتاق عمل", "اسکراب"); + AddSyn("تکنسین فوریت‌های پزشکی", "فوریت", "اورژانس پیش بیمارستانی", "آمبولانس"); + AddSyn("کارشناس آزمایشگاه", "آزمایشگاه", "علوم آزمایشگاهی", "نمونه گیر"); + AddSyn("ماما", "مامایی"); + AddSyn("پرستار", "بهیار", "کمک بهیار", "کمک پرستار", "بیماربر", "مراقب", "همراه بیمار", + "کودک", "اطفال", "نوزاد", "تزریقات", "پانسمان"); + AddSyn("پزشک متخصص", "فوق تخصص", "متخصص"); + AddSyn("پزشک عمومی", "پزشک", "دکتر", "طبیب"); + + p.RoleNames = hits.Distinct().Take(4).ToList(); // cap fan-out + p.RoleName = p.RoleNames.FirstOrDefault(); + p.Notes.Add(p.RoleNames.Count == 0 ? "نقش: تشخیص داده نشد" : $"نقش‌ها: {string.Join("، ", p.RoleNames)}"); // --- Shift type --- if (ContainsAny(text, "آنکال", "انکال")) p.ShiftType = Models.ShiftType.OnCall; diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs index 8872ad4..c61cd2e 100644 --- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs +++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs @@ -161,11 +161,21 @@ public class IngestionService List roles, List cities, List districts, List facilities) { var d = ai?.Data; - var roleName = d?.Role ?? parsed.RoleName; var cityName = d?.City ?? parsed.CityName; var districtName = d?.District ?? parsed.DistrictName; - var role = roles.FirstOrDefault(r => r.Name == roleName) ?? roles.First(); + // One ad can name several roles («پرستار سالمند و کودک و همراه بیمار») — resolve them all + // and publish one listing per role so each is browsable/filterable. Capped to avoid spam. + var roleNames = new List(); + if (!string.IsNullOrWhiteSpace(d?.Role)) roleNames.Add(d!.Role!.Trim()); + roleNames.AddRange(parsed.RoleNames); + if (parsed.RoleName is not null) roleNames.Add(parsed.RoleName); + var pubRoles = roleNames + .Select(n => roles.FirstOrDefault(r => r.Name == n)) + .Where(r => r is not null).Cast() + .Distinct().Take(4).ToList(); + if (pubRoles.Count == 0) pubRoles.Add(roles.First()); + var city = cities.FirstOrDefault(c => c.Name == cityName) ?? cities.FirstOrDefault(c => c.IsActive) ?? cities.First(); var district = districts.FirstOrDefault(x => x.Name == districtName && x.CityId == city.Id); @@ -178,22 +188,23 @@ public class IngestionService // Prefer the AI's tags when present, else the heuristic parser. var tPay = d?.PayAmount ?? parsed.PayAmount; var tShare = d?.SharePercent ?? parsed.SharePercent; - _db.TalentListings.Add(new TalentListing - { - Role = role, City = city, DistrictId = district?.Id, - PersonName = !string.IsNullOrWhiteSpace(d?.PersonName) ? d!.PersonName!.Trim() : parsed.PersonName, - YearsExperience = d?.YearsExperience ?? parsed.YearsExperience, - IsLicensed = d?.IsLicensed ?? parsed.IsLicensed, - AreaNote = parsed.AreaNote, - Availability = MapEmployment(d?.EmploymentType, parsed.EmploymentType), - Gender = parsed.Gender, - PayType = tShare is not null && tPay is null ? PayType.Percentage - : tPay is null ? PayType.Negotiable : PayType.PerShift, - PayAmount = tPay, SharePercent = tShare, - Phone = !string.IsNullOrWhiteSpace(d?.Phone) ? d!.Phone!.Trim() : parsed.Phone, - Description = raw.RawText, - Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl, - }); + foreach (var role in pubRoles) + _db.TalentListings.Add(new TalentListing + { + Role = role, City = city, DistrictId = district?.Id, + PersonName = !string.IsNullOrWhiteSpace(d?.PersonName) ? d!.PersonName!.Trim() : parsed.PersonName, + YearsExperience = d?.YearsExperience ?? parsed.YearsExperience, + IsLicensed = d?.IsLicensed ?? parsed.IsLicensed, + AreaNote = parsed.AreaNote, + Availability = MapEmployment(d?.EmploymentType, parsed.EmploymentType), + Gender = parsed.Gender, + PayType = tShare is not null && tPay is null ? PayType.Percentage + : tPay is null ? PayType.Negotiable : PayType.PerShift, + PayAmount = tPay, SharePercent = tShare, + Phone = !string.IsNullOrWhiteSpace(d?.Phone) ? d!.Phone!.Trim() : parsed.Phone, + Description = raw.RawText, + Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl, + }); raw.Status = RawListingStatus.Normalized; return; } @@ -217,31 +228,33 @@ public class IngestionService if (kindStr.Contains("job") || kindStr.Contains("استخدام")) { - _db.JobOpenings.Add(new JobOpening - { - Facility = facility, Role = role, - Title = !string.IsNullOrWhiteSpace(d?.Title) ? d!.Title!.Trim() : $"استخدام {role.Name}", - EmploymentType = MapEmployment(d?.EmploymentType, parsed.EmploymentType), - SalaryMin = parsed.PayAmount, - Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, - SourceUrl = raw.SourceUrl, - }); + foreach (var role in pubRoles) + _db.JobOpenings.Add(new JobOpening + { + Facility = facility, Role = role, + Title = !string.IsNullOrWhiteSpace(d?.Title) && pubRoles.Count == 1 ? d!.Title!.Trim() : $"استخدام {role.Name}", + EmploymentType = MapEmployment(d?.EmploymentType, parsed.EmploymentType), + SalaryMin = parsed.PayAmount, + Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, + SourceUrl = raw.SourceUrl, + }); } else { var st = MapShiftType(d?.ShiftType, parsed.ShiftType); var (start, end) = DefaultTimes(st); - _db.Shifts.Add(new Shift - { - Facility = facility, Role = role, - Date = DateOnly.FromDateTime(DateTime.UtcNow).AddDays(1), - StartTime = start, EndTime = end, ShiftType = st, - SpecialtyRequired = role.Name, Description = raw.RawText, - PayType = parsed.SharePercent is not null && parsed.PayAmount is null ? PayType.Percentage - : parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift, - PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent, - Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl, - }); + foreach (var role in pubRoles) + _db.Shifts.Add(new Shift + { + Facility = facility, Role = role, + Date = DateOnly.FromDateTime(DateTime.UtcNow).AddDays(1), + StartTime = start, EndTime = end, ShiftType = st, + SpecialtyRequired = role.Name, Description = raw.RawText, + PayType = parsed.SharePercent is not null && parsed.PayAmount is null ? PayType.Percentage + : parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift, + PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent, + Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl, + }); } raw.Status = RawListingStatus.Normalized; }