Match crawled listings to existing facilities (fuzzy) before creating new
When publishing a scraped listing, we now look for an existing facility that is exactly or closely the same, and create a new one only when there is no match — avoiding duplicates like «بیمارستان میلاد» vs «میلاد».

- ListingParser: extract a facility name (keyword + distinctive words) from the post and surface it in the parser notes.
- FacilityMatcher: Persian-aware normalization (ي/ك, ZWNJ, punctuation), type-word stripping for a "core" name, contains + Levenshtein similarity, and FindBest (same-city exact → any-city exact → same-city fuzzy → any-city fuzzy).
- Review (manual publish): auto-select a matching facility or prefill the new-facility name; resolve-or-create uses the fuzzy match; the dropdown preselects the matched facility.
- IngestionService (auto-publish): reuse FacilityMatcher against a run-wide facility list (which grows as new facilities are created) instead of matching on exact name only.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -51,10 +51,10 @@
|
||||
<div class="filter-group">
|
||||
<label>مرکز درمانی</label>
|
||||
<select name="FacilityId">
|
||||
<option value="0">— انتخاب نشده —</option>
|
||||
<option value="0" selected="@(Model.FacilityId == 0)">— انتخاب نشده —</option>
|
||||
@foreach (var f in Model.Facilities)
|
||||
{
|
||||
<option value="@f.Id">@f.Name — @f.City?.Name</option>
|
||||
<option value="@f.Id" selected="@(Model.FacilityId == f.Id)">@f.Name — @f.City?.Name</option>
|
||||
}
|
||||
</select>
|
||||
<input type="text" name="NewFacilityName" placeholder="یا نام مرکز جدید را وارد کن…" style="margin-top:6px;" />
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
using JobsMedical.Web.Data;
|
||||
using JobsMedical.Web.Models;
|
||||
using JobsMedical.Web.Services;
|
||||
using JobsMedical.Web.Services.Scraping;
|
||||
using Microsoft.AspNetCore.Authorization;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.AspNetCore.Mvc.RazorPages;
|
||||
@@ -72,6 +73,25 @@ public class ReviewModel : PageModel
|
||||
if (Parsed.PayAmount is not null) { PayAmount = Parsed.PayAmount; SalaryMin = Parsed.PayAmount; }
|
||||
Description = Raw.RawText;
|
||||
Title = Parsed.RoleName is not null ? $"استخدام {Parsed.RoleName}" : "موقعیت استخدامی";
|
||||
|
||||
// Facility: try to match the listing's facility to one we already have; otherwise
|
||||
// prefill the "new facility" box so publishing creates it.
|
||||
if (!string.IsNullOrWhiteSpace(Parsed.FacilityName))
|
||||
{
|
||||
var cityId = await _db.Cities.Where(c => c.Name == Parsed.CityName)
|
||||
.Select(c => (int?)c.Id).FirstOrDefaultAsync();
|
||||
var match = FacilityMatcher.FindBest(Facilities, Parsed.FacilityName, cityId);
|
||||
if (match is not null)
|
||||
{
|
||||
FacilityId = match.Id;
|
||||
Parsed.Notes.Add($"مرکز منطبق در سیستم: «{match.Name}» — همین انتخاب شد.");
|
||||
}
|
||||
else
|
||||
{
|
||||
NewFacilityName = Parsed.FacilityName;
|
||||
Parsed.Notes.Add($"مرکز جدید پیشنهادی: «{Parsed.FacilityName}» — هنگام انتشار ساخته میشود.");
|
||||
}
|
||||
}
|
||||
return Page();
|
||||
}
|
||||
|
||||
@@ -181,15 +201,17 @@ public class ReviewModel : PageModel
|
||||
if (string.IsNullOrWhiteSpace(NewFacilityName))
|
||||
return null;
|
||||
|
||||
// Reuse a same-named facility if one already exists, else create it.
|
||||
var name = NewFacilityName.Trim();
|
||||
var existing = await _db.Facilities.FirstOrDefaultAsync(f => f.Name == name);
|
||||
if (existing is not null) return existing.Id;
|
||||
|
||||
var cityId = await _db.Cities.OrderByDescending(c => c.IsActive)
|
||||
.Select(c => (int?)c.Id).FirstOrDefaultAsync();
|
||||
if (cityId is null) return null; // no cities seeded — cannot create a facility
|
||||
|
||||
// Reuse an existing facility that's exactly or closely the same (Persian-aware fuzzy
|
||||
// match), so we don't create duplicates like «بیمارستان میلاد» vs «میلاد».
|
||||
var all = await _db.Facilities.ToListAsync();
|
||||
var match = FacilityMatcher.FindBest(all, name, cityId);
|
||||
if (match is not null) return match.Id;
|
||||
|
||||
var facility = new Facility
|
||||
{
|
||||
Name = name,
|
||||
|
||||
Reference in New Issue
Block a user