diff --git a/src/JobsMedical.Web/Pages/Talent/Details.cshtml b/src/JobsMedical.Web/Pages/Talent/Details.cshtml index 6ccef77..f3aa463 100644 --- a/src/JobsMedical.Web/Pages/Talent/Details.cshtml +++ b/src/JobsMedical.Web/Pages/Talent/Details.cshtml @@ -19,13 +19,10 @@ var digits = new string(t.Phone.Where(char.IsDigit).ToArray()); if (digits.Length >= 7) telHref = "tel:" + digits; } - // Friendly source name (used to point users to the original ad when no number was extracted). - string? sourceName = null; - if (!string.IsNullOrWhiteSpace(t.SourceUrl)) - { - var host = System.Uri.TryCreate(t.SourceUrl, UriKind.Absolute, out var su) ? su.Host : t.SourceUrl!; - sourceName = host.Contains("divar") ? "دیوار" : host.Contains("medjobs") ? "مدجابز" : host; - } + // Only Divar is surfaced as a fallback source (and only when no number was extracted). + // We never name other crawl sources (medjobs/telegram/…) publicly. + bool isDivar = !string.IsNullOrWhiteSpace(t.SourceUrl) + && System.Uri.TryCreate(t.SourceUrl, UriKind.Absolute, out var su) && su.Host.Contains("divar"); }
@@ -75,12 +72,12 @@ 📞 @t.Phone

با این فرد مستقیم تماس بگیرید.

} - else if (!string.IsNullOrWhiteSpace(t.SourceUrl)) + else if (isDivar) { - @* Number wasn't extractable (e.g. behind a login-gated reveal) — point to the source. *@ + @* Divar hides the number behind a login-gated reveal — point to the original ad. *@

شماره مستقیم استخراج نشد.

- مشاهده شماره در @sourceName ↗ -

این آگهی از @sourceName جمع‌آوری شده؛ برای دریافت شماره به آگهی اصلی مراجعه کن.

+ مشاهده شماره در دیوار ↗ +

برای دریافت شماره به آگهی اصلی در دیوار مراجعه کن.

} else { diff --git a/src/JobsMedical.Web/Services/ListingParser.cs b/src/JobsMedical.Web/Services/ListingParser.cs index c45005b..244739f 100644 --- a/src/JobsMedical.Web/Services/ListingParser.cs +++ b/src/JobsMedical.Web/Services/ListingParser.cs @@ -254,24 +254,41 @@ public class HeuristicListingParser : IListingParser return t; } - /// Pull a Toman figure out of free text, handling «میلیون» and Persian digits. + /// Pull a figure out of free text and normalize to TOMAN (ریال → تومان = ÷۱۰), + /// handling «میلیون» and Persian digits. private static long? ExtractAmount(string text) { var latin = ToLatinDigits(text); - // e.g. "۲ میلیون" / "2.5 میلیون" - var million = Regex.Match(latin, @"(\d+(?:[.,]\d+)?)\s*میلیون"); + bool hasToman = latin.Contains("تومان") || latin.Contains("تومن"); + bool hasRial = (latin.Contains("ریال") || latin.Contains("ريال")) && !hasToman; + + // e.g. "۲ میلیون" / "2.5 میلیون [ریال]" + var million = Regex.Match(latin, @"(\d+(?:[.,]\d+)?)\s*میلیون\s*(ریال|ريال)?"); if (million.Success && double.TryParse(million.Groups[1].Value.Replace(",", "."), System.Globalization.NumberStyles.Any, System.Globalization.CultureInfo.InvariantCulture, out var m)) - return (long)(m * 1_000_000); - - // Otherwise the largest plain number that looks like money (6–10 digits, no leading zero — - // a leading zero or 11+ digits means it's a phone/id, not a price). - long best = 0; - foreach (Match num in Regex.Matches(latin, @"(?= 6 and <= 10 && long.TryParse(digits, out var v) && v > best) best = v; + var val = (long)(m * 1_000_000); + if (million.Groups[2].Success) val /= 10; // «میلیون ریال» + return val; } + + // Largest plain number that looks like money (6–10 digits, no leading zero — a leading + // zero or 11+ digits means it's a phone/id). Convert ریال→تومان by the unit next to the + // number, else by the ad's overall currency. + long best = 0; + foreach (Match num in Regex.Matches(latin, @"(? 10 || !long.TryParse(digits, out var v)) continue; + var unit = num.Groups[2].Value; + bool isRial = unit is "ریال" or "ريال" || (unit.Length == 0 && hasRial); + if (isRial) v /= 10; + if (v > best) best = v; + } + + // Sanity: a monthly figure of 200M+ تومان is implausible in Iran — if the ad never said + // «تومان», it was almost certainly ریال, so normalize. + if (best >= 200_000_000 && !hasToman) best /= 10; return best > 0 ? best : null; } diff --git a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs index e45dd52..8872ad4 100644 --- a/src/JobsMedical.Web/Services/Scraping/IngestionService.cs +++ b/src/JobsMedical.Web/Services/Scraping/IngestionService.cs @@ -198,9 +198,10 @@ public class IngestionService return; } + // Never surface the crawl source (e.g. «مدجابز») in a public facility name. var facilityName = !string.IsNullOrWhiteSpace(d?.FacilityName) ? d!.FacilityName!.Trim() : !string.IsNullOrWhiteSpace(parsed.FacilityName) ? parsed.FacilityName!.Trim() - : $"مرکز درمانی (از {raw.SourceChannel})"; + : "مرکز درمانی (نامشخص)"; // Reuse an existing facility (exact or Persian-aware fuzzy match) before creating a new one. var facility = FacilityMatcher.FindBest(facilities, facilityName, city.Id); if (facility is null)