diff --git a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs
index ae83213..1583d1d 100644
--- a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs
@@ -59,8 +59,18 @@ public class DivarListingSource : IListingSource
continue;
}
using var doc = JsonDocument.Parse(body);
- foreach (var text in Harvest(doc.RootElement).Take(25))
- items.Add(new ScrapedItem("دیوار", text, "https://divar.ir"));
+ foreach (var (text, token) in Harvest(doc.RootElement).Take(25))
+ {
+ var url = token is not null ? $"https://divar.ir/v/{token}" : "https://divar.ir";
+ var withPhone = text;
+ if (token is not null)
+ {
+ var phones = await RevealPhonesAsync(client, token, s, ct);
+ if (phones.Count > 0 && !phones.Any(text.Contains))
+ withPhone = text + "\nشماره تماس: " + string.Join("، ", phones);
+ }
+ items.Add(new ScrapedItem("دیوار", withPhone, url));
+ }
}
catch (Exception ex) { _log.LogWarning(ex, "Divar fetch failed for query {Query}", q); }
}
@@ -85,10 +95,39 @@ public class DivarListingSource : IListingSource
};
}
+ // The post detail endpoint returns the FULL description — many Divar job ads write the phone
+ // straight into the body, so we can harvest it without Divar's (login-gated) contact reveal.
+ private const string PostDetailUrl = "https://api.divar.ir/v8/posts-v2/web/";
+
+    /// <summary>
+    /// Fetch a post's detail JSON and harvest any contact number it contains (mostly numbers the
+    /// poster wrote into the description). Divar's true "نمایش شماره" reveal is auth-gated; this
+    /// covers the common case where the number is in the ad text. Fails soft: a non-success
+    /// status, a blocking response, or a request error all yield an empty list.
+    /// </summary>
+    /// <param name="client">HTTP client used to issue the detail request.</param>
+    /// <param name="token">Post token appended to <see cref="PostDetailUrl"/>.</param>
+    /// <param name="s">Application settings; not read by this method at present.</param>
+    /// <param name="ct">Cancellation token. Cancellation requested through it propagates to the caller.</param>
+    /// <returns>Phone numbers harvested from the response body, or an empty list.</returns>
+    private async Task<List<string>> RevealPhonesAsync(HttpClient client, string token, AppSetting s, CancellationToken ct)
+    {
+        try
+        {
+            using var req = new HttpRequestMessage(HttpMethod.Get, PostDetailUrl + token);
+            req.Headers.TryAddWithoutValidation("User-Agent", Ua);
+            req.Headers.TryAddWithoutValidation("Accept", "application/json");
+            using var resp = await client.SendAsync(req, ct);
+            if (!resp.IsSuccessStatusCode) return new();
+            var body = await resp.Content.ReadAsStringAsync(ct);
+            // Presumably the marker of Divar's block/challenge payload — TODO confirm; nothing to harvest then.
+            if (body.Contains("BLOCKING_VIEW")) return new();
+            // NOTE(review): the whole raw JSON is scanned, so numeric fields other than the
+            // description could match as well — confirm against HtmlUtil.HarvestPhones.
+            return HtmlUtil.HarvestPhones(body);
+        }
+        // Fail soft on network errors and HttpClient timeouts, but do not swallow a cancellation
+        // requested by the caller: otherwise every remaining post would still be attempted and
+        // each attempt would log a warning.
+        catch (Exception ex) when (!ct.IsCancellationRequested)
+        {
+            _log.LogWarning(ex, "Divar detail/reveal failed for {Token}", token);
+            return new();
+        }
+    }
+
private static readonly string[] DescKeys =
{ "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" };
- private static IEnumerable Harvest(JsonElement el)
+ private static IEnumerable<(string text, string? token)> Harvest(JsonElement el)
{
if (el.ValueKind == JsonValueKind.Object)
{
@@ -99,7 +138,7 @@ public class DivarListingSource : IListingSource
if (el.TryGetProperty(k, out var d) && d.ValueKind == JsonValueKind.String)
{ sb.Append(" — ").Append(d.GetString()); break; }
var text = sb.ToString().Trim();
- if (text.Length >= 15) yield return text;
+ if (text.Length >= 15) yield return (text, FindToken(el));
}
foreach (var p in el.EnumerateObject())
foreach (var x in Harvest(p.Value)) yield return x;
@@ -110,4 +149,24 @@ public class DivarListingSource : IListingSource
foreach (var x in Harvest(item)) yield return x;
}
}
+
+    /// <summary>
+    /// Find the post token within a widget object (Divar tokens: 6–16 ASCII alphanumerics).
+    /// A valid "token" string directly on an object wins over tokens nested deeper; otherwise
+    /// the children are searched depth-first in document order.
+    /// </summary>
+    /// <param name="el">JSON element to search; objects and arrays are descended into.</param>
+    /// <returns>The first plausible token found, or <c>null</c> when there is none.</returns>
+    private static string? FindToken(JsonElement el)
+    {
+        // ASCII-only on purpose: char.IsLetterOrDigit also accepts non-Latin letters and digits
+        // (e.g. Persian text), and the token is spliced into URLs by the callers.
+        static bool IsTokenChar(char c) =>
+            c is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or (>= '0' and <= '9');
+
+        if (el.ValueKind == JsonValueKind.Object)
+        {
+            // First pass: a token sitting directly on this object.
+            foreach (var p in el.EnumerateObject())
+            {
+                if (p.NameEquals("token") && p.Value.ValueKind == JsonValueKind.String)
+                {
+                    var v = p.Value.GetString();
+                    if (v is not null && v.Length is >= 6 and <= 16 && v.All(IsTokenChar)) return v;
+                }
+            }
+            // Second pass: descend into every property value.
+            foreach (var p in el.EnumerateObject())
+            {
+                var r = FindToken(p.Value);
+                if (r is not null) return r;
+            }
+        }
+        else if (el.ValueKind == JsonValueKind.Array)
+        {
+            foreach (var item in el.EnumerateArray())
+            {
+                var r = FindToken(item);
+                if (r is not null) return r;
+            }
+        }
+        return null;
+    }
}
diff --git a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs
index 91b2a6b..9d1bccc 100644
--- a/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/TelegramListingSource.cs
@@ -101,9 +101,10 @@ internal static class HtmlUtil
foreach (Match m in Regex.Matches(latin, @"tel:\+?([\d\s\-]{7,})")) Add(m.Groups[1].Value);
foreach (Match m in Regex.Matches(latin, "\"telephone\"\\s*:\\s*\"([^\"]+)\"")) Add(m.Groups[1].Value);
foreach (Match m in Regex.Matches(latin, "data-[\\w-]*phone[\\w-]*=[\"']([^\"']+)[\"']", RegexOptions.IgnoreCase)) Add(m.Groups[1].Value);
- // Then bare numbers anywhere in the markup — mobiles, then landlines.
- foreach (Match m in Regex.Matches(latin, @"(?:\+?98|0)?9\d{9}")) Add(m.Value);
- foreach (Match m in Regex.Matches(latin, @"0\d{2,3}[\s-]?\d{7,8}")) Add(m.Value);
+ // Then bare numbers anywhere in the text — mobiles, then landlines. The (?