Divar: capture post token + harvest phone from full ad detail
CI/CD / CI · dotnet build (push) Successful in 2m4s
CI/CD / Deploy · hamkadr (push) Successful in 2m18s

- Harvest now keeps each post's token, so we build a real post URL
  (divar.ir/v/{token}) instead of a generic link.
- For each post we fetch the detail JSON (posts-v2/web/{token}) and
  harvest any contact number from it — covering the very common case
  where the poster writes the phone into the ad description. Divar's
  click-to-reveal is login-gated, so this gets the in-text numbers
  without auth; fails soft (blocking/errors → skip).
- HarvestPhones hardened with digit-boundary guards so it can't grab a
  slice of a longer numeric id/timestamp inside JSON.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-08 08:28:37 +03:30
parent d238888710
commit a5d6e212e2
2 changed files with 67 additions and 7 deletions
@@ -59,8 +59,18 @@ public class DivarListingSource : IListingSource
continue;
}
using var doc = JsonDocument.Parse(body);
foreach (var text in Harvest(doc.RootElement).Take(25))
items.Add(new ScrapedItem("دیوار", text, "https://divar.ir"));
foreach (var (text, token) in Harvest(doc.RootElement).Take(25))
{
var url = token is not null ? $"https://divar.ir/v/{token}" : "https://divar.ir";
var withPhone = text;
if (token is not null)
{
var phones = await RevealPhonesAsync(client, token, s, ct);
if (phones.Count > 0 && !phones.Any(text.Contains))
withPhone = text + "\nشماره تماس: " + string.Join("، ", phones);
}
items.Add(new ScrapedItem("دیوار", withPhone, url));
}
}
catch (Exception ex) { _log.LogWarning(ex, "Divar fetch failed for query {Query}", q); }
}
@@ -85,10 +95,39 @@ public class DivarListingSource : IListingSource
};
}
// The post detail endpoint returns the FULL description — many Divar job ads write the phone
// straight into the body, so we can harvest it without Divar's (login-gated) contact reveal.
private const string PostDetailUrl = "https://api.divar.ir/v8/posts-v2/web/";
/// <summary>
/// Fetches a post's detail JSON (<see cref="PostDetailUrl"/> + token) and harvests any contact
/// numbers from the raw response body — mostly numbers the poster wrote into the description.
/// Divar's own "show phone number" reveal is auth-gated; this covers the common case where the
/// number is in the ad text. Fails soft: any failure other than caller-requested cancellation
/// yields an empty list.
/// </summary>
/// <param name="client">HTTP client used to send the detail request.</param>
/// <param name="token">Post token appended to the detail endpoint URL.</param>
/// <param name="s">Application settings; not read by this method at present.</param>
/// <param name="ct">Cancellation token flowed into the HTTP send and body read.</param>
/// <returns>
/// The result of <c>HtmlUtil.HarvestPhones</c> over the response body, or an empty list when the
/// response is non-success, contains the <c>BLOCKING_VIEW</c> marker, or the request fails.
/// </returns>
/// <exception cref="OperationCanceledException">
/// Rethrown only when <paramref name="ct"/> itself has been cancelled.
/// </exception>
private async Task<List<string>> RevealPhonesAsync(HttpClient client, string token, AppSetting s, CancellationToken ct)
{
    try
    {
        using var req = new HttpRequestMessage(HttpMethod.Get, PostDetailUrl + token);
        req.Headers.TryAddWithoutValidation("User-Agent", Ua);
        req.Headers.TryAddWithoutValidation("Accept", "application/json");
        using var resp = await client.SendAsync(req, ct);
        if (!resp.IsSuccessStatusCode) return new();
        var body = await resp.Content.ReadAsStringAsync(ct);
        // A body carrying this marker is treated as a blocked request: nothing to harvest.
        if (body.Contains("BLOCKING_VIEW")) return new();
        return HtmlUtil.HarvestPhones(body);
    }
    catch (OperationCanceledException) when (ct.IsCancellationRequested)
    {
        // The caller asked us to stop: propagate instead of reporting an empty result, so the
        // calling loop does not keep issuing further requests after cancellation. An
        // OperationCanceledException raised while ct is NOT cancelled (e.g. an HttpClient
        // timeout) falls through to the soft-fail handler below.
        throw;
    }
    catch (Exception ex)
    {
        _log.LogWarning(ex, "Divar detail/reveal failed for {Token}", token);
        return new();
    }
}
private static readonly string[] DescKeys =
{ "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" };
private static IEnumerable<string> Harvest(JsonElement el)
private static IEnumerable<(string text, string? token)> Harvest(JsonElement el)
{
if (el.ValueKind == JsonValueKind.Object)
{
@@ -99,7 +138,7 @@ public class DivarListingSource : IListingSource
if (el.TryGetProperty(k, out var d) && d.ValueKind == JsonValueKind.String)
{ sb.Append(" — ").Append(d.GetString()); break; }
var text = sb.ToString().Trim();
if (text.Length >= 15) yield return text;
if (text.Length >= 15) yield return (text, FindToken(el));
}
foreach (var p in el.EnumerateObject())
foreach (var x in Harvest(p.Value)) yield return x;
@@ -110,4 +149,24 @@ public class DivarListingSource : IListingSource
foreach (var x in Harvest(item)) yield return x;
}
}
/// <summary>
/// Locates a post token inside a widget's JSON subtree. A candidate is a string property named
/// <c>token</c> whose value is 6–16 characters long and consists only of letters and digits.
/// On an object, its own properties are checked first; only then are the property values
/// searched recursively, in document order. Arrays are searched item by item.
/// </summary>
/// <param name="el">JSON element to search.</param>
/// <returns>The first matching token, or <c>null</c> when none is found.</returns>
private static string? FindToken(JsonElement el)
{
    switch (el.ValueKind)
    {
        case JsonValueKind.Object:
        {
            // Pass 1: a "token" property directly on this object wins over anything nested.
            foreach (var prop in el.EnumerateObject())
            {
                if (!prop.NameEquals("token")) continue;
                if (prop.Value.ValueKind != JsonValueKind.String) continue;

                var candidate = prop.Value.GetString();
                if (candidate is null) continue;

                var lengthOk = candidate.Length >= 6 && candidate.Length <= 16;
                if (lengthOk && candidate.All(char.IsLetterOrDigit))
                {
                    return candidate;
                }
            }

            // Pass 2: descend into each property value.
            foreach (var prop in el.EnumerateObject())
            {
                var nested = FindToken(prop.Value);
                if (nested is not null)
                {
                    return nested;
                }
            }

            return null;
        }

        case JsonValueKind.Array:
        {
            foreach (var child in el.EnumerateArray())
            {
                var nested = FindToken(child);
                if (nested is not null)
                {
                    return nested;
                }
            }

            return null;
        }

        default:
            // Scalars (string, number, bool, null) cannot contain a token property.
            return null;
    }
}
}