Divar geo-coords to facility map + medical gate + RawListing FK/geo migrations
This commit is contained in:
@@ -59,17 +59,25 @@ public class DivarListingSource : IListingSource
|
||||
continue;
|
||||
}
|
||||
using var doc = JsonDocument.Parse(body);
|
||||
var cityLabel = CityLabel(s.DivarCity); // every result is from the city we searched
|
||||
foreach (var (text, token) in Harvest(doc.RootElement).Take(25))
|
||||
{
|
||||
var url = token is not null ? $"https://divar.ir/v/{token}" : "https://divar.ir";
|
||||
var withPhone = text;
|
||||
var itemText = text;
|
||||
// Stamp the city so the parser/AI always resolve a location (Divar's own location
|
||||
// line isn't always in the search row; the searched city is authoritative).
|
||||
if (!string.IsNullOrWhiteSpace(cityLabel) && !text.Contains(cityLabel))
|
||||
itemText += $"\n📍 {cityLabel}";
|
||||
double? lat = null, lng = null;
|
||||
if (token is not null)
|
||||
{
|
||||
var phones = await RevealPhonesAsync(client, token, s, ct);
|
||||
if (phones.Count > 0 && !phones.Any(text.Contains))
|
||||
withPhone = text + "\nشماره تماس: " + string.Join("، ", phones);
|
||||
// One detail fetch yields BOTH the phone and the map coordinates.
|
||||
var (phones, gLat, gLng) = await FetchDetailAsync(client, token, ct);
|
||||
if (phones.Count > 0 && !phones.Any(itemText.Contains))
|
||||
itemText += "\nشماره تماس: " + string.Join("، ", phones);
|
||||
lat = gLat; lng = gLng;
|
||||
}
|
||||
items.Add(new ScrapedItem("دیوار", withPhone, url));
|
||||
items.Add(new ScrapedItem("دیوار", itemText, url, lat, lng));
|
||||
}
|
||||
}
|
||||
catch (Exception ex) { _log.LogWarning(ex, "Divar fetch failed for query {Query}", q); }
|
||||
@@ -95,16 +103,31 @@ public class DivarListingSource : IListingSource
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Resolves the searched city to its Persian display name so every Divar result can be stamped
/// with its (authoritative) location. Accepts a Divar numeric id, a Latin slug (any casing) or the
/// Persian name itself; surrounding whitespace is ignored.
/// </summary>
/// <param name="city">The configured city value; may be <c>null</c>.</param>
/// <returns>
/// The Persian name for a known city; otherwise the trimmed input unchanged (an empty string
/// when <paramref name="city"/> is <c>null</c> or blank).
/// </returns>
private static string CityLabel(string? city)
{
    var trimmed = (city ?? "").Trim();

    // Lower-casing only affects Latin slugs; numeric ids and Persian names pass through as-is.
    switch (trimmed.ToLowerInvariant())
    {
        case "1":
        case "tehran":
        case "تهران":
            return "تهران";

        case "3":
        case "isfahan":
        case "esfahan":
        case "اصفهان":
            return "اصفهان";

        case "4":
        case "mashhad":
        case "مشهد":
            return "مشهد";

        case "5":
        case "shiraz":
        case "شیراز":
            return "شیراز";

        case "6":
        case "tabriz":
        case "تبریز":
            return "تبریز";

        case "1745":
        case "karaj":
        case "کرج":
            return "کرج";

        default:
            // Unknown city: hand back what the caller gave us (trimmed, original casing).
            return trimmed;
    }
}
|
||||
|
||||
// The post detail endpoint returns the FULL description — many Divar job ads write the phone
|
||||
// straight into the body, so we can harvest it without Divar's (login-gated) contact reveal.
|
||||
private const string PostDetailUrl = "https://api.divar.ir/v8/posts-v2/web/";
|
||||
|
||||
/// <summary>
|
||||
/// Fetch a post's detail JSON and harvest any contact number it contains (mostly numbers the
|
||||
/// poster wrote into the description). Divar's true "نمایش شماره" reveal is auth-gated; this
|
||||
/// covers the common case where the number is in the ad text. Fails soft.
|
||||
/// Fetch a post's detail JSON ONCE and harvest both (a) any contact number it contains (mostly
|
||||
/// numbers the poster wrote into the description; Divar's true "نمایش شماره" reveal is auth-gated)
|
||||
/// and (b) the post's APPROXIMATE map coordinates (the privacy-fuzzed center Divar shows as a
|
||||
/// circle). Fails soft — returns whatever it could extract.
|
||||
/// </summary>
|
||||
private async Task<List<string>> RevealPhonesAsync(HttpClient client, string token, AppSetting s, CancellationToken ct)
|
||||
private async Task<(List<string> phones, double? lat, double? lng)> FetchDetailAsync(
|
||||
HttpClient client, string token, CancellationToken ct)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -112,18 +135,68 @@ public class DivarListingSource : IListingSource
|
||||
req.Headers.TryAddWithoutValidation("User-Agent", Ua);
|
||||
req.Headers.TryAddWithoutValidation("Accept", "application/json");
|
||||
using var resp = await client.SendAsync(req, ct);
|
||||
if (!resp.IsSuccessStatusCode) return new();
|
||||
if (!resp.IsSuccessStatusCode) return (new(), null, null);
|
||||
var body = await resp.Content.ReadAsStringAsync(ct);
|
||||
if (body.Contains("BLOCKING_VIEW")) return new();
|
||||
return HtmlUtil.HarvestPhones(body);
|
||||
if (body.Contains("BLOCKING_VIEW")) return (new(), null, null);
|
||||
var phones = HtmlUtil.HarvestPhones(body);
|
||||
double? lat = null, lng = null;
|
||||
try { using var doc = JsonDocument.Parse(body); if (FindLatLng(doc.RootElement) is { } g) { lat = g.lat; lng = g.lng; } }
|
||||
catch (JsonException) { /* detail wasn't JSON — phones still harvested from text */ }
|
||||
return (phones, lat, lng);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_log.LogWarning(ex, "Divar detail/reveal failed for {Token}", token);
|
||||
return new();
|
||||
return (new(), null, null);
|
||||
}
|
||||
}
|
||||
|
||||
// Iran's bounding box — guards against picking up an unrelated number pair (timestamps, ids…).
|
||||
private const double MinLat = 24, MaxLat = 40, MinLng = 44, MaxLng = 64;
|
||||
|
||||
/// <summary>
/// Searches Divar's detail JSON, depth-first, for an approximate coordinate pair. Because the
/// payload shape varies (<c>latitude</c>/<c>longitude</c> at one level, or nested under another
/// object), the whole tree is walked. An object qualifies only when it directly holds BOTH a
/// latitude-like and a longitude-like property (number or numeric string) whose values lie inside
/// the Iran bounding box; requiring both in the same object prevents pairing a stray latitude
/// with an unrelated longitude. A parent object is tested before its children.
/// </summary>
/// <param name="el">The JSON element to search (object, array, or anything else).</param>
/// <returns>The first plausible (lat, lng) pair, or <c>null</c> when none is found.</returns>
private static (double lat, double lng)? FindLatLng(JsonElement el)
{
    switch (el.ValueKind)
    {
        case JsonValueKind.Object:
        {
            double? foundLat = null;
            double? foundLng = null;

            // First pass: look only at this object's own properties; the first usable
            // latitude-like and longitude-like values win.
            foreach (var prop in el.EnumerateObject())
            {
                if (foundLat is null && IsLatKey(prop.Name) && TryNum(prop.Value, out var latValue))
                {
                    foundLat = latValue;
                }
                else if (foundLng is null && IsLngKey(prop.Name) && TryNum(prop.Value, out var lngValue))
                {
                    foundLng = lngValue;
                }
            }

            if (foundLat is double latitude && foundLng is double longitude)
            {
                var insideBounds = latitude >= MinLat && latitude <= MaxLat
                                && longitude >= MinLng && longitude <= MaxLng;
                if (insideBounds)
                {
                    return (latitude, longitude);
                }
            }

            // Second pass: nothing plausible at this level, so descend into each property value.
            foreach (var prop in el.EnumerateObject())
            {
                var nested = FindLatLng(prop.Value);
                if (nested is not null)
                {
                    return nested;
                }
            }

            return null;
        }

        case JsonValueKind.Array:
        {
            foreach (var element in el.EnumerateArray())
            {
                var nested = FindLatLng(element);
                if (nested is not null)
                {
                    return nested;
                }
            }

            return null;
        }

        default:
            // Scalars (string, number, bool, null) cannot contain a coordinate pair.
            return null;
    }
}
|
||||
|
||||
/// <summary>True when the property name is <c>latitude</c> or <c>lat</c>, ignoring case.</summary>
private static bool IsLatKey(string k)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    return k.Equals("latitude", ignoreCase) || k.Equals("lat", ignoreCase);
}
|
||||
/// <summary>
/// True when the property name is <c>longitude</c>, <c>lng</c>, <c>lon</c> or <c>long</c>,
/// ignoring case.
/// </summary>
private static bool IsLngKey(string k)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    return k.Equals("longitude", ignoreCase)
        || k.Equals("lng", ignoreCase)
        || k.Equals("lon", ignoreCase)
        || k.Equals("long", ignoreCase);
}
|
||||
|
||||
/// <summary>
/// Reads a coordinate that may arrive either as a JSON number or as a numeric string ("35.7").
/// Strings are parsed with the invariant culture so the decimal separator is always '.'.
/// </summary>
/// <param name="v">The JSON value to interpret.</param>
/// <param name="d">The parsed value on success; 0 when the value is neither a number nor a numeric string.</param>
/// <returns><c>true</c> when a double could be read from <paramref name="v"/>.</returns>
private static bool TryNum(JsonElement v, out double d)
{
    switch (v.ValueKind)
    {
        case JsonValueKind.Number:
            return v.TryGetDouble(out d);

        case JsonValueKind.String:
            return double.TryParse(
                v.GetString(),
                System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture,
                out d);

        default:
            // Booleans, null, objects and arrays are never coordinates.
            d = 0;
            return false;
    }
}
|
||||
|
||||
private static readonly string[] DescKeys =
|
||||
{ "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" };
|
||||
|
||||
@@ -134,9 +207,11 @@ public class DivarListingSource : IListingSource
|
||||
if (el.TryGetProperty("title", out var t) && t.ValueKind == JsonValueKind.String)
|
||||
{
|
||||
var sb = new StringBuilder(t.GetString());
|
||||
// Append ALL present description fields — the location/time line («… در تهران، جنتآباد»)
|
||||
// is usually in bottom_description_text, so don't stop at the first match.
|
||||
foreach (var k in DescKeys)
|
||||
if (el.TryGetProperty(k, out var d) && d.ValueKind == JsonValueKind.String)
|
||||
{ sb.Append(" — ").Append(d.GetString()); break; }
|
||||
if (el.TryGetProperty(k, out var d) && d.ValueKind == JsonValueKind.String && d.GetString() is { Length: > 0 } v)
|
||||
sb.Append(" — ").Append(v);
|
||||
var text = sb.ToString().Trim();
|
||||
if (text.Length >= 15) yield return (text, FindToken(el));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user