diff --git a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs
index a6e0a1d..a4dcde6 100644
--- a/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs
+++ b/src/JobsMedical.Web/Services/Scraping/DivarListingSource.cs
@@ -71,8 +71,12 @@ public class DivarListingSource : IListingSource
             double? lat = null, lng = null;
             if (token is not null)
             {
-                // One detail fetch yields BOTH the phone and the map coordinates.
-                var (phones, gLat, gLng) = await FetchDetailAsync(client, token, ct);
+                // One detail fetch yields the FULL description, the phone, AND the map center.
+                // (The search row only carries a short one-line summary — the rich ad body lives
+                // on the post detail, so without this the listing looked "censored".)
+                var (phones, gLat, gLng, fullDesc) = await FetchDetailAsync(client, token, ct);
+                if (!string.IsNullOrWhiteSpace(fullDesc) && !itemText.Contains(fullDesc))
+                    itemText += "\n" + fullDesc;
                 if (phones.Count > 0 && !phones.Any(itemText.Contains))
                     itemText += "\nشماره تماس: " + string.Join("، ", phones);
                 lat = gLat; lng = gLng;
@@ -126,7 +130,7 @@ public class DivarListingSource : IListingSource
     /// and (b) the post's APPROXIMATE map coordinates (the privacy-fuzzed center Divar shows as a
     /// circle). Fails soft — returns whatever it could extract.
     /// </summary>
-    private async Task<(List<string> phones, double? lat, double? lng)> FetchDetailAsync(
+    private async Task<(List<string> phones, double? lat, double? lng, string? description)> FetchDetailAsync(
         HttpClient client, string token, CancellationToken ct)
     {
         try
@@ -135,22 +139,55 @@ public class DivarListingSource : IListingSource
             req.Headers.TryAddWithoutValidation("User-Agent", Ua);
             req.Headers.TryAddWithoutValidation("Accept", "application/json");
             using var resp = await client.SendAsync(req, ct);
-            if (!resp.IsSuccessStatusCode) return (new(), null, null);
+            if (!resp.IsSuccessStatusCode) return (new(), null, null, null);
             var body = await resp.Content.ReadAsStringAsync(ct);
-            if (body.Contains("BLOCKING_VIEW")) return (new(), null, null);
+            if (body.Contains("BLOCKING_VIEW")) return (new(), null, null, null);
             var phones = HtmlUtil.HarvestPhones(body);
-            double? lat = null, lng = null;
-            try { using var doc = JsonDocument.Parse(body); if (FindLatLng(doc.RootElement) is { } g) { lat = g.lat; lng = g.lng; } }
+            double? lat = null, lng = null; string? desc = null;
+            try
+            {
+                using var doc = JsonDocument.Parse(body);
+                if (FindLatLng(doc.RootElement) is { } g) { lat = g.lat; lng = g.lng; }
+                desc = FindLongestText(doc.RootElement); // the full ad body
+            }
             catch (JsonException) { /* detail wasn't JSON — phones still harvested from text */ }
-            return (phones, lat, lng);
+            return (phones, lat, lng, desc);
         }
         catch (Exception ex)
         {
             _log.LogWarning(ex, "Divar detail/reveal failed for {Token}", token);
-            return (new(), null, null);
+            return (new(), null, null, null);
         }
     }
 
+    /// <summary>The full ad description in Divar's detail JSON = the longest free-text string. We skip
+    /// Divar's own safety/boilerplate notices (which mention «دیوار») and absurdly long blobs.</summary>
+    private static string? FindLongestText(JsonElement root)
+    {
+        string? best = null;
+        var stack = new Stack<JsonElement>();
+        stack.Push(root);
+        while (stack.Count > 0)
+        {
+            var e = stack.Pop();
+            switch (e.ValueKind)
+            {
+                case JsonValueKind.Object:
+                    foreach (var p in e.EnumerateObject()) stack.Push(p.Value);
+                    break;
+                case JsonValueKind.Array:
+                    foreach (var it in e.EnumerateArray()) stack.Push(it);
+                    break;
+                case JsonValueKind.String:
+                    var s = e.GetString();
+                    if (s is { Length: >= 40 and <= 4000 } && s.Contains(' ') && !s.Contains("دیوار")
+                        && (best is null || s.Length > best.Length)) best = s;
+                    break;
+            }
+        }
+        return best?.Trim();
+    }
+
     // Iran's bounding box — guards against picking up an unrelated number pair (timestamps, ids…).
     private const double MinLat = 24, MaxLat = 40, MinLng = 44, MaxLng = 64;
 