
Ranking 1.7M Airbnbs by TV location
Dataset: Inside Airbnb snapshots
import json
from dataclasses import dataclass
from pathlib import Path
from burla import remote_parallel_map
# Filesystem layout: every directory below is a direct child of ROOT.
ROOT = Path("/workspace/shared/airbnb")
LISTING_DIR = ROOT.joinpath("listings")
PHOTO_DIR = ROOT.joinpath("photos")
REVIEW_DIR = ROOT.joinpath("reviews")
FINAL_DIR = ROOT.joinpath("final")

# Worker count; presumably the parallelism handed to remote_parallel_map —
# confirm at the call sites, which are not shown here.
N_WORKERS = 1_000
@dataclass(frozen=True)
class CitySnapshot:
    """Immutable record describing one city snapshot.

    Holds four string fields. Instances are frozen, so fields cannot be
    reassigned after construction, and instances are hashable and compare
    by value.

    Rows parsed from ``inside-airbnb-snapshots.json`` are expanded into this
    class as keyword arguments, so each row's keys must match the field
    names below exactly.
    """

    # Identifier of the city (presumably a URL-style slug — confirm).
    city_slug: str
    # Identifier of the snapshot; exact format is not shown in this file.
    snapshot: str
    # Location of the listings data (presumably a download URL — confirm).
    listings_url: str
    # Location of the reviews data (presumably a download URL — confirm).
    reviews_url: str
city_snapshots = [
CitySnapshot(**row)
for row in json.loads(Path("inside-airbnb-snapshots.json").read_text())
]

Step 1: Write artifacts, not vibes
Step 2: Normalize listings first
Step 3: Score every photo first
Step 4: Validate the photo shortlists
Step 5: Funnel the reviews
Step 6: Reduce into analysis outputs
What's the point?
Last updated