
Search 192K artworks with CLIP
Dataset: Met Open Access images
import json
import random
from pathlib import Path
import numpy as np
import pandas as pd
from burla import remote_parallel_map
from sentence_transformers import SentenceTransformer
# Base URL on the Met's image host; how it is joined with per-object
# paths is not shown in this section.
CRD_IMAGE_BASE = "https://images.metmuseum.org/CRDImages/"

# Every on-disk artifact lives under one shared directory, so the
# individual locations are derived from a single root.
_SHARED_ROOT = Path("/workspace/shared/met-weirdest")
OBJECTS_PATH = _SHARED_ROOT / "objects.parquet"
VEC_DIR = _SHARED_ROOT / "vectors"
FINAL_DIR = _SHARED_ROOT / "final"

# Tuning knobs. NOTE(review): presumably download thread count, CLIP
# encode batch size, and images per work unit respectively -- confirm
# against the later steps, which are not visible here.
HTTP_THREADS = 16
CLIP_BATCH = 64
BATCH_SIZE = 512
MODEL_NAME = "clip-ViT-B-32"

Step 1: Build the image queue
Step 2: Fetch and embed
Step 3: Run the image batches
Step 4: Search the museum
What's the point?
Last updated