#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.14"
# dependencies = [
#     "bs4>=0.0.2",
#     "selenium>=4.44.0",
# ]
# ///
"""Scrape a LiveJournal photo album into a CSV file.

Renders the album page with headless Chrome (the gallery is built
client-side, so plain HTTP fetch would miss the images), parses the
rendered HTML with BeautifulSoup, and writes one row per image with
columns "source" (full-size image URL) and "description" (caption text).
"""

import csv

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options

url = "https://dani-ellie.livejournal.com/photo/album/2422/"
csv_file = "album.csv"

options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
try:
    driver.get(url)
    page_body = driver.page_source
finally:
    # Always release the browser process, even if the page load raises;
    # the original script leaked a headless Chrome on every run.
    driver.quit()

soup = BeautifulSoup(page_body, "html.parser")
# LiveJournal emits hashed/suffixed class names, so match on the stable
# prefix rather than an exact class string.
containers = soup.find_all("a", {"class": lambda c: c and c.startswith("Container")})

# encoding specified explicitly: captions may contain non-ASCII text.
with open(csv_file, mode="w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["source", "description"])
    for container in containers:
        wrappers = container.find_all(
            "div", {"class": lambda c: c and c.startswith("ImgWrapper")}
        )
        for wrapper in wrappers:
            for img in wrapper.find_all("img"):
                # Pair each image with the first caption following it in
                # document order. (The original zip of ALL wrapper srcs
                # against find_all_next captions re-wrote every row once
                # per image when a wrapper held several images.)
                caption = img.find_next(
                    "p", {"class": lambda c: c and c.startswith("Description")}
                )
                if caption is None:
                    continue
                # Swap the 600px rendition for the full-size one.
                # NOTE(review): plain substring replace — assumes "600"
                # appears only as the size segment of the URL; confirm
                # against actual album URLs.
                writer.writerow([img["src"].replace("600", "original"), caption.text])