Last active 3 hours ago

Revision ef246ffb5373b6e82d9e8b5241269ca1f794151e

lj_albums.py Raw
1#!/usr/bin/env -S uv run --script
2# /// script
3# requires-python = ">=3.14"
4# dependencies = [
5# "bs4>=0.0.2",
6# "selenium>=4.44.0",
7# ]
8# ///
9
10import csv
11from bs4 import BeautifulSoup
12from selenium import webdriver
13from selenium.webdriver import Chrome
14from selenium.webdriver.chrome.options import Options
15
url = "https://dani-ellie.livejournal.com/photo/album/2422/"
csv_file = "album.csv"


def class_starts_with(prefix):
    """Return a predicate matching CSS class values that start with *prefix*.

    BeautifulSoup calls the predicate with ``None`` for tags that have no
    ``class`` attribute, so the predicate must tolerate a missing value.
    """
    return lambda css_class: bool(css_class) and css_class.startswith(prefix)


def to_original_size(src):
    """Return *src* with its final ``"600"`` swapped for ``"original"``.

    Only the last occurrence is replaced so that a "600" appearing earlier
    in the URL (for example inside a numeric id) is left untouched. A URL
    without "600" is returned unchanged.
    """
    # NOTE(review): presumably the size token is the last "600" in the URL
    # (e.g. ".../123_600.jpg") -- confirm against real album image URLs.
    return "original".join(src.rsplit("600", 1))


def fetch_page_source(page_url):
    """Load *page_url* in headless Chrome and return the rendered HTML.

    The browser is always shut down, even when loading the page fails, so
    no Chrome process is left running.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(page_url)
        return driver.page_source
    finally:
        driver.quit()


def iter_album_rows(soup):
    """Yield ``(image_url, description)`` pairs found in the parsed page.

    For every ``ImgWrapper*`` div inside a ``Container*`` link, the images
    are paired in order with the ``Description*`` paragraphs that follow the
    wrapper's last image in the document.
    """
    is_container = class_starts_with("Container")
    is_img_wrapper = class_starts_with("ImgWrapper")
    is_description = class_starts_with("Description")

    for container in soup.find_all("a", {"class": is_container}):
        for wrapper in container.find_all("div", {"class": is_img_wrapper}):
            # src=True skips <img> tags without a src attribute, which would
            # otherwise raise KeyError below.
            images = wrapper.find_all("img", src=True)
            if not images:
                # Nothing to pair; also avoids looking up descriptions from
                # a non-existent image.
                continue
            # NOTE(review): find_all_next searches the rest of the document,
            # not just this container, so an image lacking its own
            # description is paired with a later image's text.
            descriptions = images[-1].find_all_next(
                "p", {"class": is_description}
            )
            for img, description in zip(images, descriptions):
                yield to_original_size(img["src"]), description.text


def main():
    """Scrape the album at ``url`` and write its rows to ``csv_file``."""
    soup = BeautifulSoup(fetch_page_source(url), "html.parser")

    # Explicit UTF-8 so non-ASCII descriptions do not depend on the locale.
    with open(csv_file, mode="w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["source", "description"])
        writer.writerows(iter_album_rows(soup))


if __name__ == "__main__":
    main()