#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.14"
# dependencies = [
# "bs4>=0.0.2",
# "selenium>=4.44.0",
# ]
# ///
import csv
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
url = "https://dani-ellie.livejournal.com/photo/album/2422/"
csv_file = "album.csv"

# Headless Chrome: the album page is JS-rendered, hence Selenium rather
# than a plain HTTP fetch; --headless lets it run without a display.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
try:
    driver.get(url)
    page_body = driver.page_source
finally:
    # Fix: the original never quit the driver, leaking the browser and
    # chromedriver processes on every run.
    driver.quit()

soup = BeautifulSoup(page_body, "html.parser")

# LiveJournal emits hashed CSS class names (e.g. "Container-xyz"), so
# match on the stable prefix rather than an exact class string.
containers = soup.find_all("a", {"class": lambda c: c and c.startswith("Container")})

with open(csv_file, mode="w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["source", "description"])
    for container in containers:
        img_divs = container.find_all(
            "div", {"class": lambda c: c and c.startswith("ImgWrapper")}
        )
        for wrapper in img_divs:
            img_tags = wrapper.find_all("img")
            if not img_tags:
                continue  # a wrapper with no image contributes no rows
            srcs = [img["src"] for img in img_tags]
            # Fix: the original re-ran find_all_next() and the zip once
            # per <img>, emitting duplicate rows whenever a wrapper held
            # more than one image. Collect the Description paragraphs
            # following the first image exactly once and pair them up.
            descriptions = img_tags[0].find_all_next(
                "p", {"class": lambda c: c and c.startswith("Description")}
            )
            for src, desc in zip(srcs, descriptions):
                # NOTE(review): swaps the "600" size segment for the
                # original-resolution variant, but replaces EVERY "600"
                # in the URL — confirm album URLs never contain 600
                # elsewhere (e.g. in the photo id).
                writer.writerow([src.replace("600", "original"), desc.text])