#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.14"
# dependencies = [
# "bs4>=0.0.2",
# "selenium>=4.44.0",
# ]
# ///
import csv
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
url = "https://dani-ellie.livejournal.com/photo/album/2422/"
csv_file = "album.csv"

# Headless Chrome: the album page is JS-rendered, hence Selenium rather
# than a plain HTTP fetch; --headless lets it run without a display.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
try:
    driver.get(url)
    page_body = driver.page_source
finally:
    # Fix: the original never quit the driver, leaking the browser and
    # chromedriver processes on every run.
    driver.quit()

soup = BeautifulSoup(page_body, "html.parser")

# LiveJournal emits hashed CSS class names (e.g. "Container-xyz"), so
# match on the stable prefix rather than an exact class string.
containers = soup.find_all("a", {"class": lambda c: c and c.startswith("Container")})

with open(csv_file, mode="w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["source", "description"])
    for container in containers:
        img_divs = container.find_all(
            "div", {"class": lambda c: c and c.startswith("ImgWrapper")}
        )
        for wrapper in img_divs:
            img_tags = wrapper.find_all("img")
            if not img_tags:
                continue  # a wrapper with no image contributes no rows
            srcs = [img["src"] for img in img_tags]
            # Fix: the original re-ran find_all_next() and the zip once
            # per <img>, emitting duplicate rows whenever a wrapper held
            # more than one image. Collect the Description paragraphs
            # following the first image exactly once and pair them up.
            descriptions = img_tags[0].find_all_next(
                "p", {"class": lambda c: c and c.startswith("Description")}
            )
            for src, desc in zip(srcs, descriptions):
                # NOTE(review): swaps the "600" size segment for the
                # original-resolution variant, but replaces EVERY "600"
                # in the URL — confirm album URLs never contain 600
                # elsewhere (e.g. in the photo id).
                writer.writerow([src.replace("600", "original"), desc.text])