This script works only with Blogger (blogspot) blogs, like this one. It uses Selenium and Python.
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Address of the blog to scan. Blogger-hosted blogs only (e.g. a blogspot.jp address).
target_url = ""
# How many pages (posts) to check.
num_get = 3
# Rows collected while scraping; the scraping loop appends [date, title] pairs.
result = list()
# Start the Firefox browser that every later step drives through `b`.
try:
    b = webdriver.Firefox()
except Exception as exc:
    # Top-level boundary of the script: whatever prevented the browser from
    # starting, stop here with the reason and a non-zero exit status instead
    # of exiting silently.
    raise SystemExit("Could not start Firefox WebDriver: {}".format(exc))
else:
    # Fixed pause after launch, kept from the original script
    # (presumably to let the new browser window settle).
    time.sleep(3)
# Load the blog's front page and wait (up to 30 s) until the first element
# with class 'post-title' is clickable; `until` returns that element.
b.get(target_url)
newest_title_element = WebDriverWait(b, 30).until(
    EC.element_to_be_clickable((By.CLASS_NAME, 'post-title')))
newest_title = newest_title_element.text

# Find the permalink of the newest post by looking for a link whose text
# contains the title. `find_elements(By...)` replaces the
# `find_elements_by_*` helpers, which newer Selenium releases no longer ship.
matching_links = b.find_elements(By.PARTIAL_LINK_TEXT, newest_title)
if not matching_links:
    # Without a link there is nothing to scrape; shut the browser down and
    # report why instead of failing with a bare IndexError.
    b.quit()
    raise SystemExit(
        "No link found for the newest post title: {!r}".format(newest_title))
newest_postlink = matching_links[0].get_attribute('href')

# Continue from the newest post's own page.
b.get(newest_postlink)
# Visit up to `num_get` post pages starting from the current one: record each
# page's date and title, then follow the "older" pager link to the next page.
try:
    for _ in range(num_get):
        # Wait (up to 30 s) for the date header; `until` returns the element.
        date_header = WebDriverWait(b, 30).until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'date-header')))
        date = date_header.text
        title = b.find_elements(By.CLASS_NAME, 'post-title')[0].text
        print(date, title)
        result.append([date, title])

        older_links = b.find_elements(By.CLASS_NAME, 'blog-pager-older-link')
        if not older_links:
            # No "older" link on this page: nothing further to visit.
            break
        older_links[0].click()
finally:
    # Always end the browser session, even if a step above raised, so no
    # browser/driver process is left behind.
    b.quit()
If you want to save the result as a CSV file:
import pandas as pd

# Write the collected [date, title] rows to date_title.csv without a header
# row or an index column. `header` and `index` are documented as booleans,
# so pass False explicitly rather than relying on None being falsy.
pd.DataFrame(result).to_csv('date_title.csv', header=False, index=False)
0 件のコメント:
コメントを投稿