Skip to content
image-20250416231626812
python
import streamlit as st
import requests
import time,os

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


st.subheader('采集网页')

# Module-level placeholder; the live WebDriver itself is kept in
# st.session_state further down.
driver = None

# Directory for scraped output and the Chrome profile directory.
save_path = '采集结果'
chrome_user_data_dir = '/Users/wangxiaomin/my_code/patrick-notes/0Scripts/streamlit_demo/chrome_user_data'

# Create whichever of the two directories is missing.
for _required_dir in (save_path, chrome_user_data_dir):
    if os.path.exists(_required_dir):
        continue
    os.makedirs(_required_dir)

def get_aricle_content(driver):
    """Collect the distinct text of every <p> and <span> on the current page.

    All ``<p>`` elements are visited first, then all ``<span>`` elements.
    The first time a given text is seen it is appended to the result and
    echoed to the Streamlit page with ``st.write``; repeats are skipped.
    Empty text is treated like any other value (kept once).

    Args:
        driver: Object exposing ``find_elements(by, value)`` whose results
            have a ``.text`` attribute (the Selenium WebDriver in this script).

    Returns:
        list: The distinct texts, in first-seen order.
    """
    texts = []
    # Set gives O(1) duplicate checks; ``texts`` alone preserves the order.
    seen = set()
    for tag_name in ('p', 'span'):
        for item in driver.find_elements(By.TAG_NAME, tag_name):
            line = item.text
            if line in seen:
                continue
            seen.add(line)
            texts.append(line)
            st.write(line)
    return texts


# Streamlit re-runs this script on every interaction, so seed the "driver"
# slot only when it is absent; an already-started driver must survive reruns.
if "driver" not in st.session_state:
    st.session_state["driver"] = None


def start_driver(
    driver_path="/Users/wangxiaomin/Documents/selenium_driver/chromedriver_135",
    debugger_address="127.0.0.1:9222",
):
    """Create a Chrome WebDriver and store it in ``st.session_state.driver``.

    The driver is configured with the ``debuggerAddress`` experimental option,
    i.e. it connects to a Chrome instance reachable at ``debugger_address``
    (presumably one started with remote debugging enabled on that port --
    confirm against your local setup).

    Args:
        driver_path: Path to the chromedriver executable. Replace the default
            with the actual path on your machine; its version must match the
            installed Chrome.
        debugger_address: ``host:port`` of the Chrome debugger to connect to.

    Returns:
        None. The created driver is stored in ``st.session_state.driver``.
    """
    chrome_options = Options()
    chrome_options.add_experimental_option("debuggerAddress", debugger_address)
    service = Service(executable_path=driver_path)

    st.session_state.driver = webdriver.Chrome(service=service, options=chrome_options)


# --- Start button: create the WebDriver and keep it in session state. ---
if st.button('启动'):
    start_driver()

# Resolve the driver once per run, after the start handler may have set it.
# Every handler below uses this instead of the module-level ``driver``
# variable, which is always None.
active_driver = st.session_state.driver if "driver" in st.session_state else None

# Reading ``title`` asks the browser for the current page title, so the
# success banner is shown only when the browser answers with a non-empty one.
if active_driver is not None and active_driver.title:
    st.success("启动成功")

# --- Scrape button: load the link and dump its <p>/<span> text. ---
link = st.text_input('请输入链接')
if st.button('采集文章'):
    if active_driver is None:
        # Previously this path crashed with AttributeError on None.
        st.warning('请先点击启动按钮')
    elif not link.strip():
        # An empty URL would be rejected by the driver; tell the user instead.
        st.warning('请输入链接')
    else:
        active_driver.get(link)
        st.write(active_driver.title)

        with st.spinner('采集中...'):
            get_aricle_content(active_driver)
            st.write('采集完成')

# --- Close button: close the window and forget the driver. ---
if st.button('关闭'):
    if active_driver is None:
        st.warning('浏览器尚未启动')
    else:
        active_driver.close()
        # Drop the reference so the next rerun does not probe a closed window.
        st.session_state.driver = None