[파이썬] 셀레늄을 이용한 웹 크롤링
(웹 크롤, 크롤러, web crawl, crawler)
* 파이썬 설치
https://www.python.org/downloads/
python 3
* 셀레늄 설치
pip install selenium
* 웹드라이버 다운로드
https://sites.google.com/a/chromium.org/chromedriver/downloads
(Chrome 115 이상 버전용 드라이버는 https://googlechromelabs.github.io/chrome-for-testing/ 에서 다운로드)
//-------------------------
* crawl.py 소스
//-------------------------
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# Driver setup.
# Raw string: without it, "\c" in the Windows path is an invalid escape
# sequence (a warning today, an error in a future Python release).
path = r"드라이버 경로\chromedriver.exe"
# Selenium 4 receives the driver location through a Service object; passing
# the path as the first positional argument is no longer supported.
driver = webdriver.Chrome(service=Service(executable_path=path))

# Load the web page.
driver.get('https://www.google.com')
//-------------------------
//------------
노드 찾기
https://selenium-python.readthedocs.io/locating-elements.html
from selenium.webdriver.common.by import By
# XPath: a <button> anywhere in the document whose text is exactly "Some text".
driver.find_element(By.XPATH, '//button[text()="Some text"]')
# XPath: every <button> anywhere in the document.
driver.find_elements(By.XPATH, '//button')
These are the attributes available for By class:
//
CSS_SELECTOR = "css selector"
ID = "id"
CLASS_NAME = "class name"
NAME = "name"
TAG_NAME = "tag name"
LINK_TEXT = "link text"
PARTIAL_LINK_TEXT = "partial link text"
XPATH = "xpath"
https://www.w3schools.com/xml/xpath_intro.asp
//--------------------------
글자 입력(키보드 입력)
https://selenium-python.readthedocs.io/navigating.html#interacting-with-the-page
from selenium.webdriver.common.keys import Keys  # needed for Keys.ARROW_DOWN below

element.send_keys("some text")
element.send_keys(" and some", Keys.ARROW_DOWN)  # arrow key
element.clear()  # delete the entered text
//-------------
노드 클릭
# Click the element.
element.click()
//----------------------------
* 페이지 로딩 기다리기
https://selenium-python.readthedocs.io/waits.html
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox()
driver.get("http://somedomain/url_that_delays_loading")
try:
    # Explicit wait: block for up to 10 seconds until an element with
    # id="myDynamicElement" is present in the DOM.
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "myDynamicElement"))
    )
finally:
    # Always shut the browser down, even when the wait above raises.
    driver.quit()