[파이썬] 셀레늄을 이용한 웹 크롤링
(웹 크롤, 크롤러, web crawl, crawler)
* 파이썬 설치 
https://www.python.org/downloads/ 
python 3 
* 셀레늄 설치 
pip install selenium 
* 웹드라이버 다운로드 
https://chromedriver.chromium.org/downloads 
(Chrome 115 이상 버전용 드라이버: https://googlechromelabs.github.io/chrome-for-testing/ ) 
//------------------------- 
* crawl.py 소스 
//------------------------- 
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# Driver setup.
# Raw string: keeps the backslash of the Windows path literally instead of
# letting Python parse "\c" as an (invalid) escape sequence.
path = r"드라이버 경로\chromedriver.exe"
# Selenium 4 no longer takes the driver path as a positional argument;
# the executable location is handed over through a Service object.
driver = webdriver.Chrome(service=Service(executable_path=path))

# Load the web page.
driver.get('https://www.google.com')
//------------------------- 
//------------ 
노드 찾기 
https://selenium-python.readthedocs.io/locating-elements.html 
# By supplies the locator-strategy constants (XPATH, ID, ...) used below.
from selenium.webdriver.common.by import By
# Single lookup: XPath selecting a <button> whose text is exactly "Some text".
# NOTE: `driver` is the WebDriver instance created in the crawl.py snippet.
driver.find_element(By.XPATH, '//button[text()="Some text"]')
# Plural lookup: XPath selecting every <button> element in the page.
driver.find_elements(By.XPATH, '//button')
These are the attributes available for By class: 
// 
CSS_SELECTOR = "css selector" 
ID = "id" 
CLASS_NAME = "class name" 
NAME = "name" 
TAG_NAME = "tag name" 
LINK_TEXT = "link text" 
PARTIAL_LINK_TEXT = "partial link text" 
XPATH = "xpath" 
https://www.w3schools.com/xml/xpath_intro.asp 
//-------------------------- 
글자 입력(키보드 입력) 
https://selenium-python.readthedocs.io/navigating.html#interacting-with-the-page 
# Keys provides the special-key constants (e.g. ARROW_DOWN) used below.
from selenium.webdriver.common.keys import Keys

# `element` is an element previously obtained from driver.find_element(...).
element.send_keys("some text")
element.send_keys(" and some", Keys.ARROW_DOWN)  # text followed by the arrow key
element.clear()  # delete the entered text
//------------- 
노드 클릭 
# Click the previously located element.
element.click()
//---------------------------- 
* 페이지 로딩 기다리기 
https://selenium-python.readthedocs.io/waits.html 
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Start a Firefox session and open a page whose content shows up late.
driver = webdriver.Firefox()
driver.get("http://somedomain/url_that_delays_loading")
try:
    # Explicit wait: block until the element with this id is present in the
    # DOM, giving up once the timeout of 10 passed to WebDriverWait expires.
    locator = (By.ID, "myDynamicElement")
    wait = WebDriverWait(driver, 10)
    element = wait.until(EC.presence_of_element_located(locator))
finally:
    # Shut the browser down whether or not the wait succeeded.
    driver.quit()