45 lines
1.4 KiB
Python
45 lines
1.4 KiB
Python
|
#!/usr/bin/python3
|
||
|
|
||
|
# Early example of how to use Selenium with geckodriver to bypass Cloudflare bot detection.
# The only way to block this would be to put a CAPTCHA in front of every yggtorrent page, per session.
|
||
|
|
||
|
import sys
|
||
|
import time
|
||
|
|
||
|
from selenium import webdriver
|
||
|
from selenium.webdriver.common.by import By
|
||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||
|
from selenium.webdriver.support import expected_conditions as EC
|
||
|
|
||
|
# Search endpoint; ``{name}`` receives the URL-encoded film/series name.
SEARCH_URL_TEMPLATE = (
    "https://www2.yggtorrent.se/engine/search?name={name}"
    "&description=&file=&uploader=&category=all&sub_category="
    "&do=search&order=desc&sort=seed"
)

# Location of the geckodriver binary used to drive Firefox.
GECKODRIVER_PATH = r'/usr/local/bin/geckodriver'

# Seconds to wait for the browser to leave the initially requested URL.
REDIRECT_TIMEOUT = 10

# Seconds to let the results page finish loading after the redirect.
PAGE_LOAD_DELAY = 2


def build_search_url(name):
    """Return the search URL for *name*.

    The name is percent-encoded (spaces become ``+``) so that characters
    such as ``&``, ``=`` or ``#`` cannot corrupt the query string.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    return SEARCH_URL_TEMPLATE.format(name=quote_plus(name))


def filter_torrent_links(hrefs):
    """Return the entries of *hrefs* that contain ``/torrent/``.

    ``None`` and empty values are skipped, and input order is preserved.
    """
    return [href for href in hrefs if href and '/torrent/' in href]


def main():
    """Search for the name given as first CLI argument and print torrent URLs.

    Exits with an error message when no argument is supplied. Any exception
    raised while driving the browser (for example a timeout while waiting
    for the redirect) propagates to the caller, but the browser is always
    shut down first.
    """
    # Exit if no arguments
    if len(sys.argv) < 2:
        sys.exit("Please choose a film ou serie name")
    search_url = build_search_url(sys.argv[1])

    # Load webdriver with Gecko
    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')
    # NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of
    # a Service object; it is kept here so the script still runs on
    # Selenium 3 -- confirm against the installed Selenium version.
    driver = webdriver.Firefox(options=options, executable_path=GECKODRIVER_PATH)

    # From here on the browser process exists, so make sure it is always
    # terminated, even when the wait below times out.
    try:
        driver.get(search_url)

        # Wait until the browser has been redirected away from the requested
        # URL (the anti-crawling step, per the original author's notes).
        print("Page atteinte, attente de redirection anti-crawling...")
        WebDriverWait(driver, REDIRECT_TIMEOUT).until(
            lambda drv: drv.current_url != search_url
        )

        # Give the page a moment to load
        print("Anti-crawling passé, affichage dans 2 secondes ...")
        time.sleep(PAGE_LOAD_DELAY)

        # Filter torrent urls. `find_elements(By.CSS_SELECTOR, ...)` replaces
        # the legacy `find_elements_by_css_selector` helper, which newer
        # Selenium 4 releases no longer provide.
        elems = driver.find_elements(By.CSS_SELECTOR, ".results [href]")
        links = filter_torrent_links(
            elem.get_attribute('href') for elem in elems
        )

        # Print torrents urls
        print("\n".join(links))
    finally:
        driver.quit()


if __name__ == "__main__":
    main()
|