#!/usr/bin/python3
"""Early example of driving headless Firefox (geckodriver) with Selenium.

Usage: pass a film or series name as the first command-line argument.  The
script opens the YggTorrent search page for that name, waits until the
browser has been redirected away from the requested URL (the Cloudflare
anti-bot step), and prints every result link whose address contains
``/torrent/``, one per line.

Original author's note: the only way to block this should be a captcha in
front of every yggtorrent page, per session.
"""

import inspect
import sys
import time
from urllib.parse import quote_plus

# Location of the geckodriver binary used to drive Firefox.
GECKODRIVER_PATH = r'/usr/local/bin/geckodriver'

# Search endpoint; ``{name}`` receives the URL-encoded search term.
SEARCH_URL_TEMPLATE = (
    "https://www2.yggtorrent.se/engine/search?name={name}&description=&file="
    "&uploader=&category=all&sub_category=&do=search&order=desc&sort=seed"
)

# Maximum time to wait for the redirect away from the requested URL.
REDIRECT_TIMEOUT_SECONDS = 10

# Fixed pause after the redirect so the result page can finish loading.
PAGE_LOAD_DELAY_SECONDS = 2


def build_search_url(name):
    """Return the search URL for *name*, with the term URL-encoded.

    Encoding matters twice: characters such as ``&`` or ``#`` would otherwise
    corrupt the query string, and an unencoded space would be rewritten by
    the browser, making ``current_url`` differ from the requested URL before
    any redirect has happened.
    """
    return SEARCH_URL_TEMPLATE.format(name=quote_plus(name))


def filter_torrent_links(hrefs):
    """Return the entries of *hrefs* that contain ``/torrent/``.

    Empty or ``None`` entries are skipped instead of raising ``TypeError``.
    """
    return [href for href in hrefs if href and '/torrent/' in href]


def create_driver():
    """Start and return a headless Firefox WebDriver using geckodriver."""
    # Selenium is imported lazily so the pure helpers above stay importable
    # (and testable) on machines without a browser stack installed.
    from selenium import webdriver

    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')

    # Newer Selenium releases take the driver path through a Service object
    # and no longer accept ``executable_path``; older ones only know the
    # keyword.  Pick whichever form the installed constructor supports.
    if 'service' in inspect.signature(webdriver.Firefox.__init__).parameters:
        from selenium.webdriver.firefox.service import Service

        return webdriver.Firefox(
            options=options, service=Service(GECKODRIVER_PATH)
        )
    return webdriver.Firefox(options=options, executable_path=GECKODRIVER_PATH)


def fetch_torrent_links(search_url):
    """Load *search_url* in the browser and return the torrent links found.

    Raises:
        selenium.common.exceptions.TimeoutException: if the browser is still
            on *search_url* after ``REDIRECT_TIMEOUT_SECONDS``.
    """
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    driver = create_driver()
    try:
        driver.get(search_url)

        # Wait for the anti-bot redirect: the URL changes once it is passed.
        print("Page atteinte, attente de redirection anti-crawling...")
        WebDriverWait(driver, REDIRECT_TIMEOUT_SECONDS).until(
            EC.url_changes(search_url)
        )

        # Give the result page a moment to finish loading.
        print("Anti-crawling passé, affichage dans 2 secondes ...")
        time.sleep(PAGE_LOAD_DELAY_SECONDS)

        elems = driver.find_elements(By.CSS_SELECTOR, ".results [href]")
        return filter_torrent_links(
            elem.get_attribute('href') for elem in elems
        )
    finally:
        # Always shut the browser down, even on timeout or error, so no
        # headless Firefox/geckodriver process is left behind.
        driver.quit()


def main(argv=None):
    """Run the script: search for ``argv[1]`` and print the torrent URLs.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.

    Exits with an error message when no search term is given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("Please choose a film ou serie name")

    links = fetch_torrent_links(build_search_url(argv[1]))
    print("\n".join(links))


if __name__ == "__main__":
    main()