astroport-iptubes/yggcrawl/gecko/torrent_search.py

#!/usr/bin/python3

# Early example of how to use Selenium with geckodriver to bypass Cloudflare's bot detection.
# The only way to block this would be to put a captcha in front of every yggtorrent page, per session...

import sys
import time
from urllib.parse import quote_plus

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Location of the geckodriver binary used to drive Firefox.
GECKODRIVER_PATH = r'/usr/local/bin/geckodriver'

# Maximum number of seconds to wait for the anti-crawling redirection.
REDIRECT_TIMEOUT = 10

# Seconds granted to the results page to finish loading after the redirection
# (keep in sync with the "2 secondes" progress message below).
PAGE_LOAD_DELAY = 2


def build_search_url(name):
    """Return the yggtorrent search URL for *name*, ordered by seeders (desc).

    The name is percent-encoded so that spaces, ``&``, ``#`` and similar
    characters cannot corrupt the query string. This also keeps the URL in
    the form the browser reports back, which the redirection check in
    ``fetch_torrent_links`` relies on.

    Args:
        name: Raw (not already URL-encoded) film or series name.

    Returns:
        The complete search URL as a string.
    """
    return (
        "https://www2.yggtorrent.se/engine/search"
        f"?name={quote_plus(name)}&description=&file=&uploader=&category=all"
        "&sub_category=&do=search&order=desc&sort=seed"
    )


def filter_torrent_links(hrefs):
    """Return the entries of *hrefs* that point to a torrent page.

    Args:
        hrefs: Iterable of href values; ``None`` and empty values are skipped.

    Returns:
        A list of the hrefs containing ``/torrent/``, in their original order.
    """
    return [href for href in hrefs if href and '/torrent/' in href]


def fetch_torrent_links(search_url):
    """Open *search_url* in headless Firefox and return the torrent links found.

    Args:
        search_url: Search page URL, as built by ``build_search_url``.

    Returns:
        A list of torrent page URLs taken from the ``.results`` section.

    Raises:
        Whatever Selenium raises when the page cannot be loaded or when the
        URL has not changed within ``REDIRECT_TIMEOUT`` seconds. The browser
        is shut down in every case.
    """
    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')
    # NOTE(review): the `executable_path` keyword is rejected by recent
    # Selenium 4 releases (a Service object is expected instead); kept as-is
    # for compatibility with the Selenium version this script was written for.
    driver = webdriver.Firefox(options=options, executable_path=GECKODRIVER_PATH)
    try:
        driver.get(search_url)

        # The script treats a change of URL as the sign that the
        # anti-crawling check has been passed.
        print("Page atteinte, attente de redirection anti-crawling...")
        WebDriverWait(driver, REDIRECT_TIMEOUT).until(
            lambda drv: drv.current_url != search_url
        )

        print("Anti-crawling passé, affichage dans 2 secondes ...")
        time.sleep(PAGE_LOAD_DELAY)

        # find_elements(By...) works on both Selenium 3 and 4, unlike the
        # removed find_elements_by_css_selector helper.
        elems = driver.find_elements(By.CSS_SELECTOR, ".results [href]")
        return filter_torrent_links(elem.get_attribute('href') for elem in elems)
    finally:
        # Always release the browser, even when the wait times out; otherwise
        # headless Firefox/geckodriver processes are left running.
        driver.quit()


def main():
    """Search yggtorrent for the name given as first CLI argument and print the links."""
    # Exit if no arguments
    if len(sys.argv) < 2:
        sys.exit("Please choose a film ou serie name")

    links = fetch_torrent_links(build_search_url(sys.argv[1]))

    # Print torrents urls
    print("\n".join(links))


if __name__ == "__main__":
    main()