astroport-iptubes/yggcrawl/gecko/torrent_search.py

#!/usr/bin/python3

# Early example of how to use Selenium with geckodriver to bypass Cloudflare's bot detection.
# The only way to block this would be to put a CAPTCHA in front of every yggtorrent page, per session...

import sys
import time
from urllib.parse import quote_plus

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Search endpoint; ``{name}`` receives the URL-encoded search term.
SEARCH_URL_TEMPLATE = (
    "https://www2.yggtorrent.se/engine/search?name={name}"
    "&description=&file=&uploader=&category=all&sub_category="
    "&do=search&order=desc&sort=seed"
)

# Location of the geckodriver binary used to drive Firefox.
GECKODRIVER_PATH = r'/usr/local/bin/geckodriver'

# Maximum time (seconds) to wait for the anti-crawling redirection.
REDIRECT_TIMEOUT = 10

# Extra delay (seconds) left for the result page to finish loading.
PAGE_LOAD_DELAY = 2


def build_search_url(name):
    """Return the search URL for the raw (not yet URL-encoded) term *name*.

    The term is percent-encoded so that spaces, ``&``, ``=`` or non-ASCII
    characters cannot corrupt the query string. Encoding it here also keeps
    the URL we request identical to the one the browser reports, so the
    redirection check in ``main`` is not satisfied by mere URL normalisation.
    """
    return SEARCH_URL_TEMPLATE.format(name=quote_plus(name))


def find_torrent_links(driver):
    """Return the hrefs found under ``.results`` that contain ``/torrent/``.

    Elements whose ``href`` attribute comes back empty or ``None`` are skipped.
    """
    # find_elements(By.CSS_SELECTOR, ...) works on both Selenium 3 and 4,
    # unlike the removed find_elements_by_css_selector helper.
    elems = driver.find_elements(By.CSS_SELECTOR, ".results [href]")
    hrefs = (elem.get_attribute('href') for elem in elems)
    return [href for href in hrefs if href and '/torrent/' in href]


def main(argv):
    """Search for ``argv[1]`` and print one torrent URL per line.

    Exits with an error message when no search term is given. Any exception
    raised while browsing (for instance a timeout while waiting for the
    redirection) propagates to the caller, but the browser is always closed.
    """
    # Exit if no arguments
    if len(argv) < 2:
        sys.exit("Please choose a film ou serie name")
    search_url = build_search_url(argv[1])

    # Load webdriver with Gecko
    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')
    # NOTE(review): executable_path is deprecated in Selenium 4 in favour of a
    # Service object; kept as-is for the Selenium version this script targets.
    driver = webdriver.Firefox(options=options, executable_path=GECKODRIVER_PATH)
    try:
        driver.get(search_url)

        # Wait to bypass cloudflare: the URL changes once the check is passed
        print("Page atteinte, attente de redirection anti-crawling...")
        wait = WebDriverWait(driver, REDIRECT_TIMEOUT)
        wait.until(lambda drv: drv.current_url != search_url)

        # Give the result page some time to load
        print("Anti-crawling passé, affichage dans 2 secondes ...")
        time.sleep(PAGE_LOAD_DELAY)

        # Print torrents urls
        print("\n".join(find_torrent_links(driver)))
    finally:
        # Always release the headless browser, even when a step above fails.
        driver.quit()


if __name__ == "__main__":
    main(sys.argv)
[Early stage] Add selenium webdriver mechanic to bypass cloudflare 2020-05-08 06:48:26 +02:00			`#!/usr/bin/python3`

			`# Early exemple of how to use selenium with gecko to bypass cloudflare bots detections`
			`# The only way to block this should be using of captcha in front of every yggtorrent pages by sessions...`

			`import sys`
			`import time`

			`from selenium import webdriver`
			`from selenium.webdriver.common.by import By`
			`from selenium.webdriver.support.ui import WebDriverWait`
			`from selenium.webdriver.support import expected_conditions as EC`

			`# Exit if no arguments`
			`if len(sys.argv)==1: sys.exit("Please choose a film ou serie name")`
			`else: arg1 = sys.argv[1]`

			`search_url = f"https://www2.yggtorrent.se/engine/search?name={arg1}&description=&file=&uploader=&category=all&sub_category=&do=search&order=desc&sort=seed"`

			`# Load webdriver with Gecko`
			`options = webdriver.FirefoxOptions()`
			`options.add_argument('-headless')`
			`driver = webdriver.Firefox(options=options, executable_path=r'/usr/local/bin/geckodriver')`
			`driver.get(search_url)`

			`# Wait to bypass cloudflare`
			`print("Page atteinte, attente de redirection anti-crawling...")`
			`wait = WebDriverWait(driver, 10)`
			`wait.until(lambda driver: driver.current_url != search_url)`

			`# Wait 2 seconds to load page`
			`print("Anti-crawling passé, affichage dans 2 secondes ...")`
			`time.sleep(2)`

			`# Filter torrent urls`
			`elems = driver.find_elements_by_css_selector(".results [href]")`
			`links = [elem.get_attribute('href') for elem in elems]`
			`links = [k for k in links if '/torrent/' in k]`

			`# Print torrents urls`
			`print("\n".join(links))`


			`driver.quit()`