Brut add selenium and co changes
This commit is contained in:
parent
2968a2d312
commit
fa272f340c
|
@ -0,0 +1,39 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Download an IMDB page and print every title listed in its chart table.
# The Top 250 chart URL is kept below for reference; the active URL is a
# title search.
#url = 'http://www.imdb.com/chart/top'
url = 'https://www.imdb.com/find?q=didier&ref_=nv_sr_sm'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')

# Each list below is positional: entry N of every list describes chart row N.
movies = soup.select('td.titleColumn')
links = [a.attrs.get('href') for a in soup.select('td.titleColumn a')]
crew = [a.attrs.get('title') for a in soup.select('td.titleColumn a')]
ratings = [b.attrs.get('data-value') for b in soup.select('td.posterColumn span[name=ir]')]
votes = [b.attrs.get('data-value') for b in soup.select('td.ratingColumn strong')]

imdb = []

# Store each item into a dictionary (data), then put those into a list (imdb)
for index, movie_cell in enumerate(movies):
    # Separate movie into: 'place', 'title', 'year'
    movie_string = movie_cell.get_text()
    # Collapse whitespace and drop dots: "1. Title (1994)" -> "1 Title (1994)"
    movie = ' '.join(movie_string.split()).replace('.', '')

    # The displayed place is 1-based while `index` is 0-based; the width of the
    # place prefix must be measured on index + 1, otherwise rows 10, 100, ...
    # lose a digit of their place and keep a stray character in the title.
    rank_width = len(str(index + 1))

    # Skip "<place> " at the front and " (YYYY)" (7 characters) at the end.
    movie_title = movie[rank_width + 1:-7]
    year = re.search(r'\((.*?)\)', movie_string).group(1)
    place = movie[:rank_width]

    data = {"movie_title": movie_title,
            "year": year,
            "place": place,
            "star_cast": crew[index],
            "rating": ratings[index],
            "vote": votes[index],
            "link": links[index]}
    imdb.append(data)

for item in imdb:
    print(item['place'], '-', item['movie_title'], '('+item['year']+') -', 'Starring:', item['star_cast'])
|
|
@ -0,0 +1,11 @@
|
||||||
|
"""
|
||||||
|
__init__.py main
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .yggtorrentscraper import (
|
||||||
|
YggTorrentScraper,
|
||||||
|
set_yggtorrent_tld,
|
||||||
|
get_yggtorrent_tld,
|
||||||
|
)
|
||||||
|
from .torrent import Torrent, TorrentComment, TorrentFile
|
||||||
|
from .categories import categories
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,10 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Stop with an error message when no search words were given on the command line.
if len(sys.argv) == 1:
    sys.exit("Please choose a film ou serie name")

# Glue every command-line word together with '+' and show the result.
args = '+'.join(sys.argv[1:])
print(args)
|
|
@ -13,9 +13,11 @@ from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
|
||||||
# Exit if no arguments
|
# Exit if no arguments
|
||||||
if len(sys.argv)==1: sys.exit("Please choose a film ou serie name")
|
if len(sys.argv)==1: sys.exit("Please choose a film ou serie name")
|
||||||
else: arg1 = sys.argv[1]
|
else: args = sys.argv[1:]
|
||||||
|
|
||||||
search_url = f"https://www2.yggtorrent.se/engine/search?name={arg1}&description=&file=&uploader=&category=all&sub_category=&do=search&order=desc&sort=seed"
|
args = '+'.join(args)
|
||||||
|
|
||||||
|
search_url = f"https://www2.yggtorrent.se/engine/search?name={args}&description=&file=&uploader=&category=all&sub_category=&do=search&order=desc&sort=seed"
|
||||||
|
|
||||||
# Load webdriver with Gecko
|
# Load webdriver with Gecko
|
||||||
options = webdriver.FirefoxOptions()
|
options = webdriver.FirefoxOptions()
|
||||||
|
@ -26,7 +28,7 @@ driver.get(search_url)
|
||||||
# Wait to bypass cloudflare
|
# Wait to bypass cloudflare
|
||||||
print("Page atteinte, attente de redirection anti-crawling...")
|
print("Page atteinte, attente de redirection anti-crawling...")
|
||||||
wait = WebDriverWait(driver, 10)
|
wait = WebDriverWait(driver, 10)
|
||||||
wait.until(lambda driver: driver.current_url != search_url)
|
page_search = wait.until(lambda driver: driver.current_url != search_url)
|
||||||
|
|
||||||
# Wait 2 seconds to load page
|
# Wait 2 seconds to load page
|
||||||
print("Anti-crawling passé, affichage dans 2 secondes ...")
|
print("Anti-crawling passé, affichage dans 2 secondes ...")
|
||||||
|
@ -38,7 +40,7 @@ links = [elem.get_attribute('href') for elem in elems]
|
||||||
links = [k for k in links if '/torrent/' in k]
|
links = [k for k in links if '/torrent/' in k]
|
||||||
|
|
||||||
# Print torrents urls
|
# Print torrents urls
|
||||||
print("\n".join(links))
|
#print("\n".join(links))
|
||||||
|
print(links[0])
|
||||||
|
|
||||||
driver.quit()
|
driver.quit()
|
|
@ -0,0 +1,26 @@
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from ..yggtorrentscraper import (
|
||||||
|
YggTorrentScraper,
|
||||||
|
set_yggtorrent_tld,
|
||||||
|
get_yggtorrent_tld,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestChangeYggtorrentTLD(unittest.TestCase):
    """Check that the module-level yggtorrent TLD can be read and replaced."""

    # TLD in effect when the class body is evaluated; tearDown restores it
    # after every test so a changed TLD does not leak into other tests.
    current_yggtorrent_tld = get_yggtorrent_tld()

    def test_read_tld(self):
        """The TLD reported by the module is the expected default."""
        self.current_yggtorrent_tld = get_yggtorrent_tld()

        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(self.current_yggtorrent_tld, "se")

    def test_set_yggtorrent_tld(self):
        """A TLD set through the setter is returned by the getter."""
        set_yggtorrent_tld("newtld")

        self.assertEqual(get_yggtorrent_tld(), "newtld")

    def tearDown(self):
        """Put back the TLD remembered by the test (or by the class)."""
        set_yggtorrent_tld(self.current_yggtorrent_tld)
|
|
@ -0,0 +1,65 @@
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
|
class TestDownload(unittest.TestCase):
    """Download tests; they need YGGTORRENT_IDENTIFIANT / YGGTORRENT_PASSWORD."""

    scraper = None
    destination_path = None

    def setUp(self):
        """
        Build a logged-in scraper and the download directory path.

        This is done in setUp rather than in an overridden __init__ so that no
        network request is issued merely by instantiating the test case.
        """
        yggtorrent_identifiant = os.environ.get("YGGTORRENT_IDENTIFIANT")
        yggtorrent_password = os.environ.get("YGGTORRENT_PASSWORD")

        self.destination_path = os.path.join(
            ".", "yggtorrentscraper", "tests", "test_download"
        )

        self.scraper = YggTorrentScraper(requests.session())
        self.scraper.login(yggtorrent_identifiant, yggtorrent_password)

    def test_download_from_torrent(self):
        """Download through a Torrent object taken from the most-completed list."""
        most_completed = self.scraper.most_completed()
        torrent = self.scraper.extract_details(most_completed[0])

        self.assertIsNotNone(torrent.url)

        file_full_path = self.scraper.download_from_torrent(
            torrent=torrent, destination_path=self.destination_path
        )

        self.assertGreater(os.path.getsize(file_full_path), 1000)

    def test_download_from_torrent_url(self):
        """Download starting from the url of a torrent page."""
        file_full_path = self.scraper.download_from_torrent_url(
            torrent_url="https://www2.yggtorrent.pe/torrent/filmvideo/serie-tv/440445-game-of-thrones-s08e02-multi-1080p-amzn-web-dl-dd5-1-x264-ark01",
            destination_path=self.destination_path,
        )

        self.assertGreater(os.path.getsize(file_full_path), 1000)

    def test_download_from_torrent_download_url(self):
        """Download starting from the direct download url of a torrent."""
        most_completed = self.scraper.most_completed()
        torrent = self.scraper.extract_details(most_completed[0])

        self.assertIsNotNone(torrent.url)

        file_full_path = self.scraper.download_from_torrent_download_url(
            torrent_url=torrent.url, destination_path=self.destination_path
        )

        self.assertGreater(os.path.getsize(file_full_path), 1000)

    def tearDown(self):
        """Remove downloaded files and close the session."""
        if os.path.exists(self.destination_path):
            shutil.rmtree(self.destination_path, ignore_errors=True)

        self.scraper.logout()
|
|
@ -0,0 +1,60 @@
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractDetails(unittest.TestCase):
    """Check the fields filled by extract_details, anonymous and logged in."""

    # One scraper (and HTTP session) shared by every test of this class.
    scraper = YggTorrentScraper(requests.session())

    # Torrent page used by both tests.
    TORRENT_PAGE_URL = "https://www2.yggtorrent.pe/torrent/filmvideo/serie-tv/440445-game-of-thrones-s08e02-multi-1080p-amzn-web-dl-dd5-1-x264-ark01"

    def _assert_common_details(self, torrent):
        """Assertions that hold whether or not the session is logged in."""
        self.assertIsNotNone(torrent.name)
        self.assertIsNotNone(torrent.uploaded_datetime)
        self.assertIsNotNone(torrent.size)
        self.assertIsNotNone(torrent.uploader)

        self.assertGreater(len(torrent.keywords), 0)

        # -1 is the "not filled" default of these counters on Torrent.
        self.assertGreater(torrent.completed, -1)
        self.assertGreater(torrent.seeders, -1)
        self.assertGreater(torrent.leechers, -1)

        self.assertGreater(len(torrent.files), 0)
        self.assertGreater(len(torrent.comments), 0)

    def test_extract_details(self):
        """Without login the download url is expected to be missing."""
        torrent = self.scraper.extract_details(self.TORRENT_PAGE_URL)

        self._assert_common_details(torrent)
        self.assertIsNone(torrent.url)

    def test_extract_details_logged(self):
        """After login the download url is expected to be present."""
        yggtorrent_identifiant = os.environ.get("YGGTORRENT_IDENTIFIANT")
        yggtorrent_password = os.environ.get("YGGTORRENT_PASSWORD")

        self.scraper.login(yggtorrent_identifiant, yggtorrent_password)

        torrent = self.scraper.extract_details(self.TORRENT_PAGE_URL)

        self._assert_common_details(torrent)
        self.assertIsNotNone(torrent.url)

    def tearDown(self):
        """Close the session after each test."""
        self.scraper.logout()
|
|
@ -0,0 +1,30 @@
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
|
class TestAuthentification(unittest.TestCase):
    """Login tests; the success case needs the YGGTORRENT_* environment variables."""

    def setUp(self):
        """Start every test with a fresh scraper and session."""
        self.scraper = YggTorrentScraper(requests.session())

    def test_login_success(self):
        """Credentials taken from the environment are accepted."""
        yggtorrent_identifiant = os.environ.get("YGGTORRENT_IDENTIFIANT")
        yggtorrent_password = os.environ.get("YGGTORRENT_PASSWORD")

        self.assertIsNotNone(yggtorrent_identifiant)
        self.assertIsNotNone(yggtorrent_password)

        self.assertTrue(self.scraper.login(yggtorrent_identifiant, yggtorrent_password))

    def test_login_failed(self):
        """Made-up credentials are rejected."""
        self.assertFalse(self.scraper.login("myidentifiant", "mypassword"))

    def tearDown(self):
        """Log out once per test; the tests themselves no longer repeat it."""
        self.scraper.logout()
|
|
@ -0,0 +1,27 @@
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
|
class TestLogout(unittest.TestCase):
    """Logout tests, run after a successful and after a failed login."""

    def setUp(self):
        """Give each test its own scraper and session."""
        self.scraper = YggTorrentScraper(requests.session())

    def test_logout_success(self):
        """Logging out after a valid login reports success."""
        identifiant = os.environ.get("YGGTORRENT_IDENTIFIANT")
        password = os.environ.get("YGGTORRENT_PASSWORD")

        logged_in = self.scraper.login(identifiant, password)
        self.assertTrue(logged_in)

        logged_out = self.scraper.logout()
        self.assertTrue(logged_out)

    def test_logout_failed(self):
        """Logging out after a rejected login reports failure."""
        self.scraper.login("myidentifiant", "mypassword")

        logged_out = self.scraper.logout()
        self.assertFalse(logged_out)

    def tearDown(self):
        """Always finish with a logout request."""
        self.scraper.logout()
|
|
@ -0,0 +1,16 @@
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
|
class TestMostCompleted(unittest.TestCase):
    """Check the size of the most-completed listing."""

    scraper = YggTorrentScraper(session=requests.session())

    def test_most_completed(self):
        """The listing is expected to hold exactly 100 urls."""
        url_count = len(self.scraper.most_completed())

        self.assertEqual(url_count, 100)

    def tearDown(self):
        """Close the session after the test."""
        self.scraper.logout()
|
|
@ -0,0 +1,70 @@
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
|
class TestResearch(unittest.TestCase):
    """Search tests: by name, by uploader, sorted, and across several pages."""

    scraper = YggTorrentScraper(requests.session())

    torrent_name = "walking dead s09"
    torrent_uploader = "brandit"

    # Name used by the sorting tests.
    torrent_name_2 = "blue oyster cult"

    def _completed_in_search_order(self, order):
        """Yield the `completed` counter of each result of a sorted search."""
        torrents_url = self.scraper.search(
            {"name": self.torrent_name_2, "sort": "completed", "order": order}
        )

        for torrent_url in torrents_url:
            yield self.scraper.extract_details(torrent_url).completed

    def test_search_by_name(self):
        """Every searched word appears in the name of the first result."""
        torrents_url = self.scraper.search({"name": self.torrent_name})

        torrent = self.scraper.extract_details(torrents_url[0])

        for word in self.torrent_name.split(" "):
            self.assertIn(word.lower(), torrent.name.lower())

    def test_search_by_uploader(self):
        """Every result of an uploader-filtered search comes from that uploader."""
        torrents_url = self.scraper.search(
            {"name": self.torrent_name, "uploader": self.torrent_uploader}
        )

        for torrent_url in torrents_url:
            torrent = self.scraper.extract_details(torrent_url)

            self.assertEqual(torrent.uploader.lower(), self.torrent_uploader.lower())

    def test_search_sort_completed_asc(self):
        """Ascending sort: each result has at least as many completions as the previous."""
        previous = None

        for completed in self._completed_in_search_order("asc"):
            if previous is not None:
                self.assertLessEqual(previous, completed)
            previous = completed

    def test_search_sort_completed_desc(self):
        """Descending sort: each result has at most as many completions as the previous."""
        previous = None

        for completed in self._completed_in_search_order("desc"):
            if previous is not None:
                self.assertGreaterEqual(previous, completed)
            previous = completed

    def test_search_multiple_page(self):
        """A broad search returns more results than fit on a few pages."""
        torrents_url = self.scraper.search({"name": "walking dead"})

        self.assertGreater(len(torrents_url), 200)

    def tearDown(self):
        """Close the session after each test."""
        self.scraper.logout()
|
|
@ -0,0 +1,32 @@
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
|
class TestTorrent(unittest.TestCase):
    """Smoke tests: rendering a scraped torrent as text must not raise."""

    scraper = YggTorrentScraper(requests.session())

    def test_str(self):
        """Render the first most-completed torrent without being logged in."""
        first_url = self.scraper.most_completed()[0]

        details = self.scraper.extract_details(first_url)

        details.__str__(files=True, comments=True)

    def test_str_logged(self):
        """Render the first most-completed torrent after logging in."""
        identifiant = os.environ.get("YGGTORRENT_IDENTIFIANT")
        password = os.environ.get("YGGTORRENT_PASSWORD")
        self.scraper.login(identifiant, password)

        first_url = self.scraper.most_completed()[0]

        details = self.scraper.extract_details(first_url)

        details.__str__(files=True, comments=True)

    def tearDown(self):
        """Close the session after each test."""
        self.scraper.logout()
|
|
@ -0,0 +1,146 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class Torrent:
    """
    Torrent entity

    Attributes are filled in after construction. `completed`, `seeders` and
    `leechers` are -1 and the other scalar fields are None until they are set.
    """

    def __init__(self):
        self.name = None
        self.uploaded_datetime = None
        self.size = None
        self.uploader = None

        # The lists are created per instance. As class attributes they were a
        # single object shared by every Torrent, so appending to one torrent's
        # keywords/files/comments changed all the others.
        self.keywords = []

        self.completed = -1
        self.seeders = -1
        self.leechers = -1

        self.url = None

        self.files = []
        self.comments = []

    def __str__(self, comments=False, files=False):
        """
        Return a multi-line description of the torrent.

        The file and comment counts are always printed; the entries themselves
        are appended only when `files` / `comments` is true. Lines end with
        os.linesep.
        """
        nl = os.linesep
        parts = []

        # str() keeps the rendering usable when name/uploader are still None.
        parts.append("Name : " + str(self.name) + nl)

        parts.append("Url : " + (self.url if self.url is not None else "N/A") + nl)
        parts.append(nl)

        parts.append(f"Keywords ({len(self.keywords)}) : " + nl)
        parts.extend(f"- {keyword}" + nl for keyword in self.keywords)
        parts.append(nl)

        parts.append("Uploaded : " + str(self.uploaded_datetime) + nl)
        parts.append("Size : " + str(self.size) + nl)
        parts.append("Uploader : " + str(self.uploader) + nl)
        parts.append("Completed : " + str(self.completed) + nl)
        parts.append("Seeders : " + str(self.seeders) + nl)
        parts.append("Leechers : " + str(self.leechers) + nl)
        parts.append(nl)

        parts.append(f"Files ({len(self.files)})" + nl)
        if files:
            parts.extend(str(file) + nl for file in self.files)
        parts.append(nl)

        parts.append(f"Comments ({len(self.comments)})" + nl)
        if comments:
            parts.extend(str(comment) + nl for comment in self.comments)

        return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
class TorrentFile:

    """
    Torrent's file entity
    """

    size = ""
    file_name = ""

    @property
    def file_size(self):
        """
        Alias of `size`.

        The scraper stores the size under `file_size` while `__str__` reads
        `size`; the alias makes both names refer to the same value.
        """
        return self.size

    @file_size.setter
    def file_size(self, value):
        self.size = value

    def __str__(self):
        """Return the size and file name, one per line (os.linesep terminated)."""
        return (
            "size : " + str(self.size) + os.linesep
            + "file_name : " + str(self.file_name) + os.linesep
        )
|
||||||
|
|
||||||
|
|
||||||
|
class TorrentComment:

    """
    Torrent's comment entity
    """

    author = ""
    posted = ""
    text = ""

    def __str__(self):
        """Return author, posting date and text, one labelled line each."""
        labelled_lines = (
            "Author : " + self.author,
            "Posted : " + str(self.posted),
            "Text : " + str(self.text),
        )

        return "".join(line + os.linesep for line in labelled_lines)
|
|
@ -0,0 +1,457 @@
|
||||||
|
import datetime
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from .torrent import Torrent, TorrentComment, TorrentFile
|
||||||
|
from .categories import categories
|
||||||
|
|
||||||
|
# Top-level domain of the site; every url below is derived from it.
YGGTORRENT_TLD = "se"

YGGTORRENT_BASE_URL = f"https://www2.yggtorrent.{YGGTORRENT_TLD}"

YGGTORRENT_LOGIN_URL = f"{YGGTORRENT_BASE_URL}/user/login"
YGGTORRENT_LOGOUT_URL = f"{YGGTORRENT_BASE_URL}/user/logout?attempt=1"

YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="

logger = logging.getLogger("yggtorrentscraper")

# Cookie domain and name of the session token cookie.
YGGTORRENT_DOMAIN = f".yggtorrent.{YGGTORRENT_TLD}"
YGGTORRENT_TOKEN_COOKIE = "ygg_"

# Query-string fragments of the search url (each one was previously defined twice).
YGGTORRENT_SEARCH_URL_DESCRIPTION = "&description="
YGGTORRENT_SEARCH_URL_FILE = "&file="
YGGTORRENT_SEARCH_URL_UPLOADER = "&uploader="
YGGTORRENT_SEARCH_URL_CATEGORY = "&category="
YGGTORRENT_SEARCH_URL_SUB_CATEGORY = "&sub_category="
YGGTORRENT_SEARCH_URL_ORDER = "&order="
YGGTORRENT_SEARCH_URL_SORT = "&sort="
YGGTORRENT_SEARCH_URL_DO = "&do="
YGGTORRENT_SEARCH_URL_PAGE = "&page="

YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
# Built from the base url like its neighbours instead of a hard-coded host.
YGGTORRENT_GET_INFO = f"{YGGTORRENT_BASE_URL}/engine/get_nfo?torrent="

YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"

# Used as the step between page offsets when walking search result pages.
TORRENT_PER_PAGE = 50

YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="

# Default request headers: a desktop browser User-Agent.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
|
||||||
|
|
||||||
|
def set_yggtorrent_tld(yggtorrent_tld=None):
    """
    Redefine all string variable according to new TLD

    Every module-level url and the cookie domain are rebuilt from
    `yggtorrent_tld`, including the logout url, which was previously left
    pointing at the old TLD.

    NOTE(review): the value is interpolated as-is, so calling this without an
    argument yields hosts ending in ".None" - confirm whether None should be
    rejected.
    """

    global YGGTORRENT_TLD
    global YGGTORRENT_BASE_URL
    global YGGTORRENT_LOGIN_URL
    global YGGTORRENT_LOGOUT_URL
    global YGGTORRENT_SEARCH_URL
    global YGGTORRENT_DOMAIN
    global YGGTORRENT_GET_FILES
    global YGGTORRENT_GET_INFO
    global YGGTORRENT_MOST_COMPLETED_URL
    global YGGTORRENT_FILES_URL

    YGGTORRENT_TLD = yggtorrent_tld

    YGGTORRENT_BASE_URL = f"https://www2.yggtorrent.{YGGTORRENT_TLD}"

    YGGTORRENT_LOGIN_URL = f"{YGGTORRENT_BASE_URL}/user/login"
    YGGTORRENT_LOGOUT_URL = f"{YGGTORRENT_BASE_URL}/user/logout?attempt=1"

    YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="

    # The cookie domain follows the TLD too (it used to stay on ".se").
    YGGTORRENT_DOMAIN = f".yggtorrent.{YGGTORRENT_TLD}"

    YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
    YGGTORRENT_GET_INFO = f"{YGGTORRENT_BASE_URL}/engine/get_nfo?torrent="

    YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"

    YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
||||||
|
|
||||||
|
|
||||||
|
def get_yggtorrent_tld():
    """Return the current value of the module-level YGGTORRENT_TLD."""
    current_tld = YGGTORRENT_TLD
    return current_tld
|
||||||
|
|
||||||
|
|
||||||
|
class YggTorrentScraper:
|
||||||
|
session = None
|
||||||
|
|
||||||
|
def __init__(self, session):
|
||||||
|
self.session = session
|
||||||
|
|
||||||
|
def login(self, identifiant, password):
    """
    Login request with the specified identifiant and password.

    On success the token cookie taken from the response is stored on the
    session for the yggtorrent domain; that cookie is what later requests
    carry.

    Returns True when the server answered 200 and sent the token cookie,
    False otherwise.
    """
    self.session.cookies.clear()

    # Named differently from the module-level `headers` so it is not shadowed.
    login_headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "User-Agent": "PostmanRuntime/7.17.1",
        "Accept": "*/*",
        "Cache-Control": "no-cache",
        "Host": f"www2.yggtorrent.{YGGTORRENT_TLD}",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
    }

    response = self.session.post(
        YGGTORRENT_LOGIN_URL,
        data={"id": identifiant, "pass": password},
        headers=login_headers,
    )

    logger.debug("status_code : %s", response.status_code)

    if response.status_code != 200:
        logger.debug("Login failed")
        return False

    # A 200 answer without the token cookie is treated as a failed login
    # instead of raising KeyError.
    yggtorrent_token = response.cookies.get_dict().get(YGGTORRENT_TOKEN_COOKIE)

    if yggtorrent_token is None:
        logger.debug("Login failed")
        return False

    logger.debug("Login successful")

    cookie = requests.cookies.create_cookie(
        domain=YGGTORRENT_DOMAIN,
        name=YGGTORRENT_TOKEN_COOKIE,
        value=yggtorrent_token,
    )

    self.session.cookies.set_cookie(cookie)

    return True
|
||||||
|
|
||||||
|
def logout(self):
    """
    Send the logout request and drop every cookie held by the session.

    Returns True when the server answered 200, False otherwise.
    """
    response = self.session.get(YGGTORRENT_LOGOUT_URL, headers=headers)

    # Cookies are cleared whatever the answer was.
    self.session.cookies.clear()

    logger.debug("status_code : %s", response.status_code)

    succeeded = response.status_code == 200

    if succeeded:
        logger.debug("Logout successful")
    else:
        logger.debug("Logout failed")

    return succeeded
|
||||||
|
|
||||||
|
#kopa
|
||||||
|
def search_old(self, parameters):
    """Build the search url from `parameters` and return the torrent urls found."""
    return self.get_torrents_url(create_search_url(parameters), parameters)
|
||||||
|
|
||||||
|
def search(self, parameters):
    """
    Run the external gecko search script inside this process.

    NOTE(review): `parameters` is not used, and exec() always returns None, so
    this method currently returns None rather than a list of urls. The script
    path is absolute and machine-specific. Confirm how the results of the
    script are meant to reach the caller.
    """
    # torrents_url = os.popen('gecko/torrent_search.py didier')
    script_path = '/home/iptubes/astroport-iptubes/yggcrawl/gecko/torrent_search.py'

    # Read through a context manager so the file handle is closed.
    with open(script_path) as script_file:
        script_source = script_file.read()

    # SECURITY: exec runs whatever the file contains with this process's rights.
    torrents_url = exec(script_source)
    return torrents_url
|
||||||
|
|
||||||
|
def extract_details(self, torrent_url):
    """
    Extract informations from torrent's url

    Downloads the torrent page, then the file listing of that torrent, and
    returns a Torrent filled with keywords, seeders/leechers/completed
    counters, name, size, uploader, upload date, comments and files. `url`
    is set only when the page holds a download button with an href.
    """
    logger.debug("torrent_url : %s", torrent_url)

    response = self.session.get(torrent_url, headers=headers)
    torrent_page = BeautifulSoup(response.content, features="lxml")

    torrent = Torrent()

    # Assign fresh lists on the instance instead of appending to whatever the
    # attributes currently reference: Torrent declares them as class-level
    # lists, so appending would accumulate entries across every torrent.
    torrent.keywords = [
        term_tag.text for term_tag in torrent_page.find_all("a", {"class": "term"})
    ]
    torrent.comments = []
    torrent.files = []

    connection_tags = torrent_page.find("tr", {"id": "adv_search_cat"}).find_all(
        "strong"
    )

    informations_tag = (
        torrent_page.find("table", {"class": "informations"})
        .find("tbody")
        .find_all("tr")
    )

    download_button = torrent_page.find("a", {"class": "butt"})

    # Guard against a page without any download button.
    if download_button is not None and download_button.has_attr("href"):
        torrent.url = download_button["href"]

    torrent.seeders = int(connection_tags[0].text.replace(" ", ""))
    torrent.leechers = int(connection_tags[1].text.replace(" ", ""))
    torrent.completed = int(connection_tags[2].text.replace(" ", ""))

    # Rows of the informations table are addressed by position.
    torrent.name = informations_tag[0].find_all("td")[1].text
    torrent.size = informations_tag[3].find_all("td")[1].text
    torrent.uploader = informations_tag[5].find_all("td")[1].text

    mydatetime = re.search(
        r"([0-9]*/[0-9]*/[0-9]* [0-9]*:[0-9]*)",
        informations_tag[6].find_all("td")[1].text,
    ).group(0)

    torrent.uploaded_datetime = datetime.datetime.strptime(
        mydatetime, "%d/%m/%Y %H:%M"
    )

    for message_tag in torrent_page.find_all("div", {"class": "message"}):
        torrent_comment = TorrentComment()

        torrent_comment.author = message_tag.find("a").text
        torrent_comment.posted = message_tag.find("strong").text
        torrent_comment.text = message_tag.find(
            "span", {"id": "comment_text"}
        ).text.strip()

        torrent.comments.append(torrent_comment)

    torrent_id = torrent_page.find("form", {"id": "report-torrent"}).find(
        "input", {"type": "hidden", "name": "target"}
    )["value"]

    response = self.session.get(YGGTORRENT_GET_FILES + torrent_id, headers=headers)
    files_page = BeautifulSoup(response.content, features="lxml")

    for file_tag in files_page.find_all("tr"):
        torrent_file = TorrentFile()

        td_tags = file_tag.find_all("td")

        # The replaced sequences are literal backslash escapes present in the
        # text, not control characters.
        file_size = (
            td_tags[0]
            .text.replace("\\r", "")
            .replace("\\n", "")
            .replace("\\t", "")
            .strip()
        )
        # TorrentFile.__str__ reads `size`, while this method has always
        # stored the value under `file_size`; fill both names.
        torrent_file.size = file_size
        torrent_file.file_size = file_size

        torrent_file.file_name = (
            td_tags[1]
            .text.replace("\\r", "")
            .replace("\\n", "")
            .replace("\\t", "")
            .replace("\\", "")
            .replace(" ", "")
            .strip()
        )

        torrent.files.append(torrent_file)

    return torrent
|
||||||
|
|
||||||
|
def most_completed(self):
    """
    Return the most completed torrents url (TOP 100)
    """
    accept_json = {"Accept": "application/json, text/javascript, */*; q=0.01"}

    # The endpoint is requested twice; the first answer is discarded.
    self.session.post(YGGTORRENT_MOST_COMPLETED_URL, headers=accept_json)
    entries = self.session.post(
        YGGTORRENT_MOST_COMPLETED_URL, headers=accept_json
    ).json()

    # Element 1 of each entry is an HTML fragment whose first <a> links to the torrent.
    return [
        BeautifulSoup(entry[1], features="lxml").find("a")["href"]
        for entry in entries
    ]
|
||||||
|
|
||||||
|
#kopaa
|
||||||
|
def get_torrents_url(self, search_url, parameters):
    """
    Return the torrent page URLs found on every result page of a search.

    search_url is fetched first, only to read the pagination widget and
    learn how many result pages exist. Each page is then requested with a
    URL rebuilt from parameters plus a "page" offset.

    parameters is the dict of search criteria accepted by
    create_search_url; it is not modified.

    Returns a list with the "href" of every <a id="torrent_name"> tag.
    """
    response = self.session.get(search_url, headers=headers)
    search_page = BeautifulSoup(response.content, features="lxml")

    pagination = search_page.find("ul", {"class": "pagination"})
    page_links = pagination.find_all("a") if pagination is not None else []

    if page_links:
        # The last pagination link carries the number of the last page.
        limit_page = int(page_links[-1]["data-ci-pagination-page"])
    else:
        # No pagination widget (or an empty one): a single page of results.
        limit_page = 1

    # Work on a copy so the caller's dict does not gain a "page" key.
    page_parameters = dict(parameters)

    torrents = []

    for page in range(limit_page):
        # The site paginates by result offset, not by page number.
        page_parameters["page"] = page * TORRENT_PER_PAGE

        response = self.session.get(
            create_search_url(page_parameters), headers=headers
        )
        search_page = BeautifulSoup(response.content, features="lxml")

        torrents.extend(
            torrent_tag["href"]
            for torrent_tag in search_page.find_all("a", {"id": "torrent_name"})
        )

    return torrents
|
||||||
|
|
||||||
|
def download_from_torrent_url(self, torrent_url=None, destination_path="./data/tmp/torrents/"):
    """
    Download the .torrent file of the torrent described at torrent_url.

    The torrent page is parsed with extract_details to find its download
    link, which is then handed to download_from_torrent_download_url.
    Returns whatever that method returns, or None when torrent_url is None.
    """
    if torrent_url is None:
        return None

    details = self.extract_details(torrent_url)

    return self.download_from_torrent_download_url(
        torrent_url=details.url, destination_path=destination_path
    )
|
||||||
|
|
||||||
|
def download_from_torrent(self, torrent=None, destination_path="./data/tmp/torrents/"):
    """
    Download the .torrent file of an already extracted torrent object.

    Uses the torrent's url attribute as download link. Returns whatever
    download_from_torrent_download_url returns, or None when torrent is None.
    """
    if torrent is None:
        return None

    return self.download_from_torrent_download_url(
        torrent_url=torrent.url, destination_path=destination_path
    )
|
||||||
|
|
||||||
|
def download_from_torrent_download_url(
    self, torrent_url=None, destination_path="./data/tmp/torrents/"
):
    """
    Download a .torrent file and store it under destination_path.

    torrent_url is the download link relative to YGGTORRENT_BASE_URL.
    destination_path is created when it does not exist yet.

    Returns the full path of the written file.

    Raises Exception when torrent_url is None or when the answer carries no
    file name, which typically means the session is not logged in.
    """
    if torrent_url is None:
        raise Exception("Invalid torrent_url, make sure you are logged")

    response = self.session.get(YGGTORRENT_BASE_URL + torrent_url, headers=headers)

    content_disposition = response.headers.get("content-disposition")

    if content_disposition is None or "filename=" not in content_disposition:
        # Without this header there is no torrent file in the answer.
        raise Exception("No torrent file in the response, make sure you are logged")

    # Skip 'filename="' (10 characters) and drop the last character.
    # NOTE(review): assumes the header ends with a double-quoted file name - confirm.
    file_name = content_disposition[content_disposition.index("filename=") + 10 : -1]

    # The name comes from the remote server: never let it escape destination_path.
    file_name = os.path.basename(file_name)

    os.makedirs(destination_path, exist_ok=True)

    file_full_path = os.path.join(destination_path, file_name)

    with open(file_full_path, "wb") as torrent_file:
        torrent_file.write(response.content)

    return file_full_path
|
||||||
|
|
||||||
|
def create_search_url(parameters):
    """
    Build the search URL matching the given search parameters.

    Recognised keys of parameters: "name", "page", "descriptions", "files",
    "uploader", "sort", "order", "category", "subcategory" and "options".
    A subcategory is only looked up inside the matched category, and
    options only inside the matched subcategory.
    """
    pieces = [YGGTORRENT_SEARCH_URL]

    if "name" in parameters:
        pieces.append(parameters["name"].replace(" ", "+"))

    if "page" in parameters:
        pieces.append(YGGTORRENT_SEARCH_URL_PAGE)
        pieces.append(str(parameters["page"]))

    if "descriptions" in parameters:
        pieces.append(YGGTORRENT_SEARCH_URL_DESCRIPTION)
        for description in parameters["descriptions"]:
            # Every term is followed by "+", including the last one.
            pieces.append(description)
            pieces.append("+")

    if "files" in parameters:
        pieces.append(YGGTORRENT_SEARCH_URL_FILE)
        for file in parameters["files"]:
            pieces.append(file)
            pieces.append("+")

    if "uploader" in parameters:
        pieces.append(YGGTORRENT_SEARCH_URL_UPLOADER)
        pieces.append(parameters["uploader"])

    if "sort" in parameters:
        pieces.append(YGGTORRENT_SEARCH_URL_SORT)
        pieces.append(parameters["sort"])

    if "order" in parameters:
        pieces.append(YGGTORRENT_SEARCH_URL_ORDER)
        pieces.append(parameters["order"])

    if "category" in parameters:
        for category in categories:
            if parameters["category"] != category["name"]:
                continue

            pieces.append(YGGTORRENT_SEARCH_URL_CATEGORY)
            pieces.append(category["id"])

            if "subcategory" not in parameters:
                continue

            for subcategory in category["subcategories"]:
                if parameters["subcategory"] != subcategory["name"]:
                    continue

                pieces.append(YGGTORRENT_SEARCH_URL_SUB_CATEGORY)
                pieces.append(subcategory["id"])

                if "options" not in parameters:
                    continue

                for key, values in parameters["options"].items():
                    for option in subcategory["options"]:
                        if key != option["name"]:
                            continue

                        multiple_flag = "%3Amultiple" if "multiple" in option else ""

                        for searched_value in values:
                            # Option values are sent as their 1-based position.
                            for position, value in enumerate(option["values"], start=1):
                                if searched_value == value:
                                    pieces.append(
                                        "&option_"
                                        + option["name"]
                                        + multiple_flag
                                        + "[]="
                                        + str(position)
                                    )

    pieces.append(YGGTORRENT_SEARCH_URL_DO)
    pieces.append("search")

    return "".join(pieces)
|
|
@ -0,0 +1,25 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from yggcrawl import YggTorrentScraperSelenium
|
||||||
|
from selenium import webdriver
|
||||||
|
|
||||||
|
# Example: log in to YggTorrent through a Selenium-driven Chrome and run a search.
if __name__ == "__main__":
    options = webdriver.ChromeOptions()
    # Keep the console quiet.
    options.add_argument("--log-level=3")
    options.add_argument("--disable-blink-features")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_experimental_option("excludeSwitches", ["enable-logging"])

    # Raw string: "\c" is not a valid escape sequence in a normal literal.
    driver = webdriver.Chrome(r"D:\chromedriver.exe", options=options)

    try:
        scraper = YggTorrentScraperSelenium(driver=driver)
        # or
        # scraper = YggTorrentScraperSelenium(driver_path=r"D:\chromedriver.exe")

        if scraper.login("myidentifiant", "mypassword"):
            print("Login success")
            torrents_url = scraper.search({"name": "walking dead"})
            print(torrents_url)
        else:
            print("Login failed")
    finally:
        # Always close the browser, even when login or search raises.
        driver.quit()
|
|
@ -7,5 +7,11 @@ from .yggtorrentscraper import (
|
||||||
set_yggtorrent_tld,
|
set_yggtorrent_tld,
|
||||||
get_yggtorrent_tld,
|
get_yggtorrent_tld,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from .yggtorrentscraper_selenium import (
|
||||||
|
YggTorrentScraperSelenium,
|
||||||
|
set_yggtorrent_tld,
|
||||||
|
get_yggtorrent_tld,
|
||||||
|
)
|
||||||
from .torrent import Torrent, TorrentComment, TorrentFile
|
from .torrent import Torrent, TorrentComment, TorrentFile
|
||||||
from .categories import categories
|
from .categories import categories
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
[0512/133633.143:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/133633.143:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/133633.144:ERROR:exception_snapshot_win.cc(98)] thread ID 14952 not found in process
|
||||||
|
[0512/133633.144:ERROR:exception_snapshot_win.cc(98)] thread ID 49204 not found in process
|
||||||
|
[0512/133633.158:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/133633.158:ERROR:exception_snapshot_win.cc(98)] thread ID 46960 not found in process
|
||||||
|
[0512/133700.448:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/133700.448:ERROR:exception_snapshot_win.cc(98)] thread ID 45656 not found in process
|
||||||
|
[0512/133700.470:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/133700.470:ERROR:exception_snapshot_win.cc(98)] thread ID 47508 not found in process
|
||||||
|
[0512/134349.949:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/134349.962:ERROR:exception_snapshot_win.cc(98)] thread ID 39532 not found in process
|
||||||
|
[0512/134349.956:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/134349.962:ERROR:exception_snapshot_win.cc(98)] thread ID 48284 not found in process
|
||||||
|
[0512/134349.963:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/134349.963:ERROR:exception_snapshot_win.cc(98)] thread ID 51964 not found in process
|
||||||
|
[0512/134349.982:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/134349.982:ERROR:exception_snapshot_win.cc(98)] thread ID 19944 not found in process
|
||||||
|
[0512/134349.986:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/134349.986:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/134349.986:ERROR:exception_snapshot_win.cc(98)] thread ID 32616 not found in process
|
||||||
|
[0512/134349.986:ERROR:exception_snapshot_win.cc(98)] thread ID 42324 not found in process
|
||||||
|
[0512/135659.781:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/135659.784:ERROR:exception_snapshot_win.cc(98)] thread ID 50808 not found in process
|
||||||
|
[0512/135659.830:ERROR:process_reader_win.cc(151)] SuspendThread: Accès refusé. (0x5)
|
||||||
|
[0512/135659.830:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/135659.831:ERROR:exception_snapshot_win.cc(98)] thread ID 45060 not found in process
|
||||||
|
[0512/135659.847:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/135659.847:ERROR:exception_snapshot_win.cc(98)] thread ID 52004 not found in process
|
||||||
|
[0512/135659.882:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/135659.882:ERROR:exception_snapshot_win.cc(98)] thread ID 56756 not found in process
|
||||||
|
[0512/135659.994:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/135659.994:ERROR:exception_snapshot_win.cc(98)] thread ID 22536 not found in process
|
||||||
|
[0512/135700.025:ERROR:process_reader_win.cc(123)] NtOpenThread: {Accès refusé} Un processus a demandé l’accès a un objet, mais il ne bénéficie pas des autorisations nécessaires. (0xc0000022)
|
||||||
|
[0512/135700.025:ERROR:exception_snapshot_win.cc(98)] thread ID 56652 not found in process
|
|
@ -3,6 +3,7 @@ import shutil
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import cloudscraper
|
||||||
|
|
||||||
from ..yggtorrentscraper import YggTorrentScraper
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
@ -20,7 +21,7 @@ class TestDownload(unittest.TestCase):
|
||||||
".", "yggtorrentscraper", "tests", "test_download"
|
".", "yggtorrentscraper", "tests", "test_download"
|
||||||
)
|
)
|
||||||
|
|
||||||
self.scraper = YggTorrentScraper(requests.session())
|
self.scraper = YggTorrentScraper(cloudscraper.create_scraper())
|
||||||
|
|
||||||
self.scraper.login(yggtorrent_identifiant, yggtorrent_password)
|
self.scraper.login(yggtorrent_identifiant, yggtorrent_password)
|
||||||
|
|
||||||
|
|
|
@ -2,12 +2,12 @@ import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import cloudscraper
|
||||||
from ..yggtorrentscraper import YggTorrentScraper
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
class TestExtractDetails(unittest.TestCase):
|
class TestExtractDetails(unittest.TestCase):
|
||||||
scraper = YggTorrentScraper(requests.session())
|
self.scraper = YggTorrentScraper(cloudscraper.create_scraper())
|
||||||
|
|
||||||
def test_extract_details(self):
|
def test_extract_details(self):
|
||||||
torrent = self.scraper.extract_details(
|
torrent = self.scraper.extract_details(
|
||||||
|
|
|
@ -2,13 +2,13 @@ import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import cloudscraper
|
||||||
from ..yggtorrentscraper import YggTorrentScraper
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
class TestAuthentification(unittest.TestCase):
|
class TestAuthentification(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.scraper = YggTorrentScraper(requests.session())
|
self.scraper = YggTorrentScraper(cloudscraper.create_scraper())
|
||||||
|
|
||||||
def test_login_success(self):
|
def test_login_success(self):
|
||||||
yggtorrent_identifiant = os.environ.get("YGGTORRENT_IDENTIFIANT")
|
yggtorrent_identifiant = os.environ.get("YGGTORRENT_IDENTIFIANT")
|
||||||
|
|
|
@ -2,13 +2,13 @@ import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import cloudscraper
|
||||||
from ..yggtorrentscraper import YggTorrentScraper
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
class TestLogout(unittest.TestCase):
|
class TestLogout(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.scraper = YggTorrentScraper(requests.session())
|
self.scraper = YggTorrentScraper(cloudscraper.create_scraper())
|
||||||
|
|
||||||
def test_logout_success(self):
|
def test_logout_success(self):
|
||||||
yggtorrent_identifiant = os.environ.get("YGGTORRENT_IDENTIFIANT")
|
yggtorrent_identifiant = os.environ.get("YGGTORRENT_IDENTIFIANT")
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import cloudscraper
|
||||||
from ..yggtorrentscraper import YggTorrentScraper
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
class TestMostCompleted(unittest.TestCase):
|
class TestMostCompleted(unittest.TestCase):
|
||||||
scraper = YggTorrentScraper(session=requests.session())
|
self.scraper = YggTorrentScraper(cloudscraper.create_scraper())
|
||||||
|
|
||||||
def test_most_completed(self):
|
def test_most_completed(self):
|
||||||
most_completed = self.scraper.most_completed()
|
most_completed = self.scraper.most_completed()
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import cloudscraper
|
||||||
|
|
||||||
from ..yggtorrentscraper import YggTorrentScraper
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
class TestResearch(unittest.TestCase):
|
class TestResearch(unittest.TestCase):
|
||||||
scraper = YggTorrentScraper(requests.session())
|
self.scraper = YggTorrentScraper(cloudscraper.create_scraper())
|
||||||
|
|
||||||
torrent_name = "walking dead s09"
|
torrent_name = "walking dead s09"
|
||||||
torrent_uploader = "brandit"
|
torrent_uploader = "brandit"
|
||||||
|
|
|
@ -2,12 +2,12 @@ import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import cloudscraper
|
||||||
from ..yggtorrentscraper import YggTorrentScraper
|
from ..yggtorrentscraper import YggTorrentScraper
|
||||||
|
|
||||||
|
|
||||||
class TestTorrent(unittest.TestCase):
|
class TestTorrent(unittest.TestCase):
|
||||||
scraper = YggTorrentScraper(requests.session())
|
self.scraper = YggTorrentScraper(cloudscraper.create_scraper())
|
||||||
|
|
||||||
def test_str(self):
|
def test_str(self):
|
||||||
torrent_url = self.scraper.most_completed()[0]
|
torrent_url = self.scraper.most_completed()[0]
|
||||||
|
|
|
@ -18,6 +18,7 @@ class Torrent:
|
||||||
leechers = -1
|
leechers = -1
|
||||||
|
|
||||||
url = None
|
url = None
|
||||||
|
download_url = None
|
||||||
|
|
||||||
files = []
|
files = []
|
||||||
comments = []
|
comments = []
|
||||||
|
@ -36,6 +37,11 @@ class Torrent:
|
||||||
else:
|
else:
|
||||||
to_string += "N/A"
|
to_string += "N/A"
|
||||||
|
|
||||||
|
if self.download_url is not None:
|
||||||
|
to_string += self.download_url
|
||||||
|
else:
|
||||||
|
to_string += "N/A"
|
||||||
|
|
||||||
to_string += os.linesep
|
to_string += os.linesep
|
||||||
to_string += os.linesep
|
to_string += os.linesep
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,7 @@ YGGTORRENT_SEARCH_URL_DO = "&do="
|
||||||
YGGTORRENT_SEARCH_URL_PAGE = "&page="
|
YGGTORRENT_SEARCH_URL_PAGE = "&page="
|
||||||
|
|
||||||
YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
||||||
YGGTORRENT_GET_INFO = f"https://www2.yggtorrent.se/engine/get_nfo?torrent="
|
YGGTORRENT_GET_INFO = f"https://www2.yggtorrentchg/engine/get_nfo?torrent="
|
||||||
|
|
||||||
YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"
|
YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"
|
||||||
|
|
||||||
|
@ -52,7 +52,6 @@ TORRENT_PER_PAGE = 50
|
||||||
|
|
||||||
YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
||||||
|
|
||||||
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
|
|
||||||
|
|
||||||
def set_yggtorrent_tld(yggtorrent_tld=None):
|
def set_yggtorrent_tld(yggtorrent_tld=None):
|
||||||
"""
|
"""
|
||||||
|
@ -78,7 +77,7 @@ def set_yggtorrent_tld(yggtorrent_tld=None):
|
||||||
|
|
||||||
YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="
|
YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="
|
||||||
|
|
||||||
YGGTORRENT_DOMAIN = ".yggtorrent.se"
|
YGGTORRENT_DOMAIN = ".yggtorrent.gg"
|
||||||
|
|
||||||
YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
||||||
YGGTORRENT_GET_INFO = f"https://www2.yggtorrentchg/engine/get_nfo?torrent="
|
YGGTORRENT_GET_INFO = f"https://www2.yggtorrentchg/engine/get_nfo?torrent="
|
||||||
|
@ -109,7 +108,7 @@ class YggTorrentScraper:
|
||||||
"User-Agent": "PostmanRuntime/7.17.1",
|
"User-Agent": "PostmanRuntime/7.17.1",
|
||||||
"Accept": "*/*",
|
"Accept": "*/*",
|
||||||
"Cache-Control": "no-cache",
|
"Cache-Control": "no-cache",
|
||||||
"Host": f"www2.yggtorrent.{YGGTORRENT_TLD}",
|
"Host": f"www.yggtorrent.{YGGTORRENT_TLD}",
|
||||||
"Accept-Encoding": "gzip, deflate",
|
"Accept-Encoding": "gzip, deflate",
|
||||||
"Connection": "keep-alive",
|
"Connection": "keep-alive",
|
||||||
}
|
}
|
||||||
|
@ -146,7 +145,7 @@ class YggTorrentScraper:
|
||||||
"""
|
"""
|
||||||
Logout request
|
Logout request
|
||||||
"""
|
"""
|
||||||
response = self.session.get(YGGTORRENT_LOGOUT_URL, headers=headers)
|
response = self.session.get(YGGTORRENT_LOGOUT_URL)
|
||||||
|
|
||||||
self.session.cookies.clear()
|
self.session.cookies.clear()
|
||||||
|
|
||||||
|
@ -161,18 +160,13 @@ class YggTorrentScraper:
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
#kopa
|
def search(self, parameters):
|
||||||
def search_old(self, parameters):
|
|
||||||
search_url = create_search_url(parameters)
|
search_url = create_search_url(parameters)
|
||||||
|
|
||||||
torrents_url = self.get_torrents_url(search_url, parameters)
|
torrents_url = self.get_torrents_url(search_url, parameters)
|
||||||
|
|
||||||
return torrents_url
|
return torrents_url
|
||||||
|
|
||||||
def search(self, parameters):
|
|
||||||
# torrents_url = os.popen('gecko/torrent_search.py didier')
|
|
||||||
torrents_url = exec(open('/home/iptubes/astroport-iptubes/yggcrawl/gecko/torrent_search.py').read())
|
|
||||||
return torrents_url
|
|
||||||
|
|
||||||
def extract_details(self, torrent_url):
|
def extract_details(self, torrent_url):
|
||||||
"""
|
"""
|
||||||
Extract informations from torrent's url
|
Extract informations from torrent's url
|
||||||
|
@ -181,7 +175,7 @@ class YggTorrentScraper:
|
||||||
|
|
||||||
torrents = []
|
torrents = []
|
||||||
|
|
||||||
response = self.session.get(torrent_url, headers=headers)
|
response = self.session.get(torrent_url)
|
||||||
|
|
||||||
torrent_page = BeautifulSoup(response.content, features="lxml")
|
torrent_page = BeautifulSoup(response.content, features="lxml")
|
||||||
|
|
||||||
|
@ -244,7 +238,7 @@ class YggTorrentScraper:
|
||||||
"input", {"type": "hidden", "name": "target"}
|
"input", {"type": "hidden", "name": "target"}
|
||||||
)["value"]
|
)["value"]
|
||||||
|
|
||||||
response = self.session.get(YGGTORRENT_GET_FILES + torrent_id, headers=headers)
|
response = self.session.get(YGGTORRENT_GET_FILES + torrent_id)
|
||||||
|
|
||||||
files_page = BeautifulSoup(response.content, features="lxml")
|
files_page = BeautifulSoup(response.content, features="lxml")
|
||||||
|
|
||||||
|
@ -299,12 +293,12 @@ class YggTorrentScraper:
|
||||||
|
|
||||||
return torrents_url
|
return torrents_url
|
||||||
|
|
||||||
#kopaa
|
|
||||||
def get_torrents_url(self, search_url, parameters):
|
def get_torrents_url(self, search_url, parameters):
|
||||||
"""
|
"""
|
||||||
Return
|
Return
|
||||||
"""
|
"""
|
||||||
response = self.session.get(search_url, headers=headers)
|
|
||||||
|
response = self.session.get(search_url)
|
||||||
|
|
||||||
search_page = BeautifulSoup(response.content, features="lxml")
|
search_page = BeautifulSoup(response.content, features="lxml")
|
||||||
|
|
||||||
|
@ -324,7 +318,7 @@ class YggTorrentScraper:
|
||||||
|
|
||||||
search_url = create_search_url(parameters)
|
search_url = create_search_url(parameters)
|
||||||
|
|
||||||
response = self.session.get(search_url, headers=headers)
|
response = self.session.get(search_url)
|
||||||
|
|
||||||
search_page = BeautifulSoup(response.content, features="lxml")
|
search_page = BeautifulSoup(response.content, features="lxml")
|
||||||
|
|
||||||
|
@ -335,7 +329,7 @@ class YggTorrentScraper:
|
||||||
|
|
||||||
return torrents
|
return torrents
|
||||||
|
|
||||||
def download_from_torrent_url(self, torrent_url=None, destination_path="./data/tmp/torrents/"):
|
def download_from_torrent_url(self, torrent_url=None, destination_path="./"):
|
||||||
if torrent_url is not None:
|
if torrent_url is not None:
|
||||||
torrent = self.extract_details(torrent_url)
|
torrent = self.extract_details(torrent_url)
|
||||||
|
|
||||||
|
@ -343,19 +337,19 @@ class YggTorrentScraper:
|
||||||
torrent_url=torrent.url, destination_path=destination_path
|
torrent_url=torrent.url, destination_path=destination_path
|
||||||
)
|
)
|
||||||
|
|
||||||
def download_from_torrent(self, torrent=None, destination_path="./data/tmp/torrents/"):
|
def download_from_torrent(self, torrent=None, destination_path="./"):
|
||||||
if torrent is not None:
|
if torrent is not None:
|
||||||
return self.download_from_torrent_download_url(
|
return self.download_from_torrent_download_url(
|
||||||
torrent_url=torrent.url, destination_path=destination_path
|
torrent_url=torrent.url, destination_path=destination_path
|
||||||
)
|
)
|
||||||
|
|
||||||
def download_from_torrent_download_url(
|
def download_from_torrent_download_url(
|
||||||
self, torrent_url=None, destination_path="./data/tmp/torrents/"
|
self, torrent_url=None, destination_path="./"
|
||||||
):
|
):
|
||||||
if torrent_url is None:
|
if torrent_url is None:
|
||||||
raise Exception("Invalid torrent_url, make sure you are logged")
|
raise Exception("Invalid torrent_url, make sure you are logged")
|
||||||
|
|
||||||
response = self.session.get(YGGTORRENT_BASE_URL + torrent_url, headers=headers)
|
response = self.session.get(YGGTORRENT_BASE_URL + torrent_url)
|
||||||
|
|
||||||
temp_file_name = response.headers.get("content-disposition")
|
temp_file_name = response.headers.get("content-disposition")
|
||||||
|
|
||||||
|
@ -374,6 +368,7 @@ class YggTorrentScraper:
|
||||||
|
|
||||||
return file_full_path
|
return file_full_path
|
||||||
|
|
||||||
|
|
||||||
def create_search_url(parameters):
|
def create_search_url(parameters):
|
||||||
"""
|
"""
|
||||||
Return a formated URL for torrent's search
|
Return a formated URL for torrent's search
|
||||||
|
|
|
@ -0,0 +1,452 @@
|
||||||
|
import datetime
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from .torrent import Torrent, TorrentComment, TorrentFile
|
||||||
|
from .categories import categories
|
||||||
|
|
||||||
|
# Top-level domain currently used by the site; change it with set_yggtorrent_tld().
YGGTORRENT_TLD = "se"

YGGTORRENT_BASE_URL = f"https://www2.yggtorrent.{YGGTORRENT_TLD}"

YGGTORRENT_LOGIN_URL = f"{YGGTORRENT_BASE_URL}/user/login"
YGGTORRENT_LOGOUT_URL = f"{YGGTORRENT_BASE_URL}/user/logout?attempt=1"

YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="

logger = logging.getLogger("yggtorrentscraper")

# Domain and name of the session cookie set after login.
YGGTORRENT_DOMAIN = f".yggtorrent.{YGGTORRENT_TLD}"
YGGTORRENT_TOKEN_COOKIE = "ygg_"

# Query-string fragments appended by create_search_url().
YGGTORRENT_SEARCH_URL_DESCRIPTION = "&description="
YGGTORRENT_SEARCH_URL_FILE = "&file="
YGGTORRENT_SEARCH_URL_UPLOADER = "&uploader="
YGGTORRENT_SEARCH_URL_CATEGORY = "&category="
YGGTORRENT_SEARCH_URL_SUB_CATEGORY = "&sub_category="
YGGTORRENT_SEARCH_URL_ORDER = "&order="
YGGTORRENT_SEARCH_URL_SORT = "&sort="
YGGTORRENT_SEARCH_URL_DO = "&do="
YGGTORRENT_SEARCH_URL_PAGE = "&page="

YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
# Built from the base URL: the previous literal host "www2.yggtorrentchg" was not a valid domain.
YGGTORRENT_GET_INFO = f"{YGGTORRENT_BASE_URL}/engine/get_nfo?torrent="

YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"

# Number of results per search page; used as the "page" offset step.
TORRENT_PER_PAGE = 50

YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
||||||
|
|
||||||
|
|
||||||
|
def set_yggtorrent_tld(yggtorrent_tld=None):
    """
    Redefine every URL/domain module variable according to a new TLD.

    yggtorrent_tld is the top-level domain without leading dot, e.g. "se".
    When it is None nothing is changed, so the URLs never end up pointing
    at "yggtorrent.None".
    """

    global YGGTORRENT_TLD
    global YGGTORRENT_BASE_URL
    global YGGTORRENT_LOGIN_URL
    global YGGTORRENT_LOGOUT_URL
    global YGGTORRENT_SEARCH_URL
    global YGGTORRENT_DOMAIN
    global YGGTORRENT_GET_FILES
    global YGGTORRENT_GET_INFO
    global YGGTORRENT_MOST_COMPLETED_URL
    global YGGTORRENT_FILES_URL

    if yggtorrent_tld is None:
        return

    YGGTORRENT_TLD = yggtorrent_tld

    YGGTORRENT_BASE_URL = f"https://www2.yggtorrent.{YGGTORRENT_TLD}"

    YGGTORRENT_LOGIN_URL = f"{YGGTORRENT_BASE_URL}/user/login"
    # Previously assigned to YGGTORRENT_SEARCH_URL by mistake, so the logout
    # URL kept pointing at the old TLD.
    YGGTORRENT_LOGOUT_URL = f"{YGGTORRENT_BASE_URL}/user/logout?attempt=1"

    YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="

    # The cookie domain must follow the TLD instead of a hard-coded one.
    YGGTORRENT_DOMAIN = f".yggtorrent.{YGGTORRENT_TLD}"

    YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
    YGGTORRENT_GET_INFO = f"{YGGTORRENT_BASE_URL}/engine/get_nfo?torrent="

    YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"

    YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
||||||
|
|
||||||
|
|
||||||
|
def get_yggtorrent_tld():
    """
    Return the TLD currently stored in the module-level YGGTORRENT_TLD.
    """
    current_tld = YGGTORRENT_TLD
    return current_tld
|
||||||
|
|
||||||
|
|
||||||
|
class YggTorrentScraper:
|
||||||
|
session = None
|
||||||
|
|
||||||
|
def __init__(self, session):
|
||||||
|
self.session = session
|
||||||
|
|
||||||
|
def login(self, identifiant, password):
    """
    Log in with the specified identifiant and password.

    On success the session token cookie found in the answer is stored in
    the session for YGGTORRENT_DOMAIN; it is necessary to download.

    Returns True when the server answers 200 and provides the token
    cookie, False otherwise.
    """
    # Start from a clean cookie jar.
    self.session.cookies.clear()

    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "User-Agent": "PostmanRuntime/7.17.1",
        "Accept": "*/*",
        "Cache-Control": "no-cache",
        # NOTE(review): Host is "www." while the base URL uses "www2." - confirm.
        "Host": f"www.yggtorrent.{YGGTORRENT_TLD}",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
    }

    response = self.session.post(
        YGGTORRENT_LOGIN_URL,
        data={"id": identifiant, "pass": password},
        headers=headers,
    )

    logger.debug("status_code : %s", response.status_code)

    if response.status_code != 200:
        logger.debug("Login failed")

        return False

    # A 200 answer without the token cookie is a failed login, not a crash.
    yggtorrent_token = response.cookies.get_dict().get(YGGTORRENT_TOKEN_COOKIE)

    if yggtorrent_token is None:
        logger.debug("Login failed")

        return False

    logger.debug("Login successful")

    cookie = requests.cookies.create_cookie(
        domain=YGGTORRENT_DOMAIN,
        name=YGGTORRENT_TOKEN_COOKIE,
        value=yggtorrent_token,
    )

    self.session.cookies.set_cookie(cookie)

    return True
|
||||||
|
|
||||||
|
def logout(self):
|
||||||
|
"""
|
||||||
|
Logout request
|
||||||
|
"""
|
||||||
|
response = self.session.get(YGGTORRENT_LOGOUT_URL)
|
||||||
|
|
||||||
|
self.session.cookies.clear()
|
||||||
|
|
||||||
|
logger.debug("status_code : %s", response.status_code)
|
||||||
|
|
||||||
|
if response.status_code == 200:
|
||||||
|
logger.debug("Logout successful")
|
||||||
|
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
logger.debug("Logout failed")
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
def search(self, parameters):
|
||||||
|
search_url = create_search_url(parameters)
|
||||||
|
|
||||||
|
torrents_url = self.get_torrents_url(search_url, parameters)
|
||||||
|
|
||||||
|
return torrents_url
|
||||||
|
|
||||||
|
def extract_details(self, torrent_url):
|
||||||
|
"""
|
||||||
|
Extract informations from torrent's url
|
||||||
|
"""
|
||||||
|
logger.debug("torrent_url : %s", torrent_url)
|
||||||
|
|
||||||
|
torrents = []
|
||||||
|
|
||||||
|
response = self.session.get(torrent_url)
|
||||||
|
|
||||||
|
torrent_page = BeautifulSoup(response.content, features="lxml")
|
||||||
|
|
||||||
|
torrent = Torrent()
|
||||||
|
|
||||||
|
term_tags = torrent_page.find_all("a", {"class": "term"})
|
||||||
|
|
||||||
|
for term_tag in term_tags:
|
||||||
|
torrent.keywords.append(term_tag.text)
|
||||||
|
|
||||||
|
connection_tags = torrent_page.find("tr", {"id": "adv_search_cat"}).find_all(
|
||||||
|
"strong"
|
||||||
|
)
|
||||||
|
|
||||||
|
informations_tag = (
|
||||||
|
torrent_page.find("table", {"class": "informations"})
|
||||||
|
.find("tbody")
|
||||||
|
.find_all("tr")
|
||||||
|
)
|
||||||
|
|
||||||
|
download_button = torrent_page.find("a", {"class": "butt"})
|
||||||
|
|
||||||
|
if download_button.has_attr("href"):
|
||||||
|
torrent.url = download_button["href"]
|
||||||
|
|
||||||
|
torrent.seeders = int(connection_tags[0].text.replace(" ", ""))
|
||||||
|
torrent.leechers = int(connection_tags[1].text.replace(" ", ""))
|
||||||
|
torrent.completed = int(connection_tags[2].text.replace(" ", ""))
|
||||||
|
|
||||||
|
torrent.name = informations_tag[0].find_all("td")[1].text
|
||||||
|
torrent.size = informations_tag[3].find_all("td")[1].text
|
||||||
|
torrent.uploader = informations_tag[5].find_all("td")[1].text
|
||||||
|
|
||||||
|
mydatetime = re.search(
|
||||||
|
"([0-9]*\/[0-9]*\/[0-9]* [0-9]*:[0-9]*)",
|
||||||
|
informations_tag[6].find_all("td")[1].text,
|
||||||
|
0,
|
||||||
|
).group(0)
|
||||||
|
|
||||||
|
torrent.uploaded_datetime = datetime.datetime.strptime(
|
||||||
|
mydatetime, "%d/%m/%Y %H:%M"
|
||||||
|
)
|
||||||
|
|
||||||
|
message_tags = torrent_page.find_all("div", {"class": "message"})
|
||||||
|
|
||||||
|
for message_tag in message_tags:
|
||||||
|
torrent_comment = TorrentComment()
|
||||||
|
|
||||||
|
torrent_comment.author = message_tag.find("a").text
|
||||||
|
torrent_comment.posted = message_tag.find("strong").text
|
||||||
|
torrent_comment.text = message_tag.find(
|
||||||
|
"span", {"id": "comment_text"}
|
||||||
|
).text.strip()
|
||||||
|
|
||||||
|
torrent.comments.append(torrent_comment)
|
||||||
|
|
||||||
|
torrents.append(torrent)
|
||||||
|
|
||||||
|
torrent_id = torrent_page.find("form", {"id": "report-torrent"}).find(
|
||||||
|
"input", {"type": "hidden", "name": "target"}
|
||||||
|
)["value"]
|
||||||
|
|
||||||
|
response = self.session.get(YGGTORRENT_GET_FILES + torrent_id)
|
||||||
|
|
||||||
|
files_page = BeautifulSoup(response.content, features="lxml")
|
||||||
|
|
||||||
|
file_tags = files_page.find_all("tr")
|
||||||
|
|
||||||
|
for file_tag in file_tags:
|
||||||
|
torrent_file = TorrentFile()
|
||||||
|
|
||||||
|
td_tags = file_tag.find_all("td")
|
||||||
|
|
||||||
|
torrent_file.file_size = (
|
||||||
|
td_tags[0]
|
||||||
|
.text.replace("\\r", "")
|
||||||
|
.replace("\\n", "")
|
||||||
|
.replace("\\t", "")
|
||||||
|
.strip()
|
||||||
|
)
|
||||||
|
torrent_file.file_name = (
|
||||||
|
td_tags[1]
|
||||||
|
.text.replace("\\r", "")
|
||||||
|
.replace("\\n", "")
|
||||||
|
.replace("\\t", "")
|
||||||
|
.replace("\\", "")
|
||||||
|
.replace(" ", "")
|
||||||
|
.strip()
|
||||||
|
)
|
||||||
|
|
||||||
|
torrent.files.append(torrent_file)
|
||||||
|
|
||||||
|
return torrent
|
||||||
|
|
||||||
|
def most_completed(self):
|
||||||
|
"""
|
||||||
|
Return the most completed torrents url (TOP 100)
|
||||||
|
"""
|
||||||
|
|
||||||
|
header = {"Accept": "application/json, text/javascript, */*; q=0.01"}
|
||||||
|
self.session.post(YGGTORRENT_MOST_COMPLETED_URL, headers=header)
|
||||||
|
|
||||||
|
json_response = self.session.post(
|
||||||
|
YGGTORRENT_MOST_COMPLETED_URL, headers=header
|
||||||
|
).json()
|
||||||
|
|
||||||
|
torrents_url = []
|
||||||
|
|
||||||
|
for json_item in json_response:
|
||||||
|
root = BeautifulSoup(json_item[1], features="lxml")
|
||||||
|
|
||||||
|
a_tag = root.find("a")
|
||||||
|
|
||||||
|
torrents_url.append(a_tag["href"])
|
||||||
|
|
||||||
|
return torrents_url
|
||||||
|
|
||||||
|
def get_torrents_url(self, search_url, parameters):
|
||||||
|
"""
|
||||||
|
Return
|
||||||
|
"""
|
||||||
|
|
||||||
|
response = self.session.get(search_url)
|
||||||
|
|
||||||
|
search_page = BeautifulSoup(response.content, features="lxml")
|
||||||
|
|
||||||
|
pagination = search_page.find("ul", {"class": "pagination"})
|
||||||
|
|
||||||
|
if pagination is None:
|
||||||
|
limit_page = 1
|
||||||
|
else:
|
||||||
|
pagination_item = pagination.find_all("a")
|
||||||
|
|
||||||
|
limit_page = int(pagination_item[-1]["data-ci-pagination-page"])
|
||||||
|
|
||||||
|
torrents = []
|
||||||
|
|
||||||
|
for page in range(0, limit_page):
|
||||||
|
parameters["page"] = page * TORRENT_PER_PAGE
|
||||||
|
|
||||||
|
search_url = create_search_url(parameters)
|
||||||
|
|
||||||
|
response = self.session.get(search_url)
|
||||||
|
|
||||||
|
search_page = BeautifulSoup(response.content, features="lxml")
|
||||||
|
|
||||||
|
torrents_tag = search_page.findAll("a", {"id": "torrent_name"})
|
||||||
|
|
||||||
|
for torrent_tag in torrents_tag:
|
||||||
|
torrents.append(torrent_tag["href"])
|
||||||
|
|
||||||
|
return torrents
|
||||||
|
|
||||||
|
def download_from_torrent_url(self, torrent_url=None, destination_path="./"):
|
||||||
|
if torrent_url is not None:
|
||||||
|
torrent = self.extract_details(torrent_url)
|
||||||
|
|
||||||
|
return self.download_from_torrent_download_url(
|
||||||
|
torrent_url=torrent.url, destination_path=destination_path
|
||||||
|
)
|
||||||
|
|
||||||
|
def download_from_torrent(self, torrent=None, destination_path="./"):
|
||||||
|
if torrent is not None:
|
||||||
|
return self.download_from_torrent_download_url(
|
||||||
|
torrent_url=torrent.url, destination_path=destination_path
|
||||||
|
)
|
||||||
|
|
||||||
|
def download_from_torrent_download_url(
|
||||||
|
self, torrent_url=None, destination_path="./"
|
||||||
|
):
|
||||||
|
if torrent_url is None:
|
||||||
|
raise Exception("Invalid torrent_url, make sure you are logged")
|
||||||
|
|
||||||
|
response = self.session.get(YGGTORRENT_BASE_URL + torrent_url)
|
||||||
|
|
||||||
|
temp_file_name = response.headers.get("content-disposition")
|
||||||
|
|
||||||
|
file_name = temp_file_name[temp_file_name.index("filename=") + 10 : -1]
|
||||||
|
|
||||||
|
if not os.path.exists(destination_path):
|
||||||
|
os.makedirs(destination_path)
|
||||||
|
|
||||||
|
file_full_path = os.path.join(destination_path, file_name)
|
||||||
|
|
||||||
|
file = open(file_full_path, "wb")
|
||||||
|
|
||||||
|
file.write(response.content)
|
||||||
|
|
||||||
|
file.close()
|
||||||
|
|
||||||
|
return file_full_path
|
||||||
|
|
||||||
|
|
||||||
|
def create_search_url(parameters):
    """
    Return a formatted URL for a torrent search

    parameters is a dict; the keys read here are "name", "page",
    "descriptions", "files", "uploader", "sort", "order", "category",
    "subcategory" and "options". Every key is optional.

    Free-text values (name, descriptions, files, uploader) are
    percent-encoded so that characters such as "&" or "#" cannot break the
    query string; spaces are still sent as "+".
    """
    from urllib.parse import quote_plus

    url_parts = [YGGTORRENT_SEARCH_URL]

    if "name" in parameters:
        url_parts.append(quote_plus(parameters["name"]))

    if "page" in parameters:
        url_parts.append(YGGTORRENT_SEARCH_URL_PAGE + str(parameters["page"]))

    if "descriptions" in parameters:
        url_parts.append(YGGTORRENT_SEARCH_URL_DESCRIPTION)
        # Each term keeps its trailing "+" separator, as before
        url_parts.extend(
            quote_plus(description) + "+"
            for description in parameters["descriptions"]
        )

    if "files" in parameters:
        url_parts.append(YGGTORRENT_SEARCH_URL_FILE)
        url_parts.extend(quote_plus(file) + "+" for file in parameters["files"])

    if "uploader" in parameters:
        url_parts.append(
            YGGTORRENT_SEARCH_URL_UPLOADER + quote_plus(parameters["uploader"])
        )

    if "sort" in parameters:
        url_parts.append(YGGTORRENT_SEARCH_URL_SORT + parameters["sort"])

    if "order" in parameters:
        url_parts.append(YGGTORRENT_SEARCH_URL_ORDER + parameters["order"])

    if "category" in parameters:
        for category in categories:
            if parameters["category"] != category["name"]:
                continue

            url_parts.append(YGGTORRENT_SEARCH_URL_CATEGORY + category["id"])

            if "subcategory" not in parameters:
                continue

            for subcategory in category["subcategories"]:
                if parameters["subcategory"] != subcategory["name"]:
                    continue

                url_parts.append(
                    YGGTORRENT_SEARCH_URL_SUB_CATEGORY + subcategory["id"]
                )

                if "options" in parameters:
                    url_parts.extend(
                        _option_url_parts(
                            parameters["options"], subcategory["options"]
                        )
                    )

    url_parts.append(YGGTORRENT_SEARCH_URL_DO + "search")

    return "".join(url_parts)


def _option_url_parts(searched_options, available_options):
    """
    Return the "&option_..." fragments of the searched option values

    searched_options maps an option name to the values to search for;
    available_options is the option list of the selected subcategory. A
    value is sent as its 1-based position in the option's "values" list.
    """
    parts = []

    for key, values in searched_options.items():
        for option in available_options:
            if key != option["name"]:
                continue

            multiple = "%3Amultiple" if "multiple" in option else ""

            for searched_value in values:
                for index, value in enumerate(option["values"]):
                    if searched_value == value:
                        parts.append(
                            "&option_"
                            + option["name"]
                            + multiple
                            + "[]="
                            + str(index + 1)
                        )

    return parts
|
|
@ -0,0 +1,476 @@
|
||||||
|
import datetime
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from .torrent import Torrent, TorrentComment, TorrentFile
|
||||||
|
from .categories import categories
|
||||||
|
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
from selenium.common.exceptions import TimeoutException, NoSuchElementException
|
||||||
|
|
||||||
|
from random import randint
|
||||||
|
|
||||||
|
# Top-level domain of the site; every URL below is derived from it.
YGGTORRENT_TLD = "se"

YGGTORRENT_BASE_URL = f"https://www2.yggtorrent.{YGGTORRENT_TLD}"

YGGTORRENT_LOGIN_URL = f"{YGGTORRENT_BASE_URL}/user/login"
YGGTORRENT_LOGOUT_URL = f"{YGGTORRENT_BASE_URL}/user/logout"

YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="

logger = logging.getLogger("yggtorrentscraper")

YGGTORRENT_DOMAIN = f".yggtorrent.{YGGTORRENT_TLD}"
YGGTORRENT_TOKEN_COOKIE = "ygg_"

# Query-string fragments appended to YGGTORRENT_SEARCH_URL.
YGGTORRENT_SEARCH_URL_DESCRIPTION = "&description="
YGGTORRENT_SEARCH_URL_FILE = "&file="
YGGTORRENT_SEARCH_URL_UPLOADER = "&uploader="
YGGTORRENT_SEARCH_URL_CATEGORY = "&category="
YGGTORRENT_SEARCH_URL_SUB_CATEGORY = "&sub_category="
YGGTORRENT_SEARCH_URL_ORDER = "&order="
YGGTORRENT_SEARCH_URL_SORT = "&sort="
YGGTORRENT_SEARCH_URL_DO = "&do="
YGGTORRENT_SEARCH_URL_PAGE = "&page="

YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
# Built from the base URL like its siblings; the previous literal host
# "www2.yggtorrentchg" was not a valid host name.
YGGTORRENT_GET_INFO = f"{YGGTORRENT_BASE_URL}/engine/get_nfo?torrent="

YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"

# Step of the "page" search parameter between two result pages.
TORRENT_PER_PAGE = 50

YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
||||||
|
|
||||||
|
|
||||||
|
def set_yggtorrent_tld(yggtorrent_tld=None):
    """
    Redefine all string variable according to new TLD

    Every module-level URL and the cookie domain are rebuilt from
    yggtorrent_tld. The value is interpolated as is, so the default None
    produces URLs ending in ".None".
    """

    global YGGTORRENT_TLD
    global YGGTORRENT_BASE_URL
    global YGGTORRENT_LOGIN_URL
    global YGGTORRENT_LOGOUT_URL
    global YGGTORRENT_SEARCH_URL
    global YGGTORRENT_DOMAIN
    global YGGTORRENT_GET_FILES
    global YGGTORRENT_GET_INFO
    global YGGTORRENT_MOST_COMPLETED_URL
    global YGGTORRENT_FILES_URL

    YGGTORRENT_TLD = yggtorrent_tld

    YGGTORRENT_BASE_URL = f"https://www2.yggtorrent.{YGGTORRENT_TLD}"

    YGGTORRENT_LOGIN_URL = f"{YGGTORRENT_BASE_URL}/user/login"
    # Previously written to YGGTORRENT_SEARCH_URL, so the logout URL kept
    # pointing at the old TLD.
    YGGTORRENT_LOGOUT_URL = f"{YGGTORRENT_BASE_URL}/user/logout"

    YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="

    # Follows the TLD like the module-level definition does
    YGGTORRENT_DOMAIN = f".yggtorrent.{YGGTORRENT_TLD}"

    YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
    YGGTORRENT_GET_INFO = f"{YGGTORRENT_BASE_URL}/engine/get_nfo?torrent="

    YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"

    YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
||||||
|
|
||||||
|
|
||||||
|
def get_yggtorrent_tld():
    """
    Return the top-level domain currently stored in YGGTORRENT_TLD
    """
    return YGGTORRENT_TLD
|
||||||
|
|
||||||
|
|
||||||
|
class YggTorrentScraperSelenium:
    """
    Scrape YggTorrent through a Selenium driven browser

    Either give an existing driver, or give driver_path to start a Chrome
    driver configured by this class.
    """

    # Timeout, in seconds, given to WebDriverWait when waiting for a page.
    # NOTE(review): 30000 seconds is more than eight hours; presumably this
    # leaves time to pass a challenge page by hand - confirm before lowering.
    page_wait_timeout = 30000

    def __init__(self, driver=None, driver_path=None):
        if driver_path is not None:
            options = webdriver.ChromeOptions()
            options.add_argument("--log-level=3")
            options.add_argument("--disable-blink-features")
            options.add_argument("--disable-blink-features=AutomationControlled")
            options.add_experimental_option("excludeSwitches", ["enable-logging"])

            self.driver = webdriver.Chrome(driver_path, options=options)
        else:
            self.driver = driver

    def _wait_for(self, css_selector, timeout=None):
        """
        Block until an element matching css_selector is present in the page
        """
        if timeout is None:
            timeout = self.page_wait_timeout

        WebDriverWait(self.driver, timeout).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, css_selector))
        )

    def _find(self, css_selector):
        """
        Return the first element matching css_selector

        Uses find_element(By.CSS_SELECTOR, ...), available in Selenium 3 and
        4, instead of find_element_by_css_selector, removed in Selenium 4.
        """
        return self.driver.find_element(By.CSS_SELECTOR, css_selector)

    @staticmethod
    def _strip_escaped_whitespace(text):
        """
        Remove the literal two-character sequences backslash-r, -n and -t
        """
        for escaped in ("\\r", "\\n", "\\t"):
            text = text.replace(escaped, "")

        return text

    def login(self, identifiant, password):
        """
        Log in by filling the login form of the home page

        Return False when one of the error messages of the form is shown or
        when the panel button does not appear within 5 seconds, True
        otherwise.
        """
        self.driver.get(YGGTORRENT_BASE_URL)

        self._wait_for("#title")

        # Clicks go through JavaScript rather than WebElement.click()
        self.driver.execute_script("arguments[0].click();", self._find("#register"))

        input_identifiant = self._find("input[name='id']")
        input_identifiant.clear()
        input_identifiant.send_keys(identifiant)

        input_password = self._find("input[name='pass']")
        input_password.clear()
        input_password.send_keys(password)

        self.driver.execute_script(
            "arguments[0].click();", self._find("#user-login button")
        )

        time.sleep(1)

        account_banned = self._find("#ban_msg_login")
        invalid_password = self._find("#login_msg_pass")
        not_activated_account = self._find("#login_msg_mail")

        # An empty style attribute is treated as "this message is displayed"
        if (
            len(account_banned.get_attribute("style")) == 0
            or len(invalid_password.get_attribute("style")) == 0
            or len(not_activated_account.get_attribute("style")) == 0
        ):
            return False

        try:
            self._wait_for("#panel-btn", timeout=5)
        except TimeoutException:
            return False

        return True

    def logout(self):
        """
        Logout request

        Return True when the panel button is no longer in the page after
        loading the logout URL, False otherwise.
        """
        self.driver.get(YGGTORRENT_LOGOUT_URL)

        time.sleep(1)

        try:
            self._find("#panel-btn")
        except NoSuchElementException:
            return True

        return False

    def search(self, parameters):
        """
        Return the torrent page urls matching the search parameters
        """
        search_url = create_search_url(parameters)

        return self.get_torrents_url(search_url, parameters)

    def extract_details(self, torrent_url):
        """
        Extract informations from torrent's url

        Return a Torrent filled with url, keywords, download url, connection
        counters, name, size, uploader, upload date, comments and files.
        """
        logger.debug("torrent_url : %s", torrent_url)

        self.driver.get(torrent_url)

        self._wait_for("#title")

        torrent_page = BeautifulSoup(self.driver.page_source, features="lxml")

        torrent = Torrent()
        torrent.url = torrent_url

        for term_tag in torrent_page.find_all("a", {"class": "term"}):
            torrent.keywords.append(term_tag.text)

        connection_tags = torrent_page.find("tr", {"id": "adv_search_cat"}).find_all(
            "strong"
        )

        informations_tag = (
            torrent_page.find("table", {"class": "informations"})
            .find("tbody")
            .find_all("tr")
        )

        download_button = torrent_page.find("a", {"class": "butt"})

        if download_button is not None and download_button.has_attr("href"):
            torrent.download_url = download_button["href"]

        torrent.seeders = int(connection_tags[0].text.replace(" ", ""))
        torrent.leechers = int(connection_tags[1].text.replace(" ", ""))
        torrent.completed = int(connection_tags[2].text.replace(" ", ""))

        torrent.name = informations_tag[0].find_all("td")[1].text
        torrent.size = informations_tag[3].find_all("td")[1].text
        torrent.uploader = informations_tag[5].find_all("td")[1].text

        # Raw string: "\/" is an invalid escape in a normal string literal
        mydatetime = re.search(
            r"([0-9]*/[0-9]*/[0-9]* [0-9]*:[0-9]*)",
            informations_tag[6].find_all("td")[1].text,
        ).group(0)

        torrent.uploaded_datetime = datetime.datetime.strptime(
            mydatetime, "%d/%m/%Y %H:%M"
        )

        for message_tag in torrent_page.find_all("div", {"class": "message"}):
            torrent_comment = TorrentComment()

            torrent_comment.author = message_tag.find("a").text
            torrent_comment.posted = message_tag.find("strong").text
            torrent_comment.text = message_tag.find(
                "span", {"id": "comment_text"}
            ).text.strip()

            torrent.comments.append(torrent_comment)

        # NOTE(review): the same page is loaded a second time and every table
        # row of it is read as a file entry - confirm this is the intended
        # source of the file list.
        self.driver.get(torrent_url)

        self._wait_for("#informationsContainer")

        files_page = BeautifulSoup(self.driver.page_source, features="lxml")

        for file_tag in files_page.find_all("tr"):
            td_tags = file_tag.find_all("td")

            # Rows without a size cell and a name cell do not describe a file
            if len(td_tags) < 2:
                continue

            torrent_file = TorrentFile()

            torrent_file.file_size = self._strip_escaped_whitespace(
                td_tags[0].text
            ).strip()

            torrent_file.file_name = (
                self._strip_escaped_whitespace(td_tags[1].text)
                .replace("\\", "")
                .replace(" ", "")
                .strip()
            )

            torrent.files.append(torrent_file)

        return torrent

    def most_completed(self):
        """
        Return the most completed torrents url (TOP 100)
        """
        self.driver.get(YGGTORRENT_MOST_COMPLETED_URL)

        self._wait_for("#DataTables_Table_0_wrapper")

        root = BeautifulSoup(self.driver.page_source, features="lxml")

        torrents_url = []

        for tr_element in root.find("tbody").find_all("tr"):
            # The second link of each row is the one collected
            torrents_url.append(tr_element.find_all("a")[1]["href"])

        return torrents_url

    def get_torrents_url(self, search_url, parameters):
        """
        Return the torrent page urls of every result page of a search

        search_url is loaded once to read the number of result pages, then
        each page is loaded with its own "page" parameter.

        The parameters dict of the caller is left untouched.
        """
        self.driver.get(search_url)

        self._wait_for("#criteriarecherche")

        search_page = BeautifulSoup(self.driver.page_source, features="lxml")

        pagination = search_page.find("ul", {"class": "pagination"})

        if pagination is None:
            limit_page = 1
        else:
            pagination_item = pagination.find_all("a")

            limit_page = int(pagination_item[-1]["data-ci-pagination-page"])

        # Work on a copy: writing "page" into the caller's dict would leak
        # the last offset into its next search.
        page_parameters = dict(parameters)

        torrents = []

        for page in range(limit_page):
            page_parameters["page"] = page * TORRENT_PER_PAGE

            self.driver.get(create_search_url(page_parameters))

            self._wait_for("#over-18-notification")

            search_page = BeautifulSoup(self.driver.page_source, features="lxml")

            for torrent_tag in search_page.find_all("a", {"id": "torrent_name"}):
                torrents.append(torrent_tag["href"])

        return torrents

    def download_from_torrent_url(self, torrent_url=None, destination_path="./"):
        """
        Open the torrent page and click its download button

        destination_path is accepted for signature compatibility with the
        requests based scraper but is not used: the browser decides where
        the file is saved. Nothing is returned.
        """
        if torrent_url is None:
            return

        self.driver.get(torrent_url)

        self._wait_for("#title")

        self.driver.execute_script("arguments[0].click();", self._find("a.butt"))

    def download_from_torrent(self, torrent=None, destination_path="./"):
        """
        Download the torrent file of an already extracted Torrent
        """
        if torrent is not None:
            self.download_from_torrent_url(
                torrent.url, destination_path=destination_path
            )
|
||||||
|
|
||||||
|
|
||||||
|
def create_search_url(parameters):
    """
    Return a formatted URL for a torrent search

    parameters is a dict; the keys read here are "name", "page",
    "descriptions", "files", "uploader", "sort", "order", "category",
    "subcategory" and "options". Every key is optional.

    Free-text values (name, descriptions, files, uploader) are
    percent-encoded so that characters such as "&" or "#" cannot break the
    query string; spaces are still sent as "+".
    """
    from urllib.parse import quote_plus

    def option_fragments(searched_options, available_options):
        # One "&option_<name>[]=<position>" fragment per searched value found
        # in an available option; positions are 1-based.
        fragments = []

        for key, values in searched_options.items():
            for option in available_options:
                if key != option["name"]:
                    continue

                multiple = "%3Amultiple" if "multiple" in option else ""

                for searched_value in values:
                    for index, value in enumerate(option["values"]):
                        if searched_value == value:
                            fragments.append(
                                "&option_"
                                + option["name"]
                                + multiple
                                + "[]="
                                + str(index + 1)
                            )

        return fragments

    pieces = [YGGTORRENT_SEARCH_URL]

    if "name" in parameters:
        pieces.append(quote_plus(parameters["name"]))

    if "page" in parameters:
        pieces.append(YGGTORRENT_SEARCH_URL_PAGE + str(parameters["page"]))

    if "descriptions" in parameters:
        pieces.append(YGGTORRENT_SEARCH_URL_DESCRIPTION)
        # Each term keeps its trailing "+" separator, as before
        pieces.extend(
            quote_plus(description) + "+"
            for description in parameters["descriptions"]
        )

    if "files" in parameters:
        pieces.append(YGGTORRENT_SEARCH_URL_FILE)
        pieces.extend(quote_plus(file) + "+" for file in parameters["files"])

    if "uploader" in parameters:
        pieces.append(
            YGGTORRENT_SEARCH_URL_UPLOADER + quote_plus(parameters["uploader"])
        )

    if "sort" in parameters:
        pieces.append(YGGTORRENT_SEARCH_URL_SORT + parameters["sort"])

    if "order" in parameters:
        pieces.append(YGGTORRENT_SEARCH_URL_ORDER + parameters["order"])

    if "category" in parameters:
        for category in categories:
            if parameters["category"] != category["name"]:
                continue

            pieces.append(YGGTORRENT_SEARCH_URL_CATEGORY + category["id"])

            if "subcategory" not in parameters:
                continue

            for subcategory in category["subcategories"]:
                if parameters["subcategory"] != subcategory["name"]:
                    continue

                pieces.append(YGGTORRENT_SEARCH_URL_SUB_CATEGORY + subcategory["id"])

                if "options" in parameters:
                    pieces.extend(
                        option_fragments(
                            parameters["options"], subcategory["options"]
                        )
                    )

    pieces.append(YGGTORRENT_SEARCH_URL_DO + "search")

    return "".join(pieces)
|
Loading…
Reference in New Issue