From 75da5242d06d4af023721705f8cfbaba62a65dfd Mon Sep 17 00:00:00 2001 From: qo-op Date: Thu, 26 Mar 2020 21:30:50 +0100 Subject: [PATCH 1/7] poire --- crawl.py | 2 +- install.sh | 12 +++++++++--- lib/py/scrapactions.py | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/crawl.py b/crawl.py index dab5a8a..19a2c49 100755 --- a/crawl.py +++ b/crawl.py @@ -25,7 +25,7 @@ from termcolor import colored # Load scraper from yggcrawl import YggTorrentScraper scraper = YggTorrentScraper(requests.session()) -from yggtorrentscraper import set_yggtorrent_tld +from yggcrawl import set_yggtorrent_tld set_yggtorrent_tld("se") name = ' '.join(sys.argv[1:]) diff --git a/install.sh b/install.sh index fdf040f..dc40f98 100755 --- a/install.sh +++ b/install.sh @@ -28,9 +28,15 @@ sbotc() { } # Install Transmission -transmisison() { +transmission() { echo -e "${c_yellow}Installing Transmision...$c_" sudo apt install transmission-daemon --install-suggests + sudo apt install transmission-cli + + # stop + # Copy login.py info to /etc/transmission/settings.json + # start + } # Install pip tools @@ -68,8 +74,8 @@ pip3() { iptubes() { [[ -z $(which pip3) ]] && pip3 /usr/bin/pip3 install $(curl -s https://raw.githubusercontent.com/Harkame/YggTorrentScraper/master/requirements.txt) - chgrp -R debian-transmission data/ - chmod -R g+w data/ + sudo chgrp -R debian-transmission data/ + sudo chmod -R g+w data/ sudo service transmission-daemon restart cp login.py.template login.py cd lib/py/ diff --git a/lib/py/scrapactions.py b/lib/py/scrapactions.py index 06397dd..7dff908 100755 --- a/lib/py/scrapactions.py +++ b/lib/py/scrapactions.py @@ -16,7 +16,7 @@ try: except NameError: from yggcrawl import YggTorrentScraper scraper = YggTorrentScraper(requests.session()) - from yggtorrentscraper import set_yggtorrent_tld + from yggcrawl import set_yggtorrent_tld set_yggtorrent_tld("se") cmd = sys.argv[1] From f508f38e17ae79096d333c9e1886e2d57ce9ba49 Mon Sep 17 00:00:00 2001 From: poka Date: Thu, 26 Mar 2020 23:17:39 +0100 Subject: [PATCH 2/7] Add VPN support --- .vpn/countries | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 .vpn/countries diff --git a/.vpn/countries b/.vpn/countries new file mode 100644 index 0000000..36e0ef8 --- /dev/null +++ b/.vpn/countries @@ -0,0 +1,58 @@ +Albania +Chile +Georgia +Israel +New_Zealand +Slovenia +Ukraine +Argentina +Costa_Rica +Germany +Italy +North_Macedonia +South_Africa +United_Kingdom +Australia +Croatia +Greece +Japan +Norway +South_Korea +United_States +Austria +Cyprus +Hong_Kong +Latvia +Poland +Spain +Vietnam +Belgium +Czech_Republic +Hungary +Luxembourg +Portugal +Sweden +Bosnia_And_Herzegovina +Denmark +Iceland +Malaysia +Romania +Switzerland +Brazil +Estonia +India +Mexico +Serbia +Taiwan +Bulgaria +Finland +Indonesia +Moldova +Singapore +Thailand +Canada +France +Ireland +Netherlands +Slovakia +Turkey From 79e3106c0890454c062978e118fd4d2d6d19ae1e Mon Sep 17 00:00:00 2001 From: poka Date: Thu, 26 Mar 2020 23:18:29 +0100 Subject: [PATCH 3/7] Add VPN support --- lib/scrabash.sh | 9 +++++++++ tata.py | 7 ------- tata.sh | 17 +++++++++++++++++ 3 files changed, 26 insertions(+), 7 deletions(-) delete mode 100755 tata.py create mode 100755 tata.sh diff --git a/lib/scrabash.sh b/lib/scrabash.sh index df990cc..7807d5a 100755 --- a/lib/scrabash.sh +++ b/lib/scrabash.sh @@ -56,6 +56,15 @@ get_details() { fi } +vpn() { + [[ ! $(which nordvpn) ]] && echo "Installaling NordVPN client... && ./install.sh nordvpn" + vpn_citie=$(shuf -n1 .vpn/countries) + echo "Warning: trying to connect to random cities in the world via NordVPN. If you are connected to this machine via SSH, you will lost the connection..." + echo "VPN connection in 5 seconds, press CTRL+C to cancel..." + sleep 5 + nordvpn c $vpn_citie +} + $1 [[ $err == 1 ]] && exit 1 || exit 0 diff --git a/tata.py b/tata.py deleted file mode 100755 index e22ad8a..0000000 --- a/tata.py +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/python3 - -import os - -tata = "187364" - -os.popen(f'cd data/tmp/torrents/ && mv caca.torrent {tata}.torrent') \ No newline at end of file diff --git a/tata.sh b/tata.sh new file mode 100755 index 0000000..07003c8 --- /dev/null +++ b/tata.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + readWords() { + declare -i int="$1" + + (( int == 0 )) && { + printf "%s\n" "$int is 0, cant find 0 words" + return 1 + } + + while read getWords;do + if [[ ${#getWords} -eq $int ]];then + printf "%s\n" "$getWords" + fi + done < /usr/share/dict/words +} + +readWords 20 From 2968a2d312355493e8659840165ec3d6c6f0a3c9 Mon Sep 17 00:00:00 2001 From: poka Date: Fri, 8 May 2020 06:48:26 +0200 Subject: [PATCH 4/7] [Early stage] Add selenium webdriver mechanic to bypass cloudflare --- .gitignore | 1 + crawl.py | 7 +++-- yggcrawl/gecko/torrent_search.py | 44 ++++++++++++++++++++++++++++++++ yggcrawl/yggtorrentscraper.py | 31 ++++++++++++---------- 4 files changed, 66 insertions(+), 17 deletions(-) create mode 100755 yggcrawl/gecko/torrent_search.py diff --git a/.gitignore b/.gitignore index 951fe8d..9d9de72 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ __pycache__/ yggcrawl/__pycache__/ yggcrawl/__init__.pyc login.py +yggcrawl/gecko/geckodriver.log diff --git a/crawl.py b/crawl.py index fefd517..4bb6238 100755 --- a/crawl.py +++ b/crawl.py @@ -104,11 +104,10 @@ def downloadTorrent(): # Download torrent file if(scraper.login(login.user, login.passwd)): print(colored("Login success", 'green')) - subprocess.Popen('[[ $(ls data/tmp/torrents/) ]] && rm data/tmp/torrents/*', executable='/bin/bash') + subprocess.Popen('[[ $(ls data/tmp/torrents/) ]] && rm data/tmp/torrents/*', executable='/bin/bash', stdout=subprocess.PIPE, stderr=subprocess.PIPE) scraper.download_from_torrent_url(research) -# os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent}.torrent && mv *.torrent ../../torrents/') - os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent.strip()}.torrent && mv {idTorrent.strip()}.torrent ../../torrents/') - +# os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent.strip()}.torrent && mv {idTorrent.strip()}.torrent ../../torrents/').read() + os.popen('cd data/tmp/torrents/ && mv *.torrent ../../torrents/') else: print(colored("Login failed", 'red')) sys.exit(1) diff --git a/yggcrawl/gecko/torrent_search.py b/yggcrawl/gecko/torrent_search.py new file mode 100755 index 0000000..1746e73 --- /dev/null +++ b/yggcrawl/gecko/torrent_search.py @@ -0,0 +1,44 @@ +#!/usr/bin/python3 + +# Early exemple of how to use selenium with gecko to bypass cloudflare bots detections +# The only way to block this should be using of captcha in front of every yggtorrent pages by sessions... + +import sys +import time + +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC + +# Exit if no arguments +if len(sys.argv)==1: sys.exit("Please choose a film ou serie name") +else: arg1 = sys.argv[1] + +search_url = f"https://www2.yggtorrent.se/engine/search?name={arg1}&description=&file=&uploader=&category=all&sub_category=&do=search&order=desc&sort=seed" + +# Load webdriver with Gecko +options = webdriver.FirefoxOptions() +options.add_argument('-headless') +driver = webdriver.Firefox(options=options, executable_path=r'/usr/local/bin/geckodriver') +driver.get(search_url) + +# Wait to bypass cloudflare +print("Page atteinte, attente de redirection anti-crawling...") +wait = WebDriverWait(driver, 10) +wait.until(lambda driver: driver.current_url != search_url) + +# Wait 2 seconds to load page +print("Anti-crawling passé, affichage dans 2 secondes ...") +time.sleep(2) + +# Filter torrent urls +elems = driver.find_elements_by_css_selector(".results [href]") +links = [elem.get_attribute('href') for elem in elems] +links = [k for k in links if '/torrent/' in k] + +# Print torrents urls +print("\n".join(links)) + + +driver.quit() diff --git a/yggcrawl/yggtorrentscraper.py b/yggcrawl/yggtorrentscraper.py index d9c0802..66a2dd3 100644 --- a/yggcrawl/yggtorrentscraper.py +++ b/yggcrawl/yggtorrentscraper.py @@ -44,7 +44,7 @@ YGGTORRENT_SEARCH_URL_DO = "&do=" YGGTORRENT_SEARCH_URL_PAGE = "&page=" YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent=" -YGGTORRENT_GET_INFO = f"https://www2.yggtorrentchg/engine/get_nfo?torrent=" +YGGTORRENT_GET_INFO = f"https://www2.yggtorrent.se/engine/get_nfo?torrent=" YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted" @@ -52,6 +52,7 @@ TORRENT_PER_PAGE = 50 YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent=" +headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'} def set_yggtorrent_tld(yggtorrent_tld=None): """ @@ -77,7 +78,7 @@ def set_yggtorrent_tld(yggtorrent_tld=None): YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name=" - YGGTORRENT_DOMAIN = ".yggtorrent.gg" + YGGTORRENT_DOMAIN = ".yggtorrent.se" YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent=" YGGTORRENT_GET_INFO = f"https://www2.yggtorrentchg/engine/get_nfo?torrent=" @@ -108,7 +109,7 @@ class YggTorrentScraper: "User-Agent": "PostmanRuntime/7.17.1", "Accept": "*/*", "Cache-Control": "no-cache", - "Host": f"www.yggtorrent.{YGGTORRENT_TLD}", + "Host": f"www2.yggtorrent.{YGGTORRENT_TLD}", "Accept-Encoding": "gzip, deflate", "Connection": "keep-alive", } @@ -145,7 +146,7 @@ class YggTorrentScraper: """ Logout request """ - response = self.session.get(YGGTORRENT_LOGOUT_URL) + response = self.session.get(YGGTORRENT_LOGOUT_URL, headers=headers) self.session.cookies.clear() @@ -160,12 +161,18 @@ class YggTorrentScraper: return False - def search(self, parameters): + #kopa + def search_old(self, parameters): search_url = create_search_url(parameters) torrents_url = self.get_torrents_url(search_url, parameters) return torrents_url + def search(self, parameters): +# torrents_url = os.popen('gecko/torrent_search.py didier') + torrents_url = exec(open('/home/iptubes/astroport-iptubes/yggcrawl/gecko/torrent_search.py').read()) + return torrents_url + def extract_details(self, torrent_url): """ Extract informations from torrent's url @@ -174,7 +181,7 @@ class YggTorrentScraper: torrents = [] - response = self.session.get(torrent_url) + response = self.session.get(torrent_url, headers=headers) torrent_page = BeautifulSoup(response.content, features="lxml") @@ -237,7 +244,7 @@ class YggTorrentScraper: "input", {"type": "hidden", "name": "target"} )["value"] - response = self.session.get(YGGTORRENT_GET_FILES + torrent_id) + response = self.session.get(YGGTORRENT_GET_FILES + torrent_id, headers=headers) files_page = BeautifulSoup(response.content, features="lxml") @@ -292,12 +299,12 @@ class YggTorrentScraper: return torrents_url +#kopaa def get_torrents_url(self, search_url, parameters): """ Return """ - - response = self.session.get(search_url) + response = self.session.get(search_url, headers=headers) search_page = BeautifulSoup(response.content, features="lxml") @@ -317,7 +324,7 @@ class YggTorrentScraper: search_url = create_search_url(parameters) - response = self.session.get(search_url) + response = self.session.get(search_url, headers=headers) search_page = BeautifulSoup(response.content, features="lxml") @@ -328,7 +335,6 @@ class YggTorrentScraper: return torrents -#kopa def download_from_torrent_url(self, torrent_url=None, destination_path="./data/tmp/torrents/"): if torrent_url is not None: torrent = self.extract_details(torrent_url) @@ -349,7 +355,7 @@ class YggTorrentScraper: if torrent_url is None: raise Exception("Invalid torrent_url, make sure you are logged") - response = self.session.get(YGGTORRENT_BASE_URL + torrent_url) + response = self.session.get(YGGTORRENT_BASE_URL + torrent_url, headers=headers) temp_file_name = response.headers.get("content-disposition") @@ -368,7 +374,6 @@ class YggTorrentScraper: return file_full_path - def create_search_url(parameters): """ Return a formated URL for torrent's search From 4cb3217de54fba1823388546ffd5066aba2e6000 Mon Sep 17 00:00:00 2001 From: poka Date: Wed, 27 May 2020 16:19:36 +0200 Subject: [PATCH 5/7] =?UTF-8?q?Mettre=20=C3=A0=20jour=20'README.md'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7355e5e..da94eb1 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## yggtorrent to IPFS This is a submodule of [Astroport project](https://git.p2p.legal/axiom-team/astroport). You can use it standalone. -IPTubes is a yggtorrent content migrator to a public IPFS swam. +IPTubes is a yggtorrent content migrator to semi-private IPFS swarm. ### Standalone installation From 04738629717923d4718ffc2df23a0f2ed290aafd Mon Sep 17 00:00:00 2001 From: poka Date: Wed, 27 May 2020 16:21:16 +0200 Subject: [PATCH 6/7] =?UTF-8?q?Mettre=20=C3=A0=20jour=20'.gitignore'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9d9de72..fb23d60 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ yggcrawl/__pycache__/ yggcrawl/__init__.pyc login.py yggcrawl/gecko/geckodriver.log +.vscode From 50be4043739f69975efc7eb00fa2ac0cd218b1c1 Mon Sep 17 00:00:00 2001 From: poka Date: Sun, 9 Aug 2020 16:17:27 +0200 Subject: [PATCH 7/7] change for .si tld --- yggcrawl/yggtorrentscraper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yggcrawl/yggtorrentscraper.py b/yggcrawl/yggtorrentscraper.py index 66a2dd3..8d1cd83 100644 --- a/yggcrawl/yggtorrentscraper.py +++ b/yggcrawl/yggtorrentscraper.py @@ -9,7 +9,7 @@ from bs4 import BeautifulSoup from .torrent import Torrent, TorrentComment, TorrentFile from .categories import categories -YGGTORRENT_TLD = "se" +YGGTORRENT_TLD = "si" YGGTORRENT_BASE_URL = f"https://www2.yggtorrent.{YGGTORRENT_TLD}" @@ -44,7 +44,7 @@ YGGTORRENT_SEARCH_URL_DO = "&do=" YGGTORRENT_SEARCH_URL_PAGE = "&page=" YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent=" -YGGTORRENT_GET_INFO = f"https://www2.yggtorrent.se/engine/get_nfo?torrent=" +YGGTORRENT_GET_INFO = f"https://www2.yggtorrent.si/engine/get_nfo?torrent=" YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted" @@ -78,7 +78,7 @@ def set_yggtorrent_tld(yggtorrent_tld=None): YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name=" - YGGTORRENT_DOMAIN = ".yggtorrent.se" + YGGTORRENT_DOMAIN = ".yggtorrent.si" YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent=" YGGTORRENT_GET_INFO = f"https://www2.yggtorrentchg/engine/get_nfo?torrent="