Compare commits

...

3 Commits

Author SHA1 Message Date
poka 4af08a9d82 Merge branch 'master' into scrapalive 2020-08-09 20:41:49 +02:00
poka 56d723ab56 Merge branch 'scrapalive' 2020-08-09 20:27:49 +02:00
poka b2ddcf2f95 Comming back from older commit, scraper is alive. 2020-08-09 20:24:38 +02:00
11 changed files with 38 additions and 170 deletions

.gitignore
View File

@@ -3,5 +3,3 @@ __pycache__/
 yggcrawl/__pycache__/
 yggcrawl/__init__.pyc
 login.py
-yggcrawl/gecko/geckodriver.log
-.vscode

View File

@@ -1,58 +0,0 @@
Albania
Chile
Georgia
Israel
New_Zealand
Slovenia
Ukraine
Argentina
Costa_Rica
Germany
Italy
North_Macedonia
South_Africa
United_Kingdom
Australia
Croatia
Greece
Japan
Norway
South_Korea
United_States
Austria
Cyprus
Hong_Kong
Latvia
Poland
Spain
Vietnam
Belgium
Czech_Republic
Hungary
Luxembourg
Portugal
Sweden
Bosnia_And_Herzegovina
Denmark
Iceland
Malaysia
Romania
Switzerland
Brazil
Estonia
India
Mexico
Serbia
Taiwan
Bulgaria
Finland
Indonesia
Moldova
Singapore
Thailand
Canada
France
Ireland
Netherlands
Slovakia
Turkey

View File

@@ -2,7 +2,7 @@
 ## yggtorrent to IPFS
 This is a submodule of [Astroport project](https://git.p2p.legal/axiom-team/astroport). You can use it standalone.
-IPTubes is a yggtorrent content migrator to semi-private IPFS swarm.
+IPTubes is a yggtorrent content migrator to a public IPFS swam.
 ### Standalone installation

View File

@@ -16,6 +16,7 @@ import requests
 import json
 import sys
 import os
+import shutil
 import subprocess
 import login
 import time
@@ -25,7 +26,7 @@ from termcolor import colored
 # Load scraper
 from yggcrawl import YggTorrentScraper
 scraper = YggTorrentScraper(requests.session())
-from yggcrawl import set_yggtorrent_tld
+from yggtorrentscraper import set_yggtorrent_tld
 set_yggtorrent_tld("se")
 name = ' '.join(sys.argv[1:])
@@ -39,7 +40,7 @@ except ValueError:
 else:
 sys.exit(1)
-# Rollong Files
+# Allow only one torrent downling in same time, and remove oldest torrent if disk size is full.
 def rollingFiles():
 def isDL():
 downloading = os.popen('./trans-ctl.sh downloading').read()
@@ -104,10 +105,13 @@ def downloadTorrent():
 # Download torrent file
 if(scraper.login(login.user, login.passwd)):
 print(colored("Login success", 'green'))
-subprocess.Popen('[[ $(ls data/tmp/torrents/) ]] && rm data/tmp/torrents/*', executable='/bin/bash', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+if len(os.listdir('data/tmp/torrents') ) != 0:
+shutil.rmtree('data/tmp/torrents', ignore_errors=True)
+os.mkdir("data/tmp/torrents")
 scraper.download_from_torrent_url(research)
-# os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent.strip()}.torrent && mv {idTorrent.strip()}.torrent ../../torrents/').read()
-os.popen('cd data/tmp/torrents/ && mv *.torrent ../../torrents/')
+# os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent}.torrent && mv *.torrent ../../torrents/')
+os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent.strip()}.torrent && mv {idTorrent.strip()}.torrent ../../torrents/')
 else:
 print(colored("Login failed", 'red'))
 sys.exit(1)
@@ -121,7 +125,6 @@ def removeTracker():
 time.sleep(tkdelay)
 os.popen('./trans-ctl.sh rmtracker ' + name)
 os.popen('./trans-ctl.sh rmtracker ' + higherid)
-# print(tkresult)
 rollingFiles()
 downloadTorrent()
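
The change above replaces a bash one-liner run through subprocess.Popen with os.listdir/shutil.rmtree/os.mkdir, but still shells out to mv to rename the downloaded .torrent after idTorrent. Below is a minimal sketch of the same cleanup-and-rename step done entirely in Python; the paths and the idTorrent value follow the diff, while the helper name and the pathlib usage are illustrative assumptions, not the project's code.

# Illustrative sketch only: stage_torrent() is a hypothetical helper, not part of the repo.
import shutil
from pathlib import Path

TMP = Path("data/tmp/torrents")    # staging directory used in the diff
DEST = Path("data/torrents")       # ../../torrents/ relative to data/tmp/torrents/

def stage_torrent(id_torrent: str) -> None:
    # Empty and recreate the staging directory so only one .torrent is ever present.
    shutil.rmtree(TMP, ignore_errors=True)
    TMP.mkdir(parents=True, exist_ok=True)
    DEST.mkdir(parents=True, exist_ok=True)
    # ... scraper.download_from_torrent_url(research) would write the file here ...
    for torrent in TMP.glob("*.torrent"):
        # Equivalent of: mv *.torrent {idTorrent}.torrent && mv {idTorrent}.torrent ../../torrents/
        torrent.rename(DEST / f"{id_torrent.strip()}.torrent")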

View File

@@ -31,12 +31,6 @@ sbotc() {
 transmission() {
 echo -e "${c_yellow}Installing Transmision...$c_"
 sudo apt install transmission-daemon --install-suggests
-sudo apt install transmission-cli
-# stop
-# Copy login.py info to /etc/transmission/settings.json
-# start
 }
 # Install pip tools
@@ -74,8 +68,8 @@ pip3() {
 iptubes() {
 [[ -z $(which pip3) ]] && pip3
 /usr/bin/pip3 install $(curl -s https://raw.githubusercontent.com/Harkame/YggTorrentScraper/master/requirements.txt)
-sudo chgrp -R debian-transmission data/
-sudo chmod -R g+w data/
+chgrp -R debian-transmission data/
+chmod -R g+w data/
 sudo service transmission-daemon restart
 cp login.py.template login.py
 cd lib/py/

View File

@@ -16,7 +16,7 @@ try:
 except NameError:
 from yggcrawl import YggTorrentScraper
 scraper = YggTorrentScraper(requests.session())
-from yggcrawl import set_yggtorrent_tld
+from yggtorrentscraper import set_yggtorrent_tld
 set_yggtorrent_tld("se")
 cmd = sys.argv[1]

View File

@@ -56,15 +56,6 @@ get_details() {
 fi
 }
-vpn() {
-[[ ! $(which nordvpn) ]] && echo "Installaling NordVPN client... && ./install.sh nordvpn"
-vpn_citie=$(shuf -n1 .vpn/countries)
-echo "Warning: trying to connect to random cities in the world via NordVPN. If you are connected to this machine via SSH, you will lost the connection..."
-echo "VPN connection in 5 seconds, press CTRL+C to cancel..."
-sleep 5
-nordvpn c $vpn_citie
-}
 $1
 [[ $err == 1 ]] && exit 1 || exit 0

tata.sh
View File

@@ -1,17 +0,0 @@
#!/usr/bin/env bash
readWords() {
declare -i int="$1"
(( int == 0 )) && {
printf "%s\n" "$int is 0, cant find 0 words"
return 1
}
while read getWords;do
if [[ ${#getWords} -eq $int ]];then
printf "%s\n" "$getWords"
fi
done < /usr/share/dict/words
}
readWords 20

View File

@@ -26,9 +26,12 @@ getid() {
 # Get ID
 else
 j=0
-for i in "$name"; do
-[[ $j == 0 ]] && result=$($transcmd --list | grep -vE 'Sum:|ID Done' | grep -i "$i")
-result=$(echo "$result" | grep -vE 'Sum:|ID Done' | grep -iw "$i")
+for i in $name; do
+if [[ $j == 0 ]];then
+result=$($transcmd --list | grep -vE 'Sum:|ID Done' | grep -iw "$i")
+else
+result=$(echo "$result" | grep -iw "$i")
+fi
 ((j++))
 done
 fi
@@ -36,7 +39,7 @@ getid() {
 echo "$result" | awk '{ print $1 }'
 else
 echo "No torrent found"
 fi
 }
 getlowerid() {
@@ -86,13 +89,16 @@ case "$1" in
 remove)
 idt=$(getid | tr -d '*')
 if [[ $idt =~ ^[+-]?[0-9]+([.][0-9]+)?$ ]]; then
-for i in "$($transcmd --list | grep -vE 'Sum:|ID Done' )"; do
+torrentList=$($transcmd --list | grep -vE 'Sum:|ID Done' )
+IFS=$'\n'
+for i in $torrentList; do
 if [[ $(echo "$i" | awk '{ print $1 }') == $idt ]]; then
 fileName=$(echo "$i" | awk '{ print $NF }')
 break
 fi
 done
+IFS=$' '
+[[ ! $fileName ]] && echo "Can't find torrent to remove." && exit 1
 cd data/meta
 torrentId=$(grep -r $fileName | head -n1 | awk -F '/' '{ print $1 }')
 rm -rf $torrentId
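
The reworked getid() above narrows the transmission list one search word at a time: the first word filters the full listing (minus the Sum:/header rows), and each following word filters the previous result with grep -iw. The same idea is sketched in Python below; the function and variable names are ours, and whole-word matching is approximated by splitting on whitespace rather than grep's word boundaries.

# Illustrative sketch of the word-by-word narrowing in getid(); not part of trans-ctl.sh.
def narrow_ids(lines, name):
    # Drop the summary and header rows first, like grep -vE 'Sum:|ID Done'.
    result = [l for l in lines if "Sum:" not in l and "ID Done" not in l]
    # Keep only lines containing every word of the search name, one word at a time.
    for word in name.split():
        result = [l for l in result if word.lower() in l.lower().split()]
    # Return the first column (torrent IDs), like awk '{ print $1 }'.
    return [l.split()[0] for l in result if l.split()]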

View File

@@ -1,44 +0,0 @@
#!/usr/bin/python3
# Early exemple of how to use selenium with gecko to bypass cloudflare bots detections
# The only way to block this should be using of captcha in front of every yggtorrent pages by sessions...
import sys
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Exit if no arguments
if len(sys.argv)==1: sys.exit("Please choose a film ou serie name")
else: arg1 = sys.argv[1]
search_url = f"https://www2.yggtorrent.se/engine/search?name={arg1}&description=&file=&uploader=&category=all&sub_category=&do=search&order=desc&sort=seed"
# Load webdriver with Gecko
options = webdriver.FirefoxOptions()
options.add_argument('-headless')
driver = webdriver.Firefox(options=options, executable_path=r'/usr/local/bin/geckodriver')
driver.get(search_url)
# Wait to bypass cloudflare
print("Page atteinte, attente de redirection anti-crawling...")
wait = WebDriverWait(driver, 10)
wait.until(lambda driver: driver.current_url != search_url)
# Wait 2 seconds to load page
print("Anti-crawling passé, affichage dans 2 secondes ...")
time.sleep(2)
# Filter torrent urls
elems = driver.find_elements_by_css_selector(".results [href]")
links = [elem.get_attribute('href') for elem in elems]
links = [k for k in links if '/torrent/' in k]
# Print torrents urls
print("\n".join(links))
driver.quit()

View File

@@ -44,7 +44,7 @@ YGGTORRENT_SEARCH_URL_DO = "&do="
 YGGTORRENT_SEARCH_URL_PAGE = "&page="
 YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
-YGGTORRENT_GET_INFO = f"https://www2.yggtorrent.si/engine/get_nfo?torrent="
+YGGTORRENT_GET_INFO = f"{YGGTORRENT_BASE_URL}/engine/get_nfo?torrent="
 YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"
@@ -52,7 +52,6 @@ TORRENT_PER_PAGE = 50
 YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
-headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
 def set_yggtorrent_tld(yggtorrent_tld=None):
 """
@@ -78,7 +77,7 @@ def set_yggtorrent_tld(yggtorrent_tld=None):
 YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="
-YGGTORRENT_DOMAIN = ".yggtorrent.si"
+YGGTORRENT_DOMAIN = ".yggtorrent.gg"
 YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
 YGGTORRENT_GET_INFO = f"https://www2.yggtorrentchg/engine/get_nfo?torrent="
@@ -109,7 +108,7 @@ class YggTorrentScraper:
 "User-Agent": "PostmanRuntime/7.17.1",
 "Accept": "*/*",
 "Cache-Control": "no-cache",
-"Host": f"www2.yggtorrent.{YGGTORRENT_TLD}",
+"Host": f"www.yggtorrent.{YGGTORRENT_TLD}",
 "Accept-Encoding": "gzip, deflate",
 "Connection": "keep-alive",
 }
@@ -146,7 +145,7 @@ class YggTorrentScraper:
 """
 Logout request
 """
-response = self.session.get(YGGTORRENT_LOGOUT_URL, headers=headers)
+response = self.session.get(YGGTORRENT_LOGOUT_URL)
 self.session.cookies.clear()
@@ -161,18 +160,12 @@ class YggTorrentScraper:
 return False
-#kopa
-def search_old(self, parameters):
+def search(self, parameters):
 search_url = create_search_url(parameters)
 torrents_url = self.get_torrents_url(search_url, parameters)
 return torrents_url
-def search(self, parameters):
-# torrents_url = os.popen('gecko/torrent_search.py didier')
-torrents_url = exec(open('/home/iptubes/astroport-iptubes/yggcrawl/gecko/torrent_search.py').read())
-return torrents_url
 def extract_details(self, torrent_url):
 """
 Extract informations from torrent's url
@@ -181,7 +174,7 @@ class YggTorrentScraper:
 torrents = []
-response = self.session.get(torrent_url, headers=headers)
+response = self.session.get(torrent_url)
 torrent_page = BeautifulSoup(response.content, features="lxml")
@@ -244,7 +237,7 @@ class YggTorrentScraper:
 "input", {"type": "hidden", "name": "target"}
 )["value"]
-response = self.session.get(YGGTORRENT_GET_FILES + torrent_id, headers=headers)
+response = self.session.get(YGGTORRENT_GET_FILES + torrent_id)
 files_page = BeautifulSoup(response.content, features="lxml")
@@ -299,12 +292,12 @@ class YggTorrentScraper:
 return torrents_url
-#kopaa
 def get_torrents_url(self, search_url, parameters):
 """
 Return
 """
-response = self.session.get(search_url, headers=headers)
+response = self.session.get(search_url)
 search_page = BeautifulSoup(response.content, features="lxml")
@@ -324,7 +317,7 @@ class YggTorrentScraper:
 search_url = create_search_url(parameters)
-response = self.session.get(search_url, headers=headers)
+response = self.session.get(search_url)
 search_page = BeautifulSoup(response.content, features="lxml")
@@ -335,6 +328,7 @@ class YggTorrentScraper:
 return torrents
+#kopa
 def download_from_torrent_url(self, torrent_url=None, destination_path="./data/tmp/torrents/"):
 if torrent_url is not None:
 torrent = self.extract_details(torrent_url)
@@ -355,7 +349,7 @@ class YggTorrentScraper:
 if torrent_url is None:
 raise Exception("Invalid torrent_url, make sure you are logged")
-response = self.session.get(YGGTORRENT_BASE_URL + torrent_url, headers=headers)
+response = self.session.get(YGGTORRENT_BASE_URL + torrent_url)
 temp_file_name = response.headers.get("content-disposition")
@@ -374,6 +368,7 @@ class YggTorrentScraper:
 return file_full_path
 def create_search_url(parameters):
 """
 Return a formated URL for torrent's search
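
Taken together, the changes to this module drop the hand-built headers dict and the Selenium/gecko detour in search(), going back to plain requests.Session calls parsed with BeautifulSoup. A minimal sketch of that restored request path, with names mirroring the diff but otherwise illustrative rather than the module's actual code:

# Illustrative only; the real methods live on YggTorrentScraper.
import requests
from bs4 import BeautifulSoup

def fetch_search_page(session: requests.Session, search_url: str) -> BeautifulSoup:
    response = session.get(search_url)  # no custom headers= argument any more
    return BeautifulSoup(response.content, features="lxml")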