Merge branch 'master' into scrapalive

Merge branch 'scrapalive'
Coming back from older commit, scraper is alive.
2020-08-09 20:41:49 +02:00 · 2020-08-09 20:27:49 +02:00 · 2020-08-09 20:24:38 +02:00
11 changed files with 38 additions and 170 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,5 +3,3 @@ __pycache__/
 yggcrawl/__pycache__/
 yggcrawl/__init__.pyc
 login.py
-yggcrawl/gecko/geckodriver.log
-.vscode
--- a/.vpn/countries
+++ b/.vpn/countries
@ -1,58 +0,0 @@
-Albania
-Chile
-Georgia
-Israel
-New_Zealand
-Slovenia
-Ukraine
-Argentina
-Costa_Rica
-Germany
-Italy
-North_Macedonia
-South_Africa
-United_Kingdom
-Australia
-Croatia
-Greece
-Japan
-Norway
-South_Korea
-United_States
-Austria
-Cyprus
-Hong_Kong
-Latvia
-Poland
-Spain
-Vietnam
-Belgium
-Czech_Republic
-Hungary
-Luxembourg
-Portugal
-Sweden
-Bosnia_And_Herzegovina
-Denmark
-Iceland
-Malaysia
-Romania
-Switzerland
-Brazil
-Estonia
-India
-Mexico
-Serbia
-Taiwan
-Bulgaria
-Finland
-Indonesia
-Moldova
-Singapore
-Thailand
-Canada
-France
-Ireland
-Netherlands
-Slovakia
-Turkey
--- a/README.md
+++ b/README.md
@ -2,7 +2,7 @@
 ## yggtorrent to IPFS

 This is a submodule of [Astroport project](https://git.p2p.legal/axiom-team/astroport). You can use it standalone.
-IPTubes is a yggtorrent content migrator to semi-private IPFS swarm.
+IPTubes is a yggtorrent content migrator to a public IPFS swarm.

 ### Standalone installation

--- a/crawl.py
+++ b/crawl.py
@ -16,6 +16,7 @@ import requests
 import json
 import sys
 import os
+import shutil
 import subprocess
 import login
 import time
@ -25,7 +26,7 @@ from termcolor import colored
 # Load scraper
 from yggcrawl import YggTorrentScraper
 scraper = YggTorrentScraper(requests.session())
-from yggcrawl import set_yggtorrent_tld
+from yggtorrentscraper import set_yggtorrent_tld
 set_yggtorrent_tld("se")
 name = ' '.join(sys.argv[1:])

@ -39,7 +40,7 @@ except ValueError:
 else:
    sys.exit(1)

-# Rollong Files
+# Allow only one torrent downloading at a time, and remove the oldest torrent if the disk is full.
 def rollingFiles():
        def isDL():
            downloading = os.popen('./trans-ctl.sh downloading').read()
@ -104,10 +105,13 @@ def downloadTorrent():
        # Download torrent file
        if(scraper.login(login.user, login.passwd)):
            print(colored("Login success", 'green'))
-            subprocess.Popen('[[ $(ls data/tmp/torrents/) ]] && rm data/tmp/torrents/*', executable='/bin/bash', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            if len(os.listdir('data/tmp/torrents') ) != 0:
+                shutil.rmtree('data/tmp/torrents', ignore_errors=True)
+                os.mkdir("data/tmp/torrents")
            scraper.download_from_torrent_url(research)
-#            os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent.strip()}.torrent && mv {idTorrent.strip()}.torrent ../../torrents/').read()
-            os.popen('cd data/tmp/torrents/ && mv *.torrent ../../torrents/')
+#            os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent}.torrent && mv *.torrent ../../torrents/')
+            os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent.strip()}.torrent && mv {idTorrent.strip()}.torrent ../../torrents/')
+
        else:
            print(colored("Login failed", 'red'))
            sys.exit(1)
@ -121,7 +125,6 @@ def removeTracker():
        time.sleep(tkdelay)
        os.popen('./trans-ctl.sh rmtracker ' + name)
        os.popen('./trans-ctl.sh rmtracker ' + higherid)
-#        print(tkresult)

 rollingFiles()
 downloadTorrent()
--- a/install.sh
+++ b/install.sh
@ -31,12 +31,6 @@ sbotc() {
 transmission() {
        echo -e "${c_yellow}Installing Transmision...$c_"
 	sudo apt install transmission-daemon --install-suggests
-	sudo apt install transmission-cli
-
-	# stop
-	# Copy login.py info to /etc/transmission/settings.json
-	# start
-
 }

 # Install pip tools
@ -74,8 +68,8 @@ pip3() {
 iptubes() {
 	[[ -z $(which pip3) ]] && pip3
 	/usr/bin/pip3 install $(curl -s https://raw.githubusercontent.com/Harkame/YggTorrentScraper/master/requirements.txt)
-	sudo chgrp -R debian-transmission data/
-	sudo chmod -R g+w data/
+	chgrp -R debian-transmission data/
+	chmod -R g+w data/
 	sudo service transmission-daemon restart
 	cp login.py.template login.py
 	cd lib/py/
--- a/lib/py/scrapactions.py
+++ b/lib/py/scrapactions.py
@ -16,7 +16,7 @@ try:
 except NameError:
    from yggcrawl import YggTorrentScraper
    scraper = YggTorrentScraper(requests.session())
-    from yggcrawl import set_yggtorrent_tld
+    from yggtorrentscraper import set_yggtorrent_tld
    set_yggtorrent_tld("se")

 cmd = sys.argv[1]
--- a/lib/scrabash.sh
+++ b/lib/scrabash.sh
@ -56,15 +56,6 @@ get_details() {
 	fi
 }

-vpn() {
-	[[ ! $(which nordvpn) ]] && echo "Installaling NordVPN client... && ./install.sh nordvpn"
-	vpn_citie=$(shuf -n1 .vpn/countries)
-	echo "Warning: trying to connect to random cities in the world via NordVPN. If you are connected to this machine via SSH, you will lost the connection..."
-	echo "VPN connection in 5 seconds, press CTRL+C to cancel..."
-	sleep 5
-	nordvpn c $vpn_citie
-}
-
 $1

 [[ $err == 1 ]] && exit 1 || exit 0
--- a/tata.sh
+++ b/tata.sh
@ -1,17 +0,0 @@
-#!/usr/bin/env bash
- readWords() {
-    declare -i int="$1"
-
-    (( int == 0 )) && {
-       printf "%s\n" "$int is 0, cant find 0 words"
-       return 1
-    }
-
-    while read getWords;do
-       if [[ ${#getWords} -eq $int ]];then
-         printf "%s\n" "$getWords"
-       fi
-   done < /usr/share/dict/words
-}
-
-readWords 20
--- a/trans-ctl.sh
+++ b/trans-ctl.sh
@ -26,9 +26,12 @@ getid() {
 	# Get ID
 	else
 		j=0
-		for i in "$name"; do
-			[[ $j == 0 ]] && result=$($transcmd --list | grep -vE 'Sum:|ID     Done' | grep -i "$i")
-			result=$(echo "$result" | grep -vE 'Sum:|ID     Done' | grep -iw "$i")
+		for i in $name; do
+			if [[ $j == 0 ]];then
+				result=$($transcmd --list | grep -vE 'Sum:|ID     Done' | grep -iw "$i")
+			else
+				result=$(echo "$result" | grep -iw "$i")
+			fi
 			((j++))
 		done
 	fi
@ -36,7 +39,7 @@ getid() {
 		echo "$result" | awk '{ print $1 }'
 	else
 		echo "No torrent found"
-		fi
+	fi
 }

 getlowerid() {
@ -86,13 +89,16 @@ case "$1" in
 	remove)
 		idt=$(getid | tr -d '*')
 		if [[ $idt =~ ^[+-]?[0-9]+([.][0-9]+)?$ ]]; then
-			for i in "$($transcmd --list | grep -vE 'Sum:|ID     Done' )"; do
+			torrentList=$($transcmd --list | grep -vE 'Sum:|ID     Done' )
+			IFS=$'\n'
+			for i in $torrentList; do
 				if [[ $(echo "$i" | awk '{ print $1 }') == $idt ]]; then
 					fileName=$(echo "$i" | awk '{ print $NF }')
 					break
 				fi
 			done
-
+			IFS=$' '
+			[[ ! $fileName ]] && echo "Can't find torrent to remove." && exit 1
 			cd data/meta
 			torrentId=$(grep -r $fileName | head -n1 | awk -F '/' '{ print $1 }')
 			rm -rf $torrentId
--- a/yggcrawl/gecko/torrent_search.py
+++ b/yggcrawl/gecko/torrent_search.py
@ -1,44 +0,0 @@
-#!/usr/bin/python3
-
-# Early exemple of how to use selenium with gecko to bypass cloudflare bots detections
-# The only way to block this should be using of captcha in front of every yggtorrent pages by sessions...
-
-import sys
-import time
-
-from selenium import webdriver
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-
-# Exit if no arguments
-if len(sys.argv)==1: sys.exit("Please choose a film ou serie name")
-else: arg1 = sys.argv[1]
-
-search_url = f"https://www2.yggtorrent.se/engine/search?name={arg1}&description=&file=&uploader=&category=all&sub_category=&do=search&order=desc&sort=seed"
-
-# Load webdriver with Gecko
-options = webdriver.FirefoxOptions()
-options.add_argument('-headless')
-driver = webdriver.Firefox(options=options, executable_path=r'/usr/local/bin/geckodriver')
-driver.get(search_url)
-
-# Wait to bypass cloudflare
-print("Page atteinte, attente de redirection anti-crawling...")
-wait = WebDriverWait(driver, 10)
-wait.until(lambda driver: driver.current_url != search_url)
-
-# Wait 2 seconds to load page
-print("Anti-crawling passé, affichage dans 2 secondes ...")
-time.sleep(2)
-
-# Filter torrent urls
-elems = driver.find_elements_by_css_selector(".results [href]")
-links = [elem.get_attribute('href') for elem in elems]
-links = [k for k in links if '/torrent/' in k]
-
-# Print torrents urls
-print("\n".join(links))
-
-
-driver.quit()
--- a/yggcrawl/yggtorrentscraper.py
+++ b/yggcrawl/yggtorrentscraper.py
@ -44,7 +44,7 @@ YGGTORRENT_SEARCH_URL_DO = "&do="
 YGGTORRENT_SEARCH_URL_PAGE = "&page="

 YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
-YGGTORRENT_GET_INFO = f"https://www2.yggtorrent.si/engine/get_nfo?torrent="
+YGGTORRENT_GET_INFO = f"{YGGTORRENT_BASE_URL}/engine/get_nfo?torrent="

 YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"

@ -52,7 +52,6 @@ TORRENT_PER_PAGE = 50

 YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="

-headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

 def set_yggtorrent_tld(yggtorrent_tld=None):
    """
@ -78,7 +77,7 @@ def set_yggtorrent_tld(yggtorrent_tld=None):

    YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="

-    YGGTORRENT_DOMAIN = ".yggtorrent.si"
+    YGGTORRENT_DOMAIN = ".yggtorrent.gg"

    YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
    YGGTORRENT_GET_INFO = f"https://www2.yggtorrentchg/engine/get_nfo?torrent="
@ -109,7 +108,7 @@ class YggTorrentScraper:
            "User-Agent": "PostmanRuntime/7.17.1",
            "Accept": "*/*",
            "Cache-Control": "no-cache",
-            "Host": f"www2.yggtorrent.{YGGTORRENT_TLD}",
+            "Host": f"www.yggtorrent.{YGGTORRENT_TLD}",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "keep-alive",
        }
@ -146,7 +145,7 @@ class YggTorrentScraper:
        """
        Logout request
        """
-        response = self.session.get(YGGTORRENT_LOGOUT_URL, headers=headers)
+        response = self.session.get(YGGTORRENT_LOGOUT_URL)

        self.session.cookies.clear()

@ -161,18 +160,12 @@ class YggTorrentScraper:

            return False

-    #kopa
-    def search_old(self, parameters):
+    def search(self, parameters):
        search_url = create_search_url(parameters)
        torrents_url = self.get_torrents_url(search_url, parameters)

        return torrents_url

-    def search(self, parameters):
-#        torrents_url = os.popen('gecko/torrent_search.py didier')
-        torrents_url = exec(open('/home/iptubes/astroport-iptubes/yggcrawl/gecko/torrent_search.py').read())
-        return torrents_url
-
    def extract_details(self, torrent_url):
        """
        Extract informations from torrent's url
@ -181,7 +174,7 @@ class YggTorrentScraper:

        torrents = []

-        response = self.session.get(torrent_url, headers=headers)
+        response = self.session.get(torrent_url)

        torrent_page = BeautifulSoup(response.content, features="lxml")

@ -244,7 +237,7 @@ class YggTorrentScraper:
            "input", {"type": "hidden", "name": "target"}
        )["value"]

-        response = self.session.get(YGGTORRENT_GET_FILES + torrent_id, headers=headers)
+        response = self.session.get(YGGTORRENT_GET_FILES + torrent_id)

        files_page = BeautifulSoup(response.content, features="lxml")

@ -299,12 +292,12 @@ class YggTorrentScraper:

        return torrents_url

-#kopaa
    def get_torrents_url(self, search_url, parameters):
        """
        Return
        """
-        response = self.session.get(search_url, headers=headers)
+
+        response = self.session.get(search_url)

        search_page = BeautifulSoup(response.content, features="lxml")

@ -324,7 +317,7 @@ class YggTorrentScraper:

            search_url = create_search_url(parameters)

-            response = self.session.get(search_url, headers=headers)
+            response = self.session.get(search_url)

            search_page = BeautifulSoup(response.content, features="lxml")

@ -335,6 +328,7 @@ class YggTorrentScraper:

        return torrents

+#kopa
    def download_from_torrent_url(self, torrent_url=None, destination_path="./data/tmp/torrents/"):
        if torrent_url is not None:
            torrent = self.extract_details(torrent_url)
@ -355,7 +349,7 @@ class YggTorrentScraper:
        if torrent_url is None:
            raise Exception("Invalid torrent_url, make sure you are logged")

-        response = self.session.get(YGGTORRENT_BASE_URL + torrent_url, headers=headers)
+        response = self.session.get(YGGTORRENT_BASE_URL + torrent_url)

        temp_file_name = response.headers.get("content-disposition")

@ -374,6 +368,7 @@ class YggTorrentScraper:

        return file_full_path

+
 def create_search_url(parameters):
    """
    Return a formated URL for torrent's search
Author	SHA1	Message	Date
poka	4af08a9d82	Merge branch 'master' into scrapalive	2020-08-09 20:41:49 +02:00
poka	56d723ab56	Merge branch 'scrapalive'	2020-08-09 20:27:49 +02:00
poka	b2ddcf2f95	Coming back from older commit, scraper is alive.	2020-08-09 20:24:38 +02:00