Compare commits

...

3 Commits

Author SHA1 Message Date
poka 4af08a9d82 Merge branch 'master' into scrapalive 2020-08-09 20:41:49 +02:00
poka 56d723ab56 Merge branch 'scrapalive' 2020-08-09 20:27:49 +02:00
poka b2ddcf2f95 Comming back from older commit, scraper is alive. 2020-08-09 20:24:38 +02:00
11 changed files with 38 additions and 170 deletions

.gitignore
View File

@@ -3,5 +3,3 @@ __pycache__/
 yggcrawl/__pycache__/
 yggcrawl/__init__.pyc
 login.py
-yggcrawl/gecko/geckodriver.log
-.vscode

View File

@@ -1,58 +0,0 @@
Albania
Chile
Georgia
Israel
New_Zealand
Slovenia
Ukraine
Argentina
Costa_Rica
Germany
Italy
North_Macedonia
South_Africa
United_Kingdom
Australia
Croatia
Greece
Japan
Norway
South_Korea
United_States
Austria
Cyprus
Hong_Kong
Latvia
Poland
Spain
Vietnam
Belgium
Czech_Republic
Hungary
Luxembourg
Portugal
Sweden
Bosnia_And_Herzegovina
Denmark
Iceland
Malaysia
Romania
Switzerland
Brazil
Estonia
India
Mexico
Serbia
Taiwan
Bulgaria
Finland
Indonesia
Moldova
Singapore
Thailand
Canada
France
Ireland
Netherlands
Slovakia
Turkey

View File

@@ -2,7 +2,7 @@
 ## yggtorrent to IPFS
 This is a submodule of [Astroport project](https://git.p2p.legal/axiom-team/astroport). You can use it standalone.
-IPTubes is a yggtorrent content migrator to semi-private IPFS swarm.
+IPTubes is a yggtorrent content migrator to a public IPFS swam.
 ### Standalone installation

View File

@@ -16,6 +16,7 @@ import requests
 import json
 import sys
 import os
+import shutil
 import subprocess
 import login
 import time
@@ -25,7 +26,7 @@ from termcolor import colored
 # Load scraper
 from yggcrawl import YggTorrentScraper
 scraper = YggTorrentScraper(requests.session())
-from yggcrawl import set_yggtorrent_tld
+from yggtorrentscraper import set_yggtorrent_tld
 set_yggtorrent_tld("se")
 name = ' '.join(sys.argv[1:])
@@ -39,7 +40,7 @@ except ValueError:
 else:
 sys.exit(1)
-# Rollong Files
+# Allow only one torrent downling in same time, and remove oldest torrent if disk size is full.
 def rollingFiles():
 def isDL():
 downloading = os.popen('./trans-ctl.sh downloading').read()
@@ -104,10 +105,13 @@ def downloadTorrent():
 # Download torrent file
 if(scraper.login(login.user, login.passwd)):
 print(colored("Login success", 'green'))
-subprocess.Popen('[[ $(ls data/tmp/torrents/) ]] && rm data/tmp/torrents/*', executable='/bin/bash', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+if len(os.listdir('data/tmp/torrents') ) != 0:
+shutil.rmtree('data/tmp/torrents', ignore_errors=True)
+os.mkdir("data/tmp/torrents")
 scraper.download_from_torrent_url(research)
-# os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent.strip()}.torrent && mv {idTorrent.strip()}.torrent ../../torrents/').read()
-os.popen('cd data/tmp/torrents/ && mv *.torrent ../../torrents/')
+# os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent}.torrent && mv *.torrent ../../torrents/')
+os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent.strip()}.torrent && mv {idTorrent.strip()}.torrent ../../torrents/')
 else:
 print(colored("Login failed", 'red'))
 sys.exit(1)
@@ -121,7 +125,6 @@ def removeTracker():
 time.sleep(tkdelay)
 os.popen('./trans-ctl.sh rmtracker ' + name)
 os.popen('./trans-ctl.sh rmtracker ' + higherid)
-# print(tkresult)
 rollingFiles()
 downloadTorrent()
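
The change above replaces a bash one-liner run through subprocess.Popen with os.listdir/shutil.rmtree/os.mkdir, but still shells out to mv to rename the downloaded .torrent after idTorrent. Below is a minimal sketch of the same cleanup-and-rename step done entirely in Python; the paths and the idTorrent value follow the diff, while the helper name and the pathlib usage are illustrative assumptions, not the project's code.

# Illustrative sketch only: stage_torrent() is a hypothetical helper, not part of the repo.
import shutil
from pathlib import Path

TMP = Path("data/tmp/torrents")    # staging directory used in the diff
DEST = Path("data/torrents")       # ../../torrents/ relative to data/tmp/torrents/

def stage_torrent(id_torrent: str) -> None:
    # Empty and recreate the staging directory so only one .torrent is ever present.
    shutil.rmtree(TMP, ignore_errors=True)
    TMP.mkdir(parents=True, exist_ok=True)
    DEST.mkdir(parents=True, exist_ok=True)
    # ... scraper.download_from_torrent_url(research) would write the file here ...
    for torrent in TMP.glob("*.torrent"):
        # Equivalent of: mv *.torrent {idTorrent}.torrent && mv {idTorrent}.torrent ../../torrents/
        torrent.rename(DEST / f"{id_torrent.strip()}.torrent")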

View File

@@ -31,12 +31,6 @@ sbotc() {
 transmission() {
 echo -e "${c_yellow}Installing Transmision...$c_"
 sudo apt install transmission-daemon --install-suggests
-sudo apt install transmission-cli
-# stop
-# Copy login.py info to /etc/transmission/settings.json
-# start
 }
 # Install pip tools
@@ -74,8 +68,8 @@ pip3() {
 iptubes() {
 [[ -z $(which pip3) ]] && pip3
 /usr/bin/pip3 install $(curl -s https://raw.githubusercontent.com/Harkame/YggTorrentScraper/master/requirements.txt)
-sudo chgrp -R debian-transmission data/
-sudo chmod -R g+w data/
+chgrp -R debian-transmission data/
+chmod -R g+w data/
 sudo service transmission-daemon restart
 cp login.py.template login.py
 cd lib/py/

View File

@@ -16,7 +16,7 @@ try:
 except NameError:
 from yggcrawl import YggTorrentScraper
 scraper = YggTorrentScraper(requests.session())
-from yggcrawl import set_yggtorrent_tld
+from yggtorrentscraper import set_yggtorrent_tld
 set_yggtorrent_tld("se")
 cmd = sys.argv[1]

View File

@@ -56,15 +56,6 @@ get_details() {
 fi
 }
-vpn() {
-[[ ! $(which nordvpn) ]] && echo "Installaling NordVPN client... && ./install.sh nordvpn"
-vpn_citie=$(shuf -n1 .vpn/countries)
-echo "Warning: trying to connect to random cities in the world via NordVPN. If you are connected to this machine via SSH, you will lost the connection..."
-echo "VPN connection in 5 seconds, press CTRL+C to cancel..."
-sleep 5
-nordvpn c $vpn_citie
-}
 $1
 [[ $err == 1 ]] && exit 1 || exit 0

tata.sh
View File

@@ -1,17 +0,0 @@
#!/usr/bin/env bash
readWords() {
declare -i int="$1"
(( int == 0 )) && {
printf "%s\n" "$int is 0, cant find 0 words"
return 1
}
while read getWords;do
if [[ ${#getWords} -eq $int ]];then
printf "%s\n" "$getWords"
fi
done < /usr/share/dict/words
}
readWords 20

View File

@@ -26,9 +26,12 @@ getid() {
 # Get ID
 else
 j=0
-for i in "$name"; do
-[[ $j == 0 ]] && result=$($transcmd --list | grep -vE 'Sum:|ID Done' | grep -i "$i")
-result=$(echo "$result" | grep -vE 'Sum:|ID Done' | grep -iw "$i")
+for i in $name; do
+if [[ $j == 0 ]];then
+result=$($transcmd --list | grep -vE 'Sum:|ID Done' | grep -iw "$i")
+else
+result=$(echo "$result" | grep -iw "$i")
+fi
 ((j++))
 done
 fi
@@ -36,7 +39,7 @@ getid() {
 echo "$result" | awk '{ print $1 }'
 else
 echo "No torrent found"
 fi
 }
 getlowerid() {
@@ -86,13 +89,16 @@ case "$1" in
 remove)
 idt=$(getid | tr -d '*')
 if [[ $idt =~ ^[+-]?[0-9]+([.][0-9]+)?$ ]]; then
-for i in "$($transcmd --list | grep -vE 'Sum:|ID Done' )"; do
+torrentList=$($transcmd --list | grep -vE 'Sum:|ID Done' )
+IFS=$'\n'
+for i in $torrentList; do
 if [[ $(echo "$i" | awk '{ print $1 }') == $idt ]]; then
 fileName=$(echo "$i" | awk '{ print $NF }')
 break
 fi
 done
+IFS=$' '
+[[ ! $fileName ]] && echo "Can't find torrent to remove." && exit 1
 cd data/meta
 torrentId=$(grep -r $fileName | head -n1 | awk -F '/' '{ print $1 }')
 rm -rf $torrentId
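
The reworked getid() above narrows the transmission list one search word at a time: the first word filters the full listing (minus the Sum:/header rows), and each following word filters the previous result with grep -iw. The same idea is sketched in Python below; the function and variable names are ours, and whole-word matching is approximated by splitting on whitespace rather than grep's word boundaries.

# Illustrative sketch of the word-by-word narrowing in getid(); not part of trans-ctl.sh.
def narrow_ids(lines, name):
    # Drop the summary and header rows first, like grep -vE 'Sum:|ID Done'.
    result = [l for l in lines if "Sum:" not in l and "ID Done" not in l]
    # Keep only lines containing every word of the search name, one word at a time.
    for word in name.split():
        result = [l for l in result if word.lower() in l.lower().split()]
    # Return the first column (torrent IDs), like awk '{ print $1 }'.
    return [l.split()[0] for l in result if l.split()]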

View File

@@ -1,44 +0,0 @@
#!/usr/bin/python3
# Early exemple of how to use selenium with gecko to bypass cloudflare bots detections
# The only way to block this should be using of captcha in front of every yggtorrent pages by sessions...
import sys
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Exit if no arguments
if len(sys.argv)==1: sys.exit("Please choose a film ou serie name")
else: arg1 = sys.argv[1]
search_url = f"https://www2.yggtorrent.se/engine/search?name={arg1}&description=&file=&uploader=&category=all&sub_category=&do=search&order=desc&sort=seed"
# Load webdriver with Gecko
options = webdriver.FirefoxOptions()
options.add_argument('-headless')
driver = webdriver.Firefox(options=options, executable_path=r'/usr/local/bin/geckodriver')
driver.get(search_url)
# Wait to bypass cloudflare
print("Page atteinte, attente de redirection anti-crawling...")
wait = WebDriverWait(driver, 10)
wait.until(lambda driver: driver.current_url != search_url)
# Wait 2 seconds to load page
print("Anti-crawling passé, affichage dans 2 secondes ...")
time.sleep(2)
# Filter torrent urls
elems = driver.find_elements_by_css_selector(".results [href]")
links = [elem.get_attribute('href') for elem in elems]
links = [k for k in links if '/torrent/' in k]
# Print torrents urls
print("\n".join(links))
driver.quit()

View File

@@ -44,7 +44,7 @@ YGGTORRENT_SEARCH_URL_DO = "&do="
 YGGTORRENT_SEARCH_URL_PAGE = "&page="
 YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
-YGGTORRENT_GET_INFO = f"https://www2.yggtorrent.si/engine/get_nfo?torrent="
+YGGTORRENT_GET_INFO = f"{YGGTORRENT_BASE_URL}/engine/get_nfo?torrent="
 YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"
@@ -52,7 +52,6 @@ TORRENT_PER_PAGE = 50
 YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
-headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
 def set_yggtorrent_tld(yggtorrent_tld=None):
 """
@@ -78,7 +77,7 @@ def set_yggtorrent_tld(yggtorrent_tld=None):
 YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="
-YGGTORRENT_DOMAIN = ".yggtorrent.si"
+YGGTORRENT_DOMAIN = ".yggtorrent.gg"
 YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
 YGGTORRENT_GET_INFO = f"https://www2.yggtorrentchg/engine/get_nfo?torrent="
@@ -109,7 +108,7 @@ class YggTorrentScraper:
 "User-Agent": "PostmanRuntime/7.17.1",
 "Accept": "*/*",
 "Cache-Control": "no-cache",
-"Host": f"www2.yggtorrent.{YGGTORRENT_TLD}",
+"Host": f"www.yggtorrent.{YGGTORRENT_TLD}",
 "Accept-Encoding": "gzip, deflate",
 "Connection": "keep-alive",
 }
@@ -146,7 +145,7 @@ class YggTorrentScraper:
 """
 Logout request
 """
-response = self.session.get(YGGTORRENT_LOGOUT_URL, headers=headers)
+response = self.session.get(YGGTORRENT_LOGOUT_URL)
 self.session.cookies.clear()
@@ -161,18 +160,12 @@ class YggTorrentScraper:
 return False
-#kopa
-def search_old(self, parameters):
+def search(self, parameters):
 search_url = create_search_url(parameters)
 torrents_url = self.get_torrents_url(search_url, parameters)
 return torrents_url
-def search(self, parameters):
-# torrents_url = os.popen('gecko/torrent_search.py didier')
-torrents_url = exec(open('/home/iptubes/astroport-iptubes/yggcrawl/gecko/torrent_search.py').read())
-return torrents_url
 def extract_details(self, torrent_url):
 """
 Extract informations from torrent's url
@@ -181,7 +174,7 @@ class YggTorrentScraper:
 torrents = []
-response = self.session.get(torrent_url, headers=headers)
+response = self.session.get(torrent_url)
 torrent_page = BeautifulSoup(response.content, features="lxml")
@@ -244,7 +237,7 @@ class YggTorrentScraper:
 "input", {"type": "hidden", "name": "target"}
 )["value"]
-response = self.session.get(YGGTORRENT_GET_FILES + torrent_id, headers=headers)
+response = self.session.get(YGGTORRENT_GET_FILES + torrent_id)
 files_page = BeautifulSoup(response.content, features="lxml")
@@ -299,12 +292,12 @@ class YggTorrentScraper:
 return torrents_url
-#kopaa
 def get_torrents_url(self, search_url, parameters):
 """
 Return
 """
-response = self.session.get(search_url, headers=headers)
+response = self.session.get(search_url)
 search_page = BeautifulSoup(response.content, features="lxml")
@@ -324,7 +317,7 @@ class YggTorrentScraper:
 search_url = create_search_url(parameters)
-response = self.session.get(search_url, headers=headers)
+response = self.session.get(search_url)
 search_page = BeautifulSoup(response.content, features="lxml")
@@ -335,6 +328,7 @@ class YggTorrentScraper:
 return torrents
+#kopa
 def download_from_torrent_url(self, torrent_url=None, destination_path="./data/tmp/torrents/"):
 if torrent_url is not None:
 torrent = self.extract_details(torrent_url)
@@ -355,7 +349,7 @@ class YggTorrentScraper:
 if torrent_url is None:
 raise Exception("Invalid torrent_url, make sure you are logged")
-response = self.session.get(YGGTORRENT_BASE_URL + torrent_url, headers=headers)
+response = self.session.get(YGGTORRENT_BASE_URL + torrent_url)
 temp_file_name = response.headers.get("content-disposition")
@@ -374,6 +368,7 @@ class YggTorrentScraper:
 return file_full_path
 def create_search_url(parameters):
 """
 Return a formated URL for torrent's search
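
Taken together, the changes to this module drop the hand-built headers dict and the Selenium/gecko detour in search(), going back to plain requests.Session calls parsed with BeautifulSoup. A minimal sketch of that restored request path, with names mirroring the diff but otherwise illustrative rather than the module's actual code:

# Illustrative only; the real methods live on YggTorrentScraper.
import requests
from bs4 import BeautifulSoup

def fetch_search_page(session: requests.Session, search_url: str) -> BeautifulSoup:
    response = session.get(search_url)  # no custom headers= argument any more
    return BeautifulSoup(response.content, features="lxml")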