Compare commits

...

3 Commits

Author SHA1 Message Date
poka 4af08a9d82 Merge branch 'master' into scrapalive 2020-08-09 20:41:49 +02:00
poka 56d723ab56 Merge branch 'scrapalive' 2020-08-09 20:27:49 +02:00
poka b2ddcf2f95 Comming back from older commit, scraper is alive. 2020-08-09 20:24:38 +02:00
11 changed files with 38 additions and 170 deletions

2
.gitignore vendored
View File

@ -3,5 +3,3 @@ __pycache__/
yggcrawl/__pycache__/
yggcrawl/__init__.pyc
login.py
yggcrawl/gecko/geckodriver.log
.vscode

View File

@ -1,58 +0,0 @@
Albania
Chile
Georgia
Israel
New_Zealand
Slovenia
Ukraine
Argentina
Costa_Rica
Germany
Italy
North_Macedonia
South_Africa
United_Kingdom
Australia
Croatia
Greece
Japan
Norway
South_Korea
United_States
Austria
Cyprus
Hong_Kong
Latvia
Poland
Spain
Vietnam
Belgium
Czech_Republic
Hungary
Luxembourg
Portugal
Sweden
Bosnia_And_Herzegovina
Denmark
Iceland
Malaysia
Romania
Switzerland
Brazil
Estonia
India
Mexico
Serbia
Taiwan
Bulgaria
Finland
Indonesia
Moldova
Singapore
Thailand
Canada
France
Ireland
Netherlands
Slovakia
Turkey

View File

@ -2,7 +2,7 @@
## yggtorrent to IPFS
This is a submodule of [Astroport project](https://git.p2p.legal/axiom-team/astroport). You can use it standalone.
IPTubes is a yggtorrent content migrator to semi-private IPFS swarm.
IPTubes is a yggtorrent content migrator to a public IPFS swarm.
### Standalone installation

View File

@ -16,6 +16,7 @@ import requests
import json
import sys
import os
import shutil
import subprocess
import login
import time
@ -25,7 +26,7 @@ from termcolor import colored
# Load scraper
from yggcrawl import YggTorrentScraper
scraper = YggTorrentScraper(requests.session())
from yggcrawl import set_yggtorrent_tld
from yggtorrentscraper import set_yggtorrent_tld
set_yggtorrent_tld("se")
name = ' '.join(sys.argv[1:])
@ -39,7 +40,7 @@ except ValueError:
else:
sys.exit(1)
# Rolling files
# Allow only one torrent to download at a time, and remove the oldest torrent when the disk is full.
def rollingFiles():
def isDL():
downloading = os.popen('./trans-ctl.sh downloading').read()
@ -104,10 +105,13 @@ def downloadTorrent():
# Download torrent file
if(scraper.login(login.user, login.passwd)):
print(colored("Login success", 'green'))
subprocess.Popen('[[ $(ls data/tmp/torrents/) ]] && rm data/tmp/torrents/*', executable='/bin/bash', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if len(os.listdir('data/tmp/torrents') ) != 0:
shutil.rmtree('data/tmp/torrents', ignore_errors=True)
os.mkdir("data/tmp/torrents")
scraper.download_from_torrent_url(research)
# os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent.strip()}.torrent && mv {idTorrent.strip()}.torrent ../../torrents/').read()
os.popen('cd data/tmp/torrents/ && mv *.torrent ../../torrents/')
# os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent}.torrent && mv *.torrent ../../torrents/')
os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent.strip()}.torrent && mv {idTorrent.strip()}.torrent ../../torrents/')
else:
print(colored("Login failed", 'red'))
sys.exit(1)
@ -121,7 +125,6 @@ def removeTracker():
time.sleep(tkdelay)
os.popen('./trans-ctl.sh rmtracker ' + name)
os.popen('./trans-ctl.sh rmtracker ' + higherid)
# print(tkresult)
rollingFiles()
downloadTorrent()

View File

@ -31,12 +31,6 @@ sbotc() {
# Install the Transmission daemon and its command-line client with apt.
transmission() {
echo -e "${c_yellow}Installing Transmision...$c_"
# --install-suggests asks apt to also install the packages suggested by transmission-daemon
sudo apt install transmission-daemon --install-suggests
sudo apt install transmission-cli
# TODO: the configuration steps below are not implemented yet:
#   1. stop (presumably the transmission-daemon service -- confirm)
#   2. copy the login.py info to /etc/transmission/settings.json
#   3. start (presumably the same service again -- confirm)
}
# Install pip tools
@ -74,8 +68,8 @@ pip3() {
iptubes() {
[[ -z $(which pip3) ]] && pip3
/usr/bin/pip3 install $(curl -s https://raw.githubusercontent.com/Harkame/YggTorrentScraper/master/requirements.txt)
sudo chgrp -R debian-transmission data/
sudo chmod -R g+w data/
chgrp -R debian-transmission data/
chmod -R g+w data/
sudo service transmission-daemon restart
cp login.py.template login.py
cd lib/py/

View File

@ -16,7 +16,7 @@ try:
except NameError:
from yggcrawl import YggTorrentScraper
scraper = YggTorrentScraper(requests.session())
from yggcrawl import set_yggtorrent_tld
from yggtorrentscraper import set_yggtorrent_tld
set_yggtorrent_tld("se")
cmd = sys.argv[1]

View File

@ -56,15 +56,6 @@ get_details() {
fi
}
# Connect this machine to NordVPN using a randomly chosen entry of .vpn/countries.
#
# Installs the NordVPN client through ./install.sh when `nordvpn` is not on PATH,
# warns the user, waits 5 seconds so the operation can be cancelled with CTRL+C,
# then runs `nordvpn c <entry>`.
#
# Globals:
#   vpn_citie - set to the entry picked from .vpn/countries
#   err       - set to 1 on failure; the dispatcher at the bottom of this script
#               turns it into the exit status
# Returns:
#   1 when the client cannot be installed or no entry can be picked,
#   otherwise the status of `nordvpn c`.
vpn() {
    if ! command -v nordvpn >/dev/null 2>&1; then
        # The installer call used to sit inside the echoed string, so it was
        # printed instead of executed.
        echo "Installaling NordVPN client..."
        if ! ./install.sh nordvpn; then
            err=1
            return 1
        fi
    fi

    # Pick one line at random; an unreadable or empty list leaves nothing to connect to.
    vpn_citie=$(shuf -n1 .vpn/countries 2>/dev/null)
    if [[ -z $vpn_citie ]]; then
        echo "No VPN country available in .vpn/countries" >&2
        err=1
        return 1
    fi

    echo "Warning: trying to connect to random cities in the world via NordVPN. If you are connected to this machine via SSH, you will lost the connection..."
    echo "VPN connection in 5 seconds, press CTRL+C to cancel..."
    sleep 5

    # Quoted so the entry is always passed to nordvpn as a single argument.
    nordvpn c "$vpn_citie" || { err=1; return 1; }
}
$1
[[ $err == 1 ]] && exit 1 || exit 0

17
tata.sh
View File

@ -1,17 +0,0 @@
#!/usr/bin/env bash
# Print every line of a word list whose length is exactly N characters.
#
# Arguments:
#   $1 - required word length; evaluated as an integer, must not be 0
#   $2 - optional path of the word list, one word per line
#        (defaults to /usr/share/dict/words)
# Outputs:
#   matching words on stdout, one per line; diagnostics on stderr
# Returns:
#   1 when the length evaluates to 0 or the word list is not readable,
#   0 otherwise.
readWords() {
    declare -i int="$1"
    local dict="${2:-/usr/share/dict/words}"
    local getWords

    (( int == 0 )) && {
        # Diagnostics go to stderr so they never mix with the word output.
        printf "%s\n" "$int is 0, cant find 0 words" >&2
        return 1
    }

    [[ -r $dict ]] || {
        printf "%s\n" "cannot read word list: $dict" >&2
        return 1
    }

    # IFS= and -r keep each line verbatim (no whitespace trimming, no backslash
    # processing); the trailing test also handles a last line without a newline.
    while IFS= read -r getWords || [[ -n $getWords ]]; do
        if (( ${#getWords} == int )); then
            printf "%s\n" "$getWords"
        fi
    done < "$dict"
}
readWords 20

View File

@ -26,9 +26,12 @@ getid() {
# Get ID
else
j=0
for i in "$name"; do
[[ $j == 0 ]] && result=$($transcmd --list | grep -vE 'Sum:|ID Done' | grep -i "$i")
result=$(echo "$result" | grep -vE 'Sum:|ID Done' | grep -iw "$i")
for i in $name; do
if [[ $j == 0 ]];then
result=$($transcmd --list | grep -vE 'Sum:|ID Done' | grep -iw "$i")
else
result=$(echo "$result" | grep -iw "$i")
fi
((j++))
done
fi
@ -36,7 +39,7 @@ getid() {
echo "$result" | awk '{ print $1 }'
else
echo "No torrent found"
fi
fi
}
getlowerid() {
@ -86,13 +89,16 @@ case "$1" in
remove)
idt=$(getid | tr -d '*')
if [[ $idt =~ ^[+-]?[0-9]+([.][0-9]+)?$ ]]; then
for i in "$($transcmd --list | grep -vE 'Sum:|ID Done' )"; do
torrentList=$($transcmd --list | grep -vE 'Sum:|ID Done' )
IFS=$'\n'
for i in $torrentList; do
if [[ $(echo "$i" | awk '{ print $1 }') == $idt ]]; then
fileName=$(echo "$i" | awk '{ print $NF }')
break
fi
done
IFS=$' '
[[ ! $fileName ]] && echo "Can't find torrent to remove." && exit 1
cd data/meta
torrentId=$(grep -r $fileName | head -n1 | awk -F '/' '{ print $1 }')
rm -rf $torrentId

View File

@ -1,44 +0,0 @@
#!/usr/bin/python3
"""Early example of using Selenium with geckodriver to bypass Cloudflare bot detection.

Usage: torrent_search.py NAME

Opens the yggtorrent search page for NAME in a headless Firefox, waits for the
browser to be redirected away from the requested URL, then prints every link
found under the ``.results`` element whose URL contains ``/torrent/``, one per
line.

The only way to block this should be a captcha in front of every yggtorrent
page, per session...
"""
import sys
import time
from urllib.parse import quote_plus

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Exit if no arguments
if len(sys.argv) == 1:
    sys.exit("Please choose a film ou serie name")
arg1 = sys.argv[1]

# URL-encode the name: spaces, '&' or '#' in the raw value would otherwise
# corrupt the query string.
search_url = f"https://www2.yggtorrent.se/engine/search?name={quote_plus(arg1)}&description=&file=&uploader=&category=all&sub_category=&do=search&order=desc&sort=seed"

# Load webdriver with Gecko
options = webdriver.FirefoxOptions()
options.add_argument('-headless')
driver = webdriver.Firefox(options=options, executable_path=r'/usr/local/bin/geckodriver')

# Always shut the browser down, even when the wait times out or scraping fails,
# so no headless Firefox process is left behind.
try:
    driver.get(search_url)

    # Wait to bypass cloudflare: the URL changes once the browser is redirected
    print("Page atteinte, attente de redirection anti-crawling...")
    wait = WebDriverWait(driver, 10)
    wait.until(lambda driver: driver.current_url != search_url)

    # Wait 2 seconds to load page
    print("Anti-crawling passé, affichage dans 2 secondes ...")
    time.sleep(2)

    # Filter torrent urls (find_elements + By replaces the deprecated
    # find_elements_by_css_selector helper)
    elems = driver.find_elements(By.CSS_SELECTOR, ".results [href]")
    hrefs = (elem.get_attribute('href') for elem in elems)
    links = [href for href in hrefs if href and '/torrent/' in href]

    # Print torrents urls
    print("\n".join(links))
finally:
    driver.quit()

View File

@ -44,7 +44,7 @@ YGGTORRENT_SEARCH_URL_DO = "&do="
YGGTORRENT_SEARCH_URL_PAGE = "&page="
YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
YGGTORRENT_GET_INFO = f"https://www2.yggtorrent.si/engine/get_nfo?torrent="
YGGTORRENT_GET_INFO = f"{YGGTORRENT_BASE_URL}/engine/get_nfo?torrent="
YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"
@ -52,7 +52,6 @@ TORRENT_PER_PAGE = 50
YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
def set_yggtorrent_tld(yggtorrent_tld=None):
"""
@ -78,7 +77,7 @@ def set_yggtorrent_tld(yggtorrent_tld=None):
YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="
YGGTORRENT_DOMAIN = ".yggtorrent.si"
YGGTORRENT_DOMAIN = ".yggtorrent.gg"
YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
YGGTORRENT_GET_INFO = f"https://www2.yggtorrentchg/engine/get_nfo?torrent="
@ -109,7 +108,7 @@ class YggTorrentScraper:
"User-Agent": "PostmanRuntime/7.17.1",
"Accept": "*/*",
"Cache-Control": "no-cache",
"Host": f"www2.yggtorrent.{YGGTORRENT_TLD}",
"Host": f"www.yggtorrent.{YGGTORRENT_TLD}",
"Accept-Encoding": "gzip, deflate",
"Connection": "keep-alive",
}
@ -146,7 +145,7 @@ class YggTorrentScraper:
"""
Logout request
"""
response = self.session.get(YGGTORRENT_LOGOUT_URL, headers=headers)
response = self.session.get(YGGTORRENT_LOGOUT_URL)
self.session.cookies.clear()
@ -161,18 +160,12 @@ class YggTorrentScraper:
return False
#kopa
def search_old(self, parameters):
def search(self, parameters):
search_url = create_search_url(parameters)
torrents_url = self.get_torrents_url(search_url, parameters)
return torrents_url
def search(self, parameters):
# torrents_url = os.popen('gecko/torrent_search.py didier')
torrents_url = exec(open('/home/iptubes/astroport-iptubes/yggcrawl/gecko/torrent_search.py').read())
return torrents_url
def extract_details(self, torrent_url):
"""
Extract informations from torrent's url
@ -181,7 +174,7 @@ class YggTorrentScraper:
torrents = []
response = self.session.get(torrent_url, headers=headers)
response = self.session.get(torrent_url)
torrent_page = BeautifulSoup(response.content, features="lxml")
@ -244,7 +237,7 @@ class YggTorrentScraper:
"input", {"type": "hidden", "name": "target"}
)["value"]
response = self.session.get(YGGTORRENT_GET_FILES + torrent_id, headers=headers)
response = self.session.get(YGGTORRENT_GET_FILES + torrent_id)
files_page = BeautifulSoup(response.content, features="lxml")
@ -299,12 +292,12 @@ class YggTorrentScraper:
return torrents_url
#kopaa
def get_torrents_url(self, search_url, parameters):
"""
Return
"""
response = self.session.get(search_url, headers=headers)
response = self.session.get(search_url)
search_page = BeautifulSoup(response.content, features="lxml")
@ -324,7 +317,7 @@ class YggTorrentScraper:
search_url = create_search_url(parameters)
response = self.session.get(search_url, headers=headers)
response = self.session.get(search_url)
search_page = BeautifulSoup(response.content, features="lxml")
@ -335,6 +328,7 @@ class YggTorrentScraper:
return torrents
#kopa
def download_from_torrent_url(self, torrent_url=None, destination_path="./data/tmp/torrents/"):
if torrent_url is not None:
torrent = self.extract_details(torrent_url)
@ -355,7 +349,7 @@ class YggTorrentScraper:
if torrent_url is None:
raise Exception("Invalid torrent_url, make sure you are logged")
response = self.session.get(YGGTORRENT_BASE_URL + torrent_url, headers=headers)
response = self.session.get(YGGTORRENT_BASE_URL + torrent_url)
temp_file_name = response.headers.get("content-disposition")
@ -374,6 +368,7 @@ class YggTorrentScraper:
return file_full_path
def create_search_url(parameters):
"""
Return a formated URL for torrent's search