Compare commits
3 Commits
e280ba79aa
...
4af08a9d82
Author | SHA1 | Date |
---|---|---|
poka | 4af08a9d82 | |
poka | 56d723ab56 | |
poka | b2ddcf2f95 |
|
@ -3,5 +3,3 @@ __pycache__/
|
|||
yggcrawl/__pycache__/
|
||||
yggcrawl/__init__.pyc
|
||||
login.py
|
||||
yggcrawl/gecko/geckodriver.log
|
||||
.vscode
|
||||
|
|
|
@ -1,58 +0,0 @@
|
|||
Albania
|
||||
Chile
|
||||
Georgia
|
||||
Israel
|
||||
New_Zealand
|
||||
Slovenia
|
||||
Ukraine
|
||||
Argentina
|
||||
Costa_Rica
|
||||
Germany
|
||||
Italy
|
||||
North_Macedonia
|
||||
South_Africa
|
||||
United_Kingdom
|
||||
Australia
|
||||
Croatia
|
||||
Greece
|
||||
Japan
|
||||
Norway
|
||||
South_Korea
|
||||
United_States
|
||||
Austria
|
||||
Cyprus
|
||||
Hong_Kong
|
||||
Latvia
|
||||
Poland
|
||||
Spain
|
||||
Vietnam
|
||||
Belgium
|
||||
Czech_Republic
|
||||
Hungary
|
||||
Luxembourg
|
||||
Portugal
|
||||
Sweden
|
||||
Bosnia_And_Herzegovina
|
||||
Denmark
|
||||
Iceland
|
||||
Malaysia
|
||||
Romania
|
||||
Switzerland
|
||||
Brazil
|
||||
Estonia
|
||||
India
|
||||
Mexico
|
||||
Serbia
|
||||
Taiwan
|
||||
Bulgaria
|
||||
Finland
|
||||
Indonesia
|
||||
Moldova
|
||||
Singapore
|
||||
Thailand
|
||||
Canada
|
||||
France
|
||||
Ireland
|
||||
Netherlands
|
||||
Slovakia
|
||||
Turkey
|
|
@ -2,7 +2,7 @@
|
|||
## yggtorrent to IPFS
|
||||
|
||||
This is a submodule of [Astroport project](https://git.p2p.legal/axiom-team/astroport). You can use it standalone.
|
||||
IPTubes is a yggtorrent content migrator to semi-private IPFS swarm.
|
||||
IPTubes is a yggtorrent content migrator to a public IPFS swarm.
|
||||
|
||||
### Standalone installation
|
||||
|
||||
|
|
15
crawl.py
15
crawl.py
|
@ -16,6 +16,7 @@ import requests
|
|||
import json
|
||||
import sys
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import login
|
||||
import time
|
||||
|
@ -25,7 +26,7 @@ from termcolor import colored
|
|||
# Load scraper
|
||||
from yggcrawl import YggTorrentScraper
|
||||
scraper = YggTorrentScraper(requests.session())
|
||||
from yggcrawl import set_yggtorrent_tld
|
||||
from yggtorrentscraper import set_yggtorrent_tld
|
||||
set_yggtorrent_tld("se")
|
||||
name = ' '.join(sys.argv[1:])
|
||||
|
||||
|
@ -39,7 +40,7 @@ except ValueError:
|
|||
else:
|
||||
sys.exit(1)
|
||||
|
||||
# Rolling Files
|
||||
# Allow only one torrent to download at a time, and remove the oldest torrent if the disk is full.
|
||||
def rollingFiles():
|
||||
def isDL():
|
||||
downloading = os.popen('./trans-ctl.sh downloading').read()
|
||||
|
@ -104,10 +105,13 @@ def downloadTorrent():
|
|||
# Download torrent file
|
||||
if(scraper.login(login.user, login.passwd)):
|
||||
print(colored("Login success", 'green'))
|
||||
subprocess.Popen('[[ $(ls data/tmp/torrents/) ]] && rm data/tmp/torrents/*', executable='/bin/bash', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
if len(os.listdir('data/tmp/torrents') ) != 0:
|
||||
shutil.rmtree('data/tmp/torrents', ignore_errors=True)
|
||||
os.mkdir("data/tmp/torrents")
|
||||
scraper.download_from_torrent_url(research)
|
||||
# os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent.strip()}.torrent && mv {idTorrent.strip()}.torrent ../../torrents/').read()
|
||||
os.popen('cd data/tmp/torrents/ && mv *.torrent ../../torrents/')
|
||||
# os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent}.torrent && mv *.torrent ../../torrents/')
|
||||
os.popen(f'cd data/tmp/torrents/ && mv *.torrent {idTorrent.strip()}.torrent && mv {idTorrent.strip()}.torrent ../../torrents/')
|
||||
|
||||
else:
|
||||
print(colored("Login failed", 'red'))
|
||||
sys.exit(1)
|
||||
|
@ -121,7 +125,6 @@ def removeTracker():
|
|||
time.sleep(tkdelay)
|
||||
os.popen('./trans-ctl.sh rmtracker ' + name)
|
||||
os.popen('./trans-ctl.sh rmtracker ' + higherid)
|
||||
# print(tkresult)
|
||||
|
||||
rollingFiles()
|
||||
downloadTorrent()
|
||||
|
|
10
install.sh
10
install.sh
|
@ -31,12 +31,6 @@ sbotc() {
|
|||
transmission() {
|
||||
echo -e "${c_yellow}Installing Transmision...$c_"
|
||||
sudo apt install transmission-daemon --install-suggests
|
||||
sudo apt install transmission-cli
|
||||
|
||||
# stop
|
||||
# Copy login.py info to /etc/transmission/settings.json
|
||||
# start
|
||||
|
||||
}
|
||||
|
||||
# Install pip tools
|
||||
|
@ -74,8 +68,8 @@ pip3() {
|
|||
iptubes() {
|
||||
[[ -z $(which pip3) ]] && pip3
|
||||
/usr/bin/pip3 install $(curl -s https://raw.githubusercontent.com/Harkame/YggTorrentScraper/master/requirements.txt)
|
||||
sudo chgrp -R debian-transmission data/
|
||||
sudo chmod -R g+w data/
|
||||
chgrp -R debian-transmission data/
|
||||
chmod -R g+w data/
|
||||
sudo service transmission-daemon restart
|
||||
cp login.py.template login.py
|
||||
cd lib/py/
|
||||
|
|
|
@ -16,7 +16,7 @@ try:
|
|||
except NameError:
|
||||
from yggcrawl import YggTorrentScraper
|
||||
scraper = YggTorrentScraper(requests.session())
|
||||
from yggcrawl import set_yggtorrent_tld
|
||||
from yggtorrentscraper import set_yggtorrent_tld
|
||||
set_yggtorrent_tld("se")
|
||||
|
||||
cmd = sys.argv[1]
|
||||
|
|
|
@ -56,15 +56,6 @@ get_details() {
|
|||
fi
|
||||
}
|
||||
|
||||
vpn() {
|
||||
[[ ! $(which nordvpn) ]] && echo "Installaling NordVPN client... && ./install.sh nordvpn"
|
||||
vpn_citie=$(shuf -n1 .vpn/countries)
|
||||
echo "Warning: trying to connect to random cities in the world via NordVPN. If you are connected to this machine via SSH, you will lost the connection..."
|
||||
echo "VPN connection in 5 seconds, press CTRL+C to cancel..."
|
||||
sleep 5
|
||||
nordvpn c $vpn_citie
|
||||
}
|
||||
|
||||
$1
|
||||
|
||||
[[ $err == 1 ]] && exit 1 || exit 0
|
||||
|
|
17
tata.sh
17
tata.sh
|
@ -1,17 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
readWords() {
|
||||
declare -i int="$1"
|
||||
|
||||
(( int == 0 )) && {
|
||||
printf "%s\n" "$int is 0, cant find 0 words"
|
||||
return 1
|
||||
}
|
||||
|
||||
while read getWords;do
|
||||
if [[ ${#getWords} -eq $int ]];then
|
||||
printf "%s\n" "$getWords"
|
||||
fi
|
||||
done < /usr/share/dict/words
|
||||
}
|
||||
|
||||
readWords 20
|
18
trans-ctl.sh
18
trans-ctl.sh
|
@ -26,9 +26,12 @@ getid() {
|
|||
# Get ID
|
||||
else
|
||||
j=0
|
||||
for i in "$name"; do
|
||||
[[ $j == 0 ]] && result=$($transcmd --list | grep -vE 'Sum:|ID Done' | grep -i "$i")
|
||||
result=$(echo "$result" | grep -vE 'Sum:|ID Done' | grep -iw "$i")
|
||||
for i in $name; do
|
||||
if [[ $j == 0 ]];then
|
||||
result=$($transcmd --list | grep -vE 'Sum:|ID Done' | grep -iw "$i")
|
||||
else
|
||||
result=$(echo "$result" | grep -iw "$i")
|
||||
fi
|
||||
((j++))
|
||||
done
|
||||
fi
|
||||
|
@ -36,7 +39,7 @@ getid() {
|
|||
echo "$result" | awk '{ print $1 }'
|
||||
else
|
||||
echo "No torrent found"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
getlowerid() {
|
||||
|
@ -86,13 +89,16 @@ case "$1" in
|
|||
remove)
|
||||
idt=$(getid | tr -d '*')
|
||||
if [[ $idt =~ ^[+-]?[0-9]+([.][0-9]+)?$ ]]; then
|
||||
for i in "$($transcmd --list | grep -vE 'Sum:|ID Done' )"; do
|
||||
torrentList=$($transcmd --list | grep -vE 'Sum:|ID Done' )
|
||||
IFS=$'\n'
|
||||
for i in $torrentList; do
|
||||
if [[ $(echo "$i" | awk '{ print $1 }') == $idt ]]; then
|
||||
fileName=$(echo "$i" | awk '{ print $NF }')
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
IFS=$' '
|
||||
[[ ! $fileName ]] && echo "Can't find torrent to remove." && exit 1
|
||||
cd data/meta
|
||||
torrentId=$(grep -r $fileName | head -n1 | awk -F '/' '{ print $1 }')
|
||||
rm -rf $torrentId
|
||||
|
|
|
@ -1,44 +0,0 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
# Early example of how to use Selenium with Gecko to bypass Cloudflare's bot detection
|
||||
# The only way to block this would be to put a captcha in front of every yggtorrent page, per session...
|
||||
|
||||
import sys
|
||||
import time
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
|
||||
# Exit if no arguments
|
||||
if len(sys.argv)==1: sys.exit("Please choose a film ou serie name")
|
||||
else: arg1 = sys.argv[1]
|
||||
|
||||
search_url = f"https://www2.yggtorrent.se/engine/search?name={arg1}&description=&file=&uploader=&category=all&sub_category=&do=search&order=desc&sort=seed"
|
||||
|
||||
# Load webdriver with Gecko
|
||||
options = webdriver.FirefoxOptions()
|
||||
options.add_argument('-headless')
|
||||
driver = webdriver.Firefox(options=options, executable_path=r'/usr/local/bin/geckodriver')
|
||||
driver.get(search_url)
|
||||
|
||||
# Wait to bypass cloudflare
|
||||
print("Page atteinte, attente de redirection anti-crawling...")
|
||||
wait = WebDriverWait(driver, 10)
|
||||
wait.until(lambda driver: driver.current_url != search_url)
|
||||
|
||||
# Wait 2 seconds to load page
|
||||
print("Anti-crawling passé, affichage dans 2 secondes ...")
|
||||
time.sleep(2)
|
||||
|
||||
# Filter torrent urls
|
||||
elems = driver.find_elements_by_css_selector(".results [href]")
|
||||
links = [elem.get_attribute('href') for elem in elems]
|
||||
links = [k for k in links if '/torrent/' in k]
|
||||
|
||||
# Print torrents urls
|
||||
print("\n".join(links))
|
||||
|
||||
|
||||
driver.quit()
|
|
@ -44,7 +44,7 @@ YGGTORRENT_SEARCH_URL_DO = "&do="
|
|||
YGGTORRENT_SEARCH_URL_PAGE = "&page="
|
||||
|
||||
YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
||||
YGGTORRENT_GET_INFO = f"https://www2.yggtorrent.si/engine/get_nfo?torrent="
|
||||
YGGTORRENT_GET_INFO = f"{YGGTORRENT_BASE_URL}/engine/get_nfo?torrent="
|
||||
|
||||
YGGTORRENT_MOST_COMPLETED_URL = f"{YGGTORRENT_BASE_URL}/engine/mostcompleted"
|
||||
|
||||
|
@ -52,7 +52,6 @@ TORRENT_PER_PAGE = 50
|
|||
|
||||
YGGTORRENT_FILES_URL = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
||||
|
||||
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
|
||||
|
||||
def set_yggtorrent_tld(yggtorrent_tld=None):
|
||||
"""
|
||||
|
@ -78,7 +77,7 @@ def set_yggtorrent_tld(yggtorrent_tld=None):
|
|||
|
||||
YGGTORRENT_SEARCH_URL = f"{YGGTORRENT_BASE_URL}/engine/search?name="
|
||||
|
||||
YGGTORRENT_DOMAIN = ".yggtorrent.si"
|
||||
YGGTORRENT_DOMAIN = ".yggtorrent.gg"
|
||||
|
||||
YGGTORRENT_GET_FILES = f"{YGGTORRENT_BASE_URL}/engine/get_files?torrent="
|
||||
YGGTORRENT_GET_INFO = f"https://www2.yggtorrentchg/engine/get_nfo?torrent="
|
||||
|
@ -109,7 +108,7 @@ class YggTorrentScraper:
|
|||
"User-Agent": "PostmanRuntime/7.17.1",
|
||||
"Accept": "*/*",
|
||||
"Cache-Control": "no-cache",
|
||||
"Host": f"www2.yggtorrent.{YGGTORRENT_TLD}",
|
||||
"Host": f"www.yggtorrent.{YGGTORRENT_TLD}",
|
||||
"Accept-Encoding": "gzip, deflate",
|
||||
"Connection": "keep-alive",
|
||||
}
|
||||
|
@ -146,7 +145,7 @@ class YggTorrentScraper:
|
|||
"""
|
||||
Logout request
|
||||
"""
|
||||
response = self.session.get(YGGTORRENT_LOGOUT_URL, headers=headers)
|
||||
response = self.session.get(YGGTORRENT_LOGOUT_URL)
|
||||
|
||||
self.session.cookies.clear()
|
||||
|
||||
|
@ -161,18 +160,12 @@ class YggTorrentScraper:
|
|||
|
||||
return False
|
||||
|
||||
#kopa
|
||||
def search_old(self, parameters):
|
||||
def search(self, parameters):
|
||||
search_url = create_search_url(parameters)
|
||||
torrents_url = self.get_torrents_url(search_url, parameters)
|
||||
|
||||
return torrents_url
|
||||
|
||||
def search(self, parameters):
|
||||
# torrents_url = os.popen('gecko/torrent_search.py didier')
|
||||
torrents_url = exec(open('/home/iptubes/astroport-iptubes/yggcrawl/gecko/torrent_search.py').read())
|
||||
return torrents_url
|
||||
|
||||
def extract_details(self, torrent_url):
|
||||
"""
|
||||
Extract information from the torrent's URL
|
||||
|
@ -181,7 +174,7 @@ class YggTorrentScraper:
|
|||
|
||||
torrents = []
|
||||
|
||||
response = self.session.get(torrent_url, headers=headers)
|
||||
response = self.session.get(torrent_url)
|
||||
|
||||
torrent_page = BeautifulSoup(response.content, features="lxml")
|
||||
|
||||
|
@ -244,7 +237,7 @@ class YggTorrentScraper:
|
|||
"input", {"type": "hidden", "name": "target"}
|
||||
)["value"]
|
||||
|
||||
response = self.session.get(YGGTORRENT_GET_FILES + torrent_id, headers=headers)
|
||||
response = self.session.get(YGGTORRENT_GET_FILES + torrent_id)
|
||||
|
||||
files_page = BeautifulSoup(response.content, features="lxml")
|
||||
|
||||
|
@ -299,12 +292,12 @@ class YggTorrentScraper:
|
|||
|
||||
return torrents_url
|
||||
|
||||
#kopaa
|
||||
def get_torrents_url(self, search_url, parameters):
|
||||
"""
|
||||
Return
|
||||
"""
|
||||
response = self.session.get(search_url, headers=headers)
|
||||
|
||||
response = self.session.get(search_url)
|
||||
|
||||
search_page = BeautifulSoup(response.content, features="lxml")
|
||||
|
||||
|
@ -324,7 +317,7 @@ class YggTorrentScraper:
|
|||
|
||||
search_url = create_search_url(parameters)
|
||||
|
||||
response = self.session.get(search_url, headers=headers)
|
||||
response = self.session.get(search_url)
|
||||
|
||||
search_page = BeautifulSoup(response.content, features="lxml")
|
||||
|
||||
|
@ -335,6 +328,7 @@ class YggTorrentScraper:
|
|||
|
||||
return torrents
|
||||
|
||||
#kopa
|
||||
def download_from_torrent_url(self, torrent_url=None, destination_path="./data/tmp/torrents/"):
|
||||
if torrent_url is not None:
|
||||
torrent = self.extract_details(torrent_url)
|
||||
|
@ -355,7 +349,7 @@ class YggTorrentScraper:
|
|||
if torrent_url is None:
|
||||
raise Exception("Invalid torrent_url, make sure you are logged")
|
||||
|
||||
response = self.session.get(YGGTORRENT_BASE_URL + torrent_url, headers=headers)
|
||||
response = self.session.get(YGGTORRENT_BASE_URL + torrent_url)
|
||||
|
||||
temp_file_name = response.headers.get("content-disposition")
|
||||
|
||||
|
@ -374,6 +368,7 @@ class YggTorrentScraper:
|
|||
|
||||
return file_full_path
|
||||
|
||||
|
||||
def create_search_url(parameters):
|
||||
"""
|
||||
Return a formatted URL for a torrent search
|
||||
|
|
Loading…
Reference in New Issue