Add transiscope scraping and add .gitignore file ...
parent 3a97143d5d
commit 24429ba1f5
.gitignore
@@ -0,0 +1,2 @@
.profile
zen/tools/scraping/transiscope/transiscope.json
@@ -0,0 +1,9 @@
#!/bin/bash
if [[ ! -f transiscope.json ]]; then
    echo "First run, fetching the data, please wait ..."
    ./generate_transiscope.sh
fi

jq '.[] | .name, .abstract, .geo' transiscope.json

exit 0
generate_transiscope.sh
@@ -0,0 +1,7 @@
#!/bin/bash

curl -s https://transiscope.gogocarto.fr/api/elements | jq .data > /tmp/tmp_transiscope.json || exit 1
[[ -f transiscope.json ]] && rm transiscope.json
mv /tmp/tmp_transiscope.json transiscope.json

exit 0
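
Note (not part of the commit): the download step above can also be sketched in Python. This is a minimal illustration assuming the gogocarto endpoint keeps returning its element list under a top-level "data" key, as the jq .data filter implies; the error handling here is my own addition.

#!/usr/bin/python3
# Illustrative sketch only: a Python equivalent of generate_transiscope.sh.
# Assumes the response is JSON with the elements under a "data" key.
import json
import requests

API_URL = "https://transiscope.gogocarto.fr/api/elements"

response = requests.get(API_URL, timeout=30)
response.raise_for_status()            # stop on HTTP errors, like the script's || exit 1
elements = response.json()["data"]     # same extraction as jq .data

with open("transiscope.json", "w", encoding="utf-8") as fh:
    json.dump(elements, fh, ensure_ascii=False, indent=2)
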
@@ -0,0 +1,9 @@
#!/usr/bin/python3

import cloudscraper

url = "https://transiscope.org/carte-des-alternatives/#/carte/@46.33,-1.34,6z?cat=all"

scraper = cloudscraper.create_scraper()
#scraper = cloudscraper.CloudScraper()  # CloudScraper inherits from requests.Session
print(scraper.get(url).content)
@@ -0,0 +1,15 @@
#!/usr/bin/python3


import requests
from parsel import Selector


#url = 'https://transiscope.org/carte-des-alternatives/#/carte/@46.33,-1.34,6z?cat=all'
url = 'https://www.kurzy.cz/banky/bankomaty/zatec-okres-louny/'
r = requests.get(url)
sel = Selector(r.text)
all_address = sel.xpath('//script[contains(.,"point_list")]').re_first(r'point_list = \[(.*)\]\];')

for item in all_address.split(','):
    print(item)
@@ -0,0 +1,18 @@
#!/usr/bin/python3

from bs4 import BeautifulSoup
import urllib.request
import csv

urlpage = 'https://transiscope.org/carte-des-alternatives/#/carte/@46.33,-1.34,6z?cat=all'


# query the website and return the html to the variable 'page'
page = urllib.request.urlopen(urlpage)
# parse the html using Beautiful Soup and store it in the variable 'soup'
soup = BeautifulSoup(page, 'html.parser')


table = soup.find(attrs={'id': 'element-info'})
results = table.find_all('li')
print('Number of results', len(results))
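
Usage note (not part of the commit): once transiscope.json exists, the fields selected by the wrapper script's jq filter (.name, .abstract, .geo) can be read back in Python. A minimal sketch, assuming the file is a JSON array of element objects carrying those keys:

#!/usr/bin/python3
# Illustrative sketch only: prints the same fields as jq '.[] | .name, .abstract, .geo'.
import json

with open("transiscope.json", encoding="utf-8") as fh:
    elements = json.load(fh)   # expected: a list of element objects

for element in elements:
    # .get() tolerates elements that are missing one of the fields
    print(element.get("name"))
    print(element.get("abstract"))
    print(element.get("geo"))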