Add transiscope scraping and add .gitignore file ...
This commit is contained in:
parent
3a97143d5d
commit
24429ba1f5
|
@ -0,0 +1,2 @@
|
||||||
|
.profile
|
||||||
|
zen/tools/scraping/transiscope/transiscope.json
|
|
@ -0,0 +1,9 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# Print the name, abstract and geo fields of every element stored in
# transiscope.json. All paths are relative to the current working directory.
#
# On the first run (no transiscope.json yet) the data is downloaded by
# ./generate_transiscope.sh before anything is displayed.
if [[ ! -f transiscope.json ]]; then
    echo "Premier lancement, récupération des données, veuillez patientez ..."
    # Stop here when the download fails: there would be nothing to display,
    # and continuing would only produce a confusing "no such file" error.
    ./generate_transiscope.sh || exit 1
fi

# jq reads the file directly (no need for cat); a parse error is reported
# through the exit status instead of being hidden by the final "exit 0".
jq '.[] | .name, .abstract, .geo' transiscope.json || exit 1

exit 0
|
|
@ -0,0 +1,7 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Download the transiscope element list and store its "data" member in
# transiscope.json (relative to the current working directory).
#
# The existing transiscope.json is only replaced once a complete, valid
# download has been written to a temporary file.

# Make the pipeline fail when curl fails, not only when jq fails.
set -o pipefail

# Temporary file next to the destination: the final mv is then a rename on
# the same filesystem, and no predictable name is created in shared /tmp.
tmp_file="transiscope.json.tmp.$$"
trap 'rm -f "$tmp_file"' EXIT

# -f : treat HTTP errors (4xx/5xx) as failures instead of saving the error body.
# -e : jq exits non-zero when .data is null or missing.
curl -sf https://transiscope.gogocarto.fr/api/elements | jq -e .data > "$tmp_file" || exit 1

# mv overwrites an existing destination, so no prior rm is needed.
mv -f "$tmp_file" transiscope.json || exit 1

exit 0
|
|
@ -0,0 +1,9 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import cloudscraper
|
||||||
|
|
||||||
|
# Page to download: the transiscope map of alternatives.
url = "https://transiscope.org/carte-des-alternatives/#/carte/@46.33,-1.34,6z?cat=all"

# Build the scraper session. The original author noted that
# cloudscraper.CloudScraper() can be used instead, since CloudScraper
# inherits from requests.Session.
scraper = cloudscraper.create_scraper()

# Fetch the page, then dump the raw response body (bytes) to stdout.
response = scraper.get(url)
print(response.content)
|
|
@ -0,0 +1,15 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from parsel import Selector
|
||||||
|
|
||||||
|
|
||||||
|
# Fetch a page, extract the JavaScript ``point_list`` array embedded in one of
# its <script> tags, and print its comma-separated pieces one per line.
#
# The transiscope map URL is kept for reference only; the active target is the
# kurzy.cz page assigned below.
#url = 'https://transiscope.org/carte-des-alternatives/#/carte/@46.33,-1.34,6z?cat=all'
url = 'https://www.kurzy.cz/banky/bankomaty/zatec-okres-louny/'

# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() prevents parsing the body of an HTTP error response.
r = requests.get(url, timeout=30)
r.raise_for_status()
sel = Selector(r.text)

# re_first() returns None when no <script> contains a matching point_list
# assignment; fail with a clear message instead of an AttributeError on .split.
all_address = sel.xpath('//script[contains(.,"point_list")]').re_first(r'point_list = \[(.*)\]\];')
if all_address is None:
    raise SystemExit('point_list not found in ' + url)

for item in all_address.split(','):
    print(item)
|
|
@ -0,0 +1,18 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import urllib.request
|
||||||
|
import csv
|
||||||
|
|
||||||
|
# Download the transiscope map page and count the <li> items found inside the
# element whose id is "element-info".
urlpage = 'https://transiscope.org/carte-des-alternatives/#/carte/@46.33,-1.34,6z?cat=all'

# Query the website and parse the returned HTML with Beautiful Soup. The
# context manager closes the HTTP response once parsing is done.
with urllib.request.urlopen(urlpage) as page:
    soup = BeautifulSoup(page, 'html.parser')

# find() returns None when the page has no element with this id; report that
# explicitly instead of failing with an AttributeError on find_all below.
# NOTE(review): the node may only be created by client-side JavaScript, in
# which case it is never present in the downloaded HTML -- confirm.
table = soup.find(attrs={'id': 'element-info'})
if table is None:
    raise SystemExit('No element with id "element-info" found in ' + urlpage)

results = table.find_all('li')
print('Number of results', len(results))
|
Loading…
Reference in New Issue