Astroport.ONE/tools/FR_AssembleeNationale.sh

144 lines
4.5 KiB
Bash
Executable File

#!/usr/bin/env bash
# Export the deputies of the French Assemblée nationale to per-deputy YAML.
#
# The script downloads the open-data archive of active deputies from
# data.assemblee-nationale.fr into ~/.zen/FR/data, extracts it, and then, for
# every file under json/acteur/, writes a <key>.yml summary (name, group,
# county, commissions, phones, e-mails, social accounts) and fetches the
# deputy's photo into data/gen/an/images/.
#
# External tools used: wget, unzip, jq.

# Stop at the first command that fails outside of a condition.
set -e
#######################################
# ERR trap handler: print a short diagnostic (current directory and
# MAIN_DIR), then run the optional clean_on_exit hook.
# Globals:   MAIN_DIR (read; may still be unset when the trap fires)
# Arguments: none
# Outputs:   diagnostic lines on stderr
#######################################
function trap_errors() {
  {
    echo ""
    echo "Debugging:"
    echo " pwd: $(pwd)"
    echo " MAIN_DIR: ${MAIN_DIR:-}"
  } >&2
  # clean_on_exit is not defined in the visible part of this script; only
  # call it when it actually exists so that the error handler itself cannot
  # die with "command not found" and hide the original failure.
  if declare -F clean_on_exit >/dev/null; then
    clean_on_exit
  fi
}
# Route any failing command through trap_errors before `set -e` ends the run.
trap trap_errors ERR

# Everything the script produces lives under MAIN_DIR.
MAIN_DIR=~/.zen/FR
mkdir -p "${MAIN_DIR}"
# Browser-like User-Agent sent with every wget request.
USER_AGENT="Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0"
cd "${MAIN_DIR}"

echo -n "Creating directories..."
TMP_DIR=$(mktemp -d)
# The scratch directory was never removed; drop it on every exit path so
# repeated runs do not pile up empty directories under the temp root.
trap 'rm -rf -- "${TMP_DIR:?}"' EXIT
mkdir -p "${MAIN_DIR}/data/"
# Absolute path: same directory as before, but no longer tied to the cwd.
mkdir -p "${MAIN_DIR}/data/gen/an/images/"
echo " done."
# Fetch and unpack the open-data archive unless a non-empty copy is already
# present. The archive is downloaded to a side file and only renamed to
# an.zip once it has been extracted successfully: an interrupted download or
# a failed extraction therefore no longer leaves behind an an.zip that makes
# every later run skip both steps.
an_zip="${MAIN_DIR}/data/an.zip"
if [[ ! -s "${an_zip}" ]]; then
  echo -n "Downloading data..."
  wget -q -U "${USER_AGENT}" "https://data.assemblee-nationale.fr/static/openData/repository/16/amo/deputes_actifs_mandats_actifs_organes/AMO10_deputes_actifs_mandats_actifs_organes.json.zip" -O "${an_zip}.part"
  echo " done."
  echo -n "Extracting data..."
  # -o: overwrite leftovers of an earlier partial extraction without
  #     prompting (a prompt would block an unattended run);
  # -q: keep the progress line readable; -d: extract next to the archive.
  unzip -q -o "${an_zip}.part" -d "${MAIN_DIR}/data/"
  mv -- "${an_zip}.part" "${an_zip}"
  echo " done. "
fi
# ---------------------------------------------------------------------------
# Per-deputy export: for every file under json/acteur/ write <key>.yml and
# download the matching photo. Deputies whose .yml already exists (non-empty)
# are only displayed and then skipped.
# ---------------------------------------------------------------------------

#######################################
# Normalize one raw phone number: drop spaces and dots, remove a "(0)"
# trunk marker, turn a leading "00" into "+", and rewrite the leading
# 0590/0596/0594/0262/0508/0269 prefixes to their "+..." form.
# Arguments: $1 - raw phone number
# Outputs:   normalized number on stdout
#######################################
_an_normalize_phone() {
  printf '%s\n' "$1" \
    | tr -d ' .' \
    | sed -e 's/(0)//' \
          -e 's/^00/+/' \
          -e 's/^0590/+590/' \
          -e 's/^0596/+596/' \
          -e 's/^0594/+594/' \
          -e 's/^0262/+262/' \
          -e 's/^0508/+508/' \
          -e 's/^0269/+262269/'
}

#######################################
# Print one field of json/organe/<ref>.json (relative to the current
# directory). An empty reference or a missing file yields a warning and
# empty output instead of a jq failure that would abort the whole run
# under `set -e`.
# Arguments: $1 - organe reference, $2 - field name below .organe
# Outputs:   field value on stdout, warning on stderr
#######################################
_an_organe_field() {
  local ref=$1 field=$2
  local organe_json="json/organe/${ref}.json"
  if [[ -z "${ref}" || ! -s "${organe_json}" ]]; then
    echo "warning: no organe file for reference '${ref}'" >&2
    return 0
  fi
  jq -r --arg field "${field}" '.organe[$field]' "${organe_json}"
}

cd "${MAIN_DIR}/data/"

find json/acteur/ -type f \
  | sed -e 's/\.json//i' -e 's/json\/acteur\///i' \
  | while IFS= read -r key; do
  # The unquoted `for x in ${list}` loops below rely on splitting
  # multi-line values on newlines only.
  IFS=$'\n'
  actor_json="json/acteur/${key}.json"
  filename="${MAIN_DIR}/data/${key}.yml"

  echo -n "Parsing ${key}"
  first_name=$(jq -r .acteur.etatCivil.ident.prenom "${actor_json}")
  echo -n " (${first_name} "
  last_name=$(jq -r .acteur.etatCivil.ident.nom "${actor_json}")
  echo "${last_name})"

  # Already exported: show the file and move on *before* running the
  # remaining jq extractions, whose results would be thrown away anyway.
  if [[ -s "${filename}" ]]; then
    cat "${filename}"
    continue
  fi

  # Contact entries are selected by their "type" code: 15 is lowercased and
  # written under email:, 11 under phone:, 24 as twitter, 25 as facebook.
  email=$(jq -r '.acteur.adresses.adresse | map(select(.type=="15")) | .[].valElec' "${actor_json}" | tac | awk '{print tolower($0)}')
  phoneRaw=$(jq -r '.acteur.adresses.adresse | map(select(.type=="11")) | .[].valElec' "${actor_json}" | tac)

  # phoneRaw was reversed by tac; prepending restores the original order.
  phone=""
  for i in ${phoneRaw}; do
    phone="$(_an_normalize_phone "${i}")"$'\n'"${phone}"
  done

  # Entries whose .name equals "<first> <last>" contribute extra phone
  # numbers that are moved to the front of the list. The name is passed with
  # --arg so quotes in a name cannot break the jq program.
  # NOTE(review): the actor files read above are objects keyed by "acteur";
  # this lookup looks like it was written for a different input file and
  # probably never matches here -- confirm before relying on it.
  phonesChamber=$(jq -r --arg name "${first_name} ${last_name}" 'map(select(.name==$name)) | .[].phone' "${actor_json}" | tr -d ' .' 2>/dev/null)
  for i in ${phonesChamber}; do
    # Literal removal of the number (no regex interpretation of its digits,
    # "+" or "/"), then de-duplicate the remaining lines.
    stripped=${phone//"${i}"/}
    phone=$(printf '%s\n' "${stripped}" | sort -u)
    phone="${i}"$'\n'"${phone}"
  done

  twitter=$(jq -r '.acteur.adresses.adresse | map(select(.type=="24")) | .[].valElec' "${actor_json}" | sed 's/@//')
  facebook=$(jq -r '.acteur.adresses.adresse | map(select(.type=="25")) | .[].valElec' "${actor_json}" | sed 's/@//')

  # Commissions: mandates of type COMPER or COMNL, resolved to the short
  # label of the referenced organe.
  commissionsRef=$(jq -r '.acteur.mandats[] | map(select(.typeOrgane=="COMPER" or .typeOrgane=="COMNL")) | .[].organes.organeRef' "${actor_json}" | sort -u)
  commissions=""
  for i in ${commissionsRef}; do
    commissions="${commissions}"$'\n'"$(_an_organe_field "${i}" libelleAbrege)"
  done

  county=$(jq -r '.acteur.mandats[] | map(select(.typeOrgane=="ASSEMBLEE")) | .[].election.lieu.departement' "${actor_json}" | head -1)
  groupRef=$(jq -r '.acteur.mandats[] | map(select(.typeOrgane=="GP")) | .[].organes.organeRef' "${actor_json}" | head -1)
  # A deputy without a GP mandate used to make jq open json/organe/.json and
  # abort the run; the helper now leaves the group empty instead.
  group=$(_an_organe_field "${groupRef}" libelle)

  # Photo files are named after the key without its "PA" prefix.
  photo=${key/[Pp][Aa]/}

  echo -n " Writing data..."
  {
    echo "id: ${key}"
    echo "last_name: ${last_name}"
    echo "first_name: ${first_name}"
    echo "group: ${group}"
    echo "county: ${county}"
    echo "commissions:"
    for i in ${commissions}; do
      echo "- \"${i}\""
    done
    echo -n "phone:"
    if [[ -n "${phone}" ]]; then
      echo ""
      for i in ${phone}; do
        echo "- \"${i}\""
      done
    else
      echo " \"\""
    fi
    echo "email:"
    for i in ${email}; do
      echo "- \"${i}\""
    done
    echo "twitter: ${twitter}"
    echo "facebook: ${facebook}"
    echo "photo: ${photo}"
  } > "${filename}"
  echo " done."

  echo -n " Downloading photo..."
  photo_file="${MAIN_DIR}/data/gen/an/images/${photo}.jpg"
  photo_status=" done."
  # -s instead of -f: an empty file left by an earlier failed download must
  # not count as "already downloaded".
  if [[ ! -s "${photo_file}" ]]; then
    # A single missing photo must not abort the whole export: report it,
    # remove the empty file wget -O leaves behind, and carry on.
    if ! wget -q -U "${USER_AGENT}" "https://www2.assemblee-nationale.fr/static/tribun/16/photos/${photo}.jpg" -O "${photo_file}"; then
      rm -f -- "${photo_file}"
      photo_status=" failed."
    fi
  fi
  echo "${photo_status}"

  cat "${filename}"

  # Pause 5 seconds plus the first digit of $RANDOM before the next deputy
  # (presumably to avoid hammering the remote server).
  CUR=5
  WHAT=${RANDOM:0:1}
  echo "sleeping $((CUR+WHAT))"
  sleep $((CUR+WHAT))
done