astrXbian/.install/.kodi/addons/plugin.video.vstream/resources/lib/parser.py

83 lines
3.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
# vStream https://github.com/Kodi-vStream/venom-xbmc-addons
import re
class cParser:
def parseSingleResult(self, sHtmlContent, sPattern):
    """Look for sPattern in sHtmlContent and succeed only on exactly one hit.

    The search is case-sensitive and is run on the text as given (no
    clean-up is applied before matching).

    Returns:
        (True, match) when the pattern is found exactly once; the single
        match is passed through the special-character clean-up first.
        (False, matches) in every other case, where matches is the
        unmodified list of hits (possibly empty).
    """
    aFound = re.findall(sPattern, sHtmlContent)
    if len(aFound) != 1:
        # Zero hits or an ambiguous result: hand the raw list back.
        return False, aFound
    return True, self.__replaceSpecialCharacters(aFound[0])
def __replaceSpecialCharacters(self, sString):
"""Return sString after applying a fixed chain of str.replace() substitutions.

Warning: the dashes handled in the chain are not the same character; one is
the en dash and the other is the em dash.
"""
# The chain removes carriage returns, line feeds and tabs, turns the
# backslash-escaped slash into a plain '/', and maps named / numeric HTML
# entities (&ccedil;, &laquo;, &raquo;, &rsquo;, &#8217;, &#8230;, ...) to
# plain characters; &nbsp; is replaced by the empty string.
# NOTE(review): several search literals below look as if an HTML rendering
# step already decoded them: replace('&', '&') and replace('é', 'é') replace
# a string with itself, the argument written as ''' is not a closed string
# literal, and the last two calls search for the empty string (which would
# insert '-' between every character). Presumably these were HTML entities
# and special dash characters in the upstream file - compare against the
# vStream repository before relying on this text. TODO confirm.
return sString.replace('\r', '').replace('\n', '').replace('\t', '').replace('\\/', '/').replace('&', '&')\
.replace(''', "'").replace('–', '-').replace('—', '-').replace('é', 'é')\
.replace('â', 'â').replace('ê', 'ê').replace('î', 'î').replace('ô', 'ô')\
.replace('…', '...').replace('"', '"').replace('>', '>').replace('è', 'è')\
.replace('&ccedil;', 'ç').replace('&laquo;', '<<').replace('&raquo;', '>>').replace('\xc9', 'E')\
.replace('&ndash;', '-').replace('&eacute;', 'é').replace('&agrave;', 'à').replace('&lt;', '<')\
.replace('&rsquo;', "'").replace('&lsquo;', '\'').replace('&nbsp;', '').replace('&#8217;', "'")\
.replace('&#8230;', '...').replace('&#8242;', "'").replace('&#884;', '\'')\
.replace('&#038;', '&').replace('', '-').replace('', '-')
def parse(self, sHtmlContent, sPattern, iMinFoundValue=1):
    """Find every case-insensitive match of sPattern in the cleaned-up content.

    sHtmlContent is converted with str() and passed through the
    special-character clean-up before the search is run.

    Args:
        sHtmlContent: text (or any object convertible with str()) to search.
        sPattern: regular expression, matched with re.IGNORECASE.
        iMinFoundValue: minimum number of hits needed to report success.

    Returns:
        (bFound, aFound): bFound is True when at least iMinFoundValue hits
        were found; aFound is the findall() result list in both cases.
    """
    sCleaned = self.__replaceSpecialCharacters(str(sHtmlContent))
    aFound = re.findall(sPattern, sCleaned, re.IGNORECASE)
    return len(aFound) >= iMinFoundValue, aFound
def replace(self, sPattern, sReplaceString, sValue):
    """Return sValue with every match of the regex sPattern replaced by sReplaceString."""
    oRegex = re.compile(sPattern)
    return oRegex.sub(sReplaceString, sValue)
def escape(self, sValue):
    """Return sValue with regex metacharacters escaped so it matches literally."""
    sEscaped = re.escape(sValue)
    return sEscaped
def getNumberFromString(self, sValue):
if '/0-9/' in sValue:
sPattern = '/0-9.+?(\d+)'
else:
sPattern = '\d+'
aMatches = re.findall(sPattern, sValue)
if (len(aMatches) > 0):
return aMatches[0]
return 0
def titleParse(self, sHtmlContent, sPattern):
    """Extract the named groups of the last match of sPattern.

    sHtmlContent is converted with str() and passed through the
    special-character clean-up, then searched case-insensitively.

    Returns:
        The groupdict() of the last match found, or {'title': <cleaned
        content>} when the pattern does not match at all.
    """
    sHtmlContent = self.__replaceSpecialCharacters(str(sHtmlContent))
    oRegex = re.compile(sPattern, re.IGNORECASE)

    # Keep the last match explicitly. The previous code depended on a list
    # comprehension variable being visible after the comprehension, which is
    # not the case on Python 3, so the fallback was always returned there.
    oLastMatch = None
    for oLastMatch in oRegex.finditer(sHtmlContent):
        pass

    if oLastMatch is None:
        return {'title': sHtmlContent}
    return oLastMatch.groupdict()
def abParse(self, sHtmlContent, start, end=None, startoffset=0):
    """Cut sHtmlContent down to the part between two marker strings.

    Usage:  oParser.abParse(sHtmlContent, 'start', 'end')
    Usage2: oParser.abParse(sHtmlContent, 'start', 'end', 6)

    Args:
        sHtmlContent: text to cut.
        start: marker whose first occurrence begins the result.
        end: optional marker ending the result (excluded); it is only
            searched for after the beginning of the result.
        startoffset: number of characters added to the position of start,
            e.g. to leave the start marker itself out of the result.

    Returns:
        The whole text when start is not found; otherwise the text from
        start (shifted by startoffset) up to end, or up to the end of the
        text when end is omitted, not found, or found right at the
        beginning of the result.
    """
    iStart = sHtmlContent.find(start)
    if iStart < 0:
        # Nothing found: return the complete text.
        return sHtmlContent

    iFrom = startoffset + iStart
    sTail = sHtmlContent[iFrom:]
    if not end:
        return sTail

    iEndRel = sTail.find(end)
    if iEndRel <= 0:
        return sTail
    return sHtmlContent[iFrom:iFrom + iEndRel]