# -*- coding: utf-8 -*-
# vStream https://github.com/Kodi-vStream/venom-xbmc-addons
import re


class cParser:
    """Small regex/substring helper used to extract data from HTML pages."""

    # Ordered (old, new) pairs applied one after the other by
    # __replaceSpecialCharacters. Order matters: '&amp;' is decoded early, so
    # a double-encoded entity such as '&amp;eacute;' ends up fully decoded.
    #
    # NOTE(review): the entity spellings were reconstructed from a copy of this
    # file in which the entities had been rendered as their characters (which
    # turned most replacements into no-ops). Confirm the list, in particular
    # '&#233;', '&#697;'/'&#884;' and '&nbsp;', against the upstream source.
    _REPLACEMENTS = (
        # Line breaks, tabs and JSON-escaped slashes.
        ('\r', ''), ('\n', ''), ('\t', ''), ('\\/', '/'),
        # HTML entities.
        ('&amp;', '&'),
        ('&#039;', "'"), ('&#39;', "'"),
        ('&#8211;', '-'), ('&#8212;', '-'),
        ('&eacute;', 'é'), ('&acirc;', 'â'), ('&ecirc;', 'ê'),
        ('&icirc;', 'î'), ('&ocirc;', 'ô'),
        ('&hellip;', '...'), ('&quot;', '"'), ('&gt;', '>'),
        ('&egrave;', 'è'), ('&ccedil;', 'ç'),
        ('&laquo;', '<<'), ('&raquo;', '>>'),
        ('\xc9', 'E'),
        ('&ndash;', '-'), ('&#233;', 'é'), ('&agrave;', 'à'), ('&lt;', '<'),
        ('&rsquo;', "'"), ('&lsquo;', "'"),
        # Non-breaking spaces are dropped, ordinary spaces are kept.
        ('&nbsp;', ''),
        ('&#8217;', "'"), ('&#8230;', '...'), ('&#8242;', "'"),
        ('&#697;', "'"), ('&#884;', "'"),
        ('&#038;', '&'),
        # Literal typographic characters. Warning: the en dash and the em dash
        # are two different characters, both are mapped to a plain hyphen.
        ('–', '-'), ('—', '-'), ('…', '...'),
        ('«', '<<'), ('»', '>>'),
        ('’', "'"), ('‘', "'"), ('′', "'"), ('ʹ', "'"),
    )

    def parseSingleResult(self, sHtmlContent, sPattern):
        """Match sPattern and succeed only when there is exactly one result.

        Returns (True, cleaned_match) when findall() yields exactly one item,
        otherwise (False, list_of_matches). The content itself is not cleaned
        before matching; only the single result is.
        """
        aMatches = re.compile(sPattern).findall(sHtmlContent)
        if len(aMatches) == 1:
            return True, self.__replaceSpecialCharacters(aMatches[0])
        return False, aMatches

    def __replaceSpecialCharacters(self, sString):
        """Return sString with the _REPLACEMENTS table applied in order."""
        for sOld, sNew in self._REPLACEMENTS:
            sString = sString.replace(sOld, sNew)
        return sString

    def parse(self, sHtmlContent, sPattern, iMinFoundValue=1):
        """Clean the content, then return every case-insensitive match.

        Returns (True, matches) when at least iMinFoundValue matches were
        found, otherwise (False, matches).
        """
        sHtmlContent = self.__replaceSpecialCharacters(str(sHtmlContent))
        aMatches = re.compile(sPattern, re.IGNORECASE).findall(sHtmlContent)
        return len(aMatches) >= iMinFoundValue, aMatches

    def replace(self, sPattern, sReplaceString, sValue):
        """Thin wrapper around re.sub()."""
        return re.sub(sPattern, sReplaceString, sValue)

    def escape(self, sValue):
        """Thin wrapper around re.escape()."""
        return re.escape(sValue)

    def getNumberFromString(self, sValue):
        """Return the first number found in sValue as a string, or 0 if none.

        When sValue contains the marker '/0-9/', the number is searched after
        that marker instead of from the beginning of the string.
        """
        if '/0-9/' in sValue:
            sPattern = r'/0-9.+?(\d+)'
        else:
            sPattern = r'\d+'

        aMatches = re.findall(sPattern, sValue)
        if aMatches:
            return aMatches[0]
        # Historical contract: the "not found" value is the integer 0.
        return 0

    def titleParse(self, sHtmlContent, sPattern):
        """Return the named groups of the last match of sPattern.

        The content is cleaned first and matched case-insensitively. When the
        pattern does not match, {'title': cleaned_content} is returned.
        """
        sHtmlContent = self.__replaceSpecialCharacters(str(sHtmlContent))
        oPattern = re.compile(sPattern, re.IGNORECASE)

        # Keep the last match explicitly: a comprehension variable is not
        # visible after the comprehension on Python 3.
        oLastMatch = None
        for oLastMatch in oPattern.finditer(sHtmlContent):
            pass

        if oLastMatch is None:
            return {'title': sHtmlContent}
        return oLastMatch.groupdict()

    def abParse(self, sHtmlContent, start, end=None, startoffset=0):
        """Return the part of sHtmlContent located between two markers.

        Usage: oParser.abParse(sHtmlContent, 'start', 'end')
               oParser.abParse(sHtmlContent, 'start', 'end', 6)

        start       -- marker where the extract begins; when it is not found
                       the whole text is returned unchanged.
        end         -- optional marker, searched only after the beginning of
                       the extract; when missing or not found, everything up
                       to the end of the text is returned.
        startoffset -- shifts the beginning of the extract, typically by
                       len(start) so that the start marker is left out of the
                       result.
        """
        startIdx = sHtmlContent.find(start)
        if startIdx == -1:
            # Nothing found: hand back the complete text.
            return sHtmlContent

        sTail = sHtmlContent[startoffset + startIdx:]
        if end:
            endIdx = sTail.find(end)
            # An end marker sitting at the very beginning of the extract is
            # deliberately treated like "not found" (historical behaviour).
            if endIdx > 0:
                return sTail[:endIdx]
        return sTail