Implement reverse engineering of the search page

This commit is contained in:
Hexah 2020-06-11 16:15:19 +02:00
parent 5beeed2387
commit 2be6db10d3
4 changed files with 194 additions and 15 deletions

View File

@ -0,0 +1,151 @@
import 'dart:convert';
import 'package:html/dom.dart';
import 'package:html/parser.dart' as parser;
import '../../playlists/playlists.dart';
import '../../retry.dart';
import '../../search/related_query.dart';
import '../../videos/videos.dart';
import '../youtube_http_client.dart';
/// A parsed YouTube search results page.
///
/// Wraps the HTML [Document] of the results page and exposes the
/// `ytInitialData` JSON payload embedded in one of its `<script>` elements.
class SearchPage {
  /// Assignment prefix that precedes the initial-data JSON in the page script.
  static const String _initialDataSeparator = 'window["ytInitialData"] =';

  final Document _root;

  // Cache for [initialData]; filled on first access.
  _InitialData _initialData;

  /// The decoded `ytInitialData` payload, parsed on first access and cached.
  ///
  /// Throws a [StateError] if no `<script>` element contains the
  /// `window["ytInitialData"] =` assignment.
  _InitialData get initialData => _initialData ??= _InitialData(json.decode(
      _extractJson(
          _root
              .querySelectorAll('script')
              .map((e) => e.text)
              .firstWhere((e) => e.contains(_initialDataSeparator)),
          _initialDataSeparator)));

  /// Returns the JSON text that follows the first occurrence of [separator]
  /// in [html].
  ///
  /// The caller must make sure [html] actually contains [separator]; this
  /// method does not check the result of `indexOf`.
  String _extractJson(String html, String separator) {
    return _matchJson(
        html.substring(html.indexOf(separator) + separator.length));
  }

  /// Returns the prefix of [str] that ends right before the first `;` found
  /// outside of any `{ }` pair, or the whole of [str] if there is none.
  ///
  /// Characters inside double-quoted string literals are ignored, so braces or
  /// semicolons that appear in JSON string values (for example in a video
  /// title) do not disturb the bracket count. An empty [str] yields an empty
  /// string.
  String _matchJson(String str) {
    var depth = 0;
    var inString = false;
    var escaped = false;
    for (var i = 0; i < str.length; i++) {
      final char = str[i];
      if (inString) {
        if (escaped) {
          // The character after a backslash never terminates the literal.
          escaped = false;
        } else if (char == '\\') {
          escaped = true;
        } else if (char == '"') {
          inString = false;
        }
        continue;
      }
      if (char == '"') {
        inString = true;
      } else if (char == '{') {
        depth++;
      } else if (char == '}') {
        depth--;
      } else if (char == ';' && depth == 0) {
        return str.substring(0, i);
      }
    }
    return str;
  }

  /// Creates a [SearchPage] from an already parsed document.
  SearchPage(this._root);

  /// Requests the search results page for [queryString] through [httpClient]
  /// and parses the response body.
  ///
  /// [queryString] is URL-encoded before being placed in the request URL.
  /// The request and the parsing both run inside [retry].
  static Future<SearchPage> get(
      YoutubeHttpClient httpClient, String queryString) {
    final url =
        'https://www.youtube.com/results?search_query=${Uri.encodeQueryComponent(queryString)}';
    return retry(() async {
      var raw = await httpClient.postString(url);
      return SearchPage.parse(raw);
    });
  }

  /// Creates a [SearchPage] by parsing the [raw] HTML source of the page.
  SearchPage.parse(String raw) : _root = parser.parse(raw);
}
/// Read-only view over the decoded `ytInitialData` JSON of a search page.
///
/// Each getter walks the JSON once and caches its result.
class _InitialData {
  // Json parsed map
  final Map<String, dynamic> _root;

  _InitialData(this._root);

  /* Cache results */
  List<dynamic> _searchContent;
  List<dynamic> _relatedVideos;
  List<RelatedQuery> _relatedQueries;

  /// The `contents` list of the first `itemSectionRenderer` of the page.
  ///
  /// All public getters of this class read their data from this list.
  List<dynamic> get _sectionContents => _root['contents']
          ['twoColumnSearchResultsRenderer']['primaryContents']
      ['sectionListRenderer']['contents']
      .first['itemSectionRenderer']['contents'];

  /// The search results, in page order.
  ///
  /// Contains only [VideoId] or [PlaylistId]; entries of any other kind are
  /// dropped.
  List<dynamic> get searchContent => _searchContent ??=
      _sectionContents.map(_parseContent).where((e) => e != null).toList();

  /// The queries listed in the first `horizontalCardListRenderer` entry.
  ///
  /// Empty when the page has no such entry.
  List<RelatedQuery> get relatedQueries =>
      _relatedQueries ??= _parseRelatedQueries();

  /// The items of the first `shelfRenderer` entry.
  ///
  /// Contains only [VideoId] or [PlaylistId]. Empty when the page has no such
  /// entry.
  List<dynamic> get relatedVideos => _relatedVideos ??= _parseRelatedVideos();

  /// Builds the value of [relatedQueries].
  List<RelatedQuery> _parseRelatedQueries() {
    final holder = _sectionContents.firstWhere(
        (e) => e.containsKey('horizontalCardListRenderer') as bool,
        orElse: () => null);
    if (holder == null) {
      return <RelatedQuery>[];
    }
    final List<dynamic> cards = holder['horizontalCardListRenderer']['cards'];
    return cards
        .map((e) => e['searchRefinementCardRenderer'])
        .map((e) => RelatedQuery(
            e['searchEndpoint']['searchEndpoint']['query'],
            // The video id is the second path segment of the thumbnail url.
            VideoId(Uri.parse(e['thumbnail']['thumbnails'].first['url'])
                .pathSegments[1])))
        .toList();
  }

  /// Builds the value of [relatedVideos].
  List<dynamic> _parseRelatedVideos() {
    final shelf = _sectionContents.firstWhere(
        (e) => e.containsKey('shelfRenderer') as bool,
        orElse: () => null);
    if (shelf == null) {
      return <dynamic>[];
    }
    final List<dynamic> items =
        shelf['shelfRenderer']['content']['verticalListRenderer']['items'];
    return items.map(_parseContent).where((e) => e != null).toList();
  }

  /// Converts one entry of a contents list into a [VideoId] or [PlaylistId].
  ///
  /// Returns `null` when [content] is `null` or holds neither a
  /// `videoRenderer` nor a `radioRenderer`.
  dynamic _parseContent(dynamic content) {
    if (content == null) {
      return null;
    }
    // If is a video
    if (content.containsKey('videoRenderer')) {
      return VideoId(content['videoRenderer']['videoId']);
    }
    if (content.containsKey('radioRenderer')) {
      return PlaylistId(content['radioRenderer']['playlistId']);
    }
    // Here ignore 'horizontalCardListRenderer' & 'shelfRenderer'
    return null;
  }
}
// ['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'].first['itemSectionRenderer']
// ['contents'] -> @See ContentsList
// ['continuations'] -> Data to see more
//ContentsList:
// Key -> 'videoRenderer'
// videoId --> VideoId
// title['runs'].loop -> ['text'] -> concatenate --> "Video Title"
// descriptionSnippet['runs'].loop -> ['text'] -> concatenate --> "Video Description snippet"
// ownerText['runs'].first -> ['text'] --> "Video Author"
// lengthText['simpleText'] -> Parse format H:M:S -> "Video Duration"
// viewCountText['simpleText'] -> Strip non digit -> int.parse --> "Video View Count"
//
// Key -> 'radioRenderer'
// playlistId -> PlaylistId
// title['simpleText'] --> "Playlist Title"
//
// Key -> 'horizontalCardListRenderer' // Queries related to this search
// cards --> List of Maps -> loop -> ['searchRefinementCardRenderer']
// thumbnail -> ['thumbnails'].first -> ['url'] --> "Thumbnail url" -> The video id is the second path segment of this url.
// searchEndpoint -> ['searchEndpoint'] -> ['query'] -> "Related query string"
//
// Key -> 'shelfRenderer' // Videos related to this search
// content -> ['verticalListRenderer']['items'] -> loop -> parseContent

View File

@ -26,7 +26,6 @@ class WatchPage {
bool get isVideoAvailable =>
_root.querySelector('meta[property="og:url"]') != null;
//TODO: Update this to the new "parsing method" w/ regex "label"\s*:\s*"([\d,\.]+) likes"
int get videoLikeCount => int.parse(_videoLikeExp
.firstMatch(_root.outerHtml)
?.group(1)
@ -39,7 +38,6 @@ class WatchPage {
?.nullIfWhitespace ??
'0');
//TODO: Update this to the new "parsing method" w/ regex "label"\s*:\s*"([\d,\.]+) dislikes"
int get videoDislikeCount => int.parse(_videoDislikeExp
.firstMatch(_root.outerHtml)
?.group(1)
@ -52,14 +50,14 @@ class WatchPage {
?.nullIfWhitespace ??
'0');
_PlayerConfig get playerConfig => _PlayerConfig(json.decode(
_matchJson(_extractJson(_root.getElementsByTagName('html').first.text))));
_PlayerConfig get playerConfig =>
_PlayerConfig(json.decode(_matchJson(_extractJson(
_root.getElementsByTagName('html').first.text,
'ytplayer.config = '))));
final String configSep = 'ytplayer.config = ';
String _extractJson(String html) {
String _extractJson(String html, String separator) {
return _matchJson(
html.substring(html.indexOf(configSep) + configSep.length));
html.substring(html.indexOf(separator) + separator.length));
}
String _matchJson(String str) {

View File

@ -6,9 +6,10 @@ import '../videos/streams/streams.dart';
class YoutubeHttpClient {
final Client _httpClient = Client();
final Map<String, String> _userAgent = const {
final Map<String, String> _defaultHeaders = const {
'user-agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
'accept-language': 'en-US,en;q=1.0'
};
/// Throws if something is wrong with the response.
@ -33,17 +34,33 @@ class YoutubeHttpClient {
}
Future<Response> get(dynamic url, {Map<String, String> headers}) {
return _httpClient.get(url, headers: {...?headers, ..._userAgent});
return _httpClient.get(url, headers: {...?headers, ..._defaultHeaders});
}
/// Sends a POST request to [url] and returns the raw response.
///
/// The caller's [headers], if any, are merged with the client's default
/// headers; on a key collision the default header wins.
Future<Response> post(dynamic url, {Map<String, String> headers}) =>
    _httpClient.post(url, headers: {...?headers, ..._defaultHeaders});
Future<Response> head(dynamic url, {Map<String, String> headers}) {
return _httpClient.head(url, headers: {...?headers, ..._userAgent});
return _httpClient.head(url, headers: {...?headers, ..._defaultHeaders});
}
Future<String> getString(dynamic url,
{Map<String, String> headers, bool validate = true}) async {
var response =
await _httpClient.get(url, headers: {...?headers, ..._userAgent});
await _httpClient.get(url, headers: {...?headers, ..._defaultHeaders});
if (validate) {
_validateResponse(response, response.statusCode);
}
return response.body;
}
Future<String> postString(dynamic url,
{Map<String, String> headers, bool validate = true}) async {
var response =
await _httpClient.post(url, headers: {...?headers, ..._defaultHeaders});
if (validate) {
_validateResponse(response, response.statusCode);
@ -57,7 +74,7 @@ class YoutubeHttpClient {
var url = streamInfo.url;
if (!streamInfo.isRateLimited()) {
var request = Request('get', url);
request.headers.addAll(_userAgent);
request.headers.addAll(_defaultHeaders);
var response = await request.send();
if (validate) {
_validateResponse(response, response.statusCode);
@ -67,7 +84,7 @@ class YoutubeHttpClient {
for (var i = 0; i < streamInfo.size.totalBytes; i += 9898989) {
var request = Request('get', url);
request.headers['range'] = 'bytes=$i-${i + 9898989}';
request.headers.addAll(_userAgent);
request.headers.addAll(_defaultHeaders);
var response = await request.send();
if (validate) {
_validateResponse(response, response.statusCode);

View File

@ -0,0 +1,13 @@
import '../videos/video_id.dart';
/// A query related to a search query, together with a related video.
class RelatedQuery {
  /// Query related to a search query.
  final String query;

  /// Video related to a search query.
  final VideoId videoId;

  /// Initialize a [RelatedQuery] instance.
  const RelatedQuery(this.query, this.videoId);
}