Implement reverse engineer of search page
This commit is contained in:
parent
5beeed2387
commit
2be6db10d3
|
@ -0,0 +1,151 @@
|
|||
import 'dart:convert';
|
||||
|
||||
import 'package:html/dom.dart';
|
||||
import 'package:html/parser.dart' as parser;
|
||||
|
||||
import '../../playlists/playlists.dart';
|
||||
import '../../retry.dart';
|
||||
import '../../search/related_query.dart';
|
||||
import '../../videos/videos.dart';
|
||||
import '../youtube_http_client.dart';
|
||||
|
||||
/// Wraps the parsed HTML document of a YouTube search results page and
/// exposes the `ytInitialData` JSON blob embedded in one of its scripts.
class SearchPage {
  /// Text that precedes the initial-data JSON inside the page's script tag.
  static const String _initialDataSeparator = 'window["ytInitialData"] =';

  final Document _root;

  // Lazily decoded on first access of [initialData].
  _InitialData _initialData;

  /// The decoded `ytInitialData` object of this page.
  ///
  /// Throws a [StateError] if no script element contains the
  /// `window["ytInitialData"] =` assignment, and a [FormatException] if the
  /// extracted text is not valid JSON.
  _InitialData get initialData =>
      _initialData ??= _InitialData(json.decode(_extractJson(
          _root
              .querySelectorAll('script')
              .map((e) => e.text)
              .firstWhere((e) => e.contains(_initialDataSeparator)),
          _initialDataSeparator)));

  /// Returns the JSON text that follows the first occurrence of [separator]
  /// in [html], cut at the statement-terminating `;` (see [_matchJson]).
  ///
  /// Throws a [FormatException] if [separator] does not occur in [html].
  String _extractJson(String html, String separator) {
    final start = html.indexOf(separator);
    if (start < 0) {
      // Without this guard, indexOf's -1 would silently slice an unrelated
      // part of the document.
      throw FormatException('Separator "$separator" not found', html);
    }
    return _matchJson(html.substring(start + separator.length));
  }

  /// Returns the prefix of [str] that ends right before the first `;` found
  /// outside of any `{ ... }` nesting, or all of [str] if there is none.
  ///
  /// Braces and semicolons that appear inside double-quoted string literals
  /// are ignored so that text values cannot unbalance the nesting count.
  String _matchJson(String str) {
    var depth = 0;
    var inString = false;
    var escaped = false;
    for (var i = 0; i < str.length; i++) {
      final char = str[i];
      if (inString) {
        if (escaped) {
          // The character after a backslash never terminates the string.
          escaped = false;
        } else if (char == '\\') {
          escaped = true;
        } else if (char == '"') {
          inString = false;
        }
        continue;
      }
      if (char == '"') {
        inString = true;
      } else if (char == '{') {
        depth++;
      } else if (char == '}') {
        depth--;
      } else if (char == ';' && depth == 0) {
        return str.substring(0, i);
      }
    }
    // No terminator found (this also covers the empty string).
    return str;
  }

  /// Wraps an already parsed document.
  SearchPage(this._root);

  /// Requests the search results page for [queryString] through
  /// [httpClient] (inside `retry`) and parses the response body.
  static Future<SearchPage> get(
      YoutubeHttpClient httpClient, String queryString) {
    final url =
        'https://www.youtube.com/results?search_query=${Uri.encodeQueryComponent(queryString)}';
    return retry(() async {
      var raw = await httpClient.postString(url);
      return SearchPage.parse(raw);
    });
  }

  /// Parses [raw] HTML into a [SearchPage].
  SearchPage.parse(String raw) : _root = parser.parse(raw);
}
|
||||
|
||||
/// Typed accessors over the decoded `ytInitialData` JSON of a search page.
class _InitialData {
  // Json parsed map
  final Map<String, dynamic> _root;

  _InitialData(this._root);

  /* Cache results */

  List<dynamic> _searchContent;
  List<dynamic> _relatedVideos;
  List<RelatedQuery> _relatedQueries;

  /// The renderer entries of the first item section of the search results.
  ///
  /// Every public getter of this class reads from this same JSON path.
  List<dynamic> get _sectionContents => _root['contents']
              ['twoColumnSearchResultsRenderer']['primaryContents']
          ['sectionListRenderer']['contents']
      .first['itemSectionRenderer']['contents'];

  /// Search results; contains only [VideoId] or [PlaylistId] instances.
  ///
  /// Entries that [_parseContent] does not recognize are dropped.
  List<dynamic> get searchContent => _searchContent ??=
      _sectionContents.map(_parseContent).where((e) => e != null).toList();

  /// Queries related to this search, taken from the first
  /// `horizontalCardListRenderer` entry.
  ///
  /// Empty when the page has no such entry.
  List<RelatedQuery> get relatedQueries {
    if (_relatedQueries != null) {
      return _relatedQueries;
    }
    final cardList = _sectionContents.firstWhere(
        (e) => e is Map && e.containsKey('horizontalCardListRenderer'),
        orElse: () => null);
    if (cardList == null) {
      return _relatedQueries = <RelatedQuery>[];
    }
    final List<dynamic> cards = cardList['horizontalCardListRenderer']['cards'];
    return _relatedQueries = cards
        .map((e) => e['searchRefinementCardRenderer'])
        .map<RelatedQuery>((e) => RelatedQuery(
            e['searchEndpoint']['searchEndpoint']['query'],
            // The video id is read from the second path segment of the
            // card's first thumbnail url.
            VideoId(Uri.parse(e['thumbnail']['thumbnails'].first['url'])
                .pathSegments[1])))
        .toList();
  }

  /// Videos related to this search, taken from the first `shelfRenderer`
  /// entry; contains only [VideoId] or [PlaylistId] instances.
  ///
  /// Empty when the page has no such entry.
  List<dynamic> get relatedVideos {
    if (_relatedVideos != null) {
      return _relatedVideos;
    }
    final shelf = _sectionContents.firstWhere(
        (e) => e is Map && e.containsKey('shelfRenderer'),
        orElse: () => null);
    if (shelf == null) {
      return _relatedVideos = <dynamic>[];
    }
    final List<dynamic> items =
        shelf['shelfRenderer']['content']['verticalListRenderer']['items'];
    return _relatedVideos =
        items.map(_parseContent).where((e) => e != null).toList();
  }

  /// Converts one renderer entry into a [VideoId] (`videoRenderer`) or a
  /// [PlaylistId] (`radioRenderer`).
  ///
  /// Returns `null` for anything else, including
  /// 'horizontalCardListRenderer' and 'shelfRenderer' entries.
  dynamic _parseContent(dynamic content) {
    if (content is! Map) {
      return null;
    }
    if (content.containsKey('videoRenderer')) {
      return VideoId(content['videoRenderer']['videoId']);
    }
    if (content.containsKey('radioRenderer')) {
      return PlaylistId(content['radioRenderer']['playlistId']);
    }
    return null;
  }
}
|
||||
|
||||
// ['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'].first['itemSectionRenderer']
|
||||
// ['contents'] -> @See ContentsList
|
||||
// ['continuations'] -> Data to see more
|
||||
|
||||
//ContentsList:
|
||||
// Key -> 'videoRenderer'
|
||||
// videoId --> VideoId
|
||||
// title['runs'].loop -> ['text'] -> concatenate --> "Video Title"
|
||||
// descriptionSnippet['runs'].loop -> ['text'] -> concatenate --> "Video Description snippet"
|
||||
// ownerText['runs'].first -> ['text'] --> "Video Author"
|
||||
// lengthText['simpleText'] -> Parse format H:M:S -> "Video Duration"
|
||||
// viewCountText['simpleText'] -> Strip non digit -> int.parse --> "Video View Count"
|
||||
//
|
||||
// Key -> 'radioRenderer'
|
||||
// playlistId -> PlaylistId
|
||||
// title['simpleText'] --> "Playlist Title"
|
||||
//
|
||||
// Key -> 'horizontalCardListRenderer' // Queries related to this search
|
||||
// cards --> List of Maps -> loop -> ['searchRefinementCardRenderer']
|
||||
// thumbnail -> ['thumbnails'].first -> ['url'] --> "Thumbnail url" -> Find video id from the url path.
|
||||
// searchEndpoint -> ['searchEndpoint'] -> ['query'] -> "Related query string"
|
||||
//
|
||||
// Key -> 'shelfRenderer' // Videos related to this search
|
||||
// content -> ['verticalListRenderer']['items'] -> loop -> parseContent
|
|
@ -26,7 +26,6 @@ class WatchPage {
|
|||
bool get isVideoAvailable =>
|
||||
_root.querySelector('meta[property="og:url"]') != null;
|
||||
|
||||
//TODO: Update this to the new "parsing method" w/ regex "label"\s*:\s*"([\d,\.]+) likes"
|
||||
int get videoLikeCount => int.parse(_videoLikeExp
|
||||
.firstMatch(_root.outerHtml)
|
||||
?.group(1)
|
||||
|
@ -39,7 +38,6 @@ class WatchPage {
|
|||
?.nullIfWhitespace ??
|
||||
'0');
|
||||
|
||||
//TODO: Update this to the new "parsing method" w/ regex "label"\s*:\s*"([\d,\.]+) dislikes"
|
||||
int get videoDislikeCount => int.parse(_videoDislikeExp
|
||||
.firstMatch(_root.outerHtml)
|
||||
?.group(1)
|
||||
|
@ -52,14 +50,14 @@ class WatchPage {
|
|||
?.nullIfWhitespace ??
|
||||
'0');
|
||||
|
||||
_PlayerConfig get playerConfig => _PlayerConfig(json.decode(
|
||||
_matchJson(_extractJson(_root.getElementsByTagName('html').first.text))));
|
||||
_PlayerConfig get playerConfig =>
|
||||
_PlayerConfig(json.decode(_matchJson(_extractJson(
|
||||
_root.getElementsByTagName('html').first.text,
|
||||
'ytplayer.config = '))));
|
||||
|
||||
final String configSep = 'ytplayer.config = ';
|
||||
|
||||
String _extractJson(String html) {
|
||||
String _extractJson(String html, String separator) {
|
||||
return _matchJson(
|
||||
html.substring(html.indexOf(configSep) + configSep.length));
|
||||
html.substring(html.indexOf(separator) + separator.length));
|
||||
}
|
||||
|
||||
String _matchJson(String str) {
|
||||
|
|
|
@ -6,9 +6,10 @@ import '../videos/streams/streams.dart';
|
|||
class YoutubeHttpClient {
|
||||
final Client _httpClient = Client();
|
||||
|
||||
final Map<String, String> _userAgent = const {
|
||||
final Map<String, String> _defaultHeaders = const {
|
||||
'user-agent':
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
|
||||
'accept-language': 'en-US,en;q=1.0'
|
||||
};
|
||||
|
||||
/// Throws if something is wrong with the response.
|
||||
|
@ -33,17 +34,33 @@ class YoutubeHttpClient {
|
|||
}
|
||||
|
||||
Future<Response> get(dynamic url, {Map<String, String> headers}) {
|
||||
return _httpClient.get(url, headers: {...?headers, ..._userAgent});
|
||||
return _httpClient.get(url, headers: {...?headers, ..._defaultHeaders});
|
||||
}
|
||||
|
||||
Future<Response> post(dynamic url, {Map<String, String> headers}) {
|
||||
return _httpClient.post(url, headers: {...?headers, ..._defaultHeaders});
|
||||
}
|
||||
|
||||
Future<Response> head(dynamic url, {Map<String, String> headers}) {
|
||||
return _httpClient.head(url, headers: {...?headers, ..._userAgent});
|
||||
return _httpClient.head(url, headers: {...?headers, ..._defaultHeaders});
|
||||
}
|
||||
|
||||
Future<String> getString(dynamic url,
|
||||
{Map<String, String> headers, bool validate = true}) async {
|
||||
var response =
|
||||
await _httpClient.get(url, headers: {...?headers, ..._userAgent});
|
||||
await _httpClient.get(url, headers: {...?headers, ..._defaultHeaders});
|
||||
|
||||
if (validate) {
|
||||
_validateResponse(response, response.statusCode);
|
||||
}
|
||||
|
||||
return response.body;
|
||||
}
|
||||
|
||||
Future<String> postString(dynamic url,
|
||||
{Map<String, String> headers, bool validate = true}) async {
|
||||
var response =
|
||||
await _httpClient.post(url, headers: {...?headers, ..._defaultHeaders});
|
||||
|
||||
if (validate) {
|
||||
_validateResponse(response, response.statusCode);
|
||||
|
@ -57,7 +74,7 @@ class YoutubeHttpClient {
|
|||
var url = streamInfo.url;
|
||||
if (!streamInfo.isRateLimited()) {
|
||||
var request = Request('get', url);
|
||||
request.headers.addAll(_userAgent);
|
||||
request.headers.addAll(_defaultHeaders);
|
||||
var response = await request.send();
|
||||
if (validate) {
|
||||
_validateResponse(response, response.statusCode);
|
||||
|
@ -67,7 +84,7 @@ class YoutubeHttpClient {
|
|||
for (var i = 0; i < streamInfo.size.totalBytes; i += 9898989) {
|
||||
var request = Request('get', url);
|
||||
request.headers['range'] = 'bytes=$i-${i + 9898989}';
|
||||
request.headers.addAll(_userAgent);
|
||||
request.headers.addAll(_defaultHeaders);
|
||||
var response = await request.send();
|
||||
if (validate) {
|
||||
_validateResponse(response, response.statusCode);
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
import '../videos/video_id.dart';
|
||||
|
||||
/// A query suggested as related to a search, paired with a video id.
class RelatedQuery {
  /// Query related to a search query.
  final String query;

  /// Video related to a search query.
  final VideoId videoId;

  /// Initialize a [RelatedQuery] instance.
  const RelatedQuery(this.query, this.videoId);

  @override
  String toString() => 'RelatedQuery($query, $videoId)';
}
|
Loading…
Reference in New Issue