// youtube_explode/lib/src/reverse_engineering/responses/search_page.dart
// (282 lines, 8.3 KiB, Dart)

import 'dart:convert';
import 'package:html/dom.dart';
import 'package:html/parser.dart' as parser;
import '../../../youtube_explode_dart.dart';
// (blame) 2020-06-13 22:54:53 +02:00
import '../../extensions/helpers_extension.dart';
import '../../retry.dart';
// (blame) 2020-11-01 15:05:19 +01:00
import '../../search/base_search_content.dart';
import '../../search/related_query.dart';
// (blame) 2020-06-13 22:54:53 +02:00
import '../../search/search_video.dart';
import '../../videos/videos.dart';
import '../youtube_http_client.dart';
// (blame) 2020-09-21 17:34:03 +02:00
import 'generated/search_page_id.g.dart' hide PlaylistId;
// (blame) 2020-07-16 20:02:54 +02:00
/// A page of YouTube search results for [queryString].
///
/// A page is backed either by the parsed HTML of the results page (see
/// [SearchPage.parse]) or by already-decoded initial data passed to the
/// default constructor (as [get] does when given a continuation token).
class SearchPage {
  /// Script markers that precede the `ytInitialData` JSON, in lookup order.
  static const _initialDataMarkers = [
    'window["ytInitialData"] =',
    'var ytInitialData = ',
  ];

  final _apiKeyExp = RegExp(r'"INNERTUBE_API_KEY":"(\w+?)"');

  /// The query string this page was requested with.
  final String queryString;

  /// The parsed HTML document.
  ///
  /// This is `null` when the page was built by [get] from a continuation
  /// response, so every access must be null-guarded.
  final Document _root;

  String _apiKey;

  /// The `INNERTUBE_API_KEY` value found in the page scripts.
  ///
  /// Returns the key supplied to the constructor when there is one. Returns
  /// `null` when the page has no HTML document, no script mentions the key,
  /// or the first script mentioning it does not match the expected pattern.
  String get apiKey {
    if (_apiKey != null) {
      return _apiKey;
    }
    if (_root == null) {
      return null;
    }
    for (final script in _root.querySelectorAll('script')) {
      final text = script.text;
      if (text.contains('INNERTUBE_API_KEY')) {
        // Only the first script mentioning the key is inspected.
        return _apiKey = _apiKeyExp.firstMatch(text)?.group(1);
      }
    }
    return null;
  }

  _InitialData _initialData;

  /// The initial data of this page, decoded lazily and then cached.
  ///
  /// Throws a [TransientFailureException] when no script in the document
  /// carries one of the known `ytInitialData` markers.
  _InitialData get initialData {
    if (_initialData != null) {
      return _initialData;
    }
    final scripts = _root == null
        ? const <String>[]
        : _root
            .querySelectorAll('script')
            .map((e) => e.text)
            .toList(growable: false);
    for (final marker in _initialDataMarkers) {
      final script =
          scripts.firstWhere((e) => e.contains(marker), orElse: () => null);
      if (script == null) {
        continue;
      }
      final rawJson = _extractJson(script, marker);
      if (rawJson == null) {
        continue;
      }
      return _initialData = _InitialData(SearchPageId.fromRawJson(rawJson));
    }
    throw TransientFailureException(
        'Failed to retrieve initial data from the search page, please report this to the project GitHub page.'); // ignore: lines_longer_than_80_chars
  }

  /// Returns the JSON text that follows [separator] inside [html].
  ///
  /// Returns `null` when either argument is `null` or [separator] does not
  /// occur in [html].
  String _extractJson(String html, String separator) {
    if (html == null || separator == null) {
      return null;
    }
    final start = html.indexOf(separator);
    if (start < 0) {
      // indexOf returns -1 when the separator is missing; without this check
      // the substring below would start at an unrelated offset.
      return null;
    }
    return _matchJson(html.substring(start + separator.length));
  }

  /// Returns the prefix of [str] that ends before the first `;` found
  /// outside of any `{...}` nesting, or all of [str] when there is none.
  ///
  /// Characters inside double-quoted string literals are skipped so that
  /// braces or semicolons in text values do not disturb the nesting count.
  String _matchJson(String str) {
    var depth = 0;
    var inString = false;
    var escaped = false;
    for (var i = 0; i < str.length; i++) {
      final char = str[i];
      if (inString) {
        if (escaped) {
          // The character after a backslash never terminates the literal.
          escaped = false;
        } else if (char == '\\') {
          escaped = true;
        } else if (char == '"') {
          inString = false;
        }
        continue;
      }
      if (char == '"') {
        inString = true;
      } else if (char == '{') {
        depth++;
      } else if (char == '}') {
        depth--;
      } else if (char == ';' && depth == 0) {
        return str.substring(0, i);
      }
    }
    // No terminator found (this also covers the empty string).
    return str;
  }

  /// Creates a page from an already parsed document and, optionally,
  /// pre-decoded initial data and a known API key.
  SearchPage(this._root, this.queryString,
      [_InitialData initialData, this._apiKey])
      : _initialData = initialData;

  /// Fetches the page that follows this one.
  ///
  /// Completes with `null` when this page has no usable continuation token
  /// (missing, empty or blank) or reports zero estimated results.
  // TODO: Replace this in favour of async* when querying.
  Future<SearchPage> nextPage(YoutubeHttpClient httpClient) async {
    final data = initialData;
    final token = data.continuationToken;
    // A null token must stop here: passing it on to [get] would silently
    // request the first page again instead of the next one.
    if (token == null || token.trim().isEmpty || data.estimatedResults == 0) {
      return null;
    }
    return get(httpClient, queryString, token: token);
  }

  /// Requests a search page for [queryString].
  ///
  /// Without a [token] the HTML results page is downloaded and parsed. With
  /// a [token] the continuation is requested from the `youtubei/v1/search`
  /// endpoint and the JSON response becomes the page's initial data. Both
  /// requests are wrapped in [retry].
  static Future<SearchPage> get(
      YoutubeHttpClient httpClient, String queryString,
      {String token}) {
    if (token == null) {
      final url = 'https://www.youtube.com/results?search_query='
          '${Uri.encodeQueryComponent(queryString)}';
      return retry(() async {
        final raw = await httpClient.getString(url);
        return SearchPage.parse(raw, queryString);
      });
    }

    const url = 'https://www.youtube.com/youtubei/v1/search'
        '?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8';
    // The request payload does not change between attempts, so it is encoded
    // once outside of the retried closure.
    final payload = json.encode({
      'context': const {
        'client': {
          'hl': 'en',
          'clientName': 'WEB',
          'clientVersion': '2.20200911.04.00'
        }
      },
      'continuation': token
    });
    return retry(() async {
      final raw = await httpClient.post(url, body: payload);
      return SearchPage(null, queryString,
          _InitialData(SearchPageId.fromJson(json.decode(raw.body))));
    });
  }

  /// Parses the [raw] HTML of a search results page.
  SearchPage.parse(String raw, this.queryString) : _root = parser.parse(raw);
}
/// Wraps the decoded search payload ([SearchPageId]) and exposes the parts of
/// it that are read when building search results.
class _InitialData {
  /// The decoded JSON payload.
  final SearchPageId root;

  _InitialData(this.root);

  /// Returns the list of result items held by the payload.
  ///
  /// Reads from `contents` when present, otherwise from the first entry of
  /// `onResponseReceivedCommands`. Returns `null` when the payload has
  /// neither. Throws a [SearchItemSectionException] when the continuation
  /// entry carries no item section.
  List<PurpleContent> getContentContext() {
    if (root.contents != null) {
      return root.contents.twoColumnSearchResultsRenderer.primaryContents
          .sectionListRenderer.contents.first.itemSectionRenderer.contents;
    }
    if (root.onResponseReceivedCommands != null) {
      final itemSection = root
          .onResponseReceivedCommands
          .first
          .appendContinuationItemsAction
          .continuationItems[0]
          .itemSectionRenderer;
      if (itemSection == null) {
        throw SearchItemSectionException();
      }
      return itemSection.contents;
    }
    return null;
  }

  /// Looks up the continuation token stored in the second section/item of
  /// the payload.
  ///
  /// Returns `null` when a `contents` payload has no second section or that
  /// section has no token, an empty string when a continuation payload has
  /// no token, and `null` when the payload has neither shape.
  String _getContinuationToken() {
    if (root.contents != null) {
      final sections = root.contents.twoColumnSearchResultsRenderer
          .primaryContents.sectionListRenderer.contents;
      if (sections.length <= 1) {
        return null;
      }
      // Null-aware access, matching the continuation branch below: a second
      // section without a continuation renderer simply has no token.
      return sections[1]
          ?.continuationItemRenderer
          ?.continuationEndpoint
          ?.continuationCommand
          ?.token;
    }
    if (root.onResponseReceivedCommands != null) {
      final items = root.onResponseReceivedCommands.first
          .appendContinuationItemsAction.continuationItems;
      if (items == null || items.length <= 1) {
        // Indexing [1] on a single-item response would throw a RangeError.
        return '';
      }
      // The fallback is the empty string, which is the value
      // SearchPage.nextPage compares against to detect the last page.
      return items[1]
              ?.continuationItemRenderer
              ?.continuationEndpoint
              ?.continuationCommand
              ?.token ??
          '';
    }
    return null;
  }

  /// The parsed result items, with unsupported item kinds left out.
  ///
  /// Contains only [SearchVideo] or [SearchPlaylist]. Empty when the payload
  /// has no result items.
  List<BaseSearchContent> get searchContent =>
      getContentContext()
          ?.map(_parseContent)
          ?.where((e) => e != null)
          ?.toList() ??
      const [];

  /// The related queries built from the first horizontal card list, or an
  /// empty list when there is none.
  List<RelatedQuery> get relatedQueries =>
      getContentContext()
          ?.where((e) => e.horizontalCardListRenderer != null)
          ?.map((e) => e.horizontalCardListRenderer.cards)
          ?.firstOrNull
          ?.map((e) => e.searchRefinementCardRenderer)
          ?.map((e) => RelatedQuery(
              e.searchEndpoint.searchEndpoint.query,
              VideoId(
                  Uri.parse(e.thumbnail.thumbnails.first.url).pathSegments[1])))
          ?.toList()
          ?.cast<RelatedQuery>() ??
      const [];

  /// The parsed items of the first shelf's vertical list, or an empty list
  /// when there is no shelf.
  List<dynamic> get relatedVideos =>
      getContentContext()
          ?.where((e) => e.shelfRenderer != null)
          ?.map((e) => e.shelfRenderer.content.verticalListRenderer.items)
          ?.firstOrNull
          ?.map(_parseContent)
          ?.toList() ??
      const [];

  /// The continuation token of this payload; see [_getContinuationToken].
  String get continuationToken => _getContinuationToken();

  /// The `estimatedResults` value of the payload parsed as an integer, or 0
  /// when the payload does not carry one.
  // The fallback must be a string: int.parse does not accept an int.
  int get estimatedResults => int.parse(root.estimatedResults ?? '0');

  /// Converts a single result item into a [SearchVideo] or [SearchPlaylist].
  ///
  /// Returns `null` for a `null` item and for item kinds that are neither a
  /// video renderer nor a radio renderer.
  BaseSearchContent _parseContent(PurpleContent content) {
    if (content == null) {
      return null;
    }
    if (content.videoRenderer != null) {
      final renderer = content.videoRenderer;
      final viewCountRuns = renderer.viewCountText?.runs;
      final hasRuns = viewCountRuns != null && viewCountRuns.isNotEmpty;
      // Guarded access: `first` throws on an empty list.
      final String firstRunText = hasRuns ? viewCountRuns.first?.text : null;
      // Guarded access: `elementAt(1)` throws when there is only one run.
      final secondRunIsWatching = hasRuns &&
          viewCountRuns.length > 1 &&
          viewCountRuns.elementAt(1)?.text?.trim() == 'watching';
      return SearchVideo(
          VideoId(renderer.videoId),
          _parseRuns(renderer.title.runs),
          _parseRuns(renderer.ownerText.runs),
          _parseRuns(renderer.descriptionSnippet?.runs),
          renderer.lengthText?.simpleText ?? '',
          int.parse(renderer.viewCountText?.simpleText
                  ?.stripNonDigits()
                  ?.nullIfWhitespace ??
              firstRunText?.stripNonDigits()?.nullIfWhitespace ??
              '0'),
          (renderer.thumbnail.thumbnails ?? <ThumbnailElement>[])
              .map((e) => Thumbnail(Uri.parse(e.url), e.height, e.width))
              .toList(),
          renderer.publishedTimeText?.simpleText,
          // NOTE(review): presumably the "is live" flag of SearchVideo;
          // confirm against its constructor.
          secondRunIsWatching);
    }
    if (content.radioRenderer != null) {
      final renderer = content.radioRenderer;
      return SearchPlaylist(
          PlaylistId(renderer.playlistId),
          renderer.title.simpleText,
          // A missing video count text is treated as a count of 0.
          int.parse(_parseRuns(renderer.videoCountText?.runs)
                  .stripNonDigits()
                  .nullIfWhitespace ??
              '0'));
    }
    // Here ignore 'horizontalCardListRenderer' & 'shelfRenderer'
    return null;
  }

  /// Concatenates the `text` of every entry in [runs]; returns an empty
  /// string when [runs] is `null`.
  String _parseRuns(List<dynamic> runs) =>
      runs?.map((e) => e.text)?.join() ?? '';
}