2020-06-11 16:15:19 +02:00
|
|
|
import 'dart:convert';
|
|
|
|
|
|
|
|
import 'package:html/dom.dart';
|
|
|
|
import 'package:html/parser.dart' as parser;
|
|
|
|
|
2020-07-16 19:28:49 +02:00
|
|
|
import '../../../youtube_explode_dart.dart';
|
2020-06-13 22:54:53 +02:00
|
|
|
import '../../extensions/helpers_extension.dart';
|
2020-06-11 16:15:19 +02:00
|
|
|
import '../../retry.dart';
|
2020-11-01 15:05:19 +01:00
|
|
|
import '../../search/base_search_content.dart';
|
2020-06-11 16:15:19 +02:00
|
|
|
import '../../search/related_query.dart';
|
2020-06-13 22:54:53 +02:00
|
|
|
import '../../search/search_video.dart';
|
2020-06-11 16:15:19 +02:00
|
|
|
import '../../videos/videos.dart';
|
|
|
|
import '../youtube_http_client.dart';
|
2020-09-21 17:34:03 +02:00
|
|
|
import 'generated/search_page_id.g.dart' hide PlaylistId;
|
2020-06-11 16:15:19 +02:00
|
|
|
|
2020-07-16 20:02:54 +02:00
|
|
|
/// A single page of YouTube search results for [queryString].
///
/// A page is backed either by the parsed HTML document of the results page
/// (see [SearchPage.parse]) or by an already decoded continuation response
/// handed to the default constructor (see [SearchPage.get] with a `token`),
/// in which case there is no HTML document.
class SearchPage {
  /// Markers that precede the `ytInitialData` JSON object inside the page
  /// scripts, in the order in which they are tried.
  static const _initialDataMarkers = [
    'window["ytInitialData"] =',
    'var ytInitialData = ',
  ];

  final _apiKeyExp = RegExp(r'"INNERTUBE_API_KEY":"(\w+?)"');

  /// The search query this page was requested for.
  final String queryString;

  final Document _root;

  String _apiKey;

  /// The `INNERTUBE_API_KEY` value found in the page scripts.
  ///
  /// The value is looked up lazily and cached. Returns `null` when the page
  /// has no HTML document (continuation pages) or when no script contains a
  /// matching key.
  String get apiKey {
    if (_apiKey != null) {
      return _apiKey;
    }
    if (_root == null) {
      return null;
    }
    for (final script in _root.querySelectorAll('script')) {
      final match = _apiKeyExp.firstMatch(script.text);
      if (match != null) {
        return _apiKey = match.group(1);
      }
    }
    return null;
  }

  _InitialData _initialData;

  /// The decoded initial data of this page.
  ///
  /// When it was not supplied to the constructor it is extracted from the
  /// first script that contains one of the known `ytInitialData` markers and
  /// then cached.
  ///
  /// Throws a [TransientFailureException] when no script contains the data.
  _InitialData get initialData {
    if (_initialData != null) {
      return _initialData;
    }

    final scriptText = _root == null
        ? const <String>[]
        : _root
            .querySelectorAll('script')
            .map((e) => e.text)
            .toList(growable: false);

    for (final marker in _initialDataMarkers) {
      final initialDataText =
          scriptText.firstWhere((e) => e.contains(marker), orElse: () => null);
      if (initialDataText != null) {
        return _initialData = _InitialData(
            SearchPageId.fromRawJson(_extractJson(initialDataText, marker)));
      }
    }

    throw TransientFailureException(
        'Failed to retrieve initial data from the search page, please report this to the project GitHub page.'); // ignore: lines_longer_than_80_chars
  }

  /// Returns the JSON text that follows the first occurrence of [separator]
  /// in [html], or `null` when either argument is `null` or [separator] does
  /// not occur in [html].
  String _extractJson(String html, String separator) {
    if (html == null || separator == null) {
      return null;
    }
    final start = html.indexOf(separator);
    if (start == -1) {
      // Without this check the offset below would point into unrelated text.
      return null;
    }
    return _matchJson(html.substring(start + separator.length));
  }

  /// Returns the prefix of [str] up to (excluding) the first `;` that is
  /// found outside of any `{ }` pair.
  ///
  /// Braces and semicolons inside double-quoted string literals are ignored so
  /// that string content cannot unbalance the brace counter. When no such `;`
  /// exists the whole of [str] is returned (an empty string stays empty).
  String _matchJson(String str) {
    var depth = 0;
    var inString = false;
    var escaped = false;
    for (var i = 0; i < str.length; i++) {
      final char = str[i];
      if (inString) {
        if (escaped) {
          // The character after a backslash never terminates the literal.
          escaped = false;
        } else if (char == '\\') {
          escaped = true;
        } else if (char == '"') {
          inString = false;
        }
        continue;
      }
      if (char == '"') {
        inString = true;
      } else if (char == '{') {
        depth++;
      } else if (char == '}') {
        depth--;
      } else if (char == ';' && depth == 0) {
        return str.substring(0, i);
      }
    }
    return str;
  }

  /// Creates a page from an optional HTML document [_root] and, optionally,
  /// already decoded [initialData] and a known API key.
  SearchPage(this._root, this.queryString,
      [_InitialData initialData, this._apiKey])
      : _initialData = initialData;

  /// Requests the page that follows this one.
  ///
  /// Returns `null` when this page carries no usable continuation token
  /// (`null`, empty or blank) or when the estimated result count is `0`.
  // TODO: Replace this in favour of async* when querying;
  Future<SearchPage> nextPage(YoutubeHttpClient httpClient) async {
    final token = initialData.continuationToken;
    // A null token must not reach [get]: it would request the first page
    // again instead of a continuation.
    if (token == null ||
        token.trim().isEmpty ||
        initialData.estimatedResults == 0) {
      return null;
    }
    return get(httpClient, queryString, token: token);
  }

  /// Requests a search page for [queryString].
  ///
  /// Without [token] the HTML results page is downloaded and parsed. With
  /// [token] the continuation is requested from the `youtubei/v1/search`
  /// endpoint and the decoded response becomes the page's initial data.
  /// Both requests are wrapped in [retry].
  static Future<SearchPage> get(
      YoutubeHttpClient httpClient, String queryString,
      {String token}) {
    if (token != null) {
      var url =
          'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8';

      return retry(() async {
        var body = {
          'context': const {
            'client': {
              'hl': 'en',
              'clientName': 'WEB',
              'clientVersion': '2.20200911.04.00'
            }
          },
          'continuation': token
        };

        var raw = await httpClient.post(url, body: json.encode(body));
        // Continuation responses are plain JSON, so there is no document.
        return SearchPage(null, queryString,
            _InitialData(SearchPageId.fromJson(json.decode(raw.body))));
      });
    }

    var url =
        'https://www.youtube.com/results?search_query=${Uri.encodeQueryComponent(queryString)}';
    return retry(() async {
      var raw = await httpClient.getString(url);
      return SearchPage.parse(raw, queryString);
    });
  }

  /// Creates a page by parsing the [raw] HTML of a results page.
  SearchPage.parse(String raw, this.queryString) : _root = parser.parse(raw);
}
|
|
|
|
|
|
|
|
/// Accessors over the decoded search response held in [root].
///
/// [root] either carries `contents` or `onResponseReceivedCommands`; every
/// accessor below handles both layouts.
class _InitialData {
  // Json parsed map
  final SearchPageId root;

  _InitialData(this.root);

  /// Returns the raw result items of this page, or `null` when [root] has
  /// neither `contents` nor `onResponseReceivedCommands`.
  ///
  /// Throws a [SearchItemSectionException] when the first continuation item
  /// has no `itemSectionRenderer`.
  List<PurpleContent> getContentContext() {
    if (root.contents != null) {
      return root.contents.twoColumnSearchResultsRenderer.primaryContents
          .sectionListRenderer.contents.first.itemSectionRenderer.contents;
    }
    if (root.onResponseReceivedCommands != null) {
      final itemSection = root
          .onResponseReceivedCommands
          .first
          .appendContinuationItemsAction
          .continuationItems[0]
          .itemSectionRenderer;
      if (itemSection == null) {
        throw SearchItemSectionException();
      }
      return itemSection.contents;
    }
    return null;
  }

  /// Returns the token used to request the following page.
  ///
  /// For a response with `contents` the result is `null` when there is no
  /// second section or that section carries no token. For a continuation
  /// response the result is the empty string when no token is present.
  /// `null` is returned when [root] has neither layout.
  String _getContinuationToken() {
    if (root.contents != null) {
      var contents = root.contents.twoColumnSearchResultsRenderer
          .primaryContents.sectionListRenderer.contents;

      if (contents.length <= 1) {
        return null;
      }
      return contents[1]
          ?.continuationItemRenderer
          ?.continuationEndpoint
          ?.continuationCommand
          ?.token;
    }
    if (root.onResponseReceivedCommands != null) {
      final items = root.onResponseReceivedCommands.first
          .appendContinuationItemsAction.continuationItems;
      // Indexing [1] on a shorter list would throw before `?.` could help.
      if (items == null || items.length <= 1) {
        return '';
      }
      return items[1]
              ?.continuationItemRenderer
              ?.continuationEndpoint
              ?.continuationCommand
              ?.token ??
          '';
    }
    return null;
  }

  // Contains only [SearchVideo] or [SearchPlaylist]
  /// The parsed results of this page; empty when there is no content.
  List<BaseSearchContent> get searchContent =>
      getContentContext()
          ?.map(_parseContent)
          ?.where((e) => e != null)
          ?.toList() ??
      const <BaseSearchContent>[];

  /// The related queries taken from the first `horizontalCardListRenderer`
  /// item; empty when there is none.
  List<RelatedQuery> get relatedQueries =>
      getContentContext()
          ?.where((e) => e.horizontalCardListRenderer != null)
          ?.map((e) => e.horizontalCardListRenderer.cards)
          ?.firstOrNull
          ?.map((e) => e.searchRefinementCardRenderer)
          ?.map((e) => RelatedQuery(
              e.searchEndpoint.searchEndpoint.query,
              VideoId(
                  Uri.parse(e.thumbnail.thumbnails.first.url).pathSegments[1])))
          ?.toList()
          ?.cast<RelatedQuery>() ??
      const [];

  /// The parsed items of the first `shelfRenderer` item; empty when there is
  /// none. Entries are `null` for items [_parseContent] does not handle.
  List<dynamic> get relatedVideos =>
      getContentContext()
          ?.where((e) => e.shelfRenderer != null)
          ?.map((e) => e.shelfRenderer.content.verticalListRenderer.items)
          ?.firstOrNull
          ?.map(_parseContent)
          ?.toList() ??
      const [];

  /// See [_getContinuationToken].
  String get continuationToken => _getContinuationToken();

  /// The estimated number of results, `0` when the response omits it.
  int get estimatedResults => int.parse(root.estimatedResults ?? '0');

  /// Converts [content] into a [SearchVideo] or [SearchPlaylist].
  ///
  /// Returns `null` for `null` input and for items that carry neither a
  /// `videoRenderer` nor a `radioRenderer`.
  BaseSearchContent _parseContent(PurpleContent content) {
    if (content == null) {
      return null;
    }
    if (content.videoRenderer != null) {
      var renderer = content.videoRenderer;

      // The view count is either in `simpleText` or in the first run.
      final viewRuns = renderer.viewCountText?.runs;
      final hasRuns = viewRuns != null && viewRuns.isNotEmpty;
      final firstRunText = hasRuns ? viewRuns.first?.text : null;
      final viewDigits = renderer.viewCountText?.simpleText
              ?.stripNonDigits()
              ?.nullIfWhitespace ??
          firstRunText?.stripNonDigits()?.nullIfWhitespace ??
          '0';

      // Only look at the second run when it exists; `elementAt` throws on a
      // shorter list.
      final isLive = hasRuns &&
          viewRuns.length > 1 &&
          viewRuns.elementAt(1)?.text?.trim() == 'watching';

      //TODO: Add if it's a live
      return SearchVideo(
          VideoId(renderer.videoId),
          _parseRuns(renderer.title.runs),
          _parseRuns(renderer.ownerText.runs),
          _parseRuns(renderer.descriptionSnippet?.runs),
          renderer.lengthText?.simpleText ?? '',
          int.parse(viewDigits),
          (renderer.thumbnail.thumbnails ?? <ThumbnailElement>[])
              .map((e) => Thumbnail(Uri.parse(e.url), e.height, e.width))
              .toList(),
          renderer.publishedTimeText?.simpleText,
          isLive);
    }
    if (content.radioRenderer != null) {
      var renderer = content.radioRenderer;

      return SearchPlaylist(
          PlaylistId(renderer.playlistId),
          renderer.title.simpleText,
          int.parse(_parseRuns(renderer.videoCountText?.runs)
                  .stripNonDigits()
                  .nullIfWhitespace ??
              '0'));
    }
    // Here ignore 'horizontalCardListRenderer' & 'shelfRenderer'
    return null;
  }

  /// Concatenates the `text` of every entry in [runs]; `''` for `null`.
  String _parseRuns(List<dynamic> runs) =>
      runs?.map((e) => e.text)?.join() ?? '';
}
|