youtube_explode/lib/src/reverse_engineering/responses/search_page.dart

272 lines
8.6 KiB
Dart
Raw Normal View History

import 'dart:convert';
2021-03-11 14:20:10 +01:00
import 'package:collection/collection.dart';
import 'package:html/dom.dart';
import 'package:html/parser.dart' as parser;
2021-03-20 18:31:53 +01:00
import 'package:youtube_explode_dart/src/search/search_channel.dart';
import '../../../youtube_explode_dart.dart';
2020-06-13 22:54:53 +02:00
import '../../extensions/helpers_extension.dart';
import '../../retry.dart';
2020-11-01 15:05:19 +01:00
import '../../search/base_search_content.dart';
import '../../search/related_query.dart';
2021-03-20 18:31:53 +01:00
import '../../search/search_filter.dart';
2020-06-13 22:54:53 +02:00
import '../../search/search_video.dart';
import '../../videos/videos.dart';
import '../youtube_http_client.dart';
2020-07-16 20:02:54 +02:00
/// A single page of YouTube search results.
///
/// A page is backed either by the parsed HTML of the results page ([root])
/// or by initial data handed directly to the constructor (as [get] does for
/// continuation requests).
class SearchPage {
  /// The query string this page was requested with.
  final String queryString;

  /// Parsed HTML document of the results page.
  ///
  /// `null` for pages that [get] builds from a continuation token; those
  /// pages receive their initial data through the constructor instead.
  final Document? root;

  /// The search data of this page, resolved once on first access through
  /// [getInitialData].
  late final _InitialData initialData = getInitialData();

  // Initial data supplied to the default constructor; stays null for pages
  // created with [SearchPage.parse].
  _InitialData? _initialData;

  /// Returns the initial data of this page.
  ///
  /// Uses the data supplied to the constructor when there is one. Otherwise
  /// the text of every `<script>` element of [root] is handed to
  /// `extractGenericData` together with a [TransientFailureException]
  /// factory (presumably used when no script contains the data).
  _InitialData getInitialData() {
    // Copy to a local so the nullable field promotes without `!`.
    final provided = _initialData;
    if (provided != null) {
      return provided;
    }

    // NOTE(review): `root` is only null when initial data was passed to the
    // constructor, in which case the early return above is taken.
    final scriptText = root!
        .querySelectorAll('script')
        .map((e) => e.text)
        .toList(growable: false);
    return scriptText.extractGenericData(
        (obj) => _InitialData(obj),
        () => TransientFailureException(
            'Failed to retrieve initial data from the search page, please report this to the project GitHub page.'));
  }

  /// Creates a page from an optional parsed document [root], the
  /// [queryString] and optional, already extracted [initialData].
  SearchPage(this.root, this.queryString, [_InitialData? initialData])
      : _initialData = initialData;

  /// Fetches the page that follows this one.
  ///
  /// Returns `null` when this page has no continuation token (missing or
  /// empty) or when the estimated result count is zero.
  Future<SearchPage?> nextPage(YoutubeHttpClient httpClient) async {
    final token = initialData.continuationToken;
    // A missing token must stop the pagination: passing `token: null` to
    // [get] would request the first page again instead of the next one.
    if (token == null ||
        token.isEmpty ||
        initialData.estimatedResults == 0) {
      return null;
    }
    return get(httpClient, queryString, token: token);
  }

  /// Requests a page of search results for [queryString].
  ///
  /// With a continuation [token] the request is a POST to the `youtubei`
  /// search endpoint and the decoded JSON response becomes the page's
  /// initial data; [filter] is not used on that path. Without a token the
  /// HTML results page is fetched with [filter] applied and parsed.
  ///
  /// Both requests are wrapped in `retry`.
  static Future<SearchPage> get(
      YoutubeHttpClient httpClient, String queryString,
      {String? token, SearchFilter filter = const SearchFilter('')}) {
    if (token != null) {
      // Continuation request: ask for the page identified by the token.
      final continuationUrl =
          'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8';
      return retry(() async {
        final body = {
          'context': const {
            'client': {
              'hl': 'en',
              'clientName': 'WEB',
              'clientVersion': '2.20200911.04.00'
            }
          },
          'continuation': token
        };
        final raw = await httpClient.post(Uri.parse(continuationUrl),
            body: json.encode(body));
        return SearchPage(
            null, queryString, _InitialData(json.decode(raw.body)));
      });
    }

    // First page: fetch and parse the regular HTML results page.
    final resultsUrl =
        'https://www.youtube.com/results?search_query=${Uri.encodeQueryComponent(queryString)}&sp=${filter.value}';
    return retry(() async {
      final raw = await httpClient.getString(resultsUrl);
      return SearchPage.parse(raw, queryString);
    });
  }

  /// Creates a page by parsing the [raw] HTML of a results page.
  SearchPage.parse(String raw, this.queryString) : root = parser.parse(raw);
}
/// Read-only view over the decoded JSON of a search response.
///
/// Two layouts are handled: a map with a `contents` key and a map with an
/// `onResponseReceivedCommands` key. Which request produces which layout is
/// not visible here (presumably first page vs. continuation).
class _InitialData {
  /// The decoded JSON map.
  final Map<String, dynamic> root;

  _InitialData(this.root);

  // Sections listed under
  // contents > twoColumnSearchResultsRenderer > primaryContents >
  // sectionListRenderer > contents.
  List<Map<String, dynamic>>? get _sectionListContents => root
      .get('contents')
      ?.get('twoColumnSearchResultsRenderer')
      ?.get('primaryContents')
      ?.get('sectionListRenderer')
      ?.getList('contents');

  // Items listed under
  // onResponseReceivedCommands[0] > appendContinuationItemsAction >
  // continuationItems.
  List<Map<String, dynamic>>? get _continuationItems => root
      .getList('onResponseReceivedCommands')
      ?.firstOrNull
      ?.get('appendContinuationItemsAction')
      ?.getList('continuationItems');

  // Reads the continuation token out of a `continuationItemRenderer` entry.
  String? _tokenOf(Map<String, dynamic>? item) => item
      ?.get('continuationItemRenderer')
      ?.get('continuationEndpoint')
      ?.get('continuationCommand')
      ?.getT<String>('token');

  /// Returns the raw result items of this response: the `contents` of the
  /// first section's `itemSectionRenderer`.
  ///
  /// Returns `null` when neither known layout is present or when any step
  /// of the path is missing.
  List<Map<String, dynamic>>? getContentContext() {
    // The layout is chosen by the top-level key alone; a `contents` layout
    // with a broken path does not fall through to the other one.
    if (root['contents'] != null) {
      return _sectionListContents
          ?.firstOrNull
          ?.get('itemSectionRenderer')
          ?.getList('contents');
    }
    if (root['onResponseReceivedCommands'] != null) {
      return _continuationItems
          ?.firstOrNull
          ?.get('itemSectionRenderer')
          ?.getList('contents');
    }
    return null;
  }

  // Looks up the continuation token, which is read from the second entry
  // (index 1) of the section/continuation list. Returns null when absent.
  String? _getContinuationToken() {
    if (root['contents'] != null) {
      final sections = _sectionListContents;
      if (sections == null || sections.length <= 1) {
        return null;
      }
      return _tokenOf(sections.elementAtSafe(1));
    }
    if (root['onResponseReceivedCommands'] != null) {
      return _tokenOf(_continuationItems?.elementAtSafe(1));
    }
    return null;
  }

  /// The parsed results of this page: [SearchVideo], [SearchPlaylist] or
  /// [SearchChannel] entries. Items of any other kind are dropped.
  late final List<BaseSearchContent> searchContent =
      getContentContext()?.map(_parseContent).whereNotNull().toList() ??
          const [];

  /// The related queries from the first `horizontalCardListRenderer` item,
  /// or an empty list when there is none.
  ///
  /// Cards without a query or a usable thumbnail URL are skipped.
  List<RelatedQuery> get relatedQueries {
    final cards = getContentContext()
        ?.where((e) => e['horizontalCardListRenderer'] != null)
        .map((e) => e.get('horizontalCardListRenderer')?.getList('cards'))
        .firstOrNull;
    if (cards == null) {
      return const [];
    }
    return cards.map(_parseRelatedQuery).whereNotNull().toList();
  }

  // Builds a [RelatedQuery] from one card of a `horizontalCardListRenderer`.
  //
  // The card is a decoded JSON map, so its members are read by key; dynamic
  // property access (`card.searchEndpoint`) throws NoSuchMethodError on maps.
  RelatedQuery? _parseRelatedQuery(Map<String, dynamic> card) {
    final renderer = card.get('searchRefinementCardRenderer');
    final query = renderer
        ?.get('searchEndpoint')
        ?.get('searchEndpoint')
        ?.getT<String>('query');
    final thumbnailUrl = renderer
        ?.get('thumbnail')
        ?.getList('thumbnails')
        ?.firstOrNull
        ?.getT<String>('url');
    if (query == null || thumbnailUrl == null) {
      return null;
    }

    // The video id is taken from the second path segment of the thumbnail
    // URL.
    final segments = Uri.parse(thumbnailUrl).pathSegments;
    if (segments.length < 2) {
      return null;
    }
    return RelatedQuery(query, VideoId(segments[1]));
  }

  /// The parsed items of the first `shelfRenderer` item, or an empty list
  /// when there is none.
  List<dynamic> get relatedVideos =>
      getContentContext()
          ?.where((e) => e['shelfRenderer'] != null)
          .map((e) => e
              .get('shelfRenderer')
              ?.get('content')
              ?.get('verticalListRenderer')
              ?.getList('items'))
          .firstOrNull
          ?.map(_parseContent)
          .whereNotNull()
          .toList() ??
      const [];

  /// Token for requesting the next page; `null` when none was found.
  late final String? continuationToken = _getContinuationToken();

  /// The top-level `estimatedResults` value parsed as an integer; `0` when
  /// the key is absent.
  late final int estimatedResults =
      int.parse(root.getT<String>('estimatedResults') ?? '0');

  // Converts one raw result item into its typed representation.
  //
  // Returns null for a null item and for every renderer other than
  // `videoRenderer`, `radioRenderer` and `channelRenderer`.
  BaseSearchContent? _parseContent(Map<String, dynamic>? content) {
    if (content == null) {
      return null;
    }
    if (content['videoRenderer'] != null) {
      return _parseVideo(content.get('videoRenderer')!);
    }
    if (content['radioRenderer'] != null) {
      return _parsePlaylist(content.get('radioRenderer')!);
    }
    if (content['channelRenderer'] != null) {
      return _parseChannel(content.get('channelRenderer')!);
    }
    // Here ignore 'horizontalCardListRenderer' & 'shelfRenderer'
    return null;
  }

  // Builds a [SearchVideo] from a `videoRenderer` map.
  BaseSearchContent _parseVideo(Map<String, dynamic> renderer) {
    final viewCountText = renderer.get('viewCountText');

    // The count is read from `simpleText` first, then from the first run;
    // '0' is used when neither yields any digits.
    final viewCount = int.parse(viewCountText
            ?.getT<String>('simpleText')
            ?.stripNonDigits()
            .nullIfWhitespace ??
        viewCountText
            ?.getList('runs')
            ?.firstOrNull
            ?.getT<String>('text')
            ?.stripNonDigits()
            .nullIfWhitespace ??
        '0');

    // True when the second run of the view count text reads "watching".
    final isWatching = viewCountText
            ?.getList('runs')
            ?.elementAtSafe(1)
            ?.getT<String>('text')
            ?.trim() ==
        'watching';

    return SearchVideo(
        VideoId(renderer.getT<String>('videoId')!),
        _parseRuns(renderer.get('title')?.getList('runs')),
        _parseRuns(renderer.get('ownerText')?.getList('runs')),
        _parseRuns(renderer.get('descriptionSnippet')?.getList('runs')),
        renderer.get('lengthText')?.getT<String>('simpleText') ?? '',
        viewCount,
        (renderer.get('thumbnail')?.getList('thumbnails') ?? const [])
            .map((e) =>
                Thumbnail(Uri.parse(e['url']), e['height'], e['width']))
            .toList(),
        renderer.get('publishedTimeText')?.getT<String>('simpleText'),
        isWatching,
        // NOTE(review): unguarded lookup; throws when `ownerText` or any
        // nested key is missing. Left as is because the parameter type of
        // SearchVideo is not visible here.
        renderer['ownerText']['runs'][0]['navigationEndpoint']
            ['browseEndpoint']['browseId']);
  }

  // Builds a [SearchPlaylist] from a `radioRenderer` map.
  BaseSearchContent _parsePlaylist(Map<String, dynamic> renderer) {
    return SearchPlaylist(
        PlaylistId(renderer.getT<String>('playlistId')!),
        renderer.get('title')!.getT<String>('simpleText')!,
        int.parse(_parseRuns(renderer.get('videoCountText')?.getList('runs'))
                .stripNonDigits()
                .nullIfWhitespace ??
            '0'));
  }

  // Builds a [SearchChannel] from a `channelRenderer` map.
  BaseSearchContent _parseChannel(Map<String, dynamic> renderer) {
    return SearchChannel(
        ChannelId(renderer.getT<String>('channelId')!),
        renderer.get('title')!.getT<String>('simpleText')!,
        renderer.get('descriptionSnippet')?.getList('runs')?.parseRuns() ??
            '',
        // NOTE(review): throws when the channel has no `videoCountText`;
        // confirm whether a fallback count is acceptable to callers.
        renderer
            .get('videoCountText')!
            .getList('runs')!
            .first
            .getT<String>('text')!
            .parseInt()!);
  }

  // Concatenates the `text` of every run; returns '' for a null list.
  String _parseRuns(List<dynamic>? runs) =>
      runs?.map((e) => e['text']).join() ?? '';
}