youtube_explode/lib/src/reverse_engineering/pages/watch_page.dart

240 lines
6.9 KiB
Dart
Raw Normal View History

2021-03-11 14:20:10 +01:00
import 'package:collection/collection.dart';
2020-05-31 23:36:23 +02:00
import 'package:html/dom.dart';
2020-06-03 13:18:37 +02:00
import 'package:html/parser.dart' as parser;
import '../../../youtube_explode_dart.dart';
import '../../extensions/helpers_extension.dart';
import '../../retry.dart';
2021-07-23 12:54:29 +02:00
import '../models/initial_data.dart';
import '../models/youtube_page.dart';
2021-07-21 02:06:02 +02:00
import '../player/player_response.dart';
import 'player_config_base.dart';
2020-05-31 23:36:23 +02:00
2020-07-16 20:02:54 +02:00
/// Parsed representation of a YouTube watch page (`/watch?v=<id>`).
///
/// Holds the parsed HTML [root] together with the `VISITOR_INFO1_LIVE` and
/// `YSC` cookie values that were captured when the page was fetched.
class WatchPage extends YoutubePage<_InitialData> {
  static final RegExp _videoLikeExp =
      RegExp(r'"label"\s*:\s*"([\d,\.]+) likes"');
  static final RegExp _videoDislikeExp =
      RegExp(r'"label"\s*:\s*"([\d,\.]+) dislikes"');
  static final RegExp _visitorInfoLiveExp =
      RegExp('VISITOR_INFO1_LIVE=([^;]+)');
  static final RegExp _yscExp = RegExp('YSC=([^;]+)');
  static final RegExp _playerConfigExp =
      RegExp(r'ytplayer\.config\s*=\s*(\{.*\})');

  /// The parsed HTML document of the watch page.
  @override
  // Overridden to be non-nullable.
  // ignore: overridden_fields
  final Document root;

  /// Value of the `VISITOR_INFO1_LIVE` cookie, or an empty string when the
  /// response did not set it.
  final String visitorInfoLive;

  /// Value of the `YSC` cookie, or an empty string when the response did not
  /// set it.
  final String ysc;

  /// Absolute URL of the player script, or `null` when none is found.
  ///
  /// The script is the first `<script>` element whose `src` contains
  /// `player_ias` and ends with `.js`; its `src` is appended to
  /// `https://youtube.com`.
  String? get sourceUrl {
    final playerScript = root
        .querySelectorAll('script')
        .map((e) => e.attributes['src'])
        .whereNotNull()
        .firstWhereOrNull(
            (src) => src.contains('player_ias') && src.endsWith('.js'));
    if (playerScript == null) {
      return null;
    }
    return 'https://youtube.com$playerScript';
  }

  /// Whether the document body contains an element with id `player`.
  bool get isOk => root.body?.querySelector('#player') != null;

  /// Whether the document contains a `meta[property="og:url"]` element.
  bool get isVideoAvailable =>
      root.querySelector('meta[property="og:url"]') != null;

  /// The like count of the video.
  ///
  /// Prefers the value found in [initialData]; otherwise falls back to the
  /// `"... likes"` label in the page markup, then to the text of the legacy
  /// like button, and finally to `0`.
  int get videoLikeCount =>
      initialData.likesCount ??
      _countFromMarkup(_videoLikeExp, '.like-button-renderer-like-button');

  /// The dislike count of the video.
  ///
  /// Prefers the value found in [initialData]; otherwise falls back to the
  /// `"... dislikes"` label in the page markup, then to the text of the
  /// legacy dislike button, and finally to `0`.
  int get videoDislikeCount =>
      initialData.disLikesCount ??
      _countFromMarkup(
          _videoDislikeExp, '.like-button-renderer-dislike-button');

  /// The comments continuation token exposed by [initialData].
  String? get commentsContinuation => initialData.commentsContinuation;

  /// The player config embedded in the page, computed on first access via
  /// [getPlayerConfig].
  late final WatchPlayerConfig? playerConfig = getPlayerConfig();

  /// The initial player response embedded in the page, computed on first
  /// access via [getPlayerResponse].
  late final PlayerResponse? playerResponse = getPlayerResponse();

  /// Extracts the `ytplayer.config = {...}` assignment from the page text.
  ///
  /// Returns `null` when the assignment is absent or no JSON map could be
  /// extracted from it.
  WatchPlayerConfig? getPlayerConfig() {
    final jsonMap = _playerConfigExp
        .firstMatch(root.getElementsByTagName('html').first.text)
        ?.group(1)
        ?.extractJson();
    if (jsonMap == null) {
      return null;
    }
    return WatchPlayerConfig(jsonMap);
  }

  /// Extracts the `var ytInitialPlayerResponse = ...` payload from the text
  /// of the page's `<script>` elements.
  ///
  /// A [TransientFailureException] is supplied to the extractor as the error
  /// to use when the payload cannot be retrieved.
  PlayerResponse? getPlayerResponse() {
    final scriptText = root
        .querySelectorAll('script')
        .map((e) => e.text)
        .toList(growable: false);
    return scriptText.extractGenericData(
        ['var ytInitialPlayerResponse = '],
        // Named `json` so the closure parameter does not shadow [root].
        (json) => PlayerResponse(json),
        () => TransientFailureException(
            'Failed to retrieve initial player response, please report this to the project GitHub page.'));
  }

  /// Parses [raw] HTML into a [WatchPage] carrying the given cookie values.
  WatchPage.parse(String raw, String visitorInfoLive, String ysc)
      // Parse once and share the resulting document with the superclass
      // instead of running the HTML parser twice over the same input.
      : this._(parser.parse(raw), visitorInfoLive, ysc);

  /// Builds the page from an already parsed document.
  WatchPage._(this.root, this.visitorInfoLive, this.ysc)
      : super(root, (root) => _InitialData(root));

  /// Downloads and parses the watch page for [videoId].
  ///
  /// The request is executed through [retry]. Throws a
  /// [TransientFailureException] when the parsed page is not [isOk] and a
  /// [VideoUnavailableException] when it is not [isVideoAvailable].
  ///
  /// Cookies missing from the response yield empty [visitorInfoLive] / [ysc]
  /// values rather than a null-check failure.
  static Future<WatchPage> get(YoutubeHttpClient httpClient, String videoId) {
    final url = 'https://youtube.com/watch?v=$videoId&bpctr=9999999999&hl=en';
    return retry(httpClient, () async {
      final response = await httpClient.get(url, validate: true);

      // The header, or either cookie inside it, may be absent.
      final cookies = response.headers['set-cookie'] ?? '';
      final visitorInfoLive =
          _visitorInfoLiveExp.firstMatch(cookies)?.group(1) ?? '';
      final ysc = _yscExp.firstMatch(cookies)?.group(1) ?? '';

      final result = WatchPage.parse(response.body, visitorInfoLive, ysc);
      if (!result.isOk) {
        throw TransientFailureException('Video watch page is broken.');
      }
      if (!result.isVideoAvailable) {
        throw VideoUnavailableException.unavailable(VideoId(videoId));
      }
      return result;
    });
  }

  /// Reads a counter from the page markup.
  ///
  /// Tries the first capture group of [labelExp] against the serialized
  /// document, then the text of the element matching [buttonSelector], and
  /// defaults to `0`. Non-digit characters are stripped before parsing.
  int _countFromMarkup(RegExp labelExp, String buttonSelector) {
    final digits = labelExp
            .firstMatch(root.outerHtml)
            ?.group(1)
            ?.stripNonDigits()
            .nullIfWhitespace ??
        root
            .querySelector(buttonSelector)
            ?.text
            .stripNonDigits()
            .nullIfWhitespace ??
        '0';
    return int.parse(digits);
  }
}
/// Used internally.
///
/// Wraps the JSON map extracted from the watch page's `ytplayer.config`
/// assignment (see `WatchPage.getPlayerConfig`).
class WatchPlayerConfig implements PlayerConfigBase {
  /// The raw player config JSON.
  @override
  final JsonMap root;

  /// Wraps the given player config JSON.
  WatchPlayerConfig(this.root);

  /// Absolute URL of the player script, built from `assets.js`.
  ///
  /// Evaluated on first access. Fails on a missing `assets` map or `js`
  /// entry instead of silently producing `https://youtube.comnull`.
  @override
  late final String sourceUrl =
      'https://youtube.com${root.get('assets')!.getT<String>('js')!}';

  /// The player response parsed from the `args.playerResponse` string.
  ///
  /// Evaluated on first access; fails when `args` or `playerResponse` is
  /// missing.
  late final PlayerResponse playerResponse =
      PlayerResponse.parse(root.get('args')!.getT<String>('playerResponse')!);
}
2020-06-17 22:14:27 +02:00
2021-07-21 02:06:02 +02:00
/// Accessors over the watch page's initial data JSON.
class _InitialData extends InitialData {
  _InitialData(JsonMap root) : super(root);

  /// Like count read from the initial data, computed on first access.
  late final int? likesCount = _getLikes();

  /// Dislike count read from the initial data, computed on first access.
  late final int? disLikesCount = _getDislikes();

  /// Comments continuation token, or an empty string when it is absent.
  late final String commentsContinuation =
      getContinuationContext()?.getT<String>('token') ?? '';

  /// Parses the accessibility label of the first toggle button among the
  /// primary info renderer's top-level buttons.
  ///
  /// Returns `null` when the root has no `contents` entry.
  int? _getLikes() {
    if (root['contents'] == null) {
      return null;
    }
    final topLevelButtons = root
        .get('contents')
        ?.get('twoColumnWatchNextResults')
        ?.get('results')
        ?.get('results')
        ?.getList('contents')
        ?.firstWhereOrNull((e) => e['videoPrimaryInfoRenderer'] != null)
        ?.get('videoPrimaryInfoRenderer')
        ?.get('videoActions')
        ?.get('menuRenderer')
        ?.getList('topLevelButtons');
    final label = topLevelButtons
        ?.firstWhereOrNull((e) => e['toggleButtonRenderer'] != null)
        ?.get('toggleButtonRenderer')
        ?.get('defaultText')
        ?.get('accessibility')
        ?.get('accessibilityData')
        ?.getT<String>('label');
    return label.parseInt();
  }

  /// Parses the accessibility label of the second toggle button among the
  /// primary info renderer's top-level buttons.
  ///
  /// Returns `null` when the root has no `contents` entry.
  int? _getDislikes() {
    if (root['contents'] == null) {
      return null;
    }
    final topLevelButtons = root
        .get('contents')
        ?.get('twoColumnWatchNextResults')
        ?.get('results')
        ?.get('results')
        ?.getList('contents')
        ?.firstWhereOrNull((e) => e['videoPrimaryInfoRenderer'] != null)
        ?.get('videoPrimaryInfoRenderer')
        ?.get('videoActions')
        ?.get('menuRenderer')
        ?.getList('topLevelButtons');
    final label = topLevelButtons
        ?.where((e) => e['toggleButtonRenderer'] != null)
        .elementAtSafe(1)
        ?.get('toggleButtonRenderer')
        ?.get('defaultText')
        ?.get('accessibility')
        ?.get('accessibilityData')
        ?.getT<String>('label');
    return label.parseInt();
  }

  /// Returns the `continuationCommand` map of the first item inside the
  /// first item section renderer, or `null` when any step of that path is
  /// missing.
  JsonMap? getContinuationContext() {
    if (root['contents'] == null) {
      return null;
    }
    final sectionItems = root
        .get('contents')
        ?.get('twoColumnWatchNextResults')
        ?.get('results')
        ?.get('results')
        ?.getList('contents')
        ?.firstWhereOrNull((e) => e['itemSectionRenderer'] != null)
        ?.get('itemSectionRenderer')
        ?.getList('contents');
    return sectionItems?.firstOrNull
        ?.get('continuationItemRenderer')
        ?.get('continuationEndpoint')
        ?.get('continuationCommand');
  }
}