youtube_explode/lib/src/reverse_engineering/responses/watch_page.dart

221 lines
6.3 KiB
Dart
Raw Normal View History

2020-05-31 23:36:23 +02:00
import 'package:html/dom.dart';
2020-06-03 13:18:37 +02:00
import 'package:html/parser.dart' as parser;
import 'package:youtube_explode_dart/src/reverse_engineering/responses/player_config_base.dart';
2020-06-03 13:18:37 +02:00
import '../../../youtube_explode_dart.dart';
import '../../extensions/helpers_extension.dart';
import '../../retry.dart';
2020-06-03 23:02:21 +02:00
import '../../videos/video_id.dart';
import '../youtube_http_client.dart';
2020-09-21 17:34:03 +02:00
import 'generated/player_response_json.g.dart';
import 'generated/watch_page_id.g.dart';
2020-06-03 13:18:37 +02:00
import 'player_response.dart';
2020-05-31 23:36:23 +02:00
2020-07-16 20:02:54 +02:00
/// Parsed representation of a YouTube watch page.
///
/// Wraps the page's HTML [Document] together with the `VISITOR_INFO1_LIVE`
/// and `YSC` cookie values captured when the page was downloaded, and exposes
/// the data embedded in the page's scripts.
class WatchPage {
  static final RegExp _videoLikeExp =
      RegExp(r'"label"\s*:\s*"([\d,\.]+) likes"');
  static final RegExp _videoDislikeExp =
      RegExp(r'"label"\s*:\s*"([\d,\.]+) dislikes"');
  static final RegExp _visitorInfoLiveExp =
      RegExp('VISITOR_INFO1_LIVE=([^;]+)');
  static final RegExp _yscExp = RegExp('YSC=([^;]+)');
  static final RegExp _playerResponseExp =
      RegExp(r'var\s+ytInitialPlayerResponse\s*=\s*(\{.*\})');
  static final RegExp _playerConfigExp =
      RegExp(r'ytplayer\.config\s*=\s*(\{.*\})');
  static final RegExp _xsfrTokenExp = RegExp(r'"XSRF_TOKEN"\s*:\s*"(.+?)"');

  /// Script snippets that introduce the initial data JSON, in lookup order.
  static const List<String> _initialDataMarkers = [
    'window["ytInitialData"] =',
    'var ytInitialData = ',
  ];

  final Document _root;

  /// Value of the `VISITOR_INFO1_LIVE` cookie associated with this page.
  final String visitorInfoLive;

  /// Value of the `YSC` cookie associated with this page.
  final String ysc;

  // Lazily computed caches for the getters below.
  _InitialData _initialData;
  String _xsfrToken;
  WatchPlayerConfig _playerConfig;

  /// Wraps an already parsed document.
  WatchPage(this._root, this.visitorInfoLive, this.ysc);

  /// Parses the [raw] HTML of a watch page.
  WatchPage.parse(String raw, this.visitorInfoLive, this.ysc)
      : _root = parser.parse(raw);

  /// Absolute URL of the player script referenced by the page.
  ///
  /// Picks the first `<script src>` that contains `player_ias` and ends with
  /// `.js`. Returns `null` when no such script is present.
  String get sourceUrl {
    final scriptSources = _root
        .querySelectorAll('script')
        .map((e) => e.attributes['src'])
        .where((e) => !e.isNullOrWhiteSpace);
    final playerScript = scriptSources.firstWhere(
        (e) => e.contains('player_ias') && e.endsWith('.js'),
        orElse: () => null);
    return playerScript == null ? null : 'https://youtube.com$playerScript';
  }

  /// Initial data embedded in one of the page's scripts (cached).
  ///
  /// Throws a [TransientFailureException] if no script contains any of the
  /// known `ytInitialData` markers.
  _InitialData get initialData {
    if (_initialData != null) {
      return _initialData;
    }

    final scripts = _root
        .querySelectorAll('script')
        .map((e) => e.text)
        .toList(growable: false);

    // Markers are tried in order; every script is checked for the first
    // marker before the second marker is considered.
    for (final marker in _initialDataMarkers) {
      final script =
          scripts.firstWhere((e) => e.contains(marker), orElse: () => null);
      if (script != null) {
        return _initialData = _InitialData(
            WatchPageId.fromRawJson(_extractJson(script, marker)));
      }
    }

    throw TransientFailureException(
        'Failed to retrieve initial data from the watch page, '
        'please report this to the project GitHub page.');
  }

  /// The `XSRF_TOKEN` value found in the page's scripts (cached).
  ///
  /// Throws a [StateError] if no script contains the token.
  String get xsfrToken => _xsfrToken ??= _root
      .querySelectorAll('script')
      .map((e) => _xsfrTokenExp.firstMatch(e.text))
      .firstWhere((match) => match != null)
      .group(1);

  /// Whether the page body contains an element with the id `player`.
  bool get isOk => _root.body.querySelector('#player') != null;

  /// Whether the page declares an `og:url` meta property.
  bool get isVideoAvailable =>
      _root.querySelector('meta[property="og:url"]') != null;

  /// Like count read from the page, or `0` when it cannot be found.
  int get videoLikeCount =>
      _parseCount(_videoLikeExp, '.like-button-renderer-like-button');

  /// Dislike count read from the page, or `0` when it cannot be found.
  int get videoDislikeCount =>
      _parseCount(_videoDislikeExp, '.like-button-renderer-dislike-button');

  /// Player configuration assigned to `ytplayer.config` in the page (cached).
  ///
  /// Returns `null` when the page does not contain such an assignment,
  /// instead of handing a `null` string to the JSON decoder.
  WatchPlayerConfig get playerConfig {
    if (_playerConfig != null) {
      return _playerConfig;
    }

    final rawConfig = _playerConfigExp
        .firstMatch(_root.getElementsByTagName('html').first.text)
        ?.group(1)
        ?.extractJson();
    if (rawConfig == null) {
      return null;
    }
    return _playerConfig =
        WatchPlayerConfig(PlayerConfigJson.fromRawJson(rawConfig));
  }

  /// Player response assigned to `ytInitialPlayerResponse` in a page script.
  ///
  /// Parsed again on every access. Throws a [StateError] if no script
  /// contains the assignment.
  PlayerResponse get playerResponse => PlayerResponse.parse(_root
      .querySelectorAll('script')
      .map((e) => e.text)
      .map((e) => _playerResponseExp.firstMatch(e)?.group(1))
      .firstWhere((e) => !e.isNullOrWhiteSpace)
      .extractJson());

  /// Reads a count from the first [labelExp] match in the page HTML, falling
  /// back to the text of the element matching [fallbackSelector], then `0`.
  int _parseCount(RegExp labelExp, String fallbackSelector) {
    final digits = labelExp
            .firstMatch(_root.outerHtml)
            ?.group(1)
            ?.stripNonDigits()
            ?.nullIfWhitespace ??
        _root
            .querySelector(fallbackSelector)
            ?.text
            ?.stripNonDigits()
            ?.nullIfWhitespace ??
        '0';
    return int.parse(digits);
  }

  /// Extracts the JSON that follows the first [separator] in [html].
  String _extractJson(String html, String separator) =>
      html.substring(html.indexOf(separator) + separator.length).extractJson();

  /// Downloads and parses the watch page of [videoId] using [httpClient].
  ///
  /// The request runs inside [retry]. Throws a [TransientFailureException]
  /// when the response lacks the expected cookies or the page has no player
  /// element, and a [VideoUnavailableException] when the page is not marked
  /// as available (see [isVideoAvailable]).
  static Future<WatchPage> get(YoutubeHttpClient httpClient, String videoId) {
    // The page is requested with `hl=en`; the like/dislike label patterns
    // above match English text.
    final url = 'https://youtube.com/watch?v=$videoId&bpctr=9999999999&hl=en';
    return retry(() async {
      final response = await httpClient.get(url, validate: true);

      // The header or either cookie may be absent; report that explicitly
      // rather than failing on a null dereference.
      final cookies = response.headers['set-cookie'] ?? '';
      final visitorInfoLive =
          _visitorInfoLiveExp.firstMatch(cookies)?.group(1);
      final ysc = _yscExp.firstMatch(cookies)?.group(1);
      if (visitorInfoLive == null || ysc == null) {
        throw TransientFailureException(
            'Watch page response is missing the VISITOR_INFO1_LIVE or YSC '
            'cookie.');
      }

      final result = WatchPage.parse(response.body, visitorInfoLive, ysc);
      if (!result.isOk) {
        throw TransientFailureException('Video watch page is broken.');
      }
      if (!result.isVideoAvailable) {
        throw VideoUnavailableException.unavailable(VideoId(videoId));
      }
      return result;
    });
  }
}
/// Player configuration taken from a watch page. Used internally.
class WatchPlayerConfig implements PlayerConfigBase<PlayerConfigJson> {
  /// The decoded player configuration this object wraps.
  @override
  final PlayerConfigJson root;

  /// Wraps an already decoded configuration [root].
  WatchPlayerConfig(this.root);

  /// Absolute URL of the player script, built from `root.assets.js`.
  @override
  String get sourceUrl {
    final scriptPath = root.assets.js;
    return 'https://youtube.com$scriptPath';
  }

  /// The player response parsed from `root.args.playerResponse`.
  PlayerResponse get playerResponse {
    final rawResponse = root.args.playerResponse;
    return PlayerResponse.parse(rawResponse);
  }
}
2020-06-17 22:14:27 +02:00
/// Wrapper around the initial data embedded in a watch page.
class _InitialData {
  /// The decoded initial data.
  final WatchPageId root;

  /// Wraps an already decoded [root].
  _InitialData(this.root);

  /* Cache results */
  String _continuation;
  String _clickTrackingParams;

  /// Continuation data of the first content entry that has an item section
  /// renderer.
  ///
  /// Returns `null` when any step on the way there is missing: no contents,
  /// no entry with an item section renderer, or no continuations on it. The
  /// [continuation] and [clickTrackingParams] getters already treat `null`
  /// as "nothing available".
  NextContinuationData getContinuationContext() {
    final sections = root
        .contents?.twoColumnWatchNextResults?.results?.results?.contents;
    if (sections == null) {
      return null;
    }

    final section = sections.firstWhere((e) => e.itemSectionRenderer != null,
        orElse: () => null);
    final continuations = section?.itemSectionRenderer?.continuations;
    if (continuations == null || continuations.isEmpty) {
      return null;
    }
    return continuations.first.nextContinuationData;
  }

  /// Continuation token, or an empty string when none is available (cached).
  String get continuation =>
      _continuation ??= getContinuationContext()?.continuation ?? '';

  /// Click tracking parameters, or an empty string when none are available
  /// (cached).
  String get clickTrackingParams => _clickTrackingParams ??=
      getContinuationContext()?.clickTrackingParams ?? '';
}