youtube_explode/lib/src/reverse_engineering/responses/watch_page.dart

272 lines
7.2 KiB
Dart
Raw Normal View History

2020-06-03 13:18:37 +02:00
import 'dart:convert';
2020-05-31 23:36:23 +02:00
import 'package:html/dom.dart';
2020-06-03 13:18:37 +02:00
import 'package:html/parser.dart' as parser;
import 'package:http_parser/http_parser.dart';
import '../../../youtube_explode_dart.dart';
import '../../extensions/helpers_extension.dart';
import '../../retry.dart';
2020-06-03 23:02:21 +02:00
import '../../videos/video_id.dart';
import '../youtube_http_client.dart';
2020-06-03 13:18:37 +02:00
import 'player_response.dart';
2020-06-03 23:02:21 +02:00
import 'stream_info_provider.dart';
2020-05-31 23:36:23 +02:00
2020-07-16 20:02:54 +02:00
///
2020-06-03 13:18:37 +02:00
/// Parsed YouTube watch page (`https://youtube.com/watch?v=...`).
///
/// Wraps the HTML [Document] of the page together with the values of the
/// `VISITOR_INFO1_LIVE` and `YSC` cookies, and lazily extracts the JSON blobs
/// that are embedded in the page's `<script>` elements.
class WatchPage {
  static final RegExp _videoLikeExp =
      RegExp(r'"label"\s*:\s*"([\d,\.]+) likes"');
  static final RegExp _videoDislikeExp =
      RegExp(r'"label"\s*:\s*"([\d,\.]+) dislikes"');
  static final RegExp _visitorInfoLiveExp =
      RegExp('VISITOR_INFO1_LIVE=([^;]+)');
  static final RegExp _yscExp = RegExp('YSC=([^;]+)');
  static final _xsfrTokenExp = RegExp(r'"XSRF_TOKEN"\s*:\s*"(.+?)"');

  final Document _root;

  /// Value of the `VISITOR_INFO1_LIVE` cookie associated with this page.
  ///
  /// When the page is loaded through [get] this is `null` if the response did
  /// not set that cookie.
  final String visitorInfoLive;

  /// Value of the `YSC` cookie associated with this page.
  ///
  /// When the page is loaded through [get] this is `null` if the response did
  /// not set that cookie.
  final String ysc;

  // Lazily computed caches for the getters below.
  _InitialData _initialData;
  String _xsfrToken;
  _PlayerConfig _playerConfig;

  /// The decoded `window["ytInitialData"]` object of the page.
  ///
  /// Throws a [StateError] if no script assigns `window["ytInitialData"]`.
  _InitialData get initialData {
    if (_initialData == null) {
      const marker = 'window["ytInitialData"] =';
      final script = _root
          .querySelectorAll('script')
          .map((e) => e.text)
          .firstWhere((text) => text.contains(marker));
      _initialData = _InitialData(json.decode(_extractJson(script, marker)));
    }
    return _initialData;
  }

  /// The `XSRF_TOKEN` value found in the first script that declares one.
  ///
  /// Throws a [StateError] if no script contains an `XSRF_TOKEN` entry.
  String get xsfrToken => _xsfrToken ??= _root
      .querySelectorAll('script')
      // Match once per script instead of testing and then matching again.
      .map((e) => _xsfrTokenExp.firstMatch(e.text))
      .firstWhere((match) => match != null)
      .group(1);

  /// Whether the page body contains an element with the id `player`.
  bool get isOk => _root.body.querySelector('#player') != null;

  /// Whether the page contains an `og:url` meta tag.
  bool get isVideoAvailable =>
      _root.querySelector('meta[property="og:url"]') != null;

  /// Number of likes, or `0` when it cannot be found in the page.
  int get videoLikeCount =>
      _parseCount(_videoLikeExp, '.like-button-renderer-like-button');

  /// Number of dislikes, or `0` when it cannot be found in the page.
  int get videoDislikeCount =>
      _parseCount(_videoDislikeExp, '.like-button-renderer-dislike-button');

  /// The decoded `ytplayer.config` object of the page.
  ///
  /// Throws a [FormatException] if the page does not assign
  /// `ytplayer.config` or the assigned value is not valid JSON.
  _PlayerConfig get playerConfig =>
      _playerConfig ??= _PlayerConfig(json.decode(_extractJson(
          _root.getElementsByTagName('html').first.text,
          'ytplayer.config = ')));

  /// Reads a counter from the page.
  ///
  /// The first capture group of [labelExp] in the serialized document is
  /// preferred; the text of the element matching [fallbackSelector] is used
  /// otherwise. Non-digit characters are stripped before parsing.
  int _parseCount(RegExp labelExp, String fallbackSelector) {
    final fromLabel = labelExp
        .firstMatch(_root.outerHtml)
        ?.group(1)
        ?.stripNonDigits()
        ?.nullIfWhitespace;
    final digits = fromLabel ??
        _root
            .querySelector(fallbackSelector)
            ?.text
            ?.stripNonDigits()
            ?.nullIfWhitespace ??
        '0';
    return int.parse(digits);
  }

  /// Returns the JSON text that follows the first [separator] in [html].
  ///
  /// Throws a [FormatException] if [separator] does not occur in [html];
  /// without this check `indexOf` returning `-1` would make the extraction
  /// start at an arbitrary offset.
  String _extractJson(String html, String separator) {
    final start = html.indexOf(separator);
    if (start == -1) {
      throw FormatException('Could not find `$separator` in the page.', html);
    }
    return _matchJson(html.substring(start + separator.length));
  }

  /// Returns the prefix of [str] that precedes the first `;` found outside
  /// of any `{ }` pair, or all of [str] if there is no such `;`.
  ///
  /// Characters inside double-quoted string literals are skipped so that
  /// braces and semicolons in string values do not affect the result.
  String _matchJson(String str) {
    var depth = 0;
    var inString = false;
    var escaped = false;
    for (var i = 0; i < str.length; i++) {
      final char = str[i];
      if (inString) {
        if (escaped) {
          // The character after a backslash never terminates the literal.
          escaped = false;
        } else if (char == '\\') {
          escaped = true;
        } else if (char == '"') {
          inString = false;
        }
        continue;
      }
      if (char == '"') {
        inString = true;
      } else if (char == '{') {
        depth++;
      } else if (char == '}') {
        depth--;
      } else if (char == ';' && depth == 0) {
        return str.substring(0, i);
      }
    }
    return str;
  }

  /// Wraps an already parsed document.
  WatchPage(this._root, this.visitorInfoLive, this.ysc);

  /// Parses [raw] as HTML and wraps the resulting document.
  WatchPage.parse(String raw, this.visitorInfoLive, this.ysc)
      : _root = parser.parse(raw);

  /// Downloads and parses the watch page of [videoId] using [httpClient].
  ///
  /// The request is wrapped in [retry]. Throws a
  /// [TransientFailureException] when the page fails the [isOk] check and a
  /// [VideoUnavailableException] when it fails the [isVideoAvailable] check.
  static Future<WatchPage> get(YoutubeHttpClient httpClient, String videoId) {
    final url = 'https://youtube.com/watch?v=$videoId&bpctr=9999999999&hl=en';
    return retry(() async {
      final response = await httpClient.get(url, validate: true);

      // The header, or either cookie inside it, may be missing; in that case
      // the corresponding field stays null instead of crashing the parse.
      final cookies = response.headers['set-cookie'] ?? '';
      final visitorInfoLive =
          _visitorInfoLiveExp.firstMatch(cookies)?.group(1);
      final ysc = _yscExp.firstMatch(cookies)?.group(1);
      final result = WatchPage.parse(response.body, visitorInfoLive, ysc);

      if (!result.isOk) {
        throw TransientFailureException('Video watch page is broken.');
      }
      if (!result.isVideoAvailable) {
        throw VideoUnavailableException.unavailable(VideoId(videoId));
      }
      return result;
    });
  }
}
2020-06-03 13:18:37 +02:00
/// Stream description backed by a flat `String -> String` map.
///
/// Every getter reads its value from the wrapped map on each access.
class _StreamInfo extends StreamInfoProvider {
  final Map<String, String> _root;

  _StreamInfo(this._root);

  @override
  int get bitrate => int.parse(_root['bitrate']);

  @override
  int get tag => int.parse(_root['itag']);

  @override
  String get url => _root['url'];

  @override
  String get signature => _root['s'];

  @override
  String get signatureParameter => _root['sp'];

  /// Content length taken from the `clen` entry, falling back to the first
  /// capture group of [StreamInfoProvider.contentLenExp] in [url].
  ///
  /// Returns `null` when neither source yields a parsable integer.
  @override
  int get contentLength {
    final declared = _root['clen'];
    if (declared != null) {
      return int.tryParse(declared);
    }
    // Both the url and the regex match may be absent; neither may throw.
    final streamUrl = url;
    final fromUrl = streamUrl == null
        ? null
        : StreamInfoProvider.contentLenExp
            .firstMatch(streamUrl)
            ?.group(1)
            ?.nullIfWhitespace;
    return int.tryParse(fromUrl ?? '');
  }

  /// The `mimeType` entry parsed as a [MediaType].
  MediaType get mimeType => MediaType.parse(_root['mimeType']);

  @override
  String get container => mimeType.subtype;

  /// Whether the top-level type of [mimeType] is `audio`.
  bool get isAudioOnly => mimeType.type == 'audio';

  @override
  String get audioCodec => codecs.last;

  @override
  String get videoCodec => isAudioOnly ? null : codecs.first;

  /// The comma separated values of the `codecs` parameter of [mimeType],
  /// trimmed.
  List<String> get codecs =>
      // `map` is lazy and returns an Iterable; materialize it so the value
      // really is the declared List.
      mimeType.parameters['codecs'].split(',').map((e) => e.trim()).toList();

  @override
  String get videoQualityLabel => _root['quality_label'];

  // NOTE(review): assumes `size` is a comma separated `width,height` pair;
  // confirm against real payloads.
  List<int> get _size =>
      _root['size'].split(',').map((e) => int.tryParse(e ?? '')).toList();

  @override
  int get videoWidth => _size.first;

  @override
  int get videoHeight => _size.last;

  @override
  int get framerate => int.tryParse(_root['fps'] ?? '');
}
2020-06-03 13:18:37 +02:00
/// Typed view over the decoded `ytplayer.config` JSON object.
class _PlayerConfig {
  // Json parsed map
  final Map<String, dynamic> _root;

  _PlayerConfig(this._root);

  /// Absolute URL built from `https://youtube.com` and `assets.js`.
  String get sourceUrl => 'https://youtube.com${_root['assets']['js']}';

  /// The `args.player_response` entry parsed as a [PlayerResponse].
  PlayerResponse get playerResponse =>
      PlayerResponse.parse(_root['args']['player_response']);

  /// Streams listed in `args.url_encoded_fmt_stream_map`, or an empty list
  /// when that entry is missing.
  List<_StreamInfo> get muxedStreams =>
      _decodeStreams('url_encoded_fmt_stream_map');

  /// Streams listed in `args.adaptive_fmts`, or an empty list when that
  /// entry is missing.
  List<_StreamInfo> get adaptiveStreams => _decodeStreams('adaptive_fmts');

  /// [muxedStreams] followed by [adaptiveStreams].
  List<_StreamInfo> get streams => [...muxedStreams, ...adaptiveStreams];

  /// Decodes the comma separated, query-string encoded stream list stored
  /// under `args[key]`.
  ///
  /// Builds a real `List<_StreamInfo>`: returning the lazy result of `map`
  /// would not satisfy the declared list type of the public getters.
  List<_StreamInfo> _decodeStreams(String key) {
    final String encoded = _root.get('args')?.getValue(key);
    if (encoded == null) {
      return const [];
    }
    return [
      for (final entry in encoded.split(','))
        _StreamInfo(Uri.splitQueryString(entry)),
    ];
  }
}
2020-06-17 22:14:27 +02:00
/// Typed view over the decoded `ytInitialData` JSON object.
class _InitialData {
  // Json parsed map
  final Map<String, dynamic> root;

  _InitialData(this.root);

  // Cached results of the getters below.
  String _continuation;
  String _clickTrackingParams;

  /// Returns the `nextContinuationData` map found in [root], or `null`.
  ///
  /// When [root] has a `contents` entry, the map is looked up under the
  /// first element of
  /// `contents.twoColumnWatchNextResults.results.results.contents` that has
  /// an `itemSectionRenderer` key. Otherwise, when [root] has a `response`
  /// entry, it is looked up under `response.itemSectionContinuation`.
  ///
  /// Any missing or differently shaped intermediate value yields `null`
  /// rather than an error.
  Map<String, dynamic> getContinuationContext(Map<String, dynamic> root) {
    dynamic section;
    if (root['contents'] != null) {
      final contents = _dig(root['contents'], const [
        'twoColumnWatchNextResults',
        'results',
        'results',
        'contents',
      ]);
      if (contents is List) {
        // `orElse` keeps a page without such a section from throwing.
        final holder = contents.firstWhere(
            (e) => e is Map && e.containsKey('itemSectionRenderer'),
            orElse: () => null);
        section = holder == null ? null : holder['itemSectionRenderer'];
      }
    } else if (root['response'] != null) {
      section = _dig(root['response'], const ['itemSectionContinuation']);
    } else {
      return null;
    }

    final continuations = section is Map ? section['continuations'] : null;
    if (continuations is! List || continuations.isEmpty) {
      return null;
    }
    final first = continuations.first;
    final data = first is Map ? first['nextContinuationData'] : null;
    return data is Map ? data.cast<String, dynamic>() : null;
  }

  /// The `continuation` value of the continuation context, or `''`.
  String get continuation => _continuation ??=
      getContinuationContext(root)?.getValue('continuation') ?? '';

  /// The `clickTrackingParams` value of the continuation context, or `''`.
  String get clickTrackingParams => _clickTrackingParams ??=
      getContinuationContext(root)?.getValue('clickTrackingParams') ?? '';

  /// Follows [path] through nested maps starting at [node].
  ///
  /// Returns `null` as soon as a step is not a map.
  static dynamic _dig(dynamic node, List<String> path) {
    for (final key in path) {
      if (node is! Map) {
        return null;
      }
      node = node[key];
    }
    return node;
  }
}