From a6f2dcf27251dab1e06a0a2b36ddb9ae8ccfb39f Mon Sep 17 00:00:00 2001 From: Mattia Date: Sun, 1 Nov 2020 15:05:19 +0100 Subject: [PATCH] Better closed captions. #81, #82 --- analysis_options.yaml | 20 +-- example/example.dart | 11 +- lib/src/channels/channel_about.dart | 2 +- lib/src/extensions/helpers_extension.dart | 11 ++ .../responses/channel_about_page.dart | 4 +- .../closed_caption_track_response.dart | 18 +-- .../responses/player_response.dart | 2 +- .../responses/search_page.dart | 2 +- .../youtube_http_client.dart | 49 ++----- lib/src/search/search_client.dart | 3 +- .../closed_captions/closed_caption.dart | 3 + .../closed_caption_client.dart | 133 +++++++++--------- .../closed_caption_track_info.dart | 33 ++++- lib/src/videos/streams/stream_info.dart | 8 -- lib/src/videos/video_client.dart | 2 +- test/search_test.dart | 12 +- 16 files changed, 155 insertions(+), 158 deletions(-) diff --git a/analysis_options.yaml b/analysis_options.yaml index 18fd5b8..00f2413 100644 --- a/analysis_options.yaml +++ b/analysis_options.yaml @@ -1,5 +1,15 @@ include: package:effective_dart/analysis_options.yaml +analyzer: + exclude: #most likely not all of these are needed, but as it is now it works. + - "**/*.g.dart" + - /**/*.g.dart + - \**\*.g.dart + - "*.g.dart" + - "**.g.dart" + - example\** + - lib\src\reverse_engineering\responses\generated\** + linter: rules: - valid_regexps @@ -57,13 +67,3 @@ linter: - prefer_single_quotes - use_function_type_syntax_for_parameters -analyzer: - exclude: #most likely not all of these are needed, but as it is now it works. - - "**/*.g.dart" - - "**\*.g.dart" - - /**/*.g.dart - - \**\*.g.dart - - "*.g.dart" - - "**.g.dart" - - example\** - - lib\src\reverse_engineering\responses\generated\** \ No newline at end of file diff --git a/example/example.dart b/example/example.dart index d028956..8b509c5 100644 --- a/example/example.dart +++ b/example/example.dart @@ -2,11 +2,14 @@ import 'package:youtube_explode_dart/youtube_explode_dart.dart'; Future main() async { var yt = YoutubeExplode(); - var video = - await yt.videos.get('https://www.youtube.com/watch?v=AI7ULzgf8RU'); - print('Title: ${video.title}'); + var manifest = await yt.videos.closedCaptions + .getManifest('Pxgvgh9IFqA', autoGenerated: true); + print(manifest.tracks); + print('\n\n---------------------\n\n'); - // Close the YoutubeExplode's http client. + manifest = await yt.videos.closedCaptions + .getManifest('Pxgvgh9IFqA', autoGenerated: false); + print(manifest.tracks); yt.close(); } diff --git a/lib/src/channels/channel_about.dart b/lib/src/channels/channel_about.dart index 59c2d5e..127f2ea 100644 --- a/lib/src/channels/channel_about.dart +++ b/lib/src/channels/channel_about.dart @@ -1,7 +1,7 @@ import 'package:equatable/equatable.dart'; -import 'channel_link.dart'; import '../common/thumbnail.dart'; +import 'channel_link.dart'; /// YouTube channel's about page metadata. class ChannelAbout with EquatableMixin { diff --git a/lib/src/extensions/helpers_extension.dart b/lib/src/extensions/helpers_extension.dart index 2443936..836bb8e 100644 --- a/lib/src/extensions/helpers_extension.dart +++ b/lib/src/extensions/helpers_extension.dart @@ -113,3 +113,14 @@ extension GetOrNullMap on Map { return v; } } + +/// +extension UriUtils on Uri { + /// + Uri replaceQueryParameters(Map parameters) { + var query = Map.from(queryParameters); + query.addAll(parameters); + + return replace(queryParameters: query); + } +} diff --git a/lib/src/reverse_engineering/responses/channel_about_page.dart b/lib/src/reverse_engineering/responses/channel_about_page.dart index 4baff98..f431b46 100644 --- a/lib/src/reverse_engineering/responses/channel_about_page.dart +++ b/lib/src/reverse_engineering/responses/channel_about_page.dart @@ -1,12 +1,12 @@ import 'package:html/dom.dart'; import 'package:html/parser.dart' as parser; -import 'package:youtube_explode_dart/youtube_explode_dart.dart'; +import '../../../youtube_explode_dart.dart'; import '../../exceptions/exceptions.dart'; +import '../../extensions/helpers_extension.dart'; import '../../retry.dart'; import '../youtube_http_client.dart'; import 'generated/channel_about_page_id.g.dart'; -import '../../extensions/helpers_extension.dart'; /// class ChannelAboutPage { diff --git a/lib/src/reverse_engineering/responses/closed_caption_track_response.dart b/lib/src/reverse_engineering/responses/closed_caption_track_response.dart index 8937baf..f7c4e4d 100644 --- a/lib/src/reverse_engineering/responses/closed_caption_track_response.dart +++ b/lib/src/reverse_engineering/responses/closed_caption_track_response.dart @@ -1,5 +1,6 @@ import 'package:xml/xml.dart' as xml; +import '../../extensions/helpers_extension.dart'; import '../../retry.dart'; import '../youtube_http_client.dart'; @@ -23,21 +24,12 @@ class ClosedCaptionTrackResponse { /// static Future get( YoutubeHttpClient httpClient, String url) { - var formatUrl = _setQueryParameters(url, {'format': '3'}); + var formatUrl = Uri.parse(url).replaceQueryParameters({'fmt': 'srv3'}); return retry(() async { var raw = await httpClient.getString(formatUrl); return ClosedCaptionTrackResponse.parse(raw); }); } - - static Uri _setQueryParameters(String url, Map parameters) { - var uri = Uri.parse(url); - - var query = Map.from(uri.queryParameters); - query.addAll(parameters); - - return uri.replace(queryParameters: query); - } } /// @@ -47,7 +39,7 @@ class ClosedCaption { Duration _offset; Duration _duration; Duration _end; - Iterable _parts; + List _parts; /// String get text => _root.text; @@ -64,8 +56,8 @@ class ClosedCaption { Duration get end => _end ??= offset + duration; /// - Iterable getParts() => - _parts ??= _root.findAllElements('s').map((e) => ClosedCaptionPart._(e)); + List getParts() => _parts ??= + _root.findAllElements('s').map((e) => ClosedCaptionPart._(e)).toList(); ClosedCaption._(this._root); } diff --git a/lib/src/reverse_engineering/responses/player_response.dart b/lib/src/reverse_engineering/responses/player_response.dart index f410760..8e6ec0a 100644 --- a/lib/src/reverse_engineering/responses/player_response.dart +++ b/lib/src/reverse_engineering/responses/player_response.dart @@ -1,9 +1,9 @@ import 'dart:convert'; import 'package:http_parser/http_parser.dart'; -import 'package:youtube_explode_dart/src/reverse_engineering/responses/generated/player_response.g.dart'; import '../../extensions/helpers_extension.dart'; +import 'generated/player_response.g.dart'; import 'stream_info_provider.dart'; /// diff --git a/lib/src/reverse_engineering/responses/search_page.dart b/lib/src/reverse_engineering/responses/search_page.dart index d330e80..3915542 100644 --- a/lib/src/reverse_engineering/responses/search_page.dart +++ b/lib/src/reverse_engineering/responses/search_page.dart @@ -2,11 +2,11 @@ import 'dart:convert'; import 'package:html/dom.dart'; import 'package:html/parser.dart' as parser; -import 'package:youtube_explode_dart/src/search/base_search_content.dart'; import '../../../youtube_explode_dart.dart'; import '../../extensions/helpers_extension.dart'; import '../../retry.dart'; +import '../../search/base_search_content.dart'; import '../../search/related_query.dart'; import '../../search/search_video.dart'; import '../../videos/videos.dart'; diff --git a/lib/src/reverse_engineering/youtube_http_client.dart b/lib/src/reverse_engineering/youtube_http_client.dart index 677eab8..997a7da 100644 --- a/lib/src/reverse_engineering/youtube_http_client.dart +++ b/lib/src/reverse_engineering/youtube_http_client.dart @@ -85,53 +85,24 @@ class YoutubeHttpClient extends http.BaseClient { } /// - // TODO: Check why isRateLimited is not working. Stream> getStream(StreamInfo streamInfo, {Map headers, bool validate = true, int start = 0, int errorCount = 0}) async* { var url = streamInfo.url; -// if (!streamInfo.isRateLimited()) { -// var request = http.Request('get', url); -// request.headers.addAll(_defaultHeaders); -// var response = await request.send(); -// if (validate) { -// _validateResponse(response, response.statusCode); -// } -// yield* response.stream; -// } else { - var bytesCount = start; - for (var i = start; i < streamInfo.size.totalBytes; i += 9898989) { - try { - final request = http.Request('get', url); - request.headers['range'] = 'bytes=$i-${i + 9898989 - 1}'; - final response = await send(request); - if (validate) { - _validateResponse(response, response.statusCode); - } - final stream = StreamController>(); - response.stream.listen((data) { - bytesCount += data.length; - stream.add(data); - }, onError: (_) => null, onDone: stream.close, cancelOnError: false); - errorCount = 0; - yield* stream.stream; - } on Exception { - if (errorCount == 5) { - rethrow; - } - await Future.delayed(const Duration(milliseconds: 500)); - yield* getStream(streamInfo, - headers: headers, - validate: validate, - start: bytesCount, - errorCount: errorCount + 1); - break; - } + var query = Map.from(url.queryParameters); + query['ratebypass'] = 'yes'; + url = url.replace(queryParameters: query); + + var request = http.Request('get', url); + request.headers.addAll(_defaultHeaders); + var response = await request.send(); + if (validate) { + _validateResponse(response, response.statusCode); } -// } + yield* response.stream; } /// diff --git a/lib/src/search/search_client.dart b/lib/src/search/search_client.dart index 1c10b5e..cb471b1 100644 --- a/lib/src/search/search_client.dart +++ b/lib/src/search/search_client.dart @@ -1,7 +1,6 @@ -import 'package:youtube_explode_dart/src/reverse_engineering/responses/search_page.dart'; - import '../common/common.dart'; import '../reverse_engineering/responses/playlist_response.dart'; +import '../reverse_engineering/responses/search_page.dart'; import '../reverse_engineering/youtube_http_client.dart'; import '../videos/video.dart'; import '../videos/video_id.dart'; diff --git a/lib/src/videos/closed_captions/closed_caption.dart b/lib/src/videos/closed_captions/closed_caption.dart index 284e5cf..131a9d2 100644 --- a/lib/src/videos/closed_captions/closed_caption.dart +++ b/lib/src/videos/closed_captions/closed_caption.dart @@ -32,4 +32,7 @@ class ClosedCaption { /// Note that some captions may not have any parts at all. ClosedCaptionPart getPartByTime(Duration offset) => parts.firstWhere((e) => e.offset >= offset, orElse: () => null); + + @override + String toString() => 'Text: $text'; } diff --git a/lib/src/videos/closed_captions/closed_caption_client.dart b/lib/src/videos/closed_captions/closed_caption_client.dart index eff073e..49be477 100644 --- a/lib/src/videos/closed_captions/closed_caption_client.dart +++ b/lib/src/videos/closed_captions/closed_caption_client.dart @@ -1,7 +1,8 @@ +import 'package:xml/xml.dart' as xml; + import '../../extensions/helpers_extension.dart'; -import '../../reverse_engineering/responses/closed_caption_track_response.dart' - hide ClosedCaption, ClosedCaptionPart; -import '../../reverse_engineering/responses/video_info_response.dart'; +import '../../reverse_engineering/responses/responses.dart' + hide ClosedCaption, ClosedCaptionPart, ClosedCaptionTrack; import '../../reverse_engineering/youtube_http_client.dart'; import '../videos.dart'; import 'closed_caption.dart'; @@ -20,16 +21,57 @@ class ClosedCaptionClient { /// Gets the manifest that contains information /// about available closed caption tracks in the specified video. - Future getManifest(dynamic videoId) async { + Future getManifest(dynamic videoId, + {bool autoGenerated = false}) async { videoId = VideoId.fromString(videoId); - var videoInfoResponse = - await VideoInfoResponse.get(_httpClient, videoId.value); - var playerResponse = videoInfoResponse.playerResponse; + var tracks = []; + if (!autoGenerated) { + var subList = await _httpClient.get( + 'https://video.google.com/timedtext?hl=en&type=list&v=${videoId.value}', + validate: true); + // ignore: deprecated_member_use + var content = xml.parse(subList.body); - var tracks = playerResponse.closedCaptionTrack.map((track) => - ClosedCaptionTrackInfo(Uri.parse(track.url), - Language(track.languageCode, track.languageName), - isAutoGenerated: track.autoGenerated)); + var langList = []; + for (var track in content.findAllElements('track')) { + var lang = track.getAttribute('lang_code'); + if (langList.contains(lang)) { + continue; + } + langList.add(lang); + for (var ext in ClosedCaptionFormat.values) { + tracks.add(ClosedCaptionTrackInfo( + Uri.parse('https://www.youtube.com/api/timedtext') + .replaceQueryParameters({ + 'lang': lang, + 'v': videoId.value, + 'fmt': ext.formatCode, + 'name': track.getAttribute('name'), + }), + Language(lang, track.getAttribute('lang_translated')), + format: ext)); + } + } + if (langList.isEmpty) { + return ClosedCaptionManifest([]); + } + return ClosedCaptionManifest(tracks); + } else { + var videoInfoResponse = + await VideoInfoResponse.get(_httpClient, videoId.value); + var playerResponse = videoInfoResponse.playerResponse; + + for (var track in playerResponse.closedCaptionTrack) { + for (var ext in ClosedCaptionFormat.values) { + tracks.add(ClosedCaptionTrackInfo( + Uri.parse(track.url) + .replaceQueryParameters({'fmt': ext.formatCode}), + Language(track.languageCode, track.languageName), + isAutoGenerated: track.autoGenerated, + format: ext)); + } + } + } return ClosedCaptionManifest(tracks); } @@ -46,62 +88,17 @@ class ClosedCaptionClient { return ClosedCaptionTrack(captions); } - /// - Future getSrt(ClosedCaptionTrackInfo trackInfo) async { - var track = await get(trackInfo); - - var buffer = StringBuffer(); - for (var i = 0; i < track.captions.length; i++) { - var caption = track.captions[i]; - - // Line number - buffer.writeln('${i + 1}'); - - // Time start --> time end - buffer.write(caption.offset.toSrtFormat()); - buffer.write(' --> '); - buffer.write(caption.end.toSrtFormat()); - buffer.writeln(); - - // Actual text - buffer.writeln(caption.text); - buffer.writeln(); - } - return buffer.toString(); - } -} - -extension on Duration { - String toSrtFormat() { - String threeDigits(int n) { - if (n >= 1000) { - return n.toString().substring(0, 3); - } - if (n >= 100) { - return '$n'; - } - if (n >= 10) { - return '0$n'; - } - return '00$n'; - } - - String twoDigits(int n) { - if (n >= 10) { - return '$n'; - } - return '0$n'; - } - - if (inMicroseconds < 0) { - return '-${-this}'; - } - var twoDigitHours = twoDigits(inHours); - var twoDigitMinutes = - twoDigits(inMinutes.remainder(Duration.minutesPerHour)); - var twoDigitSeconds = - twoDigits(inSeconds.remainder(Duration.secondsPerMinute)); - var fourDigitsUs = threeDigits(inMilliseconds.remainder(1000)); - return '$twoDigitHours:$twoDigitMinutes:$twoDigitSeconds,$fourDigitsUs'; + /// Auto translated a closed caption track. + ClosedCaptionTrackInfo autoTranslate( + ClosedCaptionTrackInfo trackInfo, String lang) { + return ClosedCaptionTrackInfo( + trackInfo.url.replaceQueryParameters({'tlang': lang}), + Language(lang, ''), + isAutoGenerated: trackInfo.isAutoGenerated, + format: trackInfo.format); } + + /// Returns the subtitles as a string. + Future getSubTitles(ClosedCaptionTrackInfo trackInfo) => + _httpClient.getString(trackInfo.url); } diff --git a/lib/src/videos/closed_captions/closed_caption_track_info.dart b/lib/src/videos/closed_captions/closed_caption_track_info.dart index 24ff07c..25a7c04 100644 --- a/lib/src/videos/closed_captions/closed_caption_track_info.dart +++ b/lib/src/videos/closed_captions/closed_caption_track_info.dart @@ -13,8 +13,13 @@ class ClosedCaptionTrackInfo extends Equatable { /// Whether the associated track was automatically generated. final bool isAutoGenerated; + /// Track format + final ClosedCaptionFormat format; + /// Initializes an instance of [ClosedCaptionTrackInfo] - const ClosedCaptionTrackInfo(this.url, this.language, {this.isAutoGenerated}); + const ClosedCaptionTrackInfo(this.url, this.language, + {this.isAutoGenerated = false, this.format}) + : assert(format != null); @override String toString() => 'CC Track ($language)'; @@ -22,3 +27,29 @@ class ClosedCaptionTrackInfo extends Equatable { @override List get props => [url, language, isAutoGenerated]; } + +/// SubTiles format. +class ClosedCaptionFormat { + /// .srv format(1). + static const ClosedCaptionFormat srv1 = ClosedCaptionFormat._('srv1'); + + /// .srv format(2). + static const ClosedCaptionFormat srv2 = ClosedCaptionFormat._('srv2'); + + /// .srv format(3). + static const ClosedCaptionFormat srv3 = ClosedCaptionFormat._('srv3'); + + /// .ttml format. + static const ClosedCaptionFormat ttml = ClosedCaptionFormat._('ttml'); + + /// .vtt format. + static const ClosedCaptionFormat vtt = ClosedCaptionFormat._('vtt'); + + /// List of all sub titles format. + static const List values = [srv1, srv2, srv3, ttml, vtt]; + + /// Format code as string. + final String formatCode; + + const ClosedCaptionFormat._(this.formatCode); +} diff --git a/lib/src/videos/streams/stream_info.dart b/lib/src/videos/streams/stream_info.dart index ccbacbc..a9e23b4 100644 --- a/lib/src/videos/streams/stream_info.dart +++ b/lib/src/videos/streams/stream_info.dart @@ -24,14 +24,6 @@ abstract class StreamInfo { StreamInfo(this.tag, this.url, this.container, this.size, this.bitrate); } -/// Extensions for [StreamInfo]. -extension StreamInfoExt on StreamInfo { - static final _exp = RegExp('ratebypass[=/]yes'); - - /// Returns true if this video is rate limited. - bool isRateLimited() => _exp.hasMatch(url.toString()); -} - /// Extension for Iterables of StreamInfo. extension StreamInfoIterableExt on Iterable { /// Gets the stream with highest bitrate. diff --git a/lib/src/videos/video_client.dart b/lib/src/videos/video_client.dart index 3266d8a..e9a69aa 100644 --- a/lib/src/videos/video_client.dart +++ b/lib/src/videos/video_client.dart @@ -71,7 +71,7 @@ class VideoClient { } /// Get a [Video] instance from a [videoId] - Future