diff --git a/.emacs.d/custom.el b/.emacs.d/custom.el index 2c92669..3b05725 100644 --- a/.emacs.d/custom.el +++ b/.emacs.d/custom.el @@ -58,7 +58,7 @@ '(org-odd-levels-only t) '(org-todo-keywords '((sequence "TODO" "|" "DONE" "ABANDONED" "DEFERRED"))) '(package-selected-packages - '(flycheck-rust eglot prettier-js zig-mode modus-operandi-theme esup gnu-elpa-keyring-update lsp-hack hack-mode rust-mode filladapt lsp-ui yaml-mode wgrep fsharp-mode company-lsp cquery mustache-mode clang-format projectile dash-functional mocha add-node-modules-path rjsx-mode xref-js2 js2-refactor company omnisharp geiser cider clojure-mode graphviz-dot-mode multi-term xterm-color thrift markdown-mode tuareg merlin ag use-package flycheck dockerfile-mode js2-mode web-mode tss switch-window python-mode paredit magit lua-mode go-mode go-autocomplete exec-path-from-shell csharp-mode color-theme-monokai auto-complete auto-complete-nxml flymake flyspell json-mode popup ruby-mode company-jedi tide elm-mode monky)) + '(flycheck-rust eglot ink-mode prettier-js zig-mode modus-operandi-theme esup gnu-elpa-keyring-update lsp-hack hack-mode rust-mode filladapt lsp-ui yaml-mode wgrep fsharp-mode company-lsp cquery mustache-mode clang-format projectile dash-functional mocha add-node-modules-path rjsx-mode xref-js2 js2-refactor company omnisharp geiser cider clojure-mode graphviz-dot-mode multi-term xterm-color thrift markdown-mode tuareg merlin ag use-package flycheck dockerfile-mode js2-mode web-mode tss switch-window python-mode paredit magit lua-mode go-mode go-autocomplete exec-path-from-shell csharp-mode color-theme-monokai auto-complete auto-complete-nxml flymake flyspell json-mode popup ruby-mode company-jedi tide elm-mode monky)) '(reb-re-syntax 'string) '(rmail-mail-new-frame t) '(safe-local-variable-values diff --git a/.emacs.d/init.el b/.emacs.d/init.el index c8db304..19ef937 100644 --- a/.emacs.d/init.el +++ b/.emacs.d/init.el @@ -213,7 +213,7 @@ (width . 91) (height . ,jd-frame-height))) - (use-package modus-operandi-theme :ensure))) + (use-package modus-themes :ensure))) ;; ================================================================= ;; FUN WITH KEY BINDINGS! YAAAAYYY!!! @@ -975,4 +975,23 @@ (use-package pico8-mode :mode (("\\.p8\\'" . pico8-mode))) +;; ================================================================ +;; Ink +;; ================================================================ +(defun my-ink-mode-hook () + "My hook for ink mode." + (flymake-mode) + (flycheck-mode 0) + (turn-off-auto-fill) + (setq truncate-lines nil) + (visual-line-mode)) + +(use-package ink-mode :ensure t + :mode (("\\.ink\\'" . ink-mode)) + :bind (("M-." . ink-follow-link-at-point) + ("C-c ! n" . flymake-goto-next-error)) + :config + (add-hook 'ink-mode-hook 'my-ink-mode-hook)) + + ;;; init.el ends here diff --git a/bin/youtubedown b/bin/youtubedown index eab23ce..24da93f 100755 --- a/bin/youtubedown +++ b/bin/youtubedown @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright © 2007-2020 Jamie Zawinski +# Copyright © 2007-2021 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that @@ -21,24 +21,31 @@ # --bwlimit Nkbps Throttle download speed. # # --size Instead of downloading it all, print video dimensions. -# This requires "ffmpeg". +# This requires "ffmpeg". # # --list List the underlying URLs of a playlist. # --list --list List IDs and titles of a playlist. # --size --size List the sizes of each video of a playlist. # +# --ping Probe whether video exists and is embeddable. +# +# --max-size SIZE Don't download videos larger than the given size, if +# possible. Size can be WxH, "1080p", "SD", etc. For when +# you don't really need that "4K" version. +# # --no-mux Only download pre-muxed videos, instead of sometimes # downloading separate audio and video files, then combining # them afterward with "ffmpeg". If you specify this option, # you probably can't download anything higher resolution # than 720p. # -# --webm Download WebM files if those are higher resolution than -# MP4. Off by default because only VLC can play WebM. +# --webm Download WebM or AV1 files if those are higher resolution +# than MP4. Off by default because only VLC can play these +# newfangled, irritating formats, which ought not exist. # -# --webm-transcode Download WebM, but convert it to MP4. Off by default -# because it is very slow, however it is the only way to -# get 4K MP4s out of Youtube. +# --webm-transcode Download WebM or AV1, but convert them to MP4. Off by +# default because it is very slow, however it is the only +# way to get 4K MP4s out of Youtube. # # Note: if you have ffmpeg < 2.2, upgrade to something less flaky. # @@ -62,7 +69,7 @@ use Encode; my $progname0 = $0; my $progname = $0; $progname =~ s@.*/@@g; -my ($version) = ('$Revision: 1.1549 $' =~ m/\s(\d[.\d]+)\s/s); +my ($version) = ('$Revision: 1.1689 $' =~ m/\s(\d[.\d]+)\s/s); # Without this, [:alnum:] doesn't work on non-ASCII. use locale; @@ -76,10 +83,11 @@ my $webm_transcode_p = 0; my $http_proxy = undef; +my $ffmpeg = 'ffmpeg'; $ENV{PATH} = "/opt/local/bin:$ENV{PATH}"; # for macports ffmpeg -my @video_extensions = ("mp4", "flv", "webm"); +my @video_extensions = ("mp4", "flv", "webm", "av1"); # Anything placed on this list gets unconditionally deleted when this @@ -111,6 +119,7 @@ $SIG{KILL} = \&signal_cleanup; $SIG{TERM} = \&signal_cleanup; +my $total_retries = 0; my $noerror = 0; sub error($) { @@ -290,9 +299,9 @@ my %keepalive; # { $hostname => $socket, ... } # $bytes_read, $content_length, $document_length. # Does not retry or process redirects. # -sub get_url_1($;$$$$$$$) { +sub get_url_1($;$$$$$$$$) { my ($url, $referer, $to_file, $bwlimit, $start_byte, $max_bytes, - $append_p, $progress_p) = @_; + $append_p, $progress_p, $extra_headers) = @_; error ("not an HTTP URL, try rtmpdump: $url") if ($url =~ m@^rtmp@i); error ("not an HTTP URL: $url") unless ($url =~ m@^(https?|feed)://@i); @@ -314,6 +323,38 @@ sub get_url_1($;$$$$$$$) { if ($S) { print STDERR "$progname: reusing connection: $host\n" if ($verbose > 2); + + } elsif (!$http_proxy && $proto eq 'https:') { + + # If we're not using a proxy, do a direct SSL connection. + # + # There *should* be no difference between: + # IO::Socket::SSL->new (..) + # and + # IO::Socket::INET->new (...) + # IO::Socket::SSL->start_SSL (...) + # but, there is. + # + # As of Jun 2020, the former works but the latter results in Youtube + # responding with "429 Too Many Requests". So that means that there + # is some difference in how those two methods set up the connection, + # and that difference is being detected by Youtube and causing it to + # limit our connections more highly. WTF, and WTF. + # + $S = IO::Socket::SSL->new (PeerAddr => $host, + PeerPort => $port, + Proto => 'tcp', + # Ignore certificate errors + verify_hostname => 0, + SSL_verify_mode => 0, + SSL_verifycn_scheme => 'none', + # set hostname for SNI + SSL_hostname => $ohost, + ) + || error ("socket: SSL: $!"); + + $S->autoflush(1); + } else { # If we were just using LWP::UserAgent, we wouldn't have to do all of this @@ -378,7 +419,8 @@ sub get_url_1($;$$$$$$$) { foreach (@ha) { print STDERR " <== $_\n"; } print STDERR " <==\n"; } - error ("HTTP proxy error: $ha[0]\n") + my $ha0 = $ha[0] || 'null response'; + error ("HTTP proxy error: $ha0\n") unless ($ha[0] =~ m@^HTTP/[0-9.]+ 20\d@si); } @@ -417,6 +459,7 @@ sub get_url_1($;$$$$$$$) { my @extra_headers = (); push @extra_headers, "Referer: $referer" if ($referer); push @extra_headers, "Connection: keep-alive" if ($keepalive_p); + push @extra_headers, @$extra_headers if ($extra_headers); # If we're only reading the first N bytes, don't ask for more. # @@ -649,7 +692,9 @@ sub get_url_1($;$$$$$$$) { draw_progress (($cl ? ($start_byte + $bytes) / $document_length : 0), $actual_bits_per_sec, 1) - if ($progress_p); + if ($progress_p && + !($max_bytes || # don't draw EOF if we're chunking a single URL. + $start_byte + $bytes >= $document_length)); if ($to_file && !$ok_p) { error ("\"$to_file\" unexpectedly vanished!") unless (-f $to_file); @@ -706,8 +751,8 @@ sub get_url_1($;$$$$$$$) { # Check to see if a network failure truncated the file and warn. # Caller will then resume the download using byte ranges. # - if ($to_file && - $cl && + if ($to_file && + $cl && $start_byte + $bytes < $cl-1) { my $pct = int (100 * ($start_byte + $bytes) / $cl); $pct = sprintf ("%.2f", 100 * $bytes / $cl) if ($pct == 100); @@ -730,9 +775,9 @@ sub get_url_1($;$$$$$$$) { # Loads the given URL, processes redirects; retries dropped connections. # Returns: $http, $head, $body, $final_redirected_url. # -sub get_url($;$$$$$$$) { - my ($url, $referer, $to_file, $bwlimit, $max_bytes, - $append_p, $progress_p, $force_ranges_p) = @_; +sub get_url($;$$$$$$$$) { + my ($url, $referer, $to_file, $bwlimit, $max_bytes, + $append_p, $progress_p, $force_ranges_p, $extra_headers) = @_; my $orig_url = $url; my $redirect_count = 0; @@ -746,7 +791,7 @@ sub get_url($;$$$$$$$) { if ($force_ranges_p && !$to_file); do { - + $url =~ s/\#.*$//s; # Remove HTML anchor # If $force_ranges_p is true, we always make multiple sub-range requests @@ -777,7 +822,7 @@ sub get_url($;$$$$$$$) { my ($http, $head, $body, $bytes, $cl, $cl2) = get_url_1 ($url, $referer, $to_file, $bwlimit, $start_byte, $max_bytes_2, - $append_p, $progress_p); + $append_p, $progress_p, $extra_headers); $total_bytes += $bytes; $max_bytes -= $bytes if defined($max_bytes); @@ -858,6 +903,19 @@ sub get_url($;$$$$$$$) { } +sub get_url_hdrs($$) { + my ($url, $hdrs) = @_; + return get_url ($url, undef, # $referer + undef, # $to_file + undef, # $bwlimit + undef, # $max_bytes + undef, # $append_p + undef, # $progress_p + undef, # $force_ranges_p + $hdrs); +} + + sub check_http_status($$$$) { my ($id, $url, $http, $err_p) = @_; return 1 if ($http =~ m@^HTTP/[0-9.]+ 20\d@si); @@ -879,7 +937,7 @@ sub video_file_size($) { my $size = (stat($file))[7]; - my @cmd = ("ffmpeg", + my @cmd = ($ffmpeg, "-i", $file, "-vframes", "0", "-f", "null", @@ -928,7 +986,8 @@ sub video_file_size($) { sub which($) { my ($cmd) = @_; - foreach my $dir (split (/:/, $ENV{PATH})) { + return (-x $cmd) if ($cmd =~ m@^/@s); + foreach my $dir (split (/:/, $ENV{PATH})) { my $cmd2 = "$dir/$cmd"; return $cmd2 if (-x "$cmd2"); } @@ -1014,11 +1073,11 @@ sub write_file_metadata_url($$$) { my $exit_value = $? >> 8; my $signal_num = $? & 127; my $dumped_core = $? & 128; - print STDERR "$progname: $id: $plutil: core dumped!" + print STDERR "$progname: $id: $plutil: core dumped!\n" if ($dumped_core); - print STDERR "$progname: $id: $plutil: signal $signal_num!" + print STDERR "$progname: $id: $plutil: signal $signal_num!\n" if ($signal_num); - print STDERR "$progname: $id: $plutil: exited with $exit_value!" + print STDERR "$progname: $id: $plutil: exited with $exit_value!\n" if ($exit_value); } } @@ -1136,12 +1195,16 @@ sub video_url_size($$;$$$) { # my $bytes = 1024 * 1024; - # If it's a segmented URL, only grab data for the first few. + # If it's a segmented URL, only grab data for the first one. + # # But HEAD all of them (really, GET of 1 byte) to total up the # final Content-Length; I don't see another way to find that. + # Sep 2020: No, that takes too long, there might be thousands. # - my $max = 3; + my $min_segs = 3; + my $max_segs = 8; my $segp = (ref($url) eq 'ARRAY'); + my $size_guess = 0; my $size = 0; my ($http, $head, $body); @@ -1151,9 +1214,9 @@ sub video_url_size($$;$$$) { my $total = scalar (@$url); foreach my $u2 (@$url) { my $append_p = ($i > 0); - my $donep = ($i >= $max); + my $donep = ($i >= $min_segs); - ($http, $head, $body) = get_url ($u2, undef, + ($http, $head, $body) = get_url ($u2, undef, ($donep ? undef : $file), $bwlimit, ($donep ? 1 : $bytes), @@ -1169,8 +1232,14 @@ sub video_url_size($$;$$$) { ($s2) = ($head =~ m@^Content-Length: \s* (\d+) @mix) unless $s2; $size += $s2 if defined($s2); + last if ($i >= $max_segs); $i++; } + + # Approximate the total content length by assuming that size of the + # first N segments are representative of the size of the rest. + $size_guess = $bytes * (@$url / $i); + } else { ($http, $head, $body) = get_url ($url, undef, $file, $bwlimit, $bytes); # internal error if still 403 @@ -1183,6 +1252,8 @@ sub video_url_size($$;$$$) { ($size) = ($head =~ m@^Content-Length: \s* (\d+) @mix) unless $size; + $size = $size_guess if ($size_guess); + errorI ("$id: expected audio or video, got \"$ct\" in $url") if ($ct =~ m/text/i); @@ -1225,7 +1296,7 @@ sub video_url_size($$;$$$) { # - wN = swap 0th and Nth character. # # The first number is the "sts" parameter from the html5player file, -# which is a timestamp or other ID code corresponding to this algorithm. +# which is the "Signature Time Stamp" corresponding to this algorithm. # Requesting get_video_info with that number will return URLs using the # corresponding cipher algorithm. Except sometimes those old 'sts' values # stop working! See below. @@ -2281,6 +2352,117 @@ my %ciphers = ( 'bfb2a3b4/player_ias.vflset/en_US/base' => '18372 s3 w21 r s3 r w36',# 20 Apr 2020 '45e4d51d/player_ias.vflset/en_US/base' => '18375 w17 w16 r w25 r w50 w35',# 23 Apr 2020 '0374edcb/player_ias.vflset/en_US/base' => '18379 w8 w43 w10 w34',# 27 Apr 2020 + '64dddad9/player_ias.vflset/en_US/base' => '18382 r w69 r w40', # 30 Apr 2020 + '52b1e972/player_ias.vflset/en_US/base' => '18386 w25 w47 r s1 w47',# 04 May 2020 + '0acb4375/player_ias.vflset/en_US/base' => '18389 r s1 w23 s3 w51',# 07 May 2020 + '376e3c34/player_ias.vflset/en_US/base' => '18394 r s3 w26', # 12 May 2020 + '70f6ca87/player_ias.vflset/en_US/base' => '18395 w24 w13 w22 w23 s1 w44 r w15',# 13 May 2020 + 'c31ba6fc/player_ias.vflset/en_US/base' => '18396 s3 w35 r w7', # 14 May 2020 + 'e3cd195e/player_ias.vflset/en_US/base' => '18400 r w5 w20 r s2 w52 r w66 r',# 18 May 2020 + '85548937/player_ias.vflset/en_US/base' => '18403 s2 r s3 w14 w68 r w49 w30 w69',# 21 May 2020 + '4583e272/player_ias.vflset/en_US/base' => '18407 w27 s1 r s2 r w43',# 25 May 2020 + 'de455b1a/player_ias.vflset/en_US/base' => '18410 w11 w70 w8 w40',# 28 May 2020 + 'c31b936c/player_ias.vflset/en_US/base' => '18414 w23 w59 s3 w33 w49 s3 r',# 01 Jun 2020 + '39dd62a0/player_ias.vflset/en_US/base' => '18417 w11 w29 s3 w69 r s2 r',# 05 Jun 2020 + '16a691a1/player_ias.vflset/en_US/base' => '18421 r s3 w1 w67 s2 w8',# 08 Jun 2020 + '0c5285fd/player_ias.vflset/en_US/base' => '18428 s1 r w18 w59',# 15 Jun 2020 + '1d33781a/player_ias.vflset/en_US/base' => '18432 r s1 r w28', # 19 Jun 2020 + '5cc7c83f/player_ias.vflset/en_US/base' => '18435 s1 w29 r s3 w41 s2 w27 r',# 22 Jun 2020 + '68f00b39/player_ias.vflset/en_US/base' => '18438 r w15 s1 r w11 s2',# 25 Jun 2020 + '02c092a5/player_ias.vflset/en_US/base' => '18442 s2 r s1 r w37 s3',# 29 Jun 2020 + '54668ca9/player_ias.vflset/en_US/base' => '18444 r w43 r s3 r',# 01 Jul 2020 + '3662280c/player_ias.vflset/en_US/base' => '18449 s2 r w70', # 06 Jul 2020 + '5253ac4d/player_ias.vflset/en_US/base' => '18456 r s3 r w70', # 13 Jul 2020 + '8786a07b/player_ias.vflset/en_US/base' => '18463 r s2 r w29 r s1 r s3',# 20 Jul 2020 + '0bb3b162/player_ias.vflset/en_US/base' => '18466 w39 r w20', # 23 Jul 2020 + 'c718385a/player_ias.vflset/en_US/base' => '18473 w23 r w63 r s1 r',# 30 Jul 2020 + 'e49bfb00/player_ias.vflset/en_US/base' => '18477 w4 r w15 s3 w24 w4 r w24',# 03 Aug 2020 + 'c0a91787/player_ias.vflset/en_US/base' => '18480 w14 w61 s2 w28 w27 s3 w1 w20',# 06 Aug 2020 + '0a90460f/player_ias.vflset/en_US/base' => '18484 s1 r s1 w47', # 10 Aug 2020 + '0c815aae/player_ias.vflset/en_US/base' => '18487 w29 r w61 s2 w32 s3 w34',# 13 Aug 2020 + 'cba0baa7/player_ias.vflset/en_US/base' => '18491 w27 w46 w22 r w46 s2 r w21',# 17 Aug 2020 + '530216c1/player_ias.vflset/en_US/base' => '18494 r s3 r w43', # 20 Aug 2020 + 'eecb0f1e/player_ias.vflset/en_US/base' => '18498 w20 w11 s3 r w27 r s2 r s2',# 24 Aug 2020 + '54d6fa95/player_ias.vflset/en_US/base' => '18501 s1 w39 s3', # 27 Aug 2020 + '86f77974/player_ias.vflset/en_US/base' => '18505 r w69 r w20 r w32 s3',# 31 Aug 2020 + 'bcf2977e/player_ias.vflset/en_US/base' => '18508 s2 w28 w46 r s1',# 03 Sep 2020 + '8c24a503/player_ias.vflset/en_US/base' => '18508 s2 w28 w46 r s1',# 04 Sep 2020 + '134332d3/player_ias.vflset/en_US/base' => '18516 s3 r w1 w3 w51 r',# 11 Sep 2020 + 'e0d83c30/player_ias.vflset/en_US/base' => '18519 w32 r s1 r s3 r w28 s1',# 14 Sep 2020 + '4b1ba5ea/player_ias.vflset/en_US/base' => '18523 s3 w21 r s1 w27 w54 r s3',# 18 Sep 2020 + '9ce2f25a/player_ias.vflset/en_US/base' => '18526 r s2 r w51 s1 r',# 21 Sep 2020 + '12237e3d/player_ias.vflset/en_US/base' => '18529 s1 r s2', # 24 Sep 2020 + '4c375770/player_ias.vflset/en_US/base' => '18536 s3 w41 s3 w3 s3 r',# 01 Oct 2020 + '1a1b48e5/player_ias.vflset/en_US/base' => '18540 s1 w45 r s3 r s1 w50 s1 r',# 05 Oct 2020 + '3c37ed48/player_ias.vflset/en_US/base' => '18547 r w15 r', # 12 Oct 2020 + '00510e67/player_ias.vflset/en_US/base' => '18550 w62 r s1 w45 r s2 r',# 15 Oct 2020 + '5799986b/player_ias.vflset/en_US/base' => '18554 w29 r w38 r', # 19 Oct 2020 + '4a1799bd/player_ias.vflset/en_US/base' => '18557 s2 w30 r', # 22 Oct 2020 + '9b65e980/player_ias.vflset/en_US/base' => '18561 r s3 w51 s2', # 26 Oct 2020 + 'ec262be6/player_ias.vflset/en_US/base' => '18564 w67 s1 w49 s3 w52 r s1 w43 r',# 29 Oct 2020 + 'c926146c/player_ias.vflset/en_US/base' => '18568 s1 w50 r s3 w34 r',# 02 Nov 2020 + '16e41f55/player_ias.vflset/en_US/base' => '18571 w64 w1 w25 w70 r s2',# 05 Nov 2020 + 'ac4b0b03/player_ias.vflset/en_US/base' => '18575 w69 r s2 w56 s1 r s3 r',# 09 Nov 2020 + 'c299662f/player_ias.vflset/en_US/base' => '18578 r s2 w65 s2 r s3 r w2',# 12 Nov 2020 + 'a3726513/player_ias.vflset/en_US/base' => '18582 r w10 r s1 w13 s2 w15',# 16 Nov 2020 + '8b85eac2/player_ias.vflset/en_US/base' => '18585 w62 r s1 w67',# 19 Nov 2020 + '77da52cd/player_ias.vflset/en_US/base' => '18590 s3 r w69 s2 w41',# 24 Nov 2020 + '408be03a/player_ias.vflset/en_US/base' => '18596 w26 w1 r s2 r',# 30 Nov 2020 + '6dde7fb4/player_ias.vflset/en_US/base' => '18604 r s3 w54 s3 r s1',# 08 Dec 2020 + '03226028/player_ias.vflset/en_US/base' => '18606 s2 w62 r', # 10 Dec 2020 + '62f90c99/player_ias.vflset/en_US/base' => '18610 w49 s3 w10 s1',# 14 Dec 2020 + 'c88a8657/player_ias.vflset/en_US/base' => '18612 r s1 r w18 s3 w36 r w32 s3',# 16 Dec 2020 + '2e6e57d8/player_ias.vflset/en_US/base' => '18613 w9 w4 r w54 r',# 17 Dec 2020 + '5dd3f3b2/player_ias.vflset/en_US/base' => '18617 r w13 s3 r w69 s2 w58 r s1',# 21 Dec 2020 + '9f996d3e/player_ias.vflset/en_US/base' => '18634 s2 r s2 w1 w8',# 07 Jan 2021 + 'bfb74eaf/player_ias.vflset/en_US/base' => '18645 s2 r w29', # 18 Jan 2021 + '27cea338/player_ias.vflset/en_US/base' => '18652 w35 s2 w8 r w51',# 25 Jan 2021 + 'c6df6ed7/player_ias.vflset/en_US/base' => '18653 w35 s3 r w44 s1',# 26 Jan 2021 + '7bc032d0/player_ias.vflset/en_US/base' => '18655 r w56 w14 w9 r w30 r s3',# 28 Jan 2021 + 'f6ef8aad/player_ias.vflset/en_US/base' => '18659 s1 w34 s2 r w64 s1 w55 r s3',# 01 Feb 2021 + '4bc55fd6/player_ias.vflset/en_US/base' => '18660 s2 r s1', # 02 Feb 2021 + '0e3144b6/player_ias.vflset/en_US/base' => '18662 r s1 r w10 r s3 w68 s3 w58',# 04 Feb 2021 + '31234943/player_ias.vflset/en_US/base' => '18667 s2 r s1 r s3 r s2 r w66',# 09 Feb 2021 + '0ce056a2/player_ias.vflset/en_US/base' => '18668 w55 w55 s2 r',# 10 Feb 2021 + '490079fb/player_ias.vflset/en_US/base' => '18669 r s1 r s3 r w61 w60',# 11 Feb 2021 + '6eebf7aa/player_ias.vflset/en_US/base' => '18673 w2 s2 w24', # 15 Feb 2021 + '1c732901/player_ias.vflset/en_US/base' => '18676 r s3 w37 s3', # 18 Feb 2021 + '5a096a9f/player_ias.vflset/en_US/base' => '18680 w17 r s3 w12 s1 r w47 r',# 22 Feb 2021 + '392133a3/player_ias.vflset/en_US/base' => '18681 r s2 r w48 s1 w15 r',# 23 Feb 2021 + '4fe52f49/player_ias.vflset/en_US/base' => '18683 w20 s2 w13 r s3',# 25 Feb 2021 + '0d54190b/player_ias.vflset/en_US/base' => '18688 w18 r s1 w17 r w60 r s3 w39',# 02 Mar 2021 + 'a09205f7/player_ias.vflset/en_US/base' => '18690 w33 r s1 r s1 r s2',# 04 Mar 2021 + 'd91669a4/player_ias.vflset/en_US/base' => '18694 s2 r s1 w54 w14 w12 s3',# 08 Mar 2021 + '34a43f74/player_ias.vflset/en_US/base' => '18695 r w27 r s1 r w4 r w41 w41',# 09 Mar 2021 + 'd29f3109/player_ias.vflset/en_US/base' => '18697 r w2 w25 r w21 s3 w35 s2 w70',# 11 Mar 2021 + 'b2e56c01/player_ias.vflset/en_US/base' => '18701 w18 w37 w29 w21 s2 r w29 s2 r',# 15 Mar 2021 + '223a7479/player_ias.vflset/en_US/base' => '18702 s3 w54 w21 w23 w57 w2 r w22',# 16 Mar 2021 + '228f3ac7/player_ias.vflset/en_US/base' => '18708 s3 w54 r w39 r',# 22 Mar 2021 + '38c5f870/player_ias.vflset/en_US/base' => '18709 w51 r s3 w11 r s2 r',# 23 Mar 2021 + '9f1ab255/player_ias.vflset/en_US/base' => '18716 r s2 r w68 w66 r',# 30 Mar 2021 + '4ad4b014/player_ias.vflset/en_US/base' => '18717 s3 w63 w20 s1 r w44 s1',# 31 Mar 2021 + '3a4ee0a9/player_ias.vflset/en_US/base' => '18718 w9 w21 s3 r s3 w46 w67 r w15',# 01 Apr 2021 + '1c20fac3/player_ias.vflset/en_US/base' => '18722 w25 r s1 r s3 w65 w32 s1 r',# 05 Apr 2021 + '1d7f16b4/player_ias.vflset/en_US/base' => '18723 r w27 s3 w23 r w56 w46 r s1',# 06 Apr 2021 + 'd2ff46c3/player_ias.vflset/en_US/base' => '18725 w19 s2 w2 w24',# 08 Apr 2021 + '2cea24bf/player_ias.vflset/en_US/base' => '18729 s1 w69 w1 w18 r w2',# 12 Apr 2021 + '82e684c7/player_ias.vflset/en_US/base' => '18730 s2 w9 r s3 r w53 r',# 13 Apr 2021 + 'e0d06a61/player_ias.vflset/en_US/base' => '18732 s3 r s3 r s2 w4 w41 w41 r',# 15 Apr 2021 + 'ba95ea16/player_ias.vflset/en_US/base' => '18736 w68 r w24 r s3 w67 s2 w16',# 19 Apr 2021 + 'ae5b2092/player_ias.vflset/en_US/base' => '18737 w54 r w13 w7 w27 w19 w13',# 20 Apr 2021 + 'fa244a41/player_ias.vflset/en_US/base' => '18739 s2 r s3 r w54 w51 r',# 22 Apr 2021 + 'cb5bd7e6/player_ias.vflset/en_US/base' => '18744 s2 w5 r s1 r w41 s3',# 27 Apr 2021 + '901932ee/player_ias.vflset/en_US/base' => '18746 s1 w26 w38 s3 r w53 w35 w24 s2',# 29 Apr 2021 + 'bce81a70/player_ias.vflset/en_US/base' => '18747 w50 w48 w23 w31',# 30 Apr 2021 + '3e7e4b43/player_ias.vflset/en_US/base' => '18750 s2 w47 s1 r s2 w4 r w15 w40',# 03 May 2021 + 'bffc6f9f/player_ias.vflset/en_US/base' => '18751 s3 w7 w50 r s2 r w38',# 04 May 2021 + '838cc154/player_ias.vflset/en_US/base' => '18753 w41 s3 r s2 r w62 s3 w69 s2',# 06 May 2021 + '8fd60c09/player_ias.vflset/en_US/base' => '18758 s2 w18 s2 w50 s1 w44 w54 r',# 11 May 2021 + '24fb4fc5/player_ias.vflset/en_US/base' => '18758 s2 w18 s2 w50 s1 w44 w54 r',# 11 May 2021 + 'b2ff0586/player_ias.vflset/en_US/base' => '18760 s2 w45 w38 s2 r s2',# 13 May 2021 + '08244190/player_ias.vflset/en_US/base' => '18764 w46 r s3', # 17 May 2021 + 'fba90263/player_ias.vflset/en_US/base' => '18766 s2 r s3', # 19 May 2021 + '3d0175c7/player_ias.vflset/en_US/base' => '18767 w68 s1 r', # 20 May 2021 ); @@ -2429,6 +2611,10 @@ sub guess_cipher($;$$) { m/$v = (\d{5,}) ; $v \("ipp_signature_cipher_killswitch"\) /sx); } + if (!$sts) { # New way, 15-Aug-2020 + ($sts) = ($body =~ m/signatureTimestamp[:=](\d{5,})/s); + } + errorI ("$cipher_id: no sts parameter: $url") unless $sts; @@ -2701,7 +2887,12 @@ sub youtube_parse_urlmap($$$;$) { if ($urlmap =~ m/^\{"/s) { # Ugh, sometimes it is JSON $urlmap =~ s/^\{//s; - $urlmap =~ s/"(.*?)" : ([^,]+) [,\}]* /{ # "a":x, => a=x& + + # FFS, I don't want to write a whole JSON parser! + # codecs=\"abc, def\" -> %2C + $urlmap =~ s/(\\".*?\\")/{ my $s = $1; $s =~ s@,@%2C@gs; $s; }/gsexi; + $urlmap =~ s/"( [^\"\[\],]*? )" : + ( [^,]+ ) [,\}]* / { # "a":x, => a=x& "$1=" . url_quote($2) . "&"; }/gsexi; $urlmap =~ s/&\{/,/gs; @@ -2727,9 +2918,10 @@ sub youtube_parse_urlmap($$$;$) { $v = '' unless $v; # In JSON, "cipher":"sp=sig&s=...&url=..." - if ($mapelt =~ m@\bcipher=([^&\"]+)@s) { - my $sig4 = url_unquote ($1); + if ($mapelt =~ m@\b(cipher|signatureCipher)=([^&\"{}]+)@s) { + my $sig4 = url_unquote ($2); $sig4 =~ s/^.*"(.*?)".*$/$1/s; + $sig4 =~ s@\\u0026@&@gs; my ($s2) = ($sig4 =~ m/\bs=([^&]+)/s); my ($u2) = ($sig4 =~ m/\burl=([^&]+)/s); if ($u2) { @@ -2774,8 +2966,22 @@ sub youtube_parse_urlmap($$$;$) { my ($ct) = ($e =~ m@\b((audio|video|text|application)/[-_a-z\d]+)\b@si); + # Youtube has started returning "video/mp4" content types that are + # actually VP9 / WebM rather than H.264. Those must be transcoded, + # not copied, to play on H.264-only devices. + # + if ($e =~ m@\b(vp9)\b@si) { # video/mp4; codecs="vp9" + $ct = 'video/webm'; + } elsif ($e =~ m@(av1|av01)@si) { # video/mp4; codecs="av01.0.16M.08" + $ct = 'video/av1'; + } elsif ($e =~ m@\b(opus)\b@si) { # audio/mp4; codecs="opus" + $ct = 'audio/opus'; + } + $v =~ s@^.*?\|@@s; # VEVO + $v =~ s@\\u0026@&@gs; # FFS + errorI ("$id: enciphered URL but no cipher found: $v") if (($sig2 || $sig3) && !$cipher); @@ -2917,6 +3123,17 @@ sub youtube_parse_dashmpd($$$$) { # Nuke the subtitles: the Representations inside them aren't useful. $body2 =~ s@@@gs; + my ($mpd) = ($body2 =~ m@]*)>@si); + error ("no MPD in DASH $url") unless $mpd; + if ($mpd =~ m@\btype=[\"\']dynamic[\"\']@si) { + # default: type="static" + # The stream is live, and there will be some arbitrary (possibly large) + # number of segments, but that's not the whole thing. Punt. + print STDERR "$progname: DASH is a live stream: $url\n" + if ($verbose > 2); + return 0; + } + my @reps = split(/]* > \s* @@ -3083,7 +3305,6 @@ sub load_youtube_formats_html($$$) { if (!$title || $title =~ m/^untitled$/si) { if ($body =~ m/\\?"title\\?":\\?"(.*?)\\?",/si) { $title = $1; - $title =~ s/\\//gs; $title = munge_title (html_unquote ($title)); } } @@ -3134,7 +3355,8 @@ sub load_youtube_formats_html($$$) { if ($v =~ m@&live_playback=([^&]+)@si || $v =~ m@&live=(1)@si || - $v =~ m@&source=(yt_live_broadcast)@si) { + $v =~ m@&source=(yt_live_broadcast)@si || + $v =~ m@"status":"LIVE_STREAM_OFFLINE"@si) { $err = "can't download live videos"; # The fmts point to an M3U8 that is currently of unbounded length. undef %$fmts; @@ -3142,12 +3364,19 @@ sub load_youtube_formats_html($$$) { } if ($key eq 'dashmpd' || $key eq 'hlsvp') { - $count += youtube_parse_dashmpd ("$id HTML", $v, $cipher, $fmts); + my $n = youtube_parse_dashmpd ("$id HTML", $v, $cipher, $fmts); + $count += $n; + $err = "can't download live videos" if ($n == 0 && !$err); + } elsif ($key eq 'player_response') { my $ov = $v; ($v) = ($ov =~ m@"dashManifestUrl": *"(.*?[^\\])"@s); # This manifest sometimes works when the one in get_video_info doesn't. - $count += youtube_parse_dashmpd ("$id HTML", $v, $cipher, $fmts) if $v; + if ($v) { + my $n = youtube_parse_dashmpd ("$id HTML", $v, $cipher, $fmts); + $count += $n; + $err = "can't download live videos" if ($n == 0 && !$err); + } # Nov 2019: Saw this on an old fmt 133 video, and it was the only # list of formats available in the HTML. @@ -3159,6 +3388,18 @@ sub load_youtube_formats_html($$$) { } } + # Sometimes none of that bullshit exists, but we have + # var ytInitialPlayerResponse = { ... + # with formats, adaptiveFormats, and dashManifestUrl. + # + foreach my $key ('formats', 'adaptiveFormats') { + my ($v) = ($body =~ m@"$key": *\[(\{.*?\})\]@s); + $count += youtube_parse_urlmap ("$id HTML b", $v, $cipher, $fmts) if ($v); + } + my ($v) = ($body =~ m@"dashManifestUrl": *"(.*?[^\\])"@s); + $count += youtube_parse_dashmpd ("$id HTML b", $v, $cipher, $fmts) if ($v); + + # Do this after we determine whether we have any video info. sanity_check_title ($title, $url, "ERR: \"$err\"\n\n$body", #### @@ -3249,15 +3490,20 @@ sub load_youtube_formats_video_info($$$) { $body2 =~ s/\\u0026/&/gs; $body2 =~ s/%3D/=/gs; + my ($prep) = ($body2 =~ m@&player_response=([^&]+)@si); + $prep = url_unquote ($prep || ''); + ($title) = ($body2 =~ m@&title=([^&]+)@si) unless $title; ($rental) = ($body2 =~ m@&ypc_vid=([^&]+)@si); ($live_p) = ($body2 =~ m@&live_playback=([^&]+)@si || $body2 =~ m@&live=(1)@si || - $body2 =~ m@&source=(yt_live_broadcast)@si); + $body2 =~ m@&source=(yt_live_broadcast)@si || + $prep =~ m@"is_viewed_live","value":"True"@si); $embed_p = $1 if ($body =~ m@&allow_embed=([^&]+)@si); $embed_p = 0 if (!defined($embed_p) && - $body =~ m/on[+\s+]other[+\s+]websites/s); + ($body =~ m/on[+\s+]other[+\s+]websites/s || + $body =~ m/Age.restricted/si)); # Sigh, %2526source%253Dyt_premiere_broadcast%2526 $premiere_p = 1 if ($body =~ m@yt_premiere_broadcast@s); @@ -3290,7 +3536,10 @@ sub load_youtube_formats_video_info($$$) { "\n" if ($verbose > 1); if ($key eq 'dashmpd' || $key eq 'hlsvp') { - $count += youtube_parse_dashmpd ("$id VI-1", $v, $cipher, $fmts); + my $n = youtube_parse_dashmpd ("$id VI-1", $v, $cipher, $fmts); + $count += $n; + $err = "can't download live videos" if ($n == 0 && !$err); + } elsif ($key eq 'player_response') { ($v) = ($v =~ m@"adaptiveFormats":\[(.*?)\]@s); $count += youtube_parse_urlmap ("$id VI-2", $v, $cipher, $fmts) @@ -3384,10 +3633,6 @@ sub load_youtube_formats($$$) { my %fmts; - # Scrape the HTML page before loading get_video_info because the - # DASH URL in the HTML page is more likely to work than the one - # returned by get_video_info. - # I don't think any of these are needed. # $url .= join('&', # 'has_verified=1', @@ -3395,8 +3640,28 @@ sub load_youtube_formats($$$) { # 'hl=en', # 'disable_polymer=true'); - my $err1 = load_youtube_formats_html ($id, $url, \%fmts); - my $err2 = load_youtube_formats_video_info ($id, $url, \%fmts); + my $ping_p = (($size_p || 0) eq 'ping'); + + # Scrape the HTML page first, or video info first? + # + # - I have sometimes seen that the HTML page has a DASH URL that works, + # but get_video_info has one whose URLs are all 404 (early 2020). + # - I have sometimes seen the opposite. (Late 2020). + # + # So, let's alternate it on error retries. + # + my $html_first_p = !($total_retries & 1); + + my ($err1, $err2); + if ($ping_p && $total_retries == 0) { + $err1 = load_youtube_formats_video_info ($id, $url, \%fmts); + } elsif ($html_first_p) { + $err1 = load_youtube_formats_html ($id, $url, \%fmts); + $err2 = load_youtube_formats_video_info ($id, $url, \%fmts); + } else { + $err1 = load_youtube_formats_video_info ($id, $url, \%fmts); + $err2 = load_youtube_formats_html ($id, $url, \%fmts); + } # Which error sucks less? Hard to say. # my $err = $err2 || $err; @@ -3410,6 +3675,8 @@ sub load_youtube_formats($$$) { # It's rare, but there can be only one format available. # Keys: 18, cipher, title. + undef %fmts if ($ping_p && $err =~ m/embeddable/si); + if (scalar (keys %fmts) < 3) { error ("$id: $err") if $err; errorI ("$id: no formats available: $err"); @@ -3560,8 +3827,8 @@ sub load_vimeo_formats($$$) { my ($ext) = ($url2 =~ m@ ^ [^?&]+ \. ( [^./?&]+ ) ( [?&] | $ ) @sx); $ext = 'mp4' unless $ext; - my $ct = ($ext =~ m/^(flv|webm|3gpp?)$/s ? "video/$ext" : - $ext =~ m/^(mov)$/s ? 'video/quicktime' : + my $ct = ($ext =~ m/^(flv|webm|3gpp?|av1)$/s ? "video/$ext" : + $ext =~ m/^(mov)$/s ? 'video/quicktime' : 'video/mpeg'); $seen{$fmt} = 1; @@ -3593,7 +3860,7 @@ sub load_vimeo_formats($$$) { sub load_tumblr_formats($$$) { my ($id, $url, $size_p) = @_; - # The old code doesn't work any more: I guess they locked down the + # The old code doesn't work any more: I guess they locked down the # video info URL to require an API key. So we can just grab the # "400" version, I guess... { @@ -3602,8 +3869,8 @@ sub load_tumblr_formats($$$) { # Incestuous if ($body =~ m@