Skip to content
This repository was archived by the owner on Jun 30, 2022. It is now read-only.

Commit 70403d5

Browse files
authored
Merge pull request #155 from alexmercerind/ytdlp
Migrate to yt-dlp
2 parents 980e440 + 1552459 commit 70403d5

File tree

6 files changed

+154
-90
lines changed

6 files changed

+154
-90
lines changed

‎.github/FUNDING.yml‎

Lines changed: 0 additions & 2 deletions
This file was deleted.

‎.github/workflows/automatic-testing.yml‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
with:
1717
python-version: '3.8'
1818
- name: Install dependencies
19-
run: python -m pip install httpx git+https://github.com/pytube/pytube
19+
run: python -m pip install httpx yt-dlp
2020
- name: Sync
2121
run: python syncExample.py
2222
- name: Async

‎youtubesearchpython/__future__/streamurlfetcher.py‎

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,9 @@ async def get(self, videoFormats: dict, itag: int) -> Union[str, None]:
5252
>>> print(url)
5353
"https://r6---sn-gwpa-5bgk.googlevideo.com/videoplayback?expire=1610798125&ei=zX8CYITXEIGKz7sP9MWL0AE&ip=2409%3A4053%3A803%3A2b22%3Adc68%3Adfb9%3Aa676%3A26a3&id=o-APBakKSE2_eMDMegtCmeWXfuhhUfAzJTmOCWj4lkEjAM&itag=251&source=youtube&requiressl=yes&mh=aP&mm=31%2C29&mn=sn-gwpa-5bgk%2Csn-gwpa-qxad&ms=au%2Crdu&mv=m&mvi=6&pl=36&initcwndbps=146250&vprv=1&mime=audio%2Fwebm&ns=ULL4mkMO31KDtEhOjkOrmpkF&gir=yes&clen=10210834&dur=634.601&lmt=1544629945422176&mt=1610776131&fvip=6&keepalive=yes&c=WEB&txp=5511222&n=uEjSqtzBZaJyVn&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cvprv%2Cmime%2Cns%2Cgir%2Cclen%2Cdur%2Clmt&sig=AOq0QJ8wRAIgKKIEiwQTgXsdKPEyOckgVPs_LMH6KJoeaYmZic_lelECIHXHs1ZnSP5mgtpffNlIMJM3DhxcvDbA-4udFFE6AmVP&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRQIhAPmhL745RYeL_ffgUJk_xJLC-8riXKMylLTLA_pITYWWAiB2qUIXur8ThW7cLfQ73mIVK61mMZc2ncK6FZWjUHGcUw%3D%3D"
5454
'''
55-
self._getDecipheredURLs(videoFormats)
56-
for stream in self._streams:
57-
if stream["itag"] == itag:
58-
return stream["url"]
55+
self._getDecipheredURLs(videoFormats, itag)
56+
if len(self._streams) == 1:
57+
return self._streams[0]["url"]
5958
return None
6059

6160
async def getAll(self, videoFormats: dict) -> dict:
Lines changed: 110 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,96 +1,147 @@
1-
import json
1+
import copy
22
import urllib.request
3+
import urllib.parse
34

5+
import re
6+
7+
from youtubesearchpython.core.constants import ResultMode
8+
from youtubesearchpython.core.video import VideoCore
9+
from youtubesearchpython.core.componenthandler import getValue
410
from youtubesearchpython.core.requests import RequestCore
511

6-
isPyTubeInstalled = False
12+
isYtDLPinstalled = False
713

8-
import httpx
914
try:
10-
from pytube.extract import apply_descrambler, apply_signature
11-
from pytube import YouTube, extract
12-
import pytube
13-
isPyTubeInstalled = True
15+
from yt_dlp.extractor.youtube import YoutubeBaseInfoExtractor, YoutubeIE
16+
from yt_dlp import YoutubeDL
17+
from yt_dlp.utils import url_or_none, try_get, update_url_query, ExtractorError
18+
19+
isYtDLPinstalled = True
1420
except:
15-
class YouTube:
16-
def __init__(self):
17-
pass
21+
pass
1822

1923

20-
class StreamURLFetcherCore(YouTube):
24+
class StreamURLFetcherCore(RequestCore):
2125
'''
2226
Overrides the parent's constructor.
2327
'''
2428
def __init__(self):
25-
if isPyTubeInstalled:
29+
if isYtDLPinstalled:
30+
super().__init__()
2631
self._js_url = None
2732
self._js = None
33+
#self.ytdlp = YoutubeBaseInfoExtractor()
34+
self.ytie = YoutubeIE()
35+
self.ytie.set_downloader(YoutubeDL())
36+
self._streams = []
2837
else:
29-
raise Exception('ERROR: PyTube is not installed. To use this functionality of youtube-search-python, PyTube must be installed.')
38+
raise Exception('ERROR: yt-dlp is not installed. To use this functionality of youtube-search-python, yt-dlp must be installed.')
3039

3140
'''
3241
Saving videoFormats inside a dictionary with key "player_response" for apply_descrambler & apply_signature methods.
3342
'''
34-
def _getDecipheredURLs(self, videoFormats: dict) -> None:
35-
# For some reason, we cannot fetch JavaScript the old way, as PyTube's RegEx doesn't like it.
36-
self.video_id = videoFormats["id"]
37-
self._player_response = videoFormats
38-
if not videoFormats['streamingData']:
39-
try:
40-
self.use_oauth = False
41-
self.allow_oauth_cache = False
42-
self.bypass_age_gate()
43-
self._player_response = self._vid_info
44-
except:
45-
raise Exception('ERROR: Could not make request.')
46-
47-
# We use this to retrieve JavaScript
48-
url = f"https://www.youtube.com/watch?v={self.video_id}"
49-
self.youtube = pytube.YouTube(url)
43+
def _getDecipheredURLs(self, videoFormats: dict, formatId: int = None) -> None:
44+
# We reset our stream list
45+
# See https://github.com/alexmercerind/youtube-search-python/pull/155#discussion_r790165920
46+
# If we don't reset it, URLs from earlier calls stay cached, and since the wrapper class's get() compares the list length,
47+
# it would return None, because length is not 1
48+
self._streams = []
5049

50+
self.video_id = videoFormats["id"]
51+
if not videoFormats["streamingData"]:
52+
# Video is age-restricted. Try to retrieve it using ANDROID_EMBED client and override old response.
53+
# This works most of the time.
54+
vc = VideoCore(self.video_id, None, ResultMode.dict, None, False, overridedClient="ANDROID_EMBED")
55+
vc.sync_create()
56+
videoFormats = vc.result
57+
if not videoFormats["streamingData"]:
58+
# Video is:
59+
# 1. Either age-restricted at the so-called level 3
60+
# 2. Or requires payment (available only to users of the so-called "Join" feature)
61+
raise Exception("streamingData is not present in Video.get. This is most likely a age-restricted video")
62+
# We deep-copy the list; otherwise extending it below would modify the caller's data and duplicate entries
63+
# See https://github.com/alexmercerind/youtube-search-python/pull/155#discussion_r790165920
64+
self._player_response = copy.deepcopy(videoFormats["streamingData"]["formats"])
65+
self._player_response.extend(videoFormats["streamingData"]["adaptiveFormats"])
66+
self.format_id = formatId
5167
self._decipher()
5268

53-
'''
54-
This method is derived from YouTube.prefetch.
55-
This method fetches player JavaScript & its URL from /watch endpoint on YouTube.
56-
Removed unnecessary methods & web requests as we already have metadata.
57-
Uses httpx.AsyncClient in place of requests.
58-
Removed v parameter from the query. (No idea about why PyTube bothered with that)
59-
'''
69+
def extract_js_url(self, res: str):
70+
if res:
71+
# Modified regex, derived from yt-dlp, that retrieves the player JavaScript version
72+
# Source: https://github.com/yt-dlp/yt-dlp/blob/e600a5c90817f4caac221679f6639211bba1f3a2/yt_dlp/extractor/youtube.py#L2258
73+
player_version = re.search(
74+
r'([0-9a-fA-F]{8})\\?', res)
75+
player_version = player_version.group().replace("\\", "")
76+
self._js_url = f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
77+
else:
78+
raise Exception("Failed to retrieve JavaScript for this video")
79+
6080
def _getJS(self) -> None:
61-
self._js = self.youtube.js
81+
# Here we fetch a JavaScript file that links to the specific player JavaScript
82+
self.url = 'https://www.youtube.com/iframe_api'
83+
res = self.syncGetRequest()
84+
self.extract_js_url(res.text)
6285

6386
async def getJavaScript(self):
64-
# we don't wanna break compatibility, so we just pass.
65-
# We retrieve Player JavaScript using _getDecipheredURLs()
66-
pass
87+
# Same as in _getJS(), except it's asynchronous
88+
self.url = 'https://www.youtube.com/iframe_api'
89+
res = await self.asyncGetRequest()
90+
self.extract_js_url(res.text)
6791

68-
'''
69-
Not fetching for new player JavaScript if pytube.__js__ is not None or exception is not caused.
70-
'''
7192
def _decipher(self, retry: bool = False):
72-
if not pytube.__js__ or retry:
73-
self.youtube._js = None
74-
self.youtube._js_url = None
75-
pytube.__js__ = None
76-
pytube.__js_url__ = None
93+
if not self._js_url or retry:
94+
self._js_url = None
95+
self._js = None
7796
self._getJS()
7897
try:
79-
'''
80-
These two are the main methods being used from PyTube.
81-
Used to decipher the stream URLs using player JavaScript & the player_response passed from the getStream method of this derieved class.
82-
These methods operate on the value of "player_response" key in dictionary of self._player_response & save _deciphered information in the "url_encoded_fmt_stream_map" key.
83-
'''
98+
# We need to decipher one URL at a time.
99+
for yt_format in self._player_response:
100+
# If format_id is specified, then it means that we requested only for one URL (ITAG), thus we can skip
101+
# all other ITAGs, which would take up our precious system resources and our valuable time
102+
if self.format_id == yt_format["itag"] or self.format_id is None:
103+
# If "url" is specified in JSON, it is definitely an unciphered URL.
104+
# Thus we can skip deciphering completely.
105+
if getValue(yt_format, ["url"]):
106+
# This is a non-ciphered URL
107+
yt_format["throttled"] = False
108+
self._streams.append(yt_format)
109+
continue
110+
else:
111+
cipher = yt_format["signatureCipher"]
112+
# Some deciphering magic from yt-dlp
113+
# Source: https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/extractor/youtube.py#L2972-L2981
114+
sc = urllib.parse.parse_qs(cipher)
115+
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
116+
encrypted_sig = try_get(sc, lambda x: x['s'][0])
117+
if not (sc and fmt_url and encrypted_sig):
118+
# It's not ciphered
119+
yt_format["throttled"] = False
120+
self._streams.append(yt_format)
121+
continue
122+
if not cipher:
123+
continue
124+
signature = self.ytie._decrypt_signature(sc['s'][0], self.video_id, self._js_url)
125+
sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
126+
fmt_url += '&' + sp + '=' + signature
84127

85-
stream = apply_descrambler(self._player_response["streamingData"])
86-
apply_signature(
87-
stream, self._player_response, pytube.__js__
88-
)
89-
self._streams = stream
128+
# Some magic to unthrottle streams
129+
# Source: https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/extractor/youtube.py#L2983-L2993
130+
query = urllib.parse.parse_qs(fmt_url)
131+
throttled = False
132+
if query.get('n'):
133+
try:
134+
fmt_url = update_url_query(fmt_url, {
135+
'n': self.ytie._decrypt_nsig(query['n'][0], self.video_id, self._js_url)})
136+
except ExtractorError as e:
137+
throttled = True
138+
yt_format["url"] = fmt_url
139+
yt_format["throttled"] = throttled
140+
self._streams.append(yt_format)
90141
except Exception as e:
91142
if retry:
92143
raise e
93144
'''
94145
Fetch updated player JavaScript to get new cipher algorithm.
95146
'''
96-
self._decipher(retry = True)
147+
self._decipher(retry=True)

‎youtubesearchpython/core/video.py‎

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,47 @@
88
from youtubesearchpython.core.componenthandler import getValue, getVideoId
99

1010

11+
CLIENTS = {
12+
"MWEB": {
13+
'context': {
14+
'client': {
15+
'clientName': 'MWEB',
16+
'clientVersion': '2.20211109.01.00'
17+
}
18+
},
19+
'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
20+
},
21+
"ANDROID": {
22+
'context': {
23+
'client': {
24+
'clientName': 'ANDROID',
25+
'clientVersion': '16.20'
26+
}
27+
},
28+
'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
29+
},
30+
"ANDROID_EMBED": {
31+
'context': {
32+
'client': {
33+
'clientName': 'ANDROID',
34+
'clientVersion': '16.20',
35+
'clientScreen': 'EMBED'
36+
}
37+
},
38+
'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
39+
}
40+
}
41+
42+
1143
class VideoCore(RequestCore):
12-
def __init__(self, videoLink: str, componentMode: str, resultMode: int, timeout: int, enableHTML: bool):
44+
def __init__(self, videoLink: str, componentMode: str, resultMode: int, timeout: int, enableHTML: bool, overridedClient: str = "ANDROID"):
1345
super().__init__()
1446
self.timeout = timeout
1547
self.resultMode = resultMode
1648
self.componentMode = componentMode
1749
self.videoLink = videoLink
1850
self.enableHTML = enableHTML
51+
self.overridedClient = overridedClient
1952

2053
# We call this when we use only HTML
2154
def post_request_only_html_processing(self):
@@ -34,15 +67,7 @@ def prepare_innertube_request(self):
3467
'racyCheckOk': True,
3568
"videoId": getVideoId(self.videoLink)
3669
})
37-
self.data = {
38-
'context': {
39-
'client': {
40-
'clientName': 'ANDROID',
41-
'clientVersion': '16.20'
42-
}
43-
},
44-
'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
45-
}
70+
self.data = CLIENTS[self.overridedClient]
4671

4772
async def async_create(self):
4873
self.prepare_innertube_request()
@@ -69,15 +94,7 @@ def prepare_html_request(self):
6994
'racyCheckOk': True,
7095
"videoId": getVideoId(self.videoLink)
7196
})
72-
self.data = {
73-
'context': {
74-
'client': {
75-
'clientName': 'MWEB',
76-
'clientVersion': '2.20211109.01.00'
77-
}
78-
},
79-
'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
80-
}
97+
self.data = CLIENTS["MWEB"]
8198

8299
def sync_html_create(self):
83100
self.prepare_html_request()

‎youtubesearchpython/streamurlfetcher.py‎

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,9 @@ def get(self, videoFormats: dict, itag: int) -> Union[str, None]:
4949
>>> print(url)
5050
"https://r6---sn-gwpa-5bgk.googlevideo.com/videoplayback?expire=1610798125&ei=zX8CYITXEIGKz7sP9MWL0AE&ip=2409%3A4053%3A803%3A2b22%3Adc68%3Adfb9%3Aa676%3A26a3&id=o-APBakKSE2_eMDMegtCmeWXfuhhUfAzJTmOCWj4lkEjAM&itag=251&source=youtube&requiressl=yes&mh=aP&mm=31%2C29&mn=sn-gwpa-5bgk%2Csn-gwpa-qxad&ms=au%2Crdu&mv=m&mvi=6&pl=36&initcwndbps=146250&vprv=1&mime=audio%2Fwebm&ns=ULL4mkMO31KDtEhOjkOrmpkF&gir=yes&clen=10210834&dur=634.601&lmt=1544629945422176&mt=1610776131&fvip=6&keepalive=yes&c=WEB&txp=5511222&n=uEjSqtzBZaJyVn&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cvprv%2Cmime%2Cns%2Cgir%2Cclen%2Cdur%2Clmt&sig=AOq0QJ8wRAIgKKIEiwQTgXsdKPEyOckgVPs_LMH6KJoeaYmZic_lelECIHXHs1ZnSP5mgtpffNlIMJM3DhxcvDbA-4udFFE6AmVP&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRQIhAPmhL745RYeL_ffgUJk_xJLC-8riXKMylLTLA_pITYWWAiB2qUIXur8ThW7cLfQ73mIVK61mMZc2ncK6FZWjUHGcUw%3D%3D"
5151
'''
52-
self._getDecipheredURLs(videoFormats)
53-
for stream in self._streams:
54-
if stream["itag"] == itag:
55-
return stream["url"]
52+
self._getDecipheredURLs(videoFormats, itag)
53+
if len(self._streams) == 1:
54+
return self._streams[0]["url"]
5655
return None
5756

5857
def getAll(self, videoFormats: dict) -> Union[dict, None]:
@@ -134,4 +133,4 @@ def getAll(self, videoFormats: dict) -> Union[dict, None]:
134133
}
135134
'''
136135
self._getDecipheredURLs(videoFormats)
137-
return {"streams": self._player_response}
136+
return {"streams": self._streams}

0 commit comments

Comments
 (0)