pengantar
Hari ini kami akan mempertimbangkan layanan musik terkenal seperti Yandex.Music. Layanan keseluruhan yang baik, tetapi dengan kekurangan yang signifikan - ketidakmampuan untuk bekerja secara offline. Kami akan mencoba memperbaiki kesalahpahaman yang mengganggu ini menggunakan alat yang tersedia.
Alat
Jadi, kami membutuhkan:
- Python yang relatif segar : 3.7 dan lebih tinggi
- Asinkron apa pun : aiohttp dan aiofile
- Alat klasik untuk bekerja dengan API html: BeautifulSoup
- Untuk menghibur pengguna selama proses: tqdm
- Untuk mengisi tag: mutagen
Otorisasi
Pengguna layanan yang tidak sah hanya dapat mengakses segmen lagu dengan durasi hingga 30 detik. Ini jelas tidak cukup untuk kualitas mendengarkan. Kami akan masuk dengan cara yang paling alami, melalui formulir web dan menerima cookie. Ini akan membantu kami sebagai pembuka untuk membuat permintaan dan HTMLParser untuk formulir parsing.
def resolve_cookie(login: str, password: str) -> str:
cookies = CookieJar()
opener = urllib.request.build_opener(
urllib.request.HTTPCookieProcessor(cookies),
urllib.request.HTTPRedirectHandler())
response = opener.open("https://passport.yandex.ru")
doc = response.read()
parser = FormParser()
parser.feed(doc.decode("utf-8"))
parser.close()
parser.params["login"] = login
response = opener.open(parser.url or response.url, urllib.parse.urlencode(parser.params).encode("utf-8"))
doc = response.read()
parser = FormParser()
parser.feed(doc.decode("utf-8"))
parser.close()
parser.params["login"] = login
parser.params["passwd"] = password
response = opener.open(parser.url or response.url, urllib.parse.urlencode(parser.params).encode("utf-8"))
cookie_data = {}
for item in cookies:
if item.domain == ".yandex.ru":
cookie_data[item.name] = item.value
if "yandex_login" not in cookie_data:
keys = ", ".join(cookie_data.keys())
raise Exception(f"Invalid cookie_data {keys}")
return "; ".join(map(lambda v: f"{v[0]}={v[1]}", cookie_data.items()))
https://passport.yandex.ru login. , , . . — . yandex_login, . .
Yandex Music (HTML) API
. , aiohttp. html BeautifulSoup. , , -.
class YandexMusicApi:
host = "music.yandex.ru"
base_url = f"https://{host}"
def __init__(self, cookie: str):
self.headers = Headers(self.host, cookie)
async def _request(self, end_point: str):
async with aiohttp.ClientSession() as session:
url = f"{self.base_url}/{end_point}"
async with session.request(method="GET", url=url) as response:
return await response.read()
async def get_favorite_artists(self, login: str) -> List[Artist]:
body = await self._request(f"users/{login}/artists")
soup = BeautifulSoup(body, "lxml")
artists_soup = soup.find("div", class_="page-users__artists")
if artists_soup is None:
caption = soup.find("div", class_="page-users__caption")
if caption:
raise Exception(caption.contents[0])
result = []
for artist_soup in artists_soup.find_all("div", class_="artist"):
title_soup = artist_soup.find("div", class_="artist__name")
title = title_soup.attrs["title"]
title_href_soup = title_soup.find("a")
id_ = int(title_href_soup.attrs["href"].split("/")[-1])
result.append(Artist(id_, title))
return result
, https://music.yandex.ru/users/<login>/artists page-users__artists . title artist__name. Id split .
, .
. , . — yandex-. , . Network , https://{host}/get-mp3/{sign}/{ts}/{path}, sign. (XGRlBW9FXlekgbPrRHuSiA) . , .
async def get_track_url(self, album_id: int, track_id: int) -> str:
async with aiohttp.ClientSession() as session:
url = f"{self.base_url}/api/v2.1/handlers/track/{track_id}:{album_id}/" \
f"web-album-track-track-main/download/m?hq=0&external-domain={self.host}&overembed=no&__t={timestamp()}"
page = f"album/{album_id}"
headers = self.headers.build(page)
async with session.request(method="GET", url=url, headers=headers) as response:
body = await response.json()
src = body["src"]
src += f"&format=json&external-domain={self.host}&overembed=no&__t={timestamp()}"
result = parse.urlparse(src)
headers = self.headers.build(page, {
":authority": "storage.mds.yandex.net",
":method": "GET",
":path": f"{result.path}/{result.query}",
":scheme": "https",
}, True)
async with session.request(method="GET", url=src, headers=headers) as response:
body = await response.json()
host = body["host"]
path = body["path"]
s = body["s"]
ts = body["ts"]
sign = md5(f"XGRlBW9FXlekgbPrRHuSiA{path[1::]}{s}".encode("utf-8")).hexdigest()
url = f"https://{host}/get-mp3/{sign}/{ts}/{path}"
return url
, .
async def download_file(cls, url: str, filename: str):
async with aiohttp.ClientSession() as session:
async with session.request(method="GET", url=url) as response:
data = await response.read()
async with AIOFile(filename, "wb") as afp:
await afp.write(data)
await afp.fsync()
. , , mp3, , , . , , . , . .
, . , , . , , . (- mp3 ).
async def download_artist(self, artist: Artist, depth: Depth = Depth.NORMAL):
artist_progress = tqdm(total=0, desc=artist.title, position=1, ascii=True)
albums = await self.api.get_artist_albums(artist.id)
artist_progress.total = len(albums)
artist_progress.refresh()
for album in albums:
album_dir = os.path.join(self.target_dir, normalize(artist.title), f"{album.year} - {normalize(album.title)}")
if depth < Depth.ALBUMS and os.path.exists(album_dir):
artist_progress.update()
continue
album_progress = tqdm(total=0, desc=f"> {album.title}", position=0, ascii=True)
tracks = await self.api.get_album_tracks(album.id)
album_progress.total = len(tracks)
album_progress.refresh()
os.makedirs(album_dir, exist_ok=True)
if album.cover:
album_progress.total += 1
cover_filename = os.path.join(album_dir, "cover.jpg")
if not os.path.exists(cover_filename):
await self.download_file(album.cover, cover_filename)
album_progress.update()
for track in tracks:
target_filename = os.path.join(album_dir, f"{track.num:02d}. {normalize(track.title)}.mp3")
if depth >= Depth.TRACKS or not os.path.exists(target_filename):
url = await self.api.get_track_url(track.album_id, track.id)
await self.download_file(url, target_filename)
self.write_tags(target_filename, {
"title": track.title,
"tracknumber": str(track.num),
"artist": artist.title,
"album": album.title,
"date": str(album.year),
})
album_progress.update()
album_progress.close()
artist_progress.update()
artist_progress.close()
, , AC/DC . normalize:
def normalize(name: str) -> str:
return name.replace("/", "-")
, ( ) . . asyncio.Semaphore asyncio.gather.
, .
, , , . .credentials, . .cookie .
def resolve_cookie() -> str:
base_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), os.path.pardir))
cookie_file = os.path.join(base_dir, ".cookie")
if os.path.exists(cookie_file):
with open(cookie_file, "rt") as file:
return file.read()
credentials_file = os.path.join(base_dir, ".credentials")
if os.path.exists(credentials_file):
config = configparser.ConfigParser()
config.read(credentials_file)
login = config["yandex"]["login"]
password = config["yandex"]["password"]
else:
raise Exception(f"""Create \"{credentials_file}\" with content
[yandex]
login=<user_login>
password=<user_password>
""")
cookie = auth.resolve_cookie(login, password)
with open(cookie_file, "wt") as file:
file.write(cookie)
return cookie
, , . argparse, .
:
- -a (--artist), Id , ,
- -o (--output), , —
Music. - -d (--depth), ,
- -
0 (NORMAL), , , - Value
1 (ALBUMS)loop melalui semua trek di album dan mendownload yang hilang - Nilai
2 (TRACKS)mengunduh dan menimpa trek meskipun mereka sudah ada di sistem file
- -
async def main():
parser = argparse.ArgumentParser()
parser.add_argument("-a", "--artist", help="Artist ID")
parser.add_argument("-o", "--output", default=f"{Path.home()}/Music",
help=f"Output directory, default {Path.home()}/Music")
parser.add_argument("-d", "--depth", default=0, type=int,
help=f"Exists files check depth, {enum_print(Depth)}")
args = parser.parse_args()
cookie = resolve_cookie()
api = YandexMusicApi(cookie)
agent = YandexMusicAgent(api, args.output)
if args.artist:
artist = await api.get_artist(args.artist)
await agent.download_artist(artist, args.depth)
else:
email = re.compile(".*?yandex_login=(.*?);.*?", re.M).match(cookie).group(1)
await agent.download_favorites(email, args.depth)
Dan sekarang, akhirnya, kita bisa menjalankan semuanya:
if __name__ == "__main__":
asyncio.run(main())
Terima kasih atas perhatian Anda. Sekarang Anda tahu cara menerapkan API yang tidak ada, unduh yang tidak dapat diunduh dan jadilah pemilik koleksi musik Anda sendiri.
Hasilnya bisa dilihat di repositori yandex.music.agent