Wednesday, 13 March 2013

xiami.com - python downloader







Platform: Linux, python(including BeautifulSoup), curl

Prerequisite: A Xiami account is not compulsory; however, without logging in, some songs will be missing from the results.

Further work: add metadata, such as the album image, to the mp3; see eyeD3 (http://eyed3.nicfit.net/).

How to decode mp3 location:  

Example: 8h2xt%8%72%3tFi%2%26_5t%a2F2591Ep2mF3F23%3%Fi66%%4593f.95552E.A3n98EE25m%.e251181p. The leading 8 is the desired matrix's row count. Drop the 8, and h2xt%8%72%3tFi%2%26_5t%a2F2591Ep2mF3F23%3%Fi66%%4593f.95552E.A3n98EE25m%.e251181p is the substring used to build the matrix. The length of that substring is 81, and 81 divided by the 8 rows is 10 (integer division). If 81 % 8 is not zero, I call the remainder the overflow (here 1), and the maximum row length becomes 10 + 1 = 11. Every row has either the maximum length (11) or the maximum length - 1 (10); the first "overflow" rows get the maximum length. Finally, unquote the URL (e.g. %3A becomes :), and then replace every ^ (%5E) with the character 0.

8

h2xt%8%72%3

tFi%2%26_5

t%a2F2591E

p2mF3F23%3

%Fi66%%459

3f.95552E.

A3n98EE25m

%.e251181p





As you can see, reading down the first column, matrix[0][0] -> matrix[1][0] -> matrix[2][0] -> matrix[3][0] -> matrix[4][0] -> matrix[5][0] -> matrix[6][0] -> matrix[7][0]

is equal to http%3A%

Then the second column, matrix[0][1] -> matrix[1][1] -> matrix[2][1] -> matrix[3][1] -> matrix[4][1] -> matrix[5][1] -> matrix[6][1] -> matrix[7][1], is equal to 2F%2Ff3.

And so on... 

Then concatenating all the columns gives http%3A%2F%2Ff3....blahblahblah..mp3

Unquote it and replace every ^ with 0,

and it becomes http://f3.xiami.net/6992/365858/01%201769342282_1051039.mp3

NOTE: Download the mp3 with firefox.

There's no guarantee that the download succeeds with other web browsers, because of the HTTP Referer header validation.

Screenshot:

     Fixed bug 15 March 2013 : cookies file naming
     Fixed bug 15 March 2013 : escape single quote on url/folder/song name when download
     New feature 15 March 2013 : Download all today's recommended mp3
     New feature 15 March 2013 : Naming convention, Artist name - song name.mp3



                                                         60298 is artist radio id


The location link has to be decoded to get the final mp3 link.


Downloader front 


                      Download collection's all songs - recommend collection id is shown


Download entire artist radio



Downloader Source code:

#author: <limkokhole@facebook.com>
# Account credentials; xiami_login() submits them as the login form's
# "email" and "password" fields.
EMAIL = "" #PUT YOUR EMAIL HERE !!!
PASSWORD = "" #PUT YOUR PASSWORD HERE !!!
# User-Agent string handed to every curl invocation through -A.
USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17"
# Extra header handed to curl through -H for the login POST.
CONN_TYPE_HEADER = "Content-Type:application/x-www-form-urlencoded"
# Referer handed to curl through -e for the playlist, feed and mp3 requests.
FAKE_REFERER = "http://www.xiami.com/res/fm/xiamiRadio_20120612.swf?v=20130129" #the key is http://www.xiami.com, not necessary full url required
# curl cookie jar: written with -c by xiami_login(), read back with -b.
COOKIES_FILE = "/tmp/xiami_cookie" #linux !!!
# [mp3_url, song_name] pairs; rebuilt by request_api(), consumed by download().
loc_list = []
# [title, description, collection id] triples; rebuilt by request_recommend().
col_list = []


import os
import sys
import time
import signal
import urllib
import urllib2
import subprocess
from xml.etree import ElementTree as ET
from BeautifulSoup import BeautifulSoup

def sub_pre():
    """Reset SIGPIPE to the operating system's default action.

    Used as the ``preexec_fn`` of the curl child started by ``download``,
    so it runs in the child process just before curl is executed.
    """
    default_action = signal.SIG_DFL
    signal.signal(signal.SIGPIPE, default_action)

def download(folder_name="."):
    """Download every entry of the global ``loc_list`` with curl.

    Each entry is a ``[mp3_url, song_name]`` pair.  The file is written to
    ``<folder_name>/<song_name>.mp3``; curl is run through the shell with
    the fake referer and user agent, and its output is redirected to the
    destination file.

    folder_name -- existing directory that receives the files
                   (defaults to the current directory).
    """
    total = len(loc_list)
    for index, (mp3_url, song_name) in enumerate(loc_list):
        # A "/" inside the name (e.g. "AC/DC - ...") would be read as a path
        # separator and point the shell redirect at a directory that does
        # not exist, so neutralise it before building the path.
        song_name = song_name.replace("/", "_")
        if len(song_name) > 251:
            # leave room for the 4-character ".mp3" suffix
            song_name = song_name[:251]
        song_name += ".mp3"
        file_destination = "/".join([folder_name, song_name])
        # Single quotes in the url/path are escaped as '\'' so they cannot
        # terminate the surrounding single-quoted shell word.
        cmd = "curl -vLk -e '"+FAKE_REFERER+"' -A '"+USER_AGENT+"' '"+mp3_url.replace("'", "'\\''")+"' > '"+file_destination.replace("'", "'\\''")+"'"
        print("cmd:  %s" % (cmd,))
        proc = subprocess.Popen(cmd, preexec_fn=sub_pre, shell=True, bufsize=2048, stdout=subprocess.PIPE, close_fds=True)
        try:
            # wait for curl to finish before starting the next download
            proc.communicate()
        except Exception as e:
            print("proc exception  %s" % (e,))
        print("".join(["Completed ", str(index+1), " of ", str(total)]))


def request_api(link, come_from, sid):
    """Fetch a playlist XML with curl and rebuild the global ``loc_list``.

    link      -- playlist/radio URL to request.
    come_from -- menu key of the request.  "r" reads the second child of
                 the XML root and the ``song_name``/``artist_name`` tags;
                 "s", "a", "c" and "t" read the first child and the
                 ``title``/``artist`` tags.
    sid       -- fallback song name used until (or unless) a title is found.

    Every ``location`` element found is decoded with ``decode_mp3_matrix``
    and appended to ``loc_list`` as ``[mp3_url, "artist - song"]``.  The
    number of collected tracks is printed at the end.

    Raises ValueError for an unknown ``come_from``; XML parse errors and
    decode errors propagate to the caller.
    """
    global loc_list
    loc_list = []

    if come_from == "r":
        root_child = 1
        song_name_tag = "song_name"
        artist_name_tag = "artist_name"
    elif come_from in ["s", "a", "c", "t"]:
        root_child = 0
        song_name_tag = "title"
        artist_name_tag = "artist"
    else:
        # previously this surfaced as a NameError after the network round trip
        raise ValueError("unknown request type: %r" % (come_from,))

    # Escape single quotes the same way download() does, so an id typed with
    # a quote cannot break out of the single-quoted shell word.
    cmd = "curl -vLk -b '"+COOKIES_FILE+"' --connect-timeout 20 -A '"+USER_AGENT+"' -e '"+FAKE_REFERER+"' '"+link.replace("'", "'\\''")+"'"
    proc = subprocess.Popen(cmd, shell=True, bufsize=2048, stdout=subprocess.PIPE, close_fds=True)
    r = proc.stdout.read()
    try:
        proc.communicate()
    except Exception as e:
        print("exception:  %s" % (e,))
    base_xml = ET.fromstring(r)
    c = base_xml.getchildren()
    g = c[root_child].getiterator()

    real_index = 0
    song_name = sid
    for gg in g:
        # An empty element has text None; treat it as an empty string
        # instead of crashing on .strip().
        text = (gg.text or "").strip()
        if song_name_tag in gg.tag: #assume song_name/title always on top of artist_name/artist/location tag
            song_name = text or sid
        elif artist_name_tag in gg.tag: #assume artist_name/artist always on top of location tag
            if text:
                song_name = " - ".join([text, song_name])
        elif "location" in gg.tag: #may {http://xspf.org/ns/0/}location
            if not text:
                # nothing to decode for this track; skip it, keep the rest
                continue
            real_index += 1
            loc_list.append([decode_mp3_matrix(text), song_name])
    print(real_index)

def request_recommend(link, come_from):
    """Fetch the recommended-collections feed and rebuild the global ``col_list``.

    link      -- feed URL to request with curl.
    come_from -- only "c" is understood: the first child of the XML root is
                 scanned for ``title``, ``description`` and ``guid`` elements.
                 Any other value leaves ``col_list`` empty.

    Each ``guid`` closes one entry: ``[title, description, id]`` where the id
    is the last "/"-separated component of the guid text.  The collected
    entries are printed for the user to pick a collection id from.
    """
    global col_list
    col_list = []

    # Escape single quotes so the URL stays inside its single-quoted shell word.
    cmd = "curl -vLk -b '"+COOKIES_FILE+"' --connect-timeout 20 -A '"+USER_AGENT+"' -e '"+FAKE_REFERER+"' '"+link.replace("'", "'\\''")+"'"
    proc = subprocess.Popen(cmd, shell=True, bufsize=2048, stdout=subprocess.PIPE, close_fds=True)
    r = proc.stdout.read()
    try:
        proc.communicate()
    except Exception as e:
        print("exception:  %s" % (e,))
    # raw response and parsed root are echoed for debugging
    print("%s  r" % (r,))
    base_xml = ET.fromstring(r)
    print("%s  x" % (base_xml,))
    c = base_xml.getchildren()
    if come_from == "c":
        # Start with empty values so a guid that shows up before any
        # title/description cannot hit an unbound variable.
        t = ""
        d = ""
        for gg in c[0].getiterator():
            # empty elements have text None
            text = (gg.text or "").strip()
            if "title" in gg.tag: #assume the sequence (title -> desc -> guid )is correct
                t = text
            if "description" in gg.tag:
                d = text
            if "guid" in gg.tag:
                col_list.append([t, d, text.split("/")[-1]])
    if col_list:
        print("Recommend: \n")
        for ct, cd, coll in col_list:
            print("==========================\n")
            print("Collection Id:  %s" % (coll,))
            print("Title:  %s" % (ct,))
            print("Description:  %s" % (cd,)) #Enable it if you want :)
            print("==========================\n")
        print("")

def decode_mp3_matrix(s):
    """Decode an obfuscated ``location`` string into the real mp3 URL.

    The first character of ``s`` is the number of matrix rows.  The rest is
    the matrix written row by row; when the payload length is not a multiple
    of the row count, the first ``remainder`` rows are one character longer
    than the others.  Reading the matrix column by column gives a
    percent-encoded URL, which is unquoted and then has every "^" replaced
    by "0".

    Sample inputs:
      8h2xt%8%72%3tFi%2%26_5t%a2F2591Ep2mF3F23%3%Fi66%%4593f.95552E.A3n98EE25m%.e251181p
      8h2xt%6EE5%mtFi%2131E5pt%a2F1%74E3p2mF4%2687%Fi6%259113f.95F28_1A3n9E%%316%.e2755%7.
      9hFaF15E9%t%m66E3_5t2i91153EpF.97%%7.%fn24253m33e%%5E%pA.t222853%x%FF%8E2i23%594

    s -- encoded string, e.g. the text of a playlist ``location`` element.

    Returns the decoded URL string.  A payload shorter than the row count
    (including an empty payload) is decoded instead of failing.
    Raises ValueError when ``s`` is empty, does not start with a digit, or
    starts with "0".
    """
    # unquote lives in urllib on Python 2 and in urllib.parse on Python 3.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote

    if not s:
        raise ValueError("empty location string")
    row_count = int(s[0])
    if row_count < 1:
        raise ValueError("row count must be at least 1, got %r" % (s[0],))

    payload = s[1:]
    # Integer arithmetic only: base_len is the short row length, long_rows
    # is how many leading rows carry one extra character.
    base_len, long_rows = divmod(len(payload), row_count)

    rows = []
    pos = 0
    for row_index in range(row_count):
        row_len = base_len + 1 if row_index < long_rows else base_len
        rows.append(payload[pos:pos + row_len])
        pos += row_len

    # Walk the matrix column by column; short rows simply have no entry in
    # the last column.
    width = base_len + 1 if long_rows else base_len
    chars = []
    for col in range(width):
        for row in rows:
            if col < len(row):
                chars.append(row[col])
    return unquote("".join(chars)).replace("^", "0")

def xiami_login():
    """Log in to xiami.com with curl and store the session in COOKIES_FILE.

    Steps:
      1. GET the login page (cookies saved with -c) and collect every
         <input> of its first form with BeautifulSoup.
      2. Fill the "email" and "password" inputs from EMAIL/PASSWORD, keep
         the page's own value for every other input, and urlencode them.
      3. POST the form; curl prints the redirect URL followed by the
         3-digit HTTP status (-w), which is split off the end of the output.

    Returns True when the POST answers 302 and the redirect path does not
    contain "login"; False otherwise, including when any step raises (the
    exception is printed, not propagated).
    """
    success = False
    try:
        login_page_cmd = "".join([
            'curl -Lk --compressed --connect-timeout 30 -4',
            ' -c "', COOKIES_FILE, '"',
            " -H 'Accept-Encoding:gzip,deflate' -H 'Host:www.xiami.com'",
            " -H 'Connection:close'",
            " -A '", USER_AGENT, "'",
            ' -X GET "http://www.xiami.com/member/login"',
        ])
        proc = subprocess.Popen(login_page_cmd, shell=True, bufsize=2048, stdout=subprocess.PIPE, close_fds=True)
        # communicate() reads the page and also waits for curl, so the
        # child is reaped instead of lingering as a zombie.
        r = proc.communicate()[0]

        soup = BeautifulSoup(r)
        fields = []
        for i in soup.html.form.findAll('input'):
            name = i.get("name", "")
            if name == "email":
                value = EMAIL
            elif name == "password":
                value = PASSWORD
            else:
                value = i.get("value", "")
            try:
                value = value.encode('utf-8')
            except Exception as e:
                print("xiami_login exception %s" % (e,))
            fields.append((name, value))
        # A list of pairs keeps the form's field order.
        post_data = urllib.urlencode(fields)
        # Never echo the real password to the terminal.
        masked = [(n, "********" if n == "password" else v) for n, v in fields]
        print("post_data %s" % (urllib.urlencode(masked),))

        # The body is already urlencoded and the header below declares
        # application/x-www-form-urlencoded, so it is sent with --data;
        # -F would wrap it in a multipart/form-data body instead.
        # NOTE(review): the credentials still appear on curl's command line.
        login_cmd = "".join([
            "curl -k --compressed --connect-timeout 30 -4",
            " -w '%{redirect_url}%{http_code}' --post301 --post302",
            " -c '", COOKIES_FILE, "' -b '", COOKIES_FILE, "'",
            " -H 'Accept-Encoding:gzip,deflate'",
            " -A '", USER_AGENT, "'",
            " -H 'Keep-Alive:115' -H 'Connection:keep-alive'",
            ' -e "http://www.xiami.com/member/login;auto"',
            " -H '", CONN_TYPE_HEADER, "'",
            ' --data "', post_data, '"',
            ' -X POST "http://www.xiami.com/member/login"',
        ])
        proc = subprocess.Popen(login_cmd, shell=True, bufsize=2048, stdout=subprocess.PIPE, close_fds=True)
        r = proc.communicate()[0]
        print("r:  %s" % (r,))
        # -w appends "<redirect_url><http_code>"; the code is the last 3 chars.
        http_code = r[-3:]
        redirect_url = r[:-3]
        if http_code == "302":
            redirect_path = urllib2.urlparse.urlparse(redirect_url).path
            if "login" in redirect_path:
                # redirected back to the login page
                print("Invalid username or password.")
            else:
                print("login success")
                success = True
        else:
            print("Invalid username or password.")
    except Exception as e:
        print("exception xiami_login -1  %s" % (e,))
    return success

def folder_creator(pp):
    """Make sure the download folder for ``pp`` exists and return its name.

    pp -- the id (or label) typed by the user; it is used as the folder name.

    An empty ``pp`` falls back to "." (the current directory, which is also
    the default of ``download``) because ``os.makedirs("")`` would raise.
    Names longer than 255 characters are truncated to 255.
    Returns the folder name that was ensured.
    """
    folder_name = pp or "." #CHANGE your folder name here if you want
    if len(folder_name) > 255:
        folder_name = folder_name[:255]
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)
    return folder_name

# Reuse the curl cookie jar when it already holds a "member_auth" cookie;
# otherwise (no jar, or no such cookie) log in first.
success = False
if os.path.isfile(COOKIES_FILE):
    # "with" closes the jar again; the fields of a cookie line are
    # tab-separated, so the cookie name shows up as its own item.
    with open(COOKIES_FILE, "r") as cookie_file:
        success = "member_auth" in cookie_file.read().split("\t")
if not success:
    success = xiami_login()

if success:
    print("You are loggedin\n\n")
else:
    print("Bad news, failed to login :( That's means you lost some of the songs if given radio id...etc\n\n")

# Python 2's input() evaluates what is typed, so raw_input is used there;
# Python 3 only has input().  Resolved once, without rebinding the builtin.
try:
    read_line = raw_input
except NameError:
    read_line = input

while 1:
    p = read_line("Press to download \n s - single song \n r - artist radio/artist \n a - album \n c - collection \n t - today's recommended \n q - quit \n :").strip().lower()
    if p in ["q", "quit", "exit", "e", "bye", "end"]:
        print("see you :)")
        break
    print("")
    print("".join(["Example: ", "http://www.xiami.com/song/1770157129", ", 1770157129 is song id"]))
    print("".join(["Example: ", "http://www.xiami.com/radio/play/type/5/oid/37276", ", 37276 is artist radio id"]))
    print("".join(["Example: ", "http://www.xiami.com/album/353688?ref=ac-artist", ", 353688 is album id"]))
    # the id in the explanation now matches the id in the example URL
    print("".join(["Example: ", "http://www.xiami.com/song/showcollect/id/16697686", ", 16697686 is collection id"]))
    print("")
    # Typed ids are stripped so stray whitespace does not end up in the URL.
    if p == "s":
        pp = read_line("Song id: ").strip()
        #http://www.xiami.com/widget/xml-single/sid/1769547541 or http://www.xiami.com/song/playlist/id/1769547541
        link = "".join(["http://www.xiami.com/song/playlist/id/", pp])
    elif p == "r":
        pp = read_line("Radio/Artist id: ").strip()
        link = "".join(["http://www.xiami.com/radio/xml/type/5/id/", pp])
    elif p == "a":
        pp = read_line("Album id: ").strip()
        link = "".join(["http://www.xiami.com/song/playlist/id/", pp, "/type/1"])
    elif p == "c":
        # list the recommended collections first so an id can be picked
        request_recommend("http://www.xiami.com/collect/feed", "c")
        pp = read_line("Collection id: ").strip()
        link = "".join(["http://www.xiami.com/song/playlist/id/", pp, "/type/3"])
    elif p == "t":
        pp = "today recommended"
        link = "http://www.xiami.com/song/playlist/id/2/type/9"
    else:
        continue
    try:
        request_api(link, p, pp)
    except Exception as e:
        print("not found  %s" % (e,))
        continue
    # the typed id (or the "today recommended" label) names the target folder
    folder_name = folder_creator(pp)
    download(folder_name=folder_name)

No comments:

Post a Comment