本文共 3106 字,大约阅读时间需要 10 分钟。
&&&&
# -*- coding: utf-8 -*-
"""Scrape Weibo mobile search results for one fixed query into an .xls file.

The script requests pages 1-199 of the m.weibo.cn search API for the query
encoded in ``SEARCH_URL`` (the percent-encoded text decodes to "中兴制裁"),
pulls the author, the first token of the post text and the posting source
out of every post it finds, and writes them to ``weibo2.xls``.
"""
from bs4 import BeautifulSoup
import requests
from xlwt import Workbook
import time
import json
import os
import sys
import csv

if sys.version_info[0] == 2:
    # Python 2 only: the original script forced the default codec to UTF-8.
    # ``reload`` is not a builtin and ``setdefaultencoding`` does not exist
    # on Python 3, so the hack must not run there.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 10

# SECURITY NOTE(review): this is a logged-in session cookie committed to
# source. It is kept only as a fallback so the script behaves as before;
# supply a fresh one through the WEIBO_COOKIE environment variable instead.
_DEFAULT_COOKIE = (
    "_T_WM=cff5bb8be0f4876163913084ff9c62f0; ALF=1526623461; "
    "SCF=ApMI3mluv9yH6yKz4i7-HMlHojzPtQULc5G0xlrri-Ne18lmXmEFvULlwx0CKS_sw3NN27MeOjlDlndONngzHPI.; "
    "SUB=_2A25321onDeRhGeNN7FsX9CrIzzqIHXVVJGZvrDV6PUJbktAKLUbCkW1NSSw_ojRtzxFp7XG4qFOB5nNMnhzPu_2a; "
    "SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhfooAg0xuUTUnWRX292Cbe5JpX5K-hUgL.Fo-0S0.cShBXShq2dJLoIpnLxKqL1-BL12-LxKML1K.LB.xke0np1hqt; "
    "SUHB=0KABS255VQmWlt; H5_INDEX=0_all; "
    "H5_INDEX_TITLE=%E6%9D%8E%E7%99%BD_38186; "
    "WEIBOCN_FROM=1110006030; "
    "M_WEIBOCN_PARAMS=featurecode%3D20000320%26luicode%3D10000011"
    "%26lfid%3D100103type%253D3%2526q%253D%25E4%25B8%25AD%25E5%2585%25B4%25E5%2588%25B6%25E8%25A3%2581%2526t%253D0"
    "%26fid%3D100103type%253D2%2526q%253D%25E4%25B8%25AD%25E5%2585%25B4%25E5%2588%25B6%25E8%25A3%2581"
    "%26uicode%3D10000011"
)

_REFERER = (
    "https://m.weibo.cn/p/100103type%3D2%26q%3D%E4%B8%AD%E5%85%B4%E5%88%B6%E8%A3%81"
    "?type=wb&queryVal=%E4%B8%AD%E5%85%B4%E5%88%B6%E8%A3%81"
    "&featurecode=20000320&luicode=10000011"
    "&lfid=100103type%3D3%26q%3D%E4%B8%AD%E5%85%B4%E5%88%B6%E8%A3%81%26t%3D0"
    "&title=%E4%B8%AD%E5%85%B4%E5%88%B6%E8%A3%81"
)

_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.3964.2 "
    "Safari/537.36"
)

# Search endpoint; the page number is appended to the end.
SEARCH_URL = (
    "https://m.weibo.cn/api/container/getIndex"
    "?type=wb&queryVal=%E4%B8%AD%E5%85%B4%E5%88%B6%E8%A3%81"
    "&featurecode=20000320&luicode=10000011"
    "&lfid=100103type%3D3%26q%3D%E4%B8%AD%E5%85%B4%E5%88%B6%E8%A3%81%26t%3D0"
    "&title=%E4%B8%AD%E5%85%B4%E5%88%B6%E8%A3%81"
    "&containerid=100103type%3D2%26q%3D%E4%B8%AD%E5%85%B4%E5%88%B6%E8%A3%81"
    "&page="
)


def weibo(url):
    """Fetch *url* with the browser-like headers and return the parsed JSON.

    Args:
        url: Full URL to request.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx status.
        ValueError: if the body is not valid JSON.
    """
    headers = {
        "Cookie": os.environ.get("WEIBO_COOKIE", _DEFAULT_COOKIE),
        "Referer": _REFERER,
        "user-agent": _USER_AGENT,
    }
    # Without a timeout a stalled connection would hang the script forever.
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()


def _extract_rows(cards):
    """Turn the ``cards`` list of one API response into output rows.

    Each row is ``[screen_name, first_token_of_text, source]``. Cards that
    have no ``card_group`` and group entries that have no ``mblog`` are
    skipped rather than raising ``KeyError``.
    """
    rows = []
    for card in cards:
        for entry in card.get('card_group') or []:
            mblog = entry.get('mblog')
            if not mblog:
                continue
            user = (mblog.get('user') or {}).get('screen_name', '')
            # As in the original script, only the first whitespace-delimited
            # token of the text is kept; empty text yields '' instead of an
            # IndexError.
            tokens = (mblog.get('text') or '').split()
            blog = tokens[0] if tokens else ''
            source = mblog.get('source', '')
            rows.append([user, blog, source])
    return rows


def tiqu(page):
    """Fetch one page of search results and return its rows.

    Args:
        page: Page number; it is converted with ``str`` and appended to
            ``SEARCH_URL``.

    Returns:
        A list of ``[user, blog, source]`` lists, empty when the response
        carries no cards.

    Raises:
        requests.RequestException, ValueError: propagated from ``weibo``.
    """
    payload = weibo(SEARCH_URL + str(page))
    if not isinstance(payload, dict):
        return []
    cards = (payload.get('data') or {}).get('cards') or []
    return _extract_rows(cards)


def main():
    """Collect pages 1-199 and write every row to ``weibo2.xls``."""
    book = Workbook(encoding='utf-8')  # workbook encoded as UTF-8
    sheet1 = book.add_sheet('Sheet 1')

    # Header row.
    for column, label in enumerate(('user', 'blog', 'source')):
        sheet1.write(0, column, label)

    rows = []
    for page in range(1, 200):
        try:
            rows.extend(tiqu(page))
        except (requests.RequestException, ValueError) as exc:
            # Stop here but still save what was collected so far, instead of
            # losing every earlier page to one failed request.
            sys.stderr.write(
                "page %d failed (%s); saving %d rows collected so far\n"
                % (page, exc, len(rows))
            )
            break

    # Data rows start at spreadsheet row 1, directly below the header.
    for row_index, row in enumerate(rows, 1):
        for column, value in enumerate(row):
            sheet1.write(row_index, column, value)

    book.save("weibo2.xls")  # output file name is weibo2.xls


if __name__ == "__main__":
    main()
&&&&
转载地址:http://pqvbo.baihongyu.com/