代码:
## Crawl target: 慕课网 (imooc); the URL that presumably followed in the parentheses is missing from this copy
from bs4 import BeautifulSoup
import urllib.request
import xlwt
import time
# Timestamp taken when the script starts.
time_star = time.time()

## Path segments for the listing URLs; the access links are built by hand
## after inspecting the page source.
ur = ["mobile", "python", "java", "php"]

# Module-level state, one binding per line. url_basic, url and hda are filled
# in just below; mingc, lianj and x are not used in this part of the script.
url_basic = []
mingc = []
lianj = []
url = []
hda = []
x = 1
## Build the list of page URLs: one base URL per entry of `ur`, then that base
## with the page index 0..6 appended as text.
## NOTE(review): the string literal on the append line below is cut off; the
## base URL and the rest of the call are missing from this copy, so the line
## is not valid Python as it stands. Restore it from the original script.
## NOTE(review): indentation is missing as well; the inner loop reads
## url_basic[j], so it presumably belongs inside the outer loop (confirm).
for j in range(len(ur)):
url_basic.append("
for i in range(7): ## at most 7 pages
urll = url_basic[j] + str(i)
url.append(urll)
# Browser-like request headers, keyed by header name.
# Long values are assembled from adjacent string literals instead of a
# backslash continuation inside the quotes: with a continuation, whatever
# indentation the following source line carries becomes part of the header
# value, and with none the User-Agent loses the space before "(KHTML".
heads = {
    'Connection': 'keep-alive',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;'
        'q=0.9,image/webp,image/apng,*/*;q=0.8'
    ),
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    ),
}
# Register a global opener that carries the headers from `heads`.
# addheaders is a list of (name, value) tuples, which is exactly what
# dict.items() yields, so the pairs are added to `hda` in a single call
# rather than being re-packed one at a time.
hda.extend(heads.items())
opn = urllib.request.build_opener()
opn.addheaders = hda  # replaces the header list the opener was created with
# After this call urllib.request.urlopen() goes through `opn`.
urllib.request.install_opener(opn)
for i in url:
tr