Python 爬取淘宝商品标题并分析词频，保存结果用于展示

Lazy心宿 2017-10-10 16:40 2472次浏览

Python 爬取淘宝商品标题并分析词频，保存结果用于展示

from urllib import request
import urllib
import re
from jieba import analyse
# Script flow:
#   1. Fetch ten Taobao search-result pages for the keyword.
#   2. Append every "raw_title" value found in the responses to 1.txt.
#   3. Run jieba's keyword extraction over the accumulated file and keep the
#      top 100 tags, both as a list (`tags`) and space-joined (`text`).

# URL-encode the keyword so it can be embedded in the query string.
search=urllib.parse.quote('哲♂学')

# Compiled once; the same pattern is applied to every page.
title_pattern=re.compile(r'"raw_title":"([^"]+)"')

# The file is opened in append mode, so titles from earlier runs are kept.
# An explicit UTF-8 encoding avoids UnicodeEncodeError on platforms whose
# default encoding cannot represent every character in the titles.
# NOTE(review): if 1.txt already holds data written by an older run in another
# encoding, the file will end up mixed; delete it before re-running.
# `with` guarantees the file is closed even if a request or decode fails.
with open('1.txt','a',encoding='utf-8') as f:
    for i in range(10):
        print('正在读取第'+str(i+1)+'页数据...')
        # `s` looks like an item offset advancing 44 per page - TODO confirm.
        url='https://s.taobao.com/search?q='+search+'&s='+str(i*44)
        # Close the HTTP response as soon as its body has been read.
        with request.urlopen(url) as page:
            response=page.read().decode('utf-8')

        title=title_pattern.findall(response)
        # One title per line.
        f.writelines(each+'\n' for each in title)

# Read the whole file back as raw bytes (as the original did) and make sure
# the handle is released afterwards.
with open('1.txt','rb') as saved:
    content=saved.read()

# Top 100 keywords without their weights.
tags = analyse.extract_tags(content, topK=100, withWeight=False)
print(tags)
text =" ".join(tags)
153804a0kj9c33llcj3b9l.jpg