Online Weibo data visualization: collect Weibo data in real time and display it as word clouds in different shapes
Complete code gitee address: https://gitee.com/lyc96/weibo
1 ) Enter the star’s full name
2 ) After clicking "View", a word cloud built from the star's Weibo text is shown; six shapes are available and you can switch between them at will
1 ) Crawl the celebrity's Weibo posts based on the celebrity's name and store them in a text file (no database storage is involved in the project). The program also records the date of each post to avoid crawling the same Weibo post repeatedly.
2 ) The collected celebrity Weibo text is stored in a .txt file, and stylecloud is used to generate the word cloud (six word-cloud shapes are available and can be switched on the web page)
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 19 12:03:56 2020
@author: Li Yunchen
"""
import json
import os
import time
from urllib.parse import quote

import jieba
import requests
from flask import Flask,render_template,request,Response,redirect,url_for
from flask_cors import CORS
from stylecloud import gen_stylecloud
# Flask application object; the route decorators further down register on it.
# NOTE(review): CORS is imported at the top of the file but is never applied to
# `app` in the visible code -- confirm whether CORS(app) was intended.
app =Flask(__name__)
# Intranet IP address, passed as `host` to app.run() at the bottom of the file.
# Change it to your own IP address; per the original author's note the address
# must also be changed in two places in index.html.
ip="192.168.0.112"
# Base directory for all crawled data (created by initialization()).
root="static/data/"
# Sub-directory of `root`: search() keeps one <name>.txt here listing the
# created_at values of the posts it has already saved.
pagedata="pagedata/"
# Sub-directory of `root`: search() keeps one <name>.txt here with the cleaned
# post text; jieba_cloud() reads it and writes the .png next to it.
textdata="textdata/"
# Sleep helper. Per the original author's note, it gives a page time to load
# its elements when the connection is slow.
def s(int):
    """Block the calling thread for ``int`` seconds.

    The parameter name shadows the ``int`` builtin; it is kept unchanged so
    that existing keyword callers keep working.
    """
    time.sleep(int)
# Browser-like User-Agent header sent with every requests.get() call in this file.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'}


def initialization():
    """Create the data directories used by the crawler if they do not exist.

    Creates ``root``, ``root + pagedata`` and ``root + textdata``.
    ``os.makedirs`` is used instead of ``os.mkdir`` so that missing parent
    directories (e.g. ``static/``) are created as well, and ``exist_ok=True``
    makes repeated calls harmless.
    """
    for directory in (root, root + pagedata, root + textdata):
        os.makedirs(directory, exist_ok=True)
def write(path, t):
    """Append ``str(t)`` plus a trailing newline to the UTF-8 text file ``path``.

    The file is created if it does not exist. search() uses this both for the
    cleaned post text and for the record of already-saved post dates.
    """
    with open(path, "a", encoding='utf8') as f:
        f.write(f"{t}\n")
def search(name_s, url, since_id):
    """Fetch one page of a user's timeline and save the posts not saved before.

    Args:
        name_s: Name used for the per-user files ``root + textdata + name_s +
            ".txt"`` (post text) and ``root + pagedata + name_s + ".txt"``
            (created_at values of posts already saved).
        url: Timeline API URL, as returned by geturl(). Example:
            https://m.weibo.cn/api/container/getIndex?uid=1566301073&t=0&luicode=10000011&lfid=100103type=1&q=Jia Ling&type=uid&value=1566301073&containerid=1076031566301073
        since_id: Paging cursor. When it is a string longer than one
            character it is appended to ``url`` as ``&since_id=...``.

    Nothing is written unless the response's ``ok`` field equals 1. A post is
    treated as already saved when its ``created_at`` value matches a line of
    the pagedata file as it was when this call started.
    """
    # Without a since_id the first card is skipped.
    # NOTE(review): presumably the first card of the first page is not a
    # regular post -- confirm against the API response.
    start = 1
    if since_id is not None and len(since_id) > 1:
        url += "&since_id=" + since_id
        start = 0

    # A timeout keeps a stalled connection from blocking the request forever.
    response = requests.get(url, headers=headers, timeout=10)
    datas = response.json()

    text_path = root + textdata + name_s + ".txt"
    page_path = root + pagedata + name_s + ".txt"

    # Read with the same encoding write() uses, and compare whole lines so a
    # date is not mistaken for "seen" just because it is a substring of another.
    try:
        with open(page_path, "r", encoding='utf8') as f:
            seen_dates = set(f.read().splitlines())
    except FileNotFoundError:
        # First crawl for this name: nothing has been saved yet.
        seen_dates = set()

    if str(datas.get('ok')) != '1':
        return

    data = datas['data']
    # Debug output kept from the original: next paging cursor and card count.
    print(data.get('cardlistInfo', {}).get('since_id'))
    cards = data['cards']
    print(len(cards))

    for card in cards[start:]:
        mblog = card.get('mblog')
        if not mblog:
            # Cards without a post payload are skipped instead of raising KeyError.
            continue
        date = str(mblog['created_at'])
        if date in seen_dates:
            continue
        write(text_path, clean(mblog['text']))
        write(page_path, date)


# clean() below removes emoji images and other HTML tags from the post text.
def clean(s):
    """Return ``s`` with every ``<...>`` tag removed (emoji images, links, ...).

    The text is scanned once from left to right; each ``<`` together with
    everything up to the next ``>`` is dropped. A ``<`` with no closing ``>``
    and a ``>`` with no preceding ``<`` are left in place, so input such as
    ``"1 < 2"`` or ``"a > b <i>c</i>"`` terminates (the previous recursive
    version never returned for these). Non-string input is returned unchanged.
    """
    if not isinstance(s, str):
        return s

    kept = []
    pos = 0
    while True:
        tag_start = s.find('<', pos)
        if tag_start < 0:
            break
        tag_end = s.find('>', tag_start + 1)
        if tag_end < 0:
            # Unterminated tag: keep the remainder as literal text.
            break
        kept.append(s[pos:tag_start])
        pos = tag_end + 1
    kept.append(s[pos:])
    return "".join(kept)
def geturl(name_g):
    """Look up a user by name and return the timeline API URL for that user.

    Sends a search request to m.weibo.cn, takes the user id from the first
    entry of the first card group in the response, and builds the
    ``getIndex`` URL that search() consumes.

    Args:
        name_g: Name to search for. It is percent-encoded before being placed
            in the URLs so characters such as ``&``, ``#`` or spaces cannot
            break the query string.

    Returns:
        The timeline URL as a string.

    Raises:
        KeyError, IndexError: If the response does not contain
            ``data.cards[0].card_group[0].user.id``.
    """
    encoded_name = quote(name_g, safe='')
    search_url = (
        "https://m.weibo.cn/api/container/getIndex?containerid=100103type=1%26q="
        + encoded_name
        + "&page_type=searchall"
    )
    # A timeout keeps a stalled connection from blocking the request forever.
    response = requests.get(search_url, headers=headers, timeout=10)
    datas = response.json()
    uid = str(datas['data']['cards'][0]['card_group'][0]['user']['id'])
    return (
        "https://m.weibo.cn/api/container/getIndex?uid=" + uid
        + "&t=0&luicode=10000011&lfid=100103type=1&q=" + encoded_name
        + "&type=uid&value=" + uid
        + "&containerid=107603" + uid
    )
# Icon code sent by the web page -> Font Awesome icon name passed to stylecloud.
# Code "1" and any unknown code have no entry: gen_stylecloud is then called
# without icon_name, i.e. with the library's default shape (the original
# author's note labels code "1" as "national flag").
_ICON_NAMES = {
    "2": 'fas fa-dragon',  # dragon
    "3": 'fas fa-dog',     # dog
    "4": 'fas fa-cat',     # cat
    "5": 'fas fa-dove',    # dove
    "6": 'fab fa-qq',      # QQ
}


def jieba_cloud(file_name, icon):
    """Generate a word-cloud image from a UTF-8 text file and return its path.

    Args:
        file_name: Path of the text file to read.
        icon: Icon code ("1" to "6") selecting the cloud shape.

    Returns:
        Path of the generated image: ``file_name`` without its extension,
        followed by ``str(icon)`` and ``.png``.
    """
    with open(file_name, 'r', encoding='utf8') as f:
        # Segment the text with jieba and join the words with spaces.
        result = " ".join(jieba.cut(f.read()))

    icon_name = _ICON_NAMES.get(icon, '')
    # splitext only strips the final extension; split('.')[0] truncated the
    # path at the first dot anywhere in it (e.g. a dot in the name or in root).
    picp = os.path.splitext(file_name)[0] + str(icon) + '.png'

    # Per the original author's note, a Chinese font must be supplied,
    # otherwise the text is rendered incorrectly.
    cloud_options = {'text': result, 'font_path': 'simsun.ttc', 'output_name': picp}
    if icon_name:
        cloud_options['icon_name'] = icon_name
    gen_stylecloud(**cloud_options)
    return picp
############################ flask routing
# Main page
@app.route('/')
def index():
    """Render and return the ``index.html`` template."""
    return render_template('index.html')
# Get picture
@app.route('/find')
def find():
    """Return the path of the default (icon "1") word cloud for ``?name=...``.

    If no text file exists yet for the name, the user is looked up with
    geturl() and one page of posts is crawled with search() first.

    Responses (all JSON):
        200: the picture path as a JSON string.
        400: ``name`` is missing, empty, or contains a directory component.
        404: no text file exists for the name even after crawling.
    """
    name_i = request.args.get('name')
    # The name becomes part of a file path, so reject a missing name and
    # anything that carries a directory component (path traversal).
    if not name_i or os.path.basename(name_i) != name_i:
        return Response(json.dumps({'error': 'invalid name'}),
                        status=400, mimetype='application/json')

    file_name = root + textdata + name_i + '.txt'

    # Data collection, only when nothing has been saved for this name yet.
    if not os.path.exists(file_name):
        search(name_i, geturl(name_i), "")
    if not os.path.exists(file_name):
        return Response(json.dumps({'error': 'no posts found'}),
                        status=404, mimetype='application/json')

    # Make word cloud
    picpath = jieba_cloud(file_name, "1")
    return Response(json.dumps(picpath), mimetype='application/json')
# Toggle icon
@app.route('/switchs')
def switchs():
    """Regenerate the word cloud for ``?name=...`` with the shape ``?ic=...``.

    Responses (all JSON):
        200: the picture path as a JSON string.
        400: ``name`` is missing, empty, or contains a directory component,
            or ``ic`` is not one of "1" to "6".
        404: no text file has been saved for the name.
    """
    name_i = request.args.get('name')
    icon = request.args.get('ic')

    # Both values end up in file paths (text file and output picture), so
    # only plain names and the known icon codes are accepted.
    if not name_i or os.path.basename(name_i) != name_i:
        return Response(json.dumps({'error': 'invalid name'}),
                        status=400, mimetype='application/json')
    if icon not in ("1", "2", "3", "4", "5", "6"):
        return Response(json.dumps({'error': 'invalid icon'}),
                        status=400, mimetype='application/json')

    file_name = root + textdata + name_i + '.txt'
    if not os.path.exists(file_name):
        return Response(json.dumps({'error': 'no posts found'}),
                        status=404, mimetype='application/json')

    # Make word cloud
    picpath = jieba_cloud(file_name, icon)
    return Response(json.dumps(picpath), mimetype='application/json')
############################ end
if __name__ == "__main__":
    # Make sure the data directories exist before serving requests.
    initialization()
    app.run(host=ip, port=5000, threaded=True)
Recommended Posts