Online Weibo (microblog) data visualization with Python

Online Weibo data visualization: collect Weibo data in real time and display it as word clouds in different shapes.

Complete code gitee address: https://gitee.com/lyc96/weibo

First, the screenshots (try to contain your amazement):

1) Enter the celebrity's full name.

2) After clicking "View", you see a word cloud built from the celebrity's Weibo text. Six shapes are available, and you can switch between them freely.

Program function introduction

1) Crawl the celebrity's Weibo posts by the celebrity's name and store them in a text file (the project does not use a database). The program also records the date of each crawled post, so the same post is not crawled twice.

2) The collected Weibo text is stored in a .txt file, and stylecloud is used to generate the word clouds (six shapes, switchable on the web page).

Python backend code

# -*- coding: utf-8 -*-
"""
Created on Sun Jul 19 12:03:56 2020

@author: Li Yunchen
"""
import requests
import time
import os
import json
from stylecloud import gen_stylecloud
import jieba
from flask_cors import CORS
from flask import Flask,render_template,request,Response,redirect,url_for
# Flask application object; the route handlers in this file register on it.
app = Flask(__name__)

# Intranet IP address the server binds to. Change it to your own address;
# the same address also has to be changed in two places in index.html.
ip = "192.168.0.112"

# Storage layout: every data file lives under `root`.
root = "static/data/"
pagedata = "pagedata/"  # sub-folder for the per-name crawl records
textdata = "textdata/"  # sub-folder for the per-name crawled text

def s(int):
    """Pause the current thread for ``int`` seconds.

    Intended to give a page time to finish loading its elements when the
    page or the network is slow.

    Note: the parameter name shadows the builtin ``int`` inside this
    function; it is kept unchanged so existing keyword callers still work.
    """
    pause_seconds = int
    time.sleep(pause_seconds)
# Browser-like User-Agent header sent with every HTTP request made by this file.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'}


def initialization():
    """Create the data directories used by the crawler if they are missing.

    Ensures ``root``, ``root + pagedata`` (crawl records) and
    ``root + textdata`` (crawled text) exist.
    """
    for folder in (root, root + pagedata, root + textdata):
        # makedirs also creates missing parents and, with exist_ok=True,
        # does not fail when the folder is already there.
        os.makedirs(folder, exist_ok=True)

def write(path, t):
    """Append one line to the UTF-8 text file at ``path``.

    Args:
        path: File to append to; it is created if it does not exist.
        t: Value to record. It is converted with ``str()`` and terminated
            with a newline.
    """
    with open(path, "a+", encoding='utf8') as f:
        # One write call instead of writelines(), which would iterate the
        # string character by character.
        f.write(str(t) + "\n")

def search(name_s, url, since_id):
    """Fetch one page of cards from ``url`` and store the posts not seen before.

    For every card carrying an ``mblog`` entry whose ``created_at`` value is
    not yet listed in the crawl record ``root + pagedata + name_s + ".txt"``,
    the cleaned post text is appended to ``root + textdata + name_s + ".txt"``
    and the date is appended to the crawl record.

    Args:
        name_s: Name used for the two per-name data files.
        url: API URL to request, e.g.
            "https://m.weibo.cn/api/container/getIndex?uid=1566301073&t=0&luicode=10000011&lfid=100103type=1&q=Jia Ling&type=uid&value=1566301073&containerid=1076031566301073"
        since_id: Optional paging cursor. When it is longer than one
            character it is appended to ``url`` as ``&since_id=...``.
    """
    # Without a since_id the first card of the response is skipped.
    start = 1
    if since_id is not None and len(since_id) > 1:
        url += "&since_id=" + since_id
        start = 0

    # A timeout keeps the Flask worker from hanging forever on a stalled
    # connection.
    response = requests.get(url, headers=headers, timeout=10)
    datas = response.json()

    # Dates already crawled for this name, one per line (see write()).
    try:
        with open(root + pagedata + name_s + ".txt", "r", encoding='utf8') as f:
            seen_dates = set(f.read().splitlines())
    except FileNotFoundError:
        seen_dates = set()

    if str(datas.get('ok')) != '1':
        return

    data = datas['data']
    since_ids = data.get('cardlistInfo', {}).get('since_id')
    print(since_ids)
    cards = data['cards']
    print(len(cards))

    for card in cards[start:]:
        mblog = card.get('mblog')
        if mblog is None:
            # Not a post card; nothing to store.
            continue
        date = mblog['created_at']
        if str(date) not in seen_dates:
            write(root + textdata + name_s + ".txt", clean(mblog['text']))
            write(root + pagedata + name_s + ".txt", date)


# Remove emoji, HTML tags, etc.
def clean(s):
    """Return ``s`` with every ``<...>`` tag removed.

    Tags are removed left to right: each ``<`` is paired with the next ``>``
    after it and the span between them (inclusive) is dropped. A ``<`` with
    no closing ``>`` is left in place together with the rest of the text, and
    a ``>`` with no preceding ``<`` is ordinary text.

    Args:
        s: Text that may contain HTML tags (e.g. emoji ``<span>`` markup).

    Returns:
        The text with the tags stripped.
    """
    kept = []
    pos = 0
    while True:
        tag_start = s.find('<', pos)
        if tag_start < 0:
            break
        tag_end = s.find('>', tag_start + 1)
        if tag_end < 0:
            # Unterminated tag: stop instead of looping forever.
            break
        kept.append(s[pos:tag_start])
        pos = tag_end + 1
    kept.append(s[pos:])
    return ''.join(kept)

def geturl(name_g):
    """Build the post-list API URL for the first user found for ``name_g``.

    Queries the m.weibo.cn search endpoint for ``name_g``, takes the user id
    of the first entry in the first card group, and returns the container URL
    built from that id.

    Args:
        name_g: Name to search for.

    Returns:
        The URL string that ``search`` requests.
    """
    lookup_url = "".join((
        "https://m.weibo.cn/api/container/getIndex?containerid=100103type=1%26q=",
        name_g,
        "&page_type=searchall",
    ))
    payload = requests.get(lookup_url, headers=headers).json()

    first_hit = payload['data']['cards'][0]['card_group'][0]
    uid = str(first_hit['user']['id'])

    return "".join((
        "https://m.weibo.cn/api/container/getIndex?uid=", uid,
        "&t=0&luicode=10000011&lfid=100103type=1&q=", name_g,
        "&type=uid&value=", uid,
        "&containerid=107603", uid,
    ))

def jieba_cloud(file_name, icon):
    """Segment the text in ``file_name`` and render it as a word-cloud PNG.

    Args:
        file_name: Path of the UTF-8 text file to visualise.
        icon: Shape selector, a string from "1" to "6". Any other value is
            treated like "1".

    Returns:
        Path of the generated image: ``file_name`` without its extension,
        followed by ``str(icon)`` and ``.png``.
    """
    with open(file_name, 'r', encoding='utf8') as f:
        raw_text = f.read()
    # Word segmentation; stylecloud receives space-separated tokens.
    result = " ".join(jieba.cut(raw_text))

    # Shape selector -> Font Awesome icon class.
    icon_names = {
        "1": '',                # no explicit icon (noted as "national flag")
        "2": 'fas fa-dragon',   # dragon
        "3": 'fas fa-dog',      # dog
        "4": 'fas fa-cat',      # cat
        "5": 'fas fa-dove',     # dove
        "6": 'fab fa-qq',       # QQ
    }
    icon_name = icon_names.get(icon, "")

    # splitext only strips the final extension, so dots elsewhere in the path
    # or in the name do not truncate the output path.
    picp = os.path.splitext(file_name)[0] + str(icon) + '.png'

    # A Chinese font must be supplied, otherwise the text renders incorrectly.
    cloud_options = {'text': result, 'font_path': 'simsun.ttc', 'output_name': picp}
    if icon_name:
        cloud_options['icon_name'] = icon_name
    gen_stylecloud(**cloud_options)

    return picp
############################ flask routing
# enter main page
@app.route('/')
def index():
    """Render the main page from the ``index.html`` template."""
    return render_template('index.html')
# Get picture
@app.route('/find')
def find():
    """Crawl data for the requested name if needed and return its word cloud.

    Query parameters:
        name: Name to look up.

    Returns:
        A JSON response holding the path of the generated image, or a
        400 JSON error when ``name`` is missing or not a plain file name.
    """
    name_i = request.args.get('name')
    # The name becomes part of a file path, so reject empty values and
    # anything that contains a directory component.
    if not name_i or os.path.basename(name_i) != name_i:
        return Response(json.dumps({"error": "invalid name"}),
                        status=400, mimetype='application/json')

    file_name = root + textdata + name_i + '.txt'

    # Data collection: only crawl when no text has been stored for this name.
    if not os.path.exists(file_name):
        u = geturl(name_i)
        search(name_i, u, "")

    # Make the word cloud with the default shape.
    picpath = jieba_cloud(file_name, "1")
    return Response(json.dumps(picpath), mimetype='application/json')
# Toggle icon
@app.route('/switchs')
def switchs():
    """Regenerate the word cloud for a name using a different shape.

    Query parameters:
        name: Name whose stored text should be visualised.
        ic: Shape selector passed on to ``jieba_cloud``.

    Returns:
        A JSON response holding the path of the generated image, or a
        400 JSON error when ``name`` is missing or not a plain file name.
    """
    name_i = request.args.get('name')
    icon = request.args.get('ic')
    # The name becomes part of a file path, so reject empty values and
    # anything that contains a directory component.
    if not name_i or os.path.basename(name_i) != name_i:
        return Response(json.dumps({"error": "invalid name"}),
                        status=400, mimetype='application/json')

    # Make the word cloud from the text already stored for this name.
    file_name = root + textdata + name_i + '.txt'
    picpath = jieba_cloud(file_name, str(icon))
    return Response(json.dumps(picpath), mimetype='application/json')
############################ end

if __name__ == "__main__":
    # Make sure the data folders exist before the server starts.
    initialization()
    app.run(host=ip, port=5000, threaded=True)

Complete code gitee address: https://gitee.com/lyc96/weibo