Online Weibo (microblog) data visualization with Python

Online Weibo data visualization: the program collects Weibo data in real time and displays it as word clouds in different shapes.

Complete code gitee address: https://gitee.com/lyc96/weibo

  1. First, screenshots of the result

1) Enter the celebrity's full name.

2) After clicking "View", a word cloud built from the celebrity's Weibo text is displayed. Six shapes are available, and you can switch between them freely.

  1. Overview of the program's features

1) Given a celebrity's name, the program crawls that celebrity's Weibo posts and stores the text in a plain text file (the project does not use a database). It also records the date of every post it has crawled, so the same post is not stored twice.

2) The collected Weibo text is stored in a .txt file, and stylecloud is used to generate the word cloud (six word-cloud shapes are available and can be switched on the web page).

  1. Python backend code
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 19 12:03:56 2020

@author: Li Yunchen
"""
import json
import os
import re
import time
from urllib.parse import quote

import jieba
import requests
from flask import Flask,render_template,request,Response,redirect,url_for
from flask_cors import CORS
from stylecloud import gen_stylecloud
# Flask application object; the route decorators below register handlers on it.
app =Flask(__name__)
### Host address the server binds to (passed to app.run at the bottom of the file).
### Change it to your own intranet IP address. The original note adds that the
### address must also be changed in two places in index.html
### (index.html is not shown here -- verify).
ip="192.168.0.112"
###
# Data directory layout: everything is stored under `root`.
root="static/data/"
# Sub-directory of `root`: one file per name listing the dates of posts already crawled.
pagedata="pagedata/"
# Sub-directory of `root`: one file per name holding the cleaned post text.
textdata="textdata/"

def s(int):
    """Block the current thread for ``int`` seconds.

    Intended as a short pause so that a page has time to load its elements
    before the next step runs.

    NOTE(review): the parameter name shadows the builtin ``int``; it is kept
    unchanged because it is part of the function's signature.
    """
    seconds = int
    time.sleep(seconds)
# HTTP headers sent with every request to m.weibo.cn: a desktop Chrome
# user-agent string, so the requests look like they come from a browser.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
}
def initialization():
    """Create the data directories used by the crawler if they are missing.

    Ensures that ``root``, ``root + pagedata`` and ``root + textdata`` exist.
    ``os.makedirs`` is used so that missing parent directories (``root`` is a
    nested path) are created too, and ``exist_ok=True`` makes repeated calls
    harmless.
    """
    for directory in (root, root + pagedata, root + textdata):
        os.makedirs(directory, exist_ok=True)

def write(path, t):
    """Append one line to a UTF-8 text file.

    Args:
        path: Path of the file to append to; it is created if it does not exist.
        t: Value to record. It is converted with ``str()`` and written
            followed by a newline.
    """
    with open(path, "a", encoding='utf8') as f:
        f.write(str(t) + "\n")

def search(name_s, url, since_id):
    """Fetch one page of a user's Weibo timeline and store the posts not seen before.

    For every card in the response whose ``created_at`` value is not yet
    listed in ``root + pagedata + name_s + ".txt"``, the cleaned post text is
    appended to ``root + textdata + name_s + ".txt"`` and the date is appended
    to the record file.

    Args:
        name_s: Name used to build the per-user data file names.
        url: Timeline API URL (as returned by ``geturl``).
        since_id: Pagination cursor. When it is longer than one character it
            is appended to ``url`` as ``&since_id=...`` and every card is
            processed; otherwise the first card of the response is skipped.
    """
    start = 1
    if since_id is not None and len(since_id) > 1:
        url += "&since_id=" + since_id
        start = 0

    # A timeout keeps a stalled connection from blocking the request forever.
    response = requests.get(url, headers=headers, timeout=10)
    datas = response.json()

    page_file = root + pagedata + name_s + ".txt"
    text_file = root + textdata + name_s + ".txt"

    # Dates recorded by earlier crawls. The file is written as UTF-8 by
    # write(), so it is read back as UTF-8; a missing file simply means
    # nothing has been crawled yet.
    try:
        with open(page_file, "r", encoding='utf8') as f:
            seen_dates = set(f.read().splitlines())
    except FileNotFoundError:
        seen_dates = set()

    if str(datas['ok']) != '1':
        return

    data = datas['data']
    # The cursor is only printed for diagnostics, so a missing value must
    # not abort the crawl.
    since_ids = data.get('cardlistInfo', {}).get('since_id')
    print(since_ids)
    cards = data['cards']
    print(len(cards))

    for card in cards[start:]:
        # NOTE(review): presumably some cards carry no post ('mblog');
        # such cards are skipped instead of raising KeyError -- confirm
        # against the API responses.
        mblog = card.get('mblog')
        if mblog is None:
            continue
        date = mblog['created_at']
        if str(date) in seen_dates:
            continue
        write(text_file, clean(mblog['text']))
        write(page_file, date)
# Matches one HTML tag: '<', any run of characters other than '>', then '>'.
_TAG_RE = re.compile(r'<[^>]*>')


def clean(s):
    """Return ``s`` with every ``<...>`` HTML tag removed.

    The text between tags is kept. A ``<`` that has no closing ``>`` after it
    (and a ``>`` with no opening ``<`` before it) is left in place, so
    malformed input cannot cause endless recursion.

    Args:
        s: Raw post text, possibly containing HTML markup. ``None`` and the
            empty string are returned unchanged.

    Returns:
        The text with all complete tags stripped.
    """
    if not s:
        return s
    return _TAG_RE.sub('', s)

def geturl(name_g):
    """Look up a Weibo user by name and build the URL of that user's timeline API.

    The search API is queried with ``name_g``; the id of the first user in the
    first card group of the result is taken as the match.

    Args:
        name_g: Name to search for.

    Returns:
        The ``m.weibo.cn`` container URL for the matched user's timeline.

    Raises:
        KeyError, IndexError: If the search response does not contain a user
            at the expected position (for example, when nothing matches).
    """
    # Percent-encode the name so that characters such as '&', '#' or '='
    # cannot break the query string.
    encoded_name = quote(name_g, safe='')
    url1 = ("https://m.weibo.cn/api/container/getIndex?containerid=100103type=1%26q="
            + encoded_name + "&page_type=searchall")
    # A timeout keeps a stalled connection from blocking the request forever.
    response = requests.get(url1, headers=headers, timeout=10)
    datas = response.json()
    uid = str(datas['data']['cards'][0]['card_group'][0]['user']['id'])
    newurl = ("https://m.weibo.cn/api/container/getIndex?uid=" + uid
              + "&t=0&luicode=10000011&lfid=100103type=1&q=" + encoded_name
              + "&type=uid&value=" + uid + "&containerid=107603" + uid)
    return newurl

def jieba_cloud(file_name, icon):
    """Generate a word-cloud image from a UTF-8 text file.

    The text is segmented with jieba, joined with spaces and rendered by
    stylecloud in the shape selected by ``icon``.

    Args:
        file_name: Path of the text file to visualize.
        icon: Shape selector, one of the strings ``"1"`` to ``"6"``. ``"1"``
            and any unrecognized value use stylecloud's default icon.

    Returns:
        Path of the generated PNG: ``file_name`` without its extension,
        followed by ``str(icon)`` and ``.png``.
    """
    # Font Awesome icon class for each selector.
    icon_names = {
        "1": '',               # empty: use stylecloud's default icon
        "2": 'fas fa-dragon',  # dragon
        "3": 'fas fa-dog',     # dog
        "4": 'fas fa-cat',     # cat
        "5": 'fas fa-dove',    # dove
        "6": 'fab fa-qq',      # QQ logo
    }

    with open(file_name, 'r', encoding='utf8') as f:
        result = " ".join(jieba.cut(f.read()))  # word segmentation

    icon_name = icon_names.get(icon, "")
    # splitext only strips the final extension, so a dot elsewhere in the
    # path does not truncate the output name.
    picp = os.path.splitext(file_name)[0] + str(icon) + '.png'

    # A font with Chinese glyphs must be supplied, otherwise the words are
    # not rendered correctly.
    if icon_name:
        gen_stylecloud(text=result, icon_name=icon_name, font_path='simsun.ttc', output_name=picp)
    else:
        gen_stylecloud(text=result, font_path='simsun.ttc', output_name=picp)

    return picp
############################ flask routing
# Main page
@app.route('/')
def index():
    """Render the front-end page from the ``index.html`` template."""
    return render_template('index.html')
# Crawl the posts (if not done yet) and return the default word cloud
@app.route('/find')
def find():
    """Handle ``/find?name=<name>``.

    If no text file exists yet for ``name``, the user's timeline is crawled
    first. A word cloud with the default shape (``"1"``) is then generated.

    Returns:
        A JSON response containing the path of the generated image, or a
        400 JSON error response when ``name`` is missing or contains a path
        separator.
    """
    name_i = request.args.get('name')
    # name_i comes straight from the query string and is used to build file
    # paths, so reject empty values and anything that could leave the data
    # directory.
    if not name_i or '/' in name_i or '\\' in name_i:
        return Response(json.dumps({"error": "invalid name"}), status=400,
                        mimetype='application/json')

    file_name = root + textdata + name_i + '.txt'
    # Data collection: only crawl when nothing has been stored for this name.
    if not os.path.exists(file_name):
        u = geturl(name_i)
        search(name_i, u, "")

    # Make the word cloud.
    picpath = jieba_cloud(file_name, "1")
    return Response(json.dumps(picpath), mimetype='application/json')
# Switch the word-cloud shape
@app.route('/switchs')
def switchs():
    """Handle ``/switchs?name=<name>&ic=<icon>``.

    Regenerates the word cloud for an already-crawled ``name`` using the
    shape selected by ``ic``.

    Returns:
        A JSON response containing the path of the generated image, or a
        400 JSON error response when ``name`` is missing or contains a path
        separator, or when ``ic`` is not one of ``"1"`` to ``"6"``.
    """
    name_i = request.args.get('name')
    icon = request.args.get('ic')
    # Both values come straight from the query string and end up in file
    # paths (the icon is part of the output file name), so validate them.
    if not name_i or '/' in name_i or '\\' in name_i:
        return Response(json.dumps({"error": "invalid name"}), status=400,
                        mimetype='application/json')
    if icon not in ("1", "2", "3", "4", "5", "6"):
        return Response(json.dumps({"error": "invalid ic"}), status=400,
                        mimetype='application/json')

    # Make the word cloud.
    file_name = root + textdata + name_i + '.txt'
    picpath = jieba_cloud(file_name, icon)
    return Response(json.dumps(picpath), mimetype='application/json')
############################ end

if __name__ == "__main__":
    # Make sure the data directories exist before serving requests.
    initialization()
    app.run(host=ip, port=5000, threaded=True)
  1. Complete code gitee address: https://gitee.com/lyc96/weibo

Recommended Posts

Python realizes online microblog data visualization
Python realizes online translation
Python realizes online translation function
Python realizes 3D map visualization
02. Python data types
Python data model
Python data analysis
python data structure
Python data format-CSV
Python realizes spaceship war
Python data analysis-data update
Python data analysis-apply function
Python realizes stitching pictures
Python data analysis-data selection
Python basic data types
Python basic data types
Python data visualization: who are the big names in Python?
Python realizes tank battle
Python data analysis-data establishment
Python3 realizes airplane war game
Python Data Science: Neural Networks
Python common data structure collation
Python realizes apple eating game
Python3 crawler data cleaning analysis
Python parses simple XML data
Python Data Science: Logistic Regression
Python realizes face sign-in system
Python data structure and algorithm
Python Data Science: Regularization Methods
Python Data Science: Related Analysis
Python realizes batch naming photos
Python Data Science: Linear Regression
Python Faker data forgery module
Python Data Science: Chi-Square Test