Python: crawl all the pictures on a website

The target is

If you run it on a non-Linux system, you may need to change the path separator used in the code.

Here is my (admittedly terrible) code:

# -*- coding: utf-8 -*-
import os
import re
import sys
import urllib
import urllib2

import chardet
def get_html(url, transcode=True, fallback_encoding='gb2312'):
    """Fetch *url* and return the page body, or None when the fetch fails.

    The request is sent with the module-level ``ua_headers``.

    Args:
        url: Address of the page to download.
        transcode: When true (the default), the encoding of the body is
            detected with chardet and the body is re-encoded to the file
            system encoding. When false, the raw bytes are returned as
            received.
        fallback_encoding: Encoding assumed when chardet cannot detect
            one. Change it to match the encoding of the target website.

    Returns:
        The page content as a byte string, or None if the page could not
        be downloaded or decoded (a message is printed in that case).
    """
    try:
        request = urllib2.Request(url, headers=ua_headers)
        response = urllib2.urlopen(request)
        try:
            data = response.read()
        finally:
            # Release the connection even when read() raises.
            response.close()
        if not transcode:
            return data
        # chardet reports {'encoding': None} when it cannot guess, so a
        # dict default would never apply; fall back explicitly instead.
        infoencode = chardet.detect(data).get('encoding') or fallback_encoding
        # getfilesystemencoding() may return None on some Python 2 setups.
        typeEncode = sys.getfilesystemencoding() or 'utf-8'
        return data.decode(infoencode, 'ignore').encode(typeEncode)
    except (urllib2.URLError, IOError, LookupError, ValueError):
        # URLError/IOError: network failure; LookupError: unknown codec
        # name; ValueError: malformed URL or an un-encodable character.
        print("Failed to get content")
        return None
# HTTP headers sent with every page request.
ua_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36',
    'Cookie': 'AspxAutoDetectCookieSupport=1',
}

# Index page that lists every tag category. It is empty in this copy of the
# script and has to be filled in before the crawl can find anything.
url = ""

# Patterns applied to the downloaded HTML.
# NOTE(review): they are greedy and tied to the markup of the target site;
# verify them against the live pages.
_TAG_LINK_RE = re.compile(r'href="(.*).*" class="')
_POST_LINK_RE = re.compile(r'href="(.*).*" target="_blank" title="')
_POST_NAME_RE = re.compile(r"<img alt='(.*?)' src")
_IMAGE_LINK_RE = re.compile(r"src='(.*).*' alt=''")


def _safe_name(name):
    """Return *name* reduced to a single, harmless path component."""
    # Names come from remote HTML, so keep them from escaping the target
    # directory through separators or '.'/'..'.
    cleaned = name.replace("/", "_").replace("\\", "_").strip()
    if cleaned in ("", ".", ".."):
        return "_"
    return cleaned


def _ensure_dir(path):
    """Create *path* when it is missing; return True if it was created.

    Raises OSError when the directory cannot be created.
    """
    if os.path.isdir(path):
        return False
    os.makedirs(path)
    return True


def _page_url(post_url, page):
    """Return the address of page number *page* of the post at *post_url*."""
    if page == 0:
        return post_url
    # Later pages insert "_<page>" before the last five characters
    # (presumably the ".html" suffix -- verify against the site).
    return post_url[:-5] + "_" + str(page) + post_url[-5:]


def _download_post(post_url, post_name, post_dir):
    """Download the first picture of every page of one post into *post_dir*.

    Pages are requested in order until one cannot be fetched or contains
    no picture link.
    """
    page = 0
    while True:
        page_url = _page_url(post_url, page)
        print("-------------------------------------------" + page_url)
        page_html = get_html(page_url)
        image_links = _IMAGE_LINK_RE.findall(page_html) if page_html else []
        if not image_links:
            if page == 0:
                print("Crawling the image links in %s failed!" % post_name)
            print("while false")
            break
        img = image_links[0]
        print("............Successfully crawled the links to all pictures in %s"
              % post_name)
        print("............[Links to all pictures]")
        print(image_links)
        print("............%s%s picture links:%s" % (post_name, page, img))
        # The last 15 characters of the link serve as the file name.
        img_name = os.path.join(post_dir, _safe_name(img[-15:]))
        try:
            urllib.urlretrieve(img, img_name)  # Download the picture
            print("...............The picture has been downloaded successfully:"
                  + img_name)
            print("=========================================================")
        except (IOError, OSError):
            # A failed download does not stop the post; try the next page.
            print("=========================================================")
            print("=========================================================")
            print("[X]Error downloading pictures!")
            print("=========================================================")
            print("=========================================================")
        page += 1


def _crawl_tag(tag_url):
    """Create the directory for one tag page and download all its posts."""
    # Drops the first 24 and the last 5 characters of the link
    # (presumably the site prefix and ".html" -- verify against the site).
    tag_dir_name = _safe_name(tag_url[24:-5])
    # tag_mkpath = "C:\\Users\\Administrator\\Desktop\\Python-learn\\Photos\\" + tag_dir_name
    tag_mkpath = os.path.join("Photos", tag_dir_name)
    print("...The name of the tag has been successfully matched:" + tag_dir_name)
    try:
        if _ensure_dir(tag_mkpath):
            print("...create%s directory success----" % tag_dir_name)
        else:
            print("...Already this%s directory----" % tag_dir_name)
    except OSError:
        print("...[X]Obtain%s link failed or created%s folder failed[X]"
              % (tag_dir_name, tag_dir_name))
        return

    # Use the tag link to collect the links and names of all posts.
    tz_code = get_html(tag_url)
    if not tz_code:
        print("Crawling posts in %s failed" % tag_dir_name)
        return
    tz_url_list = _POST_LINK_RE.findall(tz_code)
    tz_name_list = _POST_NAME_RE.findall(tz_code)
    print(tz_url_list)
    print(tz_name_list)

    # Links and names are paired by position on the tag page.
    for tz_url, x_tz_name in zip(tz_url_list, tz_name_list):
        print(".........Link to current post---" + tz_url)
        print(".........Successfully matched" + x_tz_name)
        tz_mkpath = os.path.join(tag_mkpath, _safe_name(x_tz_name))
        try:
            if _ensure_dir(tz_mkpath):
                print(".........create%s directory success" % x_tz_name)
            else:
                print(".........Already%s this directory" % x_tz_name)
        except OSError:
            print("Crawling the image links in %s failed!" % x_tz_name)
            continue
        _download_post(tz_url, x_tz_name, tz_mkpath)


def main():
    """Crawl every tag category linked from ``url`` and save its pictures."""
    # First get all the tag links in the index page; each tag name is then
    # used to create the corresponding directory.
    tag_code = get_html(url)
    if not tag_code:
        # get_html has already reported the failure.
        return
    tag_url_list = _TAG_LINK_RE.findall(tag_code)
    print("[V]Successfully crawled the links of all tag categories in the meinv page")
    print(tag_url_list)  # Print all links of the index page
    for tag_url in tag_url_list:
        _crawl_tag(tag_url)


if __name__ == "__main__":
    main()

