The target is http://www.5442.com/meinv/.
If you run this on a non-Linux system, change the path separators in the code accordingly.
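A more portable option, sketched here but not part of the original script, is to build paths with os.path.join, which picks the right separator for the current platform (tag_dir_name below is a placeholder for the tag name the script extracts later):

import os

tag_dir_name = "example"  # placeholder; the real script slices this out of each tag URL
tag_mkpath = os.path.join("Photos", tag_dir_name)  # "Photos/example" on Linux, "Photos\example" on Windows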
# -*- coding: utf-8 -*-
import re
import urllib
import urllib2
import os
import chardet
import sys
'''
def get_html(url):  # Get web content without transcoding
    try:
        request = urllib2.Request(url, headers=ua_headers)
        response = urllib2.urlopen(request)
        html = response.read()
        return html
    except:
        print "Failed to get content"
'''
def get_html(url):  # Get web content with transcoding
    try:
        request = urllib2.Request(url, headers=ua_headers)
        data = urllib2.urlopen(request).read()
        typeEncode = sys.getfilesystemencoding()
        infoencode = chardet.detect(data).get('encoding', 'gb2312')  # Change the 'gb2312' fallback here to match the website's encoding
        html = data.decode(infoencode, 'ignore').encode(typeEncode)
        return html
    except:
        print "Failed to get content"
ua_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36',
    'Cookie': 'AspxAutoDetectCookieSupport=1',
}
url = "http://www.5442.com/meinv/"
########################################################
# First get all the tag links on the meinv page, then use each tag name to create the corresponding directory.
########################################################
tag_code = get_html(url)
tag_egrep = r'href="(.*).*" class="'
tag_url_list = re.findall(tag_egrep, tag_code)
print
print "[V] Successfully crawled the links of all tag categories on the meinv page"
print tag_url_list  # Print all the tag links found on the meinv page
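# A worked example of the tag_url[24:-5] slice below (the URL shape is
# inferred from the slice offsets, not verified): for a link such as
#   http://www.5442.com/tag/xinggan.html
# the 24-character prefix "http://www.5442.com/tag/" and the 5-character
# ".html" suffix are dropped, leaving "xinggan" as the directory name.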
for tag_url in tag_url_list:
    try:
        tag_dir_name = tag_url[24:-5]
        # tag_mkpath = "C:\\Users\\Administrator\\Desktop\\Python-learn\\Photos\\" + tag_dir_name
        tag_mkpath = "Photos/" + tag_dir_name
        tag_mkdir = os.path.exists(tag_mkpath)
        print
        print "...Successfully matched the tag name: " + tag_dir_name
        if not tag_mkdir:
            os.makedirs(tag_mkpath)
            print "...Created the %s directory----" % tag_dir_name
        else:
            print "...The %s directory already exists----" % tag_dir_name
    except:
        print "...[X] Failed to get the link or create the folder for %s [X]" % tag_url
##################################
# Then use each tag link to get all the post (tz) links.
##################################
    try:
        tz_code = get_html(tag_url)
        tz_url_egrep = r'href="(.*).*" target="_blank" title="'
        tz_url_list = re.findall(tz_url_egrep, tz_code)
        print tz_url_list
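        # Illustrative only: tz_url_list should now hold post links shaped
        # roughly like http://www.5442.com/meinv/20xx/xxxx.html (inferred
        # from the ".html" slicing further down, not verified).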
        for tz_url in tz_url_list:
            print ".........Link to the current post---" + tz_url
            try:
                xz_dir = tag_mkpath + ".html"
                urllib.urlretrieve(tag_url, xz_dir)  # Save the tag page itself; its existence is checked at the bottom of the loop
                # tz_name_egrep = r'_blank" title="(.*?)">'
                tz_name_egrep = r"<img alt='(.*?)' src"
                tz_name_list = re.findall(tz_name_egrep, tz_code)
                print tz_name_list
                t = 0
###############################################
# Then extract all the post (tz) names from the tag page and create the corresponding directories.
###############################################
                for x_tz_name in tz_name_list:
                    print ".........Successfully matched " + x_tz_name
                    tz_mkpath = tag_mkpath + "/" + x_tz_name
                    tz_mkdir = os.path.exists(tz_mkpath)
                    if not tz_mkdir:
                        os.makedirs(tz_mkpath)
                        print ".........Created the %s directory" % x_tz_name
                    else:
                        print ".........The %s directory already exists" % x_tz_name
###############################################
# Then page through the current post, building the link for each of its pages.
###############################################
                    xx = 0
                    while True:
                        try:
                            ttz_url = tz_url_list[t]  # Step through the post links manually
###########################
# Build the link to the Nth page of the post
###########################
                            if xx == 0:
                                tz_HQ_url = ttz_url
                            else:
                                tz_hz_url = ttz_url[-5:]        # the ".html" suffix
                                tz_qz_url = ttz_url[:-5] + "_"  # everything before it, plus "_"
                                tz_HQ_url = tz_qz_url + str(xx) + tz_hz_url
                            print "-------------------------------------------" + tz_HQ_url
#######################
# Get all the picture links on the current page
#######################
                            img_code = get_html(tz_HQ_url)
                            img_url_egrep = r"src='(.*).*' alt=''"
                            img_url_list = re.findall(img_url_egrep, img_code)
                            img = img_url_list[0]
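                            # Illustrative only: img[-15:] below keeps just the
                            # last 15 characters of the image URL as the local
                            # file name, so URLs sharing the same tail collide.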
print "............Successfully crawled to%Links to all pictures in s"% x_tz_name
print "............[Links to all pictures]"
print img_url_list
print "............%s%s picture links:%s"%(x_tz_name,xx,img)
img_name = tag_mkpath +"/"+x_tz_name +"/"+ img[-15:]
urllib.urlretrieve(img,img_name)#Download pictures
print "...............The picture has been downloaded successfully:"+img_name
print "========================================================="
print "========================================================="
print
except:
print "[X]Error downloading pictures!"
print "========================================================="
print "========================================================="
print
                            xx = xx + 1
                        except:
                            print "while false"  # No further pages (or posts); leave the while loop
                            break
                    t = t + 2  # Step by 2; each post link apparently appears twice in tz_url_list
            except:
                print "Crawling the image links in %s failed!" % x_tz_name
##########################################################################
# After the current pass, check whether the saved tag page exists; if it does, end the loop and move straight on to the next tag page.
##########################################################################
            if os.access(str(xz_dir), os.F_OK):
                break
            else:
                pass
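            # os.access(..., os.F_OK) is simply an existence test on the tag
            # page saved by urlretrieve earlier; os.path.exists(xz_dir) would
            # be the more common spelling.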
            # This scrappy code is not worth showing to outsiders
    except:
        print "Crawling the posts in %s failed" % tag_dir_name