python excel multi-line merge

I. Overview#

There is a wenda1.xlsx file with the following content:

Need to merge the faq records into one line, the effect is as follows:

Note: For faq records, each line is spliced with ||.

Two, convert multiple lines to one line#

Create a new test1.py with the following content:

# !/usr/bin/python3
# - *- coding: utf-8-*-import xlrd

# Open the excel file and create a workbook object
rbook = xlrd.open_workbook('wenda1.xlsx')
# The sheets method returns a list of objects,[<xlrd.sheet.Sheet object at 0x103f147f0>]
rbook.sheets()
# xls has 3 workbooks by default,Sheet1,Sheet2,Sheet3
rsheet = rbook.sheet_by_index(0)  #Fetch the first workbook

faq_tmp_dict ={}  #FAQ Temporary Dictionary
faq_formal_list =[] #FAQ official list
first_category_tag =""  #Class I identification
index =0 #index

# Cycle through all rows of the workbook
for row in rsheet.get_rows():
 index +=1

 first_category_row = row[0]  #The column of level 1 classification
 first_category_value = first_category_row.value  #Class 1 value
 if first_category_value !='Level 1 classification':  #Exclude the first row, these are column names
  if first_category_value:  #When the value of level 1 classification is not empty
   # Update the first level classification identification, use#connection
   first_category_tag ="%s#%s"%(first_category_value,index)
   faq_like_column = row[1]  #FAQ column of similar sentences
   faq_like_value = faq_like_column.value  #FAQ value of similar sentences
   # Update the temporary dictionary, there is no default value is an empty dictionary, otherwise it will be appended to the list
   faq_tmp_dict.setdefault(first_category_tag,[]).append(faq_like_value)else:
   # When the value of level 1 classification is empty
   faq_like_column = row[1]  #FAQ column of similar sentences
   faq_like_value = faq_like_column.value
   faq_tmp_dict.setdefault(first_category_tag,[]).append(faq_like_value)print(faq_tmp_dict)
# Handling temporary dictionaries
for i in faq_tmp_dict:
 tmp_dict ={}
 # First class classification, cutting#Number, take the first
 first_category = i.split('#')[0]
 # print("first_category",first_category)
 # faq all records, convert the faq list into one line, splicing with commas
 faq_like_all ="||".join(faq_tmp_dict[i])
 # print("faq_merge",faq_all)
 tmp_dict['first_category']= first_category
 tmp_dict['faq_like_all']= faq_like_all
 faq_formal_list.append(tmp_dict)print(faq_formal_list)

Execution output:

{' Small talk#2':['not here?','Hello are you there?','are you there','anyone here.','Hello there?','Are you there? say something','Is the customer present?'],'Small talk#9':['Hello there','客服Hello there','Hello there,客服']}[{'first_category':'Small talk','faq_like_all':'not here?||Hello are you there?||are you there||anyone here.||Hello there?||Are you there? say something||Is the customer present?'},{'first_category':'Small talk','faq_like_all':'Hello there||客服Hello there||Hello there,客服'}]

It can be seen from the output above. Combine multiple lines into one line, and write the faq record into a dictionary. Then you can write to the new table.

Three, write a new form#

The complete code is as follows:

# !/usr/bin/python3
# - *- coding: utf-8-*-import xlrd

# Open the excel file and create a workbook object
rbook = xlrd.open_workbook('wenda1.xlsx')
# The sheets method returns a list of objects,[<xlrd.sheet.Sheet object at 0x103f147f0>]
rbook.sheets()
# xls has 3 workbooks by default,Sheet1,Sheet2,Sheet3
rsheet = rbook.sheet_by_index(0)  #Fetch the first workbook

faq_tmp_dict ={}  #FAQ Temporary Dictionary
faq_formal_list =[] #FAQ official list
first_category_tag =""  #Class I identification
index =0 #index

# Cycle through all rows of the workbook
for row in rsheet.get_rows():
 index +=1

 first_category_row = row[0]  #The column of level 1 classification
 first_category_value = first_category_row.value  #Class 1 value
 if first_category_value !='Level 1 classification':  #Exclude the first row, these are column names
  if first_category_value:  #When the value of level 1 classification is not empty
   # Update the first level classification identification, use#connection
   first_category_tag ="%s#%s"%(first_category_value,index)
   faq_like_column = row[1]  #FAQ column of similar sentences
   faq_like_value = faq_like_column.value  #FAQ value of similar sentences
   # Update the temporary dictionary, there is no default value is an empty dictionary, otherwise it will be appended to the list
   faq_tmp_dict.setdefault(first_category_tag,[]).append(faq_like_value)else:
   # When the value of level 1 classification is empty
   faq_like_column = row[1]  #FAQ column of similar sentences
   faq_like_value = faq_like_column.value
   faq_tmp_dict.setdefault(first_category_tag,[]).append(faq_like_value)print(faq_tmp_dict)
# Handling temporary dictionaries
for i in faq_tmp_dict:
 tmp_dict ={}
 # First class classification, cutting#Number, take the first
 first_category = i.split('#')[0]
 # print("first_category",first_category)
 # faq all records, convert the faq list into one line, splicing with commas
 faq_like_all ="||".join(faq_tmp_dict[i])
 # print("faq_merge",faq_all)
 tmp_dict['first_category']= first_category
 tmp_dict['faq_like_all']= faq_like_all
 faq_formal_list.append(tmp_dict)print(faq_formal_list)import xlwt
import json
f = xlwt.Workbook()
sheet1 = f.add_sheet('Sheet1', cell_overwrite_ok=True)
row0 =["Level 1 classification","faq record"]

# Write the first line
for i inrange(0,len(row0)):
 sheet1.write(0, i, row0[i])

# # Load the json file
# withopen("tj.json",'r')as load_f:
#  load_dict = json.load(load_f)  #Deserialize files
#
num =0  #counter
max_length =0  #The maximum length
for i in faq_formal_list:
 num+=1  #Self-increasing 1

 faq_col = sheet1.col(1)  #Column where the faq record is located
 length =len(i['faq_like_all'])  #Calculated length
 # print("length",length)
 # Update maximum length
 if max_length < length:
  max_length = length

 # Set table width
 faq_col.width = max_length *20*20
 # Write library name
 sheet1.write(num,0, i['first_category'])
 # faq_like_str ="||".join(i['faq_like_all'])
 sheet1.write(num,1, i['faq_like_all'])

# Save to table
f.save('test1.xls')

Execute the code, it will generate a test1.xlsx.

Open the file, the effect is as follows:

Recommended Posts

python excel multi-line merge
Python operation Excel merge cells
How to read Excel in Python
Compare Excel, learn Python window functions
How to process excel table with python
Python uses pandas to process Excel data