There is a wenda1.xlsx file with the following content:
Need to merge the faq records into one line, the effect is as follows:
Note: For faq records, each line is spliced with ||.
Create a new test1.py with the following content:
# !/usr/bin/python3
# - *- coding: utf-8-*-import xlrd
# Open the excel file and create a workbook object
rbook = xlrd.open_workbook('wenda1.xlsx')
# The sheets method returns a list of objects,[<xlrd.sheet.Sheet object at 0x103f147f0>]
rbook.sheets()
# xls has 3 workbooks by default,Sheet1,Sheet2,Sheet3
rsheet = rbook.sheet_by_index(0) #Fetch the first workbook
faq_tmp_dict ={} #FAQ Temporary Dictionary
faq_formal_list =[] #FAQ official list
first_category_tag ="" #Class I identification
index =0 #index
# Cycle through all rows of the workbook
for row in rsheet.get_rows():
index +=1
first_category_row = row[0] #The column of level 1 classification
first_category_value = first_category_row.value #Class 1 value
if first_category_value !='Level 1 classification': #Exclude the first row, these are column names
if first_category_value: #When the value of level 1 classification is not empty
# Update the first level classification identification, use#connection
first_category_tag ="%s#%s"%(first_category_value,index)
faq_like_column = row[1] #FAQ column of similar sentences
faq_like_value = faq_like_column.value #FAQ value of similar sentences
# Update the temporary dictionary, there is no default value is an empty dictionary, otherwise it will be appended to the list
faq_tmp_dict.setdefault(first_category_tag,[]).append(faq_like_value)else:
# When the value of level 1 classification is empty
faq_like_column = row[1] #FAQ column of similar sentences
faq_like_value = faq_like_column.value
faq_tmp_dict.setdefault(first_category_tag,[]).append(faq_like_value)print(faq_tmp_dict)
# Handling temporary dictionaries
for i in faq_tmp_dict:
tmp_dict ={}
# First class classification, cutting#Number, take the first
first_category = i.split('#')[0]
# print("first_category",first_category)
# faq all records, convert the faq list into one line, splicing with commas
faq_like_all ="||".join(faq_tmp_dict[i])
# print("faq_merge",faq_all)
tmp_dict['first_category']= first_category
tmp_dict['faq_like_all']= faq_like_all
faq_formal_list.append(tmp_dict)print(faq_formal_list)
Execution output:
{' Small talk#2':['not here?','Hello are you there?','are you there','anyone here.','Hello there?','Are you there? say something','Is the customer present?'],'Small talk#9':['Hello there','客服Hello there','Hello there,客服']}[{'first_category':'Small talk','faq_like_all':'not here?||Hello are you there?||are you there||anyone here.||Hello there?||Are you there? say something||Is the customer present?'},{'first_category':'Small talk','faq_like_all':'Hello there||客服Hello there||Hello there,客服'}]
It can be seen from the output above. Combine multiple lines into one line, and write the faq record into a dictionary. Then you can write to the new table.
The complete code is as follows:
# !/usr/bin/python3
# - *- coding: utf-8-*-import xlrd
# Open the excel file and create a workbook object
rbook = xlrd.open_workbook('wenda1.xlsx')
# The sheets method returns a list of objects,[<xlrd.sheet.Sheet object at 0x103f147f0>]
rbook.sheets()
# xls has 3 workbooks by default,Sheet1,Sheet2,Sheet3
rsheet = rbook.sheet_by_index(0) #Fetch the first workbook
faq_tmp_dict ={} #FAQ Temporary Dictionary
faq_formal_list =[] #FAQ official list
first_category_tag ="" #Class I identification
index =0 #index
# Cycle through all rows of the workbook
for row in rsheet.get_rows():
index +=1
first_category_row = row[0] #The column of level 1 classification
first_category_value = first_category_row.value #Class 1 value
if first_category_value !='Level 1 classification': #Exclude the first row, these are column names
if first_category_value: #When the value of level 1 classification is not empty
# Update the first level classification identification, use#connection
first_category_tag ="%s#%s"%(first_category_value,index)
faq_like_column = row[1] #FAQ column of similar sentences
faq_like_value = faq_like_column.value #FAQ value of similar sentences
# Update the temporary dictionary, there is no default value is an empty dictionary, otherwise it will be appended to the list
faq_tmp_dict.setdefault(first_category_tag,[]).append(faq_like_value)else:
# When the value of level 1 classification is empty
faq_like_column = row[1] #FAQ column of similar sentences
faq_like_value = faq_like_column.value
faq_tmp_dict.setdefault(first_category_tag,[]).append(faq_like_value)print(faq_tmp_dict)
# Handling temporary dictionaries
for i in faq_tmp_dict:
tmp_dict ={}
# First class classification, cutting#Number, take the first
first_category = i.split('#')[0]
# print("first_category",first_category)
# faq all records, convert the faq list into one line, splicing with commas
faq_like_all ="||".join(faq_tmp_dict[i])
# print("faq_merge",faq_all)
tmp_dict['first_category']= first_category
tmp_dict['faq_like_all']= faq_like_all
faq_formal_list.append(tmp_dict)print(faq_formal_list)import xlwt
import json
f = xlwt.Workbook()
sheet1 = f.add_sheet('Sheet1', cell_overwrite_ok=True)
row0 =["Level 1 classification","faq record"]
# Write the first line
for i inrange(0,len(row0)):
sheet1.write(0, i, row0[i])
# # Load the json file
# withopen("tj.json",'r')as load_f:
# load_dict = json.load(load_f) #Deserialize files
#
num =0 #counter
max_length =0 #The maximum length
for i in faq_formal_list:
num+=1 #Self-increasing 1
faq_col = sheet1.col(1) #Column where the faq record is located
length =len(i['faq_like_all']) #Calculated length
# print("length",length)
# Update maximum length
if max_length < length:
max_length = length
# Set table width
faq_col.width = max_length *20*20
# Write library name
sheet1.write(num,0, i['first_category'])
# faq_like_str ="||".join(i['faq_like_all'])
sheet1.write(num,1, i['faq_like_all'])
# Save to table
f.save('test1.xls')
Execute the code, it will generate a test1.xlsx.
Open the file, the effect is as follows: