当处理大型数据集时,通常需要将数据分组,并将每个分组的数据保存到单独的文件中。下面是一个使用 Python 中的 pandas 库来实现这一目标的示例代码。
步骤 1: 导入所需的库
import os
import pandas as pd
步骤 2: 读取 Excel 数据
df = pd. read_excel( "C:\\Users\\liuchunlin2\\Desktop\\新建XLSX 工作表.xlsx" )
步骤 3: 根据指定字段分组数据
grouped = df. groupby( [ '学校' , '班级' , '老师' ] )
步骤 4: 创建保存拆分数据的文件夹
folder_path = "C:\\Users\\liuchunlin2\\Desktop\\拆分数据"
os. makedirs( folder_path, exist_ok= True )
步骤 5: 遍历分组数据并保存到不同的 Excel 文件中
for name, group in grouped:
school, grade, teacher = name
filename = f" { school} _ { grade} _ { teacher} .xlsx"
file_path = os. path. join( folder_path, filename)
group. to_excel( file_path, index= False )
创建一个简单的图形用户界面,用于选择 Excel 文件并指定分组列,然后将数据按照分组保存到不同的 Excel 文件中
步骤 1: 导入所需的库
import tkinter as tk
from tkinter import filedialog
import pandas as pd
import os
步骤 2: 定义函数,用于打开文件对话框并选择 Excel 文件路径
def browse_file ( ) :
filepath = filedialog. askopenfilename( filetypes= [ ( "Excel files" , "*.xlsx" ) ] )
file_entry. delete( 0 , tk. END)
file_entry. insert( 0 , filepath)
步骤 3: 定义函数,用于处理数据并将其按指定列分组保存为多个 Excel 文件
def process_data ( ) :
input_file = file_entry. get( )
group_columns = [ column_entry. get( ) for column_entry in column_entries if column_entry. get( ) ]
if not input_file or not group_columns:
result_label. config( text= "Please provide input file path and group columns." )
return
try :
df = pd. read_excel( input_file)
grouped = df. groupby( group_columns)
folder_name = "Splitted_Data"
if not os. path. exists( folder_name) :
os. makedirs( folder_name)
for name, group in grouped:
filename = f" { folder_name} / { '_' . join( name) } .xlsx"
group. to_excel( filename, index= False )
result_label. config( text= "Data processing completed successfully." )
except Exception as e:
result_label. config( text= f"Error occurred: { str ( e) } " )
步骤 4: 创建 tkinter 窗口对象并设置标题
root = tk. Tk( )
root. title( "Excel Data Grouping Tool" )
步骤 5: 创建标签和输入框,用于显示和输入 Excel 文件路径
file_label = tk. Label( root, text= "Excel File Path:" )
file_label. grid( row= 0 , column= 0 , padx= 5 , pady= 5 , sticky= "w" )
file_entry = tk. Entry( root, width= 50 )
file_entry. grid( row= 0 , column= 1 , padx= 5 , pady= 5 , sticky= "we" )
browse_button = tk. Button( root, text= "Browse" , command= browse_file)
browse_button. grid( row= 0 , column= 2 , padx= 5 , pady= 5 )
步骤 6: 创建标签、输入框和按钮,用于指定分组列名
column_label = tk. Label( root, text= "Group Columns:" )
column_label. grid( row= 1 , column= 0 , padx= 5 , pady= 5 , sticky= "w" )
column_entry = tk. Entry( root, width= 50 )
column_entry. grid( row= 1 , column= 1 , padx= 5 , pady= 5 , sticky= "we" )
column_entries = [ column_entry]
add_column_button = tk. Button( root, text= "Add Column" , command= lambda : add_column_entry( ) )
add_column_button. grid( row= 1 , column= 2 , padx= 5 , pady= 5 )
步骤 7: 创建函数,用于添加新的分组列输入框
def add_column_entry ( ) :
new_column_entry = tk. Entry( root, width= 50 )
new_column_entry. grid( row= len ( column_entries) + 1 , column= 1 , padx= 5 , pady= 5 , sticky= "we" )
column_entries. append( new_column_entry)
步骤 8: 创建按钮,用于处理数据
process_button = tk. Button( root, text= "Process Data" , command= process_data)
process_button. grid( row= 2 , column= 2 , padx= 5 , pady= 10 , sticky= "e" )
步骤 9: 创建标签,用于显示处理结果信息
result_label = tk. Label( root, text= "" )
result_label. grid( row= len ( column_entries) + 3 , column= 0 , columnspan= 3 , padx= 5 , pady= 5 )
步骤 10: 启动主事件循环
root. mainloop( )
完整代码:
import tkinter as tk
from tkinter import filedialog
import pandas as pd
import os
def browse_file ( ) :
filepath = filedialog. askopenfilename( filetypes= [ ( "Excel files" , "*.xlsx" ) ] )
file_entry. delete( 0 , tk. END)
file_entry. insert( 0 , filepath)
def process_data ( ) :
input_file = file_entry. get( )
group_columns = [ column_entry. get( ) for column_entry in column_entries if column_entry. get( ) ]
if not input_file or not group_columns:
result_label. config( text= "Please provide input file path and group columns." )
return
try :
df = pd. read_excel( input_file)
grouped = df. groupby( group_columns)
folder_name = "Splitted_Data"
if not os. path. exists( folder_name) :
os. makedirs( folder_name)
for name, group in grouped:
filename = f" { folder_name} / { '_' . join( name) } .xlsx"
group. to_excel( filename, index= False )
result_label. config( text= "Data processing completed successfully." )
except Exception as e:
result_label. config( text= f"Error occurred: { str ( e) } " )
root = tk. Tk( )
root. title( "Excel Data Grouping Tool" )
file_label = tk. Label( root, text= "Excel File Path:" )
file_label. grid( row= 0 , column= 0 , padx= 5 , pady= 5 , sticky= "w" )
file_entry = tk. Entry( root, width= 50 )
file_entry. grid( row= 0 , column= 1 , padx= 5 , pady= 5 , sticky= "we" )
browse_button = tk. Button( root, text= "Browse" , command= browse_file)
browse_button. grid( row= 0 , column= 2 , padx= 5 , pady= 5 )
column_label = tk. Label( root, text= "Group Columns:" )
column_label. grid( row= 1 , column= 0 , padx= 5 , pady= 5 , sticky= "w" )
column_entry = tk. Entry( root, width= 50 )
column_entry. grid( row= 1 , column= 1 , padx= 5 , pady= 5 , sticky= "we" )
column_entries = [ column_entry]
add_column_button = tk. Button( root, text= "Add Column" , command= lambda : add_column_entry( ) )
add_column_button. grid( row= 1 , column= 2 , padx= 5 , pady= 5 )
def add_column_entry ( ) :
new_column_entry = tk. Entry( root, width= 50 )
new_column_entry. grid( row= len ( column_entries) + 1 , column= 1 , padx= 5 , pady= 5 , sticky= "we" )
column_entries. append( new_column_entry)
process_button = tk. Button( root, text= "Process Data" , command= process_data)
process_button. grid( row= 2 , column= 2 , padx= 5 , pady= 10 , sticky= "e" )
result_label = tk. Label( root, text= "" )
result_label. grid( row= len ( column_entries) + 3 , column= 0 , columnspan= 3 , padx= 5 , pady= 5 )
root. mainloop( )