import pandas as pd
import glob
def marge(csv_list, output_csv_path):
    """Append the contents of every CSV in *csv_list* to *output_csv_path*.

    Each input file is parsed with pandas and written to the output file in
    append mode, without the DataFrame index.

    Args:
        csv_list: Iterable of paths to the input CSV files (read as UTF-8).
        output_csv_path: Path of the combined CSV file. It is opened in
            append mode, so any content already in the file is kept.

    Note:
        ``to_csv`` writes a header row for every input file, so the combined
        file contains one header line per merged input.
    """
    for inputfile in csv_list:
        # Use a context manager so the handle is closed as soon as the file
        # has been parsed, instead of leaking one open handle per input.
        with open(inputfile, 'r', encoding='utf-8') as f:
            data = pd.read_csv(f)
        data.to_csv(output_csv_path, mode='a', index=False)
    print('完成合并')
def distinct(file, output_path='此处填写去重后的新csv文件路径'):
    """Write a copy of the CSV *file* with duplicate rows removed.

    The file is read with ``header=None``, so every line (including any
    header lines) is treated as an ordinary data row; repeated lines are
    therefore collapsed to their first occurrence.

    Args:
        file: Path (or file-like object) of the CSV to de-duplicate.
        output_path: Destination of the de-duplicated CSV. Defaults to the
            placeholder path the function originally had hard-coded, so
            existing single-argument calls behave exactly as before.
    """
    rows = pd.read_csv(file, header=None)
    unique_rows = rows.drop_duplicates()
    # No index and no synthetic header: the output keeps the input's layout.
    unique_rows.to_csv(output_path, index=False, header=False)
    print('完成去重')
# Destination of the merged (not yet de-duplicated) CSV.
output_csv_path = '/新路径/xx.csv'

# Collect every .csv file in the input folder (placeholder path: replace it
# with the real folder before running).
csv_list = glob.glob('此处填写包含所有csv文件的文件夹路径/*.csv')

# Echo the files that are about to be merged, one per line.
for each_csv in csv_list:
    print(each_csv)

# Merge all inputs into one file, then strip duplicate rows from it.
marge(csv_list, output_csv_path)
distinct(output_csv_path)
# When reposting, please credit the original article: https://blackberry.8miu.com/read-31245.html