Python使用pandas操作Excel整理数据

3,315次阅读

共计 1916 个字符，预计需要花费 5 分钟才能阅读完成。

Script1：

import pandas as pd

# Script 1: compare one institution's per-field citation figures with the
# global baseline and export the comparison as a numbered table.

# In both workbooks the row at zero-based index 5 supplies the column labels
# and the final row of the sheet is discarded.
baseline = pd.read_excel('global.xlsx', header=5, usecols=[1, 4], skipfooter=1)
institution = pd.read_excel('u.xlsx', header=5, usecols=[1, 2, 3, 4], skipfooter=1)

# Keep only the research fields present in both sheets.  Both inputs carry a
# 'Cites/Paper' column, so the merge suffixes them: '_x' comes from the
# baseline (left) frame and '_y' from the institution (right) frame.
merged = baseline.merge(institution, how='inner', on=['Research Fields'])

# Most productive fields first, then number the rows 1..N in that order.
ranked = merged.sort_values(by="Web of Science Documents", ascending=False)
ranked['序号'] = range(1, len(ranked) + 1)

# Output layout: keys give the source columns in their final order, values
# give the labels written to the report ('序号' keeps its own name).
column_labels = {
    '序号': '序号',
    'Research Fields': '学科',
    'Web of Science Documents': '发文数',
    'Cites': '被引数',
    'Cites/Paper_y': '均篇被引数',
    'Cites/Paper_x': '基准线',
}
report = ranked[list(column_labels)].rename(columns=column_labels)
report.to_excel('对比结果.xlsx', index=False)

Script2：

import pandas as pd

# Script 2: rank every institution in the clinical-medicine export by three
# metrics, then export only the institutions on the watch list below.

# Institutions to keep in the final report (matched against the
# 'Institutions' column exactly as spelled here).
WATCHED_INSTITUTIONS = [
    'HARVARD UNIVERSITY',
    'JOHNS HOPKINS UNIVERSITY',
    'SHANGHAI JIAO TONG UNIVERSITY',
    'UNIVERSITY OF CALIFORNIA SAN DIEGO',
    'SUN YAT SEN UNIVERSITY',
    'FUDAN UNIVERSITY',
    'PEKING UNIVERSITY',
    'UNIVERSITY OF TOKYO',
    'CAPITAL MEDICAL UNIVERSITY',
    'CHINESE ACADEMY OF MEDICAL SCIENCES - PEKING UNION MEDICAL COLLEGE',
    'SICHUAN UNIVERSITY',
    'ZHEJIANG UNIVERSITY',
    'HUAZHONG UNIVERSITY OF SCIENCE & TECHNOLOGY',
    'UNIVERSITY OF HONG KONG',
    'CHINESE UNIVERSITY OF HONG KONG',
]

# (column to sort by, name of the rank column to add), applied in this order.
RANKINGS = (
    ("Web of Science Documents", 'DOCS_Rank'),
    ("Cites", 'CITES_Rank'),
    ("Cites/Paper", 'Cites/Papers_Rank'),
)

# The row at zero-based index 5 supplies the column labels; the final row of
# the sheet is discarded.
table = pd.read_excel('CLINICAL MEDICINE.xlsx', header=5, usecols=[1, 3, 4, 5], skipfooter=1)

# Ranks are computed over the full table, before filtering.  They are purely
# positional (1..N after a descending sort), so tied values still receive
# distinct consecutive numbers.
for metric, rank_label in RANKINGS:
    table = table.sort_values(by=metric, ascending=False)
    table[rank_label] = range(1, len(table) + 1)

# Reduce to the watched institutions and list them by document count.
is_watched = table['Institutions'].isin(WATCHED_INSTITUTIONS)
selected = table.loc[is_watched].sort_values(by="Web of Science Documents", ascending=False)
selected.to_excel('临床医学统计结果.xlsx', index=False)

正文完