Python使用pandas操作Excel整理数据

  • Python使用pandas操作Excel整理数据已关闭评论
  • 202 views
  • A+
所属分类:python

Script1:

import pandas as pd

# Compare per-field publication/citation figures from u.xlsx with the
# per-field 'Cites/Paper' values from global.xlsx and export one table.
#
# Both sheets are read the same way: header=5 takes the column labels from
# 0-based row 5, skipfooter=1 drops the last row, and usecols selects
# columns by 0-based position.
read_opts = {'header': 5, 'skipfooter': 1}
baseline = pd.read_excel('global.xlsx', usecols=[1, 4], **read_opts)
subject = pd.read_excel('u.xlsx', usecols=[1, 2, 3, 4], **read_opts)

# Inner join on the field name. The merge suffixes a column name shared by
# both frames: '_x' is the left frame (global.xlsx), '_y' the right (u.xlsx).
# NOTE(review): this relies on both sheets exposing a 'Cites/Paper' column at
# the selected positions -- confirm against the actual workbooks.
joined = baseline.merge(subject, how='inner', on=['Research Fields'])

# Largest document count first, then number the rows 1..n in that order.
ranked = joined.sort_values(by="Web of Science Documents", ascending=False)
ranked['序号'] = range(1, len(ranked) + 1)

# Output layout: key order is the column order in the export, values are the
# exported (Chinese) column labels.
export_labels = {
    '序号': '序号',
    'Research Fields': '学科',
    'Web of Science Documents': '发文数',
    'Cites': '被引数',
    'Cites/Paper_y': '均篇被引数',
    'Cites/Paper_x': '基准线',
}
report = ranked[list(export_labels)].rename(columns=export_labels)
report.to_excel('对比结果.xlsx', index=False)

Script2:

import pandas as pd

# Institutions kept in the exported table; matched exactly against the
# 'Institutions' column of the workbook.
ClinicalMedicineInstitutions = [
    'HARVARD UNIVERSITY',
    'JOHNS HOPKINS UNIVERSITY',
    'SHANGHAI JIAO TONG UNIVERSITY',
    'UNIVERSITY OF CALIFORNIA SAN DIEGO',
    'SUN YAT SEN UNIVERSITY',
    'FUDAN UNIVERSITY',
    'PEKING UNIVERSITY',
    'UNIVERSITY OF TOKYO',
    'CAPITAL MEDICAL UNIVERSITY',
    'CHINESE ACADEMY OF MEDICAL SCIENCES - PEKING UNION MEDICAL COLLEGE',
    'SICHUAN UNIVERSITY',
    'ZHEJIANG UNIVERSITY',
    'HUAZHONG UNIVERSITY OF SCIENCE & TECHNOLOGY',
    'UNIVERSITY OF HONG KONG',
    'CHINESE UNIVERSITY OF HONG KONG',
]

# header=5 takes the column labels from 0-based row 5, skipfooter=1 drops the
# last row, and usecols selects columns by 0-based position.
table = pd.read_excel(
    'CLINICAL MEDICINE.xlsx',
    header=5,
    usecols=[1, 3, 4, 5],
    skipfooter=1,
)

# Rank every row of the full sheet on each metric in turn: sort descending,
# then number the rows 1..n in that order. The ranks are assigned before the
# institution filter below, so they are positions within the whole sheet.
# Each pass re-sorts the result of the previous pass, in this fixed order.
rank_passes = (
    ("Web of Science Documents", 'DOCS_Rank'),
    ("Cites", 'CITES_Rank'),
    ("Cites/Paper", 'Cites/Papers_Rank'),
)
for metric, rank_column in rank_passes:
    table = table.sort_values(by=metric, ascending=False)
    table[rank_column] = range(1, len(table) + 1)

# Keep only the listed institutions and present them by document count.
is_selected = table['Institutions'].isin(ClinicalMedicineInstitutions)
selected = table[is_selected].sort_values(
    by="Web of Science Documents", ascending=False
)
selected.to_excel('临床医学统计结果.xlsx', index=False)
  • 我的微信
  • 这是我的微信扫一扫
  • weixin
  • 我的微信公众号
  • 我的微信公众号扫一扫
  • weixin