%run py/first_import.py
%run py/banlance_sheet.py
%run py/income_statement.py
%run py/cashflow.py
%run py/category_sales.py
%run py/basic_financial_kpi.py
%run py/Profitability.py
%run py/Repayment_ability.py
%run py/Growth_ability.py
%run py/Operational_capability.py
%run py/accounts_receivable_notes_receivable_advance_receipts.py
%run py/Market_Value.py
%run py/Price_Market_Value.py
%run py/mv_vs_p.py
mv_vs_p()
df=pd.read_csv('Price_Market_Value.txt', dtype={'index':str}).set_index('index')
df=df.iloc[0:,[1,3,4,5]]
df.plot(kind='barh',grid=True, figsize=(14,4), rot=0)
df.sort_index(ascending=False)
stock_pooling=stock_list
df_sales=pd.DataFrame()
for i in stock_pooling:
file_item=[i,'/',i,'_income_statement.txt']
file=''.join(file_item)
df=pd.read_csv(file,index_col='index_date')
df=df.iloc[:,1].astype(float).to_frame()
df.columns=[i]
df_sales=pd.concat([df_sales, df], axis=1, sort=False, join='outer')
df_sales=df_sales.T.sort_index(ascending=False).T.fillna(0)
df_sales.sort_index(ascending=True,inplace=True) #.plot(kind='line',grid=True, figsize=(10,4), rot=0)
figure, ax=plt.subplots(1, 1, figsize=(14, 4))
x=df_sales.index
y1,y2,y3,y4=df_sales['603083'], df_sales['002281'], df_sales['002369'], df_sales['603118']
ax.plot(x, y1, 'o-')
ax.plot(x, y2, 'o-')
ax.plot(x, y3, 'o-')
ax.plot(x, y4, 'o-')
ax.set_title('Sales' + '\nwww.jasper.wang')
ax.yaxis.set_ticks_position('right')
ax.grid(True)
ax.legend(loc=0,fontsize=15)
plt.tight_layout()
plt.show()
df_sales.T.sort_index(ascending=False).T.sort_index(ascending=False)
stock_pooling=stock_list
df_g=pd.DataFrame()
df_sales=pd.DataFrame()
for i in stock_pooling:
file_item=[i,'/',i,'_income_statement.txt']
file=''.join(file_item)
df=pd.read_csv(file,index_col='index_date')
df=df.iloc[:,[1,8]]
df['GPM']=df.apply(lambda x: x[0]-x[1], axis=1)
df['GPR']=df.apply(lambda x: x[2]/x[0], axis=1)
df=df.iloc[:,3].to_frame().astype(float)
df.columns=[i]
df.index.set_names('index_date', inplace=True)
if df_g.empty == True:
df_g = df
df_g=pd.concat([df,df_g],axis=1, sort=True).fillna(0)
df_g=df_g.T.sort_index(ascending=False)
df_g.drop_duplicates(inplace=True)
df_g=df_g.T
figure, ax=plt.subplots(1, 1, figsize=(14, 4))
x=df_g.index
y1,y2,y3,y4=df_g['603083'], df_g['002281'], df_g['002369'], df_g['603118']
ax.plot(x, y1, 'o-')
ax.plot(x, y2, 'o-')
ax.plot(x, y3, 'o-')
ax.plot(x, y4, 'o-')
ax.set_title('GPR' + '\nwww.jasper.wang')
ax.yaxis.set_ticks_position('right')
ax.grid(True)
ax.legend(loc=0,fontsize=15)
plt.tight_layout()
plt.show()
stock_pooling=stock_list
df_sales=pd.DataFrame()
for i in stock_pooling:
file_item=[i,'/',i,'_income_statement.txt']
file=''.join(file_item)
df=pd.read_csv(file,index_col='index_date')
df=df.iloc[:,12].replace("--","0").astype(float).to_frame()
df.columns=[i]
df_sales=pd.concat([df_sales, df], axis=1, sort=False, join='outer')
df_sales=df_sales.T.sort_index(ascending=False).T.fillna(0)
df_sales.sort_index(ascending=True,inplace=True) #.plot(kind='line',grid=True, figsize=(10,4), rot=0)
figure, ax=plt.subplots(1, 1, figsize=(14, 4))
x=df_sales.index
y1,y2,y3,y4=df_sales['603083'], df_sales['002281'], df_sales['002369'], df_sales['603118']
ax.plot(x, y1, 'o-')
ax.plot(x, y2, 'o-')
ax.plot(x, y3, 'o-')
ax.plot(x, y4, 'o-')
ax.set_title('Sales' + '\nwww.jasper.wang')
ax.yaxis.set_ticks_position('right')
ax.grid(True)
ax.legend(loc=0,fontsize=15)
plt.tight_layout()
plt.show()
df_sales.T.sort_index(ascending=False).T.sort_index(ascending=False)
stock_pooling=stock_list
df_sales=pd.DataFrame()
for i in stock_pooling:
file_item=[i,'/',i,'_banlance_sheet.txt']
file=''.join(file_item)
df=pd.read_csv(file,index_col='index_date')
df=df.iloc[:,44].replace("--","0").astype(float).to_frame()
df.columns=[i]
df_sales=pd.concat([df_sales, df], axis=1, sort=False, join='outer')
df_sales=df_sales.T.sort_index(ascending=False).T.fillna(0)
df_sales.sort_index(ascending=True,inplace=True) #.plot(kind='line',grid=True, figsize=(10,4), rot=0)
figure, ax=plt.subplots(1, 1, figsize=(14, 4))
x=df_sales.index
y1,y2,y3,y4=df_sales['603083'], df_sales['002281'], df_sales['002369'], df_sales['603118']
ax.plot(x, y1, 'o-')
ax.plot(x, y2, 'o-')
ax.plot(x, y3, 'o-')
ax.plot(x, y4, 'o-')
ax.set_title('Sales' + '\nwww.jasper.wang')
ax.yaxis.set_ticks_position('right')
ax.grid(True)
ax.legend(loc=0,fontsize=15)
plt.tight_layout()
plt.show()
df_sales.T.sort_index(ascending=False).T.sort_index(ascending=False)
stock_pooling=stock_list
for i in stock_pooling:
filename_from_frag='_income_statement.txt'
filename_from_null=''
filename_from_list=[i,'/',i,filename_from_frag]
filename_from=filename_from_null.join(filename_from_list)
df=pd.read_csv(filename_from, index_col='index_date')
df=df.loc[:,['营业收入(万元)','营业成本(万元)','营业税金及附加(万元)','销售费用(万元)','管理费用(万元)']].replace('--','0')
df['EBIT'] = df.apply(lambda x: x['营业收入(万元)']-x['营业成本(万元)']-x['营业税金及附加(万元)']-x['销售费用(万元)']-x['管理费用(万元)'], axis=1)
df=df.sort_index(ascending=True)
filename_to_null=''
filename_to=filename_to_null.join([i,'/',i,'_EBIT.txt'])
df.to_csv(filename_to)
stock_pooling=stock_list
df_e=pd.DataFrame()
df_sales=pd.DataFrame()
for i in stock_pooling:
file_item=[i,'/',i,'_EBIT.txt']
file=''.join(file_item)
df=pd.read_csv(file,index_col='index_date')
df=df.iloc[:,5].to_frame().astype(float)
df.columns=[i]
df.index.set_names('index_date', inplace=True)
if df_e.empty == True:
df_e = df
df_e=pd.concat([df,df_e],axis=1, sort=True).fillna(0)
df_e=df_e.T.sort_index(ascending=False)
df_e.drop_duplicates(inplace=True)
df_e=df_e.T
figure, ax=plt.subplots(1, 1, figsize=(14, 4))
x=df_e.index
y1,y2,y3,y4=df_e['603083'], df_e['002281'], df_e['002369'], df_e['603118']
ax.plot(x, y1, 'o-')
ax.plot(x, y2, 'o-')
ax.plot(x, y3, 'o-')
ax.plot(x, y4, 'o-')
ax.set_title('EBIT' + '\nwww.jasper.wang')
ax.yaxis.set_ticks_position('right')
ax.grid(True)
ax.legend(loc=0,fontsize=15)
plt.tight_layout()
plt.show()
dfcash_all=pd.read_csv('603083/603083_cashflow.txt',index_col='index_date')
dfcash=dfcash_all.iloc[:,[24,39,51]].replace('--','0')
dfcash.columns=['Operating','Investing','Financing']
dfcash.sort_index(ascending=True, inplace=True)
figure, ax=plt.subplots(1, 1, figsize=(14, 4))
x=dfcash.index
y1,y2, y3 = dfcash['Operating'], dfcash['Investing'], dfcash['Financing']
ax.plot(x, y1, 'r-')
ax.plot(x, y2, 'g-')
ax.plot(x, y3, 'b-')
ax.set_title('Cashflow' + '\nwww.jasper.wang')
ax.yaxis.set_ticks_position('right')
ax.grid(True)
ax.legend(loc=2)
plt.tight_layout()
plt.show()
df=dfcash_all.iloc[:,-5].to_frame().sort_index(ascending=True)
df.columns=['Cash balance']
print(df.plot(kind='bar',grid=True, figsize=(16,2), rot=0, secondary_y = True))
dfcash_all=pd.read_csv('603118/603118_cashflow.txt',index_col='index_date')
dfcash=dfcash_all.iloc[:,[24,39,51]].replace('--','0')
dfcash.columns=['Operating','Investing','Financing']
dfcash.sort_index(ascending=True, inplace=True)
figure, ax=plt.subplots(1, 1, figsize=(14, 4))
x=dfcash.index
y1,y2, y3 = dfcash['Operating'], dfcash['Investing'], dfcash['Financing']
ax.plot(x, y1, 'r-')
ax.plot(x, y2, 'g-')
ax.plot(x, y3, 'b-')
ax.set_title('Cashflow' + '\nwww.jasper.wang')
ax.yaxis.set_ticks_position('right')
ax.grid(True)
ax.legend(loc=2)
plt.tight_layout()
plt.show()
df=dfcash_all.iloc[:,-5].to_frame().sort_index(ascending=True)
df.columns=['Cash balance']
print(df.plot(kind='bar',grid=True, figsize=(16,2), rot=0, secondary_y = True))
dfcash_all=pd.read_csv('002281/002281_cashflow.txt',index_col='index_date')
dfcash=dfcash_all.iloc[:,[24,39,51]].replace('--','0')
dfcash.columns=['Operating','Investing','Financing']
dfcash.sort_index(ascending=True, inplace=True)
figure, ax=plt.subplots(1, 1, figsize=(14, 4))
x=dfcash.index
y1,y2, y3 = dfcash['Operating'], dfcash['Investing'], dfcash['Financing']
ax.plot(x, y1, 'r-')
ax.plot(x, y2, 'g-')
ax.plot(x, y3, 'b-')
ax.set_title('Cashflow' + '\nwww.jasper.wang')
ax.yaxis.set_ticks_position('right')
ax.grid(True)
ax.legend(loc=2)
plt.tight_layout()
plt.show()
df=dfcash_all.iloc[:,-5].to_frame().sort_index(ascending=True)
df.columns=['Cash balance']
print(df.plot(kind='bar',grid=True, figsize=(16,2), rot=0, secondary_y = True))
dfcash_all=pd.read_csv('002369/002369_cashflow.txt',index_col='index_date')
dfcash=dfcash_all.iloc[:,[24,39,51]].replace('--','0')
dfcash.columns=['Operating','Investing','Financing']
dfcash.sort_index(ascending=True, inplace=True)
figure, ax=plt.subplots(1, 1, figsize=(14, 4))
x=dfcash.index
y1,y2, y3 = dfcash['Operating'], dfcash['Investing'], dfcash['Financing']
ax.plot(x, y1, 'r-')
ax.plot(x, y2, 'g-')
ax.plot(x, y3, 'b-')
ax.set_title('Cashflow' + '\nwww.jasper.wang')
ax.yaxis.set_ticks_position('right')
ax.grid(True)
ax.legend(loc=2)
plt.tight_layout()
plt.show()
df=dfcash_all.iloc[:,-5].to_frame().sort_index(ascending=True)
df.columns=['Cash balance']
print(df.plot(kind='bar',grid=True, figsize=(16,2), rot=0, secondary_y = True))
df_1=pd.read_csv('603083/603083_income_statement.txt',index_col='index_date')
df_1=df_1.iloc[:,1].to_frame()*10000
df_1.columns=['Sales']
df_2=pd.read_csv('603083/603083_banlance_sheet.txt',index_col='index_date')
df_2=df_2.iloc[:,[5,6]].replace('--','0').astype(float)*10000
df_2['AR_BR']=df_2.apply(lambda x: x[0]+x[1], axis=1)
df_2=df_2.iloc[:,2].to_frame()
df=pd.concat([df_1,df_2], axis=1, sort=True)
print(df.plot(kind='bar',grid=True, figsize=(14,4), rot=0, secondary_y = True))
df_g=df.pct_change()
print(df_g.plot(kind='line',grid=True, figsize=(14,2), rot=0, secondary_y = True, xticks=range(len(df_g.index))))
df_1=pd.read_csv('603118/603118_income_statement.txt',index_col='index_date')
df_1=df_1.iloc[:,1].to_frame()*10000
df_1.columns=['Sales']
df_2=pd.read_csv('603118/603118_banlance_sheet.txt',index_col='index_date')
df_2=df_2.iloc[:,[5,6]].replace('--','0').astype(float)*10000
df_2['AR_BR']=df_2.apply(lambda x: x[0]+x[1], axis=1)
df_2=df_2.iloc[:,2].to_frame()
df=pd.concat([df_1,df_2], axis=1, sort=True)
print(df.plot(kind='bar',grid=True, figsize=(14,4), rot=0, secondary_y = True))
df_g=df.pct_change()
print(df_g.plot(kind='line',grid=True, figsize=(14,2), rot=0, secondary_y = True, xticks=range(len(df_g.index))))
df_1=pd.read_csv('002281/002281_income_statement.txt',index_col='index_date')
df_1=df_1.iloc[:,1].to_frame()*10000
df_1.columns=['Sales']
df_2=pd.read_csv('002281/002281_banlance_sheet.txt',index_col='index_date')
df_2=df_2.iloc[:,[5,6]].replace('--','0').astype(float)*10000
df_2['AR_BR']=df_2.apply(lambda x: x[0]+x[1], axis=1)
df_2=df_2.iloc[:,2].to_frame()
df=pd.concat([df_1,df_2], axis=1, sort=True)
print(df.plot(kind='bar',grid=True, figsize=(14,4), rot=0, secondary_y = True))
df_g=df.pct_change()
print(df_g.plot(kind='line',grid=True, figsize=(14,2), rot=0, secondary_y = True, xticks=range(len(df_g.index))))
df_1=pd.read_csv('002369/002369_income_statement.txt',index_col='index_date')
df_1=df_1.iloc[:,1].to_frame()*10000
df_1.columns=['Sales']
df_2=pd.read_csv('002369/002369_banlance_sheet.txt',index_col='index_date')
df_2=df_2.iloc[:,[5,6]].replace('--','0').astype(float)*10000
df_2['AR_BR']=df_2.apply(lambda x: x[0]+x[1], axis=1)
df_2=df_2.iloc[:,2].to_frame()
df=pd.concat([df_1,df_2], axis=1, sort=True)
print(df.plot(kind='bar',grid=True, figsize=(14,4), rot=0, secondary_y = True))
df_g=df.pct_change()
print(df_g.plot(kind='line',grid=True, figsize=(14,2), rot=0, secondary_y = True, xticks=range(len(df_g.index))))
p = ts.get_realtime_quotes('603083')
df=pd.read_csv('603083/603083_basic_financial_kpi.txt',index_col='index_date')
dfpe2 = pd.Series(
[ '603083'
, '剑桥科技'
, float(p['price'])
, float(df.iloc[0,0])
, float(p['price'])/float(df.iloc[0,0])
]
, index=['Stock','Name', 'P_'+p['date'][0], 'E_'+df.index[0], 'PE']
)
df_pe_2=pd.DataFrame(dfpe2).T
stock_pooling=stock_list
for i in stock_pooling:
p = ts.get_realtime_quotes(i)
filename=''.join([i,'/',i,'_basic_financial_kpi.txt'])
df=pd.read_csv(filename,index_col='index_date')
PE = pd.Series(
[ i
, p['name'][0]
, float(p['price'])
, float(df.iloc[0,0])
, float(p['price'])/float(df.iloc[0,0])
]
, index=['Stock','Name','P_'+p['date'][0], 'E_'+df.index[0], 'PE']
)
df_pe_1=pd.DataFrame(PE).T
df_pe_2=df_pe_2.append(df_pe_1)
df=df_pe_2.drop_duplicates('Stock', keep='last').reset_index(drop=True)
df=df.copy()
df.loc[:,'PE']=df.iloc[:,4].astype(float).round(2)
df.to_csv('PE.txt')
df
df=pd.read_csv('PE.txt', dtype={'Stock':str}).set_index('Stock').sort_index()
df.iloc[:,4].plot(kind='barh',grid=True, figsize=(14,2), rot=0)
df.iloc[:,[1,2,3,4]].sort_index(ascending=False)
stock_pooling=stock_list
for i in stock_pooling:
df_sina=pd.DataFrame()
url_item=['http://money.finance.sina.com.cn/corp/go.php/vFD_FinancialGuideLine/stockid/',i,'/ctrl/2017/displaytype/4.phtml']
url=''.join(url_item)
html=opener.open(url)
soup=BeautifulSoup(html, 'lxml')
y_list=soup.findAll('table')
l=y_list[12].get_text().replace('\xa0',',').replace('\n','').replace('\t',',').replace('\r','').replace('历年数据:','').replace(' ','')
year_list = l.split(',')
year_list=[i for i in year_list if i != '']
for j in year_list:
url_list=['http://money.finance.sina.com.cn/corp/go.php/vFD_FinancialGuideLine/stockid/',i,'/ctrl/',j,'/displaytype/4.phtml']
url=''.join(url_list)
html=opener.open(url)
data = pd.read_html(url)
df=pd.DataFrame(data[12]).T
if df_sina.empty == True:
df_sina=pd.DataFrame(data[12]).T
df_sina=df_sina.append(df, sort=False, ignore_index=True)
df_sina.drop_duplicates(inplace=True)
df=df_sina.T.set_index(0).T.set_index('报告日期')
df=df.sort_index(ascending=False).fillna(0).replace('--',0)
df_1=df.iloc[0,:].to_frame().T
df_2=df[df.index.str[5:7].isin(['12'])]
df_kpi=pd.concat([df_1,df_2], sort=False, ignore_index=False)
df_kpi.index.set_names('index_date', level=None, inplace=True)
df_kpi.drop_duplicates(inplace=True)
filename1=''.join([i,'_kpi_from_sina.txt'])
filename2=''.join([i,'/',filename1])
df_kpi.to_csv(filename2)
p = ts.get_realtime_quotes('603083')
df=pd.read_csv('603083/603083_kpi_from_sina.txt',index_col='index_date')
dfpe2 = pd.Series(
[ '603083'
, '剑桥科技'
, float(p['price'])
, float(df.iloc[0,6]) # 调整后每股净资产
# , float(p['price'])/float(df.iloc[0,6])
]
, index=['Stock','Name', 'P_'+p['date'][0], 'B_'+df.index[0]]
)
df_pe_2=pd.DataFrame(dfpe2).T
stock_pooling=stock_list
for i in stock_pooling:
p = ts.get_realtime_quotes(i)
filename=''.join([i,'/',i,'_kpi_from_sina.txt'])
df=pd.read_csv(filename,index_col='index_date')
PB = pd.Series(
[ i
, p['name'][0]
, float(p['price'])
, float(df.iloc[0,6])
# , float(p['price'])/float(df.iloc[0,6])
]
, index=['Stock','Name','P_'+p['date'][0], 'B_'+df.index[0]]
)
df_pe_1=pd.DataFrame(PB).T
df_pe_2=df_pe_2.append(df_pe_1)
df=df_pe_2.drop_duplicates('Stock', keep='last').reset_index(drop=True)
df=df.copy()
df['PB']=df.apply(lambda x: x[2]/x[3], axis=1)
#df.loc[:,'PB']=df.iloc[:,4].astype(float).round(2)
df.to_csv('PB.txt')
df
df=pd.read_csv('PB.txt', dtype={'Stock':str}).set_index('Stock').sort_index()
df.iloc[:,4].plot(kind='barh',grid=True, figsize=(14,2), rot=0)
df.iloc[:,[1,2,3,4]].sort_index(ascending=False)
# 用于计算复合增长率
from functools import reduce
from operator import mul
import numpy
p = ts.get_realtime_quotes('603083')
df=pd.read_csv('603083/603083_kpi_from_sina.txt',index_col='index_date')
dfpe2 = pd.Series(
[ '603083'
, '剑桥科技'
, float(p['price'])
, float(df.iloc[0,34]/100)
, float(df.iloc[0,34]/100)
]
, index=['Stock','Name', 'P_'+p['date'][0], 'G_CAGR_'+df.index[0],'G'+df.index[0]]
)
df_pe_2=pd.DataFrame(dfpe2).T
stock_pooling=stock_list
for i in stock_pooling:
p = ts.get_realtime_quotes(i)
filename=''.join([i,'/',i,'_kpi_from_sina.txt'])
df=pd.read_csv(filename,index_col='index_date')
l=[(1+i/100) for i in list(df.iloc[:,34].head(3))[::-1]]
CAGR=reduce(mul, l)**(1/3)-1
PEG = pd.Series(
[ i
, p['name'][0]
, float(p['price'])
# .real 取复数的实数部分
, float(CAGR.real)
, float(df.iloc[0,34]/100)
]
, index=['Stock','Name','P_'+p['date'][0], 'G_CAGR_'+df.index[0],'G'+df.index[0]]
)
df_pe_1=pd.DataFrame(PEG).T
df_pe_2=df_pe_2.append(df_pe_1)
df_G=df_pe_2.drop_duplicates('Stock', keep='last').reset_index(drop=True)
df_G=df_G.copy()
df_PE=pd.read_csv('PE.txt',index_col=0, dtype={'Stock':str})
df=pd.concat([df_G,df_PE.iloc[:,[0,1,4]]],axis=1, sort=False, ignore_index=False)
# numpy.float64(x[7])/(x[3]) 使用这个方法,当除数为 0 时,不报错,而是返回 -inf
# 此系临时解决方案
df['PEG_CAGR']=df.apply(lambda x: 100*numpy.float64(x[7])/(x[3]), axis=1).round(4)
df['PEG']=df.apply(lambda x: 100*numpy.float64(x[7])/(x[4]), axis=1).round(4)
df_peg=df.iloc[:,[0,1,7,3,4,8,9]]
df_peg.to_csv('PEG.txt')
df_peg
df=pd.read_csv('PEG.txt', dtype={'Stock':str}).set_index('Stock').sort_index()
df.iloc[:,5].plot(kind='barh',grid=True, figsize=(24,4), rot=0)
df.iloc[:,[1,2,3,5]].sort_index(ascending=False)
df_n=pd.read_csv('Stock_Name.txt', dtype={'Stock':str}).set_index('Stock')
df=pd.read_csv('603083/603083_income_statement.txt',index_col='index_date')
dfps2 = pd.Series(
[ '603083'
, float(df.iloc[0,1])
]
, index=['Stock', 'S_'+df.index[0]]
)
df_ps_2=pd.DataFrame(dfps2).T
stock_pooling=stock_list
for i in stock_pooling:
filename=''.join([i,'/',i,'_income_statement.txt'])
df=pd.read_csv(filename,index_col='index_date')
PS = pd.Series(
[ i
, float(df.iloc[0,1])
]
, index=['Stock', 'S_'+df.index[0]]
)
df_ps_1=pd.DataFrame(PS).T
df_ps_2=df_ps_2.append(df_ps_1)
df=df_ps_2.drop_duplicates('Stock', keep='last').reset_index(drop=True)
#df=df.copy()
#df.loc[:,'PS']=df.iloc[:,4].astype(float).round(2)
#df.to_csv('PS.txt')
df=df.set_index('Stock')
df=pd.concat([df_n,df],axis=1,sort=True)
df_MV=pd.read_csv('Market_Value.txt',index_col='index_date', dtype={'Stock':str})#.set_index('Stock')
df_MV=df_MV.copy()
df_MV=df_MV.iloc[:,0:2]
df_MV.columns=['Stock','MV_'+df_MV.index[0]]
df_MV=df_MV.set_index('Stock')
df_MV
df_ps=pd.concat([df,df_MV],axis=1,sort=True)
df_ps=df_ps.copy()
df_ps.iloc[:,1]=df_ps.iloc[:,1].astype(float).round(2)
df_ps.iloc[:,2]=df_ps.iloc[:,2].astype(float).round(2)*10000
df_ps['PS']=df_ps.apply(lambda x: x[2]/x[1], axis=1).round(2)
df_ps.sort_index(ascending=False, inplace=True)
df_ps.index.set_names('Stock', inplace=True)
df_ps.to_csv('PS.txt')
df_ps
import numpy as np
np.set_printoptions(suppress=True, threshold=np.nan)
pd.set_option('display.float_format', lambda x: '%.2f' % x) # 不以科学计数法显示(2f,保留2位小数)
df=pd.read_csv('PS.txt', dtype={'Stock':str}).set_index('Stock').sort_index()
df.iloc[:,3].plot(kind='barh',grid=True, figsize=(14,2), rot=0)
df.iloc[:,[0,2,1,3]].sort_index(ascending=False)
stock_pooling=stock_list
for i in stock_pooling:
filename_from_frag='_cashflow.txt'
filename_from_null=''
filename_from_list=[i,'/',i,filename_from_frag]
filename_from=filename_from_null.join(filename_from_list)
df_da=pd.read_csv(filename_from, index_col='index_date')
df_da=df_da.iloc[:,[60,61,62]].replace('--','0')
df_da=df_da.sort_index(ascending=True)
ebit_list=[i,'/',i,'_EBIT.txt']
ebit_file=filename_from_null.join(ebit_list)
df_ebit=pd.read_csv(ebit_file,index_col='index_date')
df_ebit=df_ebit['EBIT'].to_frame()
df_ebitda=pd.concat([df_ebit, df_da], axis=1, join='outer', sort=False)
df_ebitda
df_ebitda.iloc[:,1]=df_ebitda.iloc[:,1].astype(float64, copy=True)
df_ebitda.iloc[:,2]=df_ebitda.iloc[:,2].astype(float64, copy=True)
df_ebitda.iloc[:,3]=df_ebitda.iloc[:,3].astype(float64, copy=True)
df_ebitda['EBITDA']=df_ebitda.apply(lambda x: x[0]+x[1]+x[2]+x[3], axis=1)
filename_to_null=''
filename_to=filename_to_null.join([i,'/',i,'_EBITDA.txt'])
df_ebitda.to_csv(filename_to)
stock_pooling=stock_list
df_EBITDA=pd.DataFrame(index=range(len(stock_pooling)), columns=['date','Stock','EBITDA'])
for k, i in enumerate(stock_pooling):
filename=''.join([i,'/',i,'_EBITDA.txt'])
df=pd.read_csv(filename,index_col='index_date')
df_EBITDA.iloc[k,0]=df.index[-2]
df_EBITDA.iloc[k,1]=i
df_EBITDA.iloc[k,2]=df.sort_index(ascending=False).iloc[1,4]
df_EBITDA=df_EBITDA.copy()
df_EBITDA['EBITDA']=df_EBITDA['EBITDA'].astype(float).round(4)
df_EBITDA=df_EBITDA.set_index('Stock')
print(df_EBITDA)
df_EV=pd.read_csv('Market_Value.txt',index_col='index_date', dtype={'Stock':str})
df_EV=df_EV.reset_index()
df_EV=df_EV.set_index('Stock')
df=pd.concat([df_EV,df_EBITDA], sort=True, axis=1, join='outer', ignore_index=False)
df=df.copy()
df.iloc[:,1]=df.iloc[:,1]*10000
df_EE=df.iloc[:,[0,1,5,6]].reset_index() # 需要重设索引,否则lambda执行有误。
df_EE=df_EE.copy()
df_EE['EV / EBITDA']=df_EE.apply(lambda x: x[2]/x[4], axis=1) # 注意,缺省axis=0
#df['EV / EBITDA']=df.iloc[:,2]/df['EBITDA']
df_EE.columns=['Stock','MV_date','MV','E_date','EBITDA','EV_EBITDA']
df_EE.set_index('Stock', inplace=True)
df_EE.sort_index(ascending=False,inplace=True)
print(df_EE)
df_n=pd.read_csv('Stock_Name.txt', dtype={'Stock':str}).set_index('Stock')
df_EE=pd.concat([df_EE,df_n],axis=1,sort=True)
df_EE.index.set_names('Stock', inplace=True)
df_EE.to_csv('EV_EBITDA.txt')
df_EE
pd.set_option('display.float_format', lambda x: '%.2f' % x) # 不以科学计数法显示(2f,保留2位小数)
df=pd.read_csv('EV_EBITDA.txt', dtype={'Stock':str}).set_index('Stock').sort_index()
df.iloc[:,4].plot(kind='barh',grid=True, figsize=(14,2), rot=0)
df.iloc[:,:].sort_index(ascending=False)
# 先计算 df_s
stock_pooling=stock_list
df_S=pd.DataFrame(index=range(len(stock_pooling)), columns=['date','Stock','Sales'])
for k, i in enumerate(stock_pooling):
filename=''.join([i,'/',i,'_income_statement.txt'])
df=pd.read_csv(filename,index_col='index_date')
df_S.iloc[k,0]=df.index[0]
df_S.iloc[k,1]=i
df_S.iloc[k,2]=df.sort_index(ascending=True).iloc[0,1]
df_S=df_S.copy()
df_S['Sales']=df_S['Sales'].astype(float).round(4)
df_S=df_S.set_index('Stock')
print(df_S)
# 将 df_s 与原有数据合并
df_EV=pd.read_csv('Market_Value.txt',index_col='index_date', dtype={'Stock':str})
df_EV=df_EV.reset_index()
df_EV=df_EV.set_index('Stock')
df=pd.concat([df_EV,df_S], sort=True, axis=1, join='outer', ignore_index=False)
df=df.copy()
df.iloc[:,1]=df.iloc[:,1]*10000
df_ES=df.iloc[:,[0,1,5,6]].reset_index() # 需要重设索引,否则lambda执行有误。
df_ES=df_ES.copy()
df_ES['EV / Sales']=df_ES.apply(lambda x: x[2]/x[4], axis=1) # 注意,缺省axis=0
#df['EV / Sales']=df.iloc[:,1]/df['Salse']
df_ES.columns=['Stock','MV_date','MV','S_date','S','EV_Sales']
df_ES.set_index('Stock', inplace=True)
df_ES.sort_index(ascending=False,inplace=True)
print(df_ES)
df_n=pd.read_csv('Stock_Name.txt', dtype={'Stock':str}).set_index('Stock')
print(df_n)
df_ES=pd.concat([df_ES,df_n],axis=1,sort=True)
# df_ps.sort_index(ascending=False, inplace=True)
df_ES.index.set_names('Stock', inplace=True)
df_ES.to_csv('EV_Sales.txt')
df_ES
pd.set_option('display.float_format', lambda x: '%.2f' % x) # 不以科学计数法显示(2f,保留2位小数)
df=pd.read_csv('EV_Sales.txt', dtype={'Stock':str}).set_index('Stock').sort_index()
df.iloc[:,4].plot(kind='barh',grid=True, figsize=(14,2), rot=0)
df.iloc[:,:].sort_index(ascending=False)