# Source path: code/数据获取/多指标排名收益分布.py
# Repository page metadata (not part of the program):
#   uploader: tradequant
#   commit: "Upload 59 files" (7ec53ba)
import pandas as pd
import numpy as np
import choose
import os
import datetime
from pymongo import MongoClient
# Connection string for the MongoDB instance; authentication is performed
# against the "wth000" database (authSource).
# NOTE(review): the credential/host part reads "[email protected]", which looks
# like the placeholder an e-mail obfuscation filter leaves behind rather than
# a real password@host pair -- restore the real value before relying on it.
# NOTE(review): neither `client` nor `db` is used by the code visible in this
# script; confirm whether the connection is still needed.
MONGO_URI = "mongodb://wth000:[email protected]:27017/dbname?authSource=wth000"
client = MongoClient(MONGO_URI)
db = client["wth000"]
# Asset keywords: a directory entry is processed when its name contains one
# of these substrings. Only "COIN" is enabled at the moment; the full list
# that has been used is 可转债, COIN, 股票, 指数, 行业, ETF.
names = ["COIN"]

# Absolute path of this script.
file_path = os.path.abspath(__file__)
# Directory that contains this script.
script_dir = os.path.dirname(file_path)
# The input data lives two levels above the script's directory.
dir_path = os.path.dirname(os.path.dirname(script_dir))
# Every entry (files and sub-directories) found in the data directory.
files = os.listdir(dir_path)
# ---------------------------------------------------------------------------
# For every matching CSV in `dir_path`: compute indicators and per-date ranks
# (delegated to the project-local `choose` module), drop extreme forward
# returns, then tabulate the mean n-day forward return inside each rank bin
# for every "rank" column and save the table as a CSV.
# ---------------------------------------------------------------------------

# Year tag embedded in the output file name.
# NOTE(review): the date-window filter that used this value (keep rows from
# Jan 1 of `watchtime` up to Jan 1 eight years later) is disabled, so the tag
# no longer reflects any filtering -- confirm it is still wanted.
watchtime = 1999

# Split the rank range [left, right] into `a` equal-width bins. The bins do
# not depend on the input file, so they are built once.
a = 50
left, right = 0, 1
step = (right - left) / a
ranges = [(left + i * step, left + (i + 1) * step) for i in range(a)]

for file in files:
    # Only entries whose name contains one of the asset keywords are used;
    # an entry matching several keywords is still processed a single time.
    if not any(filename in file for filename in names):
        continue
    # The pipeline reads CSV data only. Skipping everything else avoids
    # re-reading "<name>.csv" once for every sibling entry that shares the
    # stem (directories, backups, other formats).
    name, extension = os.path.splitext(file)
    if extension.lower() != ".csv":
        continue

    # Deliberately best-effort: a failure on one file is reported and the
    # loop moves on to the next file.
    try:
        df = pd.read_csv(os.path.join(dir_path, file))
        print(f'{name}')
        # Optional preprocessing that used to happen here and is currently
        # disabled: renaming the columns to a fixed schema, restricting rows
        # to the daily blue-chip list ("每日蓝筹.csv"), and merging in the
        # first-five-minutes / last-five-minutes bar files on (日期, 代码).
        print(df)

        # Per-instrument pass, then per-date pass.
        # presumably these add indicator columns and cross-sectional "rank"
        # columns respectively -- see the `choose` module to confirm.
        df = df.groupby("代码", group_keys=False).apply(choose.technology)
        df = df.groupby("日期", group_keys=False).apply(choose.rank)
        # Intermediate dump, written relative to the current working dir.
        # NOTE(review): the file name still contains the asset keyword; if
        # the working directory is also `dir_path`, this file will be picked
        # up as an input on the next run -- confirm.
        df.to_csv(f'指标(收益率隔夜){name}.csv')

        # `n` is the holding horizon in days used below; the second return
        # value is not needed here.
        df, _, n = choose.choose(name, df)

        # Drop rows whose forward return exceeds a cap at any horizon 1..n.
        # Names containing "股票" use a tighter base cap than other assets.
        # Rows with a missing forward return fail the comparison and are
        # dropped as well.
        # NOTE(review): the cap scales with n, not with the horizon i, so it
        # is the same for every horizon -- confirm this is intended.
        base_cap = 3 if "股票" in name else 20
        cap = base_cap * (1 + 0.1 * n)
        for i in range(1, n + 1):
            df = df[df[f"{i}日后总涨跌幅(未来函数)"] <= cap]

        # Columns whose name contains "rank" are the indicators to analyse.
        rank_cols = df.filter(like="rank").columns.tolist()
        target_col = f"{n}日后总涨跌幅(未来函数)"

        # One single-row frame per bin (indexed by the bin's (low, high)
        # tuple) with one column per rank indicator. The rows are collected
        # and concatenated once instead of growing a DataFrame in the loop.
        bin_rows = []
        for rank_range in ranges:
            low, high = rank_range
            bin_means = {}
            for col_name in rank_cols:
                # Both edges are inclusive, so a rank sitting exactly on an
                # edge is counted in the two adjacent bins.
                in_bin = (df[col_name] >= low) & (df[col_name] <= high)
                # Mean of every numeric column over the rows in this bin;
                # only the n-day forward return is kept.
                sub_df_mean = df[in_bin].mean(numeric_only=True)
                bin_means[col_name] = [sub_df_mean[target_col]]
            bin_rows.append(pd.DataFrame(bin_means, index=[rank_range]))
        # With no rank columns there is nothing to tabulate: keep the result
        # completely empty, as before.
        result_df = pd.concat(bin_rows) if rank_cols else pd.DataFrame()

        # Results go into a folder under the current working directory.
        path = os.path.join(os.path.abspath("."), "资产多指标排名收益分布")
        os.makedirs(path, exist_ok=True)
        result_df.to_csv(
            f"{path}/{name}持有{n}{watchtime}年多指标排名收益分布.csv")
        print("任务已经完成!")
    except Exception as e:
        print(f"发生bug: {e}")