import datetime
import os
import traceback

import numpy as np
import pandas as pd
from pymongo import MongoClient

import choose
# MongoDB client and database handle (not read anywhere in the visible part of this script).
# NOTE(review): the URI embeds credentials, and its "[email protected]" fragment looks like a
# redaction artifact rather than a real user/host pair -- confirm the connection string.
client = MongoClient("mongodb://wth000:[email protected]:27017/dbname?authSource=wth000")
db = client["wth000"]

# Substrings used to pick input files by name.
# Broader selection kept for reference: ["可转债", "COIN", "股票", "指数", "行业", "ETF"]
names = ["COIN"]

# Absolute path of this script.
file_path = os.path.abspath(__file__)
# Input directory: two levels above the directory that contains this script.
dir_path = os.path.dirname(os.path.dirname(os.path.dirname(file_path)))
files = os.listdir(dir_path)
def _rank_bucket_means(df, rank_cols, buckets, target_col):
    """Return the mean of ``target_col`` for every (bucket, rank column) pair.

    Args:
        df: frame holding the rank columns and ``target_col``.
        rank_cols: names of the rank columns to evaluate.
        buckets: iterable of ``(lower, upper)`` bounds.
        target_col: name of the column whose mean is reported.

    Returns:
        A DataFrame with one row per bucket (each row is built with
        ``index=[bucket]``) and one column per entry of ``rank_cols``.
        An empty DataFrame is returned when ``rank_cols`` is empty.
    """
    if not rank_cols:
        return pd.DataFrame()

    target = df[target_col]
    rows = []
    for bucket in buckets:
        lower, upper = bucket
        means = {}
        for col in rank_cols:
            # NOTE(review): both bounds are inclusive, so a rank that sits exactly
            # on a shared boundary is counted in two adjacent buckets. Kept as-is
            # so that results stay comparable with earlier runs.
            in_bucket = (df[col] >= lower) & (df[col] <= upper)
            # Only the target column is averaged; the mean is NaN for an empty bucket.
            means[col] = [target[in_bucket].mean()]
        rows.append(pd.DataFrame(means, index=[bucket]))
    # One concat at the end instead of growing the result frame row by row.
    return pd.concat(rows)


# Split [0, 1] into `a` equal-width buckets; this does not depend on the input file.
a = 50
left = 0
right = 1
step = (right - left) / a
ranges = [(left + i * step, left + (i + 1) * step) for i in range(a)]

# Only used to label the output file; the date-window filter that was based on it
# is disabled.
watchtime = 1999

for file in files:
    # Handle each directory entry at most once, even if several tags match it.
    if not any(tag in file for tag in names):
        continue
    # Split the entry into base name and extension; only CSV inputs are processed.
    name, extension = os.path.splitext(file)
    if extension.lower() != ".csv":
        continue

    try:
        df = pd.read_csv(os.path.join(dir_path, file))
        print(f'{name}')
        # Disabled optional steps: merging the daily blue-chip list (每日蓝筹.csv) and
        # the five-minute open/close bar files on the 日期/代码 columns.
        print(df)

        # Per-instrument indicators first, then per-date ranking.
        df = df.groupby("代码", group_keys=False).apply(choose.technology)
        df = df.groupby("日期", group_keys=False).apply(choose.rank)
        # NOTE(review): this file is written to the current working directory and its
        # name contains the tag; if that directory is also the input directory, a later
        # run would pick it up as an input -- confirm.
        df.to_csv(f'指标(收益率隔夜){name}.csv')
        df, _, n = choose.choose(name, df)

        # Drop rows whose forward return exceeds the cap on any of the n horizons.
        # The cap constant is 3 for names containing "股票" and 20 otherwise.
        cap = (3 if "股票" in name else 20) * (1 + 0.1 * n)
        for i in range(1, n + 1):
            df = df[df[f"{i}日后总涨跌幅(未来函数)"] <= cap]

        # Every column whose name contains "rank" is evaluated.
        rank_cols = df.filter(like="rank").columns.tolist()
        result_df = _rank_bucket_means(
            df, rank_cols, ranges, f"{n}日后总涨跌幅(未来函数)")

        # Save the distribution under a folder in the current working directory.
        path = os.path.join(os.path.abspath("."), "资产多指标排名收益分布")
        os.makedirs(path, exist_ok=True)
        result_df.to_csv(
            f"{path}/{name}持有{n}日{str(watchtime)}年多指标排名收益分布.csv")
        print("任务已经完成!")
    except Exception as e:
        # Best-effort batch: report the failure and move on to the next file, but
        # keep the traceback so the failing step can be located.
        print(f"发生bug: {e}")
        traceback.print_exc()