"""Tabulate mean forward returns per rank bucket for every rank column.

For each CSV in the directory two levels above this script's directory whose
file name contains one of the keywords in ``names``, the script:

1. loads the CSV,
2. applies ``choose.technology`` per "代码" group and ``choose.rank`` per
   "日期" group (both are defined in the project-local ``choose`` module,
   not visible here),
3. writes the resulting frame to ``指标(收益率隔夜){name}.csv`` in the
   current working directory,
4. calls ``choose.choose`` to obtain the filtered frame and the horizon ``n``,
5. drops rows whose 1..n-day forward return exceeds a cap,
6. splits [0, 1] into equal-width buckets and, for every column whose name
   contains "rank", records the mean n-day forward return of the rows whose
   rank falls in each bucket,
7. saves the table under ``资产多指标排名收益分布`` in the current working
   directory.
"""
import datetime
import os

import numpy as np
import pandas as pd
from pymongo import MongoClient

import choose

# NOTE(review): credentials are hard-coded in source and `db` is not used
# anywhere in this script. Consider loading the URI from the environment
# (or removing the connection) once it is confirmed nothing else relies on it.
client = MongoClient(
    "mongodb://wth000:wth000@43.159.47.250:27017/dbname?authSource=wth000")
db = client["wth000"]

# Keywords selecting which CSV files to process (substring match on file name).
# names = ["可转债","COIN", "股票", "指数", "行业", "ETF",]
names = ["COIN", ]

# Number of equal-width rank buckets spanning [0, 1].
BUCKET_COUNT = 50


def _rank_buckets(count):
    """Return ``count`` equal-width ``(low, high)`` intervals covering [0, 1]."""
    left = 0
    right = 1
    step = (right - left) / count
    return [(left + i * step, left + (i + 1) * step) for i in range(count)]


def _bucket_mean_returns(df, rank_cols, ranges, target_col):
    """Return mean ``target_col`` per (rank bucket, rank column).

    Args:
        df: Frame holding the rank columns and ``target_col``.
        rank_cols: Names of the rank columns to bucket on.
        ranges: ``(low, high)`` bucket bounds; one output row per bucket.
        target_col: Column whose mean is reported for each bucket.

    Returns:
        A DataFrame with one row per bucket (indexed by the bucket tuple) and
        one column per entry in ``rank_cols``.
    """
    result_df = pd.DataFrame()
    for rank_range in ranges:
        low, high = rank_range
        # One single-row frame per bucket, grown column by column.
        col_result_df = pd.DataFrame()
        for col_name in rank_cols:
            # NOTE(review): both bounds are inclusive, so a rank lying exactly
            # on a bucket edge is counted in two adjacent buckets.
            in_bucket = (df[col_name] >= low) & (df[col_name] <= high)
            # Average only the column that is reported instead of averaging
            # every numeric column and discarding all but one.
            mean_return = df.loc[in_bucket, target_col].mean()
            result_sub_df = pd.DataFrame(
                {col_name: [mean_return]}, index=[rank_range])
            col_result_df = pd.concat(
                [col_result_df, result_sub_df], axis=1)
        result_df = pd.concat([result_df, col_result_df])
    return result_df


def _process_file(dir_path, file):
    """Run the full rank-bucket analysis for one CSV file in ``dir_path``.

    Args:
        dir_path: Directory that contains ``file``.
        file: File name (with extension) of the CSV to analyse.
    """
    # File name without extension; used in output names and passed to `choose`.
    name, _extension = os.path.splitext(file)
    df = pd.read_csv(os.path.join(dir_path, file))
    print(f'{name}')

    # --- Disabled steps kept for reference --------------------------------
    # df.columns = ["日期", "代码", "开盘", "收盘", "最高", "最低", "昨收", "成交额", "成交量", "振幅", "换手率", "涨跌幅", "涨跌幅(开盘)",
    #               "流通波动", "流通贡献", "资金波动", "资金贡献", "换手波动", "换手贡献", "净利润同比增长率", "净资产收益率", "归属母公司股东的净利润同比增长率", "总市值", "流通市值", "总资产", "总负债", "净资产", "净利润", "市盈率", "市净率", "市销率", "市现率", "资产负债率", "营收",
    #               "净利润(归母)TTM", "营业收入增长率", "总资产增长率", "净利润增长率", "净资产增长率"]
    # dflanchou = pd.read_csv(f"每日蓝筹.csv")
    # dflanchou = dflanchou.iloc[:, :2]
    # dflanchou.columns = ['日期', '代码']
    # # dflanchou.rename(columns={dflanchou.columns[0]: '日期', dflanchou.columns[1]: '代码'}, inplace=True)
    # df = df.merge(dflanchou[['日期', '代码']], on=['日期', '代码'])
    # print(df)
    # # df has about 5.17 million rows in total; check how many remain after the merge
    # dfkaipan = pd.read_csv('非ST股票(分钟)开盘后五分钟.csv')
    # dfkaipan.columns = ['日期', '代码', '开盘(开盘后五分钟)', '最高(开盘后五分钟)', '最低(开盘后五分钟)',
    #                     '收盘(开盘后五分钟)', '昨收(开盘后五分钟)', '成交额(开盘后五分钟)', '涨跌幅(今收今开)(开盘后五分钟)']
    # df = df.merge(dfkaipan, on=['日期', '代码'])  # add extra factor columns
    # dfshoupan = pd.read_csv('非ST股票(分钟)收盘前五分钟.csv')
    # dfshoupan.columns = ['日期', '代码', '开盘(收盘前五分钟)', '最高(收盘前五分钟)', '最低(收盘前五分钟)',
    #                      '收盘(收盘前五分钟)', '昨收(收盘前五分钟)', '成交额(收盘前五分钟)', '涨跌幅(今收今开)(收盘前五分钟)']
    # df = df.merge(dfshoupan, on=['日期', '代码'])  # add extra factor columns
    # -----------------------------------------------------------------------
    print(df)

    # Only used as a label in the output file name while the date window
    # below stays disabled.
    watchtime = 1999
    # start_date = datetime.datetime(
    #     watchtime, int(1), int(1)).strftime("%Y-%m-%d %H:%M:%S")
    # end_date = datetime.datetime(datetime.datetime.strptime(
    #     start_date, "%Y-%m-%d %H:%M:%S").year + 8, int(1), int(1)).strftime("%Y-%m-%d %H:%M:%S")
    # df = df[(df["日期"] >= start_date) & (df["日期"] <= end_date)]

    # Per-code and per-date transforms live in the project-local `choose`
    # module; presumably they add indicator and "rank" columns -- confirm there.
    df = df.groupby("代码", group_keys=False).apply(choose.technology)
    df = df.groupby("日期", group_keys=False).apply(choose.rank)
    df.to_csv(f'指标(收益率隔夜){name}.csv')
    df, _m, n = choose.choose(name, df)

    # Drop rows whose forward return over any of the 1..n day horizons exceeds
    # the cap. The cap depends on `n` only, so it is computed once.
    # NOTE(review): the cap uses `n` for every horizon `i`; confirm intended.
    base_cap = 3 if ("股票" in name) else 20
    cap = base_cap * (1 + 0.1 * n)
    for i in range(1, n + 1):
        df = df[df[f"{i}日后总涨跌幅(未来函数)"] <= cap]

    # Split [0, 1] into equal-width buckets and pick every "rank" column.
    ranges = _rank_buckets(BUCKET_COUNT)
    rank_cols = df.filter(like="rank").columns.tolist()
    result_df = _bucket_mean_returns(
        df, rank_cols, ranges, f"{n}日后总涨跌幅(未来函数)")

    # Save the table in a sub-folder of the current working directory.
    path = os.path.join(os.path.abspath("."), "资产多指标排名收益分布")
    os.makedirs(path, exist_ok=True)
    result_df.to_csv(
        f"{path}/{name}持有{n}日{str(watchtime)}年多指标排名收益分布.csv")
    print("任务已经完成!")


# Absolute path of this script.
file_path = os.path.abspath(__file__)
# Directory that contains this script.
dir_path = os.path.dirname(file_path)
# Directory two levels above the script's directory; input CSVs are read here.
dir_path = os.path.dirname(os.path.dirname(dir_path))
files = os.listdir(dir_path)
for file in files:
    # Only CSV files are valid inputs; other entries that merely contain a
    # keyword (scripts, spreadsheets, folders) used to trigger a read of a
    # same-stem ".csv" and could process one data set several times.
    if os.path.splitext(file)[1].lower() != ".csv":
        continue
    # Process each file once, even when it matches several keywords.
    if not any(filename in file for filename in names):
        continue
    try:
        _process_file(dir_path, file)
    except Exception as e:
        # Best-effort batch run: report the failure and continue with the
        # next file.
        print(f"发生bug: {e}")