import numpy as np
import pandas as pd
from joblib import Parallel, delayed
def get_coc_dt(news_data, dt):
    """Build the security co-occurrence pairs for a single trade date.

    Rows of *news_data* whose ``trade_date`` equals *dt* are turned into a
    news-by-security indicator matrix (1 where a ``news_id`` mentions a
    ``sec_code``, 0 otherwise).  Multiplying its transpose by itself gives,
    for every pair of securities, the number of news items mentioning both;
    the diagonal holds the number of news items mentioning each security.

    Args:
        news_data: DataFrame with at least the columns ``trade_date``,
            ``news_id`` and ``sec_code``.
        dt: Value compared against ``trade_date`` to select the day's rows.

    Returns:
        A ``(dt, pairs)`` tuple where ``pairs`` is a Series indexed by
        ``(sec_code, sec_code)`` holding the non-zero co-occurrence counts.
    """
    # ``assign`` adds the constant marker column that pivot_table aggregates.
    day_rows = (
        news_data.query("trade_date==@dt")
        .reset_index(drop=True)
        .assign(cnt=1)
    )

    # One row per news item, one column per security; missing pairs become 0.
    incidence = day_rows.pivot_table(
        index='news_id', columns='sec_code', values='cnt'
    ).fillna(0)
    codes = incidence.columns.tolist()
    membership = incidence.values

    # securities x securities matrix of shared-news counts.
    shared_counts = membership.T @ membership
    square = pd.DataFrame(data=shared_counts, index=codes, columns=codes)

    # Turn zeros into NaN so that only pairs that actually co-occur survive
    # the flattening into a long (code, code) -> count Series.
    pairs = square.replace(0, np.nan).unstack().dropna()
    return dt, pairs
def get_coc_all(news_data, n_jobs=5):
    """Compute per-date security co-occurrence pairs for every trade date.

    Each distinct calendar date found in ``news_data['trade_date']`` is
    dispatched to ``get_coc_dt`` through joblib.

    Args:
        news_data: DataFrame whose ``trade_date`` column is datetime-like
            (the ``.dt`` accessor is used) and which carries the columns
            ``get_coc_dt`` needs.
        n_jobs: Number of joblib workers; defaults to 5, the value that was
            previously hard-coded.

    Returns:
        A list with one ``get_coc_dt`` result per distinct date, in the
        order the dates are produced by ``unique()``.
    """
    all_dts_str = [str(dt) for dt in news_data['trade_date'].dt.date.unique()]
    # Pass the function's own argument to the workers; the previous code
    # referenced an unrelated global (``com_senti_fil``) and ignored
    # ``news_data`` for the actual computation.
    # NOTE(review): ``tqdm`` is not imported in the visible part of the file;
    # confirm it is in scope (e.g. ``from tqdm import tqdm``).
    coc_all_lst = Parallel(n_jobs=n_jobs)(
        delayed(get_coc_dt)(news_data, dt) for dt in tqdm(all_dts_str)
    )
    return coc_all_lst