import pandas as pd import numpy as np import argparse def select_cols(df): columns = [ "Open_Interest_All", "M_Money_Positions_Long_ALL", "M_Money_Positions_Short_ALL", ] return pd.DataFrame(df, columns=columns).reset_index(drop=True) def read_excel(excel_path): df = pd.read_excel(excel_path) relevant_dfs = [] crude_oil_wti = relevant_dfs.append( df.loc[ df["Market_and_Exchange_Names"] == "CRUDE OIL, LIGHT SWEET-WTI - ICE FUTURES EUROPE" ] ) crude_oil_brent = relevant_dfs.append( df.loc[ ( df["Market_and_Exchange_Names"] == "BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE" ) | ( df["Market_and_Exchange_Names"] == "BRENT LAST DAY - NEW YORK MERCANTILE EXCHANGE" ) ] ) heating_oil = relevant_dfs.append( df.loc[ ( df["Market_and_Exchange_Names"] == "#2 HEATING OIL- NY HARBOR-ULSD - NEW YORK MERCANTILE EXCHANGE" ) | ( df["Market_and_Exchange_Names"] == "NY HARBOR ULSD - NEW YORK MERCANTILE EXCHANGE" ) ] ) palladium = relevant_dfs.append( df.loc[ df["Market_and_Exchange_Names"] == "PALLADIUM - NEW YORK MERCANTILE EXCHANGE" ] ) platinum = relevant_dfs.append( df.loc[ df["Market_and_Exchange_Names"] == "PLATINUM - NEW YORK MERCANTILE EXCHANGE" ] ) silver = relevant_dfs.append( df.loc[df["Market_and_Exchange_Names"] == "SILVER - COMMODITY EXCHANGE INC."] ) gold = relevant_dfs.append( df.loc[df["Market_and_Exchange_Names"] == "GOLD - COMMODITY EXCHANGE INC."] ) copper = relevant_dfs.append( df.loc[ ( df["Market_and_Exchange_Names"] == "COPPER-GRADE #1 - COMMODITY EXCHANGE INC." ) | ( df["Market_and_Exchange_Names"] == "COPPER- #1 - COMMODITY EXCHANGE INC." ) ] ) aluminium = relevant_dfs.append( df.loc[ ( df["Market_and_Exchange_Names"] == "ALUMINUM MW US TR PLATTS - COMMODITY EXCHANGE INC." ) | ( df["Market_and_Exchange_Names"] == "ALUMINUM MWP - COMMODITY EXCHANGE INC." ) ] ) steel = relevant_dfs.append( df.loc[ ( df["Market_and_Exchange_Names"] == "US MIDWEST DOMESTIC HOT-ROLL - COMMODITY EXCHANGE INC." ) | (df["Market_and_Exchange_Names"] == "STEEL-HRC - COMMODITY EXCHANGE INC.") ] ) natural_gas = relevant_dfs.append( df.loc[ ( df["Market_and_Exchange_Names"] == "NATURAL GAS - NEW YORK MERCANTILE EXCHANGE" ) | ( df["Market_and_Exchange_Names"] == "NAT GAS NYME - NEW YORK MERCANTILE EXCHANGE" ) ] ) return relevant_dfs if __name__ == "__main__": parser = argparse.ArgumentParser( "commitments", description="Create CSV file for commitments of traders report" ) parser.add_argument( "filename", help="Path to Excel file for a particular year of commitments of traders", ) parser.add_argument( "--dest", default="c_year.csv", help="Location to save parsed CSV to" ) args = parser.parse_args() dfs = read_excel(args.filename) relevant_dfs = [select_cols(df) for df in dfs] # Stack horizontally df = pd.concat(relevant_dfs, axis=1).iloc[::-1] df.to_csv(args.dest, index=False)