"""Strip the ``embedding`` column from per-layer explanation_words files."""

import glob
import os

import pandas as pd


def remove_embeddings_column(file_path: str, column: str = "embedding") -> bool:
    """Remove *column* from a tab-separated file, rewriting it in place.

    The files carry a ``.csv`` extension but are tab-delimited, so both the
    read and the write use ``sep='\\t'``.

    Args:
        file_path: Path of the file to rewrite.
        column: Name of the column to drop. Defaults to ``"embedding"``.

    Returns:
        True if the column was present and the file was rewritten; False if
        the column was absent or the file could not be processed. Failures
        are reported on stdout rather than raised, so one bad file does not
        abort a batch run.
    """
    try:
        # Read every cell as text and disable NA detection so the columns we
        # keep are written back with their original values: without this,
        # tokens such as "null"/"NA"/"nan" turn into empty cells and
        # numeric-looking text ("007", "1.50") is reformatted.
        df = pd.read_csv(file_path, sep="\t", dtype=str, keep_default_na=False)

        if column not in df.columns:
            print(f"No {column} column found in {file_path}")
            return False

        df = df.drop(columns=[column])
        df.to_csv(file_path, sep="\t", index=False)
        print(f"Successfully removed {column} column from {file_path}")
        return True
    except Exception as e:
        # Deliberately broad: this is a best-effort, per-file boundary.
        print(f"Error processing {file_path}: {str(e)}")
        return False


def process_all_layers(base_path: str = "src/codebert/language_classification") -> int:
    """Strip the embedding column from every layer's explanation_words file.

    Looks for ``layer*/explanation_words_layer*.csv`` directly under
    *base_path* and processes the matches in sorted order so the log output
    is reproducible.

    Args:
        base_path: Directory that contains the ``layer*`` sub-directories.

    Returns:
        The number of files that were actually modified (0 when nothing
        matched the pattern).
    """
    pattern = os.path.join(base_path, "layer*/explanation_words_layer*.csv")
    files = sorted(glob.glob(pattern))

    if not files:
        print(f"No explanation_words CSV files found matching pattern: {pattern}")
        return 0

    print(f"Found {len(files)} files to process")

    # sum() over booleans counts the files whose column was removed.
    return sum(remove_embeddings_column(file_path) for file_path in files)


if __name__ == "__main__":
    process_all_layers()
    print("Processing complete!")