# Salient_3_problems / remove.py
# Author: Vedant Pungliya
# Commit: "Updating files" (5d6df7a, unverified)
import pandas as pd
import os
import glob
def remove_embeddings_column(file_path):
    """Remove the 'embedding' column from a tab-separated file, in place.

    The remaining columns are round-tripped as plain strings with
    missing-value detection disabled, so cells such as ``null``, ``NA``,
    ``007`` or ``0.10`` are written back exactly as they were read instead
    of being blanked or reformatted by pandas' type inference.

    Files that have no 'embedding' column are left untouched. Failures are
    reported on stdout rather than raised, so one bad file does not abort a
    batch run.

    Args:
        file_path: Path of the tab-separated file to rewrite.
    """
    try:
        # Parse only the header first: files without the column are skipped
        # without loading their (potentially large) contents.
        header = pd.read_csv(file_path, sep='\t', nrows=0)
        if 'embedding' not in header.columns:
            print(f"No embedding column found in {file_path}")
            return

        # dtype=str + na_filter=False keep every surviving cell verbatim;
        # the defaults would turn tokens like "null"/"NA" into NaN (written
        # back as empty fields) and rewrite numeric-looking text.
        df = pd.read_csv(file_path, sep='\t', dtype=str, na_filter=False)
        df = df.drop(columns='embedding')

        # Overwrite the original file with the reduced table.
        df.to_csv(file_path, sep='\t', index=False)
        print(f"Successfully removed embedding column from {file_path}")
    except Exception as e:
        # Deliberately broad: this is a best-effort batch step, so any
        # read/parse/write failure is reported and the caller moves on.
        print(f"Error processing {file_path}: {e}")
def process_all_layers(base_path="src/codebert/language_classification"):
    """Strip the embedding column from every per-layer explanation_words CSV.

    Files are discovered with the glob
    ``<base_path>/layer*/explanation_words_layer*.csv`` and each match is
    handed to ``remove_embeddings_column``. When nothing matches, a message
    naming the pattern is printed and no file is touched.

    Args:
        base_path: Directory that contains the ``layer*`` sub-directories.
    """
    search_glob = os.path.join(base_path, "layer*/explanation_words_layer*.csv")
    matches = glob.glob(search_glob)

    if matches:
        print(f"Found {len(matches)} files to process")
        # Rewrite each discovered file in turn.
        for csv_path in matches:
            remove_embeddings_column(csv_path)
    else:
        print(f"No explanation_words CSV files found matching pattern: {search_glob}")
def main():
    """Run process_all_layers() with its default base path, then report completion."""
    process_all_layers()
    print("Processing complete!")


if __name__ == "__main__":
    main()