Spaces:
Sleeping
Sleeping
import pandas as pd | |
import os | |
import glob | |
def remove_embeddings_column(file_path):
    """Remove the ``embedding`` column from a tab-separated file, in place.

    The file at ``file_path`` is read as tab-separated text. If it has a
    column named ``embedding``, that column is dropped and the result is
    written back to the same path (without an index column). A file that
    has no such column is not rewritten.

    All cells are loaded as plain strings with NA detection disabled, so the
    columns that are kept are written back as they were read: tokens such as
    ``null``, ``NA`` or ``None`` are not turned into empty cells, and values
    such as ``007`` or ``1.50`` are not re-formatted as numbers.

    Args:
        file_path: Path of the tab-separated file to rewrite.

    Returns:
        None. The outcome is reported on standard output. Any exception
        raised while reading or writing is caught and printed rather than
        propagated, so one bad file does not abort the caller.
    """
    try:
        # dtype=str + na_filter=False: only the target column may change;
        # default type/NA inference would silently rewrite the other cells.
        df = pd.read_csv(file_path, sep='\t', dtype=str, na_filter=False)

        if 'embedding' not in df.columns:
            print(f"No embedding column found in {file_path}")
            return

        # Drop the column and overwrite the original file.
        df = df.drop(columns='embedding')
        df.to_csv(file_path, sep='\t', index=False)
        print(f"Successfully removed embedding column from {file_path}")
    except Exception as e:
        # Deliberate best-effort: report the failure and carry on.
        print(f"Error processing {file_path}: {str(e)}")
def process_all_layers(base_path="src/codebert/language_classification"):
    """Strip the embedding column from every per-layer explanation_words file.

    Globs for ``layer*/explanation_words_layer*.csv`` beneath ``base_path``
    and passes each match to ``remove_embeddings_column``. When nothing
    matches, a message naming the pattern is printed and nothing else is done.

    Args:
        base_path: Directory that contains the ``layer*`` sub-directories.

    Returns:
        None.
    """
    search_pattern = os.path.join(base_path, "layer*/explanation_words_layer*.csv")
    matches = glob.glob(search_pattern)

    if matches:
        print(f"Found {len(matches)} files to process")
        # Hand each matched file to the column-removal helper in turn.
        for csv_path in matches:
            remove_embeddings_column(csv_path)
    else:
        print(f"No explanation_words CSV files found matching pattern: {search_pattern}")
def _main():
    """Run the clean-up with the default base path, then report completion."""
    process_all_layers()
    print("Processing complete!")


# Only run when executed as a script, not when imported.
if __name__ == "__main__":
    _main()