"""Extract every ``.tar`` archive under a directory tree into one flat folder.

Each top-level entry of an archive is moved into the output directory and
renamed ``<archive name>_<entry name>`` so that entries from different
archives do not overwrite each other.
"""

import os
import shutil
import tarfile

# Define the base directory (update this to your folder location)
base_dir = "/Users/pfz/Code/06-attribution/open-images/data/train"

# Define the output directory
output_dir = os.path.join(base_dir, "all_extracted")


def _extract_archive(tar_path: str, dest_dir: str) -> None:
    """Unpack *tar_path* into *dest_dir*, using the safe filter when available."""
    with tarfile.open(tar_path) as tar:
        if hasattr(tarfile, "data_filter"):
            # Rejects members that would escape dest_dir (absolute paths,
            # "..", links pointing outside) instead of writing them.
            tar.extractall(dest_dir, filter="data")
        else:
            # NOTE(review): this Python has no extraction filters, so member
            # paths are trusted as-is. Only run on archives you trust.
            tar.extractall(dest_dir)


def extract_all_tars(source_root: str, dest_dir: str) -> int:
    """Extract all ``.tar`` files found under *source_root* into *dest_dir*.

    Args:
        source_root: Directory tree searched recursively for ``.tar`` files.
        dest_dir: Directory receiving the extracted entries; created if
            missing. It may live inside *source_root*.

    Returns:
        The number of archives that were extracted.

    Raises:
        tarfile.TarError: If an archive cannot be read or a member is
            rejected by the extraction filter.
        OSError: If a directory or file cannot be created, moved or removed.
    """
    os.makedirs(dest_dir, exist_ok=True)
    temp_dir = os.path.join(source_root, "temp_extract")

    # Directories the walk must never enter: archives that were themselves
    # extracted into the output (or left in the scratch area) would otherwise
    # be picked up and extracted a second time.
    skipped = {os.path.abspath(dest_dir), os.path.abspath(temp_dir)}

    extracted_count = 0
    for root, dirs, files in os.walk(source_root):
        # In-place assignment is what tells os.walk to prune these subtrees.
        dirs[:] = [
            name
            for name in dirs
            if os.path.abspath(os.path.join(root, name)) not in skipped
        ]

        for file in files:
            if not file.endswith(".tar"):
                continue

            tar_path = os.path.join(root, file)
            tar_name = file[: -len(".tar")]
            print(f"Extracting: {tar_path}")

            # Start from an empty scratch directory so leftovers from an
            # interrupted run are not moved under this archive's prefix.
            shutil.rmtree(temp_dir, ignore_errors=True)
            os.makedirs(temp_dir)
            try:
                _extract_archive(tar_path, temp_dir)

                # Move the extracted entries to the output directory.
                for extracted_file in os.listdir(temp_dir):
                    source_path = os.path.join(temp_dir, extracted_file)
                    target_path = os.path.join(
                        dest_dir, f"{tar_name}_{extracted_file}"
                    )
                    shutil.move(source_path, target_path)
            finally:
                # Clean up even when extraction or a move fails.
                shutil.rmtree(temp_dir, ignore_errors=True)

            extracted_count += 1

    print(f"All files extracted to: {dest_dir}")
    return extracted_count


if __name__ == "__main__":
    extract_all_tars(base_dir, output_dir)