# DeId-Small / benchmark_config.yaml
# Uploaded by Minibase with huggingface_hub (commit d16cb83, verified; 1.77 kB).
# Settings for the model endpoint that the benchmark calls.
model:
  # Loopback address: the server is expected on this machine, port 8000.
  base_url: "http://127.0.0.1:8000"
  max_tokens: 256
  temperature: 0.1
  # NOTE(review): unit is not stated in this file (presumably seconds) — confirm.
  timeout: 30
# Input data for the benchmark run.
datasets:
  benchmark_dataset:
    file_path: "Personal_De-identifier_Benchmark_SFT.jsonl"
    sample_size: 100  # Use first 100 examples for quick benchmarking
    # Names of the fields to read from each example.
    # NOTE(review): presumably keys of each JSONL record — verify against the
    # benchmark runner.
    instruction_field: "instruction"
    input_field: "input"
    expected_output_field: "response"
# Metric definitions. Each entry is keyed by a metric id and carries a display
# name, a human-readable description, and a type tag.
metrics:
  # Primary metrics for HuggingFace
  pii_detection:
    name: "PII Detection Rate"
    description: "Percentage of personal identifiers correctly identified and masked"
    type: "accuracy"
  completeness:
    name: "Completeness Score"
    description: "Percentage of texts where all PII was successfully removed"
    type: "binary_accuracy"
  semantic_preservation:
    name: "Semantic Preservation"
    description: "How well the original meaning is preserved (placeholder-based similarity)"
    type: "similarity"
  latency:
    name: "Average Latency"
    description: "Average response time in milliseconds"
    type: "latency"
# Domain-specific performance
# Each domain has a display name and a list of keywords.
# NOTE(review): presumably the keywords are used to assign examples to a
# domain — confirm against the benchmark runner. Also confirm whether this
# section is a top-level key (as written here) or belongs under `metrics`.
domain_performance:
  medical:
    name: "Medical Records"
    keywords: ["patient", "doctor", "hospital", "medical", "diagnosis"]
  legal:
    name: "Legal Documents"
    keywords: ["deponent", "attorney", "case", "court", "legal"]
  hr:
    name: "HR Records"
    keywords: ["employee", "salary", "hr", "personnel", "recruitment"]
  customer_service:
    name: "Customer Service"
    keywords: ["customer", "complaint", "service", "support", "inquiry"]
  research:
    name: "Research Data"
    keywords: ["participant", "study", "research", "consent", "ethics"]
# Output settings for the benchmark run: result file names and example options.
output:
  results_file: "benchmarks.txt"
  detailed_results_file: "benchmark_results.json"
  include_examples: true
  # NOTE(review): presumably caps the examples included when
  # include_examples is true — confirm.
  max_examples: 10