# File size: 1,770 Bytes
# d16cb83
# Connection and generation settings for the model being benchmarked.
model:
  # Loopback address, port 8000.
  base_url: "http://127.0.0.1:8000"
  max_tokens: 256
  temperature: 0.1
  # NOTE(review): the unit is not stated in this file; presumably seconds —
  # confirm against the code that reads it.
  timeout: 30
# Input data for the benchmark run.
datasets:
  benchmark_dataset:
    file_path: "Personal_De-identifier_Benchmark_SFT.jsonl"
    sample_size: 100  # Use first 100 examples for quick benchmarking
    # Field names to read from each record (presumably keys of every JSONL
    # line — confirm against the loader).
    instruction_field: "instruction"
    input_field: "input"
    expected_output_field: "response"
# Metric definitions. Each entry carries its own name/description/type (or
# name/keywords), so every entry must be a separate nested mapping; otherwise
# the repeated keys collide.
metrics:
  # Primary metrics for HuggingFace
  pii_detection:
    name: "PII Detection Rate"
    description: "Percentage of personal identifiers correctly identified and masked"
    type: "accuracy"
  completeness:
    name: "Completeness Score"
    description: "Percentage of texts where all PII was successfully removed"
    type: "binary_accuracy"
  semantic_preservation:
    name: "Semantic Preservation"
    description: "How well the original meaning is preserved (placeholder-based similarity)"
    type: "similarity"
  latency:
    name: "Average Latency"
    description: "Average response time in milliseconds"
    type: "latency"

  # Domain-specific performance
  # NOTE(review): domain_performance is placed under `metrics` here; confirm
  # that the consumer looks it up at this level and not at the top level.
  # The keyword lists are presumably used to assign a sample to a domain —
  # verify against the benchmark code.
  domain_performance:
    medical:
      name: "Medical Records"
      keywords: ["patient", "doctor", "hospital", "medical", "diagnosis"]
    legal:
      name: "Legal Documents"
      keywords: ["deponent", "attorney", "case", "court", "legal"]
    hr:
      name: "HR Records"
      keywords: ["employee", "salary", "hr", "personnel", "recruitment"]
    customer_service:
      name: "Customer Service"
      keywords: ["customer", "complaint", "service", "support", "inquiry"]
    research:
      name: "Research Data"
      keywords: ["participant", "study", "research", "consent", "ethics"]
# Where benchmark results are written and how much detail to include.
output:
  results_file: "benchmarks.txt"
  detailed_results_file: "benchmark_results.json"
  include_examples: true
  # NOTE(review): presumably caps the number of examples included when
  # include_examples is true — confirm against the report writer.
  max_examples: 10
|