---
# Benchmark configuration: model endpoint, dataset, metrics, domain keyword
# buckets, and output files.

# Endpoint and generation settings for the model under test.
model:
  base_url: "http://127.0.0.1:8000"
  max_tokens: 256
  temperature: 0.1
  # NOTE(review): unit is not stated in this file; presumably seconds -
  # confirm against the consumer.
  timeout: 30

# Input data for the benchmark run.
datasets:
  benchmark_dataset:
    file_path: "Personal_De-identifier_Benchmark_SFT.jsonl"
    sample_size: 100
    # NOTE(review): presumably the key names looked up in each dataset
    # record - verify against the loader.
    instruction_field: "instruction"
    input_field: "input"
    expected_output_field: "response"

# Metrics to report; each entry has a display name, a description, and a type.
metrics:
  pii_detection:
    name: "PII Detection Rate"
    description: "Percentage of personal identifiers correctly identified and masked"
    type: "accuracy"

  completeness:
    name: "Completeness Score"
    description: "Percentage of texts where all PII was successfully removed"
    type: "binary_accuracy"

  semantic_preservation:
    name: "Semantic Preservation"
    description: "How well the original meaning is preserved (placeholder-based similarity)"
    type: "similarity"

  latency:
    name: "Average Latency"
    description: "Average response time in milliseconds"
    type: "latency"

# Domain buckets, each with a display name and a keyword list.
# NOTE(review): the original indentation was lost, so the nesting level of
# this section was reconstructed. It is placed at top level because its
# entries do not share the name/description/type shape of the metrics above;
# confirm whether the consumer expects it under `metrics` instead.
domain_performance:
  medical:
    name: "Medical Records"
    keywords: ["patient", "doctor", "hospital", "medical", "diagnosis"]
  legal:
    name: "Legal Documents"
    keywords: ["deponent", "attorney", "case", "court", "legal"]
  hr:
    name: "HR Records"
    keywords: ["employee", "salary", "hr", "personnel", "recruitment"]
  customer_service:
    name: "Customer Service"
    keywords: ["customer", "complaint", "service", "support", "inquiry"]
  research:
    name: "Research Data"
    keywords: ["participant", "study", "research", "consent", "ethics"]

# Where results are written and how many examples are included.
output:
  results_file: "benchmarks.txt"
  detailed_results_file: "benchmark_results.json"
  include_examples: true
  max_examples: 10