|
---
datasets:
- sugiv/synthetic-text-transformation-dataset
base_model:
- google/flan-t5-large
---
|
|
|
# Instruction Fine-tuned FLAN-T5 Large for Text Transformation with Implicit Grammar and Spelling Correction, dubbed Bluey-poor
|
|
|
I am GPU poor: this instruction fine-tuned model was trained for only 3 epochs on two A100 80GB GPUs rented for 9 hours, at a total cost of about $10.
|
|
|
This model is an instruction fine-tuned version of FLAN-T5 Large for text transformation tasks, trained on the [synthetic-text-transformation-dataset](https://huggingface.co/datasets/sugiv/synthetic-text-transformation-dataset/viewer/default/train).
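
A minimal sketch for inspecting the training data with the `datasets` library (the `train` split name follows the dataset viewer link above):

```python
from datasets import load_dataset

# Load the synthetic text-transformation dataset used for fine-tuning
# and print one example to see the prompt/target format.
dataset = load_dataset("sugiv/synthetic-text-transformation-dataset", split="train")
print(dataset)
print(dataset[0])
```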
|
|
|
## Usage |
|
|
|
```python
# Load model and tokenizer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel, PeftConfig

# Load the PEFT configuration
peft_model_id = "sugiv/bluey-poor-flant5"
peft_config = PeftConfig.from_pretrained(peft_model_id)

# Load the base model
base_model = AutoModelForSeq2SeqLM.from_pretrained(peft_config.base_model_name_or_path)

# Load the PEFT adapter on top of the base model
model = PeftModel.from_pretrained(base_model, peft_model_id)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)

# Set the model to evaluation mode
model.eval()

def generate_transform_prompt(input_text, filter_combination):
    return f'''You are an advanced text transformation AI. Your task is to {filter_combination['Task']} the given input text according to the specified parameters. {filter_combination['Task'].capitalize()}ing means expressing the same meaning using different words, while maintaining the original intent. Always correct spelling and grammatical errors implicitly.

User: Transform the following text based on these parameters:
Task: {filter_combination['Task']}
Tone: {filter_combination['Tone']}
Target Audience: {filter_combination['Target Audience']}
Complexity: {filter_combination['Complexity']}
Purpose: {filter_combination['Purpose']}
Style: {filter_combination['Style']}
Verbosity: {filter_combination['Verbosity']}

Input Text: {input_text}

Instructions:
1. {filter_combination['Task']} the text according to the specified parameters.
2. Maintain the original meaning, context, jargon, and entities.
3. Adjust the language complexity and verbosity as specified.
4. Optimize the text for the target audience and purpose.
5. Ensure the output is coherent and flows naturally.
6. Implicitly correct any spelling or grammatical errors.

Transformed text:'''

# Example usage
input_text = "The quick brown fox jumps over the lazy dog."
filter_combination = {
    "Task": "Rephrase",
    "Tone": "Professional",
    "Target Audience": "Business executives",
    "Complexity": "Advanced",
    "Purpose": "Inform",
    "Style": "Analytical",
    "Verbosity": "Concise",
}

prompt = generate_transform_prompt(input_text, filter_combination)
inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
outputs = model.generate(**inputs, max_length=150, num_return_sequences=1)
# generate returns a batch of token ids; decode the first (and only) sequence
transformed_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(transformed_text)
```
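
Generation quality depends on the decoding settings; `max_length`, `num_beams`, and sampling arguments to `model.generate` are worth tuning for your inputs.

If the adapter in this repo is a LoRA adapter (an assumption; check the adapter config), it can also be merged into the base model for simpler deployment. A minimal sketch, using a hypothetical local output path:

```python
# Merge the adapter weights into the base FLAN-T5 weights (supported for
# mergeable adapter types such as LoRA) and save a standalone checkpoint.
merged_model = model.merge_and_unload()
merged_model.save_pretrained("bluey-poor-flant5-merged")  # hypothetical local path
tokenizer.save_pretrained("bluey-poor-flant5-merged")
```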