Spaces:
Running
on
Zero
Running
on
Zero
| import re | |
| from typing import List | |
# Whitespace runs that should be deleted: those sitting between two CJK
# ideographs, or between a CJK ideograph and an ASCII letter (either order).
# Lookbehind/lookahead keep the neighbouring characters out of the match, so
# a character can border two removed gaps (e.g. the middle of "你 好 吗").
_CJK_GAP_RE = re.compile(
    r'(?<=[\u4e00-\u9fff])\s+(?=[\u4e00-\u9fffa-zA-Z])'
    r'|(?<=[a-zA-Z])\s+(?=[\u4e00-\u9fff])'
)


def remove_space_between_chinese(text):
    """Delete whitespace that separates CJK ideographs from their neighbours.

    A run of whitespace is removed when it lies
      * between two characters in U+4E00..U+9FFF, or
      * between such a character and an ASCII letter, in either order.
    Whitespace between two ASCII letters (or next to digits, punctuation,
    etc.) is left untouched.

    Args:
        text: The string to clean up.

    Returns:
        A copy of ``text`` with the qualifying whitespace runs removed.
    """
    return _CJK_GAP_RE.sub('', text)
# Whitespace runs bordering a CJK ideograph (U+4E00..U+9FFF) on at least one
# side, with the other side being a CJK ideograph or an ASCII letter.
# Lookarounds leave the neighbours unconsumed so consecutive gaps such as
# "你 好 吗" are all matched in a single pass.
_NORMALIZE_GAP_RE = re.compile(
    r'(?<=[\u4e00-\u9fff])\s+(?=[\u4e00-\u9fffa-zA-Z])'
    r'|(?<=[a-zA-Z])\s+(?=[\u4e00-\u9fff])'
)


def normalize_text(current_text):
    """Tighten CJK spacing and make sure the text ends with a full stop.

    Steps:
      1. Remove whitespace between two CJK ideographs and between a CJK
         ideograph and an ASCII letter (either order).
      2. If the result ends with a CJK ideograph, append ``"。"``; if it
         ends with an ASCII letter, append ``"."``.  Text ending in anything
         else (punctuation, digits, an empty string, ...) is returned as is.

    Args:
        current_text: The string to normalise.

    Returns:
        The normalised string.
    """
    current_text = _NORMALIZE_GAP_RE.sub('', current_text)

    # When either search below matches, the final character is an ideograph
    # or a letter (or the single trailing newline that `$` tolerates), never
    # a punctuation mark, so no extra "already punctuated" check is needed.
    if re.search(r'[\u4e00-\u9fff]$', current_text):
        current_text += "。"
    elif re.search(r'[a-zA-Z]$', current_text):
        current_text += "."
    return current_text
def check_monologue_text(text: str, prefix: str = None) -> bool:
    """Report whether ``text`` holds real content, optionally after a prefix.

    Surrounding whitespace is ignored.  When ``prefix`` is supplied, the
    trimmed text must begin with it, and the prefix itself does not count as
    content: something non-blank has to follow it.

    Args:
        text: Candidate line of text.
        prefix: Required leading marker, or ``None`` for no requirement.

    Returns:
        ``True`` if the prefix requirement (if any) is met and non-whitespace
        content remains; ``False`` otherwise.
    """
    body = text.strip()
    if prefix is not None:
        if not body.startswith(prefix):
            return False
        # Drop the marker so that only what follows it is judged.
        body = body.removeprefix(prefix)
    return bool(body.strip())
def check_dialect_prompt_text(text: str, prefix: str = None) -> bool:
    """Report whether ``text`` is non-blank and starts with ``prefix``.

    Surrounding whitespace is ignored.  The prefix is only checked for, not
    removed, so a text consisting solely of the prefix is accepted as long
    as it is non-empty.

    Args:
        text: Candidate prompt text.
        prefix: Required leading marker, or ``None`` for no requirement.

    Returns:
        ``True`` if the trimmed text is non-empty and satisfies the prefix
        requirement (if any); ``False`` otherwise.
    """
    trimmed = text.strip()
    prefix_ok = prefix is None or trimmed.startswith(prefix)
    return prefix_ok and len(trimmed) > 0
def check_dialogue_text(text_list: List[str]) -> bool:
    """Report whether every line of a dialogue is a valid speaker turn.

    A line is valid when ``check_monologue_text`` accepts it with one of the
    speaker tags ``[S1]``, ``[S2]``, ``[S3]`` or ``[S4]`` as prefix.

    Args:
        text_list: The dialogue lines, one speaker turn per entry.

    Returns:
        ``False`` for an empty list or if any line fails the check;
        ``True`` otherwise.
    """
    if len(text_list) == 0:
        return False
    speaker_tags = ("[S1]", "[S2]", "[S3]", "[S4]")
    return all(
        any(check_monologue_text(line, tag) for tag in speaker_tags)
        for line in text_list
    )