"""Helpers for normalizing and validating Chinese/English text snippets."""

import re
from typing import List, Optional

_CJK = r"[\u4e00-\u9fff]"
_LATIN = r"[a-zA-Z]"

# Whitespace runs sitting between CJK/CJK, Latin/CJK or CJK/Latin characters.
# Lookbehind/lookahead are used so the neighbouring characters are NOT
# consumed by the match; with consuming groups, "你 好 吗" would only lose
# every other gap because re.sub never returns overlapping matches.
_GAP_PATTERN = re.compile(
    rf"(?<={_CJK})\s+(?={_CJK})"
    rf"|(?<={_LATIN})\s+(?={_CJK})"
    rf"|(?<={_CJK})\s+(?={_LATIN})"
)

# \Z anchors at the very end of the string; "$" would also match just before
# a trailing newline and cause punctuation to be appended after that newline.
_ENDS_WITH_CJK = re.compile(rf"{_CJK}\Z")
_ENDS_WITH_LATIN = re.compile(rf"{_LATIN}\Z")

# Speaker tags accepted at the start of each dialogue line.
_SPEAKER_TAGS = ("[S1]", "[S2]", "[S3]", "[S4]")


def remove_space_between_chinese(text):
    """Remove whitespace around CJK characters.

    Whitespace runs are dropped when they sit between two CJK characters
    (U+4E00..U+9FFF), or between an ASCII letter and a CJK character in
    either order. Whitespace between two ASCII letters is left untouched.

    Args:
        text: The input string.

    Returns:
        The string with the qualifying whitespace runs removed.
    """
    return _GAP_PATTERN.sub("", text)


def normalize_text(current_text):
    """Collapse CJK-adjacent whitespace and ensure terminal punctuation.

    After removing whitespace as in :func:`remove_space_between_chinese`,
    a full-width period is appended when the text ends with a CJK character
    and an ASCII period when it ends with an ASCII letter. Text ending in
    anything else (punctuation, digits, whitespace, or empty text) is
    returned without an added period.

    Args:
        current_text: The input string.

    Returns:
        The normalized string.
    """
    current_text = remove_space_between_chinese(current_text)
    if _ENDS_WITH_CJK.search(current_text):
        return current_text + "。"
    if _ENDS_WITH_LATIN.search(current_text):
        return current_text + "."
    return current_text


def check_monologue_text(text: str, prefix: Optional[str] = None) -> bool:
    """Return True if *text* has non-blank content after the optional prefix.

    Args:
        text: Candidate text; surrounding whitespace is ignored.
        prefix: When given, the stripped text must start with it, and the
            prefix itself does not count as content.

    Returns:
        False when the prefix is required but missing, or when nothing but
        whitespace remains; True otherwise.
    """
    text = text.strip()
    if prefix is not None:
        if not text.startswith(prefix):
            return False
        text = text.removeprefix(prefix).strip()
    return bool(text)


def check_dialect_prompt_text(text: str, prefix: Optional[str] = None) -> bool:
    """Return True if *text* is non-blank and starts with the optional prefix.

    NOTE: unlike :func:`check_monologue_text`, the prefix is not removed
    before the emptiness check, so text consisting solely of the prefix is
    accepted.

    Args:
        text: Candidate text; surrounding whitespace is ignored.
        prefix: When given, the stripped text must start with it.

    Returns:
        False when the prefix is required but missing, or when the stripped
        text is empty; True otherwise.
    """
    text = text.strip()
    if prefix is not None and not text.startswith(prefix):
        return False
    return bool(text)


def check_dialogue_text(text_list: List[str]) -> bool:
    """Return True if every entry is a valid speaker-tagged line.

    Each entry must start with one of ``[S1]``..``[S4]`` (after stripping)
    and carry non-blank content after the tag.

    Args:
        text_list: The dialogue lines to validate.

    Returns:
        False for an empty list or if any entry fails the check; True
        otherwise.
    """
    if not text_list:
        return False
    return all(
        any(check_monologue_text(text, tag) for tag in _SPEAKER_TAGS)
        for text in text_list
    )