Spaces:
Running
Running
Refactor paths in configuration and .gitignore for chat history and model management
Browse files
- .gitignore +4 -5
- config/settings.py +4 -3
- src/training/fine_tuner.py +1 -0
- src/training/model_manager.py +0 -4
.gitignore
CHANGED
@@ -4,15 +4,14 @@ vector_store/
|
|
4 |
*.faiss
|
5 |
*.pkl
|
6 |
|
7 |
-
# Chat history
|
8 |
status-law-knowledge-base/chat_history/
|
9 |
chat_history/
|
|
|
|
|
10 |
*.json
|
11 |
|
12 |
-
# Training
|
13 |
-
models/fine_tuned/
|
14 |
-
models/registry.json
|
15 |
-
training_output/
|
16 |
*.bin
|
17 |
*.pt
|
18 |
*.pth
|
|
|
4 |
*.faiss
|
5 |
*.pkl
|
6 |
|
7 |
+
# Chat history and models
|
8 |
status-law-knowledge-base/chat_history/
|
9 |
chat_history/
|
10 |
+
chat_history/fine_tuned_models/
|
11 |
+
chat_history/models_registry.json
|
12 |
*.json
|
13 |
|
14 |
+
# Training files
|
|
|
|
|
|
|
15 |
*.bin
|
16 |
*.pt
|
17 |
*.pth
|
config/settings.py
CHANGED
@@ -22,15 +22,16 @@ API_CONFIG = {
|
|
22 |
DATASET_ID = "Rulga/status-law-knowledge-base"
|
23 |
CHAT_HISTORY_PATH = "chat_history"
|
24 |
VECTOR_STORE_PATH = "vector_store"
|
|
|
25 |
|
26 |
# Paths configuration
|
27 |
MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")
|
28 |
-
TRAINING_OUTPUT_DIR = os.path.join(
|
29 |
|
30 |
# Create necessary directories if they don't exist
|
31 |
os.makedirs(MODEL_PATH, exist_ok=True)
|
32 |
os.makedirs(TRAINING_OUTPUT_DIR, exist_ok=True)
|
33 |
-
MODELS_REGISTRY_PATH = os.path.join(
|
34 |
|
35 |
# Models configuration with detailed information
|
36 |
MODELS = {
|
@@ -222,4 +223,4 @@ ACTIVE_MODEL = MODELS[DEFAULT_MODEL]
|
|
222 |
EMBEDDING_MODEL = "intfloat/multilingual-e5-large"
|
223 |
|
224 |
# Request settings
|
225 |
-
USER_AGENT = "Status-Law-Assistant/1.0"
|
|
|
22 |
DATASET_ID = "Rulga/status-law-knowledge-base"
|
23 |
CHAT_HISTORY_PATH = "chat_history"
|
24 |
VECTOR_STORE_PATH = "vector_store"
|
25 |
+
FINE_TUNED_PATH = "fine_tuned_models" # новый путь
|
26 |
|
27 |
# Paths configuration
|
28 |
MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")
|
29 |
+
TRAINING_OUTPUT_DIR = os.path.join(CHAT_HISTORY_PATH, FINE_TUNED_PATH) # изменённый путь
|
30 |
|
31 |
# Create necessary directories if they don't exist
|
32 |
os.makedirs(MODEL_PATH, exist_ok=True)
|
33 |
os.makedirs(TRAINING_OUTPUT_DIR, exist_ok=True)
|
34 |
+
MODELS_REGISTRY_PATH = os.path.join(CHAT_HISTORY_PATH, "models_registry.json") # перемещаем registry.json
|
35 |
|
36 |
# Models configuration with detailed information
|
37 |
MODELS = {
|
|
|
223 |
EMBEDDING_MODEL = "intfloat/multilingual-e5-large"
|
224 |
|
225 |
# Request settings
|
226 |
+
USER_AGENT = "Status-Law-Assistant/1.0"
|
src/training/fine_tuner.py
CHANGED
@@ -38,6 +38,7 @@ class FineTuner:
|
|
38 |
self.model = None
|
39 |
self.chat_analyzer = ChatAnalyzer()
|
40 |
|
|
|
41 |
os.makedirs(self.output_dir, exist_ok=True)
|
42 |
|
43 |
def prepare_training_data(self, output_file: Optional[str] = None) -> str:
|
|
|
38 |
self.model = None
|
39 |
self.chat_analyzer = ChatAnalyzer()
|
40 |
|
41 |
+
# Создаём директорию для сохранения моделей в датасете
|
42 |
os.makedirs(self.output_dir, exist_ok=True)
|
43 |
|
44 |
def prepare_training_data(self, output_file: Optional[str] = None) -> str:
|
src/training/model_manager.py
CHANGED
@@ -23,10 +23,6 @@ class ModelManager:
|
|
23 |
self.registry_path = MODELS_REGISTRY_PATH
|
24 |
os.makedirs(os.path.dirname(self.registry_path), exist_ok=True)
|
25 |
|
26 |
-
# Create empty registry if it doesn't exist
|
27 |
-
if not os.path.exists(self.registry_path):
|
28 |
-
self._save_registry([])
|
29 |
-
|
30 |
def _load_registry(self) -> List[Dict[str, Any]]:
|
31 |
"""Load models registry"""
|
32 |
try:
|
|
|
23 |
self.registry_path = MODELS_REGISTRY_PATH
|
24 |
os.makedirs(os.path.dirname(self.registry_path), exist_ok=True)
|
25 |
|
|
|
|
|
|
|
|
|
26 |
def _load_registry(self) -> List[Dict[str, Any]]:
|
27 |
"""Load models registry"""
|
28 |
try:
|