Rulga committed on
Commit
c8bbed1
·
1 Parent(s): c4364db

Refactor paths in configuration and .gitignore for chat history and model management

Browse files
.gitignore CHANGED
@@ -4,15 +4,14 @@ vector_store/
4
  *.faiss
5
  *.pkl
6
 
7
- # Chat history
8
  status-law-knowledge-base/chat_history/
9
  chat_history/
 
 
10
  *.json
11
 
12
- # Training and model files
13
- models/fine_tuned/
14
- models/registry.json
15
- training_output/
16
  *.bin
17
  *.pt
18
  *.pth
 
4
  *.faiss
5
  *.pkl
6
 
7
+ # Chat history and models
8
  status-law-knowledge-base/chat_history/
9
  chat_history/
10
+ chat_history/fine_tuned_models/
11
+ chat_history/models_registry.json
12
  *.json
13
 
14
+ # Training files
 
 
 
15
  *.bin
16
  *.pt
17
  *.pth
config/settings.py CHANGED
@@ -22,15 +22,16 @@ API_CONFIG = {
22
  DATASET_ID = "Rulga/status-law-knowledge-base"
23
  CHAT_HISTORY_PATH = "chat_history"
24
  VECTOR_STORE_PATH = "vector_store"
 
25
 
26
  # Paths configuration
27
  MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")
28
- TRAINING_OUTPUT_DIR = os.path.join(MODEL_PATH, "fine_tuned")
29
 
30
  # Create necessary directories if they don't exist
31
  os.makedirs(MODEL_PATH, exist_ok=True)
32
  os.makedirs(TRAINING_OUTPUT_DIR, exist_ok=True)
33
- MODELS_REGISTRY_PATH = os.path.join(MODEL_PATH, "registry.json")
34
 
35
  # Models configuration with detailed information
36
  MODELS = {
@@ -222,4 +223,4 @@ ACTIVE_MODEL = MODELS[DEFAULT_MODEL]
222
  EMBEDDING_MODEL = "intfloat/multilingual-e5-large"
223
 
224
  # Request settings
225
- USER_AGENT = "Status-Law-Assistant/1.0"
 
22
  DATASET_ID = "Rulga/status-law-knowledge-base"
23
  CHAT_HISTORY_PATH = "chat_history"
24
  VECTOR_STORE_PATH = "vector_store"
25
+ FINE_TUNED_PATH = "fine_tuned_models" # новый путь
26
 
27
  # Paths configuration
28
  MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")
29
+ TRAINING_OUTPUT_DIR = os.path.join(CHAT_HISTORY_PATH, FINE_TUNED_PATH) # изменённый путь
30
 
31
  # Create necessary directories if they don't exist
32
  os.makedirs(MODEL_PATH, exist_ok=True)
33
  os.makedirs(TRAINING_OUTPUT_DIR, exist_ok=True)
34
+ MODELS_REGISTRY_PATH = os.path.join(CHAT_HISTORY_PATH, "models_registry.json") # перемещаем registry.json
35
 
36
  # Models configuration with detailed information
37
  MODELS = {
 
223
  EMBEDDING_MODEL = "intfloat/multilingual-e5-large"
224
 
225
  # Request settings
226
+ USER_AGENT = "Status-Law-Assistant/1.0"
src/training/fine_tuner.py CHANGED
@@ -38,6 +38,7 @@ class FineTuner:
38
  self.model = None
39
  self.chat_analyzer = ChatAnalyzer()
40
 
 
41
  os.makedirs(self.output_dir, exist_ok=True)
42
 
43
  def prepare_training_data(self, output_file: Optional[str] = None) -> str:
 
38
  self.model = None
39
  self.chat_analyzer = ChatAnalyzer()
40
 
41
+ # Создаём директорию для сохранения моделей в датасете
42
  os.makedirs(self.output_dir, exist_ok=True)
43
 
44
  def prepare_training_data(self, output_file: Optional[str] = None) -> str:
src/training/model_manager.py CHANGED
@@ -23,10 +23,6 @@ class ModelManager:
23
  self.registry_path = MODELS_REGISTRY_PATH
24
  os.makedirs(os.path.dirname(self.registry_path), exist_ok=True)
25
 
26
- # Create empty registry if it doesn't exist
27
- if not os.path.exists(self.registry_path):
28
- self._save_registry([])
29
-
30
  def _load_registry(self) -> List[Dict[str, Any]]:
31
  """Load models registry"""
32
  try:
 
23
  self.registry_path = MODELS_REGISTRY_PATH
24
  os.makedirs(os.path.dirname(self.registry_path), exist_ok=True)
25
 
 
 
 
 
26
  def _load_registry(self) -> List[Dict[str, Any]]:
27
  """Load models registry"""
28
  try: