Spaces:

lamhieu
/

lightweight-embeddings

Running

App Files Community

lamhieu committed on Jan 4

Commit

b8fd9fb

1 Parent(s): b6b230c

chore: update something

Browse files

Files changed (2) hide show

lightweight_embeddings/analytics.py +96 -29
lightweight_embeddings/service.py +1 -1

lightweight_embeddings/analytics.py CHANGED Viewed

@@ -6,27 +6,22 @@ from datetime import datetime
 from collections import defaultdict
 from typing import Dict
 logger = logging.getLogger(__name__)
 class Analytics:
-    def __init__(self, redis_url: str, sync_interval: int = 60):
         """
         Initializes the Analytics class with an async Redis connection and sync interval.
         Parameters:
         - redis_url: Redis connection URL (e.g., 'redis://localhost:6379/0')
         - sync_interval: Interval in seconds for syncing with Redis.
         """
-        self.redis_client = redis.from_url(
-            redis_url,
-            decode_responses=True,
-            health_check_interval=10,
-            socket_connect_timeout=5,
-            retry_on_timeout=True,
-            socket_keepalive=True,
-        )
         self.local_buffer = {
             "access": defaultdict(
                 lambda: defaultdict(int)
@@ -35,12 +30,24 @@ class Analytics:
                 lambda: defaultdict(int)
             ),  # {period: {model_id: tokens_count}}
         }
-        self.sync_interval = sync_interval
         self.lock = asyncio.Lock()  # Async lock for thread-safe updates
         asyncio.create_task(self._start_sync_task())
         logger.info("Initialized Analytics with Redis connection: %s", redis_url)
     def _get_period_keys(self) -> tuple:
         """
         Returns keys for day, week, month, and year based on the current date.
@@ -101,33 +108,93 @@ class Analytics:
         Synchronizes local buffer data with Redis.
         """
         async with self.lock:
-            pipeline = self.redis_client.pipeline()
-            # Sync access counts
-            for period, models in self.local_buffer["access"].items():
-                for model_id, count in models.items():
-                    redis_key = f"analytics:access:{period}"
-                    pipeline.hincrby(redis_key, model_id, count)
-            # Sync token counts
-            for period, models in self.local_buffer["tokens"].items():
-                for model_id, count in models.items():
-                    redis_key = f"analytics:tokens:{period}"
-                    pipeline.hincrby(redis_key, model_id, count)
-            pipeline.execute()
-            self.local_buffer["access"].clear()  # Clear access buffer after sync
-            self.local_buffer["tokens"].clear()  # Clear tokens buffer after sync
-            logger.info("Synced analytics data to Redis.")
     async def _start_sync_task(self):
         """
         Starts a background task that periodically syncs data to Redis.
         """
         while True:
             await asyncio.sleep(self.sync_interval)
             try:
                 await self._sync_to_redis()
             except redis.exceptions.ConnectionError as e:
                 logger.error("Redis connection error: %s", e)
-                await asyncio.sleep(5)

 from collections import defaultdict
 from typing import Dict
 logger = logging.getLogger(__name__)
 class Analytics:
+    def __init__(self, redis_url: str, sync_interval: int = 60, max_retries: int = 5):
         """
         Initializes the Analytics class with an async Redis connection and sync interval.
         Parameters:
         - redis_url: Redis connection URL (e.g., 'redis://localhost:6379/0')
         - sync_interval: Interval in seconds for syncing with Redis.
+        - max_retries: Maximum number of retries for reconnecting to Redis.
         """
+        self.redis_url = redis_url
+        self.sync_interval = sync_interval
+        self.max_retries = max_retries
+        self.redis_client = self._create_redis_client()
         self.local_buffer = {
             "access": defaultdict(
                 lambda: defaultdict(int)
                 lambda: defaultdict(int)
             ),  # {period: {model_id: tokens_count}}
         }
         self.lock = asyncio.Lock()  # Async lock for thread-safe updates
         asyncio.create_task(self._start_sync_task())
         logger.info("Initialized Analytics with Redis connection: %s", redis_url)
+    def _create_redis_client(self) -> redis.Redis:
+        """
+        Creates and returns a new Redis client.
+        """
+        return redis.from_url(
+            self.redis_url,
+            decode_responses=True,
+            health_check_interval=10,
+            socket_connect_timeout=5,
+            retry_on_timeout=True,
+            socket_keepalive=True,
+        )
     def _get_period_keys(self) -> tuple:
         """
         Returns keys for day, week, month, and year based on the current date.
         Synchronizes local buffer data with Redis.
         """
         async with self.lock:
+            try:
+                pipeline = self.redis_client.pipeline()
+                # Sync access counts
+                for period, models in self.local_buffer["access"].items():
+                    for model_id, count in models.items():
+                        redis_key = f"analytics:access:{period}"
+                        pipeline.hincrby(redis_key, model_id, count)
+                # Sync token counts
+                for period, models in self.local_buffer["tokens"].items():
+                    for model_id, count in models.items():
+                        redis_key = f"analytics:tokens:{period}"
+                        pipeline.hincrby(redis_key, model_id, count)
+                pipeline.execute()
+                self.local_buffer["access"].clear()  # Clear access buffer after sync
+                self.local_buffer["tokens"].clear()  # Clear tokens buffer after sync
+                logger.info("Synced analytics data to Redis.")
+            except redis.exceptions.ConnectionError as e:
+                logger.error("Redis connection error during sync: %s", e)
+                raise e
+            except Exception as e:
+                logger.error("Unexpected error during Redis sync: %s", e)
+                raise e
     async def _start_sync_task(self):
         """
         Starts a background task that periodically syncs data to Redis.
+        Implements retry logic with exponential backoff on connection failures.
         """
+        retry_delay = 1  # Initial retry delay in seconds
         while True:
             await asyncio.sleep(self.sync_interval)
             try:
                 await self._sync_to_redis()
+                retry_delay = 1  # Reset retry delay after successful sync
             except redis.exceptions.ConnectionError as e:
                 logger.error("Redis connection error: %s", e)
+                await self._handle_redis_reconnection()
+            except Exception as e:
+                logger.error("Error during sync: %s", e)
+                # Depending on the error, you might want to handle differently
+    async def _handle_redis_reconnection(self):
+        """
+        Handles Redis reconnection with exponential backoff.
+        """
+        retry_count = 0
+        delay = 1  # Start with 1 second delay
+        while retry_count < self.max_retries:
+            try:
+                logger.info("Attempting to reconnect to Redis (Attempt %d)...", retry_count + 1)
+                self.redis_client.close()
+                self.redis_client = self._create_redis_client()
+                # Optionally, perform a simple command to check connection
+                self.redis_client.ping()
+                logger.info("Successfully reconnected to Redis.")
+                return
+            except redis.exceptions.ConnectionError as e:
+                logger.error("Reconnection attempt %d failed: %s", retry_count + 1, e)
+                retry_count += 1
+                await asyncio.sleep(delay)
+                delay *= 2  # Exponential backoff
+        logger.critical("Max reconnection attempts reached. Unable to reconnect to Redis.")
+        # Depending on your application's requirements, you might choose to exit or keep retrying indefinitely
+        # For example, to keep retrying:
+        while True:
+            try:
+                logger.info("Retrying to reconnect to Redis...")
+                self.redis_client.close()
+                self.redis_client = self._create_redis_client()
+                self.redis_client.ping()
+                logger.info("Successfully reconnected to Redis.")
+                break
+            except redis.exceptions.ConnectionError as e:
+                logger.error("Reconnection attempt failed: %s", e)
+                await asyncio.sleep(delay)
+                delay = min(delay * 2, 60)  # Cap the delay to 60 seconds
+    async def close(self):
+        """
+        Closes the Redis connection gracefully.
+        """
+        self.redis_client.close()
+        logger.info("Closed Redis connection.")

lightweight_embeddings/service.py CHANGED Viewed

@@ -155,7 +155,7 @@ class EmbeddingsService:
     """
     def __init__(self, config: Optional[ModelConfig] = None):
-        self.lru_cache = LRUCache(maxsize=50_000)  # Approximate for ~500MB usage
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.config = config or ModelConfig()

     """
     def __init__(self, config: Optional[ModelConfig] = None):
+        self.lru_cache = LRUCache(maxsize=10_000)  # Approximate for ~100MB usage
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.config = config or ModelConfig()