Your IP : 3.145.153.251


Current Path : /opt/cloudlinux/venv/lib64/python3.11/site-packages/ssa/modules/
Upload File :
Current File : //opt/cloudlinux/venv/lib64/python3.11/site-packages/ssa/modules/processor.py

# -*- coding: utf-8 -*-

# Copyright © Cloud Linux GmbH & Cloud Linux Software, Inc 2010-2021 All Rights Reserved
#
# Licensed under CLOUD LINUX LICENSE AGREEMENT
# http://cloudlinux.com/docs/LICENSE.TXT

"""
This module contains RequestProcessor class
"""
import logging
import sys
import time
import traceback
from datetime import datetime, timedelta, timezone
from threading import Thread, RLock, current_thread
from typing import Callable, Any

from .autotracer import AutoTracer
from .common import Common
from .decision_maker import DecisionMaker
from .stat_sender import StatisticsSender
from ..db import session_scope, setup_database, RequestResult, cleanup_old_data, restore_database, is_malformed_database
from ..internal.exceptions import SSAError
from ..internal.utils import (
    singleton,
    url_split,
    switch_schedstats
)


@singleton
class RequestProcessor(Common):
    """
    SSA Request processor implementation.
    Only one instance is allowed to be created
    """

    BUFFER_SIZE = 100

    def __init__(self, engine=None):
        super().__init__()
        self.logger = logging.getLogger('req_processor')
        self.logger.info('Processor enabled: %s', __package__)
        # enable throttling detection kernel mechanism on service start
        switch_schedstats(enabled=True)

        self.engine = engine if engine else setup_database()
        self._lock = RLock()
        self.decision_maker = DecisionMaker(engine=engine)
        self.sender = StatisticsSender()
        self.auto_tracer = AutoTracer(engine=engine)
        self.start_background_routine()

        # sqlite is not thread-safe and saving results one-by-one
        # into database slowes ssa 10x times
        # so let's make a buffer that will contain some elements
        # and flush it periodically
        self._buffer = []

    @property
    def configured_duration(self):
        """
        Return config file value multiplied by 1000000,
        as we receive duration in microseconds
        """
        return self.requests_duration * 1000000

    def send_stats(self, report: dict):
        """
        Call Statistics Sender
        """
        try:
            self.sender.send(report)
        except SSAError as e:
            self.logger.error('StatisticsSender failed: %s', str(e))

    def start_background_routine(self) -> None:
        """
        Start dumper|DecisionMaker thread in background
        """
        t = Thread(target=self.background_routine, daemon=True)
        t.start()
        self.logger.info('[%s] Routine started', t.name)

    def background_routine(self) -> None:
        """
        Dumps collected stats to file once an hour.
        Runs DecisionMaker once a day
        Cleanup storage after DecisionMaker run
        """
        while True:
            tick = datetime.now(timezone.utc)
            if tick.minute == 0:
                if tick.hour == 0:
                    if is_malformed_database(self.engine):
                        self._save_exec(self.restore_db_with_lock(self.engine))
                        self.logger.info(
                            '[%s] Routine thread found Database disk image is malformed and now restored (%s)',
                            current_thread().name, tick)
                    self.logger.info(
                        '[%s] Routine thread launching buffer flushing (%s)',
                        current_thread().name, tick)
                    self._safe_exec(self.flush_with_lock)
                    self.logger.info(
                        '[%s] Routine thread launching AutoTracer (%s)',
                        current_thread().name, tick)
                    self._safe_exec(self.auto_tracer)
                    self.logger.info(
                        '[%s] Routine thread launching DecisionMaker (%s)',
                        current_thread().name, tick)
                    report = self._safe_exec(self.decision_maker)
                    self.logger.info(
                        '[%s] Routine thread launching cleanup (%s)',
                        current_thread().name, tick)
                    cleanup_old_data(self.engine)

                    self._safe_exec(self.send_stats, report)
                    # attempt to enable throttling detection kernel mechanism
                    # in case it was accidentally switched off
                    switch_schedstats(enabled=True)
                self._simple_sleep(60)
            else:
                self._sleep_till_next_hour(tick.minute)

    def _safe_exec(self, action: Callable, *args) -> Any:
        """Call requested Callable with given args and capture any exception"""
        try:
            return action(*args)
        except Exception:
            et, ev, _ = sys.exc_info()
            self.logger.exception('%s failed with exception %s, %s',
                                  str(action), et, ev,
                                  extra={'orig_traceback': traceback.format_exc()})

    def _simple_sleep(self, to_sleep: int = 15 * 60):
        """
        Log and sleep given number of seconds or 15 minutes by default
        """
        self.logger.info('[%s] Routine thread sleeping for (%s)',
                         current_thread().name, to_sleep)
        time.sleep(to_sleep)

    def _sleep_till_next_hour(self, start_minute):
        """
        Sleep the number of minutes remaining till next hour
        """
        sleep_for = (timedelta(hours=1) - timedelta(
            minutes=start_minute)).total_seconds()
        self._simple_sleep(int(sleep_for))

    def restore_db_with_lock(self, engine):
        with self._lock:
            restore_database(engine)

    @staticmethod
    def get_interval_for(timestamp: int) -> int:
        """
        Takes an hour of a day, to which the given timestamp belongs
        """
        return datetime.fromtimestamp(timestamp, timezone.utc).hour

    def flush_with_lock(self):
        with self._lock:
            objects = self._buffer[:]
            self._buffer = []
        self.flush_buffer(objects)

    def flush_buffer(self, objects=None):
        """
        Save in-memory buffer into database.
        """
        if objects is None:
            objects = self._buffer

        if not objects:
            return

        with session_scope(self.engine) as db:
            db.bulk_save_objects(objects)

    def handle(self, data: dict) -> None:
        """
        Process given request data
        """
        if not data:
            self.logger.info('[%s] has empty request, skipping', current_thread().name)
            return
        url = data.get('url')
        if self.is_ignored(url):
            self.logger.debug('%s ignored', url)
            return
        domain, uri = url_split(url)

        self.logger.debug('[%s] Acquires lock to handle request counters',
                          current_thread().name)
        objects_per_thread = []

        with self._lock:
            self._buffer.append(
                RequestResult(
                    domain=domain,
                    path=url,

                    timestamp=data['timestamp'],
                    duration=data['duration'],
                    is_slow_request=data['duration'] > self.configured_duration,
                    hitting_limits=data['hitting_limits'],
                    throttled_time=data['throttled_time'],
                    io_throttled_time=data['io_throttled_time'],
                    wordpress=data['wordpress'],
                )
            )
            # check buffer in same lock to prevent race conditions
            # between threads modifying buffer
            if len(self._buffer) >= self.BUFFER_SIZE:
                objects_per_thread = self._buffer[:]
                self._buffer = []

            self.flush_buffer(objects_per_thread)

        self.logger.debug('[%s] Released lock to handle request counters',
                          current_thread().name)

?>