[ Mini Kiebo ]
Server: Windows NT DESKTOP-5B8S0D4 6.2 build 9200 (Windows 8 Professional Edition) i586
Path:
D:
/
Backup
/
05122024
/
htdocs
/
jurnal-kesmas
/
lib
/
pkp
/
classes
/
task
/
[
Home
]
File: PKPUsageStatsLoader.php
<?php

/**
 * @file classes/task/PKPUsageStatsLoader.php
 *
 * Copyright (c) 2022 Simon Fraser University
 * Copyright (c) 2022 John Willinsky
 * Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
 *
 * @class PKPUsageStatsLoader
 *
 * @ingroup tasks
 *
 * @brief Scheduled task to extract transform and load usage statistics data into database.
 */

namespace PKP\task;

use APP\core\Application;
use APP\core\Services;
use APP\statistics\StatisticsHelper;
use Illuminate\Support\Facades\Bus;
use PKP\file\FileManager;
use PKP\jobs\statistics\CompileMonthlyMetrics;
use PKP\scheduledTask\ScheduledTaskHelper;
use PKP\site\Site;
use Throwable;

abstract class PKPUsageStatsLoader extends FileLoader
{
    /**
     * If the log files should be automatically moved to the stage folder.
     * This is the case for daily log file processing.
     * This is not the case if the whole month is reprocessed - all log files for the given month should be manually placed in the stage folder.
     */
    private bool $autoStage;

    /** List of months the processed daily log files are from, to consider for monthly aggregation */
    private array $months = [];

    /** List of log files that needs to be processed within this scheduled task, and the jobs needs to be chained for. */
    private array $logFiles = [];

    /**
     * Constructor.
     *
     * @param array $args Scheduled task arguments. When not empty, the first element is
     *  the month whose staged log files should be reprocessed; it is matched as the
     *  prefix following "usage_events_" in the staged file names.
     */
    public function __construct(array $args)
    {
        $this->autoStage = true;

        // if log files for a whole month should be reprocessed,
        // the month is given as parameter
        if (!empty($args)) {
            $reprocessMonth = current($args);
            $reprocessFiles = $this->getStagedFilesByMonth($reprocessMonth);
            $this->setOnlyConsiderFiles($reprocessFiles);
            $this->autoStage = false;
        }

        // shall the archived log files be compressed
        $site = Application::get()->getRequest()->getSite();
        if ($site->getData('compressStatsLogs')) {
            $this->setCompressArchives(true);
        }

        // Define the base filesystem path.
        // The parent constructor receives the base path as its first argument,
        // replacing the month argument (if any) that was consumed above.
        $basePath = StatisticsHelper::getUsageStatsDirPath();
        $args[0] = $basePath;
        parent::__construct($args);

        $this->checkFolderStructure(true);
    }

    /**
     * @copydoc FileLoader::getName()
     */
    public function getName(): string
    {
        return __('admin.scheduledTask.usageStatsLoader');
    }

    /**
     * Get the jobs needed to process a usage stats log file and compile the stats.
     * The jobs have to be in the right execution order.
     *
     * @return BaseJob[]
     */
    abstract protected function getFileJobs(string $filePath, Site $site): array;

    /**
     * @copydoc FileLoader::executeActions()
     *
     * Stages the log files (daily processing only), lets the parent loader process
     * them, and then dispatches one job chain: the jobs of every accepted log file
     * followed by one monthly compilation job per affected month.
     *
     * @return bool False if the parent processing failed or if the processing
     *  directory was not empty when the task started, true otherwise.
     */
    protected function executeActions(): bool
    {
        // It's possible that the processing directory has files that
        // were being processed but the php process was stopped before
        // finishing the processing, or there may be a concurrent process running.
        // Warn the user if this is the case.
        $processingDirFiles = glob($this->getProcessingPath() . '/' . '*');
        $processingDirError = is_array($processingDirFiles) && count($processingDirFiles) > 0;

        // If the processing directory is not empty (and this is not the reprocessing of the older log files)
        // log that message
        // NOTE(review): the sentence above describes a non-reprocessing run, yet the check
        // below requires a non-empty "only consider" list, which the constructor fills when
        // a month is reprocessed - confirm which of the two is intended.
        if ($processingDirError && !empty($this->getOnlyConsiderFiles())) {
            $this->addExecutionLogEntry(
                __('admin.scheduledTask.usageStatsLoader.processingPathNotEmpty', ['directory' => $this->getProcessingPath()]),
                ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR
            );
        }

        if ($this->autoStage) {
            $this->autoStage();
        }

        $processFilesResult = parent::executeActions();
        if (!$processFilesResult) {
            return false;
        }

        $site = Application::get()->getRequest()->getSite();

        // Jobs of all log files come first, in the order the files were accepted by processFile().
        $jobs = [];
        foreach ($this->logFiles as $filePath) {
            $jobsPerFile = $this->getFileJobs($filePath, $site);
            $jobs = array_merge($jobs, $jobsPerFile);
        }

        // The monthly aggregation runs after all the log file jobs.
        foreach ($this->months as $month) {
            $jobs[] = new CompileMonthlyMetrics($month, $site);
        }

        // Bus::chain() cannot accept an empty array
        if (!empty($jobs)) {
            Bus::chain($jobs)
                ->catch(function (Throwable $e) {
                    // NOTE(review): a failure inside the chain is not reported here -
                    // confirm that the queue's own failed job handling is sufficient.
                })
                ->dispatch();

            $this->addExecutionLogEntry(
                __('admin.scheduledTask.usageStatsLoader.jobDispatched'),
                ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_NOTICE
            );
        }

        return !$processingDirError;
    }

    /**
     * Check if the log file's date is later than the first installation of the new log file format,
     * so that the log file can be processed.
     *
     * @param string $loadId Log file name; the date is read from the 8 characters
     *  that start 12 characters before its end (the YYYYMMDD part of usage_events_YYYYMMDD.log).
     *
     * @return bool False (after logging an error) if the file is older than the earliest accepted date.
     */
    protected function isDateValid(string $loadId): bool
    {
        $date = substr($loadId, -12, 8);

        // Get the date when the version that uses the new log file format (and COUNTER R5) is installed.
        // Only the log files later than that day can be (regularly) processed here.
        $statsService = Services::get('sushiStats');
        $dateR5Installed = date('Ymd', strtotime($statsService->getEarliestDate()));

        if ($date < $dateR5Installed) {
            // the log file is in old log file format
            // return the file to staging and
            // log the error
            $this->addExecutionLogEntry(
                __('admin.scheduledTask.usageStatsLoader.veryOldLogFile', ['file' => $loadId]),
                ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR
            );
            return false;
        }

        return true;
    }

    /**
     * Check if stats for the log file's month do not already exist.
     * Return true if they do not exist, so that log file can be processed.
     * Else, return the file to staging and log the error that
     * the CLI script for reprocessing should be called.
     * If the log files of the month are being reprocessed,
     * the CLI reprocessing script will first remove all the stats for the month,
     * so that this function will return true in that case.
     *
     * @param string $loadId Log file name, used in the logged error message.
     * @param string $month Month of the log file, in the same format as date('Ym').
     */
    protected function isMonthValid(string $loadId, string $month): bool
    {
        $currentMonth = date('Ym');
        // "first day of" is required: a plain "last month" keeps the day of the month and
        // overflows on the 29th-31st (e.g. on March 31 it resolves to a day in March).
        $lastMonth = date('Ym', strtotime('first day of last month'));
        $site = Application::get()->getRequest()->getSite();

        // If the daily metrics are not kept, and this is neither the current nor the last month
        // the CLI script to reprocess the whole month should be called.
        if (!$site->getData('keepDailyUsageStats') && $month !== $currentMonth && $month !== $lastMonth) {
            $statsService = Services::get('sushiStats');
            $counterMonthExists = $statsService->monthExists($month);

            $geoService = Services::get('geoStats');
            $geoMonthExists = $geoService->monthExists($month);

            if ($counterMonthExists || $geoMonthExists) {
                $this->addExecutionLogEntry(
                    __('admin.scheduledTask.usageStatsLoader.monthExists', ['file' => $loadId]),
                    ScheduledTaskHelper::SCHEDULED_TASK_MESSAGE_TYPE_ERROR
                );
                return false;
            }
        }

        return true;
    }

    /**
     * Add the log file's month to the list of months to be considered for the
     * stats aggregation after the current log files are processed.
     * A month that is already in the list is not added again.
     */
    protected function considerMonthForStatsAggregation(string $month): void
    {
        if (!in_array($month, $this->months, true)) {
            $this->months[] = $month;
        }
    }

    /**
     * @copydoc FileLoader::processFile()
     * The file name MUST be of form usage_events_YYYYMMDD.log
     * If the function successfully finishes, the file will be archived.
     *
     * @return bool|int self::FILE_LOADER_RETURN_TO_STAGING if the file fails the date or
     *  month validation, self::FILE_LOADER_RETURN_TO_DISPATCH otherwise.
     */
    protected function processFile(string $filePath): bool|int
    {
        $loadId = basename($filePath);
        $month = substr($loadId, -12, 6);

        // if the file is not being reprocessed using the CLI tool
        if (!in_array($loadId, $this->getOnlyConsiderFiles(), true)) {
            // Check if the log file is an old log file and if the stats for the month already exist
            if (!$this->isDateValid($loadId) || !$this->isMonthValid($loadId, $month)) {
                return self::FILE_LOADER_RETURN_TO_STAGING;
            }
        }

        // Add this log file to the list, so that all jobs, for all files can be chained.
        // Note that the file name, not the full path, is what is stored and later
        // handed to getFileJobs().
        $this->logFiles[] = $loadId;

        // Add this log file's month to the list of months the stats need to be aggregated for.
        $this->considerMonthForStatsAggregation($month);

        return self::FILE_LOADER_RETURN_TO_DISPATCH;
    }

    /**
     * Auto stage usage stats log files, also moving files that
     * might be in processing folder to stage folder.
     */
    protected function autoStage(): void
    {
        // Move all log files to stage directory, except the current day one.
        $fileManager = new FileManager();
        $logFiles = [];

        $logsDirFiles = glob($this->getUsageEventLogsPath() . '/*');
        if (is_array($logsDirFiles)) {
            $logFiles = array_merge($logFiles, $logsDirFiles);
        }

        // It's possible that the processing directory have files that
        // were being processed but the php process was stopped before
        // finishing the processing. Just move them to the stage directory too.
        $processingDirFiles = glob($this->getProcessingPath() . '/*');
        if (is_array($processingDirFiles)) {
            $logFiles = array_merge($logFiles, $processingDirFiles);
        }

        // The current day log file is skipped; its name is the same for every iteration.
        $currentDayFilename = $this->getUsageEventCurrentDayLogName();

        foreach ($logFiles as $filePath) {
            if (!$fileManager->fileExists($filePath)) {
                continue;
            }

            $filename = pathinfo($filePath, PATHINFO_BASENAME);
            if ($filename == $currentDayFilename) {
                continue;
            }

            $this->moveFile(pathinfo($filePath, PATHINFO_DIRNAME), $this->getStagePath(), $filename);
        }
    }

    /**
     * Get staged usage log files belonging to a month, that should be reprocessed
     *
     * @param string $month Month to look for; a staged file matches when its name
     *  starts with "usage_events_" followed by this value.
     *
     * @return string[] Names (without path) of the matching files in the stage directory;
     *  empty if the stage directory does not exist or cannot be opened.
     */
    protected function getStagedFilesByMonth(string $month): array
    {
        $files = [];
        $stagePath = StatisticsHelper::getUsageStatsDirPath() . '/' . self::FILE_LOADER_PATH_STAGING;

        // This runs from the constructor before the folder structure is checked,
        // so the stage directory may not exist yet.
        if (!is_dir($stagePath)) {
            return $files;
        }

        $stageDir = opendir($stagePath);
        if ($stageDir === false) {
            return $files;
        }

        $prefix = 'usage_events_' . $month;
        // Compare against false explicitly: an entry named "0" is falsy and
        // would otherwise end the loop early.
        while (($filename = readdir($stageDir)) !== false) {
            if (str_starts_with($filename, $prefix)) {
                $files[] = $filename;
            }
        }
        closedir($stageDir);

        return $files;
    }

    /**
     * Get the usage event logs directory path.
     */
    protected function getUsageEventLogsPath(): string
    {
        return StatisticsHelper::getUsageStatsDirPath() . '/usageEventLogs';
    }

    /**
     * Get current day usage event log name.
     */
    protected function getUsageEventCurrentDayLogName(): string
    {
        return 'usage_events_' . date('Ymd') . '.log';
    }
}