[ Mini Kiebo ]
Server: Windows NT DESKTOP-5B8S0D4 6.2 build 9200 (Windows 8 Professional Edition) i586
Path:
D:
/
Backup
/
05122024
/
htdocs
/
jurnal-kesmas
/
lib
/
pkp
/
jobs
/
statistics
/
[
Home
]
File: PKPProcessUsageStatsLogFile.php
<?php

/**
 * @file jobs/statistics/PKPProcessUsageStatsLogFile.php
 *
 * Copyright (c) 2024 Simon Fraser University
 * Copyright (c) 2024 John Willinsky
 * Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
 *
 * @class PKPProcessUsageStatsLogFile
 *
 * @ingroup jobs
 *
 * @brief Parse a usage stats log file line by line and load its valid entries into the usage stats temporary tables.
 */

namespace PKP\jobs\statistics;

use APP\statistics\StatisticsHelper;
use DateTime;
use Exception;
use LogicException;
use PKP\core\Core;
use PKP\job\exceptions\JobException;
use PKP\jobs\BaseJob;
use PKP\task\FileLoader;
use RuntimeException;
use SplFileObject;

abstract class PKPProcessUsageStatsLogFile extends BaseJob
{
    /**
     * Create a new job instance.
     *
     * @param string $loadId Usage stats log file name
     */
    public function __construct(protected string $loadId)
    {
        parent::__construct();
    }

    /**
     * Delete entries in usage stats temporary tables by loadId
     */
    abstract protected function deleteByLoadId(): void;

    /**
     * Get valid assoc types that an usage event can contain
     */
    abstract protected function getValidAssocTypes(): array;

    /**
     * Insert usage stats log entry into temporary tables
     *
     * @param object $entry Decoded and validated log entry
     * @param int $lineNumber 1-based number of the line the entry was read from
     */
    abstract protected function insertTemporaryUsageStatsData(object $entry, int $lineNumber): void;

    /**
     * Execute the job.
     *
     * Resolves the log file inside the dispatch folder of the usage stats
     * directory and hands it over to process().
     *
     * @throws JobException If the log file does not exist in the dispatch folder.
     */
    public function handle(): void
    {
        $filename = $this->loadId;
        $dispatchFilePath = StatisticsHelper::getUsageStatsDirPath() . '/' . FileLoader::FILE_LOADER_PATH_DISPATCH . '/' . $filename;
        if (!file_exists($dispatchFilePath)) {
            throw new JobException(__(
                'admin.job.processLogFile.fileNotFound',
                ['file' => $dispatchFilePath]
            ));
        }
        $this->process($dispatchFilePath);
    }

    /**
     * Parse log file line by line and add the lines into the usage stats temporary DB tables.
     *
     * Lines that are blank, start with '#', are not a JSON object, fail
     * validation or belong to a bot are skipped; format and validation
     * failures are written to the error log together with the line number.
     *
     * @param string $dispatchFilePath Full path of the log file in the dispatch folder
     *
     * @throws JobException If the log file cannot be opened.
     */
    protected function process(string $dispatchFilePath): void
    {
        try {
            // The SplFileObject constructor reports failure by throwing,
            // it never returns a falsy value.
            $splFileObject = new SplFileObject($dispatchFilePath, 'r');
        } catch (RuntimeException | LogicException $e) {
            // reject file -- move the file from dispatch to reject folder
            $filename = $this->loadId;
            $rejectFilePath = StatisticsHelper::getUsageStatsDirPath() . '/' . FileLoader::FILE_LOADER_PATH_REJECT . '/' . $filename;
            if (!rename($dispatchFilePath, $rejectFilePath)) {
                $message = __(
                    'admin.job.compileMetrics.returnToStaging.error',
                    ['file' => $filename, 'dispatchFilePath' => $dispatchFilePath, 'rejectFilePath' => $rejectFilePath]
                );
                error_log($message);
            }
            throw new JobException(__('admin.job.processLogFile.openFileFailed', ['file' => $dispatchFilePath]));
        }

        // Make sure we don't have any temporary records associated
        // with the current load ID in database.
        $this->deleteByLoadId();

        $lineNumber = 0;
        while (!$splFileObject->eof()) {
            $lineNumber++;
            $line = $splFileObject->fgets();

            // Spacing or comment lines. This actually should not occur in the new format.
            // fgets() keeps the line terminator, so blank lines are only
            // recognisable after trimming.
            if (trim($line) === '' || str_starts_with($line, '#')) {
                continue;
            }

            $entryData = json_decode($line);
            // json_decode() yields null for invalid JSON, but scalars and
            // arrays for valid JSON that is not an object; only objects can
            // be validated and inserted.
            if (!is_object($entryData)) {
                // This line is not in the right format.
                $message = __(
                    'admin.job.processLogFile.wrongLoglineFormat',
                    ['file' => $this->loadId, 'lineNumber' => $lineNumber]
                );
                error_log($message);
                continue;
            }

            try {
                $this->validateLogEntry($entryData);
            } catch (Exception $e) {
                $message = __(
                    'admin.job.processLogFile.invalidLogEntry',
                    ['file' => $this->loadId, 'lineNumber' => $lineNumber, 'error' => $e->getMessage()]
                );
                error_log($message);
                continue;
            }

            // Avoid bots.
            if (Core::isUserAgentBot($entryData->userAgent)) {
                continue;
            }

            $this->insertTemporaryUsageStatsData($entryData, $lineNumber);
        }

        //explicitly assign null, so that the file can be deleted
        $splFileObject = null;
    }

    /**
     * Validate the usage stats log entry
     *
     * Missing properties are treated like null values, so an incomplete
     * entry is reported through the regular exception instead of a warning
     * or a type error.
     *
     * @param object $entry Decoded log line
     *
     * @throws Exception If any of the checked properties is missing or malformed.
     */
    protected function validateLogEntry(object $entry): void
    {
        $time = $entry->time ?? null;
        if (!is_string($time) || !$this->validateDate($time)) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.time'));
        }
        // check hashed IP ?
        // check canonicalUrl ?
        if (!is_int($entry->contextId ?? null)) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.contextId'));
        }
        if (!empty($entry->submissionId) && !is_int($entry->submissionId)) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.submissionId'));
        }

        $validAssocTypes = $this->getValidAssocTypes();
        if (!in_array($entry->assocType ?? null, $validAssocTypes)) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.assocType'));
        }

        $validFileTypes = [
            StatisticsHelper::STATISTICS_FILE_TYPE_PDF,
            StatisticsHelper::STATISTICS_FILE_TYPE_DOC,
            StatisticsHelper::STATISTICS_FILE_TYPE_HTML,
            StatisticsHelper::STATISTICS_FILE_TYPE_OTHER,
        ];
        if (!empty($entry->fileType) && !in_array($entry->fileType, $validFileTypes)) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.fileType'));
        }

        // The ctype_* functions interpret small integers as ASCII code
        // points, so the value has to be a string before they are consulted.
        if (
            !empty($entry->country)
            && (!is_string($entry->country) || !ctype_alpha($entry->country) || strlen($entry->country) !== 2)
        ) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.country'));
        }
        if (
            !empty($entry->region)
            && (!is_string($entry->region) || !ctype_alnum($entry->region) || strlen($entry->region) > 3)
        ) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.region'));
        }

        if (!is_array($entry->institutionIds ?? null)) {
            throw new Exception(__('admin.job.processLogFile.invalidLogEntry.institutionIds'));
        }
    }

    /**
     * Validate date, check if the date is a valid date and in requested format
     *
     * @param string $datetime Date string to check
     * @param string $format Expected format, as understood by DateTime::createFromFormat()
     *
     * @return bool True only if the string parses with the given format and
     *  formatting the parsed date reproduces the very same string.
     */
    protected function validateDate(string $datetime, string $format = 'Y-m-d H:i:s'): bool
    {
        $d = DateTime::createFromFormat($format, $datetime);
        // The round trip rejects values that parse only through overflow, e.g. a day 31 in a 30-day month.
        return $d && $d->format($format) === $datetime;
    }
}