mirror of
https://github.com/Piwigo/Piwigo.git
synced 2026-06-01 20:04:51 +02:00
feature #509 summarize history more often
Every 1000 log entries inserted, Piwigo summarizes the history. The summarize process has also been optimized: the column history.summarized is no longer used (so it no longer needs to be updated, which took a lot of time); instead we now save history_id_from and history_id_to in the history_summary table. This way we know where to start from on the next summarize. For now, purely for performance reasons, we keep the column history.summarized, because removing it may take a long time on huge tables. Once we have an automatic purge of the history table, it will be safer to drop this column.
This commit is contained in:
@@ -176,6 +176,220 @@ SELECT
|
||||
return $data;
|
||||
}
|
||||
|
||||
/**
 * Compute statistics from history table to history_summary table.
 *
 * Reads the history lines located after the last summarized one (the highest
 * history_id_to stored in the summary table), groups them by date and hour,
 * and adds the page counts to the year, year-month, year-month-day and
 * year-month-day-hour lines of the summary table. Every summary line written
 * also stores the range of history ids it covers (history_id_from and
 * history_id_to), which is how the next call knows where to resume.
 *
 * @param int $max_lines - to only compute the next X lines, not the whole
 *   remaining lines. When null (default), all remaining lines are computed.
 * @return void
 */
function history_summarize($max_lines=null)
{
  // we need to know which was the last line "summarized"
  $query = '
SELECT
    history_id_to
  FROM '.HISTORY_SUMMARY_TABLE.'
  WHERE history_id_to IS NOT NULL
  ORDER BY history_id_to DESC
  LIMIT 1
;';
  $summary_lines = query2array($query);

  $last_summarized_id = 0;
  if (count($summary_lines) > 0)
  {
    $last_summarized_id = $summary_lines[0]['history_id_to'];
  }

  // Find the first line that really remains to be summarized. We look it up
  // instead of assuming it is "last summarized + 1" because the history
  // table may have been purged: with a hole bigger than $max_lines right
  // after the last summarized id, a window starting at the last summarized
  // id would stay empty forever and the summary would never move forward.
  // The same lookup gives the starting point when nothing has been
  // summarized yet (ids are expected to be positive, so "id > 0" matches
  // every line).
  $query = '
SELECT
    MIN(id) AS min_id
  FROM '.HISTORY_TABLE.'
  WHERE id > '.$last_summarized_id.'
;';
  $history_lines = query2array($query);

  if (count($history_lines) == 0 or !isset($history_lines[0]['min_id']))
  {
    // MIN() returns NULL when no line matches: nothing new to summarize
    return;
  }

  // exclusive lower bound of the window of lines to summarize
  $history_min_id = $history_lines[0]['min_id'] - 1;

  $query = '
SELECT
    date,
    '.pwg_db_get_hour('time').' AS hour,
    MIN(id) AS min_id,
    MAX(id) AS max_id,
    COUNT(*) AS nb_pages
  FROM '.HISTORY_TABLE.'
  WHERE id > '.$history_min_id;

  if (isset($max_lines))
  {
    // the cast guarantees only a number reaches the SQL query
    $query.= '
    AND id <= '.($history_min_id + (int)$max_lines);
  }

  $query.= '
  GROUP BY
    date,
    hour
  ORDER BY
    date ASC,
    hour ASC
;';
  $result = pwg_query($query);

  // time_key => array(nb_pages, history_id_from, history_id_to)
  $need_update = array();

  $is_first = true;
  $first_time_key = null;

  while ($row = pwg_db_fetch_assoc($result))
  {
    // each (date, hour) group contributes to 4 summary lines
    $time_keys = array(
      substr($row['date'], 0, 4), //yyyy
      substr($row['date'], 0, 7), //yyyy-mm
      substr($row['date'], 0, 10),//yyyy-mm-dd
      sprintf(
        '%s-%02u',
        $row['date'], $row['hour']
        ),
      );

    foreach ($time_keys as $time_key)
    {
      if (!isset($need_update[$time_key]))
      {
        $need_update[$time_key] = array(
          'nb_pages' => 0,
          'history_id_from' => $row['min_id'],
          'history_id_to' => $row['max_id'],
          );
      }
      $need_update[$time_key]['nb_pages'] += $row['nb_pages'];

      // groups are ordered by date/hour, not by id, so the id range of a
      // summary line has to be widened explicitly in both directions
      if ($row['min_id'] < $need_update[$time_key]['history_id_from'])
      {
        $need_update[$time_key]['history_id_from'] = $row['min_id'];
      }

      if ($row['max_id'] > $need_update[$time_key]['history_id_to'])
      {
        $need_update[$time_key]['history_id_to'] = $row['max_id'];
      }
    }

    if ($is_first)
    {
      $is_first = false;
      $first_time_key = $time_keys[3];
    }
  }

  // Only the oldest time_key might be already summarized, so we have to
  // update the 4 corresponding lines instead of simply inserting them.
  //
  // For example, if the oldest unsummarized is 2005.08.25.21, the 4 lines
  // that can be updated are:
  //
  // +---------------+----------+
  // | id            | nb_pages |
  // +---------------+----------+
  // | 2005          |   241109 |
  // | 2005-08       |    20133 |
  // | 2005-08-25    |      620 |
  // | 2005-08-25-21 |      151 |
  // +---------------+----------+

  $updates = array();
  $inserts = array();

  if (isset($first_time_key))
  {
    list($year, $month, $day, $hour) = explode('-', $first_time_key);

    $query = '
SELECT *
  FROM '.HISTORY_SUMMARY_TABLE.'
  WHERE year='.$year.'
    AND ( month IS NULL
      OR ( month='.$month.'
        AND ( day is NULL
          OR (day='.$day.'
            AND (hour IS NULL OR hour='.$hour.')
          )
        )
      )
    )
;';
    $result = pwg_query($query);
    while ($row = pwg_db_fetch_assoc($result))
    {
      // rebuild the time_key of the existing summary line, in the same
      // format as the keys of $need_update
      $key = sprintf('%4u', $row['year']);
      if ( isset($row['month']) )
      {
        $key .= sprintf('-%02u', $row['month']);
        if ( isset($row['day']) )
        {
          $key .= sprintf('-%02u', $row['day']);
          if ( isset($row['hour']) )
          {
            $key .= sprintf('-%02u', $row['hour']);
          }
        }
      }

      if (isset($need_update[$key]))
      {
        // the line already exists: add the new pages and move its upper
        // bound, then remove it from the lines to insert
        $row['nb_pages'] += $need_update[$key]['nb_pages'];
        $row['history_id_to'] = $need_update[$key]['history_id_to'];
        $updates[] = $row;
        unset($need_update[$key]);
      }
    }
  }

  // whatever remains in $need_update has no summary line yet
  foreach ($need_update as $time_key => $summary)
  {
    $time_tokens = explode('-', $time_key);

    // a year/month/day level line has no value for the finer levels: they
    // are explicitly set to null
    $inserts[] = array(
      'year'     => $time_tokens[0],
      'month'    => isset($time_tokens[1]) ? $time_tokens[1] : null,
      'day'      => isset($time_tokens[2]) ? $time_tokens[2] : null,
      'hour'     => isset($time_tokens[3]) ? $time_tokens[3] : null,
      'nb_pages' => $summary['nb_pages'],
      'history_id_from' => $summary['history_id_from'],
      'history_id_to' => $summary['history_id_to'],
      );
  }

  if (count($updates) > 0)
  {
    mass_updates(
      HISTORY_SUMMARY_TABLE,
      array(
        'primary' => array('year','month','day','hour'),
        'update'  => array('nb_pages','history_id_to'),
        ),
      $updates
      );
  }

  if (count($inserts) > 0)
  {
    mass_inserts(
      HISTORY_SUMMARY_TABLE,
      array_keys($inserts[0]),
      $inserts
      );
  }
}
|
||||
|
||||
// register the function named 'get_history' as handler of the 'get_history'
// event (NOTE(review): presumably so plugins can alter or replace the
// history search, confirm against the event system)
add_event_handler('get_history', 'get_history');

// notify listeners that this functions file has been included
trigger_notify('functions_history_included');
|
||||
|
||||
|
||||
+1
-163
@@ -113,169 +113,7 @@ check_status(ACCESS_ADMINISTRATOR);
|
||||
// | Refresh summary from details |
|
||||
// +-----------------------------------------------------------------------+
|
||||
|
||||
// The summary is refreshed by the shared history_summarize() function, which
// tracks its progress with history_summary.history_id_from/history_id_to.
// The former inline implementation (based on the history.summarized column)
// must not run in addition to it, or the same history lines would be counted
// twice. No argument is given: all remaining lines are summarized.
// NOTE(review): history_summarize() is defined in
// admin/include/functions_history.inc.php, confirm this page includes it.
history_summarize();
|
||||
|
||||
// +-----------------------------------------------------------------------+
|
||||
// | Page parameters check |
|
||||
|
||||
@@ -495,6 +495,13 @@ INSERT INTO '.HISTORY_TABLE.'
|
||||
;';
|
||||
pwg_query($query);
|
||||
|
||||
$history_id = pwg_db_insert_id(HISTORY_TABLE);
|
||||
if ($history_id % 1000 == 0)
|
||||
{
|
||||
include_once(PHPWG_ROOT_PATH.'admin/include/functions_history.inc.php');
|
||||
history_summarize(50000);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
<?php
|
||||
// +-----------------------------------------------------------------------+
|
||||
// | Piwigo - a PHP based photo gallery |
|
||||
// +-----------------------------------------------------------------------+
|
||||
// | Copyright(C) 2008-2016 Piwigo Team http://piwigo.org |
|
||||
// | Copyright(C) 2003-2008 PhpWebGallery Team http://phpwebgallery.net |
|
||||
// | Copyright(C) 2002-2003 Pierrick LE GALL http://le-gall.net/pierrick |
|
||||
// +-----------------------------------------------------------------------+
|
||||
// | This program is free software; you can redistribute it and/or modify |
|
||||
// | it under the terms of the GNU General Public License as published by |
|
||||
// | the Free Software Foundation |
|
||||
// | |
|
||||
// | This program is distributed in the hope that it will be useful, but |
|
||||
// | WITHOUT ANY WARRANTY; without even the implied warranty of |
|
||||
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
||||
// | General Public License for more details. |
|
||||
// | |
|
||||
// | You should have received a copy of the GNU General Public License |
|
||||
// | along with this program; if not, write to the Free Software |
|
||||
// | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
|
||||
// | USA. |
|
||||
// +-----------------------------------------------------------------------+
|
||||
|
||||
// this script must only run from within Piwigo, never be called directly
if (!defined('PHPWG_ROOT_PATH'))
{
  die('Hacking attempt!');
}

$upgrade_description = 'add history_id_from+history_id_to in history_summary table';

// we use PREFIX_TABLE, in case Piwigo uses an external user table
pwg_query('
ALTER TABLE `'.PREFIX_TABLE.'history_summary`
  ADD COLUMN `history_id_from` int(10) unsigned default NULL,
  ADD COLUMN `history_id_to` int(10) unsigned default NULL
;');

// Find the last history line flagged as summarized by the former mechanism
// (column history.summarized). Only id, date and time are needed to locate
// the matching summary line.
$query = '
SELECT
    id,
    date,
    time
  FROM '.PREFIX_TABLE.'history
  WHERE summarized = \'true\'
  ORDER BY id DESC
  LIMIT 1
;';
// note : much faster than searching MAX(ID), ie on my big sample 14 seconds Vs 2 seconds
$history_lines = query2array($query);
if (count($history_lines) > 0)
{
  $last_summarized = $history_lines[0];

  // date is split on "-" and the hour is the first token of time, to
  // address the hour-level summary line of the last summarized history line
  list($year, $month, $day) = explode('-', $last_summarized['date']);
  list($hour) = explode(':', $last_summarized['time']);

  // store the id of the last summarized line on that summary line, as the
  // starting point of the next summarize
  single_update(
    PREFIX_TABLE.'history_summary',
    array(
      'history_id_to' => $last_summarized['id'],
      ),
    array(
      'year' => $year,
      'month' => $month,
      'day' => $day,
      'hour' => $hour,
      )
    );

  // in case this script would update no summary line, it would mean the
  // summary has been purged and will be rebuilt from scratch, based on the
  // content of history table
}

// for now, we keep column history.summarized even if Piwigo 2.9 no longer
// uses it. We will remove it in a future version. First we need to have
// "less" lines in history table. This will be possible with the automatic
// purge implemented in Piwigo 2.9.

echo "\n".$upgrade_description."\n";
|
||||
|
||||
?>
|
||||
@@ -129,12 +129,10 @@ CREATE TABLE `piwigo_history` (
|
||||
`category_id` smallint(5) default NULL,
|
||||
`tag_ids` varchar(50) default NULL,
|
||||
`image_id` mediumint(8) default NULL,
|
||||
`summarized` enum('true','false') default 'false',
|
||||
`image_type` enum('picture','high','other') default NULL,
|
||||
`format_id` int(11) unsigned default NULL,
|
||||
`auth_key_id` int(11) unsigned DEFAULT NULL,
|
||||
PRIMARY KEY (`id`),
|
||||
KEY `history_i1` (`summarized`)
|
||||
PRIMARY KEY (`id`)
|
||||
) ENGINE=MyISAM;
|
||||
|
||||
--
|
||||
@@ -148,6 +146,8 @@ CREATE TABLE `piwigo_history_summary` (
|
||||
`day` tinyint(2) default NULL,
|
||||
`hour` tinyint(2) default NULL,
|
||||
`nb_pages` int(11) default NULL,
|
||||
`history_id_from` int(10) unsigned default NULL,
|
||||
`history_id_to` int(10) unsigned default NULL,
|
||||
UNIQUE KEY history_summary_ymdh (`year`,`month`,`day`,`hour`)
|
||||
) ENGINE=MyISAM;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user