Merge branch 'feature/optimize-history'

This commit is contained in:
plegall
2016-11-25 18:38:56 +01:00
6 changed files with 411 additions and 166 deletions

View File

@@ -176,6 +176,310 @@ SELECT
return $data;
}
/**
 * Compute statistics from history table to history_summary table.
 *
 * The history lines located after the last summarized line (tracked with
 * history_summary.history_id_to) are counted per date and hour, then rolled
 * up into 4 summary lines: year, year-month, year-month-day and
 * year-month-day-hour. Summary lines that already exist are updated, the
 * other ones are inserted.
 *
 * @param int $max_lines - to only compute the next X lines, not the whole remaining lines
 * @return void
 */
function history_summarize($max_lines=null)
{
  // we need to know which was the last line "summarized"
  $query = '
SELECT
*
FROM '.HISTORY_SUMMARY_TABLE.'
WHERE history_id_to IS NOT NULL
ORDER BY history_id_to DESC
LIMIT 1
;';
  $summary_lines = query2array($query);

  $history_min_id = 0;
  if (count($summary_lines) > 0)
  {
    $history_min_id = $summary_lines[0]['history_id_to'];
  }
  else
  {
    // if we have no "reference", ie "starting point", we need to find
    // one. And "0" is not the right answer here, because history table may
    // have been purged already.
    $query = '
SELECT
MIN(id) AS min_id
FROM '.HISTORY_TABLE.'
;';
    $history_lines = query2array($query);
    if (count($history_lines) > 0)
    {
      // an aggregate without GROUP BY returns one row even on an empty
      // table, with min_id set to NULL: nothing to summarize in that case
      if (!isset($history_lines[0]['min_id']))
      {
        return;
      }

      $history_min_id = $history_lines[0]['min_id'] - 1;
    }
  }

  // NOTE(review): with $max_lines, the window is computed on ids, not on a
  // number of rows. If no history line has its id in
  // ]history_min_id, history_min_id + max_lines], nothing gets summarized
  // and the next limited call will use the very same window -- confirm such
  // a gap in ids cannot happen.
  $query = '
SELECT
date,
'.pwg_db_get_hour('time').' AS hour,
MIN(id) AS min_id,
MAX(id) AS max_id,
COUNT(*) AS nb_pages
FROM '.HISTORY_TABLE.'
WHERE id > '.$history_min_id;
  if (isset($max_lines))
  {
    $query.= '
AND id <= '.($history_min_id + $max_lines);
  }
  $query.= '
GROUP BY
date,
hour
ORDER BY
date ASC,
hour ASC
;';
  $result = pwg_query($query);

  // time_key (yyyy, yyyy-mm, yyyy-mm-dd or yyyy-mm-dd-hh) => counters
  $need_update = array();

  // rows come sorted by date+hour, so the first one is the oldest hour
  $first_time_key = null;

  while ($row = pwg_db_fetch_assoc($result))
  {
    $time_keys = array(
      substr($row['date'], 0, 4), //yyyy
      substr($row['date'], 0, 7), //yyyy-mm
      substr($row['date'], 0, 10),//yyyy-mm-dd
      sprintf(
        '%s-%02u',
        $row['date'], $row['hour']
        ),
      );

    foreach ($time_keys as $time_key)
    {
      if (!isset($need_update[$time_key]))
      {
        $need_update[$time_key] = array(
          'nb_pages' => 0,
          'history_id_from' => $row['min_id'],
          'history_id_to' => $row['max_id'],
          );
      }

      $need_update[$time_key]['nb_pages'] += $row['nb_pages'];

      // widen the [history_id_from, history_id_to] range of this time_key
      if ($row['min_id'] < $need_update[$time_key]['history_id_from'])
      {
        $need_update[$time_key]['history_id_from'] = $row['min_id'];
      }

      if ($row['max_id'] > $need_update[$time_key]['history_id_to'])
      {
        $need_update[$time_key]['history_id_to'] = $row['max_id'];
      }
    }

    if (!isset($first_time_key))
    {
      $first_time_key = $time_keys[3];
    }
  }

  // Only the oldest time_key might be already summarized, so we have to
  // update the 4 corresponding lines instead of simply inserting them.
  //
  // For example, if the oldest unsummarized is 2005.08.25.21, the 4 lines
  // that can be updated are:
  //
  // +---------------+----------+
  // | id            | nb_pages |
  // +---------------+----------+
  // | 2005          |   241109 |
  // | 2005-08       |    20133 |
  // | 2005-08-25    |      620 |
  // | 2005-08-25-21 |      151 |
  // +---------------+----------+
  $updates = array();
  $inserts = array();

  if (isset($first_time_key))
  {
    list($year, $month, $day, $hour) = explode('-', $first_time_key);

    $query = '
SELECT *
FROM '.HISTORY_SUMMARY_TABLE.'
WHERE year='.$year.'
AND ( month IS NULL
OR ( month='.$month.'
AND ( day is NULL
OR (day='.$day.'
AND (hour IS NULL OR hour='.$hour.')
)
)
)
)
;';
    $result = pwg_query($query);
    while ($row = pwg_db_fetch_assoc($result))
    {
      // rebuild the time_key of this summary line, a NULL column ends the key
      $key = sprintf('%4u', $row['year']);
      if ( isset($row['month']) )
      {
        $key .= sprintf('-%02u', $row['month']);
        if ( isset($row['day']) )
        {
          $key .= sprintf('-%02u', $row['day']);
          if ( isset($row['hour']) )
          {
            $key .= sprintf('-%02u', $row['hour']);
          }
        }
      }

      if (isset($need_update[$key]))
      {
        $row['nb_pages'] += $need_update[$key]['nb_pages'];
        $row['history_id_to'] = $need_update[$key]['history_id_to'];
        $updates[] = $row;

        // what is updated must not be inserted afterwards
        unset($need_update[$key]);
      }
    }
  }

  foreach ($need_update as $time_key => $summary)
  {
    $time_tokens = explode('-', $time_key);

    // a short time_key (year, year-month...) has no token for the finer
    // levels: they are stored as NULL
    $inserts[] = array(
      'year' => $time_tokens[0],
      'month' => isset($time_tokens[1]) ? $time_tokens[1] : null,
      'day' => isset($time_tokens[2]) ? $time_tokens[2] : null,
      'hour' => isset($time_tokens[3]) ? $time_tokens[3] : null,
      'nb_pages' => $summary['nb_pages'],
      'history_id_from' => $summary['history_id_from'],
      'history_id_to' => $summary['history_id_to'],
      );
  }

  if (count($updates) > 0)
  {
    mass_updates(
      HISTORY_SUMMARY_TABLE,
      array(
        'primary' => array('year','month','day','hour'),
        'update' => array('nb_pages','history_id_to'),
        ),
      $updates
      );
  }

  if (count($inserts) > 0)
  {
    mass_inserts(
      HISTORY_SUMMARY_TABLE,
      array_keys($inserts[0]),
      $inserts
      );
  }
}
/**
 * Smart purge on history table: the most recent lines are kept and only
 * lines already summarized can be deleted, one block at a time.
 *
 * Driven by $conf['history_autopurge_keep_lines'] (0 disables the purge)
 * and $conf['history_autopurge_blocksize'].
 *
 * @since 2.9
 */
function history_autopurge()
{
  global $conf, $logger;

  // purge disabled by configuration
  if ($conf['history_autopurge_keep_lines'] == 0)
  {
    return;
  }

  // we want to purge only if there are too many lines and if the lines are summarized
  $query = '
SELECT
COUNT(*)
FROM '.HISTORY_TABLE.'
;';
  list($nb_lines) = pwg_db_fetch_row(pwg_query($query));

  if ($conf['history_autopurge_keep_lines'] >= $nb_lines)
  {
    return; // no need to purge for now
  }

  // 1) find the last summarized history line
  $query = '
SELECT
*
FROM '.HISTORY_SUMMARY_TABLE.'
WHERE history_id_to IS NOT NULL
ORDER BY history_id_to DESC
LIMIT 1
;';
  $last_summaries = query2array($query);

  if (count($last_summaries) < 1)
  {
    return; // lines not summarized, no purge
  }

  // 2) find the latest history line (and subtract the number of lines to keep)
  $query = '
SELECT
id
FROM '.HISTORY_TABLE.'
ORDER BY id DESC
LIMIT 1
;';
  $latest_rows = query2array($query);

  if (count($latest_rows) < 1)
  {
    return;
  }

  // 3) find the oldest history line (and add the number of lines to delete)
  $query = '
SELECT
id
FROM '.HISTORY_TABLE.'
ORDER BY id ASC
LIMIT 1
;';
  $oldest_rows = query2array($query);

  // the lowest of the 3 limits wins: never beyond what is summarized, never
  // into the lines to keep, never more than one block at once
  $limits = array(
    $last_summaries[0]['history_id_to'],
    $latest_rows[0]['id'] - $conf['history_autopurge_keep_lines'],
    $oldest_rows[0]['id'] + $conf['history_autopurge_blocksize'],
    );
  $delete_before_id = min($limits);

  $logger->debug(__FUNCTION__.', '.implode('/', $limits));

  pwg_query('
DELETE
FROM '.HISTORY_TABLE.'
WHERE id < '.$delete_before_id.'
;');
}
// attach the 'get_history' callback to the 'get_history' event
// (presumably the default implementation, which plugins can complement)
add_event_handler('get_history', 'get_history');
// tell listeners that this functions file has just been included
trigger_notify('functions_history_included');

View File

@@ -113,169 +113,7 @@ check_status(ACCESS_ADMINISTRATOR);
// | Refresh summary from details |
// +-----------------------------------------------------------------------+
// NOTE(review): this hunk replaces 169 lines with 7, so the inline code below
// looks like the former implementation, superseded by the final
// history_summarize() call -- confirm against the real diff.
//
// Count the not yet summarized history lines, per date and hour, oldest first.
$query = '
SELECT
date,
'.pwg_db_get_hour('time').' AS hour,
MAX(id) AS max_id,
COUNT(*) AS nb_pages
FROM '.HISTORY_TABLE.'
WHERE summarized = \'false\'
GROUP BY
date,
hour
ORDER BY
date ASC,
hour ASC
;';
$result = pwg_query($query);
// time_key (yyyy, yyyy-mm, yyyy-mm-dd or yyyy-mm-dd-hh) => number of pages to add
$need_update = array();
// highest history id seen, used at the end to flag lines as summarized
$max_id = 0;
$is_first = true;
// finest time_key (yyyy-mm-dd-hh) of the oldest unsummarized hour
$first_time_key = null;
while ($row = pwg_db_fetch_assoc($result))
{
$time_keys = array(
substr($row['date'], 0, 4), //yyyy
substr($row['date'], 0, 7), //yyyy-mm
substr($row['date'], 0, 10),//yyyy-mm-dd
sprintf(
'%s-%02u',
$row['date'], $row['hour']
),
);
// each hour counts for its year, its month, its day and itself
foreach ($time_keys as $time_key)
{
if (!isset($need_update[$time_key]))
{
$need_update[$time_key] = 0;
}
$need_update[$time_key] += $row['nb_pages'];
}
if ($row['max_id'] > $max_id)
{
$max_id = $row['max_id'];
}
if ($is_first)
{
$is_first = false;
$first_time_key = $time_keys[3];
}
}
// Only the oldest time_key might be already summarized, so we have to
// update the 4 corresponding lines instead of simply inserting them.
//
// For example, if the oldest unsummarized is 2005.08.25.21, the 4 lines
// that can be updated are:
//
// +---------------+----------+
// | id | nb_pages |
// +---------------+----------+
// | 2005 | 241109 |
// | 2005-08 | 20133 |
// | 2005-08-25 | 620 |
// | 2005-08-25-21 | 151 |
// +---------------+----------+
$updates = array();
$inserts = array();
if (isset($first_time_key))
{
list($year, $month, $day, $hour) = explode('-', $first_time_key);
// fetch the existing summary lines (year, month, day, hour levels) matching
// the oldest unsummarized hour
$query = '
SELECT *
FROM '.HISTORY_SUMMARY_TABLE.'
WHERE year='.$year.'
AND ( month IS NULL
OR ( month='.$month.'
AND ( day is NULL
OR (day='.$day.'
AND (hour IS NULL OR hour='.$hour.')
)
)
)
)
;';
$result = pwg_query($query);
while ($row = pwg_db_fetch_assoc($result))
{
// rebuild the time_key of this summary line, a NULL column ends the key
$key = sprintf('%4u', $row['year']);
if ( isset($row['month']) )
{
$key .= sprintf('-%02u', $row['month']);
if ( isset($row['day']) )
{
$key .= sprintf('-%02u', $row['day']);
if ( isset($row['hour']) )
{
$key .= sprintf('-%02u', $row['hour']);
}
}
}
if (isset($need_update[$key]))
{
$row['nb_pages'] += $need_update[$key];
$updates[] = $row;
// what is updated must not be inserted afterwards
unset($need_update[$key]);
}
}
}
// every remaining time_key has no summary line yet: prepare its insertion
foreach ($need_update as $time_key => $nb_pages)
{
$time_tokens = explode('-', $time_key);
// missing tokens (short time_key) end up as NULL, the warning is silenced with @
$inserts[] = array(
'year' => $time_tokens[0],
'month' => @$time_tokens[1],
'day' => @$time_tokens[2],
'hour' => @$time_tokens[3],
'nb_pages' => $nb_pages,
);
}
if (count($updates) > 0)
{
mass_updates(
HISTORY_SUMMARY_TABLE,
array(
'primary' => array('year','month','day','hour'),
'update' => array('nb_pages'),
),
$updates
);
}
if (count($inserts) > 0)
{
mass_inserts(
HISTORY_SUMMARY_TABLE,
array_keys($inserts[0]),
$inserts
);
}
// flag as summarized every line counted above
if ($max_id != 0)
{
$query = '
UPDATE '.HISTORY_TABLE.'
SET summarized = \'true\'
WHERE summarized = \'false\'
AND id <= '.$max_id.'
;';
pwg_query($query);
}
// summarize all remaining history lines (no $max_lines limit)
history_summarize();
// +-----------------------------------------------------------------------+
// | Page parameters check |

View File

@@ -553,6 +553,16 @@ $conf['guest_access'] = true;
// nb_logs_page : how many logs to display on a page
$conf['nb_logs_page'] = 300;
// Every X new lines in history, perform an automatic purge (triggered when
// the id of the new history line is a multiple of X). The more often, the
// fewer lines to delete. 0 to disable.
$conf['history_autopurge_every'] = 1021;
// How many lines to keep in history on autopurge? 0 to disable.
$conf['history_autopurge_keep_lines'] = 1000000;
// On history autopurge, how many lines should be deleted at once, maximum?
$conf['history_autopurge_blocksize'] = 50000;
// +-----------------------------------------------------------------------+
// | urls |
// +-----------------------------------------------------------------------+

View File

@@ -495,6 +495,19 @@ INSERT INTO '.HISTORY_TABLE.'
;';
pwg_query($query);
$history_id = pwg_db_insert_id(HISTORY_TABLE);
if ($history_id % 1000 == 0)
{
include_once(PHPWG_ROOT_PATH.'admin/include/functions_history.inc.php');
history_summarize(50000);
}
if ($conf['history_autopurge_every'] > 0 and $history_id % $conf['history_autopurge_every'] == 0)
{
include_once(PHPWG_ROOT_PATH.'admin/include/functions_history.inc.php');
history_autopurge();
}
return true;
}

View File

@@ -0,0 +1,80 @@
<?php
// +-----------------------------------------------------------------------+
// | Piwigo - a PHP based photo gallery |
// +-----------------------------------------------------------------------+
// | Copyright(C) 2008-2016 Piwigo Team http://piwigo.org |
// | Copyright(C) 2003-2008 PhpWebGallery Team http://phpwebgallery.net |
// | Copyright(C) 2002-2003 Pierrick LE GALL http://le-gall.net/pierrick |
// +-----------------------------------------------------------------------+
// | This program is free software; you can redistribute it and/or modify |
// | it under the terms of the GNU General Public License as published by |
// | the Free Software Foundation |
// | |
// | This program is distributed in the hope that it will be useful, but |
// | WITHOUT ANY WARRANTY; without even the implied warranty of |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
// | General Public License for more details. |
// | |
// | You should have received a copy of the GNU General Public License |
// | along with this program; if not, write to the Free Software |
// | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
// | USA. |
// +-----------------------------------------------------------------------+
// this script must be included by Piwigo, never called directly
if (!defined('PHPWG_ROOT_PATH'))
{
die('Hacking attempt!');
}
$upgrade_description = 'add history_id_from+history_id_to in history_summary table';
// the table name is built from PREFIX_TABLE
// NOTE(review): the original comment mentioned "an external user table",
// which does not seem related to history_summary -- confirm
pwg_query('
ALTER TABLE `'.PREFIX_TABLE.'history_summary`
ADD COLUMN `history_id_from` int(10) unsigned default NULL,
ADD COLUMN `history_id_to` int(10) unsigned default NULL
;');
// find the last history line flagged as summarized
$query = '
SELECT
*
FROM '.PREFIX_TABLE.'history
WHERE summarized = \'true\'
ORDER BY id DESC
LIMIT 1
;';
// note : much faster than searching MAX(ID), ie on my big sample 14 seconds Vs 2 seconds
$history_lines = query2array($query);
if (count($history_lines) > 0)
{
$last_summarized = $history_lines[0];
// date is split on "-" and time on ":" to get the year/month/day/hour of
// the summary line to update
list($year, $month, $day) = explode('-', $last_summarized['date']);
list($hour) = explode(':', $last_summarized['time']);
// store the id of the last summarized line on its "hour" summary line
single_update(
PREFIX_TABLE.'history_summary',
array(
'history_id_to' => $last_summarized['id'],
),
array(
'year' => $year,
'month' => $month,
'day' => $day,
'hour' => $hour,
)
);
// in case this script would update no summary line, it would mean the
// summary has been purged and will be rebuilt from scratch, based on the
// content of history table
}
// for now, we keep column history.summarized even if Piwigo 2.9 no longer
// uses it. We will remove it in a future version. First we need to have
// "less" lines in history table. This will be possible with the automatic
// purge implemented in Piwigo 2.9.
echo "\n".$upgrade_description."\n";
?>

View File

@@ -129,12 +129,10 @@ CREATE TABLE `piwigo_history` (
`category_id` smallint(5) default NULL,
`tag_ids` varchar(50) default NULL,
`image_id` mediumint(8) default NULL,
`summarized` enum('true','false') default 'false',
`image_type` enum('picture','high','other') default NULL,
`format_id` int(11) unsigned default NULL,
`auth_key_id` int(11) unsigned DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `history_i1` (`summarized`)
PRIMARY KEY (`id`)
) ENGINE=MyISAM;
--
@@ -148,6 +146,8 @@ CREATE TABLE `piwigo_history_summary` (
`day` tinyint(2) default NULL,
`hour` tinyint(2) default NULL,
`nb_pages` int(11) default NULL,
`history_id_from` int(10) unsigned default NULL,
`history_id_to` int(10) unsigned default NULL,
UNIQUE KEY history_summary_ymdh (`year`,`month`,`day`,`hour`)
) ENGINE=MyISAM;