From 5fb18eda36126f4a991e8fe22a531a5633ab1295 Mon Sep 17 00:00:00 2001 From: plegall Date: Fri, 18 Nov 2016 15:08:03 +0100 Subject: [PATCH 1/3] feature #509 summarize history more often Every 1000 log entries inserted, Piwigo performs a history summarization. The summarization process has also been optimized: column history.summarized is no longer used (no need to update it anymore, which took a lot of time); we now save history_id_from and history_id_to in the history_summary table. This way we know where to start from on the next summarization. For now, for a simple performance reason, we keep column history.summarized, because removing it may take a long time on huge tables. Once we have automatic purge on history, it will be safer to drop this column. --- admin/include/functions_history.inc.php | 214 ++++++++++++++++++++++++ admin/stats.php | 164 +----------------- include/functions.inc.php | 7 + install/db/150-database.php | 80 +++++++++ install/piwigo_structure-mysql.sql | 6 +- 5 files changed, 305 insertions(+), 166 deletions(-) create mode 100644 install/db/150-database.php diff --git a/admin/include/functions_history.inc.php b/admin/include/functions_history.inc.php index 55783fd74..3e41582b4 100644 --- a/admin/include/functions_history.inc.php +++ b/admin/include/functions_history.inc.php @@ -176,6 +176,220 @@ SELECT return $data; } +/** + * Compute statistics from history table to history_summary table + * + * @param int $max_lines - to only compute the next X lines, not the whole remaining lines + */ +function history_summarize($max_lines=null) +{ + // we need to know which was the last line "summarized" + $query = ' +SELECT + * + FROM '.HISTORY_SUMMARY_TABLE.' 
+ WHERE history_id_to IS NOT NULL + ORDER BY history_id_to DESC + LIMIT 1 +;'; + $summary_lines = query2array($query); + + $history_min_id = 0; + if (count($summary_lines) > 0) + { + $last_summary = $summary_lines[0]; + $history_min_id = $last_summary['history_id_to']; + } + else + { + // if we have no "reference", ie "starting point", we need to find + // one. And "0" is not the right answer here, because history table may + // have been purged already. + $query = ' +SELECT + MIN(id) AS min_id + FROM '.HISTORY_TABLE.' +;'; + $history_lines = query2array($query); + if (count($history_lines) > 0) + { + $history_min_id = $history_lines[0]['min_id'] - 1; + } + } + + $query = ' +SELECT + date, + '.pwg_db_get_hour('time').' AS hour, + MIN(id) AS min_id, + MAX(id) AS max_id, + COUNT(*) AS nb_pages + FROM '.HISTORY_TABLE.' + WHERE id > '.$history_min_id; + + if (isset($max_lines)) + { + $query.= ' + AND id <= '.($history_min_id + $max_lines); + } + + $query.= ' + GROUP BY + date, + hour + ORDER BY + date ASC, + hour ASC +;'; + $result = pwg_query($query); + + $need_update = array(); + + $is_first = true; + $first_time_key = null; + + while ($row = pwg_db_fetch_assoc($result)) + { + $time_keys = array( + substr($row['date'], 0, 4), //yyyy + substr($row['date'], 0, 7), //yyyy-mm + substr($row['date'], 0, 10),//yyyy-mm-dd + sprintf( + '%s-%02u', + $row['date'], $row['hour'] + ), + ); + + foreach ($time_keys as $time_key) + { + if (!isset($need_update[$time_key])) + { + $need_update[$time_key] = array( + 'nb_pages' => 0, + 'history_id_from' => $row['min_id'], + 'history_id_to' => $row['max_id'], + ); + } + $need_update[$time_key]['nb_pages'] += $row['nb_pages']; + + if ($row['min_id'] < $need_update[$time_key]['history_id_from']) + { + $need_update[$time_key]['history_id_from'] = $row['min_id']; + } + + if ($row['max_id'] > $need_update[$time_key]['history_id_to']) + { + $need_update[$time_key]['history_id_to'] = $row['max_id']; + } + } + + if ($is_first) + { + $is_first = 
false; + $first_time_key = $time_keys[3]; + } + } + +// Only the oldest time_key might be already summarized, so we have to +// update the 4 corresponding lines instead of simply inserting them. +// +// For example, if the oldest unsummarized is 2005.08.25.21, the 4 lines +// that can be updated are: +// +// +---------------+----------+ +// | id | nb_pages | +// +---------------+----------+ +// | 2005 | 241109 | +// | 2005-08 | 20133 | +// | 2005-08-25 | 620 | +// | 2005-08-25-21 | 151 | +// +---------------+----------+ + + $updates = array(); + $inserts = array(); + + if (isset($first_time_key)) + { + list($year, $month, $day, $hour) = explode('-', $first_time_key); + + $query = ' +SELECT * + FROM '.HISTORY_SUMMARY_TABLE.' + WHERE year='.$year.' + AND ( month IS NULL + OR ( month='.$month.' + AND ( day is NULL + OR (day='.$day.' + AND (hour IS NULL OR hour='.$hour.') + ) + ) + ) + ) +;'; + $result = pwg_query($query); + while ($row = pwg_db_fetch_assoc($result)) + { + $key = sprintf('%4u', $row['year']); + if ( isset($row['month']) ) + { + $key .= sprintf('-%02u', $row['month']); + if ( isset($row['day']) ) + { + $key .= sprintf('-%02u', $row['day']); + if ( isset($row['hour']) ) + { + $key .= sprintf('-%02u', $row['hour']); + } + } + } + + if (isset($need_update[$key])) + { + $row['nb_pages'] += $need_update[$key]['nb_pages']; + $row['history_id_to'] = $need_update[$key]['history_id_to']; + $updates[] = $row; + unset($need_update[$key]); + } + } + } + + foreach ($need_update as $time_key => $summary) + { + $time_tokens = explode('-', $time_key); + + $inserts[] = array( + 'year' => $time_tokens[0], + 'month' => @$time_tokens[1], + 'day' => @$time_tokens[2], + 'hour' => @$time_tokens[3], + 'nb_pages' => $summary['nb_pages'], + 'history_id_from' => $summary['history_id_from'], + 'history_id_to' => $summary['history_id_to'], + ); + } + + if (count($updates) > 0) + { + mass_updates( + HISTORY_SUMMARY_TABLE, + array( + 'primary' => array('year','month','day','hour'), + 
'update' => array('nb_pages','history_id_to'), + ), + $updates + ); + } + + if (count($inserts) > 0) + { + mass_inserts( + HISTORY_SUMMARY_TABLE, + array_keys($inserts[0]), + $inserts + ); + } +} + add_event_handler('get_history', 'get_history'); trigger_notify('functions_history_included'); diff --git a/admin/stats.php b/admin/stats.php index 5398d0b26..6115fecec 100644 --- a/admin/stats.php +++ b/admin/stats.php @@ -113,169 +113,7 @@ check_status(ACCESS_ADMINISTRATOR); // | Refresh summary from details | // +-----------------------------------------------------------------------+ -$query = ' -SELECT - date, - '.pwg_db_get_hour('time').' AS hour, - MAX(id) AS max_id, - COUNT(*) AS nb_pages - FROM '.HISTORY_TABLE.' - WHERE summarized = \'false\' - GROUP BY - date, - hour - ORDER BY - date ASC, - hour ASC -;'; -$result = pwg_query($query); - -$need_update = array(); - -$max_id = 0; -$is_first = true; -$first_time_key = null; - -while ($row = pwg_db_fetch_assoc($result)) -{ - $time_keys = array( - substr($row['date'], 0, 4), //yyyy - substr($row['date'], 0, 7), //yyyy-mm - substr($row['date'], 0, 10),//yyyy-mm-dd - sprintf( - '%s-%02u', - $row['date'], $row['hour'] - ), - ); - - foreach ($time_keys as $time_key) - { - if (!isset($need_update[$time_key])) - { - $need_update[$time_key] = 0; - } - $need_update[$time_key] += $row['nb_pages']; - } - - if ($row['max_id'] > $max_id) - { - $max_id = $row['max_id']; - } - - if ($is_first) - { - $is_first = false; - $first_time_key = $time_keys[3]; - } -} - -// Only the oldest time_key might be already summarized, so we have to -// update the 4 corresponding lines instead of simply inserting them. 
-// -// For example, if the oldest unsummarized is 2005.08.25.21, the 4 lines -// that can be updated are: -// -// +---------------+----------+ -// | id | nb_pages | -// +---------------+----------+ -// | 2005 | 241109 | -// | 2005-08 | 20133 | -// | 2005-08-25 | 620 | -// | 2005-08-25-21 | 151 | -// +---------------+----------+ - - -$updates = array(); -$inserts = array(); - -if (isset($first_time_key)) -{ - list($year, $month, $day, $hour) = explode('-', $first_time_key); - - $query = ' -SELECT * - FROM '.HISTORY_SUMMARY_TABLE.' - WHERE year='.$year.' - AND ( month IS NULL - OR ( month='.$month.' - AND ( day is NULL - OR (day='.$day.' - AND (hour IS NULL OR hour='.$hour.') - ) - ) - ) - ) -;'; - $result = pwg_query($query); - while ($row = pwg_db_fetch_assoc($result)) - { - $key = sprintf('%4u', $row['year']); - if ( isset($row['month']) ) - { - $key .= sprintf('-%02u', $row['month']); - if ( isset($row['day']) ) - { - $key .= sprintf('-%02u', $row['day']); - if ( isset($row['hour']) ) - { - $key .= sprintf('-%02u', $row['hour']); - } - } - } - - if (isset($need_update[$key])) - { - $row['nb_pages'] += $need_update[$key]; - $updates[] = $row; - unset($need_update[$key]); - } - } -} - -foreach ($need_update as $time_key => $nb_pages) -{ - $time_tokens = explode('-', $time_key); - - $inserts[] = array( - 'year' => $time_tokens[0], - 'month' => @$time_tokens[1], - 'day' => @$time_tokens[2], - 'hour' => @$time_tokens[3], - 'nb_pages' => $nb_pages, - ); -} - -if (count($updates) > 0) -{ - mass_updates( - HISTORY_SUMMARY_TABLE, - array( - 'primary' => array('year','month','day','hour'), - 'update' => array('nb_pages'), - ), - $updates - ); -} - -if (count($inserts) > 0) -{ - mass_inserts( - HISTORY_SUMMARY_TABLE, - array_keys($inserts[0]), - $inserts - ); -} - -if ($max_id != 0) -{ - $query = ' -UPDATE '.HISTORY_TABLE.' - SET summarized = \'true\' - WHERE summarized = \'false\' - AND id <= '.$max_id.' 
-;'; - pwg_query($query); -} +history_summarize(); // +-----------------------------------------------------------------------+ // | Page parameters check | diff --git a/include/functions.inc.php b/include/functions.inc.php index db8c457ee..84def0b1c 100644 --- a/include/functions.inc.php +++ b/include/functions.inc.php @@ -495,6 +495,13 @@ INSERT INTO '.HISTORY_TABLE.' ;'; pwg_query($query); + $history_id = pwg_db_insert_id(HISTORY_TABLE); + if ($history_id % 1000 == 0) + { + include_once(PHPWG_ROOT_PATH.'admin/include/functions_history.inc.php'); + history_summarize(50000); + } + return true; } diff --git a/install/db/150-database.php b/install/db/150-database.php new file mode 100644 index 000000000..505a389cd --- /dev/null +++ b/install/db/150-database.php @@ -0,0 +1,80 @@ + 0) +{ + $last_summarized = $history_lines[0]; + + list($year, $month, $day) = explode('-', $last_summarized['date']); + list($hour) = explode(':', $last_summarized['time']); + + single_update( + PREFIX_TABLE.'history_summary', + array( + 'history_id_to' => $last_summarized['id'], + ), + array( + 'year' => $year, + 'month' => $month, + 'day' => $day, + 'hour' => $hour, + ) + ); + + // in case this script would update no summary line, it would mean the + // summary has been purged and will be rebuild from scratch, based on the + // content of history table +} + +// for now, we keep column history.summarized even if Piwigo 2.9 no longer +// uses it. We will remove it in a future version. First we need to have +// "less" lines in history table. This will be possible with the automatic +// purge implemented in Piwigo 2.9. 
+ +echo "\n".$upgrade_description."\n"; + +?> diff --git a/install/piwigo_structure-mysql.sql b/install/piwigo_structure-mysql.sql index 76e9fe90b..c7b33b9f7 100644 --- a/install/piwigo_structure-mysql.sql +++ b/install/piwigo_structure-mysql.sql @@ -129,12 +129,10 @@ CREATE TABLE `piwigo_history` ( `category_id` smallint(5) default NULL, `tag_ids` varchar(50) default NULL, `image_id` mediumint(8) default NULL, - `summarized` enum('true','false') default 'false', `image_type` enum('picture','high','other') default NULL, `format_id` int(11) unsigned default NULL, `auth_key_id` int(11) unsigned DEFAULT NULL, - PRIMARY KEY (`id`), - KEY `history_i1` (`summarized`) + PRIMARY KEY (`id`) ) ENGINE=MyISAM; -- @@ -148,6 +146,8 @@ CREATE TABLE `piwigo_history_summary` ( `day` tinyint(2) default NULL, `hour` tinyint(2) default NULL, `nb_pages` int(11) default NULL, + `history_id_from` int(10) unsigned default NULL, + `history_id_to` int(10) unsigned default NULL, UNIQUE KEY history_summary_ymdh (`year`,`month`,`day`,`hour`) ) ENGINE=MyISAM; From eef8a27f5eb3490b6aa6fc95783b0a3a83204de4 Mon Sep 17 00:00:00 2001 From: plegall Date: Mon, 21 Nov 2016 12:13:21 +0100 Subject: [PATCH 2/3] feature #370, automatic purge on history --- admin/include/functions_history.inc.php | 90 +++++++++++++++++++++++++ include/config_default.inc.php | 10 +++ include/functions.inc.php | 6 ++ 3 files changed, 106 insertions(+) diff --git a/admin/include/functions_history.inc.php b/admin/include/functions_history.inc.php index 3e41582b4..f0156c025 100644 --- a/admin/include/functions_history.inc.php +++ b/admin/include/functions_history.inc.php @@ -390,6 +390,96 @@ SELECT * } } +/** + * Smart purge on history table. 
Keep some lines, purge only summarized lines + * + * @since 2.9 + */ +function history_autopurge() +{ + global $conf, $logger; + + if (0 == $conf['history_autopurge_keep_lines']) + { + return; + } + + // we want to purge only if there are too many lines and if the lines are summarized + + $query = ' +SELECT + COUNT(*) + FROM '.HISTORY_TABLE.' +;'; + list($count) = pwg_db_fetch_row(pwg_query($query)); + + if ($count <= $conf['history_autopurge_keep_lines']) + { + return; // no need to purge for now + } + + // 1) find the last summarized history line + $query = ' +SELECT + * + FROM '.HISTORY_SUMMARY_TABLE.' + WHERE history_id_to IS NOT NULL + ORDER BY history_id_to DESC + LIMIT 1 +;'; + $summary_lines = query2array($query); + if (count($summary_lines) == 0) + { + return; // lines not summarized, no purge + } + + $history_id_last_summarized = $summary_lines[0]['history_id_to']; + + // 2) find the latest history line (and substract the number of lines to keep) + $query = ' +SELECT + id + FROM '.HISTORY_TABLE.' + ORDER BY id DESC + LIMIT 1 +;'; + $history_lines = query2array($query); + if (count($history_lines) == 0) + { + return; + } + + $history_id_latest = $history_lines[0]['id']; + + // 3) find the oldest history line (and add the number of lines to delete) + $query = ' +SELECT + id + FROM '.HISTORY_TABLE.' + ORDER BY id ASC + LIMIT 1 +;'; + $history_lines = query2array($query); + $history_id_oldest = $history_lines[0]['id']; + + $search_min = array( + $history_id_last_summarized, + $history_id_latest - $conf['history_autopurge_keep_lines'], + $history_id_oldest + $conf['history_autopurge_blocksize'], + ); + + $history_id_delete_before = min($search_min); + + $logger->debug(__FUNCTION__.', '.join('/', $search_min)); + + $query = ' +DELETE + FROM '.HISTORY_TABLE.' + WHERE id < '.$history_id_delete_before.' 
+;'; + pwg_query($query); +} + add_event_handler('get_history', 'get_history'); trigger_notify('functions_history_included'); diff --git a/include/config_default.inc.php b/include/config_default.inc.php index 1f27362e7..cc5dcb925 100644 --- a/include/config_default.inc.php +++ b/include/config_default.inc.php @@ -553,6 +553,16 @@ $conf['guest_access'] = true; // nb_logs_page : how many logs to display on a page $conf['nb_logs_page'] = 300; +// Every X new line in history, perform an automatic purge. The more often, +// the fewer lines to delete. 0 to disable. +$conf['history_autopurge_every'] = 1021; + +// How many lines to keep in history on autopurge? 0 to disable. +$conf['history_autopurge_keep_lines'] = 1000000; + +// On history autopurge, how many lines should to deleted, maximum? +$conf['history_autopurge_blocksize'] = 50000; + // +-----------------------------------------------------------------------+ // | urls | // +-----------------------------------------------------------------------+ diff --git a/include/functions.inc.php b/include/functions.inc.php index 84def0b1c..f580e3863 100644 --- a/include/functions.inc.php +++ b/include/functions.inc.php @@ -502,6 +502,12 @@ INSERT INTO '.HISTORY_TABLE.' 
history_summarize(50000); + } + + if ($conf['history_autopurge_every'] > 0 and $history_id % $conf['history_autopurge_every'] == 0) + { + include_once(PHPWG_ROOT_PATH.'admin/include/functions_history.inc.php'); + history_autopurge(); + } + return true; } From 419bb728027acbc8df420e401ea87b758d8a920c Mon Sep 17 00:00:00 2001 From: plegall Date: Mon, 21 Nov 2016 15:01:43 +0100 Subject: [PATCH 3/3] feature #370, details on config setting --- include/config_default.inc.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/config_default.inc.php b/include/config_default.inc.php index cc5dcb925..c3b36064f 100644 --- a/include/config_default.inc.php +++ b/include/config_default.inc.php @@ -560,7 +560,7 @@ $conf['history_autopurge_every'] = 1021; // How many lines to keep in history on autopurge? 0 to disable. $conf['history_autopurge_keep_lines'] = 1000000; -// On history autopurge, how many lines should to deleted, maximum? +// On history autopurge, how many lines should be deleted at once, maximum? $conf['history_autopurge_blocksize'] = 50000; // +-----------------------------------------------------------------------+