diff --git a/admin/include/functions_history.inc.php b/admin/include/functions_history.inc.php index 55783fd74..f0156c025 100644 --- a/admin/include/functions_history.inc.php +++ b/admin/include/functions_history.inc.php @@ -176,6 +176,310 @@ SELECT return $data; } +/** + * Compute statistics from history table to history_summary table + * + * @param int $max_lines - only summarize history lines within the next X ids, not all remaining lines + */ +function history_summarize($max_lines=null) +{ + // we need to know which was the last line "summarized" + $query = ' +SELECT + * + FROM '.HISTORY_SUMMARY_TABLE.' + WHERE history_id_to IS NOT NULL + ORDER BY history_id_to DESC + LIMIT 1 +;'; + $summary_lines = query2array($query); + + $history_min_id = 0; + if (count($summary_lines) > 0) + { + $last_summary = $summary_lines[0]; + $history_min_id = $last_summary['history_id_to']; + } + else + { + // if we have no "reference", ie "starting point", we need to find + // one. And "0" is not the right answer here, because history table may + // have been purged already. + $query = ' +SELECT + MIN(id) AS min_id + FROM '.HISTORY_TABLE.' +;'; + $history_lines = query2array($query); + if (count($history_lines) > 0) + { + $history_min_id = $history_lines[0]['min_id'] - 1; + } + } + + $query = ' +SELECT + date, + '.pwg_db_get_hour('time').' AS hour, + MIN(id) AS min_id, + MAX(id) AS max_id, + COUNT(*) AS nb_pages + FROM '.HISTORY_TABLE.' 
+ WHERE id > '.$history_min_id; + + if (isset($max_lines)) + { + $query.= ' + AND id <= '.($history_min_id + $max_lines); + } + + $query.= ' + GROUP BY + date, + hour + ORDER BY + date ASC, + hour ASC +;'; + $result = pwg_query($query); + + $need_update = array(); + + $is_first = true; + $first_time_key = null; + + while ($row = pwg_db_fetch_assoc($result)) + { + $time_keys = array( + substr($row['date'], 0, 4), //yyyy + substr($row['date'], 0, 7), //yyyy-mm + substr($row['date'], 0, 10),//yyyy-mm-dd + sprintf( + '%s-%02u', + $row['date'], $row['hour'] + ), + ); + + foreach ($time_keys as $time_key) + { + if (!isset($need_update[$time_key])) + { + $need_update[$time_key] = array( + 'nb_pages' => 0, + 'history_id_from' => $row['min_id'], + 'history_id_to' => $row['max_id'], + ); + } + $need_update[$time_key]['nb_pages'] += $row['nb_pages']; + + if ($row['min_id'] < $need_update[$time_key]['history_id_from']) + { + $need_update[$time_key]['history_id_from'] = $row['min_id']; + } + + if ($row['max_id'] > $need_update[$time_key]['history_id_to']) + { + $need_update[$time_key]['history_id_to'] = $row['max_id']; + } + } + + if ($is_first) + { + $is_first = false; + $first_time_key = $time_keys[3]; + } + } + +// Only the oldest time_key might be already summarized, so we have to +// update the 4 corresponding lines instead of simply inserting them. +// +// For example, if the oldest unsummarized is 2005.08.25.21, the 4 lines +// that can be updated are: +// +// +---------------+----------+ +// | id | nb_pages | +// +---------------+----------+ +// | 2005 | 241109 | +// | 2005-08 | 20133 | +// | 2005-08-25 | 620 | +// | 2005-08-25-21 | 151 | +// +---------------+----------+ + + $updates = array(); + $inserts = array(); + + if (isset($first_time_key)) + { + list($year, $month, $day, $hour) = explode('-', $first_time_key); + + $query = ' +SELECT * + FROM '.HISTORY_SUMMARY_TABLE.' + WHERE year='.$year.' + AND ( month IS NULL + OR ( month='.$month.' 
+ AND ( day is NULL + OR (day='.$day.' + AND (hour IS NULL OR hour='.$hour.') + ) + ) + ) + ) +;'; + $result = pwg_query($query); + while ($row = pwg_db_fetch_assoc($result)) + { + $key = sprintf('%4u', $row['year']); + if ( isset($row['month']) ) + { + $key .= sprintf('-%02u', $row['month']); + if ( isset($row['day']) ) + { + $key .= sprintf('-%02u', $row['day']); + if ( isset($row['hour']) ) + { + $key .= sprintf('-%02u', $row['hour']); + } + } + } + + if (isset($need_update[$key])) + { + $row['nb_pages'] += $need_update[$key]['nb_pages']; + $row['history_id_to'] = $need_update[$key]['history_id_to']; + $updates[] = $row; + unset($need_update[$key]); + } + } + } + + foreach ($need_update as $time_key => $summary) + { + $time_tokens = explode('-', $time_key); + + $inserts[] = array( + 'year' => $time_tokens[0], + 'month' => @$time_tokens[1], + 'day' => @$time_tokens[2], + 'hour' => @$time_tokens[3], + 'nb_pages' => $summary['nb_pages'], + 'history_id_from' => $summary['history_id_from'], + 'history_id_to' => $summary['history_id_to'], + ); + } + + if (count($updates) > 0) + { + mass_updates( + HISTORY_SUMMARY_TABLE, + array( + 'primary' => array('year','month','day','hour'), + 'update' => array('nb_pages','history_id_to'), + ), + $updates + ); + } + + if (count($inserts) > 0) + { + mass_inserts( + HISTORY_SUMMARY_TABLE, + array_keys($inserts[0]), + $inserts + ); + } +} + +/** + * Smart purge on history table. Keep the latest lines, purge only summarized lines, at most one block per call + * + * @since 2.9 + */ +function history_autopurge() +{ + global $conf, $logger; + + if (0 == $conf['history_autopurge_keep_lines']) + { + return; + } + + // we want to purge only if there are too many lines and if the lines are summarized + + $query = ' +SELECT + COUNT(*) + FROM '.HISTORY_TABLE.' 
+;'; + list($count) = pwg_db_fetch_row(pwg_query($query)); + + if ($count <= $conf['history_autopurge_keep_lines']) + { + return; // no need to purge for now + } + + // 1) find the last summarized history line + $query = ' +SELECT + * + FROM '.HISTORY_SUMMARY_TABLE.' + WHERE history_id_to IS NOT NULL + ORDER BY history_id_to DESC + LIMIT 1 +;'; + $summary_lines = query2array($query); + if (count($summary_lines) == 0) + { + return; // lines not summarized, no purge + } + + $history_id_last_summarized = $summary_lines[0]['history_id_to']; + + // 2) find the latest history line (and subtract the number of lines to keep) + $query = ' +SELECT + id + FROM '.HISTORY_TABLE.' + ORDER BY id DESC + LIMIT 1 +;'; + $history_lines = query2array($query); + if (count($history_lines) == 0) + { + return; + } + + $history_id_latest = $history_lines[0]['id']; + + // 3) find the oldest history line (and add the number of lines to delete) + $query = ' +SELECT + id + FROM '.HISTORY_TABLE.' + ORDER BY id ASC + LIMIT 1 +;'; + $history_lines = query2array($query); + $history_id_oldest = $history_lines[0]['id']; + + $search_min = array( + $history_id_last_summarized, + $history_id_latest - $conf['history_autopurge_keep_lines'], + $history_id_oldest + $conf['history_autopurge_blocksize'], + ); + + $history_id_delete_before = min($search_min); + + $logger->debug(__FUNCTION__.', '.join('/', $search_min)); + + $query = ' +DELETE + FROM '.HISTORY_TABLE.' + WHERE id < '.$history_id_delete_before.' +;'; + pwg_query($query); +} + add_event_handler('get_history', 'get_history'); trigger_notify('functions_history_included'); diff --git a/admin/stats.php b/admin/stats.php index 5398d0b26..6115fecec 100644 --- a/admin/stats.php +++ b/admin/stats.php @@ -113,169 +113,7 @@ check_status(ACCESS_ADMINISTRATOR); // | Refresh summary from details | // +-----------------------------------------------------------------------+ -$query = ' -SELECT - date, - '.pwg_db_get_hour('time').' 
AS hour, - MAX(id) AS max_id, - COUNT(*) AS nb_pages - FROM '.HISTORY_TABLE.' - WHERE summarized = \'false\' - GROUP BY - date, - hour - ORDER BY - date ASC, - hour ASC -;'; -$result = pwg_query($query); - -$need_update = array(); - -$max_id = 0; -$is_first = true; -$first_time_key = null; - -while ($row = pwg_db_fetch_assoc($result)) -{ - $time_keys = array( - substr($row['date'], 0, 4), //yyyy - substr($row['date'], 0, 7), //yyyy-mm - substr($row['date'], 0, 10),//yyyy-mm-dd - sprintf( - '%s-%02u', - $row['date'], $row['hour'] - ), - ); - - foreach ($time_keys as $time_key) - { - if (!isset($need_update[$time_key])) - { - $need_update[$time_key] = 0; - } - $need_update[$time_key] += $row['nb_pages']; - } - - if ($row['max_id'] > $max_id) - { - $max_id = $row['max_id']; - } - - if ($is_first) - { - $is_first = false; - $first_time_key = $time_keys[3]; - } -} - -// Only the oldest time_key might be already summarized, so we have to -// update the 4 corresponding lines instead of simply inserting them. -// -// For example, if the oldest unsummarized is 2005.08.25.21, the 4 lines -// that can be updated are: -// -// +---------------+----------+ -// | id | nb_pages | -// +---------------+----------+ -// | 2005 | 241109 | -// | 2005-08 | 20133 | -// | 2005-08-25 | 620 | -// | 2005-08-25-21 | 151 | -// +---------------+----------+ - - -$updates = array(); -$inserts = array(); - -if (isset($first_time_key)) -{ - list($year, $month, $day, $hour) = explode('-', $first_time_key); - - $query = ' -SELECT * - FROM '.HISTORY_SUMMARY_TABLE.' - WHERE year='.$year.' - AND ( month IS NULL - OR ( month='.$month.' - AND ( day is NULL - OR (day='.$day.' 
- AND (hour IS NULL OR hour='.$hour.') - ) - ) - ) - ) -;'; - $result = pwg_query($query); - while ($row = pwg_db_fetch_assoc($result)) - { - $key = sprintf('%4u', $row['year']); - if ( isset($row['month']) ) - { - $key .= sprintf('-%02u', $row['month']); - if ( isset($row['day']) ) - { - $key .= sprintf('-%02u', $row['day']); - if ( isset($row['hour']) ) - { - $key .= sprintf('-%02u', $row['hour']); - } - } - } - - if (isset($need_update[$key])) - { - $row['nb_pages'] += $need_update[$key]; - $updates[] = $row; - unset($need_update[$key]); - } - } -} - -foreach ($need_update as $time_key => $nb_pages) -{ - $time_tokens = explode('-', $time_key); - - $inserts[] = array( - 'year' => $time_tokens[0], - 'month' => @$time_tokens[1], - 'day' => @$time_tokens[2], - 'hour' => @$time_tokens[3], - 'nb_pages' => $nb_pages, - ); -} - -if (count($updates) > 0) -{ - mass_updates( - HISTORY_SUMMARY_TABLE, - array( - 'primary' => array('year','month','day','hour'), - 'update' => array('nb_pages'), - ), - $updates - ); -} - -if (count($inserts) > 0) -{ - mass_inserts( - HISTORY_SUMMARY_TABLE, - array_keys($inserts[0]), - $inserts - ); -} - -if ($max_id != 0) -{ - $query = ' -UPDATE '.HISTORY_TABLE.' - SET summarized = \'true\' - WHERE summarized = \'false\' - AND id <= '.$max_id.' -;'; - pwg_query($query); -} +history_summarize(); // +-----------------------------------------------------------------------+ // | Page parameters check | diff --git a/include/config_default.inc.php b/include/config_default.inc.php index 1f27362e7..c3b36064f 100644 --- a/include/config_default.inc.php +++ b/include/config_default.inc.php @@ -553,6 +553,16 @@ $conf['guest_access'] = true; // nb_logs_page : how many logs to display on a page $conf['nb_logs_page'] = 300; +// Every X new lines in history, perform an automatic purge. The more often, +// the fewer lines to delete. 0 to disable. +$conf['history_autopurge_every'] = 1021; + +// How many lines to keep in history on autopurge? 0 to disable. 
+$conf['history_autopurge_keep_lines'] = 1000000; + +// On history autopurge, how many lines should be deleted at once, maximum? +$conf['history_autopurge_blocksize'] = 50000; + // +-----------------------------------------------------------------------+ // | urls | // +-----------------------------------------------------------------------+ diff --git a/include/functions.inc.php b/include/functions.inc.php index 7ed6a196f..cbc896e2f 100644 --- a/include/functions.inc.php +++ b/include/functions.inc.php @@ -495,6 +495,19 @@ INSERT INTO '.HISTORY_TABLE.' ;'; pwg_query($query); + $history_id = pwg_db_insert_id(HISTORY_TABLE); + if ($history_id % 1000 == 0) + { + include_once(PHPWG_ROOT_PATH.'admin/include/functions_history.inc.php'); + history_summarize(50000); + } + + if ($conf['history_autopurge_every'] > 0 and $history_id % $conf['history_autopurge_every'] == 0) + { + include_once(PHPWG_ROOT_PATH.'admin/include/functions_history.inc.php'); + history_autopurge(); + } + return true; } diff --git a/install/db/150-database.php b/install/db/150-database.php new file mode 100644 index 000000000..505a389cd --- /dev/null +++ b/install/db/150-database.php @@ -0,0 +1,80 @@ + 0) +{ + $last_summarized = $history_lines[0]; + + list($year, $month, $day) = explode('-', $last_summarized['date']); + list($hour) = explode(':', $last_summarized['time']); + + single_update( + PREFIX_TABLE.'history_summary', + array( + 'history_id_to' => $last_summarized['id'], + ), + array( + 'year' => $year, + 'month' => $month, + 'day' => $day, + 'hour' => $hour, + ) + ); + + // in case this script would update no summary line, it would mean the + // summary has been purged and will be rebuilt from scratch, based on the + // content of history table +} + +// for now, we keep column history.summarized even if Piwigo 2.9 no longer +// uses it. We will remove it in a future version. First we need to have +// "less" lines in history table. 
This will be possible with the automatic +// purge implemented in Piwigo 2.9. + +echo "\n".$upgrade_description."\n"; + +?> diff --git a/install/piwigo_structure-mysql.sql b/install/piwigo_structure-mysql.sql index 76e9fe90b..c7b33b9f7 100644 --- a/install/piwigo_structure-mysql.sql +++ b/install/piwigo_structure-mysql.sql @@ -129,12 +129,10 @@ CREATE TABLE `piwigo_history` ( `category_id` smallint(5) default NULL, `tag_ids` varchar(50) default NULL, `image_id` mediumint(8) default NULL, - `summarized` enum('true','false') default 'false', `image_type` enum('picture','high','other') default NULL, `format_id` int(11) unsigned default NULL, `auth_key_id` int(11) unsigned DEFAULT NULL, - PRIMARY KEY (`id`), - KEY `history_i1` (`summarized`) + PRIMARY KEY (`id`) ) ENGINE=MyISAM; -- @@ -148,6 +146,8 @@ CREATE TABLE `piwigo_history_summary` ( `day` tinyint(2) default NULL, `hour` tinyint(2) default NULL, `nb_pages` int(11) default NULL, + `history_id_from` int(10) unsigned default NULL, + `history_id_to` int(10) unsigned default NULL, UNIQUE KEY history_summary_ymdh (`year`,`month`,`day`,`hour`) ) ENGINE=MyISAM;