PHP-класс для сжатия HTML-кода без потери работоспособности

Класс, позволяющий уменьшать размер HTML-кода без потери его работоспособности.

Пример

/**
 * Compacts an HTML string with a fixed Compactor configuration.
 *
 * Script compression is switched off here; CSS compression, comment stripping
 * and both whitespace compressions are switched on.
 *
 * Note: 'buffer_echo' is not overridden, so the class default (true) applies
 * and squeeze() also echoes the result and sends a Content-Length header in
 * addition to returning the string.
 *
 * @param string $output The HTML to compact.
 * @return string The compacted HTML as returned by Compactor::squeeze().
 */
function stripOutput($output)
{
    $squeezer = new Compactor(array(
        // what gets compressed
        'compress_css'                     => true,
        'compress_scripts'                 => false,
        'compress_horizontal'              => true,
        'compress_vertical'                => true,
        // comment handling
        'strip_comments'                   => true,
        'keep_conditional_comments'        => true,
        // markup whose inner formatting must survive untouched
        'line_break'                       => PHP_EOL,
        'preserved_tags'                   => array('textarea', 'pre', 'script', 'style', 'code'),
        // no custom javascript minifier
        'script_compression_callback'      => false,
        'script_compression_callback_args' => array(),
    ));

    $squeezed = $squeezer->squeeze($output);

    return $squeezed;
}

Онлайн-сервис сжатия HTML

Вы можете воспользоваться онлайн-сервисом сжатия HTML-кода здесь

Код класса

/**
 * @author Oliver Lillie (aka buggedcom)
 * 
 *
 * @license BSD
 * @copyright Copyright (c) 2008 Oliver Lillie
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
 * documentation
 * files (the "Software"), to deal in the Software without restriction, including without limitation the rights
 * to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to
 * whom the Software
 * is furnished to do so, subject to the following conditions: The above copyright notice and this permission
 * notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
 * TO THE
 * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
 * OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * @name Compactor
 * @version 0.6.0
 * @abstract This class can be used in speeding up delivery of webpages from the server to the client browser,
 * by compacting
 * the whitespace. There are multiple options for compacting, including both horizontal and vertical whitespace
 * removal and
 * css/javascript compacting also. The class can also compact the output of a php script using automatic output
 * buffering.
 *
 * @example compressor.example1.php Compacts HTML using the default options.
 * @example compressor.example2.php Compacts remote HTML with custom javascript compression.
 * @example
 * // this example will automatically compact any buffered output from the script
 * $compactor = new Compactor(array(
 * 'use_buffer' => true,
 * 'buffer_echo' => true,
 * 'compact_on_shutdown' => true
 * ));
 *
 * @note The functions to provide deflate functionality are partially lifted from
 * minify http://code.google.com/p/minify/
 */
class Compactor
{

    /**
     * Holds the options array. Only the keys listed here can be changed
     * through the constructor or setOption(); unknown keys are ignored.
     * @access private
     * @var array
     */
    private $_options = array(
        // line_break; string; The type of line break used in the HTML that you are processing.
        // ie, \r, \r\n, \n or PHP_EOL
        'line_break' => PHP_EOL,
        // preserved_tags; array; An array of html tags whose innerHTML contents format require preserving.
        'preserved_tags' => array('textarea', 'pre', 'xmp', 'script', 'style', 'code'),
        // preserved_boundry; string; The holding block that is used to replace the contents of the preserved tags
        // while the compacting is taking place.
        'preserved_boundry' => '@@PRESERVEDTAG@@',
        // use_buffer; boolean; You can optionally use output buffering instead of providing the html for compacting.
        'use_buffer' => false,
        // buffer_echo; boolean; If after processing you want to output the content set this to true, otherwise it
        // will be up to you to echo out the compacted html.
        'buffer_echo' => true,
        // compact_on_shutdown; boolean; If 'use_buffer' is enabled and this option is also enabled it will create a
        // register_shutdown_function that will process the buffered output at the end of the script without any hassle.
        'compact_on_shutdown' => false,
        // strip_comments; boolean; This will strip html comments from the html. NOTE, if the below option
        // 'keep_conditional_comments' is not set to true then conditional Internet Explorer comments will also
        // be stripped.
        'strip_comments' => true,
        // keep_conditional_comments; boolean; Only applies if the above option 'strip_comments' is set to true.
        // Only if the client browser is Internet Explorer then the conditional comments are kept.
        'keep_conditional_comments' => true,
        // conditional_boundries; array; The holding block boundaries that are used to replace the opening and
        // closing tags of the conditional comments.
        'conditional_boundries' => array('@@IECOND-OPEN@@', '@@IECOND-CLOSE@@'),
        // compress_horizontal; boolean; Removes horizontal whitespace of the HTML, ie left to right whitespace
        // (spaces and tabs).
        'compress_horizontal' => true,
        // compress_vertical; boolean; Removes vertical whitespace of the HTML, ie line breaks.
        'compress_vertical' => true,
        // compress_scripts; boolean; Compresses content from script tags using a simple algorithm. Removes
        // javascript comments, and horizontal and vertical whitespace. Note as only a simple algorithm is used
        // there are limitations to the script and you may want to use a more complex script like 'minify'
        // http://code.google.com/p/minify/ or 'jsmin' http://code.google.com/p/jsmin-php/
        'compress_scripts' => true,
        // script_compression_callback; mixed; A callable used for custom js compression, or false for the
        // built-in simple compressor.
        'script_compression_callback' => false,
        // script_compression_callback_args; array; Any additional args for the callback. The javascript will be
        // put to the front of the array.
        'script_compression_callback_args' => array(),
        // compress_css; boolean; Compresses CSS style tags.
        'compress_css' => true,
        // deflate; boolean|int; Deflates the output using a deflate/gzip method. true means level 6, an integer
        // is used as the compression level.
        'deflate' => false
    );

    /**
     * Holds the preserved blocks so multiple scans of the html don't have to be made.
     * false means "nothing extracted yet"; an array holds the extracted blocks in document order.
     * @access private
     * @var mixed
     */
    private $_preserved_blocks = false;

    /**
     * Holds the Content-Encoding alias of the encoding that was actually applied
     * by _deflate(), or an empty value when the output was left unencoded.
     * @access private
     * @var mixed
     */
    private $_deflate_encoding = null;

    /**
     * Constructor. Applies the supplied options, starts output buffering when
     * 'use_buffer' is set and registers squeeze() as a shutdown function when
     * 'compact_on_shutdown' is set.
     *
     * @param array $options Option name => value pairs; unknown names are ignored.
     */
    public function __construct($options = array())
    {
        $this->setOption($options);
        // compacting on shutdown only makes sense with a buffer that is echoed afterwards
        if ($this->_options['compact_on_shutdown']) {
            $this->setOption(array(
                'use_buffer' => true,
                'buffer_echo' => true
            ));
        }
        if ($this->_options['use_buffer']) {
            ob_start();
        }
        if ($this->_options['compact_on_shutdown']) {
            // objects are passed by handle, no reference operator is needed for the callable
            register_shutdown_function(array($this, 'squeeze'));
        }
    }

    /**
     * Sets one option, or several options at once.
     *
     * @access public
     * @param mixed $varname Either an array of option name => value pairs, or the name of a single option.
     * @param mixed $varvalue The value of the option you are setting (ignored when $varname is an array).
     **/
    public function setOption($varname, $varvalue = null)
    {
        if (is_array($varname)) {
            foreach ($varname as $name => $value) {
                // exact key lookup: a loose in_array() lets unrelated names match numeric keys
                if (array_key_exists($name, $this->_options)) {
                    $this->_options[$name] = $value;
                }
            }
            return;
        }
        if ((is_string($varname) || is_int($varname)) && array_key_exists($varname, $this->_options)) {
            $this->_options[$varname] = $varvalue;
        }
    }

    /**
     * Compresses the html, either that is supplied to the function or if the use_buffer
     * option is enabled then the buffer is grabbed for compression.
     *
     * When 'buffer_echo' is enabled the result is also echoed and, if headers can
     * still be sent, a Content-Length header (plus the deflate headers when
     * 'deflate' is enabled) is emitted.
     *
     * @access public
     * @param string $html HTML string required for compression, however if the use_buffer option
     * is enabled the param can be left out because it will be ignored anyway.
     * @return string The compacted (and possibly deflated) html.
     */
    public function squeeze($html = null)
    {
        if ($this->_options['use_buffer']) {
            $html = ob_get_clean();
        }
        // ob_get_clean() returns false without an active buffer and callers may pass null
        $html = (string) $html;
        // unify the line breaks so we have clean html to work with
        $html = $this->_unifyLineBreaks($html);
        // compress any script/style tags if required
        if ($this->_options['compress_scripts'] || $this->_options['compress_css']) {
            $html = $this->_compressScriptAndStyleTags($html);
        }
        // NOTE(review): comments are stripped before the preserved tags are taken out, so an
        // html comment inside e.g. <pre> or <textarea> is removed as well.
        if ($this->_options['strip_comments']) {
            $html = $this->_stripHTMLComments($html);
        }
        if ($this->_options['compress_horizontal']) {
            $html = $this->_compressHorizontally($html);
        }
        if ($this->_options['compress_vertical']) {
            $html = $this->_compressVertically($html);
        }
        // replace the preserved blocks with their original content
        $html = $this->_reinstatePreservedBlocks($html);
        // deflate
        if ($this->_options['deflate']) {
            $html = $this->_deflate($html);
        }
        // if the html is to be echoed out then do the echo
        if ($this->_options['buffer_echo']) {
            if (!headers_sent()) {
                if ($this->_options['deflate']) {
                    // sends Content-Length too
                    $this->outputDeflateHeaders($html);
                } else {
                    header('Content-Length: ' . strlen($html));
                }
            }
            echo $html;
        }
        return $html;
    }

    /**
     * Strips HTML comments from the buffer whilst making a check to see if
     * Internet Explorer conditional comments should be stripped or not.
     * Conditional comments are only kept when 'keep_conditional_comments' is
     * enabled and the user agent looks like Internet Explorer on Windows.
     *
     * @access private
     * @param string $html The HTML string for comment removal.
     * @return string
     */
    private function _stripHTMLComments($html)
    {
        $conditional_tags = array('<!--[if', '<![endif]-->');
        $keep_conditionals = false;
        // only process if the Internet Explorer conditional statements are to be kept
        if ($this->_options['keep_conditional_comments']) {
            // check that the requesting browser is internet explorer
            $msie = '/msie\s(.*).*(win)/i';
            $keep_conditionals = (isset($_SERVER['HTTP_USER_AGENT']) && preg_match($msie, $_SERVER['HTTP_USER_AGENT']));
            // ie conditionals are to be kept, so hide their delimiters behind placeholders
            if ($keep_conditionals) {
                $html = str_replace($conditional_tags, $this->_options['conditional_boundries'], $html);
            }
        }
        // remove comments; on a PCRE failure (null) keep the html instead of losing the page
        $stripped = preg_replace('/<!--.*?-->/s', '', $html);
        if ($stripped !== null) {
            $html = $stripped;
        }
        // re sub-in the conditionals if required.
        if ($keep_conditionals) {
            $html = str_replace($this->_options['conditional_boundries'], $conditional_tags, $html);
        }
        return $html;
    }

    /**
     * Finds the html blocks whose formatting must be preserved (see the
     * 'preserved_tags' option), remembers them and replaces each one in the
     * html with the 'preserved_boundry' marker. Does nothing when the blocks
     * have already been extracted during the current squeeze() run.
     *
     * @access private
     * @param string $html
     * @return string The html with the preserved blocks replaced by markers.
     */
    private function _extractPreservedBlocks($html)
    {
        if ($this->_preserved_blocks !== false) {
            return $html;
        }
        $this->_preserved_blocks = array();
        $tags = array();
        foreach ((array) $this->_options['preserved_tags'] as $tag) {
            $tags[] = preg_quote($tag, '~');
        }
        if (count($tags) === 0) {
            return $html;
        }
        // \b stops "pre" from matching "<preface>", \1 pairs each opening tag with its own closing tag
        $pattern = '~<(' . implode('|', $tags) . ')\b[^>]*>.*?</\1\s*>~is';
        if (!preg_match_all($pattern, $html, $preserved_area_match)) {
            return $html;
        }
        // replace the preserved areas with markers
        $marked = preg_replace($pattern, $this->_options['preserved_boundry'], $html);
        if ($marked === null) {
            // PCRE failure: leave the html untouched rather than returning null
            return $html;
        }
        $this->_preserved_blocks = $preserved_area_match[0];
        return $marked;
    }

    /**
     * Replaces any preservations made with the original content and resets
     * the preserved block store so the instance can be used again.
     *
     * @access private
     * @param string $html
     * @return string
     */
    private function _reinstatePreservedBlocks($html)
    {
        if ($this->_preserved_blocks === false) {
            return $html;
        }
        $blocks = $this->_preserved_blocks;
        $this->_preserved_blocks = false;
        $boundary = $this->_options['preserved_boundry'];
        if (count($blocks) === 0 || $boundary === '') {
            return $html;
        }
        // Plain string splicing: the preserved content is frequently javascript and may contain
        // "$1" or "\1", which preg_replace() would treat as backreferences in a replacement.
        $segments = explode($boundary, $html);
        $restored = array_shift($segments);
        foreach ($segments as $index => $segment) {
            // markers without a matching block are left in place
            $restored .= (isset($blocks[$index]) ? $blocks[$index] : $boundary) . $segment;
        }
        return $restored;
    }

    /**
     * Compresses white space horizontally (ie spaces, tabs etc) whilst preserving
     * the content of the preserved tags.
     *
     * @access private
     * @param string $html
     * @return string
     */
    private function _compressHorizontally($html)
    {
        $html = $this->_extractPreservedBlocks($html);
        // remove the white space that follows a line break, except after a closing "?>"
        $line_break = preg_quote($this->_options['line_break'], '/');
        $trimmed = preg_replace('/((?<!\?>)' . $line_break . ')[\s]+/m', '\1', $html);
        if ($trimmed !== null) {
            $html = $trimmed;
        }
        // remove tabs
        return str_replace("\t", '', $html);
    }

    /**
     * Compresses white space vertically (ie line breaks) whilst preserving
     * the content of the preserved tags.
     *
     * @access private
     * @param string $html
     * @return string
     */
    private function _compressVertically($html)
    {
        $html = $this->_extractPreservedBlocks($html);
        // remove the line breaks
        return str_replace($this->_options['line_break'], '', $html);
    }

    /**
     * Converts line breaks from the different platforms (\r\n, \r, \n) onto the
     * one configured in the 'line_break' option.
     *
     * @access private
     * @param string $html HTML string
     * @return string
     */
    private function _unifyLineBreaks($html)
    {
        return preg_replace("/\015\012|\015|\012/", $this->_options['line_break'], $html);
    }

    /**
     * Compresses the contents of script and style tags. Scripts are compressed
     * when 'compress_scripts' is enabled, styles when 'compress_css' is enabled.
     * By default this uses '_simpleCodeCompress', however it would be advisable
     * to use another library such as 'minify' http://code.google.com/p/minify/
     * for javascript because the simple compressor has certain limitations with
     * comments and regex literals. A custom function can be set with the
     * 'script_compression_callback' option (applies to script tags only).
     *
     * @access private
     * @param string $html
     * @return string
     */
    private function _compressScriptAndStyleTags($html)
    {
        $compress_scripts = $this->_options['compress_scripts'];
        $compress_css = $this->_options['compress_css'];
        $use_script_callback = $this->_options['script_compression_callback'] != false;
        // match all the script/style tags: 1 = opening tag (+ optional "<!--"), 2 = tag name,
        // 3 = code, 4 = (optional "//-->" +) closing tag
        $pattern = '~(<(style|script)\b[^>]*>(?:\s*<!--)?)(.*?)((?://-->\s*)?</\2\s*>)~is';
        if (!preg_match_all($pattern, $html, $scriptparts)) {
            return $html;
        }
        // collect and compress the parts
        $compressed = array();
        $parts = array();
        $total = count($scriptparts[0]);
        for ($i = 0; $i < $total; $i++) {
            $code = trim($scriptparts[3][$i]);
            if ($code === '') {
                continue;
            }
            // the pattern is case-insensitive, so normalise the tag name before comparing
            $tag = strtolower($scriptparts[2][$i]);
            $is_script = ($compress_scripts && $tag === 'script');
            $is_style = ($compress_css && $tag === 'style');
            if (!$is_script && !$is_style) {
                continue;
            }
            if ($is_script && $use_script_callback) {
                $callback_args = $this->_options['script_compression_callback_args'];
                if (!is_array($callback_args)) {
                    $callback_args = array($callback_args);
                }
                array_unshift($callback_args, $code);
                $minified = call_user_func_array($this->_options['script_compression_callback'], $callback_args);
            } else {
                $minified = $this->_simpleCodeCompress($code);
            }
            $parts[] = $scriptparts[0][$i];
            $compressed[] = trim($scriptparts[1][$i]) . $minified . trim($scriptparts[4][$i]);
        }
        // do the replacements and return
        return str_replace($parts, $compressed, $html);
    }

    /**
     * Use simple preg_replace to compress code (ie javascript and css) whitespace.
     * It would be advisable to use another library such as 'minify' http://code.google.com/p/minify/
     * because this function has certain limitations with comments and regex literals.
     * You can set another function for javascript using the 'script_compression_callback' option.
     *
     * NOTE(review): the single line comment rule also matches "//" inside string
     * literals and urls (e.g. "http://..."), and the final rule removes the
     * spaces around "+", "-", "*" and "/" which can be significant in css.
     *
     * @access private
     * @param string $code Code string
     * @return string
     **/
    private function _simpleCodeCompress($code)
    {
        // Remove multiline comments (but keep those starting with "/*-")
        $code = preg_replace('/\/\*(?!-)[\x00-\xff]*?\*\//', '', $code);
        // Remove single line comments that are terminated by a line break
        $code = preg_replace('/\\/\\/[^\\n\\r]*[\\n\\r]/', '', $code);
        // Remove any remaining multiline comments
        $code = preg_replace('/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//', '', $code);
        // Collapse whitespace runs into a single space
        $code = preg_replace('/\s+/', ' ', $code);
        // Remove the spaces around punctuation and operators
        return preg_replace('/\s?([\{\};\=\(\)\/\+\*-])\s?/', "\\1", $code);
    }

    /**
     * Determine the client's best encoding method from the HTTP Accept-Encoding
     * header.
     * @access private
     * @return array two values, 1st is the actual encoding method, 2nd is the
     * alias of that method to use in the Content-Encoding header (some browsers
     * call gzip "x-gzip" etc.). Both are empty strings when nothing usable is accepted.
     */
    private function _getAcceptedEncoding()
    {
        if (!isset($_SERVER['HTTP_ACCEPT_ENCODING']) || $this->_isBrokenInternetExplorer()) {
            return array('', '');
        }
        if (preg_match('@(?:^|,)\s*((?:x-)?gzip)\s*(?:$|,|;\s*q=(?:0\.|1))@', $_SERVER['HTTP_ACCEPT_ENCODING'], $matches)) {
            return array('gzip', $matches[1]);
        }
        if (preg_match('@(?:^|,)\s*deflate\s*(?:$|,|;\s*q=(?:0\.|1))@', $_SERVER['HTTP_ACCEPT_ENCODING'])) {
            return array('deflate', 'deflate');
        }
        if (preg_match('@(?:^|,)\s*((?:x-)?compress)\s*(?:$|,|;\s*q=(?:0\.|1))@', $_SERVER['HTTP_ACCEPT_ENCODING'], $matches)) {
            return array('compress', $matches[1]);
        }
        return array('', '');
    }

    /**
     * Determines if the client is a buggy version of Internet Explorer
     * (below version 6, or version 6 without "SV1" in the user agent).
     * @access private
     * @return boolean
     */
    private function _isBrokenInternetExplorer()
    {
        // the header is optional; without it nothing can be detected
        $agent = isset($_SERVER['HTTP_USER_AGENT']) ? (string) $_SERVER['HTTP_USER_AGENT'] : '';
        if ($agent === '' || strpos($agent, 'Opera') !== false) {
            return false;
        }
        if (!preg_match('/^Mozilla\/4\.0 \(compatible; MSIE ([0-9]\.[0-9])/i', $agent, $matches)) {
            return false;
        }
        $version = floatval($matches[1]);
        return $version < 6 || ($version == 6 && strpos($agent, 'SV1') === false);
    }

    /**
     * Encodes the html with the best encoding the client accepts and records
     * the Content-Encoding alias that was actually applied.
     *
     * NOTE(review): the 'compress' encoding is served with gzcompress() output;
     * confirm that this is what clients advertising "compress" expect.
     *
     * @access private
     * @param string $html The html to deflate.
     * @return string Returns html on encoding failure or compressed data on success
     */
    private function _deflate($html)
    {
        // only remember an encoding once the data really has been encoded
        $this->_deflate_encoding = '';
        $encoding = $this->_getAcceptedEncoding();
        $deflate_level = is_bool($this->_options['deflate']) ? 6 : $this->_options['deflate'];
        if (empty($encoding[0]) || !$deflate_level || !extension_loaded('zlib')) {
            return $html;
        }
        if ($encoding[0] === 'gzip') {
            $encoded = gzencode($html, $deflate_level);
        } else if ($encoding[0] === 'deflate') {
            $encoded = gzdeflate($html, $deflate_level);
        } else {
            $encoded = gzcompress($html, $deflate_level);
        }
        if ($encoded === false) {
            return $html;
        }
        $this->_deflate_encoding = $encoding[1];
        return $encoded;
    }

    /**
     * Outputs the headers necessary for sending deflated content. The
     * Content-Encoding header is only sent when an encoding was actually
     * applied by the preceding deflate step.
     * @access public
     * @param string $html The html/deflated data that has been encoded.
     */
    public function outputDeflateHeaders($html)
    {
        header('Content-Length: ' . strlen($html));
        if (!empty($this->_deflate_encoding)) {
            header('Content-Encoding: ' . $this->_deflate_encoding);
        }
        header('Vary: Accept-Encoding');
    }

}

  1. PHP
  2. HTML
  3. Примеры