0byt3m1n1

Path: /home/kassiope/www/vendor/spipu/html2pdf/src/Parsing/ [ Home ]

File: Html.php

<?php
/**
 * Html2Pdf Library
 *
 * HTML => PDF converter
 * distributed under the OSL-3.0 License
 *
 * @package   Html2pdf
 * @author    Laurent MINGUET <webmaster@html2pdf.fr>
 * @copyright 2017 Laurent MINGUET
 */
namespace Spipu\Html2Pdf\Parsing;

use Spipu\Html2Pdf\Exception\HtmlParsingException;

/**
 * Class Html
 */
class Html
{
    const HTML_TAB = '        ';

    /**
     * @var TagParser
     */
    protected $tagParser;

    /**
     * @var TextParser
     */
    protected $textParser;

    /**
     * are we in a pre ?
     * @var boolean
     */
    protected $tagPreIn = false;

    /**
     * parsed HTML code
     * @var Node[]
     */
    public $code = array();

    /**
     * Build the HTML parser around the given text parser.
     *
     * A TagParser is created on top of the same TextParser, and the list of
     * parsed nodes starts out empty.
     *
     * @param TextParser $textParser parser used to prepare the text contents
     */
    public function __construct(TextParser $textParser)
    {
        $this->code       = array();
        $this->textParser = $textParser;
        $this->tagParser  = new TagParser($textParser);
    }

    /**
     * Return a copy of the parsed codes in which every node has been cloned.
     *
     * The keys of the original list are kept, so a node can be found at the
     * same position in the returned list.
     *
     * @return Node[]
     */
    public function getCloneCodes()
    {
        // array_map keeps the keys when it is given a single array
        return array_map(
            function ($node) {
                return clone $node;
            },
            $this->code
        );
    }

    /**
     * Parse the HTML tokens and store the resulting list of actions in $this->code.
     *
     * Tokens of type 'code' are converted by getTagAction(), tokens of type 'txt'
     * by getTextAction(); tokens of any other type are ignored. The text actions
     * ('write') are then cleaned up depending on the actions that surround them:
     *  - text directly after a "tag to clean" is left-trimmed,
     *  - text directly before a "tag to clean" is right-trimmed,
     *  - a trailing space before a "tag to space" is moved to the beginning of
     *    the next text action,
     *  - text actions that end up empty are dropped.
     *
     * $this->code is only updated when the whole parsing succeeds.
     *
     * @param Token[] $tokens A list of tokens to parse
     *
     * @throws HtmlParsingException if some tags are left open, if tags are closed
     *                              in a wrong order, or if a thead / tfoot tag
     *                              does not contain any tr tag
     */
    public function parse($tokens)
    {
        // stack of the names of the currently opened tags, maintained by getTagAction()
        $parents = array();

        // flag : are we in a <pre> Tag ?
        $this->tagPreIn = false;

        /**
         * all the actions to do
         * @var Node[] $actions
         */
        $actions = array();

        // get the actions from the html tokens
        foreach ($tokens as $token) {
            $type = $token->getType();
            if ($type === 'code') {
                $newActions = $this->getTagAction($token, $parents);
            } elseif ($type === 'txt') {
                $newActions = $this->getTextAction($token);
            } else {
                continue;
            }

            // append in place: merging the whole list on each token would copy it every time
            foreach ($newActions as $newAction) {
                $actions[] = $newAction;
            }
        }

        // for each identified action, we have to clean up the beginning and the end of the text
        // based on the tags that surround it

        // list of the tags to clean
        $tagsToClean = array(
            'page', 'page_header', 'page_footer', 'form',
            'table', 'thead', 'tfoot', 'tr', 'td', 'th', 'br',
            'div', 'hr', 'p', 'ul', 'ol', 'li',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
            'bookmark', 'fieldset', 'legend',
            'draw', 'circle', 'ellipse', 'path', 'rect', 'line', 'g', 'polygon', 'polyline',
            'option'
        );

        // list of the tags to move space
        // NOTE(review): 'cite' used to be listed twice here, next to 's'.
        // Possibly 'del' was intended for the second one - to be confirmed.
        $tagsToSpace = array(
            'span', 'font', 'label',
            'strong', 'b',
            'address', 'cite', 'em', 'i', 'samp',
            's',
            'ins', 'u',
            'big', 'small', 'sub', 'sup'
        );

        // The kept actions are collected in a new list instead of being unset from
        // $actions during the loop: the neighbours $actions[$k - 1] and $actions[$k + 1]
        // must stay readable even when a previous text action has been dropped.
        $cleaned = array();

        $nb = count($actions);
        for ($k = 0; $k < $nb; $k++) {
            $action = $actions[$k];

            // only the texts have to be cleaned, the tags are kept as they are
            if ($action->getName() !== 'write') {
                $cleaned[] = $action;
                continue;
            }

            // if the tag before the text is a tag to clean => ltrim on the text
            if ($k > 0 && in_array($actions[$k - 1]->getName(), $tagsToClean, true)) {
                $action->setParam('txt', ltrim($action->getParam('txt')));
            }

            if ($k < $nb - 1) {
                $nextName = $actions[$k + 1]->getName();

                // if the tag after the text is a tag to clean => rtrim on the text
                if (in_array($nextName, $tagsToClean, true)) {
                    $action->setParam('txt', rtrim($action->getParam('txt')));
                }

                // if the tag after the text is a tag with space to move => move the space to the next write
                if (in_array($nextName, $tagsToSpace, true) && substr($action->getParam('txt'), -1) === ' ') {
                    $action->setParam('txt', rtrim($action->getParam('txt')));
                    for ($subK = $k + 2; $subK < $nb; $subK++) {
                        if ($actions[$subK]->getName() === 'write') {
                            $actions[$subK]->setParam('txt', ' '.ltrim($actions[$subK]->getParam('txt')));
                            break;
                        }
                    }
                }
            }

            // if the text is empty => do not keep the action
            if (strlen($action->getParam('txt')) > 0) {
                $cleaned[] = $action;
            }
        }

        // if we are not on the level 0 => HTML validator ERROR
        if (count($parents)) {
            if (count($parents) > 1) {
                $errorMsg = 'The following tags have not been closed:';
            } else {
                $errorMsg = 'The following tag has not been closed:';
            }

            $e = new HtmlParsingException($errorMsg.' '.implode(', ', $parents));
            $e->setInvalidTag($parents[0]);
            throw $e;
        }

        $this->verifyMustContain($cleaned, 'thead', 'tr');
        $this->verifyMustContain($cleaned, 'tfoot', 'tr');

        // save the actions to do
        $this->code = $cleaned;
    }

    /**
     * Check that every $mainTag element contains at least one $mustTag tag.
     *
     * The actions are read in order. Each opening $mainTag resets the
     * "found" state, each $mustTag seen while a $mainTag is open sets it,
     * and each closing $mainTag requires it to be set.
     *
     * @param Node[] $actions list of the actions to check
     * @param string $mainTag name of the container tag
     * @param string $mustTag name of the tag that the container must contain
     *
     * @return bool always true when no exception is thrown
     * @throws HtmlParsingException if a $mainTag is closed without any $mustTag in it
     */
    protected function verifyMustContain(&$actions, $mainTag, $mustTag)
    {
        $depth = 0;
        $seen  = false;

        foreach ($actions as $node) {
            $name   = $node->getName();
            $isMain = ($name == $mainTag);

            // opening of the container => one level deeper, nothing found yet
            if ($isMain && !$node->isClose()) {
                $depth++;
                $seen = false;
            }

            // required tag found inside a container
            if ($depth > 0 && $name == $mustTag) {
                $seen = true;
            }

            // only the closing of the container is left to handle
            if (!$isMain || !$node->isClose()) {
                continue;
            }

            if (!$seen) {
                $exception = new HtmlParsingException(
                    'The tag ['.$mainTag.'] must contain at least one tag ['.$mustTag.']'
                );
                $exception->setInvalidTag($name);
                $exception->setHtmlLine($node->getLine());
                throw $exception;
            }

            $depth--;
        }

        return true;
    }

    /**
     * Convert a tag token into the corresponding list of actions.
     *
     * The tag is analyzed by the TagParser and the resulting node receives the
     * line of the token. Then:
     *  - a tag that can be left unclosed (br, img, ...) is returned as is,
     *  - a closing tag must match the last opened tag, which is removed from $parents,
     *  - an auto-closed tag gives two actions: the opening node, and a clone of
     *    it without params and marked as closing,
     *  - any other opening tag is pushed on $parents.
     *
     * The <pre> flag is computed from the stack of the opened tags: it stays on
     * as long as at least one <pre> or <code> tag is still opened. Toggling it on
     * each tag would switch it off as soon as an inner tag is closed, as with
     * "<pre><code>...</code> ...</pre>".
     *
     * TODO remove the reference on the $parents variable
     *
     * @param Token $token   the tag token to convert
     * @param array $parents names of the currently opened tags, updated in place
     *
     * @return array list of the nodes to add to the actions
     * @throws HtmlParsingException if the closing tag has no opened tag to match,
     *                              or does not match the last opened one
     */
    protected function getTagAction(Token $token, &$parents)
    {
        // tag that can be not closed
        $tagsNotClosed = array(
            'br', 'hr', 'img', 'col',
            'input', 'link', 'option',
            'circle', 'ellipse', 'path', 'rect', 'line', 'polygon', 'polyline'
        );

        // analyze the HTML code
        $node = $this->tagParser->analyzeTag($token->getData());

        // save the current position in the HTML code
        $node->setLine($token->getLine());

        $name = $node->getName();

        // a tag that does not have to be closed is not validated: keep it as it is
        if (in_array($name, $tagsNotClosed, true)) {
            return array($node);
        }

        $actions = array();

        if ($node->isClose()) {
            // HTML validation: a closing tag must close the last opened tag
            $errorMsg = null;
            if (count($parents) < 1) {
                $errorMsg = 'Too many tag closures found for ['.$name.']';
            } elseif (end($parents) !== $name) {
                $errorMsg = 'Tags are closed in a wrong order for ['.$name.']';
            }

            if ($errorMsg !== null) {
                $e = new HtmlParsingException($errorMsg);
                $e->setInvalidTag($name);
                $e->setHtmlLine($token->getLine());
                throw $e;
            }

            array_pop($parents);
        } elseif ($node->isAutoClose()) {
            // save the opened tag
            $actions[] = $node;

            // prepare the closed tag
            $node = clone $node;
            $node->setParams(array());
            $node->setClose(true);
        } else {
            // add a child for validation
            $parents[] = $name;
        }

        // if it is a <pre> tag (or <code> tag) not auto-closed => update the flag,
        // from the tags that are still opened once $parents has been updated
        if (($name === 'pre' || $name === 'code') && !$node->isAutoClose()) {
            $this->tagPreIn = in_array('pre', $parents, true) || in_array('code', $parents, true);
        }

        // save the actions to convert
        $actions[] = $node;

        return $actions;
    }

    /**
     * Convert a text token into the corresponding list of actions.
     *
     * Outside of a <pre> tag, the whole text gives a single 'write' action.
     * Inside of a <pre> tag, the text is split on the line feeds (carriage
     * returns are removed): each line gives its own 'write' action, with tabs
     * and spaces replaced by non-breaking spaces, and a 'br' action is
     * inserted between two lines.
     *
     * @param Token $token the text token to convert
     *
     * @return array list of the nodes to add to the actions
     */
    protected function getTextAction(Token $token)
    {
        // model of the break line inserted between the lines of a <pre> content
        $lineBreak = new Node('br', array('style' => array(), 'num' => 0), false);

        // not in a <pre> tag => the text is written in one go
        if (!$this->tagPreIn) {
            $txt = $this->textParser->prepareTxt($token->getData());

            return array(new Node('write', array('txt' => $txt), false));
        }

        // in a <pre> tag => one write per line
        $result = array();
        $rows   = explode("\n", str_replace("\r", '', $token->getData()));

        foreach ($rows as $index => $row) {
            // every line but the first one starts after a break line
            if ($index > 0) {
                $result[] = clone $lineBreak;
            }

            // the replacements are applied in order: the tabs first, then all the spaces
            $row = str_replace(array("\t", ' '), array(self::HTML_TAB, '&nbsp;'), $row);

            $result[] = new Node('write', array('txt' => $this->textParser->prepareTxt($row, false)), false);
        }

        return $result;
    }

    /**
     * Extract the content found between the tag at position $k and its
     * corresponding closing tag.
     *
     * The opening tag and its closing tag are not part of the result. Tags
     * with the same name found in between are kept, and are used to track
     * the depth. If position $k holds a text, the result contains only it.
     * If the end of the code is reached first, the extract stops there.
     *
     * @param   integer $k position of the starting action in the parsed code
     * @return  array   actions
     */
    public function getLevel($k)
    {
        // nothing at this position => nothing to extract
        if (!isset($this->code[$k])) {
            return array();
        }

        $first  = $this->code[$k];
        $detect = $first->getName();

        // a text is its own level
        if ($detect === 'write') {
            return array($first);
        }

        $depth   = 0;       // nesting depth of the detected tag
        $extract = array(); // extracted actions

        for (;; $k++) {
            /** @var Node $node */
            $node = $this->code[$k];
            $name = $node->getName();

            $keep = true;  // is the current action part of the extract ?
            $done = false; // has the closing tag of the level been reached ?

            // only the tags named like the starting tag change the depth
            if ($name !== 'write' && $name == $detect) {
                $wasRoot = ($depth == 0);
                $depth += ($node->isClose() ? -1 : 1);
                $done = ($depth == 0);

                // the tags that leave or reach the root level are the limits
                // of the extract => they are not taken
                $keep = !($wasRoot || $done);
            }

            if ($keep) {
                $extract[] = $node;
            }

            // stop at the closing tag, or when there is no more code to analyze
            if ($done || !isset($this->code[$k + 1])) {
                break;
            }
        }

        return $extract;
    }

    /**
     * Prepare the HTML before its parsing.
     *
     * If the HTML contains a body tag, it is considered as a real HTML page
     * and is first converted by getHtmlFromRealPage(). Then the following
     * constants are replaced by the corresponding part of the current date:
     * [[date_y]], [[date_m]], [[date_d]], [[date_h]], [[date_i]], [[date_s]].
     *
     * @param string $html
     *
     * @return string
     */
    public function prepareHtml($html)
    {
        // if it is a real html page, we have to convert it
        if (preg_match('/<body/isU', $html)) {
            $html = $this->getHtmlFromRealPage($html);
        }

        // take the current time only once: with one clock reading per constant,
        // the parts could be inconsistent when the second (or the minute, ...) changes in between
        $now = time();

        // replace some constants
        $constants = array(
            '[[date_y]]' => date('Y', $now),
            '[[date_m]]' => date('m', $now),
            '[[date_d]]' => date('d', $now),
            '[[date_h]]' => date('H', $now),
            '[[date_i]]' => date('i', $now),
            '[[date_s]]' => date('s', $now),
        );

        return str_replace(array_keys($constants), array_values($constants), $html);
    }

    /**
     * Convert the HTML of a real page, to a code adapted to Html2Pdf.
     *
     * The body tag is converted to a page tag (its attributes are kept), and
     * everything outside of it is dropped, except the style and link tags:
     * they are put before the page tag, the styles first and then the links,
     * each group in the order of the original document.
     *
     * The body tag is found whatever its case. If there is no body tag, the
     * HTML is returned unchanged.
     *
     * @param  string $html HTML code of a real page
     * @return string HTML adapted to Html2Pdf
     */
    protected function getHtmlFromRealPage($html)
    {
        // set body tag to lower case, whatever its original case (<BODY, <Body, ...)
        $html = str_ireplace('<body', '<body', $html);
        $html = str_ireplace('</body', '</body', $html);

        // explode from the body tag. If no body tag => nothing to convert
        $res = explode('<body', $html);
        if (!isset($res[1])) {
            return $html;
        }

        // the html content is between body tag opening and closing
        $content = '<page'.$res[1];
        $content = explode('</body', $content);
        $content = $content[0].'</page>';

        // extract the link tags from the original html.
        // They are appended one after the other, to keep the order of the document:
        // the order of the style sheets matters when the same rule is defined twice
        $links = '';
        preg_match_all('/<link ([^>]*)[\/]?>/isU', $html, $match);
        foreach ($match[1] as $src) {
            $links .= '<link '.$src.'/>';
        }

        // extract the css style tags from the original html, in the same way
        $styles = '';
        preg_match_all('/<style[^>]*>(.*)<\/style[^>]*>/isU', $html, $match);
        foreach ($match[0] as $src) {
            $styles .= $src;
        }

        // the styles and the links are added before the content
        return $styles.$links.$content;
    }
}

© 2017 - ZeroByte.ID.
��JFIF�� ( %!1"%)+...383-7(-.+ 0% %----/--/--------/---------------------------------��"��E!1AQaq"��2��#BRSr�b��$CT��34s��0!1AQ�"aq2BR��#$3��?폇� ~�R}�� ~�U�W-��3�)}�� ~�W}�� ~�Uԅ-��^d��?a�>��O�{̟�?��5w��t%Kj�̟��(�� ~�J6F�)�]+�Kj��?w��f�^�5fyi��m��G�vc�·��(�0��O�ϙ(�g΋*r�<#��}�vg��%��P�DUr�Yv'��IC�MK��&�a׎ ɲ��vp_�( �^��j�tC6��R�eIn3�@��R�Uo��p��V�LN��(��2C�[}�9%6XV��õ��sO��>��?`"X�:Y'GA��-1g�Z:\8rr�+�_�e��?`$�#��Ҍ�6@sC��џd�&`l|?�S�_��O�s[dS��j��PS��,�wG��ϡ��O�j��O�j#Z�a�TaOl}��_�O�|?�R��O�j�+�ڽ��'��J_C��ԿCa��qHq^��d��\��R9rUА��)�N9�q\�!��5%y�֝�3u��2�h�S(�e�!��<��^�i`��h��r�vl��G��'(Vɢh2�2s9�i�Z�O9�་k��A�U�N�VK�T|��n=�Rx7��&{��^s�?� �{h�2�ͬ;�f �O�сQk��r\�3>��s��p(�$��9�-��33�&!��S��jmjTV|I� $��gE&��K�i9ـ�sKID��6�*��jU|�pth�F�%��G�ԦXC]!�q �tSڒ'�)u��0=�� 6uP� P��"{��8�Z$�y�s�|�U2�-px��|�"��5]f��R��A��V��,`t�f�?UO�u��D��!\�� d\G5o��'w ݡ�D�C� >��FZx8͢��`�1��:��D-Ez�e�B��sr�O5|fߠ�F+�YO �U�*�B�s�D�^��J��␮+�D�B�㎤�c�=��Ah6��XM��H�$��Sx�ƙ>"/��.S7�|8��[eb�y��I�$ܸFf�c3I��dp݇ҿ�ި�̵"�gww�z#?5?Y��@�cRtG�#�M��.i�0Py6'd��v��҉q�?�>�xp��g��Z�&��n?m��7��vò��0@��l��L G� Y��g��gtn�P.�v��C^�Ug�1��1O�D8�DG�:$9��z�v�Si�} -d��ת�S�Y۽�� ޯZ�7V4�< �IE{%��`�e��: G��:�c�?e��z^��.�wEB��rf��F� iZ��Osm�j<�?��"��jfAl�� p�KA�īt!�=W?�y :�c�7tI�5�3��T��r�;�P��]:Yx.��"��`��V��A�v��ǒ[��㣉w�si�j��o��Si�&s�� "�Jپ�m�R��^i��O�T��W��d�Ǿ�q�)�!rr�*XH�2u�rD�$NV \�$0��Hq.*��Xq?��b��*#� �jRv�$��i2�Z~J��T�;=�p2ٖ��ʋc#��l��{��Ա��u��{I��CqW6{][ �.�H��L��H�Ivv�G��{�qߘh�{��Y��?g� �S��sH��8o.l��Ζ��e�]w>�K�C��,|7�EZ*�+�w�i��s��Sh��Z��jfo�]p�1�wc��שA��X��mF��-��khT�Ss@�L1�&, ֮'�`��?��𸚵k=�34�Is�H.'0�Ú�VǱ�&M��8�J��²�M�\�w�25I�g�u]��Z2�d]��!x�73Ybr��dt7SR��i.m6��'�R~2S+e�nc#ߩ6J1rxA�tGt�բ�w�`�-��sO�U̒\�.;�n �I2N��O�:V�4(r�9}w�KP�ǈ�s�"P��ӗBIJ�®\�!�%])��d�Z��^��5�2��م�i$p:�4%zڧ-�j<6�y|��!U~�`vY�V�f�$�qk��"qR�*��1�>D�Yc��&9#[? 
�� Q�N�1�})E��kF��Q7'�}1�밙yࡩN�=N�S��Đ�zM�j��q҈�'��#Yxv�qʃ S��6��=U,u75�2� ��봂\n�#��-^ѽ�L��a�Y��;��H>E_.�ܘ��i�c_Ӄ{��Ƽhp�D�U|ish�0'gm�A-=��pA�m�h��j�f�K�|��@�$CĶ|P��jUf��cƖ��5�[)�N�i��s|&2��Tv�vZd��n��f|op�cϺ��,H�2�{Ů�F�3E�L��φ�0ͷM�� i/��R��B�흪r��h�3��.�iw�+nd�Cqx��S�{�$)��Ԟf�U��)�˟��m:Yg8�|�Ψ�!�ɓ�]��=��v[\�b6��M�B��0��yjꏮH��Vq�ɣ �~�TiQ�M?$�+qo\��[/��u��M �U�mr�ms�L�ȑ��*-5�c[[�n�<�.��\�rq�G �E��/��鞪�+�� [5��;�Vqd�n�w\�'ko6$e��g�_�h ��y��JL ui�>��P�.;�b�t�ͯ�1�� HHByn�MwI:��`�[& Gz��o��Y�^��>��a�씹O��J!ZPq[��J��YGHru��.��:��UHy��"�4�)�$z�A"D6��)�}I2��~�e��tn��SV��)��=��R��Q Ǎ�6��*=��d�bip��s��r�E7R$1Őt��i��)۴��y��fih��i7f?5p/&"�dGT�S��$$:��.75�-�fk�V�gq%�H��ܴ.�#d͜Z �7��{I��L�a� t�S�3M��0��^��R��)�psK��k��-��q��ԛ��j�4��f4�n�P�A�7��(� �i2*��s�EɬN!�6��k��Ĵ��<��{�j5k��=?��+��r�8h��奦��c4��w�+�eZ�o�@S:�*�1�L��@�".�¾g5 �v8\JP�x<�l�\�$j��Us�>F�rExO?*�QW1A�h�7ꟴG�Wal��)6��?2�rvmf�ZZ��z�OC�U}�-��L�F��u� ��gF��V1͐Ҥ��ʿ��r��ᑅ ��<��*uo��k1��3�귗3p��X� ��Q��F�lz��G��g�ĩVs��E<[{�?z[��M�/�� o�<�ù;��;M�g�Ϥ�C�X��uy�N�`�#Px�kM��HS�`�*h��t��F�zk��^�t�� ˁn�\G�Ȧ~��f;+x%��U�PT�c>�w�`��8h��!��u.��MZ�`Aޒ�V�� 4\��;c��,�,g��G-T,�Ar]M�X��0Kn,�ܛϚ��`8��b�T��<��ۀ�P��/��n¥�vRe3�m��Pۭ�%��d%�dN;xЛL�ԧ&cԩ �nvEv t��dm��6�o8�$�C�G�5+0m}*aڵ��6�B��h��\"]��A�XWrY�A�Vm��:�s]-�,�|U,�h�đ�Q�- i@҃��m�� +�)w�)��1�o�)��M��nQ�@��U'��pd��T��p*�Z�`'��#+�ږ2/�9Ev��?�eX�M�N��>6B��Q�m鞀�=3J|J,��8�m��p��Q�Q4�y��9rt6�V�'Wgp)v >�Wn�7�O�`�Y�Lo��%��)�4n��j��th��:�v��1��}sO{Ï�8<9s�+�>��Ѻ�T;�i�ڃ�=8U��I(ܛ1�Tt�(z��➲�gF�92�dBzB��Z-�J�rB�Ggm��D��q��V�Dq=�i,{)�;C�Wދ��gR+�|�f��]��h��P��8��50��O�Pݗ�� x(��;��0��l��V�K�\�@P�BuZ�1�U�Ȥ�H�圤�P5��:<&�&-c��u�3�S��]�ڲj��W8wE��䠦�D�#��a�eh&ԥ��v��Q$��W�Q�t�P�*O�y�'��b�n��*�"�sKN��}y�\u�z�z�s��j�%��{�,��f�G |SpB��~I}+Z��KsT2��班?�s]d��D�R��W�څ�G{��<�=A�j�37�>h&� �u��N/̏�)�dSo��Uy |��*�0��Ъu��{Am�F�7M�<1�I�;�7se�Y��h�W�5�4N �Ahi:��+�6ѩ��o�ZJ�� qM�`�N�n>)��h��⇬��gC��! 
B�CSѬ�JM�^�*D:�6��FG3s墬�J��z�CEj��A7�)��!��uZ�� *G�)�ę�?E-��=pL͡L�:�N��:8��ٶ�~Rgܙ�6_�8��2}N��l��)��"��) �:��2c��&f��]��zu* M��O L�қ\��ۊ�E���%�!�i:64Y�~��=#>��E�C��8u =q]��q�`6f*�_Z�2�`�<T�iw��:��j�Q��i�aͦ��]Q̹�%�N��)��!��\Gﰀ��J��2��z��yL6��O ��z6΢�y��/<��"@~Kq��ęs{��4>" ��e�ׄ/��t��AD(>Z%Wl �[��|�p*�.I��a�'��ǀRSQ)YJ��T�e�G�w �f �QǙ�4^��K3*2oxRl�p5�6��s%U��{[�f�4�M0T�}h��y��g6廾�:�w��~V�A�d��J��_��8�8oPЫ��Ůi;�.�c��L0�&��*��X�c��:�K-V�c��4�|7��4h��w�y�V#�o'� �γ{�@�U��^^}�ẙ��/�PHW$YgFr��`�u�5=�䪾��Wo��c1�s�c��ѯ�S��oN�O*zN^�J>��v?i�d�@79e�uC�aq/�Y��c1h-{&=%3�g��sWi��>A5�;)j=��+�ҽ�j��[��Q��$j� ��[=Z��^��{ΣI��r�$��Z��px�=G��ԉ��^�.�]�cddv}�kl��_'�ꡂ7��u��^;R�!��T��yEF��$��nqؖ0�:g7#P�MP��k��@g�i6�z�!�77�h$L�A�<��t��;��d��2��"�� "La��$X9�@��t�S:��a��U,+��7Q��CÚM��t:AZfa��.q��w��i6ƛq:��H�UnW��e�2)��D �H#�k��Kr �V�5f��}`��:�ո}�o��!��@Ic$*�^�;��+��TtSq�P�E�:�s��C�&w�y� ��J��e��I�O(�B\d\;{ǩBq��:��[��b��?��Ti�hh� +*[v��E�h.#�L�̅f�:�lj��)��D�� S1c��N��S��w��Zީ�~�ԫ ��71�� q�(��F�c�kf��7[��لq�c�h��ݭ��z{�כ��1��ET�z��Z{J�qivW��iS�;5�nl�5��:��} ��K�� Y}��9^��P��N��@��hw0HV[�}�8t ��ކ��| :�<�w�er6��%T�֯�˖��IR��\��P�G��O�U�m�N�-ǜ��ץ��b�OWJ_�Ԃ�I�k��Ik��B�N.�p5 �ġuv��=�B)=�H7��e�<�_T�� ]��`�:��y��XٵU�0�#(��|��b��H�B�~Z럸EJ�v�W�a��u��|͝�EM�M�#cٚ�֎��ot-0X\oF�鳝��S��[gTHjI��܀5ĲEPw��0�#?4�͡�L-3bj�O�,?�l��{er[BӾ��-�\D��٭��A�џq��@�C�Vo�=�mZŮ�X熆�-�h#U�k~��DpYښ�\�%��MJM��vM7EW�BàW\;��Z��r%�µ��.�)FYX�0��"T�Њ��e� ລ��uj�� jJE��\�d��z,��lH��i��[��K#�TLhI��r��Ɠ[�~V��4@�.a�{|�4d��0/�*c�t re{G�t�M��j�R\z|��>ȼ�C)��B�IX>+Z��ؕ]�PU��J�c�Y��Kq��