0byt3m1n1
Path:
/
home
/
kassiope
/
www
/
vendor
/
spipu
/
html2pdf
/
src
/
Parsing
/
[
Home
]
File: HtmlLexer.php
<?php /** * Html2Pdf Library - parsing Html class * * HTML => PDF converter * distributed under the OSL-3.0 License * * @package Html2pdf * @author Laurent MINGUET <webmaster@html2pdf.fr> * @copyright 2017 Laurent MINGUET */ namespace Spipu\Html2Pdf\Parsing; /** * Class HtmlLexer */ class HtmlLexer { /** * Tokenize the HTML code * * @param string $html HTML code to tokenize * * @return Token[] */ public function tokenize($html) { // initialise the array $tokens = array(); // regexp to separate the tags from the texts $reg = '/(<\/?\w[^<>]*>)|([^<]+|<)/is'; $commentRegex = '/(<!--.*-->)/isU'; // last match found $str = ''; $offset = 0; $line = 1; $length = strlen($html); // As it finds a match while ($offset < $length) { if (strpos($html, '<!--', $offset) === $offset && preg_match($commentRegex, $html, $match, PREG_OFFSET_CAPTURE, $offset) ) { $line += substr_count($match[1][0], "\n"); $offset = $match[0][1] + strlen($match[0][0]); continue; } preg_match($reg, $html, $parse, PREG_OFFSET_CAPTURE, $offset); // if it is a tag if ($parse[1][0]) { // save the previous text if it exists if ($str !== '') { $tokens[] = new Token('txt', $str); } // save the tag, with the offset $tokens[] = new Token('code', trim($parse[1][0]), $line); $line += substr_count($parse[1][0], "\n"); // init the current text $str = ''; } else { // else (if it is a text) // add the new text to the current text $str .= $parse[2][0]; $line += substr_count($parse[2][0], "\n"); } // Update offset to the end of the match $offset = $parse[0][1] + strlen($parse[0][0]); unset($parse); } // if a text is present in the end, we save it if ($str !== '') { $tokens[] = new Token('txt', $str); } return $tokens; } }