summary | refs | log | tree | commit | diff
path: root/lib/HtmlFormatter.php
diff options
context:
space:
mode:
author: polo <ordipolo@gmx.fr> 2022-02-17 18:13:00 +0100
committer: polo <ordipolo@gmx.fr> 2022-02-17 18:13:00 +0100
commit: 787d03e48471ba62cd830379428f04d996f0b74b (patch)
tree: e9f98c7b9288c4530b50985688dd82622106ba2d /lib/HtmlFormatter.php
parent: 29df6f1362745eabf4fbcaedf309eb63795152fa (diff)
download: melaine-787d03e48471ba62cd830379428f04d996f0b74b.zip
model update
Diffstat (limited to 'lib/HtmlFormatter.php')
-rw-r--r-- lib/HtmlFormatter.php 252
1 files changed, 126 insertions, 126 deletions
diff --git a/lib/HtmlFormatter.php b/lib/HtmlFormatter.php
index 89605ac..0101f1b 100644
--- a/lib/HtmlFormatter.php
+++ b/lib/HtmlFormatter.php
@@ -1,126 +1,126 @@
1<?php 1<?php
2// https://github.com/mihaeu/html-formatter 2// https://github.com/mihaeu/html-formatter
3 3
4namespace Mihaeu; 4namespace Mihaeu;
5 5
class HtmlFormatter
{
    /**
     * Formats HTML by re-indenting the tags and removing unnecessary whitespace.
     *
     * The input is flattened to a single line, split into tag and text tokens,
     * and re-emitted with one token per line. A closing tag that directly
     * follows its opening tag stays on the same line (`<div></div>`).
     *
     * @param string $html HTML string.
     * @param string $indentWith Characters that are being used for indentation (default = 4 spaces).
     * @param string $tagsWithoutIndentation Comma-separated list of HTML tags that should not be indented (default = html,link,img,meta)
     * @return string Re-indented HTML.
     */
    public static function format($html, $indentWith = '    ', $tagsWithoutIndentation = 'html,link,img,meta')
    {
        // replace newlines (CRLF and LF), followed by a non-whitespace character, with a space
        $html = preg_replace('/\\r?\\n([^\s])/', ' $1', $html);

        // remove all remaining line feeds and replace tabs with spaces
        $html = str_replace(["\n", "\r", "\t"], ['', '', ' '], $html);
        $elements = preg_split('/(<.+>)/U', $html, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
        $dom = self::parseDom($elements);

        // the list is loop-invariant, so build it once; entries are trimmed so
        // that "html, link" behaves like "html,link"
        $flatTags = array_map('trim', explode(',', $tagsWithoutIndentation));

        $indent = 0;
        $output = [];
        foreach ($dom as $index => $element)
        {
            if ($element['opening'])
            {
                $output[] = "\n".str_repeat($indentWith, $indent).trim($element['content']);

                // make sure that only the elements who have not been blacklisted are being indented
                if ( ! in_array($element['type'], $flatTags, true))
                {
                    ++$indent;
                }
            }
            else if ($element['standalone'])
            {
                $output[] = "\n".str_repeat($indentWith, $indent).trim($element['content']);
            }
            else if ($element['closing'])
            {
                // mirror the opening branch: a tag that did not increase the depth
                // must not decrease it either, and a stray closing tag must never
                // push the depth below zero (str_repeat() rejects negative counts)
                if ( ! in_array($element['type'], $flatTags, true))
                {
                    $indent = max(0, $indent - 1);
                }

                $lf = "\n".str_repeat($indentWith, $indent);

                // keep an empty element on a single line: <div></div>
                if (isset($dom[$index - 1]) && $dom[$index - 1]['opening'])
                {
                    $lf = '';
                }
                $output[] = $lf.trim($element['content']);
            }
            else if ($element['text'])
            {
                // text is not trimmed; only runs of spaces/tabs that start with a
                // space are collapsed into a single space
                $output[] = "\n".str_repeat($indentWith, $indent).preg_replace('/ [ \t]*/', ' ', $element['content']);
            }
            else if ($element['comment'])
            {
                $output[] = "\n".str_repeat($indentWith, $indent).trim($element['content']);
            }
        }

        return trim(implode('', $output));
    }

    /**
     * Parses an array of HTML tokens and adds basic information about the type of
     * tag the token represents.
     *
     * Exactly one of the boolean flags is true for every token. Tokens starting
     * with "<!" (comments, but also declarations such as a doctype) are flagged
     * as 'comment'. 'content' holds the token unchanged, and 'type' holds the tag
     * name for opening/closing tags (for other tokens it is the trimmed token
     * itself).
     *
     * @param array $elements Array of HTML tokens (tags and text tokens).
     * @return array HTML elements with extra information
     *               (keys: text, comment, closing, opening, standalone, content, type).
     */
    public static function parseDom(array $elements)
    {
        $dom = [];
        foreach ($elements as $element)
        {
            $isText = false;
            $isComment = false;
            $isClosing = false;
            $isOpening = false;
            $isStandalone = false;

            $currentElement = trim($element);

            // comment
            if (strpos($currentElement, '<!') === 0)
            {
                $isComment = true;
            }
            // closing tag
            else if (strpos($currentElement, '</') === 0)
            {
                $isClosing = true;
            }
            // stand-alone tag
            else if (preg_match('/\/>$/', $currentElement))
            {
                $isStandalone = true;
            }
            // normal opening tag
            else if (strpos($currentElement, '<') === 0)
            {
                $isOpening = true;
            }
            // text
            else
            {
                $isText = true;
            }

            $dom[] = [
                'text' => $isText,
                'comment' => $isComment,
                'closing' => $isClosing,
                'opening' => $isOpening,
                'standalone' => $isStandalone,
                'content' => $element,
                // use the trimmed token so the tag name is found even when the
                // token carries surrounding whitespace
                'type' => preg_replace('/^<\/?(\w+)[ >].*$/U', '$1', $currentElement)
            ];
        }
        return $dom;
    }
}