summaryrefslogtreecommitdiff
path: root/lib/HtmlFormatter.php
diff options
context:
space:
mode:
authorpolo <ordipolo@gmx.fr>2021-04-20 21:46:33 +0200
committerpolo <ordipolo@gmx.fr>2021-04-20 21:46:33 +0200
commit87798e5554eb0330cd2de255e5034f0472d410a4 (patch)
treeacd9e26a7d912c7575cb6dd1c7b42cc3e9f52993 /lib/HtmlFormatter.php
downloadmelaine-87798e5554eb0330cd2de255e5034f0472d410a4.zip
mot de passe
Diffstat (limited to 'lib/HtmlFormatter.php')
-rw-r--r--lib/HtmlFormatter.php126
1 files changed, 126 insertions, 0 deletions
diff --git a/lib/HtmlFormatter.php b/lib/HtmlFormatter.php
new file mode 100644
index 0000000..89605ac
--- /dev/null
+++ b/lib/HtmlFormatter.php
@@ -0,0 +1,126 @@
1<?php
2// https://github.com/mihaeu/html-formatter
3
4namespace Mihaeu;
5
class HtmlFormatter
{
    /**
     * Formats HTML by re-indenting the tags and removing unnecessary whitespace.
     *
     * The markup is first flattened to a single line, then split into tags,
     * comments and text runs. Each token is written on its own line at the
     * current nesting depth; the only exception is a closing tag that directly
     * follows an opening tag, which stays on the same line (`<p></p>`).
     *
     * Tags listed in $tagsWithoutIndentation neither increase the depth when
     * they open nor decrease it when they close. The depth never drops below
     * zero, so stray closing tags cannot produce a negative indentation.
     *
     * @param string $html                   HTML string.
     * @param string $indentWith             Characters that are being used for indentation (default = 4 spaces).
     * @param string $tagsWithoutIndentation Comma-separated list of HTML tags that should not be indented
     *                                       (default = html,link,img,meta). Whitespace around names is ignored.
     * @return string Re-indented HTML. If the markup cannot be tokenised, the
     *                flattened (single-line) markup is returned instead.
     */
    public static function format($html, $indentWith = '    ', $tagsWithoutIndentation = 'html,link,img,meta')
    {
        // replace newlines (CRLF and LF), followed by a non-whitespace character, with a space
        $html = preg_replace('/\\r?\\n([^\s])/', ' $1', $html);

        // remove all remaining line feeds and replace tabs with spaces
        $html = str_replace(["\n", "\r", "\t"], ['', '', ' '], $html);

        // split into tag tokens (kept thanks to DELIM_CAPTURE) and the text between them
        $elements = preg_split('/(<.+>)/U', $html, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
        if ($elements === false) {
            // PCRE gave up (e.g. backtrack limit): hand back the flattened markup
            // rather than passing false to parseDom() and dying with a TypeError
            return trim($html);
        }
        $dom = self::parseDom($elements);

        // parse the list once instead of once per opening tag
        $flatTags = array_map('trim', explode(',', $tagsWithoutIndentation));

        $indent = 0;
        $output = [];
        foreach ($dom as $index => $element) {
            if ($element['opening']) {
                $output[] = "\n" . str_repeat($indentWith, $indent) . trim($element['content']);

                // make sure that only the elements who have not been blacklisted are being indented
                if (!in_array($element['type'], $flatTags, true)) {
                    ++$indent;
                }
            } elseif ($element['standalone']) {
                $output[] = "\n" . str_repeat($indentWith, $indent) . trim($element['content']);
            } elseif ($element['closing']) {
                // mirror the opening branch: a blacklisted tag never increased the
                // depth, so its closing tag must not decrease it; clamp at zero so
                // unbalanced markup cannot push the depth negative
                if (!in_array($element['type'], $flatTags, true)) {
                    $indent = max(0, $indent - 1);
                }

                // keep an empty element on one line: <p></p>
                $previousIsOpening = isset($dom[$index - 1]) && $dom[$index - 1]['opening'];
                $lf = $previousIsOpening ? '' : "\n" . str_repeat($indentWith, $indent);

                $output[] = $lf . trim($element['content']);
            } elseif ($element['text']) {
                // text is not trimmed: runs of spaces are collapsed to a single
                // space, so one leading/trailing space survives
                $output[] = "\n" . str_repeat($indentWith, $indent)
                    . preg_replace('/ [ \t]*/', ' ', $element['content']);
            } elseif ($element['comment']) {
                $output[] = "\n" . str_repeat($indentWith, $indent) . trim($element['content']);
            }
        }

        return trim(implode('', $output));
    }

    /**
     * Parses an array of HTML tokens and adds basic information about the type of
     * tag each token represents.
     *
     * Exactly one of the boolean flags is true per token. They are decided in this
     * order on the trimmed token: starts with `<!` (comment, which also covers
     * declarations such as `<!DOCTYPE ...>`), starts with `</` (closing), ends
     * with `/>` (standalone), starts with `<` (opening), anything else (text).
     *
     * @param array $elements Array of HTML tokens (tags and text tokens).
     * @return array List of arrays with the keys `text`, `comment`, `closing`,
     *               `opening`, `standalone` (booleans), `content` (the untouched
     *               token) and `type` (the tag name for tokens shaped like
     *               `<name ...>` or `</name>`; otherwise the token itself).
     */
    public static function parseDom(array $elements)
    {
        $dom = [];
        foreach ($elements as $element) {
            $isText = false;
            $isComment = false;
            $isClosing = false;
            $isOpening = false;
            $isStandalone = false;

            $currentElement = trim($element);

            if (strpos($currentElement, '<!') === 0) {
                // comment
                $isComment = true;
            } elseif (strpos($currentElement, '</') === 0) {
                // closing tag
                $isClosing = true;
            } elseif (preg_match('/\/>$/', $currentElement)) {
                // stand-alone tag
                $isStandalone = true;
            } elseif (strpos($currentElement, '<') === 0) {
                // normal opening tag
                $isOpening = true;
            } else {
                // text
                $isText = true;
            }

            $dom[] = [
                'text' => $isText,
                'comment' => $isComment,
                'closing' => $isClosing,
                'opening' => $isOpening,
                'standalone' => $isStandalone,
                'content' => $element,
                // preg_replace leaves the subject untouched when the pattern does not match
                'type' => preg_replace('/^<\/?(\w+)[ >].*$/U', '$1', $element),
            ];
        }

        return $dom;
    }
}