loadHTML('' . $html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
        libxml_clear_errors();

        $markdown = '';
        foreach ($dom->childNodes as $node) {
            $markdown .= $this->processNode($node);
        }

        // Clean up extra newlines
        $markdown = preg_replace('/\n{3,}/', "\n\n", $markdown);
        $markdown = trim($markdown);

        return $markdown;
    }

    /**
     * Converts one DOM node, and recursively its descendants, to Markdown.
     *
     * Text nodes are emitted after entity decoding. Element nodes are mapped
     * by lower-cased tag name; unknown tags contribute only their children.
     * Any other node type yields an empty string.
     *
     * @param DOMNode $node Node to convert.
     *
     * @return string Markdown fragment for the node.
     */
    private function processNode(DOMNode $node): string
    {
        $output = '';

        switch ($node->nodeType) {
            case XML_TEXT_NODE:
                $output .= $this->decodeHtmlEntities($node->nodeValue);
                break;

            case XML_ELEMENT_NODE:
                switch (strtolower($node->nodeName)) {
                    case 'b':
                    case 'strong':
                        $output .= '**' . $this->processChildren($node) . '**';
                        break;

                    case 'i':
                    case 'em':
                        $output .= '*' . $this->processChildren($node) . '*';
                        break;

                    case 'u':
                        $output .= '__' . $this->processChildren($node) . '__';
                        break;

                    case 'a':
                        $href = $node->getAttribute('href');

                        // Ignore whitespace-only text nodes so the "single image child" check is accurate.
                        $realChildNodes = [];
                        foreach ($node->childNodes as $child) {
                            if ($child->nodeType === XML_TEXT_NODE && trim($child->nodeValue) === '') {
                                continue;
                            }
                            $realChildNodes[] = $child;
                        }

                        if (count($realChildNodes) === 1 && strtolower($realChildNodes[0]->nodeName) === 'img') {
                            // The only real child is an image: emit the image itself, not a link around it.
                            $output .= $this->processChildren($node);
                        } else {
                            // Otherwise treat it as a regular text link.
                            $text = $this->processChildren($node);
                            // An anchor without a target would become "[text]()"; emit just the text instead.
                            $output .= $href === '' ? $text : "[{$text}]({$href})";
                        }
                        break;

                    case 'p':
                        $output .= $this->processChildren($node) . "\n\n";
                        break;

                    case 'br':
                        $output .= "\n";
                        break;

                    case 'ul':
                    case 'ol':
                        $output .= $this->processList($node);
                        break;

                    case 'li':
                        $output .= $this->processChildren($node);
                        break;

                    case 'img':
                        $src = $node->getAttribute('src');
                        if ($src !== '') {
                            // Emit only the URL, on its own line, so Discord renders it.
                            $output .= "\n" . $this->resolveImageUrl($src) . "\n";
                        }
                        break;

                    case 'div':
                    default:
                        // For div and unknown tags, just process their children.
                        $output .= $this->processChildren($node);
                        break;
                }
                break;
        }

        return $output;
    }

    /**
     * Renders a ul/ol element as a Markdown list followed by a blank line.
     *
     * Each child of the list is converted separately and every non-blank line
     * of its output becomes one list entry. Converting children one at a time
     * keeps adjacent <li> elements with no whitespace between them from being
     * merged into a single entry.
     *
     * @param DOMNode $list The ul or ol element.
     *
     * @return string "- item" lines for ul, "N. item" lines for ol, then "\n\n".
     */
    private function processList(DOMNode $list): string
    {
        $isOrdered = strtolower($list->nodeName) === 'ol';
        $formattedList = [];
        $counter = 1;

        foreach ($list->childNodes as $child) {
            foreach (explode("\n", $this->processNode($child)) as $line) {
                $line = trim($line);
                // Compare with '' explicitly: empty() would also drop an item whose text is "0".
                if ($line === '') {
                    continue;
                }

                $formattedList[] = $isOrdered
                    ? ($counter++) . '. ' . $line
                    : '- ' . $line;
            }
        }

        return implode("\n", $formattedList) . "\n\n";
    }

    /**
     * Turns an image src into an absolute URL.
     *
     * Values starting with "http" or "//" are returned unchanged; anything
     * else is appended to BOT_BASE_URL with exactly one "/" between them.
     *
     * @param string $src Raw src attribute value.
     *
     * @return string URL to emit for the image.
     */
    private function resolveImageUrl(string $src): string
    {
        if (strpos($src, 'http') === 0 || strpos($src, '//') === 0) {
            return $src;
        }

        $base = rtrim(BOT_BASE_URL, '/');
        $path = ltrim($src, '/');

        return "{$base}/{$path}";
    }

    /**
     * Concatenates the Markdown of every child of the given node, in order.
     *
     * @param DOMNode $node Parent node.
     *
     * @return string Combined Markdown of the children ('' when there are none).
     */
    private function processChildren(DOMNode $node): string
    {
        $childrenOutput = '';
        foreach ($node->childNodes as $child) {
            $childrenOutput .= $this->processNode($child);
        }

        return $childrenOutput;
    }

    /**
     * Decodes HTML entities (HTML5 table, both quote styles) as UTF-8.
     *
     * NOTE(review): DOM text node values are normally already entity-decoded
     * by the parser, so this looks like a second decoding pass - confirm that
     * double-encoded input is expected here.
     *
     * @param string $encodedString Text that may contain entities.
     *
     * @return string Decoded text.
     */
    private function decodeHtmlEntities(string $encodedString): string
    {
        return html_entity_decode($encodedString, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }

    /**
     * Splits an HTML fragment into an ordered list of text and image parts.
     *
     * @param string $html HTML fragment, expected to be UTF-8.
     *
     * @return array List of parts, each either
     *               ['type' => 'text', 'content' => string] or
     *               ['type' => 'image', 'url' => string].
     */
    public function convertToArray(string $html): array
    {
        $parts = [];

        $dom = new DOMDocument();
        libxml_use_internal_errors(true);
        // Without an encoding declaration the HTML parser does not read the input as UTF-8
        // and non-ASCII text comes out garbled. The declaration is not a text or element
        // node, so the traversal below skips it.
        $dom->loadHTML('<?xml encoding="UTF-8">' . $html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
        libxml_clear_errors();

        foreach ($dom->childNodes as $node) {
            $this->processNodeForArray($node, $parts);
        }

        return $parts;
    }

    /**
     * Walks a node and appends the text/image parts it produces to $parts.
     *
     * Text nodes become (or extend) a text part, img elements with a src
     * become image parts, p and div add a paragraph break after their
     * children, and every other element contributes only its children.
     *
     * @param DOMNode $node  Node to walk.
     * @param array   $parts Accumulated parts; modified in place.
     *
     * @return void
     */
    private function processNodeForArray(DOMNode $node, array &$parts)
    {
        if ($node->nodeType === XML_TEXT_NODE) {
            $this->addTextPart($parts, $this->decodeHtmlEntities($node->nodeValue));

            return;
        }

        if ($node->nodeType !== XML_ELEMENT_NODE) {
            return;
        }

        switch (strtolower($node->nodeName)) {
            case 'img':
                $src = $node->getAttribute('src');
                if ($src !== '') {
                    $parts[] = ['type' => 'image', 'url' => $this->resolveImageUrl($src)];
                }
                break;

            case 'p':
            case 'div':
                foreach ($node->childNodes as $child) {
                    $this->processNodeForArray($child, $parts);
                }
                $this->addTextPart($parts, "\n\n");
                break;

            default:
                foreach ($node->childNodes as $child) {
                    $this->processNodeForArray($child, $parts);
                }
                break;
        }
    }

    /**
     * Appends text to $parts, merging it into the last part when that part is text.
     *
     * @param array  $parts Accumulated parts; modified in place.
     * @param string $text  Text to append; an empty string is ignored.
     *
     * @return void
     */
    private function addTextPart(array &$parts, string $text)
    {
        // Compare with '' explicitly: empty() would also discard the text "0".
        if ($text === '') {
            return;
        }

        // end() moves the internal pointer to the last element, so key() below is its key.
        $last = end($parts);
        if ($last !== false && $last['type'] === 'text') {
            // The previous part was text as well: extend it instead of adding a new part.
            $parts[key($parts)]['content'] .= $text;
        } else {
            $parts[] = ['type' => 'text', 'content' => $text];
        }
    }
}