Files
sistema_para_juego/Sistema_discord/HtmlToDiscordMarkdownConverter.php

209 lines
7.9 KiB
PHP
Executable File

<?php
// Incluir el archivo de configuración
require_once __DIR__ . '/../config/config.php';
/**
 * Converts an HTML fragment into content suitable for posting to Discord.
 *
 * Two entry points are offered:
 *  - convert():        returns one Markdown string (bold, italic, underline,
 *                      links, lists, paragraphs, line breaks, image URLs);
 *  - convertToArray(): returns an ordered list of plain-text and image parts.
 *
 * Relative image sources are resolved against the BOT_BASE_URL constant, which
 * is not defined in this class and must exist before such an image is converted.
 */
class HtmlToDiscordMarkdownConverter
{
    // Not referenced by any method of this class.
    // NOTE(review): presumably Discord's per-message character cap — confirm before relying on it.
    private const DISCORD_MESSAGE_LIMIT = 2000;

    /**
     * Converts an HTML fragment into a single Discord Markdown string.
     *
     * @param string $html HTML fragment; malformed markup is tolerated.
     *
     * @return string Markdown with runs of three or more newlines collapsed to two
     *                and surrounding whitespace trimmed.
     */
    public function convert(string $html): string
    {
        $dom = $this->loadDom($html);

        $markdown = '';
        foreach ($dom->childNodes as $node) {
            $markdown .= $this->processNode($node);
        }

        // preg_replace() returns null on a PCRE failure; fall back to the
        // uncollapsed text instead of violating the string return type.
        $collapsed = preg_replace('/\n{3,}/', "\n\n", $markdown);

        return trim($collapsed ?? $markdown);
    }

    /**
     * Converts an HTML fragment into an ordered list of text and image parts.
     *
     * Inline formatting is not translated to Markdown here; only the text content,
     * paragraph/div breaks, line breaks and images are kept.
     *
     * @param string $html HTML fragment; malformed markup is tolerated.
     *
     * @return array<int, array<string, string>> Each entry is either
     *         ['type' => 'text', 'content' => string] or
     *         ['type' => 'image', 'url' => string]. Adjacent text is merged.
     */
    public function convertToArray(string $html): array
    {
        $parts = [];
        foreach ($this->loadDom($html)->childNodes as $node) {
            $this->processNodeForArray($node, $parts);
        }

        return $parts;
    }

    /**
     * Parses the fragment into a DOMDocument without leaking libxml state.
     *
     * libxml's error mode is process-global, so the previous setting is restored
     * once parsing is done instead of leaving internal-error mode switched on.
     */
    private function loadDom(string $html): DOMDocument
    {
        $dom = new DOMDocument();
        $previousMode = libxml_use_internal_errors(true); // silence warnings for malformed HTML
        try {
            // The XML declaration prefix makes libxml read the input as UTF-8.
            // NOIMPLIED / NODEFDTD keep libxml from adding html/body and a doctype.
            $dom->loadHTML(
                '<?xml encoding="utf-8" ?>' . $html,
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
            );
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousMode);
        }

        return $dom;
    }

    /**
     * Renders one DOM node (and its subtree) as Markdown.
     *
     * Only text and element nodes produce output; every other node type
     * (comments, processing instructions, ...) yields an empty string.
     */
    private function processNode(DOMNode $node): string
    {
        $output = '';
        switch ($node->nodeType) {
            case XML_TEXT_NODE:
                $output .= $this->decodeHtmlEntities($node->nodeValue);
                break;
            case XML_ELEMENT_NODE:
                switch (strtolower($node->nodeName)) {
                    case 'b':
                    case 'strong':
                        $output .= '**' . $this->processChildren($node) . '**';
                        break;
                    case 'i':
                    case 'em':
                        $output .= '*' . $this->processChildren($node) . '*';
                        break;
                    case 'u':
                        $output .= '__' . $this->processChildren($node) . '__';
                        break;
                    case 'a':
                        $output .= $this->convertLink($node);
                        break;
                    case 'p':
                        $output .= $this->processChildren($node) . "\n\n";
                        break;
                    case 'br':
                        $output .= "\n";
                        break;
                    case 'ul':
                    case 'ol':
                        $output .= $this->convertList($node);
                        break;
                    case 'img':
                        $imageUrl = $this->resolveImageUrl($node->getAttribute('src'));
                        if ($imageUrl !== null) {
                            // Emit the bare URL on its own line so Discord can render it.
                            $output .= "\n" . $imageUrl . "\n";
                        }
                        break;
                    default:
                        // li, div and unknown tags are transparent: only their children matter.
                        $output .= $this->processChildren($node);
                        break;
                }
                break;
        }

        return $output;
    }

    /**
     * Renders an <a> element.
     *
     * - A link whose only meaningful child is an image yields just the image output.
     * - A link without an href yields its text (avoids the broken "[text]()").
     * - A link without visible text yields the bare URL (avoids "[](url)").
     * - Anything else becomes "[text](href)".
     */
    private function convertLink(DOMNode $node): string
    {
        $href = trim($node->getAttribute('href'));

        // Ignore whitespace-only text nodes when deciding what the link wraps.
        $meaningfulChildren = [];
        foreach ($node->childNodes as $child) {
            if ($child->nodeType === XML_TEXT_NODE && trim($child->nodeValue) === '') {
                continue;
            }
            $meaningfulChildren[] = $child;
        }

        $text = $this->processChildren($node);

        $wrapsSingleImage = count($meaningfulChildren) === 1
            && strtolower($meaningfulChildren[0]->nodeName) === 'img';
        if ($wrapsSingleImage || $href === '') {
            return $text;
        }
        if (trim($text) === '') {
            return $href;
        }

        return "[{$text}]({$href})";
    }

    /**
     * Renders a <ul> or <ol> element, one Markdown item per child node.
     *
     * Items are taken from the list's child nodes rather than by splitting the
     * concatenated output on newlines, so adjacent <li> elements without
     * whitespace between them stay separate and a line break inside an item
     * does not start a new bullet. Extra lines of an item (including nested
     * lists) are indented under its marker. Children that render to nothing
     * but whitespace are skipped.
     */
    private function convertList(DOMNode $list): string
    {
        $isOrdered = strtolower($list->nodeName) === 'ol';
        $counter = 1;
        $lines = [];

        foreach ($list->childNodes as $child) {
            $itemLines = [];
            foreach (explode("\n", $this->processNode($child)) as $line) {
                // Strict comparison: an item whose text is "0" must be kept.
                if (trim($line) !== '') {
                    $itemLines[] = rtrim($line);
                }
            }
            if ($itemLines === []) {
                continue;
            }

            $marker = $isOrdered ? ($counter++) . '. ' : '- ';
            $indent = str_repeat(' ', strlen($marker));
            foreach ($itemLines as $index => $line) {
                $lines[] = $index === 0 ? $marker . ltrim($line) : $indent . $line;
            }
        }

        // Leading newline: a list always starts on a fresh line, also when nested in an item.
        return "\n" . implode("\n", $lines) . "\n\n";
    }

    /**
     * Turns an image src into the URL to emit.
     *
     * @return string|null The src unchanged when it starts with http://, https://
     *                     or //; otherwise BOT_BASE_URL joined with the src by a
     *                     single slash; null when the src is blank.
     */
    private function resolveImageUrl(string $src): ?string
    {
        $src = trim($src);
        if ($src === '') {
            return null;
        }

        // Match a real scheme prefix, not merely a path beginning with "http".
        if (preg_match('#^(?:https?:)?//#i', $src) === 1) {
            return $src;
        }

        $base = rtrim(BOT_BASE_URL, '/');
        $path = ltrim($src, '/');

        return "{$base}/{$path}";
    }

    /**
     * Concatenates the Markdown of every child of the given node.
     */
    private function processChildren(DOMNode $node): string
    {
        $childrenOutput = '';
        foreach ($node->childNodes as $child) {
            $childrenOutput .= $this->processNode($child);
        }

        return $childrenOutput;
    }

    /**
     * Decodes HTML entities (HTML5 set, both quote styles) in a text value.
     *
     * NOTE(review): DOM text values normally arrive already decoded, so this is a
     * second pass (e.g. "&amp;lt;" in the markup ends up as "<"). Kept as-is in
     * case the input is double-encoded upstream — confirm whether it is needed.
     */
    private function decodeHtmlEntities(string $encodedString): string
    {
        return html_entity_decode($encodedString, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }

    /**
     * Appends the parts produced by one DOM node (and its subtree) to $parts.
     *
     * Text nodes become text, <img> becomes an image part, <br> becomes a newline,
     * <p>/<div> add a blank line after their content, and any other element is
     * transparent (only its children are visited).
     */
    private function processNodeForArray(DOMNode $node, array &$parts): void
    {
        if ($node->nodeType === XML_TEXT_NODE) {
            $this->addTextPart($parts, $this->decodeHtmlEntities($node->nodeValue));
            return;
        }
        if ($node->nodeType !== XML_ELEMENT_NODE) {
            return;
        }

        switch (strtolower($node->nodeName)) {
            case 'img':
                $imageUrl = $this->resolveImageUrl($node->getAttribute('src'));
                if ($imageUrl !== null) {
                    $parts[] = ['type' => 'image', 'url' => $imageUrl];
                }
                break;
            case 'br':
                // Keep line breaks, matching what convert() does for <br>.
                $this->addTextPart($parts, "\n");
                break;
            case 'p':
            case 'div':
                foreach ($node->childNodes as $child) {
                    $this->processNodeForArray($child, $parts);
                }
                $this->addTextPart($parts, "\n\n");
                break;
            default:
                foreach ($node->childNodes as $child) {
                    $this->processNodeForArray($child, $parts);
                }
                break;
        }
    }

    /**
     * Appends text to $parts, merging it into the last part when that is text too.
     */
    private function addTextPart(array &$parts, string $text): void
    {
        // Strict comparison: empty() would also discard the legitimate text "0".
        if ($text === '') {
            return;
        }

        $lastIndex = count($parts) - 1;
        if ($lastIndex >= 0 && $parts[$lastIndex]['type'] === 'text') {
            $parts[$lastIndex]['content'] .= $text;
            return;
        }

        $parts[] = ['type' => 'text', 'content' => $text];
    }
}