import DOMPurify from 'isomorphic-dompurify';
import { ALLOWED_ATTR, FORBID_TAGS, ADD_ATTR, USE_PROFILES } from './sanitization.const';

/**
 * Builds a structural fingerprint of an HTML string from its elements and attributes.
 * Used to determine if sanitization made security-relevant changes.
 *
 * The fingerprint lists every element in document order as `<tag attr="value" ...>`,
 * with attributes sorted by name so that attribute order does not affect the result.
 * Text nodes, comments and element nesting are not part of the fingerprint.
 *
 * @param html - The HTML string to fingerprint (may be untrusted)
 * @returns A string representation of the HTML structure
 */
const getContentHash = (html: string): string => {
  // Parse inside a separate, inert document. Assigning untrusted markup to the
  // innerHTML of an element owned by the live document can trigger handlers
  // such as <img onerror> even though the element is never attached to the page.
  const inertDocument = document.implementation.createHTMLDocument('');
  const container = inertDocument.createElement('div');
  container.innerHTML = html;

  return Array.from(container.querySelectorAll('*'))
    .map((el) => {
      const tag = el.tagName.toLowerCase();

      // Work on the attribute list directly instead of copying into a plain
      // object: an attribute named "__proto__" would otherwise be lost.
      const sortedAttrs = Array.from(el.attributes)
        .sort((a, b) => a.name.localeCompare(b.name))
        .map((attr) => `${attr.name}="${attr.value}"`)
        .join(' ');

      return `<${tag} ${sortedAttrs}>`;
    })
    .join('');
};

/**
 * Assembles the option object handed to DOMPurify.
 * Every value comes unchanged from `./sanitization.const`; a new object is built on each call.
 * @returns DOMPurify configuration object with customized security settings
 */
const createSanitizationConfig = () => {
  const settings = {
    // Tags that are always removed (the deny-list lives in sanitization.const).
    FORBID_TAGS: FORBID_TAGS,

    // Attribute allow-list.
    // NOTE(review): DOMPurify documents that USE_PROFILES overrides ALLOWED_TAGS;
    // confirm ALLOWED_ATTR still takes effect when combined with USE_PROFILES.
    ALLOWED_ATTR: ALLOWED_ATTR,

    // Extra attribute names appended to the allow-list. In DOMPurify, ADD_ATTR
    // extends the allowed set; it does not inject attributes into the output.
    ADD_ATTR: ADD_ATTR,

    // Profile selection (see sanitization.const for the profiles enabled).
    USE_PROFILES: USE_PROFILES,
  };

  return settings;
};

/**
 * Runs DOMPurify over the given markup using the module's sanitization config.
 * @param dirtyHtml - The unsanitized HTML string
 * @returns The string produced by `DOMPurify.sanitize` for that input
 */
const performSanitization = (dirtyHtml: string): string =>
  DOMPurify.sanitize(dirtyHtml, createSanitizationConfig());

/**
 * Sanitizes HTML content with DOMPurify and returns the sanitized markup only
 * when sanitization changed the element/attribute structure.
 *
 * This function:
 * - Returns an empty string for empty input
 * - Sanitizes the input with the module's DOMPurify configuration
 * - Compares a structural fingerprint (tags and attributes) of the input and the
 *   sanitized output, and returns the original string when the two match
 * - Returns the sanitized output directly when no global `document` exists
 *   (e.g. server-side), since the fingerprint needs a DOM to parse with
 *
 * NOTE(review): the fingerprint covers only elements and attributes, so differences
 * confined to text or comments are not detected and the original is returned as-is.
 * Confirm this is acceptable for every place the result is rendered.
 *
 * @param dirtyHtml - The unsanitized HTML string
 * @returns A sanitized HTML string, or the original if no security-relevant changes were needed
 */
const sanitizeHtml = (dirtyHtml: string): string => {
  if (!dirtyHtml) return '';

  const sanitized = performSanitization(dirtyHtml);

  // Without a DOM the structural comparison cannot run; fall back to the safe
  // choice (the sanitized output) instead of throwing a ReferenceError.
  if (typeof document === 'undefined') return sanitized;

  const inputHash = getContentHash(dirtyHtml);
  const outputHash = getContentHash(sanitized);

  const hasRealChanges = inputHash !== outputHash;
  return hasRealChanges ? sanitized : dirtyHtml;
};

export { sanitizeHtml };
