$code) {
$html = str_replace("&{$entity};","{$code};",$html);
// Some ill-brained webmasters write HTML symbolic references without the
// terminating semicolon (especially at www.whitehouse.gov). The following
// replacement is required to fix these damaged entities, converting them
// to numeric character references.
//
// We use [\s<] as entity name terminator to avoid breaking up longer entity
// names by filtering in only space or HTML-tag terminated ones.
//
$html = preg_replace("/&{$entity}([\s<])/","{$code};\\1",$html);
};
// Process hexadecimal character references
while (preg_match("/([[:xdigit:]]{2,4});/i", $html, $matches)) {
// We cannot use plain str_replace, because the 'x' symbol can be in either case;
// str_ireplace appeared only in PHP 5, so we cannot use it due to
// compatibility problems
$html = preg_replace("/".$matches[1].";/i","".hexdec($matches[1]).";",$html);
};
}
/**
 * Escape bare ampersands and terminate unterminated numeric character
 * references.
 *
 * Every '&' that does not start a decimal numeric character reference
 * (an ampersand, a '#' and at least one digit) is rewritten as the numeric
 * reference for the ampersand itself. Decimal references missing their
 * trailing ';' get one appended.
 *
 * @param string $html Markup to process.
 * @return string Markup with stray ampersands escaped.
 */
function escape_amp($html) {
  // Escape all ampersands not followed by a '#' sign.
  // Note that symbolic references were replaced by numeric ones before this!
  // The numeric form of the ampersand is used on purpose: the last pass
  // below skips anything that looks like '&#<digit>', so the text inserted
  // here is not escaped a second time (a symbolic reference would be).
  $html = preg_replace("/&(?!#)/", "&#38;", $html);

  // Complete all numeric character references not terminated with ';'.
  // The pattern has to be anchored on '&#'; matching bare digit runs would
  // append ';' to every number in the document.
  $html = preg_replace("/&#(\d+)(?![\d;])/", "&#\\1;", $html);

  // Escape all ampersands followed by '#' and a NON-DIGIT symbol.
  // They were not covered by the conversions above and are not a
  // character reference. Ampersands already escaped by the first pass are
  // followed by '#' and a digit, so they are left alone here.
  $html = preg_replace("/&(?!#\d)/", "&#38;", $html);

  return $html;
}
/**
 * Escape '<' characters that do not open a tag.
 *
 * Two kinds of '<' are rewritten as a numeric character reference:
 *  - a '<' followed (after optional whitespace) by a character that is
 *    not '!', '/' or an ASCII letter;
 *  - a '<' that appears after an earlier '<' with no '>' in between.
 *
 * A numeric reference is emitted rather than a symbolic one so that the
 * result is not affected by escape_amp(), which leaves '&#<digit>' alone.
 *
 * @param string $html Markup to process.
 * @return string Markup with stray '<' characters escaped.
 */
function escape_lt($html) {
  // Why are loops needed here?
  // preg_replace does not rescan text it has already consumed. In a
  // sequence such as '<<' the match covers BOTH characters (the first '<'
  // and the one following it), so the second '<' is not examined during the
  // same pass. We therefore repeat the search until nothing matches.
  //
  // Each pass replaces at least one '<' with text containing no '<', so the
  // loops always terminate.
  $stray_lt = "#<(\s*[^!/a-zA-Z])#";
  while (preg_match($stray_lt, $html)) {
    $html = preg_replace($stray_lt, "&#60;\\1", $html);
  }

  // A second '<' before the previous tag has been closed with '>'.
  $nested_lt = "#(<[^>]*?)<#";
  while (preg_match($nested_lt, $html)) {
    $html = preg_replace($nested_lt, "\\1&#60;", $html);
  }

  return $html;
}
/**
 * Escape '>' characters that do not close a tag.
 *
 * Two kinds of '>' are rewritten as a numeric character reference:
 *  - a '>' preceded (before optional whitespace) by a character that is not
 *    whitespace, a digit, an ASCII letter, a quote, '/', '=' or '-';
 *    whitespace between that character and the '>' is dropped;
 *  - a '>' that appears after an earlier '>' with no '<' in between.
 *
 * A numeric reference is emitted rather than a symbolic one so that the
 * result is not affected by escape_amp(), which leaves '&#<digit>' alone.
 *
 * @param string $html Markup to process.
 * @return string Markup with stray '>' characters escaped.
 */
function escape_gt($html) {
  // A '>' right after a character that cannot end a tag.
  $html = preg_replace("#([^\s\da-zA-Z'\"/=-])\s*>#", "\\1&#62;", $html);

  // A second '>' with no '<' since the previous one. preg_replace does not
  // rescan consumed text, so repeat until nothing matches. Each pass
  // replaces at least one '>' with text containing no '>', so the loop
  // always terminates.
  $repeated_gt = "#(>[^<]*?)>#";
  while (preg_match($repeated_gt, $html)) {
    $html = preg_replace($repeated_gt, "\\1&#62;", $html);
  }

  return $html;
}
?>