Derek Allard | d2df9bc | 2007-04-15 17:41:17 +0000 | [diff] [blame] | 1 | <?php if (!defined('BASEPATH')) exit('No direct script access allowed');
|
| 2 | /**
|
| 3 | * CodeIgniter
|
| 4 | *
|
| 5 | * An open source application development framework for PHP 4.3.2 or newer
|
| 6 | *
|
| 7 | * @package CodeIgniter
|
Derek Allard | 3d879d5 | 2008-01-18 19:41:32 +0000 | [diff] [blame] | 8 | * @author ExpressionEngine Dev Team
|
Derek Allard | d2df9bc | 2007-04-15 17:41:17 +0000 | [diff] [blame] | 9 | * @copyright Copyright (c) 2006, EllisLab, Inc.
|
Derek Allard | 6838f00 | 2007-10-04 19:29:59 +0000 | [diff] [blame] | 10 | * @license http://www.codeigniter.com/user_guide/license.html
|
Derek Allard | d2df9bc | 2007-04-15 17:41:17 +0000 | [diff] [blame] | 11 | * @link http://www.codeigniter.com
|
| 12 | * @since Version 1.0
|
| 13 | * @filesource
|
| 14 | */
|
| 15 |
|
| 16 | // ------------------------------------------------------------------------
|
| 17 |
|
| 18 | /**
|
| 19 | * CodeIgniter Typography Helpers
|
| 20 | *
|
| 21 | * @package CodeIgniter
|
| 22 | * @subpackage Helpers
|
| 23 | * @category Helpers
|
Derek Allard | 3d879d5 | 2008-01-18 19:41:32 +0000 | [diff] [blame] | 24 | * @author ExpressionEngine Dev Team
|
Derek Allard | d2df9bc | 2007-04-15 17:41:17 +0000 | [diff] [blame] | 25 | * @link http://www.codeigniter.com/user_guide/helpers/typography_helper.html
|
| 26 | */
|
| 27 |
|
| 28 | // ------------------------------------------------------------------------
|
| 29 |
|
/**
 * Convert newlines to HTML line breaks except within PRE tags
 *
 * The string is split around complete <pre ...>...</pre> blocks. Text
 * outside those blocks is passed through nl2br(); the blocks themselves
 * are copied through untouched so preformatted text keeps its raw
 * newlines.
 *
 * The opening tag is matched case-insensitively and may carry
 * attributes (e.g. <pre class="code">). A <pre> that is never closed
 * protects everything up to the end of the string.
 *
 * @access	public
 * @param	string	text that may contain <pre> blocks
 * @return	string	text with <br /> inserted before newlines outside <pre>
 */
function nl2br_except_pre($str)
{
	// With PREG_SPLIT_DELIM_CAPTURE and a single capture group the result
	// strictly alternates: even indexes hold ordinary text, odd indexes
	// hold a whole <pre> block. Empty pieces are kept on purpose so that
	// the parity never shifts.
	$parts = preg_split('#(<pre\b[^>]*>.*?(?:</pre\s*>|\z))#is', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

	if ($parts === FALSE)
	{
		// The regex engine gave up (e.g. backtrack limit on a huge input).
		// Degrade to a plain conversion rather than returning nothing.
		return nl2br($str);
	}

	$newstr = '';
	for ($i = 0, $ct = count($parts); $i < $ct; $i++)
	{
		if (($i % 2) == 0)
		{
			$newstr .= nl2br($parts[$i]);
		}
		else
		{
			$newstr .= $parts[$i];
		}
	}

	return $newstr;
}
|
| 60 |
|
| 61 | // ------------------------------------------------------------------------
|
| 62 |
|
/**
 * Auto Typography Wrapper Function
 *
 * Procedural entry point: builds an Auto_typography object and hands
 * back whatever its convert() method produces for the given string.
 *
 * @access	public
 * @param	string	text to format
 * @return	string	result of Auto_typography::convert()
 */
function auto_typography($str)
{
	$typography = new Auto_typography();
	$formatted = $typography->convert($str);

	return $formatted;
}
|
| 76 |
|
| 77 | // ------------------------------------------------------------------------
|
| 78 |
|
/**
 * Auto Typography Class
 *
 * Turns plain text into HTML paragraphs and line breaks and replaces
 * straight quotes, double hyphens and triple dots with their
 * typographic counterparts.
 *
 * @access		private
 * @category	Helpers
 * @author		ExpressionEngine Dev Team
 * @link		http://www.codeigniter.com/user_guide/helpers/
 */
class Auto_typography {

	// Block level elements that should not be wrapped inside <p> tags
	// (regex alternation, interpolated into patterns below)
	var $block_elements = 'div|blockquote|pre|code|h\d|script|ol|ul';

	// Elements that should not have <p> and <br /> tags within them.
	var $skip_elements	= 'pre|ol|ul';

	// Tags we want the parser to completely ignore when splitting the string.
	var $ignore_elements = 'a|b|i|em|strong|span|img|li';


	/**
	 * Main Processing Function
	 *
	 * Normalizes newlines, shields quotes that live inside tags, splits
	 * the text at every tag, wraps the text chunks in <p>/<br /> via
	 * format_newlines(), runs format_characters() over the result and
	 * finally removes the temporary markers and paragraph artifacts.
	 *
	 * @access	public
	 * @param	string	text to format
	 * @return	string	formatted text ('' when the input compares equal to '')
	 */
	function convert($str)
	{
		if ($str == '')
		{
			return '';
		}

		// Pad with spaces so the one-character look-behind/look-ahead
		// checks in format_characters() always have a neighbour to inspect.
		// The final clean up strips the padding next to <p> and </p>.
		$str = ' '.$str.' ';

		// Standardize Newlines to make matching easier
		$str = preg_replace("/(\r\n|\r)/", "\n", $str);

		/*
		 * Reduce line breaks
		 *
		 * If there are more than two consecutive line
		 * breaks we'll compress them down to a maximum
		 * of two since there's no benefit to more.
		 *
		 */
		$str = preg_replace("/\n\n+/", "\n\n", $str);

		/*
		 * Convert quotes within tags to temporary marker
		 *
		 * We don't want quotes converted within
		 * tags so we'll temporarily convert them to
		 * {@DQ} and {@SQ}
		 *
		 */
		if (preg_match_all("#\<.+?>#si", $str, $matches))
		{
			// The match list does not change inside the loop, so count it once.
			for ($i = 0, $total = count($matches['0']); $i < $total; $i++)
			{
				$str = str_replace($matches['0'][$i],
									str_replace(array("'",'"'), array('{@SQ}', '{@DQ}'), $matches['0'][$i]),
									$str);
			}
		}


		/*
		 * Add closing/opening paragraph tags before/after "block" elements
		 *
		 * Since block elements (like <blockquotes>, <pre>, etc.) do not get
		 * wrapped in paragraph tags we will add a closing </p> tag just before
		 * each block element starts and an opening <p> tag right after the block element
		 * ends.  Later on we'll do some further clean up.
		 *
		 */
		$str = preg_replace("#(<)(".$this->block_elements.")(.*?>)#", "</p>\\1\\2\\3", $str);
		$str = preg_replace("#(</)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3<p>", $str);

		/*
		 * Convert "ignore" tags to temporary marker
		 *
		 * The parser splits out the string at every tag
		 * it encounters.  Certain inline tags, like image
		 * tags, links, span tags, etc. will be adversely
		 * affected if they are split out so we'll convert
		 * the opening < temporarily to: {@TAG}
		 *
		 */
		$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);

		/*
		 * Split the string at every tag
		 *
		 * This creates an array with this prototype:
		 *
		 *	[array]
		 *	{
		 *		[0] = <opening tag>
		 *		[1] = Content contained between the tags
		 *		[2] = <closing tag>
		 *		Etc...
		 *	}
		 *
		 */
		$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

		/*
		 * Build our finalized string
		 *
		 * We'll cycle through the array, skipping tags,
		 * and processing the contained text
		 *
		 */
		$str = '';
		$process = TRUE;
		foreach ($chunks as $chunk)
		{
			/*
			 * Are we dealing with a tag?
			 *
			 * If so, we'll skip the processing for this cycle.
			 * We'll also set the "process" flag which allows us
			 * to skip <pre> tags and a few other things.
			 *
			 */
			if (preg_match("#<(/*)(".$this->block_elements.").*?\>#", $chunk, $match))
			{
				if (preg_match("#".$this->skip_elements."#", $match['2']))
				{
					// An opening skip element turns processing off,
					// its closing tag turns it back on.
					$process = ($match['1'] == '/') ? TRUE : FALSE;
				}

				$str .= $chunk;
				continue;
			}

			if ($process == FALSE)
			{
				// Inside a skip element: copy the text through untouched.
				$str .= $chunk;
				continue;
			}

			//  Convert Newlines into <p> and <br /> tags
			$str .= $this->format_newlines($chunk);
		}

		// FINAL CLEAN UP
		// IMPORTANT:  DO NOT ALTER THE ORDER OF THE ITEMS BELOW!

		/*
		 * Clean up paragraph tags before/after "block" elements
		 *
		 * Earlier we added <p></p> tags before/after block level elements.
		 * Then, we added paragraph tags around double line breaks.  This
		 * potentially created incorrectly formatted paragraphs so we'll
		 * clean it up here.
		 *
		 */
		$str = preg_replace("#<p>({@TAG}.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3", $str);
		$str = preg_replace("#({@TAG}/.*?)(".$this->block_elements.")(.*?>)</p>#", "\\1\\2\\3", $str);

		// Convert Quotes and other characters
		$str = $this->format_characters($str);

		// Fix an artifact that happens during the paragraph replacement
		$str = preg_replace('#(<p>\n*</p>)#', '', $str);

		// If the user submitted their own paragraph tags with class data
		// in them we will retain them instead of using our tags.
		$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);

		// Final clean up: collapse doubled paragraph tags, trim the padding
		// spaces next to <p>/</p>, and turn the temporary markers back into
		// the characters they stood for.
		$str = str_replace(
							array(
									'</p></p>',
									'</p><p>',
									'<p> ',
									' </p>',
									'{@TAG}',
									'{@DQ}',
									'{@SQ}',
									'<p></p>'
								),
							array(
									'</p>',
									'<p>',
									'<p>',
									'</p>',
									'<',
									'"',
									"'",
									''
								),
							$str
						);

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Format Characters
	 *
	 * This function mainly converts straight double and single quotes
	 * to their directional (curly) forms. Since these are directional,
	 * it decides on opening vs. closing by looking at the characters
	 * on either side of each quote. It also converts em-dashes and
	 * ellipses.
	 *
	 * NOTE(review): the replacement strings below are literal multibyte
	 * characters, so correct output depends on the file and page sharing
	 * an encoding. Confirm whether numeric entities were intended.
	 *
	 * @access	public
	 * @param	string	text to convert
	 * @return	string	converted text
	 */
	function format_characters($str)
	{
		// Simple two-character substitutions, applied in one pass near
		// the end (after the context-sensitive loops below).
		// NOTE(review): the last four entries map a sequence to itself as
		// written here, i.e. they are no-ops. Presumably the replacement
		// was meant to carry extra spacing; verify.
		$table = array(
						' "'		=> " “",
						'" '		=> "” ",
						" '"		=> " ‘",
						"' "		=> "’ ",

						'>"'		=> ">“",
						'"<'		=> "”<",
						">'"		=> ">‘",
						"'<"		=> "’<",

						"\"."		=> "”.",
						"\","		=> "”,",
						"\";"		=> "”;",
						"\":"		=> "”:",
						"\"!"		=> "”!",
						"\"?"		=> "”?",

						". "		=> ". ",
						"? "		=> "? ",
						"! "		=> "! ",
						": "		=> ": ",
					);

		// These deal with quotes within quotes, like:  "'hi here'"
		$start = 0;
		$space = array("\n", "\t", " ");

		// Double quote immediately followed by a single quote
		while(TRUE)
		{
			$current = strpos(substr($str, $start), "\"'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);

			if ( ! in_array($one_after, $space, TRUE) && $one_after != "<")
			{
				// Text follows the pair: treat both as opening quotes
				$str = str_replace(	$one_before."\"'".$one_after,
									$one_before."“‘".$one_after,
									$str);
			}
			elseif ( ! in_array($one_before, $space, TRUE) && (in_array($one_after, $space, TRUE) OR $one_after == '<'))
			{
				// Text precedes the pair and whitespace or a tag follows:
				// treat both as closing quotes
				$str = str_replace(	$one_before."\"'".$one_after,
									$one_before."”’".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		$start = 0;

		// Single quote immediately followed by a double quote
		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'\"");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);

			if ( in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE) && $one_after != "<")
			{
				$str = str_replace(	$one_before."'\"".$one_after,
									$one_before."‘“".$one_after,
									$str);
			}
			elseif ( ! in_array($one_before, $space, TRUE) && $one_before != ">")
			{
				$str = str_replace(	$one_before."'\"".$one_after,
									$one_before."’”".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Are there quotes within a word, as in:  ("something")
		if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
		{
			for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
			{
				// Only when neither neighbour is whitespace; the
				// whitespace-adjacent cases are covered by $table.
				if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				{
					$str = str_replace(	$matches['0'][$i],
										$matches['1'][$i]."“".$matches['2'][$i]."”".$matches['3'][$i],
										$str);
				}
			}
		}

		if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
		{
			for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
			{
				if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				{
					$str = str_replace(	$matches['0'][$i],
										$matches['1'][$i]."‘".$matches['2'][$i]."’".$matches['3'][$i],
										$str);
				}
			}
		}

		// How about one apostrophe, as in Rick's
		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if ( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."’".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Em-dashes
		$start = 0;
		while(TRUE)
		{
			$current = strpos(substr($str, $start), "--");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);
			$two_before = substr($str, $start+$current-2, 1);
			$two_after = substr($str, $start+$current+3, 1);

			// Convert "word--word" and "word -- word"; in the spaced form
			// the trim() calls drop the single spaces around the dash.
			if (( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
				OR
				( ! in_array($two_before, $space, TRUE) && ! in_array($two_after, $space, TRUE) && $one_before == ' ' && $one_after == ' ')
				)
			{
				$str = str_replace(	$two_before.$one_before."--".$one_after.$two_after,
									$two_before.trim($one_before)."—".trim($one_after).$two_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Ellipsis
		$str = preg_replace("#(\w)\.\.\.(\s|<br />|</p>)#", "\\1…\\2", $str);
		$str = preg_replace("#(\s|<br />|</p>)\.\.\.(\w)#", "\\1…\\2", $str);

		// Run the translation array we defined above
		$str = str_replace(array_keys($table), array_values($table), $str);

		// If there are any stray double quotes we'll catch them here

		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), '"');

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if ( ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before.'"'.$one_after,
									$one_before."“".$one_after,
									$str);
			}
			elseif( ! in_array($one_before, $space, TRUE))
			{
				// The search string must contain the double quote we just
				// found; looking for a single quote here would leave the
				// closing double quote unconverted.
				$str = str_replace(	$one_before.'"'.$one_after,
									$one_before."”".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		$start = 0;

		// Same catch-all for any single quotes that are still straight
		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if ( ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."‘".$one_after,
									$str);
			}
			elseif( ! in_array($one_before, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."’".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Format Newlines
	 *
	 * Converts newline characters into either <p> tags or <br />.
	 * A double newline becomes a paragraph boundary, a single newline
	 * between two non-newline characters gets a <br /> in front of it,
	 * and the whole string is wrapped in one <p>...</p> pair.
	 *
	 * @access	public
	 * @param	string	text chunk (no tags expected)
	 * @return	string	chunk wrapped in paragraph markup, or the input
	 *					unchanged when it compares equal to ''
	 */
	function format_newlines($str)
	{
		if ($str == '')
		{
			return $str;
		}

		if (strpos($str, "\n") === FALSE)
		{
			return '<p>'.$str.'</p>';
		}

		$str = str_replace("\n\n", "</p>\n\n<p>", $str);
		$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);

		return '<p>'.$str.'</p>';
	}
}
|
| 537 |
|
| 538 |
|
admin | b0dd10f | 2006-08-25 17:25:49 +0000 | [diff] [blame] | 539 | ?> |