Derek Allard | d2df9bc | 2007-04-15 17:41:17 +0000 | [diff] [blame] | 1 | <?php if (!defined('BASEPATH')) exit('No direct script access allowed');
|
| 2 | /**
|
| 3 | * CodeIgniter
|
| 4 | *
|
| 5 | * An open source application development framework for PHP 4.3.2 or newer
|
| 6 | *
|
| 7 | * @package CodeIgniter
|
| 8 | * @author Rick Ellis
|
| 9 | * @copyright Copyright (c) 2006, EllisLab, Inc.
|
Derek Allard | 6838f00 | 2007-10-04 19:29:59 +0000 | [diff] [blame^] | 10 | * @license http://www.codeigniter.com/user_guide/license.html
|
Derek Allard | d2df9bc | 2007-04-15 17:41:17 +0000 | [diff] [blame] | 11 | * @link http://www.codeigniter.com
|
| 12 | * @since Version 1.0
|
| 13 | * @filesource
|
| 14 | */
|
| 15 |
|
| 16 | // ------------------------------------------------------------------------
|
| 17 |
|
| 18 | /**
|
| 19 | * CodeIgniter Typography Helpers
|
| 20 | *
|
| 21 | * @package CodeIgniter
|
| 22 | * @subpackage Helpers
|
| 23 | * @category Helpers
|
| 24 | * @author Rick Ellis
|
| 25 | * @link http://www.codeigniter.com/user_guide/helpers/typography_helper.html
|
| 26 | */
|
| 27 |
|
| 28 | // ------------------------------------------------------------------------
|
| 29 |
|
/**
 * Convert newlines to HTML line breaks except within PRE tags
 *
 * The string is split into alternating "outside <pre>" and "inside <pre>"
 * segments.  Segments outside get nl2br() applied; <pre> regions (opening
 * tag, body and closing tag) are copied through untouched.
 *
 * Opening tags may carry attributes (<pre class="code">) and tag names are
 * matched case-insensitively.  An unclosed <pre> protects everything up to
 * the end of the string.
 *
 * @access	public
 * @param	string	text that may contain <pre>...</pre> regions
 * @return	string	the text with <br /> inserted before newlines outside <pre>
 */
function nl2br_except_pre($str)
{
	// With PREG_SPLIT_DELIM_CAPTURE and a single capture group the result
	// alternates: even indexes are text outside <pre>, odd indexes are the
	// captured <pre> regions themselves.
	$segments = preg_split('#(<pre\b[^>]*>.*?(?:</pre\s*>|\z))#is', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

	if ($segments === FALSE)
	{
		// PCRE failed (e.g. backtrack limit); hand the text back unmodified
		// rather than risk inserting breaks inside a <pre> region.
		return $str;
	}

	$newstr = '';
	foreach ($segments as $index => $segment)
	{
		if (($index % 2) == 0)
		{
			$newstr .= nl2br($segment);
		}
		else
		{
			$newstr .= $segment;
		}
	}

	return $newstr;
}
|
| 60 |
|
| 61 | // ------------------------------------------------------------------------
|
| 62 |
|
/**
 * Auto Typography Wrapper Function
 *
 * Procedural entry point: builds a fresh Auto_typography object and
 * returns whatever its convert() method produces for the given text.
 *
 * @access	public
 * @param	string	the text to format
 * @return	string	the result of Auto_typography::convert()
 */
function auto_typography($str)
{
	$formatter = new Auto_typography();
	$formatted = $formatter->convert($str);

	return $formatted;
}
|
| 76 |
|
| 77 | // ------------------------------------------------------------------------
|
| 78 |
|
/**
 * Auto Typography Class
 *
 * Turns loosely formatted text into HTML: double newlines become
 * paragraphs, single newlines become <br />, and straight quotes,
 * double hyphens and triple dots become typographic HTML entities.
 *
 * @access		private
 * @category	Helpers
 * @author		Rick Ellis
 * @author		Paul Burdick
 * @link		http://www.codeigniter.com/user_guide/helpers/
 */
class Auto_typography {

	// Block level elements that should not be wrapped inside <p> tags.
	// This is a regex alternation; it is interpolated into several patterns.
	var $block_elements = 'div|blockquote|pre|code|h\d|script|ol|ul';

	// Elements that should not have <p> and <br /> tags within them.
	// Only consulted for tags that already matched $block_elements.
	var $skip_elements	= 'pre|ol|ul';

	// Tags we want the parser to completely ignore when splitting the string.
	var $ignore_elements = 'a|b|i|em|strong|span|img|li';


	/**
	 * Main Processing Function
	 *
	 * @access	public
	 * @param	string	the raw text
	 * @return	string	the formatted markup ('' for empty input)
	 */
	function convert($str)
	{
		if ($str == '')
		{
			return '';
		}

		// Pad with spaces so the "character before/after" look-ups in
		// format_characters() always have something to look at.  The final
		// clean up strips the padding again next to <p> / </p>.
		$str = ' '.$str.' ';

		// Standardize Newlines to make matching easier
		$str = preg_replace("/(\r\n|\r)/", "\n", $str);

		/*
		 * Reduce line breaks
		 *
		 * If there are more than two consecutive line
		 * breaks we'll compress them down to a maximum
		 * of two since there's no benefit to more.
		 *
		 */
		$str = preg_replace("/\n\n+/", "\n\n", $str);

		/*
		 * Convert quotes within tags to temporary marker
		 *
		 * We don't want quotes converted within
		 * tags so we'll temporarily convert them to
		 * {@DQ} and {@SQ}
		 *
		 */
		if (preg_match_all("#\<.+?>#si", $str, $matches))
		{
			for ($i = 0, $total = count($matches['0']); $i < $total; $i++)
			{
				$str = str_replace($matches['0'][$i],
									str_replace(array("'",'"'), array('{@SQ}', '{@DQ}'), $matches['0'][$i]),
									$str);
			}
		}


		/*
		 * Add closing/opening paragraph tags before/after "block" elements
		 *
		 * Since block elements (like <blockquotes>, <pre>, etc.) do not get
		 * wrapped in paragraph tags we will add a closing </p> tag just before
		 * each block element starts and an opening <p> tag right after the block element
		 * ends.  Later on we'll do some further clean up.
		 *
		 */
		$str = preg_replace("#(<.*?)(".$this->block_elements.")(.*?>)#", "</p>\\1\\2\\3", $str);
		$str = preg_replace("#(</.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3<p>", $str);

		/*
		 * Convert "ignore" tags to temporary marker
		 *
		 * The parser splits out the string at every tag
		 * it encounters.  Certain inline tags, like image
		 * tags, links, span tags, etc. will be adversely
		 * affected if they are split out so we'll convert
		 * the opening < temporarily to: {@TAG}
		 *
		 */
		$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);

		/*
		 * Split the string at every tag
		 *
		 * This creates an array with this prototype:
		 *
		 *	[array]
		 *	{
		 *		[0] = <opening tag>
		 *		[1] = Content contained between the tags
		 *		[2] = <closing tag>
		 *		Etc...
		 *	}
		 *
		 */
		$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

		/*
		 * Build our finalized string
		 *
		 * We'll cycle through the array, skipping tags,
		 * and processing the contained text
		 *
		 */
		$str = '';
		$process = TRUE;
		foreach ($chunks as $chunk)
		{
			/*
			 * Are we dealing with a tag?
			 *
			 * If so, we'll skip the processing for this cycle.
			 * We'll also set the "process" flag which allows us
			 * to skip <pre> tags and a few other things.
			 *
			 */
			if (preg_match("#<(/*)(".$this->block_elements.").*?\>#", $chunk, $match))
			{
				if (preg_match("#".$this->skip_elements."#", $match['2']))
				{
					// An opening skip element turns processing off,
					// its closing tag turns it back on.
					$process = ($match['1'] == '/') ? TRUE : FALSE;
				}

				$str .= $chunk;
				continue;
			}

			if ($process == FALSE)
			{
				// Inside a skip element: copy the text through verbatim
				$str .= $chunk;
				continue;
			}

			// Convert Newlines into <p> and <br /> tags
			$str .= $this->format_newlines($chunk);
		}

		// FINAL CLEAN UP
		// IMPORTANT:  DO NOT ALTER THE ORDER OF THE ITEMS BELOW!

		/*
		 * Clean up paragraph tags before/after "block" elements
		 *
		 * Earlier we added <p></p> tags before/after block level elements.
		 * Then, we added paragraph tags around double line breaks.  This
		 * potentially created incorrectly formatted paragraphs so we'll
		 * clean it up here.
		 *
		 */
		$str = preg_replace("#<p>({@TAG}.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3", $str);
		$str = preg_replace("#({@TAG}/.*?)(".$this->block_elements.")(.*?>)</p>#", "\\1\\2\\3", $str);

		// Convert Quotes and other characters
		$str = $this->format_characters($str);

		// Fix an artifact that happens during the paragraph replacement
		$str = preg_replace('#(<p>\n*</p>)#', '', $str);

		// If the user submitted their own paragraph tags with class data
		// in them we will retain them instead of using our tags.
		$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);

		// Final clean up: collapse doubled paragraph tags, trim the padding
		// spaces next to <p> / </p>, and turn the temporary markers back
		// into the characters they stood for.
		$str = str_replace(
							array(
									'</p></p>',
									'</p><p>',
									'<p> ',
									' </p>',
									'{@TAG}',
									'{@DQ}',
									'{@SQ}',
									'<p></p>'
								),
							array(
									'</p>',
									'<p>',
									'<p>',
									'</p>',
									'<',
									'"',
									"'",
									''
								),
							$str
						);

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Format Characters
	 *
	 * This function mainly converts double and single quotes
	 * to entities, but since these are directional, it does
	 * it based on some rules.  It also converts em-dashes
	 * and a couple other things.
	 *
	 * Replacements are emitted as numeric HTML entities so the result
	 * does not depend on the character set of the page.
	 *
	 * @access	public
	 * @param	string	text whose in-tag quotes have already been masked
	 * @return	string	the text with typographic entities inserted
	 */
	function format_characters($str)
	{
		// Straight replacements, applied in this order further down.
		$table = array(
						' "'		=> " &#8220;",
						'" '		=> "&#8221; ",
						" '"		=> " &#8216;",
						"' "		=> "&#8217; ",

						'>"'		=> ">&#8220;",
						'"<'		=> "&#8221;<",
						">'"		=> ">&#8216;",
						"'<"		=> "&#8217;<",

						"\"."		=> "&#8221;.",
						"\","		=> "&#8221;,",
						"\";"		=> "&#8221;;",
						"\":"		=> "&#8221;:",
						"\"!"		=> "&#8221;!",
						"\"?"		=> "&#8221;?",

						// Keep double spacing after sentence punctuation visible in
						// HTML.  NOTE(review): the copy of this file being edited had
						// these four entries mapping a string to itself (a no-op),
						// which looks like collapsed whitespace / a decoded &nbsp;
						// - confirm against the upstream source.
						".  "		=> ".&nbsp; ",
						"?  "		=> "?&nbsp; ",
						"!  "		=> "!&nbsp; ",
						":  "		=> ":&nbsp; ",
					);

		// These deal with quotes within quotes, like:  "'hi here'"
		$start = 0;
		$space = array("\n", "\t", " ");

		while(TRUE)
		{
			// Each scanning loop searches the remainder of the string from
			// $start; $current is therefore relative to $start.
			$current = strpos(substr($str, $start), "\"'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);

			if ( ! in_array($one_after, $space, TRUE) && $one_after != "<")
			{
				// Followed by text: this is an opening pair
				$str = str_replace(	$one_before."\"'".$one_after,
									$one_before."&#8220;&#8216;".$one_after,
									$str);
			}
			elseif ( ! in_array($one_before, $space, TRUE) && (in_array($one_after, $space, TRUE) OR $one_after == '<'))
			{
				// Preceded by text and followed by space or a tag: closing pair
				$str = str_replace(	$one_before."\"'".$one_after,
									$one_before."&#8221;&#8217;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'\"");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);

			if ( in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE) && $one_after != "<")
			{
				$str = str_replace(	$one_before."'\"".$one_after,
									$one_before."&#8216;&#8220;".$one_after,
									$str);
			}
			elseif ( ! in_array($one_before, $space, TRUE) && $one_before != ">")
			{
				$str = str_replace(	$one_before."'\"".$one_after,
									$one_before."&#8217;&#8221;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Are there quotes within a word, as in:  ("something")
		if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
		{
			for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
			{
				// Only when the quoted run is glued to non-space on both sides;
				// space-delimited quotes are handled by the table below.
				if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				{
					$str = str_replace(	$matches['0'][$i],
										$matches['1'][$i]."&#8220;".$matches['2'][$i]."&#8221;".$matches['3'][$i],
										$str);
				}
			}
		}

		if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
		{
			for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
			{
				if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				{
					$str = str_replace(	$matches['0'][$i],
										$matches['1'][$i]."&#8216;".$matches['2'][$i]."&#8217;".$matches['3'][$i],
										$str);
				}
			}
		}

		// How about one apostrophe, as in Rick's
		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if ( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."&#8217;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Em-dashes
		$start = 0;
		while(TRUE)
		{
			$current = strpos(substr($str, $start), "--");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);
			$two_before = substr($str, $start+$current-2, 1);
			$two_after = substr($str, $start+$current+3, 1);

			// Either "word--word", or "word -- word" with exactly one
			// space on each side of the double hyphen.
			if (( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
				OR
				( ! in_array($two_before, $space, TRUE) && ! in_array($two_after, $space, TRUE) && $one_before == ' ' && $one_after == ' ')
				)
			{
				// trim() drops the single surrounding spaces in the second form
				$str = str_replace(	$two_before.$one_before."--".$one_after.$two_after,
									$two_before.trim($one_before)."&#8212;".trim($one_after).$two_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Ellipsis
		$str = preg_replace("#(\w)\.\.\.(\s|<br />|</p>)#", "\\1&#8230;\\2", $str);
		$str = preg_replace("#(\s|<br />|</p>)\.\.\.(\w)#", "\\1&#8230;\\2", $str);

		// Run the translation array we defined above
		$str = str_replace(array_keys($table), array_values($table), $str);

		// If there are any stray double quotes we'll catch them here

		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), '"');

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if ( ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before.'"'.$one_after,
									$one_before."&#8220;".$one_after,
									$str);
			}
			elseif( ! in_array($one_before, $space, TRUE))
			{
				// Search for the double quote that was actually found
				// (searching for a single quote here could never match).
				$str = str_replace(	$one_before.'"'.$one_after,
									$one_before."&#8221;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// ... and the same for stray single quotes
		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if ( ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."&#8216;".$one_after,
									$str);
			}
			elseif( ! in_array($one_before, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."&#8217;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Format Newlines
	 *
	 * Converts newline characters into either <p> tags or <br />
	 *
	 * A double newline closes the current paragraph and opens a new one;
	 * a single newline between two non-newline characters gets a <br />
	 * in front of it.  The whole result is wrapped in <p>...</p>.
	 *
	 * @access	public
	 * @param	string	a run of text between two tags
	 * @return	string	the text wrapped in paragraph markup ('' stays '')
	 */
	function format_newlines($str)
	{
		if ($str == '')
		{
			return $str;
		}

		if (strpos($str, "\n") === FALSE)
		{
			return '<p>'.$str.'</p>';
		}

		$str = str_replace("\n\n", "</p>\n\n<p>", $str);
		$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);

		return '<p>'.$str.'</p>';
	}
}
|
| 538 |
|
| 539 |
|
admin | b0dd10f | 2006-08-25 17:25:49 +0000 | [diff] [blame] | 540 | ?> |