blob: 1681fa37c0bd98096eab54921d06384efb3725a3 [file] [log] [blame]
adminb0dd10f2006-08-25 17:25:49 +00001<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
2/**
3 * Code Igniter
4 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
8 * @author Rick Ellis
9 * @copyright Copyright (c) 2006, pMachine, Inc.
admine334c472006-10-21 19:44:22 +000010 * @license http://www.codeigniter.com/user_guide/license.html
adminb0dd10f2006-08-25 17:25:49 +000011 * @link http://www.codeigniter.com
12 * @since Version 1.0
13 * @filesource
14 */
admine334c472006-10-21 19:44:22 +000015
adminb0dd10f2006-08-25 17:25:49 +000016// ------------------------------------------------------------------------
17
18/**
19 * Code Igniter Typography Helpers
20 *
21 * @package CodeIgniter
22 * @subpackage Helpers
23 * @category Helpers
24 * @author Rick Ellis
25 * @link http://www.codeigniter.com/user_guide/helpers/typography_helper.html
26 */
27
28// ------------------------------------------------------------------------
29
/**
 * Convert newlines to HTML line breaks except within PRE tags
 *
 * The input is split on the literal text "pre>", which ends both an
 * opening <pre> and a closing </pre> tag.  Pieces at even positions are
 * treated as lying outside a PRE block and are passed through nl2br();
 * pieces at odd positions are left untouched.  The pieces are then glued
 * back together with the same "pre>" delimiter.
 *
 * @access	public
 * @param	string	text to convert
 * @return	string	text with <br /> inserted before newlines outside PRE
 */
function nl2br_except_pre($str)
{
	$pieces = explode("pre>", $str);

	foreach ($pieces as $index => $piece)
	{
		// Even-indexed pieces are outside <pre>...</pre>
		if (($index % 2) == 0)
		{
			$pieces[$index] = nl2br($piece);
		}
	}

	// Re-inserting the delimiter between pieces restores the original tags
	return implode("pre>", $pieces);
}
60
61// ------------------------------------------------------------------------
62
/**
 * Auto Typography Wrapper Function
 *
 * Procedural entry point: creates an Auto_typography object and returns
 * the result of running its convert() method on the supplied string.
 *
 * @access	public
 * @param	string	text to format
 * @return	string	the value returned by Auto_typography::convert()
 */
function auto_typography($str)
{
	$formatter = new Auto_typography();
	$formatted = $formatter->convert($str);

	return $formatted;
}
76
77// ------------------------------------------------------------------------
78
/**
 * Auto Typography Class
 *
 * Turns text into HTML paragraphs and line breaks and replaces straight
 * quotes, double hyphens and triple periods with typographic entities.
 * convert() is the entry point; format_characters() and format_newlines()
 * are the helpers it calls.
 *
 * @access		private
 * @category	Helpers
 * @author		Rick Ellis
 * @author		Paul Burdick
 * @link		http://www.codeigniter.com/user_guide/helpers/
 */
class Auto_typography {

	// Block level elements that should not be wrapped inside <p> tags.
	// This is a regex alternation; every entry in $skip_elements must also
	// appear here, otherwise convert() never sees the tag as a block and
	// cannot switch processing off for it.
	var $block_elements = 'div|blockquote|pre|code|h\d|script|ol|ul';

	// Elements that should not have <p> and <br /> tags within them.
	var $skip_elements = 'pre|ol|ul';

	// Tags we want the parser to completely ignore when splitting the string.
	var $ignore_elements = 'a|b|i|em|strong|span|img|li';


	/**
	 * Main Processing Function
	 *
	 * @access	public
	 * @param	string	raw text, possibly containing HTML tags
	 * @return	string	formatted HTML; an empty string for empty input
	 */
	function convert($str)
	{
		if ($str == '')
		{
			return '';
		}

		// Pad with spaces so that rules which look at the character
		// before/after a match also work at the very start and end.
		$str = ' '.$str.' ';

		// Standardize Newlines to make matching easier
		$str = preg_replace("/(\r\n|\r)/", "\n", $str);

		/*
		 * Reduce line breaks
		 *
		 * If there are more than two consecutive line
		 * breaks we'll compress them down to a maximum
		 * of two since there's no benefit to more.
		 *
		 */
		$str = preg_replace("/\n\n+/", "\n\n", $str);

		/*
		 * Convert quotes within tags to temporary marker
		 *
		 * We don't want quotes converted within
		 * tags so we'll temporarily convert them to
		 * {@DQ} and {@SQ}
		 *
		 */
		if (preg_match_all("#\<.+?>#si", $str, $matches))
		{
			for ($i = 0; $i < count($matches['0']); $i++)
			{
				$str = str_replace($matches['0'][$i],
									str_replace(array("'",'"'), array('{@SQ}', '{@DQ}'), $matches['0'][$i]),
									$str);
			}
		}


		/*
		 * Add closing/opening paragraph tags before/after "block" elements
		 *
		 * Since block elements (like <blockquotes>, <pre>, etc.) do not get
		 * wrapped in paragraph tags we will add a closing </p> tag just before
		 * each block element starts and an opening <p> tag right after the block element
		 * ends.  Later on we'll do some further clean up.
		 *
		 * NOTE(review): the element name is preceded by a lazy wildcard, so it
		 * may match anywhere after a "<" on the same line, not only directly
		 * after it.  Left as-is to preserve existing output.
		 *
		 */
		$str = preg_replace("#(<.*?)(".$this->block_elements.")(.*?>)#", "</p>\\1\\2\\3", $str);
		$str = preg_replace("#(</.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3<p>", $str);

		/*
		 * Convert "ignore" tags to temporary marker
		 *
		 * The parser splits out the string at every tag
		 * it encounters.  Certain inline tags, like image
		 * tags, links, span tags, etc. will be adversely
		 * affected if they are split out so we'll convert
		 * the opening < temporarily to: {@TAG}
		 *
		 */
		$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);

		/*
		 * Split the string at every tag
		 *
		 * This creates an array with this prototype:
		 *
		 *	[array]
		 *	{
		 *		[0] = <opening tag>
		 *		[1] = Content contained between the tags
		 *		[2] = <closing tag>
		 *		Etc...
		 *	}
		 *
		 */
		$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

		/*
		 * Build our finalized string
		 *
		 * We'll cycle through the array, skipping tags,
		 * and processing the contained text
		 *
		 */
		$str = '';
		$process = TRUE;
		foreach ($chunks as $chunk)
		{
			/*
			 * Are we dealing with a tag?
			 *
			 * If so, we'll skip the processing for this cycle.
			 * We'll also set the "process" flag which allows us
			 * to skip <pre> tags and a few other things.
			 *
			 */
			if (preg_match("#<(/*)(".$this->block_elements.").*?\>#", $chunk, $match))
			{
				if (preg_match("#".$this->skip_elements."#", $match['2']))
				{
					// An opening skip tag turns processing off, its
					// closing tag turns it back on.
					$process = ($match['1'] == '/') ? TRUE : FALSE;
				}

				$str .= $chunk;
				continue;
			}

			if ($process == FALSE)
			{
				$str .= $chunk;
				continue;
			}

			// Convert Newlines into <p> and <br /> tags
			$str .= $this->format_newlines($chunk);
		}

		// FINAL CLEAN UP
		// IMPORTANT:  DO NOT ALTER THE ORDER OF THE ITEMS BELOW!

		/*
		 * Clean up paragraph tags before/after "block" elements
		 *
		 * Earlier we added <p></p> tags before/after block level elements.
		 * Then, we added paragraph tags around double line breaks.  This
		 * potentially created incorrectly formatted paragraphs so we'll
		 * clean it up here.
		 *
		 */
		$str = preg_replace("#<p>({@TAG}.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3", $str);
		$str = preg_replace("#({@TAG}/.*?)(".$this->block_elements.")(.*?>)</p>#", "\\1\\2\\3", $str);

		// Convert Quotes and other characters
		$str = $this->format_characters($str);

		// Fix an artifact that happens during the paragraph replacement
		$str = preg_replace('#(<p>\n*</p>)#', '', $str);

		// If the user submitted their own paragraph tags with class data
		// in them we will retain them instead of using our tags.
		$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);

		// Final clean up: collapse doubled paragraph tags, trim the padding
		// spaces and restore the temporary markers.  str_replace() applies
		// the pairs in array order.
		$str = str_replace(
							array(
									'</p></p>',
									'</p><p>',
									'<p> ',
									' </p>',
									'{@TAG}',
									'{@DQ}',
									'{@SQ}',
									'<p></p>'
								),
							array(
									'</p>',
									'<p>',
									'<p>',
									'</p>',
									'<',
									'"',
									"'",
									''
								),
							$str
						);

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Format Characters
	 *
	 * This function mainly converts double and single quotes
	 * to entities, but since these are directional, it does
	 * it based on some rules.  It also converts em-dashes
	 * and a couple other things.
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	function format_characters($str)
	{
		$table = array(
						' "'		=> " &#8220;",
						'" '		=> "&#8221; ",
						" '"		=> " &#8216;",
						"' "		=> "&#8217; ",

						'>"'		=> ">&#8220;",
						'"<'		=> "&#8221;<",
						">'"		=> ">&#8216;",
						"'<"		=> "&#8217;<",

						"\"."		=> "&#8221;.",
						"\","		=> "&#8221;,",
						"\";"		=> "&#8221;;",
						"\":"		=> "&#8221;:",
						"\"!"		=> "&#8221;!",
						"\"?"		=> "&#8221;?",

						".  "		=> ".&nbsp; ",
						"?  "		=> "?&nbsp; ",
						"!  "		=> "!&nbsp; ",
						":  "		=> ":&nbsp; ",
					);

		// These deal with quotes within quotes, like:  "'hi here'"
		$start = 0;
		$space = array("\n", "\t", " ");

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "\"'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);

			if ( ! in_array($one_after, $space, TRUE) && $one_after != "<")
			{
				// Followed by text: this is an opening pair
				$str = str_replace(	$one_before."\"'".$one_after,
									$one_before."&#8220;&#8216;".$one_after,
									$str);
			}
			elseif ( ! in_array($one_before, $space, TRUE) && (in_array($one_after, $space, TRUE) OR $one_after == '<'))
			{
				// Preceded by text and followed by whitespace or a tag: closing pair
				$str = str_replace(	$one_before."\"'".$one_after,
									$one_before."&#8221;&#8217;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'\"");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);

			if ( in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE) && $one_after != "<")
			{
				$str = str_replace(	$one_before."'\"".$one_after,
									$one_before."&#8216;&#8220;".$one_after,
									$str);
			}
			elseif ( ! in_array($one_before, $space, TRUE) && $one_before != ">")
			{
				$str = str_replace(	$one_before."'\"".$one_after,
									$one_before."&#8217;&#8221;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Are there quotes within a word, as in:  ("something")
		if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
		{
			for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
			{
				if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				{
					$str = str_replace(	$matches['0'][$i],
										$matches['1'][$i]."&#8220;".$matches['2'][$i]."&#8221;".$matches['3'][$i],
										$str);
				}
			}
		}

		if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
		{
			for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
			{
				if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				{
					$str = str_replace(	$matches['0'][$i],
										$matches['1'][$i]."&#8216;".$matches['2'][$i]."&#8217;".$matches['3'][$i],
										$str);
				}
			}
		}

		// How about one apostrophe, as in Rick's
		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if ( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."&#8217;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Em-dashes
		$start = 0;
		while(TRUE)
		{
			$current = strpos(substr($str, $start), "--");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);
			$two_before = substr($str, $start+$current-2, 1);
			$two_after = substr($str, $start+$current+3, 1);

			// Either "word--word" or "word -- word"; in the second form the
			// surrounding single spaces are removed by the trim() calls.
			if (( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
				OR
				( ! in_array($two_before, $space, TRUE) && ! in_array($two_after, $space, TRUE) && $one_before == ' ' && $one_after == ' ')
				)
			{
				$str = str_replace(	$two_before.$one_before."--".$one_after.$two_after,
									$two_before.trim($one_before)."&#8212;".trim($one_after).$two_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Ellipsis
		$str = preg_replace("#(\w)\.\.\.(\s|<br />|</p>)#", "\\1&#8230;\\2", $str);
		$str = preg_replace("#(\s|<br />|</p>)\.\.\.(\w)#", "\\1&#8230;\\2", $str);

		// Run the translation array we defined above
		$str = str_replace(array_keys($table), array_values($table), $str);

		// If there are any stray double quotes we'll catch them here

		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), '"');

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if ( ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before.'"'.$one_after,
									$one_before."&#8220;".$one_after,
									$str);
			}
			elseif( ! in_array($one_before, $space, TRUE))
			{
				// The search string must contain the double quote that was
				// just located; searching for a single quote here meant a
				// closing double quote before a newline or tab was never
				// converted.
				$str = str_replace(	$one_before.'"'.$one_after,
									$one_before."&#8221;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// ...and the same for stray single quotes

		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if ( ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."&#8216;".$one_after,
									$str);
			}
			elseif( ! in_array($one_before, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."&#8217;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Format Newlines
	 *
	 * Converts newline characters into either <p> tags or <br />
	 *
	 * A double newline becomes a paragraph boundary, a single newline
	 * between two non-newline characters gets a <br /> in front of it,
	 * and the whole string is wrapped in <p>...</p>.  An empty string
	 * is returned unchanged.
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	function format_newlines($str)
	{
		if ($str == '')
		{
			return $str;
		}

		if (strpos($str, "\n") === FALSE)
		{
			return '<p>'.$str.'</p>';
		}

		$str = str_replace("\n\n", "</p>\n\n<p>", $str);
		$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);

		return '<p>'.$str.'</p>';
	}
}
538
admine334c472006-10-21 19:44:22 +0000539
adminb0dd10f2006-08-25 17:25:49 +0000540?>