blob: 88071604d426f3a5f44b02f354c3472e57ad9046 [file] [log] [blame]
Derek Allardd2df9bc2007-04-15 17:41:17 +00001<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
2/**
3 * CodeIgniter
4 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
Derek Allard3d879d52008-01-18 19:41:32 +00008 * @author ExpressionEngine Dev Team
Derek Allardd2df9bc2007-04-15 17:41:17 +00009 * @copyright Copyright (c) 2006, EllisLab, Inc.
Derek Jones7a9193a2008-01-21 18:39:20 +000010 * @license http://codeigniter.com/user_guide/license.html
11 * @link http://codeigniter.com
Derek Allardd2df9bc2007-04-15 17:41:17 +000012 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * CodeIgniter Typography Helpers
20 *
21 * @package CodeIgniter
22 * @subpackage Helpers
23 * @category Helpers
Derek Allard3d879d52008-01-18 19:41:32 +000024 * @author ExpressionEngine Dev Team
Derek Jones7a9193a2008-01-21 18:39:20 +000025 * @link http://codeigniter.com/user_guide/helpers/typography_helper.html
Derek Allardd2df9bc2007-04-15 17:41:17 +000026 */
27
28// ------------------------------------------------------------------------
29
/**
 * Convert newlines to HTML line breaks except within PRE tags
 *
 * Text outside of <pre>...</pre> blocks is run through nl2br(); each
 * block (opening tag, contents and closing tag) is copied to the output
 * untouched.  The opening tag is matched case-insensitively and may
 * carry attributes, e.g. <pre class="code">.  A <pre> that is never
 * closed protects everything up to the end of the string.
 *
 * @access	public
 * @param	string	the text to convert
 * @return	string	the text with <br /> inserted before each newline outside PRE blocks
 */
if (! function_exists('nl2br_except_pre'))
{
	function nl2br_except_pre($str)
	{
		// With PREG_SPLIT_DELIM_CAPTURE the captured <pre> blocks always land
		// on the odd indexes and the surrounding text on the even indexes.
		$glue	= '';
		$parts	= preg_split('#(<pre\b[^>]*>.*?(?:</pre\s*>|\z))#is', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

		if ($parts === FALSE)
		{
			// PCRE gave up (e.g. backtrack limit on a huge string): fall back
			// to the literal split, which only knows about a bare "pre>" and
			// needs that delimiter glued back in between the pieces.
			$parts	= explode('pre>', $str);
			$glue	= 'pre>';
		}

		foreach ($parts as $i => $part)
		{
			if (($i % 2) == 0)
			{
				$parts[$i] = nl2br($part);
			}
		}

		return implode($glue, $parts);
	}
}
63
64// ------------------------------------------------------------------------
65
/**
 * Auto Typography Wrapper Function
 *
 * Procedural shortcut: creates an Auto_typography object and hands the
 * string to its convert() method.
 *
 * @access	public
 * @param	string	the text to format
 * @return	string	the value returned by Auto_typography::convert()
 */
if (! function_exists('auto_typography'))
{
	function auto_typography($str)
	{
		$formatter = new Auto_typography();
		$formatted = $formatter->convert($str);

		return $formatted;
	}
}
82
83// ------------------------------------------------------------------------
84
/**
 * Auto Typography Class
 *
 * Turns loosely formatted text into HTML: paragraphs are wrapped in
 * <p> tags, single newlines become <br />, and straight quotes,
 * double hyphens and triple periods are replaced by the matching
 * numeric character entities.
 *
 * @access		private
 * @category	Helpers
 * @author		ExpressionEngine Dev Team
 * @link		http://codeigniter.com/user_guide/helpers/
 */
class Auto_typography {

	// Block level elements that should not be wrapped inside <p> tags
	// (regex alternation, interpolated into the patterns used by convert())
	var $block_elements = 'div|blockquote|pre|code|h\d|script|ol|ul';

	// Elements that should not have <p> and <br /> tags within them.
	var $skip_elements	= 'pre|ol|ul';

	// Tags we want the parser to completely ignore when splitting the string.
	var $ignore_elements = 'a|b|i|em|strong|span|img|li';


	/**
	 * Main Processing Function
	 *
	 * Normalises newlines, shields quotes that sit inside tags, splits
	 * the text at every tag, formats the text chunks with
	 * format_newlines(), runs format_characters() over the result and
	 * finally removes the temporary markers and paragraph artifacts.
	 *
	 * @access	public
	 * @param	string	the text to format
	 * @return	string	the formatted HTML ('' when the input compares equal to '')
	 */
	function convert($str)
	{
		if ($str == '')
		{
			return '';
		}

		// Pad both ends so every quote/dash examined later has a neighbour
		// on each side.
		$str = ' '.$str.' ';

		// Standardize Newlines to make matching easier
		$str = preg_replace("/(\r\n|\r)/", "\n", $str);

		/*
		 * Reduce line breaks
		 *
		 * If there are more than two consecutive line
		 * breaks we'll compress them down to a maximum
		 * of two since there's no benefit to more.
		 *
		 */
		$str = preg_replace("/\n\n+/", "\n\n", $str);

		/*
		 * Convert quotes within tags to temporary marker
		 *
		 * We don't want quotes converted within
		 * tags so we'll temporarily convert them to
		 * {@DQ} and {@SQ}
		 *
		 */
		if (preg_match_all("#\<.+?>#si", $str, $matches))
		{
			for ($i = 0, $total = count($matches['0']); $i < $total; $i++)
			{
				$str = str_replace($matches['0'][$i],
									str_replace(array("'",'"'), array('{@SQ}', '{@DQ}'), $matches['0'][$i]),
									$str);
			}
		}


		/*
		 * Add closing/opening paragraph tags before/after "block" elements
		 *
		 * Since block elements (like <blockquotes>, <pre>, etc.) do not get
		 * wrapped in paragraph tags we will add a closing </p> tag just before
		 * each block element starts and an opening <p> tag right after the block element
		 * ends.  Later on we'll do some further clean up.
		 *
		 */
		$str = preg_replace("#(<)(".$this->block_elements.")(.*?>)#", "</p>\\1\\2\\3", $str);
		$str = preg_replace("#(</)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3<p>", $str);

		/*
		 * Convert "ignore" tags to temporary marker
		 *
		 * The parser splits out the string at every tag
		 * it encounters.  Certain inline tags, like image
		 * tags, links, span tags, etc. will be adversely
		 * affected if they are split out so we'll convert
		 * the opening < temporarily to: {@TAG}
		 *
		 */
		$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);

		/*
		 * Split the string at every tag
		 *
		 * This creates an array with this prototype:
		 *
		 *	[array]
		 *	{
		 *		[0] = <opening tag>
		 *		[1] = Content contained between the tags
		 *		[2] = <closing tag>
		 *		Etc...
		 *	}
		 *
		 */
		$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

		/*
		 * Build our finalized string
		 *
		 * We'll cycle through the array, skipping tags,
		 * and processing the contained text
		 *
		 */
		$str = '';
		$process = TRUE;
		foreach ($chunks as $chunk)
		{
			/*
			 * Are we dealing with a tag?
			 *
			 * If so, we'll skip the processing for this cycle.
			 * We'll also set the "process" flag which allows us
			 * to skip <pre> tags and a few other things.
			 *
			 */
			if (preg_match("#<(/*)(".$this->block_elements.").*?\>#", $chunk, $match))
			{
				if (preg_match("#".$this->skip_elements."#", $match['2']))
				{
					// An opening skip element turns processing off,
					// its closing tag turns it back on.
					$process = ($match['1'] == '/') ? TRUE : FALSE;
				}

				$str .= $chunk;
				continue;
			}

			if ($process == FALSE)
			{
				$str .= $chunk;
				continue;
			}

			// Convert Newlines into <p> and <br /> tags
			$str .= $this->format_newlines($chunk);
		}

		// FINAL CLEAN UP
		// IMPORTANT:  DO NOT ALTER THE ORDER OF THE ITEMS BELOW!

		/*
		 * Clean up paragraph tags before/after "block" elements
		 *
		 * Earlier we added <p></p> tags before/after block level elements.
		 * Then, we added paragraph tags around double line breaks.  This
		 * potentially created incorrectly formatted paragraphs so we'll
		 * clean it up here.
		 *
		 */
		$str = preg_replace("#<p>({@TAG}.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3", $str);
		$str = preg_replace("#({@TAG}/.*?)(".$this->block_elements.")(.*?>)</p>#", "\\1\\2\\3", $str);

		// Convert Quotes and other characters
		$str = $this->format_characters($str);

		// Fix an artifact that happens during the paragraph replacement
		$str = preg_replace('#(<p>\n*</p>)#', '', $str);

		// If the user submitted their own paragraph tags with class data
		// in them we will retain them instead of using our tags.
		$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);

		// Final clean up: collapse doubled paragraph tags, trim the padding
		// spaces next to <p>/</p> and restore the temporary markers.
		$str = str_replace(
							array(
									'</p></p>',
									'</p><p>',
									'<p> ',
									' </p>',
									'{@TAG}',
									'{@DQ}',
									'{@SQ}',
									'<p></p>'
								),
							array(
									'</p>',
									'<p>',
									'<p>',
									'</p>',
									'<',
									'"',
									"'",
									''
								),
							$str
						);

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Format Characters
	 *
	 * This function mainly converts double and single quotes
	 * to entities, but since these are directional, it does
	 * it based on some rules.  It also converts em-dashes
	 * and a couple other things.
	 *
	 * Every scanning loop below looks at the characters next to a
	 * match and then str_replace()s that small context throughout the
	 * whole string, so one iteration may rewrite several occurrences.
	 *
	 * NOTE: a match at offset 0 makes substr() read its "one before"
	 * character from the end of the string (negative offset).
	 * convert() pads its input with a leading space before the text
	 * gets here.
	 *
	 * @access	public
	 * @param	string	the text to process
	 * @return	string	the text with quotes, dashes and ellipses replaced by entities
	 */
	function format_characters($str)
	{
		// Simple context => replacement pairs, applied in one pass further down.
		// NOTE(review): the last four replacements emit two spacing characters
		// (&nbsp; plus a space) for keys that show a single space; presumably the
		// keys were meant to match two spaces after the punctuation - confirm
		// against upstream before changing them.
		$table = array(
						' "'		=> " &#8220;",
						'" '		=> "&#8221; ",
						" '"		=> " &#8216;",
						"' "		=> "&#8217; ",

						'>"'		=> ">&#8220;",
						'"<'		=> "&#8221;<",
						">'"		=> ">&#8216;",
						"'<"		=> "&#8217;<",

						"\"."		=> "&#8221;.",
						"\","		=> "&#8221;,",
						"\";"		=> "&#8221;;",
						"\":"		=> "&#8221;:",
						"\"!"		=> "&#8221;!",
						"\"?"		=> "&#8221;?",

						". "		=> ".&nbsp; ",
						"? "		=> "?&nbsp; ",
						"! "		=> "!&nbsp; ",
						": "		=> ":&nbsp; ",
					);

		// These deal with quotes within quotes, like:  "'hi here'"
		$start = 0;
		$space = array("\n", "\t", " ");

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "\"'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);

			if (! in_array($one_after, $space, TRUE) && $one_after != "<")
			{
				// Text follows the pair: opening double + opening single
				$str = str_replace(	$one_before."\"'".$one_after,
									$one_before."&#8220;&#8216;".$one_after,
									$str);
			}
			elseif (! in_array($one_before, $space, TRUE) && (in_array($one_after, $space, TRUE) OR $one_after == '<'))
			{
				// Text precedes the pair: closing double + closing single
				$str = str_replace(	$one_before."\"'".$one_after,
									$one_before."&#8221;&#8217;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Same again for the reverse nesting:  '"hi here"'
		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'\"");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);

			if ( in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE) && $one_after != "<")
			{
				$str = str_replace(	$one_before."'\"".$one_after,
									$one_before."&#8216;&#8220;".$one_after,
									$str);
			}
			elseif (! in_array($one_before, $space, TRUE) && $one_before != ">")
			{
				$str = str_replace(	$one_before."'\"".$one_after,
									$one_before."&#8217;&#8221;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Are there quotes within a word, as in:  ("something")
		if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
		{
			for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
			{
				if (! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				{
					$str = str_replace(	$matches['0'][$i],
										$matches['1'][$i]."&#8220;".$matches['2'][$i]."&#8221;".$matches['3'][$i],
										$str);
				}
			}
		}

		if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
		{
			for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
			{
				if (! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				{
					$str = str_replace(	$matches['0'][$i],
										$matches['1'][$i]."&#8216;".$matches['2'][$i]."&#8217;".$matches['3'][$i],
										$str);
				}
			}
		}

		// How about one apostrophe, as in Rick's
		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if (! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."&#8217;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Em-dashes
		$start = 0;
		while(TRUE)
		{
			$current = strpos(substr($str, $start), "--");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);
			$two_before = substr($str, $start+$current-2, 1);
			$two_after = substr($str, $start+$current+3, 1);

			// Either "word--word", or "word -- word" with exactly one
			// space on each side; the trim() calls drop those spaces.
			if ((! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
				OR
				(! in_array($two_before, $space, TRUE) && ! in_array($two_after, $space, TRUE) && $one_before == ' ' && $one_after == ' ')
				)
			{
				$str = str_replace(	$two_before.$one_before."--".$one_after.$two_after,
									$two_before.trim($one_before)."&#8212;".trim($one_after).$two_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Ellipsis
		$str = preg_replace("#(\w)\.\.\.(\s|<br />|</p>)#", "\\1&#8230;\\2", $str);
		$str = preg_replace("#(\s|<br />|</p>)\.\.\.(\w)#", "\\1&#8230;\\2", $str);

		// Run the translation array we defined above
		$str = str_replace(array_keys($table), array_values($table), $str);

		// If there are any stray double quotes we'll catch them here

		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), '"');

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if (! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before.'"'.$one_after,
									$one_before."&#8220;".$one_after,
									$str);
			}
			elseif(! in_array($one_before, $space, TRUE))
			{
				// Closing quote followed by whitespace.  The search string
				// must contain the double quote we just located; it used to
				// be built with a single quote, so it never matched here.
				$str = str_replace(	$one_before.'"'.$one_after,
									$one_before."&#8221;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// ...and the same sweep for any stray single quotes
		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if (! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."&#8216;".$one_after,
									$str);
			}
			elseif(! in_array($one_before, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."&#8217;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Format Newlines
	 *
	 * Converts newline characters into either <p> tags or <br />
	 *
	 * The chunk is always wrapped in <p>...</p>; a double newline closes
	 * the paragraph and opens a new one, a single newline between two
	 * other characters gets a <br /> in front of it.
	 *
	 * @access	public
	 * @param	string	a chunk of text between two tags
	 * @return	string	the chunk with paragraph and line break tags added
	 */
	function format_newlines($str)
	{
		if ($str == '')
		{
			return $str;
		}

		if (strpos($str, "\n") === FALSE)
		{
			return '<p>'.$str.'</p>';
		}

		$str = str_replace("\n\n", "</p>\n\n<p>", $str);
		$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);

		return '<p>'.$str.'</p>';
	}
}
543
544
Derek Jonesc7deac92008-05-11 16:27:41 +0000545?>