blob: 84ca0ed6fb86344e6a3626b323fb85e002f256e8 [file] [log] [blame]
adminb0dd10f2006-08-25 17:25:49 +00001<?php if (!defined('BASEPATH')) exit('No direct script access allowed');
2/**
3 * Code Igniter
4 *
5 * An open source application development framework for PHP 4.3.2 or newer
6 *
7 * @package CodeIgniter
8 * @author Rick Ellis
9 * @copyright Copyright (c) 2006, pMachine, Inc.
10 * @license http://www.codeigniter.com/user_guide/license.html
11 * @link http://www.codeigniter.com
12 * @since Version 1.0
13 * @filesource
14 */
15
16// ------------------------------------------------------------------------
17
18/**
19 * Code Igniter Typography Helpers
20 *
21 * @package CodeIgniter
22 * @subpackage Helpers
23 * @category Helpers
24 * @author Rick Ellis
25 * @link http://www.codeigniter.com/user_guide/helpers/typography_helper.html
26 */
27
28// ------------------------------------------------------------------------
29
/**
 * Convert newlines to HTML line breaks except within PRE tags
 *
 * The string is split at every opening or closing <pre> tag.  The tag
 * match is case-insensitive and allows attributes on the tag, so
 * <pre class="code"> and <PRE> are recognised as well as a bare <pre>.
 * Text outside a <pre> element is passed through nl2br(); text inside
 * one (including an unclosed <pre> that runs to the end of the string)
 * is copied through unchanged, as are the tags themselves.
 *
 * @access	public
 * @param	string	the text to convert
 * @return	string	the text with <br /> inserted outside <pre> elements
 */
function nl2br_except_pre($str)
{
	// With PREG_SPLIT_DELIM_CAPTURE the result alternates strictly:
	// even indexes are text, odd indexes are the matched <pre>/</pre> tags.
	$parts = preg_split('#(</?pre(?:\s[^>]*)?>)#i', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

	if ($parts === FALSE)
	{
		// The regular expression engine failed; fall back to a plain conversion
		return nl2br($str);
	}

	$newstr = '';
	$in_pre = FALSE;

	foreach ($parts as $i => $part)
	{
		if (($i % 2) == 1)
		{
			// A tag: a "/" right after the "<" means we are leaving the
			// <pre> element, anything else means we are entering one.
			$in_pre = (substr($part, 1, 1) != '/');
			$newstr .= $part;
		}
		elseif ($in_pre)
		{
			$newstr .= $part;
		}
		else
		{
			$newstr .= nl2br($part);
		}
	}

	return $newstr;
}
60
61// ------------------------------------------------------------------------
62
/**
 * Auto Typography Wrapper Function
 *
 * Procedural shortcut: creates an Auto_typography object and returns
 * whatever its convert() method produces for the supplied string.
 *
 * @access	public
 * @param	string
 * @return	string
 */
function auto_typography($str)
{
	$formatter = new Auto_typography();
	$formatted = $formatter->convert($str);

	return $formatted;
}
76
77// ------------------------------------------------------------------------
78
/**
 * Auto Typography Class
 *
 * Turns plain text into HTML: newlines become <p> and <br /> tags,
 * straight quotes become directional quote entities, double hyphens
 * become em-dashes and three periods become an ellipsis entity.
 * convert() is the entry point; format_characters() and
 * format_newlines() are the helpers it calls.
 *
 * @access		private
 * @category	Helpers
 * @author		Rick Ellis
 * @author		Paul Burdick
 * @link		http://www.codeigniter.com/user_guide/helpers/
 */
class Auto_typography {

	// Block level elements that should not be wrapped inside <p> tags.
	// This is a regular expression alternation (note the "h\d").
	var $block_elements = 'div|blockquote|pre|code|h\d|script|ol|ul';

	// Elements that should not have <p> and <br /> tags within them.
	// Only consulted for tags that already matched $block_elements.
	var $skip_elements	= 'pre|ol|ul';

	// Tags we want the parser to completely ignore when splitting the string.
	var $ignore_elements = 'a|b|i|em|strong|span|img|li';


	/**
	 * Main Processing Function
	 *
	 * @access	public
	 * @param	string	the raw text
	 * @return	string	the formatted HTML, or '' for empty input
	 */
	function convert($str)
	{
		if ($str == '')
		{
			return '';
		}

		// Pad with a space on each side so the first and last characters
		// always have a neighbour for the quote rules to look at.
		$str = ' '.$str.' ';

		// Standardize Newlines to make matching easier
		$str = preg_replace("/(\r\n|\r)/", "\n", $str);

		/*
		 * Reduce line breaks
		 *
		 * Any run of two or more consecutive line breaks
		 * is compressed down to exactly two since there's
		 * no benefit to more.
		 *
		 */
		$str = preg_replace("/\n\n+/", "\n\n", $str);

		/*
		 * Convert quotes within tags to temporary marker
		 *
		 * We don't want quotes converted within
		 * tags so we'll temporarily convert them to
		 * {@DQ} and {@SQ}
		 *
		 */
		if (preg_match_all("#\<.+?>#si", $str, $matches))
		{
			for ($i = 0; $i < count($matches['0']); $i++)
			{
				$str = str_replace($matches['0'][$i],
									str_replace(array("'",'"'), array('{@SQ}', '{@DQ}'), $matches['0'][$i]),
									$str);
			}
		}


		/*
		 * Add closing/opening paragraph tags before/after "block" elements
		 *
		 * Since block elements (like <blockquotes>, <pre>, etc.) do not get
		 * wrapped in paragraph tags we will add a closing </p> tag just before
		 * each block element starts and an opening <p> tag right after the block element
		 * ends.  Later on we'll do some further clean up.
		 *
		 */
		$str = preg_replace("#(<.*?)(".$this->block_elements.")(.*?>)#", "</p>\\1\\2\\3", $str);
		$str = preg_replace("#(</.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3<p>", $str);

		/*
		 * Convert "ignore" tags to temporary marker
		 *
		 * The parser splits out the string at every tag
		 * it encounters.  Certain inline tags, like image
		 * tags, links, span tags, etc. will be adversely
		 * affected if they are split out so we'll convert
		 * the opening < temporarily to: {@TAG}
		 *
		 */
		$str = preg_replace("#<(/*)(".$this->ignore_elements.")#i", "{@TAG}\\1\\2", $str);

		/*
		 * Split the string at every tag
		 *
		 * This creates an array with this prototype:
		 *
		 *	[array]
		 *	{
		 *		[0] = <opening tag>
		 *		[1] = Content contained between the tags
		 *		[2] = <closing tag>
		 *		Etc...
		 *	}
		 *
		 */
		$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]*"|\'[^\']*\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

		/*
		 * Build our finalized string
		 *
		 * We'll cycle through the array, skipping tags,
		 * and processing the contained text
		 *
		 */
		$str = '';
		$process = TRUE;
		foreach ($chunks as $chunk)
		{
			/*
			 * Are we dealing with a tag?
			 *
			 * If so, we'll skip the processing for this cycle.
			 * We'll also set the "process" flag which allows us
			 * to skip <pre> tags and a few other things.
			 *
			 */
			if (preg_match("#<(/*)(".$this->block_elements.").*?\>#", $chunk, $match))
			{
				if (preg_match("#".$this->skip_elements."#", $match['2']))
				{
					// An opening skip element switches processing off,
					// its closing tag switches it back on.
					$process = ($match['1'] == '/') ? TRUE : FALSE;
				}

				$str .= $chunk;
				continue;
			}

			if ($process == FALSE)
			{
				$str .= $chunk;
				continue;
			}

			// Convert Newlines into <p> and <br /> tags
			$str .= $this->format_newlines($chunk);
		}


		/*
		 * Clean up paragraph tags before/after "block" elements
		 *
		 * Earlier we added <p></p> tags before/after block level elements.
		 * Then, we added paragraph tags around double line breaks.  This
		 * potentially created incorrectly formatted paragraphs so we'll
		 * clean it up here.
		 *
		 */
		$str = preg_replace("#<p>({@TAG}.*?)(".$this->block_elements.")(.*?>)#", "\\1\\2\\3", $str);
		$str = preg_replace("#({@TAG}/.*?)(".$this->block_elements.")(.*?>)</p>#", "\\1\\2\\3", $str);

		// Convert Quotes and other characters
		$str = $this->format_characters($str);

		// Final clean up
		$str = preg_replace('#(<p>\n*</p>)#', '', $str);
		$str = preg_replace('#(<p.*?>)<p>#', "\\1", $str);

		// Collapse doubled paragraph tags and restore the temporary
		// {@TAG}, {@DQ} and {@SQ} markers to their original characters.
		$str = str_replace(
							array(
									'</p></p>',
									'</p><p>',
									'<p> ',
									' </p>',
									'{@TAG}',
									'{@DQ}',
									'{@SQ}',
									'<p></p>'
								),
							array(
									'</p>',
									'<p>',
									'<p>',
									'</p>',
									'<',
									'"',
									"'",
									''
								),
							$str
						);

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Format Characters
	 *
	 * This function mainly converts double and single quotes
	 * to entities, but since these are directional, it does
	 * it based on some rules.  It also converts em-dashes
	 * and a couple other things.
	 *
	 * Each rule looks at the characters next to the one it found and
	 * then calls str_replace(), so every identical occurrence in the
	 * string is rewritten at once, not only the one at the found offset.
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	function format_characters($str)
	{
		// Simple pair replacements, applied in one pass further down:
		// quotes next to a space or a tag, a closing double quote before
		// punctuation, and a non-breaking space after sentence punctuation.
		$table = array(
						' "'		=> " &#8220;",
						'" '		=> "&#8221; ",
						" '"		=> " &#8216;",
						"' "		=> "&#8217; ",

						'>"'		=> ">&#8220;",
						'"<'		=> "&#8221;<",
						">'"		=> ">&#8216;",
						"'<"		=> "&#8217;<",

						"\"."		=> "&#8221;.",
						"\","		=> "&#8221;,",
						"\";"		=> "&#8221;;",
						"\":"		=> "&#8221;:",
						"\"!"		=> "&#8221;!",
						"\"?"		=> "&#8221;?",

						".  "		=> ".&nbsp; ",
						"?  "		=> "?&nbsp; ",
						"!  "		=> "!&nbsp; ",
						":  "		=> ":&nbsp; ",
					);

		// These deal with quotes within quotes, like:  "'hi here'"
		$start = 0;
		$space = array("\n", "\t", " ");

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "\"'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);

			if ( ! in_array($one_after, $space, TRUE) && $one_after != "<")
			{
				// Followed by text: an opening pair
				$str = str_replace(	$one_before."\"'".$one_after,
									$one_before."&#8220;&#8216;".$one_after,
									$str);
			}
			elseif ( ! in_array($one_before, $space, TRUE) && (in_array($one_after, $space, TRUE) OR $one_after == '<'))
			{
				// Preceded by text and followed by whitespace or a tag: a closing pair
				$str = str_replace(	$one_before."\"'".$one_after,
									$one_before."&#8221;&#8217;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'\"");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);

			if ( in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE) && $one_after != "<")
			{
				// Preceded by whitespace and followed by text: an opening pair
				$str = str_replace(	$one_before."'\"".$one_after,
									$one_before."&#8216;&#8220;".$one_after,
									$str);
			}
			elseif ( ! in_array($one_before, $space, TRUE) && $one_before != ">")
			{
				// Preceded by text: a closing pair
				$str = str_replace(	$one_before."'\"".$one_after,
									$one_before."&#8217;&#8221;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Are there quotes within a word, as in:  ("something")
		if (preg_match_all("/(.)\"(\S+?)\"(.)/", $str, $matches))
		{
			for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
			{
				if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				{
					$str = str_replace(	$matches['0'][$i],
										$matches['1'][$i]."&#8220;".$matches['2'][$i]."&#8221;".$matches['3'][$i],
										$str);
				}
			}
		}

		if (preg_match_all("/(.)\'(\S+?)\'(.)/", $str, $matches))
		{
			for ($i=0, $s=sizeof($matches['0']); $i < $s; ++$i)
			{
				if ( ! in_array($matches['1'][$i], $space, TRUE) && ! in_array($matches['3'][$i], $space, TRUE))
				{
					$str = str_replace(	$matches['0'][$i],
										$matches['1'][$i]."&#8216;".$matches['2'][$i]."&#8217;".$matches['3'][$i],
										$str);
				}
			}
		}

		// How about one apostrophe, as in Rick's
		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if ( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."&#8217;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Em-dashes
		$start = 0;
		while(TRUE)
		{
			$current = strpos(substr($str, $start), "--");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+2, 1);
			$two_before = substr($str, $start+$current-2, 1);
			$two_after = substr($str, $start+$current+3, 1);

			// Convert either "word--word" or "word -- word".  In the second
			// form the single spaces around the dashes are dropped by trim().
			if (( ! in_array($one_before, $space, TRUE) && ! in_array($one_after, $space, TRUE))
				OR
				( ! in_array($two_before, $space, TRUE) && ! in_array($two_after, $space, TRUE) && $one_before == ' ' && $one_after == ' ')
				)
			{
				$str = str_replace(	$two_before.$one_before."--".$one_after.$two_after,
									$two_before.trim($one_before)."&#8212;".trim($one_after).$two_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// Ellipsis
		$str = preg_replace("#(\w)\.\.\.(\s|<br />|</p>)#", "\\1&#8230;\\2", $str);
		$str = preg_replace("#(\s|<br />|</p>)\.\.\.(\w)#", "\\1&#8230;\\2", $str);

		// Run the translation array we defined above
		$str = str_replace(array_keys($table), array_values($table), $str);

		// If there are any stray double quotes we'll catch them here

		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), '"');

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if ( ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before.'"'.$one_after,
									$one_before."&#8220;".$one_after,
									$str);
			}
			elseif( ! in_array($one_before, $space, TRUE))
			{
				// Closing quote followed by whitespace.  The search string must
				// contain the double quote that was actually found.
				$str = str_replace(	$one_before.'"'.$one_after,
									$one_before."&#8221;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		// ...and the same for any stray single quotes
		$start = 0;

		while(TRUE)
		{
			$current = strpos(substr($str, $start), "'");

			if ($current === FALSE) break;

			$one_before = substr($str, $start+$current-1, 1);
			$one_after = substr($str, $start+$current+1, 1);

			if ( ! in_array($one_after, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."&#8216;".$one_after,
									$str);
			}
			elseif( ! in_array($one_before, $space, TRUE))
			{
				$str = str_replace(	$one_before."'".$one_after,
									$one_before."&#8217;".$one_after,
									$str);
			}

			$start = $start+$current+2;
		}

		return $str;
	}

	// --------------------------------------------------------------------

	/**
	 * Format Newlines
	 *
	 * Converts newline characters into either <p> tags or <br />
	 *
	 * An empty string is returned untouched.  Anything else is wrapped
	 * in <p>...</p>; inside it a double newline becomes a paragraph
	 * boundary and a single newline between two non-newline characters
	 * gets a <br /> placed in front of it.
	 *
	 * @access	public
	 * @param	string
	 * @return	string
	 */
	function format_newlines($str)
	{
		if ($str == '')
		{
			return $str;
		}

		if (strpos($str, "\n") === FALSE)
		{
			return '<p>'.$str.'</p>';
		}

		$str = str_replace("\n\n", "</p>\n\n<p>", $str);
		$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);

		return '<p>'.$str.'</p>';
	}
}
532
533
534?>