Blame - system/libraries/Typography.php - code-igniter-v3-giggi

blob: 0c4a978aa8f85b820783a1b00c787802e3842968 [file] [log] [blame]

Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	1	<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
				2	/**
				3	* CodeIgniter
				4	*
				5	* An open source application development framework for PHP 4.3.2 or newer
				6	*
				7	* @package CodeIgniter
				8	* @author ExpressionEngine Dev Team
				9	* @copyright Copyright (c) 2008, EllisLab, Inc.
				10	* @license http://codeigniter.com/user_guide/license.html
				11	* @link http://codeigniter.com
				12	* @since Version 1.0
				13	* @filesource
				14	*/
				15
				16	// ------------------------------------------------------------------------
				17
				18	/**
				19	* Typography Class
				20	*
				21	*
				22	* @access private
				23	* @category Helpers
				24	* @author ExpressionEngine Dev Team
				25	* @link http://codeigniter.com/user_guide/helpers/
				26	*/
				27	class CI_Typography {
				28
				29	// Block level elements that should not be wrapped inside <p> tags
				30	var $block_elements = 'address\|blockquote\|div\|dl\|fieldset\|form\|h\d\|hr\|noscript\|object\|ol\|p\|pre\|script\|table\|ul';
				31
				32	// Elements that should not have <p> and <br /> tags within them.
				33	var $skip_elements = 'p\|pre\|ol\|ul\|dl\|object\|table';
				34
				35	// Tags we want the parser to completely ignore when splitting the string.
				36	var $inline_elements = 'a\|abbr\|acronym\|b\|bdo\|big\|br\|button\|cite\|code\|del\|dfn\|em\|i\|img\|ins\|input\|label\|map\|kbd\|q\|samp\|select\|small\|span\|strong\|sub\|sup\|textarea\|tt\|var';
Derek Jones	d5738d9	2008-11-14 16:53:34 +0000	[diff] [blame]	37
				38	// array of block level elements that require inner content to be within another block level element
				39	var $inner_block_required = array('blockquote');
				40
				41	// the last block element parsed
				42	var $last_block_element = '';
				43
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	44	// whether or not to protect quotes within { curly braces }
				45	var $protect_braced_quotes = FALSE;
				46
				47	/**
				48	* Nothing to do here...
				49	*
				50	*/
				51	function CI_Typography()
				52	{
				53	}
				54
				55	/**
				56	* Auto Typography
				57	*
				58	* This function converts text, making it typographically correct:
				59	* - Converts double spaces into paragraphs.
				60	* - Converts single line breaks into <br /> tags
				61	* - Converts single and double quotes into correctly facing curly quote entities.
				62	* - Converts three dots into ellipsis.
				63	* - Converts double dashes into em-dashes.
				64	* - Converts two spaces into entities
				65	*
				66	* @access public
				67	* @param string
				68	* @param bool whether to reduce more then two consecutive newlines to two
				69	* @return string
				70	*/
				71	function auto_typography($str, $reduce_linebreaks = FALSE)
				72	{
				73	if ($str == '')
				74	{
				75	return '';
				76	}
				77
				78	// Standardize Newlines to make matching easier
				79	if (strpos($str, "\r") !== FALSE)
				80	{
				81	$str = str_replace(array("\r\n", "\r"), "\n", $str);
				82	}
				83
				84	// Reduce line breaks. If there are more than two consecutive linebreaks
				85	// we'll compress them down to a maximum of two since there's no benefit to more.
				86	if ($reduce_linebreaks === TRUE)
				87	{
				88	$str = preg_replace("/\n\n+/", "\n\n", $str);
Derek Jones	d5738d9	2008-11-14 16:53:34 +0000	[diff] [blame]	89	}
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	90
Derek Jones	a633ec2	2008-12-11 14:31:33 +0000	[diff] [blame]	91	// HTML comment tags don't conform to patterns of normal tags, so pull them out separately, only if needed
				92	$html_comments = array();
				93	if (strpos($str, '<!--') !== FALSE)
				94	{
				95	if (preg_match_all("#(<!\-\-.*?\-\->)#s", $str, $matches))
				96	{
				97	for ($i = 0, $total = count($matches[0]); $i < $total; $i++)
				98	{
				99	$html_comments[] = $matches[0][$i];
				100	$str = str_replace($matches[0][$i], '{@HC'.$i.'}', $str);
				101	}
				102	}
				103	}
Derek Jones	7deecfb	2008-12-11 15:38:01 +0000	[diff] [blame^]	104
				105	// match and yank <pre> tags if they exist. It's cheaper to do this separately since most content will
				106	// not contain <pre> tags, and it keeps the PCRE patterns below simpler and faster
				107	if (strpos($str, '<pre') !== FALSE)
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	108	{
Derek Jones	7deecfb	2008-12-11 15:38:01 +0000	[diff] [blame^]	109	$str = preg_replace_callback("#<pre.?>.?</pre>#si", array($this, '_protect_characters'), $str);
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	110	}
				111
Derek Jones	7deecfb	2008-12-11 15:38:01 +0000	[diff] [blame^]	112	// Convert quotes within tags to temporary markers.
				113	$str = preg_replace_callback("#<.+?>#si", array($this, '_protect_characters'), $str);
				114
				115	// Do the same with braces if necessary
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	116	if ($this->protect_braced_quotes === TRUE)
				117	{
Derek Jones	7deecfb	2008-12-11 15:38:01 +0000	[diff] [blame^]	118	$str = preg_replace_callback("#\{.+?\}#si", array($this, '_protect_characters'), $str);
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	119	}
Derek Jones	a633ec2	2008-12-11 14:31:33 +0000	[diff] [blame]	120
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	121	// Convert "ignore" tags to temporary marker. The parser splits out the string at every tag
				122	// it encounters. Certain inline tags, like image tags, links, span tags, etc. will be
				123	// adversely affected if they are split out so we'll convert the opening bracket < temporarily to: {@TAG}
				124	$str = preg_replace("#<(/*)(".$this->inline_elements.")([ >])#i", "{@TAG}\\1\\2\\3", $str);
				125
				126	// Split the string at every tag. This expression creates an array with this prototype:
				127	//
				128	// [array]
				129	// {
				130	// [0] = <opening tag>
				131	// [1] = Content...
				132	// [2] = <closing tag>
				133	// Etc...
				134	// }
				135	$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]"\|\'[^\']\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE\|PREG_SPLIT_NO_EMPTY);
				136
				137	// Build our finalized string. We cycle through the array, skipping tags, and processing the contained text
				138	$str = '';
				139	$process = TRUE;
				140	$paragraph = FALSE;
				141	foreach ($chunks as $chunk)
				142	{
				143	// Are we dealing with a tag? If so, we'll skip the processing for this cycle.
				144	// Well also set the "process" flag which allows us to skip <pre> tags and a few other things.
Derek Jones	7deecfb	2008-12-11 15:38:01 +0000	[diff] [blame^]	145	if (preg_match("#<(/)(".$this->block_elements.").?>#", $chunk, $match))
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	146	{
				147	if (preg_match("#".$this->skip_elements."#", $match[2]))
				148	{
				149	$process = ($match[1] == '/') ? TRUE : FALSE;
				150	}
				151
Derek Jones	d5738d9	2008-11-14 16:53:34 +0000	[diff] [blame]	152	if ($match[1] == '')
				153	{
				154	$this->last_block_element = $match[2];
				155	}
				156
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	157	$str .= $chunk;
				158	continue;
				159	}
Derek Jones	d5738d9	2008-11-14 16:53:34 +0000	[diff] [blame]	160
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	161	if ($process == FALSE)
				162	{
Derek Jones	7deecfb	2008-12-11 15:38:01 +0000	[diff] [blame^]	163	$str .= $chunk;
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	164	continue;
				165	}
Derek Jones	d5738d9	2008-11-14 16:53:34 +0000	[diff] [blame]	166
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	167	// Convert Newlines into <p> and <br /> tags
Derek Jones	7deecfb	2008-12-11 15:38:01 +0000	[diff] [blame^]	168	$str .= $this->_format_newlines($chunk);
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	169	}
				170
				171	// is the whole of the content inside a block level element?
Derek Jones	a633ec2	2008-12-11 14:31:33 +0000	[diff] [blame]	172	if ( ! preg_match("/^\s*<(?:".$this->block_elements.")/i", $str, $match))
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	173	{
				174	$str = "<p>{$str}</p>";
				175	}
Derek Jones	a633ec2	2008-12-11 14:31:33 +0000	[diff] [blame]	176
Derek Jones	7deecfb	2008-12-11 15:38:01 +0000	[diff] [blame^]	177	// Convert quotes, elipsis, em-dashes, non-breaking spaces, and ampersands
				178	$str = $this->format_characters($str);
				179
Derek Jones	a633ec2	2008-12-11 14:31:33 +0000	[diff] [blame]	180	// restore HTML comments
				181	for ($i = 0, $total = count($html_comments); $i < $total; $i++)
				182	{
				183	$str = preg_replace('#(?:<p>)?{@HC'.$i.'}(?:\s*</p>)?#s', $html_comments[$i], $str);
				184	}
				185
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	186	// Final clean up
				187	$table = array(
				188
				189	// If the user submitted their own paragraph tags within the text
				190	// we will retain them instead of using our tags.
Derek Jones	d5738d9	2008-11-14 16:53:34 +0000	[diff] [blame]	191	'/(<p[^>*?]>)<p>/' => '$1', // <?php BBEdit syntax coloring bug fix
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	192
				193	// Reduce multiple instances of opening/closing paragraph tags to a single one
				194	'#(</p>)+#' => '</p>',
				195	'/(<p>\W*<p>)+/' => '<p>',
				196
				197	// Clean up stray paragraph tags that appear before block level elements
				198	'#<p></p><('.$this->block_elements.')#' => '<$1',
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	199
				200	// Clean up stray non-breaking spaces preceeding block elements
				201	'#[  ]+<('.$this->block_elements.')#' => ' <$1',
Derek Jones	d5738d9	2008-11-14 16:53:34 +0000	[diff] [blame]	202
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	203	// Replace the temporary markers we added earlier
				204	'/\{@TAG\}/' => '<',
				205	'/\{@DQ\}/' => '"',
				206	'/\{@SQ\}/' => "'",
				207	'/\{@DD\}/' => '--',
				208	'/\{@NBS\}/' => ' '
				209
				210	);
Derek Jones	a633ec2	2008-12-11 14:31:33 +0000	[diff] [blame]	211
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	212	// Do we need to reduce empty lines?
				213	if ($reduce_linebreaks === TRUE)
				214	{
				215	$table['#<p>\n*</p>#'] = '';
				216	}
				217	else
				218	{
				219	// If we have empty paragraph tags we add a non-breaking space
				220	// otherwise most browsers won't treat them as true paragraphs
				221	$table['#<p></p>#'] = '<p> </p>';
				222	}
				223
				224	return preg_replace(array_keys($table), $table, $str);
				225
				226	}
				227
				228	// --------------------------------------------------------------------
				229
				230	/**
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	231	* Format Characters
				232	*
				233	* This function mainly converts double and single quotes
				234	* to curly entities, but it also converts em-dashes,
				235	* double spaces, and ampersands
				236	*
				237	* @access public
				238	* @param string
				239	* @return string
				240	*/
				241	function format_characters($str)
				242	{
				243	static $table;
				244
				245	if ( ! isset($table))
				246	{
Derek Jones	b859df8	2008-11-18 15:24:20 +0000	[diff] [blame]	247	$table = array(
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	248	// nested smart quotes, opening and closing
				249	// note that rules for grammar (English) allow only for two levels deep
				250	// and that single quotes are _supposed_ to always be on the outside
				251	// but we'll accommodate both
Derek Jones	b859df8	2008-11-18 15:24:20 +0000	[diff] [blame]	252	// Note that in all cases, whitespace is the primary determining factor
				253	// on which direction to curl, with non-word characters like punctuation
				254	// being a secondary factor only after whitespace is addressed.
				255	'/\'"(\s\|$)/' => '’”$1',
				256	'/(^\|\s)\'"/' => '$1‘“',
				257	'/\'"(\W)/' => '’”$1',
				258	'/(\W)\'"/' => '$1‘“',
				259	'/"\'(\s\|$)/' => '”’$1',
				260	'/(^\|\s)"\'/' => '$1“‘',
				261	'/"\'(\W)/' => '”’$1',
				262	'/(\W)"\'/' => '$1“‘',
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	263
				264	// single quote smart quotes
Derek Jones	b859df8	2008-11-18 15:24:20 +0000	[diff] [blame]	265	'/\'(\s\|$)/' => '’$1',
				266	'/(^\|\s)\'/' => '$1‘',
				267	'/\'(\W)/' => '’$1',
				268	'/(\W)\'/' => '$1‘',
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	269
				270	// double quote smart quotes
Derek Jones	b859df8	2008-11-18 15:24:20 +0000	[diff] [blame]	271	'/"(\s\|$)/' => '”$1',
				272	'/(^\|\s)"/' => '$1“',
				273	'/"(\W)/' => '”$1',
				274	'/(\W)"/' => '$1“',
				275
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	276	// apostrophes
Derek Jones	b859df8	2008-11-18 15:24:20 +0000	[diff] [blame]	277	"/(\w)'(\w)/" => '$1’$2',
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	278
				279	// Em dash and ellipses dots
				280	'/\s?\-\-\s?/' => '—',
				281	'/(\w)\.{3}/' => '$1…',
				282
				283	// double space after sentences
				284	'/(\W) /' => '$1  ',
				285
				286	// ampersands, if not a character entity
				287	'/&(?!#?[a-zA-Z0-9]{2,};)/' => '&'
Derek Jones	b859df8	2008-11-18 15:24:20 +0000	[diff] [blame]	288	);
				289	}
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	290
				291	return preg_replace(array_keys($table), $table, $str);
				292	}
				293
				294	// --------------------------------------------------------------------
				295
				296	/**
				297	* Format Newlines
				298	*
				299	* Converts newline characters into either <p> tags or <br />
				300	*
				301	* @access public
				302	* @param string
				303	* @return string
				304	*/
				305	function _format_newlines($str)
				306	{
				307	if ($str == '')
				308	{
				309	return $str;
				310	}
Derek Jones	d5738d9	2008-11-14 16:53:34 +0000	[diff] [blame]	311
				312	if (strpos($str, "\n") === FALSE && ! in_array($this->last_block_element, $this->inner_block_required))
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	313	{
				314	return $str;
				315	}
				316
				317	// Convert two consecutive newlines to paragraphs
				318	$str = str_replace("\n\n", "</p>\n\n<p>", $str);
				319
				320	// Convert single spaces to <br /> tags
				321	$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);
				322
				323	// Wrap the whole enchilada in enclosing paragraphs
				324	if ($str != "\n")
				325	{
				326	$str = '<p>'.$str.'</p>';
				327	}
				328
				329	// Remove empty paragraphs if they are on the first line, as this
				330	// is a potential unintended consequence of the previous code
				331	$str = preg_replace("/<p><\/p>(.*)/", "\\1", $str, 1);
				332
				333	return $str;
				334	}
				335
				336	// ------------------------------------------------------------------------
				337
				338	/**
Derek Jones	7deecfb	2008-12-11 15:38:01 +0000	[diff] [blame^]	339	* Protect Characters
				340	*
				341	* Protects special characters from being formatted later
				342	* We don't want quotes converted within tags so we'll temporarily convert them to {@DQ} and {@SQ}
				343	* and we don't want double dashes converted to emdash entities, so they are marked with {@DD}
				344	* likewise double spaces are converted to {@NBS} to prevent entity conversion
				345	*
				346	* @access public
				347	* @param array
				348	* @return string
				349	*/
				350	function _protect_characters($match)
				351	{
				352	return str_replace(array("'",'"','--',' '), array('{@SQ}', '{@DQ}', '{@DD}', '{@NBS}'), $match[0]);
				353	}
				354
				355	// --------------------------------------------------------------------
				356
				357	/**
Derek Allard	2067d1a	2008-11-13 22:59:24 +0000	[diff] [blame]	358	* Convert newlines to HTML line breaks except within PRE tags
				359	*
				360	* @access public
				361	* @param string
				362	* @return string
				363	*/
				364	function nl2br_except_pre($str)
				365	{
				366	$ex = explode("pre>",$str);
				367	$ct = count($ex);
				368
				369	$newstr = "";
				370	for ($i = 0; $i < $ct; $i++)
				371	{
				372	if (($i % 2) == 0)
				373	{
				374	$newstr .= nl2br($ex[$i]);
				375	}
				376	else
				377	{
				378	$newstr .= $ex[$i];
				379	}
				380
				381	if ($ct - 1 != $i)
				382	$newstr .= "pre>";
				383	}
				384
				385	return $newstr;
				386	}
				387
				388	}
				389	// END Typography Class
				390
				391	/* End of file Typography.php */
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	392	/* Location: ./system/libraries/Typography.php */