Blame - system/libraries/Typography.php - code-igniter-v3-giggi

blob: 3ff0d2f58f4a8b02ddb813aa422f263fa1983b8e [file] [log] [blame]

Derek Jones	85e65f6	2008-11-11 23:14:42 +0000	[diff] [blame]	1	<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
				2	/**
				3	* CodeIgniter
				4	*
				5	* An open source application development framework for PHP 4.3.2 or newer
				6	*
				7	* @package CodeIgniter
				8	* @author ExpressionEngine Dev Team
				9	* @copyright Copyright (c) 2008, EllisLab, Inc.
				10	* @license http://codeigniter.com/user_guide/license.html
				11	* @link http://codeigniter.com
				12	* @since Version 1.0
				13	* @filesource
				14	*/
				15
				16	// ------------------------------------------------------------------------
				17
				18	/**
				19	* Typography Class
				20	*
				21	*
				22	* @access private
				23	* @category Helpers
				24	* @author ExpressionEngine Dev Team
				25	* @link http://codeigniter.com/user_guide/helpers/
				26	*/
				27	class CI_Typography {
				28
				29	// Block level elements that should not be wrapped inside <p> tags
				30	var $block_elements = 'address\|blockquote\|div\|dl\|fieldset\|form\|h\d\|hr\|noscript\|object\|ol\|p\|pre\|script\|table\|ul';
				31
				32	// Elements that should not have <p> and <br /> tags within them.
				33	var $skip_elements = 'p\|pre\|ol\|ul\|dl\|object\|table';
				34
				35	// Tags we want the parser to completely ignore when splitting the string.
				36	var $inline_elements = 'a\|abbr\|acronym\|b\|bdo\|big\|br\|button\|cite\|code\|del\|dfn\|em\|i\|img\|ins\|input\|label\|map\|kbd\|q\|samp\|select\|small\|span\|strong\|sub\|sup\|textarea\|tt\|var';
				37
				38	// whether or not to protect quotes within { curly braces }
				39	var $protect_braced_quotes = FALSE;
				40
				41	/**
				42	* Nothing to do here...
				43	*
				44	*/
				45	function CI_Typography()
				46	{
				47	}
				48
				49	/**
				50	* Auto Typography
				51	*
				52	* This function converts text, making it typographically correct:
				53	* - Converts double spaces into paragraphs.
				54	* - Converts single line breaks into <br /> tags
				55	* - Converts single and double quotes into correctly facing curly quote entities.
				56	* - Converts three dots into ellipsis.
				57	* - Converts double dashes into em-dashes.
				58	* - Converts two spaces into entities
				59	*
				60	* @access public
				61	* @param string
				62	* @param bool whether to reduce more then two consecutive newlines to two
				63	* @return string
				64	*/
				65	function auto_typography($str, $reduce_linebreaks = FALSE)
				66	{
				67	if ($str == '')
				68	{
				69	return '';
				70	}
				71
				72	// Standardize Newlines to make matching easier
				73	if (strpos($str, "\r") !== FALSE)
				74	{
				75	$str = str_replace(array("\r\n", "\r"), "\n", $str);
				76	}
				77
				78	// Reduce line breaks. If there are more than two consecutive linebreaks
				79	// we'll compress them down to a maximum of two since there's no benefit to more.
				80	if ($reduce_linebreaks === TRUE)
				81	{
				82	$str = preg_replace("/\n\n+/", "\n\n", $str);
				83	}
				84
				85	// Convert quotes within tags to temporary markers. We don't want quotes converted
				86	// within tags so we'll temporarily convert them to {@DQ} and {@SQ}
				87	// and we don't want double dashes converted to emdash entities, so they are marked with {@DD}
				88	// likewise double spaces are converted to {@NBS} to prevent entity conversion
				89	if (preg_match_all("#\<.+?>#si", $str, $matches))
				90	{
				91	for ($i = 0, $total = count($matches[0]); $i < $total; $i++)
				92	{
				93	$str = str_replace($matches[0][$i],
				94	str_replace(array("'",'"','--',' '), array('{@SQ}', '{@DQ}', '{@DD}', '{@NBS}'), $matches[0][$i]),
				95	$str);
				96	}
				97	}
				98
				99	if ($this->protect_braced_quotes === TRUE)
				100	{
				101	if (preg_match_all("#\{.+?}#si", $str, $matches))
				102	{
				103	for ($i = 0, $total = count($matches[0]); $i < $total; $i++)
				104	{
				105	$str = str_replace($matches[0][$i],
				106	str_replace(array("'",'"'), array('{@SQ}', '{@DQ}'), $matches[0][$i]),
				107	$str);
				108	}
				109	}
				110	}
				111
				112	// Convert "ignore" tags to temporary marker. The parser splits out the string at every tag
				113	// it encounters. Certain inline tags, like image tags, links, span tags, etc. will be
				114	// adversely affected if they are split out so we'll convert the opening bracket < temporarily to: {@TAG}
				115	$str = preg_replace("#<(/*)(".$this->inline_elements.")([ >])#i", "{@TAG}\\1\\2\\3", $str);
				116
				117	// Split the string at every tag. This expression creates an array with this prototype:
				118	//
				119	// [array]
				120	// {
				121	// [0] = <opening tag>
				122	// [1] = Content...
				123	// [2] = <closing tag>
				124	// Etc...
				125	// }
				126	$chunks = preg_split('/(<(?:[^<>]+(?:"[^"]"\|\'[^\']\')?)+>)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE\|PREG_SPLIT_NO_EMPTY);
				127
				128	// Build our finalized string. We cycle through the array, skipping tags, and processing the contained text
				129	$str = '';
				130	$process = TRUE;
				131	$paragraph = FALSE;
				132	foreach ($chunks as $chunk)
				133	{
				134	// Are we dealing with a tag? If so, we'll skip the processing for this cycle.
				135	// Well also set the "process" flag which allows us to skip <pre> tags and a few other things.
				136	if (preg_match("#<(/)(".$this->block_elements.").?>#", $chunk, $match))
				137	{
				138	if (preg_match("#".$this->skip_elements."#", $match[2]))
				139	{
				140	$process = ($match[1] == '/') ? TRUE : FALSE;
				141	}
				142
				143	$str .= $chunk;
				144	continue;
				145	}
				146	elseif (preg_match('/<(\/?)([a-z]).?>/s', $chunk, $tagmatch))
				147	{
				148	if ($tagmatch[1] == '/' && $tagmatch[2] == $this->last_tag)
				149	{
				150	$process = FALSE;
				151	}
				152	else
				153	{
				154	$process = TRUE;
				155	$this->last_tag = $tagmatch[2];
				156	}
				157	}
				158
				159	if ($process == FALSE)
				160	{
				161	$str .= $chunk;
				162	continue;
				163	}
				164
				165	// Convert Newlines into <p> and <br /> tags
				166	$str .= $this->format_characters($this->_format_newlines($chunk));
				167	}
				168
				169	// is the whole of the content inside a block level element?
				170	if ( ! preg_match("/^<(?:".$this->block_elements.")/i", $str, $match))
				171	{
				172	$str = "<p>{$str}</p>";
				173	}
				174
				175
				176	// some special linebreak cleanup
				177	$str = preg_replace_callback('#<(?!/\|'.$this->block_elements.')([^>])><p>(.?)</p><(\w*)#si', array($this, '_linebreak_cleanup'), $str);
				178
				179	// and cleanup empty paragraph tags sitting between two closing tags
				180	$str = preg_replace('#(</\w+>)<p>(\s*)</p>(</\w+>)#si', '$1$2$3', $str);
				181
				182	// Final clean up
				183	$table = array(
				184
				185	// If the user submitted their own paragraph tags within the text
				186	// we will retain them instead of using our tags.
				187	'/(<p[^>*?]>)<p>/' => '$1', // <?php BBEdit syntax coloring bug fix
				188
				189	// Reduce multiple instances of opening/closing paragraph tags to a single one
				190	'#(</p>)+#' => '</p>',
				191	'/(<p>\W*<p>)+/' => '<p>',
				192
				193	// Clean up stray paragraph tags that appear before block level elements
				194	'#<p></p><('.$this->block_elements.')#' => '<$1',
				195
				196	// Clean up open paragraph tags that appear before block level elements
				197	'#<p>(\W)<('.$this->block_elements.')#' => '<p></p>$1<$2',
				198
				199	// Clean up stray non-breaking spaces preceeding block elements
				200	'#[  ]+<('.$this->block_elements.')#' => ' <$1',
				201
				202	// Replace the temporary markers we added earlier
				203	'/\{@TAG\}/' => '<',
				204	'/\{@DQ\}/' => '"',
				205	'/\{@SQ\}/' => "'",
				206	'/\{@DD\}/' => '--',
				207	'/\{@NBS\}/' => ' '
				208
				209	);
				210
				211	// Do we need to reduce empty lines?
				212	if ($reduce_linebreaks === TRUE)
				213	{
				214	$table['#<p>\n*</p>#'] = '';
				215	}
				216	else
				217	{
				218	// If we have empty paragraph tags we add a non-breaking space
				219	// otherwise most browsers won't treat them as true paragraphs
				220	$table['#<p></p>#'] = '<p> </p>';
				221	}
				222
				223	return preg_replace(array_keys($table), $table, $str);
				224
				225	}
				226
				227	// --------------------------------------------------------------------
				228
				229	/**
				230	* Linebreak Cleanup
				231	*
				232	* Removes paragraph and line break tags inserted inbetween
				233	* inline content and a new opening block level element
				234	*
				235	* @access private
				236	* @param array
				237	* @return string
				238	*/
				239	function _linebreak_cleanup($match)
				240	{
				241	if (in_array($match[3], explode('\|', $this->block_elements)))
				242	{
				243	return "<{$match[1]}>".str_replace('<br />', '', $match[2])."<{$match[3]}";
				244	}
				245	else
				246	{
				247	return $match[0];
				248	}
				249	}
				250
				251	// --------------------------------------------------------------------
				252
				253	/**
				254	* Format Characters
				255	*
				256	* This function mainly converts double and single quotes
				257	* to curly entities, but it also converts em-dashes,
				258	* double spaces, and ampersands
				259	*
				260	* @access public
				261	* @param string
				262	* @return string
				263	*/
				264	function format_characters($str)
				265	{
				266	static $table;
				267
				268	if ( ! isset($table))
				269	{
				270	$table = array(
				271	// nested smart quotes, opening and closing
				272	// note that rules for grammar (English) allow only for two levels deep
				273	// and that single quotes are _supposed_ to always be on the outside
				274	// but we'll accommodate both
				275	'/(^\|\W\|\s)\'"/' => '$1‘“',
				276	'/\'"(\s\|\W\|$)/' => '’”$1',
				277	'/(^\|\W\|\s)"\'/' => '$1“‘',
				278	'/"\'(\s\|\W\|$)/' => '”’$1',
				279
				280	// single quote smart quotes
				281	'/\'(\s\|\W\|$)/' => '’$1',
				282	'/(^\|\W\|\s)\'/' => '$1‘',
				283
				284	// double quote smart quotes
				285	'/"(\s\|\W\|$)/' => '”$1',
				286	'/(^\|\W\|\s)"/' => '$1“',
				287
				288	// apostrophes
				289	"/(\w)'(\w)/" => '$1’$2',
				290
				291	// Em dash and ellipses dots
				292	'/\s?\-\-\s?/' => '—',
				293	'/(\w)\.{3}/' => '$1…',
				294
				295	// double space after sentences
				296	'/(\W) /' => '$1  ',
				297
				298	// ampersands, if not a character entity
				299	'/&(?!#?[a-zA-Z0-9]{2,};)/' => '&'
				300	);
				301	}
				302
				303	return preg_replace(array_keys($table), $table, $str);
				304	}
				305
				306	// --------------------------------------------------------------------
				307
				308	/**
				309	* Format Newlines
				310	*
				311	* Converts newline characters into either <p> tags or <br />
				312	*
				313	* @access public
				314	* @param string
				315	* @return string
				316	*/
				317	function _format_newlines($str)
				318	{
				319	if ($str == '')
				320	{
				321	return $str;
				322	}
				323
				324	if (strpos($str, "\n") === FALSE)
				325	{
				326	return $str;
				327	}
				328
				329	// Convert two consecutive newlines to paragraphs
				330	$str = str_replace("\n\n", "</p>\n\n<p>", $str);
				331
				332	// Convert single spaces to <br /> tags
				333	$str = preg_replace("/([^\n])(\n)([^\n])/", "\\1<br />\\2\\3", $str);
				334
				335	// Wrap the whole enchilada in enclosing paragraphs
				336	if ($str != "\n")
				337	{
				338	$str = '<p>'.$str.'</p>';
				339	}
				340
				341	// Remove empty paragraphs if they are on the first line, as this
				342	// is a potential unintended consequence of the previous code
				343	$str = preg_replace("/<p><\/p>(.*)/", "\\1", $str, 1);
				344
				345	return $str;
				346	}
				347
				348	// ------------------------------------------------------------------------
				349
				350	/**
				351	* Convert newlines to HTML line breaks except within PRE tags
				352	*
				353	* @access public
				354	* @param string
				355	* @return string
				356	*/
				357	function nl2br_except_pre($str)
				358	{
				359	$ex = explode("pre>",$str);
				360	$ct = count($ex);
				361
				362	$newstr = "";
				363	for ($i = 0; $i < $ct; $i++)
				364	{
				365	if (($i % 2) == 0)
				366	{
				367	$newstr .= nl2br($ex[$i]);
				368	}
				369	else
				370	{
				371	$newstr .= $ex[$i];
				372	}
				373
				374	if ($ct - 1 != $i)
				375	$newstr .= "pre>";
				376	}
				377
				378	return $newstr;
				379	}
				380
				381	}
				382	// END Typography Class
				383
				384	/* End of file Typography.php */
Rick Ellis	4c938ae	2008-09-10 22:58:38 +0000	[diff] [blame]	385	/* Location: ./system/libraries/Typography.php */