Code Coverage
 
Classes and Traits
Functions and Methods
Lines
Total
100.00%
1 / 1
100.00%
36 / 36
CRAP
100.00%
200 / 200
String
100.00%
1 / 1
100.00%
36 / 36
129
100.00%
200 / 200
 isUtf8Sequence($seq)
100.00%
1 / 1
34
100.00%
20 / 20
 fixCodePageString($str, $encoding)
100.00%
1 / 1
6
100.00%
18 / 18
 fixString($str, $toDecode=self::DECODE_NONE, $encoding="UTF-8")
100.00%
1 / 1
5
100.00%
22 / 22
 unsafeSubstr($start, $length)
100.00%
1 / 1
1
100.00%
1 / 1
 __construct($str="", $toDecode=self::DECODE_NONE, $encoding="UTF-8")
100.00%
1 / 1
3
100.00%
5 / 5
 create($str="", $toDecode=self::DECODE_NONE, $encoding="UTF-8")
100.00%
1 / 1
3
100.00%
6 / 6
 getCollator()
100.00%
1 / 1
2
100.00%
4 / 4
 setCollator(\Collator $coll)
100.00%
1 / 1
1
100.00%
2 / 2
 __get($name)
100.00%
1 / 1
3
100.00%
3 / 3
 __toString()
100.00%
1 / 1
1
100.00%
1 / 1
 equals(\Scrivo\String $str)
100.00%
1 / 1
1
100.00%
1 / 1
 getLength()
100.00%
1 / 1
2
100.00%
4 / 4
 count()
100.00%
1 / 1
1
100.00%
1 / 1
 current()
100.00%
1 / 1
1
100.00%
1 / 1
 key()
100.00%
1 / 1
1
100.00%
1 / 1
 next()
100.00%
1 / 1
1
100.00%
2 / 2
 rewind()
100.00%
1 / 1
1
100.00%
2 / 2
 valid()
100.00%
1 / 1
2
100.00%
1 / 1
 offsetSet($offset, $value)
100.00%
1 / 1
1
100.00%
2 / 2
 offsetGet($offset)
100.00%
1 / 1
2
100.00%
4 / 4
 offsetExists($offset)
100.00%
1 / 1
2
100.00%
1 / 1
 offsetUnset($offset)
100.00%
1 / 1
1
100.00%
2 / 2
 substr($start, $length=0xFFFF)
100.00%
1 / 1
3
100.00%
5 / 5
 substring($start, $end)
100.00%
1 / 1
4
100.00%
5 / 5
 trim()
100.00%
1 / 1
1
100.00%
2 / 2
 contains(\Scrivo\String $str, $offset=0, $ignoreCase=false)
100.00%
1 / 1
4
100.00%
7 / 7
 indexOf(\Scrivo\String $str, $offset=0, $ignoreCase=false)
100.00%
1 / 1
5
100.00%
9 / 9
 lastIndexOf(\Scrivo\String $str, $offset=0, $ignoreCase=false)
100.00%
1 / 1
6
100.00%
12 / 12
 firstOccurranceOf(\Scrivo\String $str, $part=false, $ignoreCase=false)
100.00%
1 / 1
4
100.00%
9 / 9
 lastOccurranceOf(\Scrivo\String $str, $part=false, $ignoreCase=false)
100.00%
1 / 1
4
100.00%
9 / 9
 replace($from, $to)
100.00%
1 / 1
13
100.00%
21 / 21
 split(\Scrivo\String $delimiter, $limit=0)
100.00%
1 / 1
4
100.00%
9 / 9
 toLowerCase()
100.00%
1 / 1
1
100.00%
1 / 1
 toUpperCase()
100.00%
1 / 1
1
100.00%
1 / 1
 compareTo(\Scrivo\String $str)
100.00%
1 / 1
1
100.00%
1 / 1
 inArray($arr)
100.00%
1 / 1
3
100.00%
5 / 5
<?php
/* Copyright (c) 2012, Geert Bergman (geert@scrivo.nl)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the name of "Scrivo" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $Id: String.php 841 2013-08-19 22:19:47Z geert $
*/
/**
* Implementation of the \Scrivo\String class.
*/
namespace Scrivo;
/**
* Wrapper class for PHP strings to enforce consistent and safe multi-byte
* (UTF-8) string handling.
*
* \Scrivo\String is a primitive wrapper class for PHP strings to make sure that
* all operations performed on the string are UTF-8 safe. As PHP does not
* enforce a consistent way to deal with multibyte strings we do it
* ourselves. In the Scrivo code base UTF-8 is the only encoding that is
* supported for operations on data and these operations should be done
* through instances of the \Scrivo\String class. If strings are used as byte
* arrays, use the ByteArray class.
*
* \Scrivo\String objects are immutable: once created you can't change them. All
* operations on a \Scrivo\String object will return a new \Scrivo\String object.
*
* Although we'll be working with UTF-8 exclusively it is possible to create
* \Scrivo\String objects that contain characters from 8 bit encoding schemes.
* Also a note on HTML entities, we work with UTF-8 so you don't need them:
* they are evil. Except entities for the reserved HTML characters (<>&'")
* there is really no use for them in UTF-8 strings. And when stored in a
* database only cause sorting and lookup errors. Therefore when constructing
* \Scrivo\String objects you can opt to convert existing HTML entities to their
* corresponding UTF-8 characters.
*
* The current locale setting for LC_COLLATE is important.
* \Scrivo\String::compareTo() will use this setting when comparing strings.
*
* Please note: you might be tempted to do string comparison using
* equality operators (==). Although this works in most cases don't do this:
* you'll do PHP object comparison (i.e. comparing a
* \Scrivo\String object) and that is not what you want: use \Scrivo\String::equals()
* or \Scrivo\String::compareTo() to compare strings.
*/
class String implements \Iterator, \ArrayAccess, \Countable {
/**
* Constant to denote ISO-8859-1 encoding. Pass it as the $encoding argument
* when constructing a string to have bytes that are not part of a valid
* UTF-8 sequence converted using the ISO-8859-1 map.
*/
const ENC_ISO_8859_1 = "ISO-8859-1";
/**
* Constant to denote CP-1251 encoding. Pass it as the $encoding argument
* when constructing a string to have bytes that are not part of a valid
* UTF-8 sequence converted using the CP-1251 map.
*/
const ENC_CP_1251 = "CP-1251";
/**
* Constant to indicate that you don't want to decode any entities when
* constructing the string.
*/
const DECODE_NONE = 0;
/**
* Constant to indicate that you want to decode all entities when
* constructing the string.
*/
const DECODE_ALL = 1;
/**
* Constant to indicate that you want to decode all but the entities for
* reserved characters (&<>'") when constructing the string.
*/
const DECODE_UNRESERVED = 2;
/**
* The primitive UTF-8 string.
* @var string
*/
private $str;
/**
* The current position (character index) when iterating.
* @var int
*/
private $pos;
/**
* The length of the string (characters not bytes). The value -1 means
* that the length was not determined yet, see getLength().
* @var int
*/
private $len = -1;
/**
* Collator used for sorting. This is a static shared amongst instances.
* It stays unset until setCollator() or getCollator() is called.
* @var \Collator
*/
private static $coll;
/**
* Map to translate 8 bit code page characters to UTF-8 sequences.
* @var array[]
*/
private static $maps = array(
self::ENC_ISO_8859_1 => array(128 =>
"€","�","‚","ƒ","„","…","†","‡","ˆ","‰","Š","‹","Œ","�","Ž","�",
"�","‘","’","“","”","•","–","—","˜","™","š","›","œ","�","ž","Ÿ",
" ","¡","¢","£","¤","¥","¦","§","¨","©","ª","«","¬","­","®","¯",
"°","±","²","³","´","µ","¶","·","¸","¹","º","»","¼","½","¾","¿",
"À","Á","Â","Ã","Ä","Å","Æ","Ç","È","É","Ê","Ë","Ì","Í","Î","Ï",
"Ð","Ñ","Ò","Ó","Ô","Õ","Ö","×","Ø","Ù","Ú","Û","Ü","Ý","Þ","ß",
"à","á","â","ã","ä","å","æ","ç","è","é","ê","ë","ì","í","î","ï",
"ð","ñ","ò","ó","ô","õ","ö","÷","ø","ù","ú","û","ü","ý","þ","ÿ",
),
self::ENC_CP_1251 => array(128 =>
"Ђ","Ѓ","‚","ѓ","„","…","†","‡","€","‰","Љ","‹","Њ","Ќ","Ћ","Џ",
"ђ","‘","’","“","”","•","–","—","�","™","љ","›","њ","ќ","ћ","џ",
" ","Ў","ў","Ј","¤","Ґ","¦","§","Ё","©","Є","«","¬","­","®","Ї",
"°","±","І","і","ґ","µ","¶","·","ё","№","є","»","ј","Ѕ","ѕ","ї",
"А","Б","В","Г","Д","Е","Ж","З","И","Й","К","Л","М","Н","О","П",
"Р","С","Т","У","Ф","Х","Ц","Ч","Ш","Щ","Ъ","Ы","Ь","Э","Ю","Я",
"а","б","в","г","д","е","ж","з","и","й","к","л","м","н","о","п",
"р","с","т","у","ф","х","ц","ч","ш","щ","ъ","ы","ь","э","ю","я",
),
);
/**
 * Test if a byte sequence starts with a well-formed UTF-8 multi-byte
 * character.
 *
 * The check follows the UTF-8 byte sequence syntax of RFC 3629: overlong
 * encodings (E0 80-9F, F0 80-8F), UTF-16 surrogates (ED A0-BF) and code
 * points above U+10FFFF (F4 90-BF) are rejected.
 *
 * In addition the byte directly after the character has to be absent, an
 * ASCII byte or a new UTF-8 lead byte. This is stricter than the UTF-8
 * syntax requires, it is a heuristic to tell real UTF-8 apart from code
 * page characters that happen to look like a UTF-8 sequence.
 *
 * @param string $seq The byte sequence to test; only the first five bytes
 *    are inspected.
 *
 * @return int The length of the UTF-8 sequence (2-4) or 0 if the
 *    sequence does not start with a valid multi-byte UTF-8 character.
 */
private function isUtf8Sequence($seq) {
    // A multi-byte character needs a lead byte and at least one tail byte.
    $len = strlen($seq);
    if ($len < 2) {
        return 0;
    }
    // Read up to five bytes, bytes past the end of the input count as 0.
    $b = array(0, 0, 0, 0, 0);
    for ($i = 0; $i < 5 && $i < $len; $i++) {
        $b[$i] = ord($seq[$i]);
    }
    // Check if the first byte is a UTF-8 lead byte: C0, C1 and F5-FF
    // never occur in valid UTF-8.
    $lead = $b[0];
    if ($lead < 0xC2 || $lead > 0xF4) {
        return 0;
    }
    // The second byte is a tail byte (80-BF), but some lead bytes narrow
    // that range to exclude overlong forms, surrogates and code points
    // above U+10FFFF.
    $min = 0x80;
    $max = 0xBF;
    if ($lead == 0xE0) {
        $min = 0xA0;
    } else if ($lead == 0xED) {
        $max = 0x9F;
    } else if ($lead == 0xF0) {
        $min = 0x90;
    } else if ($lead == 0xF4) {
        $max = 0x8F;
    }
    if ($b[1] < $min || $b[1] > $max) {
        return 0;
    }
    // The lead byte determines the width of the character.
    $width = $lead < 0xE0 ? 2 : ($lead < 0xF0 ? 3 : 4);
    // All remaining bytes of the character need to be tail bytes.
    for ($i = 2; $i < $width; $i++) {
        if ($b[$i] < 0x80 || $b[$i] > 0xBF) {
            return 0;
        }
    }
    // The byte after the character: none (0), ASCII or a new lead byte.
    $next = $b[$width];
    if ($next < 0x80 || ($next >= 0xC2 && $next < 0xF5)) {
        return $width;
    }
    // This is not a valid UTF-8 sequence.
    return 0;
}
/**
 * Turn a string that mixes UTF-8 sequences and single byte code page
 * characters into a string that only holds UTF-8.
 *
 * Each byte in the 0x80-0xFF range is inspected: if it starts a valid
 * UTF-8 sequence (see isUtf8Sequence()) that sequence is copied as is,
 * otherwise the byte is looked up in the map for the given encoding.
 * Supported encodings are:
 *
 * * \Scrivo\String::ENC_ISO_8859_1
 * * \Scrivo\String::ENC_CP_1251
 *
 * @param string $str The string with mixed UTF-8 and 8 bit code page
 *    characters.
 * @param string $encoding The encoding to use when converting 8 bit code
 *    page characters to UTF-8.
 *
 * @return string The converted string.
 *
 * @throws \Scrivo\SystemException If the encoding is not supported.
 */
private function fixCodePageString($str, $encoding) {
    // Only the two encodings that have a map are accepted.
    if (!($encoding == self::ENC_ISO_8859_1
            || $encoding == self::ENC_CP_1251)) {
        throw
            new \Scrivo\SystemException("Unsupported encoding: $encoding");
    }
    // Cut the input at each byte with the high bit set. Each piece comes
    // with its byte offset, the high byte sits right in front of it.
    $pieces =
        preg_split('/[\x80-\xFF]/', $str, -1, PREG_SPLIT_OFFSET_CAPTURE);
    $total = count($pieces);
    if ($total <= 1) {
        // No high bytes at all: nothing to convert.
        return $str;
    }
    $result = $pieces[0][0];
    $idx = 1;
    while ($idx < $total) {
        // Look at (at most) six bytes starting at the high byte.
        $window = substr($str, $pieces[$idx][1]-1, 6);
        $width = $this->isUtf8Sequence($window);
        if ($width) {
            // Valid UTF-8: copy the sequence and skip the (empty) pieces
            // that the split created between its tail bytes.
            $char = substr($window, 0, $width);
            $idx += ($width - 1);
        } else {
            // Not UTF-8: translate the byte as a code page character.
            $char = self::$maps[$encoding][ord($window[0])];
        }
        // Append the character and the text that follows it.
        $result .= $char.$pieces[$idx][0];
        $idx++;
    }
    return $result;
}
/**
 * Convert a string with HTML entities, UTF-8 and code page characters
 * to a UTF-8 string.
 *
 * If an encoding other than "UTF-8" is given, bytes in the 0x80-0xFF
 * range that are not part of a valid UTF-8 sequence are converted using
 * that encoding (see fixCodePageString()). Supported encodings are:
 *
 * * \Scrivo\String::ENC_ISO_8859_1
 * * \Scrivo\String::ENC_CP_1251
 *
 * You can opt to convert HTML entities in the string to their
 * corresponding characters. Possible choices are:
 *
 * * \Scrivo\String::DECODE_NONE, don't decode HTML entities;
 * * \Scrivo\String::DECODE_ALL, decode all HTML entities;
 * * \Scrivo\String::DECODE_UNRESERVED, decode all but the HTML entities
 *   for the reserved characters < > & " and '.
 *
 * With DECODE_UNRESERVED every spelling of a reserved entity is kept:
 * named (&lt;), decimal (&#60;, &#060;) and hexadecimal (&#x3C;). The
 * kept entities are normalized to &lt; &gt; &amp; &quot; and &#39;.
 *
 * @param string $str The source string, a possible mixture of HTML
 *    entities, UTF-8 and code page characters.
 * @param int $toDecode Which entities to decode, one of the DECODE_*
 *    constants.
 * @param string $encoding The encoding to use when converting 8 bit code
 *    page characters to UTF-8, or "UTF-8" to skip that step.
 *
 * @return string The converted string.
 *
 * @throws \Scrivo\SystemException If the encoding is not supported.
 */
private function fixString($str, $toDecode=self::DECODE_NONE,
        $encoding="UTF-8") {
    // Patterns for the entities we want to keep, one per reserved
    // character, matching named, decimal and hexadecimal notation.
    $patterns = array(
        '/&(?:lt|#0*60|#[xX]0*3[cC]);/',
        '/&(?:gt|#0*62|#[xX]0*3[eE]);/',
        '/&(?:amp|#0*38|#[xX]0*26);/',
        '/&(?:quot|#0*34|#[xX]0*22);/',
        '/&(?:apos|#0*39|#[xX]0*27);/',
    );
    // Markers to replace the entities you want to keep, so that
    // html_entity_decode will leave them alone.
    $markers = array(
        "#*@lt!;", "#*@gt!;", "#*@amp!;", "#*@quot!;", "#*@#039!;"
    );
    // The entities that the markers are turned back into.
    $entities = array("&lt;", "&gt;", "&amp;", "&quot;", "&#39;");
    $keepReserved = ($toDecode == self::DECODE_UNRESERVED);
    if ($keepReserved) {
        // 'Save' entities for reserved characters.
        $str = preg_replace($patterns, $markers, $str);
    }
    if ($encoding != "UTF-8") {
        // Fix characters that are not properly UTF-8 encoded.
        $str = $this->fixCodePageString($str, $encoding);
    }
    if ($toDecode != self::DECODE_NONE) {
        // Change all entities to their corresponding UTF-8 characters.
        $str = html_entity_decode($str, ENT_QUOTES, "UTF-8");
    }
    if ($keepReserved) {
        // 'Restore' previously saved entities.
        $str = str_replace($markers, $entities, $str);
    }
    return $str;
}
/**
 * Cut a piece out of this string without validating the offsets first.
 *
 * The arguments are handed to mb_substr() as they are, so it is up to the
 * caller to make sure that they make sense.
 *
 * @param int $start Start offset (in characters) for the substring, use
 *    a negative number to use an offset from the end of the string.
 * @param int $length The length of the substring in characters.
 *
 * @return \Scrivo\String The requested portion of this string.
 */
private function unsafeSubstr($start, $length) {
    $part = mb_substr($this->str, $start, $length, "UTF-8");
    return new \Scrivo\String($part);
}
/**
 * Construct an \Scrivo\String.
 *
 * The given value is cast to a PHP string. When the defaults for
 * $toDecode and $encoding are used that string is stored as is: it is
 * assumed to be valid UTF-8. If you expect the source not to be clean
 * UTF-8 use the $toDecode and/or $encoding parameters, in that case the
 * string is passed through fixString() first.
 *
 * Possible choices for $toDecode are:
 *
 * * \Scrivo\String::DECODE_NONE, don't decode HTML entities;
 * * \Scrivo\String::DECODE_ALL, decode all HTML entities;
 * * \Scrivo\String::DECODE_UNRESERVED, decode all but the HTML entities
 *   for the reserved characters < > & " and '.
 *
 * Supported values for $encoding (next to the default "UTF-8") are:
 *
 * * \Scrivo\String::ENC_ISO_8859_1
 * * \Scrivo\String::ENC_CP_1251
 *
 * Note: typical use of the $toDecode and $encoding parameters is when
 * you want to 'sanitize' data before you store it into a database. These
 * parameters trigger extra processing of the whole string, so it's best
 * not to use them in bulk operations.
 *
 * @param string $str The source string, a possible mixture of HTML
 *    entities, UTF-8 and code page characters.
 * @param int $toDecode Which entities to decode, one of the DECODE_*
 *    constants.
 * @param string $encoding The encoding to use when converting 8 bit code
 *    page characters to UTF-8.
 */
public function __construct($str="", $toDecode=self::DECODE_NONE,
        $encoding="UTF-8") {
    $raw = (string)$str;
    // Only run the conversion if a non default option was given.
    $needsFixing = $toDecode != self::DECODE_NONE || $encoding != "UTF-8";
    if ($needsFixing) {
        $raw = $this->fixString($raw, $toDecode, $encoding);
    }
    $this->str = $raw;
    // Iteration starts at the first character.
    $this->pos = 0;
}
/**
 * Factory method to construct an \Scrivo\String or an array of them.
 *
 * If an array is given, each of its elements is converted (recursively,
 * so nested arrays are converted too) and an array with the same keys is
 * returned.
 *
 * @see \Scrivo\String::__construct()
 *
 * @param string|array $str The string (or array of strings) to create
 *    the wrapper(s) for. It is assumed that this will be valid UTF-8. If
 *    this is not the case, you'll need to set the additional parameters.
 * @param int $toDecode Which entities to decode, one of the DECODE_*
 *    constants.
 * @param string $encoding The encoding to use when converting 8 bit code
 *    page characters to UTF-8.
 *
 * @return \Scrivo\String|array An \Scrivo\String wrapper object, or an
 *    array of those if an array was given.
 */
public static function create($str="", $toDecode=self::DECODE_NONE,
        $encoding="UTF-8") {
    if (!is_array($str)) {
        return new \Scrivo\String($str, $toDecode, $encoding);
    }
    // Convert the elements one by one, keeping the keys and their order.
    $wrapped = array();
    foreach ($str as $key=>$item) {
        $wrapped[$key] = self::create($item, $toDecode, $encoding);
    }
    return $wrapped;
}
/**
 * Get the collator for sorting strings.
 *
 * If no collator was set yet, one is created for the default locale
 * (\Locale::getDefault()) and kept for subsequent calls.
 *
 * @return \Collator The currently set collator for the \Scrivo\String
 *    class.
 */
public static function getCollator() {
    if (self::$coll) {
        return self::$coll;
    }
    // First use: fall back to a collator for the default locale.
    self::$coll = new \Collator(\Locale::getDefault());
    return self::$coll;
}
/**
* Set the collator for sorting strings.
*
* The collator is stored in a static member and therefore shared by all
* \Scrivo\String instances.
*
* @param \Collator $coll The collator to use.
*/
public static function setCollator(\Collator $coll) {
self::$coll = $coll;
}
/**
 * Implementation of the readable properties using the PHP magic
 * method __get().
 *
 * The readable properties are "length" (see getLength()) and "collator"
 * (see getCollator()).
 *
 * @param string $name The name of the property to get.
 *
 * @return mixed The value of the requested property.
 *
 * @throws \Scrivo\SystemException If there is no property with the given
 *    name.
 */
public function __get($name) {
    if ($name == "length") {
        return $this->getLength();
    }
    if ($name == "collator") {
        return self::getCollator();
    }
    throw new \Scrivo\SystemException("No such property '$name'.");
}
/**
* Return the primitive UTF-8 string for this instance.
*
* This is what PHP uses when an \Scrivo\String is cast to or used as a
* PHP string.
*
* @return string The primitive UTF-8 string for this instance.
*/
public function __toString() {
return $this->str;
}
/**
 * Test if this string equals another \Scrivo\String object.
 *
 * When you want test \Scrivo\String object for equality, use this method
 * and never the equality operator (==) because then you'll compare
 * objects and therefore all data members of \Scrivo\String and this can
 * give you other results (or cast the \Scrivo\String strings to PHP strings
 * before comparing).
 *
 * The primitive strings are compared using the identity operator (===).
 * The equality operator (==) would compare numeric strings by their
 * numeric value and for instance report "1e3" and "1000" as equal.
 *
 * @param \Scrivo\String $str The string to compare this string to.
 *
 * @return boolean True if the given string equals this string.
 */
public function equals(\Scrivo\String $str) {
    return (string)$this->str === (string)$str;
}
/**
 * Get the length of the string.
 *
 * The length is determined on the first call and remembered for
 * subsequent calls.
 *
 * @return int The length of the string in characters (not bytes).
 */
public function getLength() {
    if ($this->len != -1) {
        // Already determined earlier.
        return $this->len;
    }
    $this->len = mb_strlen($this->str, "UTF-8");
    return $this->len;
}
/**
* Return the character count of the string.
*
* This is an alias for getLength() and part of the implementation of
* Countable, it allows you to use count() on an \Scrivo\String.
*
* @return int The length of the string in characters (not bytes).
*/
public function count() {
return $this->getLength();
}
/**
 * Return the current UTF-8 character when iterating.
 *
 * Note that this method is part of the implementation of Iterator and
 * should not be called from an other context. The position is not
 * checked here: the iterator calls valid() before current().
 *
 * @return \Scrivo\String The current UTF-8 character in this string when
 *    iterating, wrapped in a \Scrivo\String of one character.
 */
public function current() {
    $char = $this->unsafeSubstr($this->pos, 1);
    return $char;
}
/**
* Return the index of the current UTF-8 character when iterating.
*
* Note that this method is part of the implementation of Iterator and
* should not be called from an other context.
*
* @return int The index (in characters, not bytes) of the current UTF-8
* character in this string when iterating.
*/
public function key() {
return $this->pos;
}
/**
* Move forward in this string to the next UTF-8 character when iterating.
*
* The position is only incremented here, use valid() to check whether the
* new position is still inside the string.
*
* Note that this method is part of the implementation of Iterator and
* should not be called from an other context.
*/
public function next() {
$this->pos++;
}
/**
* Reset the current character index to 0 so iterating will (re)start at
* the beginning of this string.
*
* Note that this method is part of the implementation of Iterator and
* should not be called from an other context.
*/
public function rewind() {
$this->pos = 0;
}
/**
 * Check if the current character index for iterating is valid.
 *
 * The index is valid if it is not negative and less than the length of
 * the string in characters.
 *
 * Note that this method is part of the implementation of Iterator and
 * should not be called from an other context.
 *
 * @return boolean True if the current character index is valid else false.
 */
public function valid() {
    if (!($this->pos >= 0)) {
        return false;
    }
    return $this->pos < $this->getLength();
}
/**
* Illegal method: set a character at a specified index location.
*
* Note that this method is part of the implementation of ArrayAccess.
* \Scrivo\Strings are immutable and therefore it is prohibited to set
* elements (characters) in a string, so this method implementation is
* not relevant and always throws an exception if called.
*
* @param int $offset Not used.
* @param string $value Not used.
*
* @throws \Scrivo\SystemException If this method is called.
*/
public function offsetSet($offset, $value) {
throw new \Scrivo\SystemException(
"offsetSet can't be called on \Scrivo\String objects");
}
/**
 * Get an UTF-8 character from a string using array brackets.
 *
 * Note that this method is part of the implementation of ArrayAccess and
 * should not be called from an other context.
 *
 * @param int $offset A character offset in the string.
 *
 * @return \Scrivo\String The character at the given offset, wrapped in a
 *    \Scrivo\String of one character.
 *
 * @throws \Scrivo\SystemException If the requested offset was out of range.
 */
public function offsetGet($offset) {
    if ($this->offsetExists($offset)) {
        return $this->unsafeSubstr($offset, 1);
    }
    throw new \Scrivo\SystemException(
        "String index [$offset] out of bounds");
}
/**
 * Check if the specified index location in this string is valid.
 *
 * An index is valid if it is not negative and less than the length of
 * the string in characters.
 *
 * Note that this method is part of the implementation of ArrayAccess, but
 * it is also used by other methods of this class to check offsets.
 *
 * @param int $offset A character offset in the string.
 *
 * @return boolean True if the specified index is within the valid range.
 */
public function offsetExists($offset) {
    if (!($offset >= 0)) {
        return false;
    }
    return $offset < $this->getLength();
}
/**
* Illegal method: unset a character at a specified index location.
*
* Note that this method is part of the implementation of ArrayAccess.
* \Scrivo\Strings are immutable and therefore it is prohibited to unset
* elements (characters) in a string, so this method implementation is
* not relevant and always throws an exception if called.
*
* @param int $offset Not used.
*
* @throws \Scrivo\SystemException If this method is called.
*/
public function offsetUnset($offset) {
throw new \Scrivo\SystemException(
"offsetUnset can't be called on \Scrivo\String objects");
}
/**
 * Get a substring from a string using an offset and a length.
 *
 * Just like PHP's native substr function this method returns a substring
 * from this string using an offset and a length. But note that this
 * method will throw an exception if the offset is invalid.
 *
 * Valid offsets are 0 up to (but not including) the length of the string
 * and, counting from the end of the string, -1 down to and including
 * minus the length of the string (which denotes the first character).
 *
 * @param int $start Start offset for the substring, use a negative number
 *    to use an offset from the end of the string.
 * @param int $length The length of the substring in characters. Note
 *    that the default limits the result to 0xFFFF characters.
 *
 * @return \Scrivo\String The portion of this string specified by the $start
 *    and $length parameter.
 *
 * @throws \Scrivo\SystemException if the requested offset was out of range.
 */
public function substr($start, $length=0xFFFF) {
    $len = $this->getLength();
    // A negative offset counts from the end: -$len is the first character.
    $inRange = $start < 0 ? $start >= -$len : $start < $len;
    if (!$inRange) {
        throw new \Scrivo\SystemException(
            "String index [$start] out of bounds");
    }
    return $this->unsafeSubstr($start, $length);
}
/**
 * Get a substring from a string using a start and end index.
 *
 * This method is inspired by its JAVA counterpart and returns the
 * substring of this string that begins at $start and extends to the
 * character at index $end - 1. So the end index is exclusive and may be
 * equal to the length of the string to include the last character.
 *
 * @param int $start Start offset for the substring (inclusive).
 * @param int $end The end offset for the substring (exclusive).
 *
 * @return \Scrivo\String The portion of this string specified by the $start
 *    and $end parameter.
 *
 * @throws \Scrivo\SystemException if $start is negative, $end is larger
 *    than the length of the string or $start is larger than $end.
 */
public function substring($start, $end) {
    if ($start < 0 || $end > $this->getLength() || $start > $end) {
        throw new \Scrivo\SystemException(
            "String index [$start, $end] out of bounds");
    }
    return $this->unsafeSubstr($start, $end-$start);
}
/**
 * Get a trimmed copy of this string.
 *
 * Returns a copy of the string, with leading and trailing whitespace
 * removed. Whitespace characters are the characters matched by \s in a
 * regular expression (amongst others ' ', \t, \r and \n) and the non
 * breaking space (U+00A0).
 *
 * If the string can't be processed by the regular expression engine (for
 * instance because it holds invalid UTF-8) an untrimmed copy is returned.
 *
 * @return \Scrivo\String A copy of this string with leading and trailing
 *    white space removed.
 */
public function trim() {
    // The non breaking space is written as an escape sequence so that
    // it is visible and survives editors that normalize white space.
    $trimmed = preg_replace(
        '/(^[\s\x{00A0}]+)|([\s\x{00A0}]+$)/us', "", $this->str);
    if ($trimmed === null) {
        // preg_replace failed: keep the content rather than losing it.
        $trimmed = $this->str;
    }
    return new \Scrivo\String($trimmed);
}
/**
 * Check if the string contains the given substring.
 *
 * This is the test you normally use strpos(...) !== false for.
 *
 * @param \Scrivo\String $str The string to search for.
 * @param int $offset An offset (in characters) from where to start the
 *    search.
 * @param boolean $ignoreCase Set to perform an case insensitive lookup.
 *
 * @return boolean True if the given string is contained by this string.
 *
 * @throws \Scrivo\SystemException If the $offset is out of range.
 */
public function contains(\Scrivo\String $str, $offset=0, $ignoreCase=false) {
    if ($offset && !$this->offsetExists($offset)) {
        throw new \Scrivo\SystemException(
            "String index [$offset] out of bounds");
    }
    $needle = (string)$str;
    if ($ignoreCase) {
        $found = mb_stripos($this->str, $needle, $offset, "UTF-8");
    } else if ($offset) {
        // The offset is in characters, so a byte based search would
        // start at the wrong position in strings with multi-byte
        // characters.
        $found = mb_strpos($this->str, $needle, $offset, "UTF-8");
    } else {
        // Without an offset a binary search is ok to do.
        $found = strpos($this->str, $needle);
    }
    return $found !== false;
}
/**
 * Returns the index of the given substring in this string.
 *
 * Just like the PHP's native strpos and stripos functions this method
 * returns the index of a substring in this string. But there are two
 * important differences: this method returns -1 if the substring was
 * not found, and this method will raise an exception if the given
 * offset was out of range.
 *
 * @param \Scrivo\String $str The string to search for.
 * @param int $offset An offset (in characters) from where to start the
 *    search.
 * @param boolean $ignoreCase Set to perform an case insensitive lookup.
 *
 * @return int The index (in characters) of the first occurrence of the
 *    substring at or after $offset and -1 if the substring was not found.
 *
 * @throws \Scrivo\SystemException If the $offset is out of range.
 */
public function indexOf(\Scrivo\String $str, $offset=0, $ignoreCase=false) {
    if ($offset && !$this->offsetExists($offset)) {
        throw new \Scrivo\SystemException(
            "String index [$offset] out of bounds");
    }
    $index = $ignoreCase
        ? mb_stripos($this->str, $str, $offset, "UTF-8")
        : mb_strpos($this->str, $str, $offset, "UTF-8");
    if ($index === false) {
        // Not found: report -1 instead of false.
        return -1;
    }
    return $index;
}
/**
 * Returns the index of the last occurrence of the given substring in this
 * string.
 *
 * Just like the PHP's native strrpos and strripos functions this method
 * returns the index of the last occurrence of a substring in this string.
 * But there are two important differences: this method returns -1 if the
 * substring was not found, and this method will raise an exception if
 * the given offset was out of range.
 *
 * The offset is passed on to mb_strrpos/mb_strripos as it is. Valid
 * offsets are 0 up to (but not including) the length of the string and
 * -1 down to and including minus the length of the string.
 *
 * @param \Scrivo\String $str The string to search for.
 * @param int $offset An offset for the search. A positive value
 *    indicates an offset measured from the start of the string, a
 *    negative value from the end of the string.
 * @param boolean $ignoreCase Perform an case insensitive lookup.
 *
 * @return int The index (in characters) of the last occurrence of the
 *    substring and -1 if the substring was not found.
 *
 * @throws \Scrivo\SystemException If the $offset is out of range.
 */
public function lastIndexOf(\Scrivo\String $str, $offset=0, $ignoreCase=false) {
    if ($offset) {
        $len = $this->getLength();
        // A negative offset counts from the end: -$len is still inside
        // the string.
        $inRange = $offset < 0 ? $offset >= -$len : $offset < $len;
        if (!$inRange) {
            throw new \Scrivo\SystemException(
                "String index [$offset] out of bounds");
        }
    }
    $needle = (string)$str;
    if ($ignoreCase) {
        $res = mb_strripos($this->str, $needle, $offset, "UTF-8");
    } else {
        $res = mb_strrpos($this->str, $needle, $offset, "UTF-8");
    }
    return $res !== false ? $res : -1;
}
/**
 * Returns the part of this string from (or up to) the first occurrence
 * of a given substring.
 *
 * Just like the PHP's native strstr and stristr functions this method
 * returns the substring of this string that starts with the first
 * occurrence of the given substring. Note that this method throws an
 * exception if an empty string was given as search string.
 *
 * @param \Scrivo\String $str The string to search for.
 * @param boolean $part Flag to indicate to return the part of the string
 *    before the first occurrence of the given substring i.o. the part
 *    from the substring onwards.
 * @param boolean $ignoreCase Perform an case insensitive lookup.
 *
 * @return \Scrivo\String The substring plus the part of the string after
 *    the first occurrence of the substring, or the part of the string
 *    before the first occurrence of the substring (excluding the
 *    substring), or NULL if not found.
 *
 * @throws \Scrivo\SystemException If an empty search string was given.
 */
public function firstOccurranceOf(\Scrivo\String $str, $part=false,
        $ignoreCase=false) {
    if (!$str->getLength()) {
        throw new \Scrivo\SystemException(
            "firstOccurranceOf requires a search string");
    }
    $found = $ignoreCase
        ? mb_stristr($this->str, $str, $part, "UTF-8")
        : mb_strstr($this->str, $str, $part, "UTF-8");
    if ($found === false) {
        return NULL;
    }
    return new \Scrivo\String($found);
}
/**
 * Returns the part of this string from (or up to) the last occurrence of
 * a given character.
 *
 * This method wraps mb_strrchr and mb_strrichr: it returns the substring
 * of this string that starts with the last occurrence of the given
 * character. Note that this method throws an exception if the search
 * string is not exactly one character long.
 *
 * @param \Scrivo\String $str The character to search for.
 * @param boolean $part Flag to indicate to return part of the string
 *    before the last occurrence of the given character i.o. the part
 *    from the character onwards.
 * @param boolean $ignoreCase Perform an case insensitive lookup.
 *
 * @return \Scrivo\String The character plus the part of the string after
 *    the last occurrence of the character, or the part of the string
 *    before the last occurrence of the character (excluding the
 *    character), or NULL if not found.
 *
 * @throws \Scrivo\SystemException If a search string of not exactly one
 *    character in length was given.
 */
public function lastOccurranceOf(\Scrivo\String $str, $part=false,
        $ignoreCase=false) {
    if ($str->getLength() != 1) {
        throw new \Scrivo\SystemException(
            "lastOccurranceOf accepts single charaters only");
    }
    $found = $ignoreCase
        ? mb_strrichr($this->str, $str, $part, "UTF-8")
        : mb_strrchr($this->str, $str, $part, "UTF-8");
    if ($found === false) {
        return NULL;
    }
    return new \Scrivo\String($found);
}
/**
 * Replace a substring or set of substrings in this string.
 *
 * You can use this method in favour of PHP's native str_replace
 * function. This method checks the types of the arguments for you and
 * then leaves the actual work to str_replace. Accepted combinations are:
 *
 * * $from is a string and $to is a string;
 * * $from is an array of strings and $to is a string;
 * * $from and $to are arrays of strings with the same number of elements.
 *
 * @param \Scrivo\String|\Scrivo\String[] $from A (set of) string(s) to
 *    replace in this string.
 * @param \Scrivo\String|\Scrivo\String[] $to A (set of) replacement
 *    string(s) to replace the found string(s).
 *
 * @return \Scrivo\String A string with the replaced values.
 *
 * @throws \Scrivo\SystemException If the input data is not of type
 *    \Scrivo\String or \Scrivo\String[], or if the $to parameter is an
 *    array and $from isn't or hasn't the same number of elements.
 */
public function replace($from, $to) {
    $fromIsString = $from instanceof \Scrivo\String;
    $toIsString = $to instanceof \Scrivo\String;
    // String to string needs no further checks, all other combinations
    // need an array of strings as $from.
    if (!($fromIsString && $toIsString)) {
        if (!is_array($from)) {
            throw new \Scrivo\SystemException("Invalid argument types");
        }
        if ($toIsString) {
            // Many to one: check the elements of $from.
            foreach ($from as $k=>$v) {
                if (!($v instanceof \Scrivo\String)) {
                    throw new \Scrivo\SystemException("From element is"
                        . " not an \Scrivo\String as array position [$k]");
                }
            }
        } else if (is_array($to)) {
            // Many to many: the arrays need to match in size and type.
            if (count($from) != count($to)) {
                throw new \Scrivo\SystemException(
                    "Input arrays are not the same size");
            }
            foreach ($from as $k=>$v) {
                if (!($v instanceof \Scrivo\String)
                        || !($to[$k] instanceof \Scrivo\String)) {
                    throw new \Scrivo\SystemException("To or from element is"
                        . " not an \Scrivo\String as array position [$k]");
                }
            }
        } else {
            throw new \Scrivo\SystemException("Invalid argument types");
        }
    }
    return new \Scrivo\String(str_replace($from, $to, $this->str));
}
/**
 * Split this string using a delimiter.
 *
 * Just like PHP's native explode this method splits a string on
 * boundaries formed by the string delimiter. Note that the behaviour
 * of the limit parameter is a little bit different (0 means no limit)
 * and that this method will throw an exception if an empty string is
 * passed as a delimiter.
 *
 * @param \Scrivo\String $delimiter The boundary string.
 * @param int $limit If limit is set and positive, the returned array
 *    will contain a maximum of limit elements with the last element
 *    containing the rest of string. If the limit parameter is negative,
 *    all components except the last -limit are returned. If the limit is
 *    not set or 0 no limit will be used.
 *
 * @return \Scrivo\String[] An array of strings created by splitting this
 *    string on boundaries formed by the delimiter.
 *
 * @throws \Scrivo\SystemException If an empty delimiter was given.
 */
public function split(\Scrivo\String $delimiter, $limit=0) {
    if ($delimiter == "") {
        throw new \Scrivo\SystemException(
            "split cannot use an empty \"\" delimiter.");
    }
    if ($limit) {
        $parts = explode($delimiter, $this->str, $limit);
    } else {
        // No limit: leave the limit argument out altogether.
        $parts = explode($delimiter, $this->str);
    }
    // Wrap each of the primitive strings.
    $result = array();
    foreach ($parts as $key=>$part) {
        $result[$key] = new \Scrivo\String($part);
    }
    return $result;
}
/**
 * Get a copy of this string with all of its characters converted to lower
 * case (using mb_strtolower).
 *
 * @return \Scrivo\String A lower case copy of this string.
 */
public function toLowerCase() {
    $lower = mb_strtolower($this->str, "UTF-8");
    return new \Scrivo\String($lower);
}
/**
 * Get a copy of this string with all of its characters converted to upper
 * case (using mb_strtoupper).
 *
 * @return \Scrivo\String An upper case copy of this string.
 */
public function toUpperCase() {
    $upper = mb_strtoupper($this->str, "UTF-8");
    return new \Scrivo\String($upper);
}
/**
 * Compare this string to another \Scrivo\String object.
 *
 * The comparison is done by the collator of the \Scrivo\String class
 * (see getCollator()). If no collator was set, a collator for the default
 * locale will be used.
 *
 * @param \Scrivo\String $str The string to compare this string to.
 *
 * @return int The result of \Collator::compare: less than 0 if this
 *    string is less than the given string $str; more than 0 if this
 *    string is greater than $str, and 0 if they are equal.
 */
public function compareTo(\Scrivo\String $str) {
    $collator = self::getCollator();
    return $collator->compare($this->str, $str);
}
/**
 * Check if this string exists in an array of \Scrivo\String-s.
 *
 * The elements are compared to this string using equals(), so each
 * element of the array needs to be an \Scrivo\String.
 *
 * @param \Scrivo\String[] $arr The array to search.
 *
 * @return mixed If found the key of the first occurrence of the string
 *    in the array, else null.
 */
public function inArray($arr) {
    $found = null;
    foreach ($arr as $key=>$candidate) {
        if ($candidate->equals($this)) {
            // Stop at the first match.
            $found = $key;
            break;
        }
    }
    return $found;
}
}