Url с русскими буквами

vital

Больной Компом Детектед
29.01.2006
2 432
33
#1
Стырено в инете, копирайты в коде.
PHP:
<?php
/*
Translit PHP class.
v.1.2 
12 November 2005

---------

Транслитерация ссылок (приведение их в соответствие с форматом URL).
Латинские буквы и цифры остаются, а русские + знаки препинания преобразуются
одним из способов (способы нужны каждый для своей задачи)

Подробнее: http://pixel-apes.com/translit

---------

Методы этого класса можно использовать как статические, 
например, Translit::UrlTranslit("Свежая новость из цирка")

---------

* UrlTranslit( $string, $allow_slashes = TR_NO_SLASHES ) 
-- преобразовать строку в "красивый читаемый URL"

* Supertag( $string, $allow_slashes = TR_NO_SLASHES )	
-- преобразовать строку в "супертаг" -- короткий простой 
идентификатор, состоящий из латинских букв и цифр.

* BiDiTranslit( $string, $direction=TR_ENCODE, $allow_slashes = TR_NO_SLASHES ) 
-- преобразовать строку в "формально правильный URL"
с возможностью восстановления.
Другое значение $direction позволяет восстановить
строку обратно с незначительными потерями

* Wikify( $string, $allow_slashes = TR_NO_SLASHES )	
-- преобразовать произвольную строку в вики-адрес
например: "Привет мир" => "ПриветМир"

* DeWikify( $string )
-- попробовать восстановить примерный вид исходной строки по вики-адресу
например: "ПриветМир" => "Привет Мир"

* во всех функциях параметр $allow_slashes управляет тем, игнорировать ли символ "/",
пропуская его неисправленным, либо удалять его из строки

=============================================================== (Kukutz)

TODO:
* strtolower replacement for non-locale systems
*/

define("TR_ENCODE", 0);
define("TR_DECODE", 1);
define("TR_NO_SLASHES", 0);
define("TR_ALLOW_SLASHES", 1);

class Translit
{

//пустой конструктор, чтобы методы могли работать через ::
function Translit() {}

//URL transliterating
function UrlTranslit($string, $allow_slashes = TR_NO_SLASHES)
{
$slash = "";
if ($allow_slashes) $slash = "/";

static $LettersFrom = "абвгдезиклмнопрстуфыэйхё";
static $LettersTo  = "abvgdeziklmnoprstufyejxe";
static $Consonant = "бвгджзйклмнпрстфхцчшщ";
static $Vowel = "аеёиоуыэюя";
static $BiLetters = array( 
"ж" => "zh", "ц"=>"ts", "ч" => "ch", 
"ш" => "sh", "щ" => "sch", "ю" => "ju", "я" => "ja",
);

$string = preg_replace("/[_s.,?![](){}]+/", "_", $string);
$string = preg_replace("/-{2,}/", "--", $string);
$string = preg_replace("/_-+_/", "--", $string);
$string = preg_replace("/[_-]+$/", "", $string);

$string = strtolower( $string );
//here we replace ъ/ь 
$string = preg_replace("/(ь|ъ)([".$Vowel."])/", "j\2", $string);
$string = preg_replace("/(ь|ъ)/", "", $string);
//transliterating
$string = strtr($string, $LettersFrom, $LettersTo );
$string = strtr($string, $BiLetters );

$string = preg_replace("/j{2,}/", "j", $string);

$string = preg_replace("/[^".$slash."0-9a-z_-]+/", "", $string);

return $string;
}

//Supertag cooking
function Supertag($string, $allow_slashes = TR_NO_SLASHES)
{
$slash = "";
if ($allow_slashes) $slash = "/";

$string = Translit::UrlTranslit($string, $allow_slashes);
$string = preg_replace("/[^".$slash."0-9a-zA-Z-]+/", "", $string);
$string = preg_replace("/[-_]+/", "-", $string);
$string = preg_replace("/-+$/", "", $string);
return $string;
}


//Bidirectional translit
function BiDiTranslit($string, $direction=TR_ENCODE, $allow_slashes = TR_NO_SLASHES)
{
$slash = "";
if ($allow_slashes) $slash = "/";

static $Tran = array (
"А" => "A", "Б" => "B", "В" => "V", "Г" => "G", "Д" => "D", "Е" => "E", "Ё" => "JO", "Ж" => "ZH", "З" => "Z", "И" => "I",
"Й" => "JJ", "К" => "K", "Л" => "L", "М" => "M", "Н" => "N", "О" => "O", "П" => "P",  "Р" => "R",  "С" => "S", "Т" => "T",
"У" => "U", "Ф" => "F", "Х" => "KH", "Ц" => "C", "Ч" => "CH", "Ш" => "SH", "Щ" => "SHH", "Ъ" => "_~",  "Ы" => "Y", "Ь" => "_'",
"Э" => "EH", "Ю" => "JU", "Я" => "JA", "а" => "a", "б" => "b", "в" => "v", "г" => "g",  "д" => "d",  "е" => "e", "ё" => "jo",
"ж" => "zh", "з" => "z", "и" => "i", "й" => "jj", "к" => "k", "л" => "l", "м" => "m",  "н" => "n",  "о" => "o", "п" => "p",
"р" => "r", "с" => "s", "т" => "t", "у" => "u", "ф" => "f", "х" => "kh", "ц" => "c",  "ч" => "ch", "ш" => "sh", "щ" => "shh",
"ъ" => "~", "ы" => "y", "ь" => "'", "э" => "eh", "ю" => "ju", "я" => "ja", " " => "__", "_" => "__", );
static $DeTran = array (
"A"	=> "А",  "B"	=> "Б", "V"	=> "В", "G"	=> "Г", "D"	=> "Д", "E"	=> "Е", "JO"  => "Ё", "ZH"  => "Ж",
"Z"	=> "З",  "I"	=> "И", "JJ"  => "Й", "K"	=> "К", "L"	=> "Л", "M"	=> "М", "N"	=> "Н", "O"	=> "О",
"P"	=> "П",  "R"	=> "Р", "S"	=> "С", "T"	=> "Т", "U"	=> "У", "F"	=> "Ф", "KH"	=> "Х", "C"	=> "Ц",
"CH"  => "Ч",  "SHH" => "Щ", "SH"  => "Ш", "Y"	=> "Ы", "EH"  => "Э", "JU"  => "Ю", "_'"=>"Ь", "_~"=>"Ъ",
"JA"  => "Я",  "a"	=> "а", "b"	=> "б", "v"	=> "в", "g"	=> "г", "d"	=> "д", "e"	=> "е", "jo"  => "ё",
"zh"  => "ж",  "z"	=> "з", "i"	=> "и", "jj"  => "й", "k"	=> "к", "l"	=> "л", "m"	=> "м", "n"	=> "н",
"o"	=> "о",  "p"	=> "п", "r"	=> "р", "s"	=> "с", "t"	=> "т", "u"	=> "у", "f"	=> "ф", "kh"	=> "х",
"c"	=> "ц",  "ch"  => "ч", "shh" => "щ", "sh"  => "ш", "~"	=> "ъ", "y"	=> "ы", "'"	=> "ь", "eh"  => "э",
"ju"  => "ю",  "ja"  => "я", "__" => " ", );			

if ($direction==TR_ENCODE) 
{
$string = preg_replace("/[^- _0-9a-zA-ZxC0-xFFёЁ".$slash."]/", "", $string);
$russians = preg_split("/[0-9A-Za-z_-./']+/", $string, -1, PREG_SPLIT_NO_EMPTY);//xc0-xff

for ($i=0;$i<count($russians);$i++)
$russians[$i] = strtr($russians[$i], $Tran);

$others = preg_split('/[xc0-xffxa8xb8 ]+/', $string, -1, PREG_SPLIT_NO_EMPTY); 

if (preg_match('/[xc0-xffxa8xb8 ]/', $string[0])) 
{	 
$fr="russians";
$sr="others";
$string = "+";
} 
else 
{ 
$fr="others";
$sr="russians";
$string = "";
}

for ($i=0;$i<min(count($$fr),count($$sr));$i++)
$string.=${$fr}[$i]."+".${$sr}[$i]."+";

if (count($$fr)>count($$sr))
$string.=${$fr}[count($$fr)-1];
else
$string=substr($string,0,strlen($string)-1);
} 
else 
{
$pgs = explode("/", $string);
for ($j=0;$j<count($pgs);$j++) 
{
$strings = explode("+", $pgs[$j]);
for ($i=1;$i<count($strings);$i=$i+2)
$strings[$i] = strtr($strings[$i], $DeTran);
$pgs[$j] = implode("", $strings);
}
$string = implode(($allow_slashes!=TR_NO_SLASHES)?"/":"", $pgs);
}
return rtrim($string, "/");
}



// Convert string to wiki address
// "Привет мир" => "ПриветМир"
function Wikify( $string, $allow_slashes = TR_NO_SLASHES)
{
$slash = "";
if ($allow_slashes) $slash = "/";
$string = preg_replace("/[^- 0-9a-zA-ZxC0-xFFёЁ".$slash."]+/", " ", $string);
// wordglue
$strings = explode( " ", $string );
foreach( $strings as $k=>$v )
$strings[$k] = ucfirst($v);
$string = implode("",$strings);
return $string;
}

// Reconstruct string by given wiki address
// "ПриветМир" => "Привет Мир"
// "-"  -- nomatter "Аква-Парк"
// "0-9" -- nomatter "R 3 Читать", "С 13 Мая" "С Дороги" "СССР Разрушили"
function Dewikify( $string )
{
$string = preg_replace( "/([^-/])([A-ZАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЪЫЭЮЯ]".
"[a-zабвгдеёжзийклмнопрстуфхцчшщьъыэюя0-9])/", "$1 $2", $string );
$string = preg_replace( "/([^0-9 -/])([0-9])/", "$1 $2", $string );
return $string;
}

}
?>