Unicode.inl source


SFML - Simple and Fast Multimedia Library Main Page Namespaces Classes Files File List Unicode.inl00001 00002 // 00003 // SFML - Simple and Fast Multimedia Library 00004 // Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com) 00005 // 00006 // This software is provided 'as-is', without any express or implied warranty. 00007 // In no event will the authors be held liable for any damages arising from the use of this software. 00008 // 00009 // Permission is granted to anyone to use this software for any purpose, 00010 // including commercial applications, and to alter it and redistribute it freely, 00011 // subject to the following restrictions: 00012 // 00013 // 1. The origin of this software must not be misrepresented; 00014 // you must not claim that you wrote the original software. 00015 // If you use this software in a product, an acknowledgment 00016 // in the product documentation would be appreciated but is not required. 00017 // 00018 // 2. Altered source versions must be plainly marked as such, 00019 // and must not be misrepresented as being the original software. 00020 // 00021 // 3. This notice may not be removed or altered from any source distribution. 
00022 // 00024 00025 00030 template <typename In, typename Out> 00031 inline Out Unicode::UTF32ToANSI(In Begin, In End, Out Output, char Replacement, const std::locale& Locale) 00032 { 00033 #ifdef __MINGW32__ 00034 00035 // MinGW has a almost no support for unicode stuff 00036 // As a consequence, the MinGW version of this function can only use the default locale 00037 // and ignores the one passed as parameter 00038 while (Begin < End) 00039 { 00040 char Char = 0; 00041 if (wctomb(&Char, static_cast<wchar_t>(*Begin++)) >= 0) 00042 *Output++ = Char; 00043 else if (Replacement) 00044 *Output++ = Replacement; 00045 } 00046 00047 #else 00048 00049 // Get the facet of the locale which deals with character conversion 00050 const std::ctype<wchar_t>& Facet = std::use_facet< std::ctype<wchar_t> >(Locale); 00051 00052 // Use the facet to convert each character of the input string 00053 while (Begin < End) 00054 *Output++ = Facet.narrow(static_cast<wchar_t>(*Begin++), Replacement); 00055 00056 #endif 00057 00058 return Output; 00059 } 00060 00061 00066 template <typename In, typename Out> 00067 inline Out Unicode::ANSIToUTF32(In Begin, In End, Out Output, const std::locale& Locale) 00068 { 00069 #ifdef __MINGW32__ 00070 00071 // MinGW has a almost no support for unicode stuff 00072 // As a consequence, the MinGW version of this function can only use the default locale 00073 // and ignores the one passed as parameter 00074 while (Begin < End) 00075 { 00076 wchar_t Char = 0; 00077 mbtowc(&Char, &*Begin, 1); 00078 Begin++; 00079 *Output++ = static_cast<Uint32>(Char); 00080 } 00081 00082 #else 00083 00084 // Get the facet of the locale which deals with character conversion 00085 const std::ctype<wchar_t>& Facet = std::use_facet< std::ctype<wchar_t> >(Locale); 00086 00087 // Use the facet to convert each character of the input string 00088 while (Begin < End) 00089 *Output++ = static_cast<Uint32>(Facet.widen(*Begin++)); 00090 00091 #endif 00092 00093 return Output; 00094 } 
00095 00096 00101 template <typename In, typename Out> 00102 inline Out Unicode::UTF8ToUTF16(In Begin, In End, Out Output, Uint16 Replacement) 00103 { 00104 while (Begin < End) 00105 { 00106 Uint32 c = 0; 00107 int TrailingBytes = UTF8TrailingBytes[static_cast<int>(*Begin)]; 00108 if (Begin + TrailingBytes < End) 00109 { 00110 // First decode the UTF-8 character 00111 switch (TrailingBytes) 00112 { 00113 case 5 : c += *Begin++; c <<= 6; 00114 case 4 : c += *Begin++; c <<= 6; 00115 case 3 : c += *Begin++; c <<= 6; 00116 case 2 : c += *Begin++; c <<= 6; 00117 case 1 : c += *Begin++; c <<= 6; 00118 case 0 : c += *Begin++; 00119 } 00120 c -= UTF8Offsets[TrailingBytes]; 00121 00122 // Then encode it in UTF-16 00123 if (c < 0xFFFF) 00124 { 00125 // Character can be converted directly to 16 bits, just need to check it's in the valid range 00126 if ((c >= 0xD800) && (c <= 0xDFFF)) 00127 { 00128 // Invalid character (this range is reserved) 00129 if (Replacement) 00130 *Output++ = Replacement; 00131 } 00132 else 00133 { 00134 // Valid character directly convertible to 16 bits 00135 *Output++ = static_cast<Uint16>(c); 00136 } 00137 } 00138 else if (c > 0x0010FFFF) 00139 { 00140 // Invalid character (greater than the maximum unicode value) 00141 if (Replacement) 00142 *Output++ = Replacement; 00143 } 00144 else 00145 { 00146 // Character will be converted to 2 UTF-16 elements 00147 c -= 0x0010000; 00148 *Output++ = static_cast<Uint16>((c >> 10) + 0xD800); 00149 *Output++ = static_cast<Uint16>((c & 0x3FFUL) + 0xDC00); 00150 } 00151 } 00152 } 00153 00154 return Output; 00155 } 00156 00157 00162 template <typename In, typename Out> 00163 inline Out Unicode::UTF8ToUTF32(In Begin, In End, Out Output, Uint32 Replacement) 00164 { 00165 while (Begin < End) 00166 { 00167 Uint32 c = 0; 00168 int TrailingBytes = UTF8TrailingBytes[static_cast<int>(*Begin)]; 00169 if (Begin + TrailingBytes < End) 00170 { 00171 // First decode the UTF-8 character 00172 switch (TrailingBytes) 00173 { 00174 
case 5 : c += *Begin++; c <<= 6; 00175 case 4 : c += *Begin++; c <<= 6; 00176 case 3 : c += *Begin++; c <<= 6; 00177 case 2 : c += *Begin++; c <<= 6; 00178 case 1 : c += *Begin++; c <<= 6; 00179 case 0 : c += *Begin++; 00180 } 00181 c -= UTF8Offsets[TrailingBytes]; 00182 00183 // Then write it if valid 00184 if ((c < 0xD800) || (c > 0xDFFF)) 00185 { 00186 // Valid UTF-32 character 00187 *Output++ = c; 00188 } 00189 else 00190 { 00191 // Invalid UTF-32 character 00192 if (Replacement) 00193 *Output++ = Replacement; 00194 } 00195 } 00196 } 00197 00198 return Output; 00199 } 00200 00201 00206 template <typename In, typename Out> 00207 inline Out Unicode::UTF16ToUTF8(In Begin, In End, Out Output, Uint8 Replacement) 00208 { 00209 while (Begin < End) 00210 { 00211 Uint32 c = *Begin++; 00212 00213 // If it's a surrogate pair, first convert to a single UTF-32 character 00214 if ((c >= 0xD800) && (c <= 0xDBFF)) 00215 { 00216 if (Begin < End) 00217 { 00218 // The second element is valid : convert the two elements to a UTF-32 character 00219 Uint32 d = *Begin++; 00220 if ((d >= 0xDC00) && (d <= 0xDFFF)) 00221 c = static_cast<Uint32>(((c - 0xD800) << 10) + (d - 0xDC00) + 0x0010000); 00222 } 00223 else 00224 { 00225 // Invalid second element 00226 if (Replacement) 00227 *Output++ = Replacement; 00228 } 00229 } 00230 00231 // Then convert to UTF-8 00232 if (c > 0x0010FFFF) 00233 { 00234 // Invalid character (greater than the maximum unicode value) 00235 if (Replacement) 00236 *Output++ = Replacement; 00237 } 00238 else 00239 { 00240 // Valid character 00241 00242 // Get number of bytes to write 00243 int BytesToWrite = 1; 00244 if (c < 0x80) BytesToWrite = 1; 00245 else if (c < 0x800) BytesToWrite = 2; 00246 else if (c < 0x10000) BytesToWrite = 3; 00247 else if (c <= 0x0010FFFF) BytesToWrite = 4; 00248 00249 // Extract bytes to write 00250 Uint8 Bytes[4]; 00251 switch (BytesToWrite) 00252 { 00253 case 4 : Bytes[3] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6; 00254 case 3 : 
Bytes[2] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6; 00255 case 2 : Bytes[1] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6; 00256 case 1 : Bytes[0] = static_cast<Uint8> (c | UTF8FirstBytes[BytesToWrite]); 00257 } 00258 00259 // Add them to the output 00260 const Uint8* CurByte = Bytes; 00261 switch (BytesToWrite) 00262 { 00263 case 4 : *Output++ = *CurByte++; 00264 case 3 : *Output++ = *CurByte++; 00265 case 2 : *Output++ = *CurByte++; 00266 case 1 : *Output++ = *CurByte++; 00267 } 00268 } 00269 } 00270 00271 return Output; 00272 } 00273 00274 00279 template <typename In, typename Out> 00280 inline Out Unicode::UTF16ToUTF32(In Begin, In End, Out Output, Uint32 Replacement) 00281 { 00282 while (Begin < End) 00283 { 00284 Uint16 c = *Begin++; 00285 if ((c >= 0xD800) && (c <= 0xDBFF)) 00286 { 00287 // We have a surrogate pair, ie. a character composed of two elements 00288 if (Begin < End) 00289 { 00290 Uint16 d = *Begin++; 00291 if ((d >= 0xDC00) && (d <= 0xDFFF)) 00292 { 00293 // The second element is valid : convert the two elements to a UTF-32 character 00294 *Output++ = static_cast<Uint32>(((c - 0xD800) << 10) + (d - 0xDC00) + 0x0010000); 00295 } 00296 else 00297 { 00298 // Invalid second element 00299 if (Replacement) 00300 *Output++ = Replacement; 00301 } 00302 } 00303 } 00304 else if ((c >= 0xDC00) && (c <= 0xDFFF)) 00305 { 00306 // Invalid character 00307 if (Replacement) 00308 *Output++ = Replacement; 00309 } 00310 else 00311 { 00312 // Valid character directly convertible to UTF-32 00313 *Output++ = static_cast<Uint32>(c); 00314 } 00315 } 00316 00317 return Output; 00318 } 00319 00320 00325 template <typename In, typename Out> 00326 inline Out Unicode::UTF32ToUTF8(In Begin, In End, Out Output, Uint8 Replacement) 00327 { 00328 while (Begin < End) 00329 { 00330 Uint32 c = *Begin++; 00331 if (c > 0x0010FFFF) 00332 { 00333 // Invalid character (greater than the maximum unicode value) 00334 if (Replacement) 00335 *Output++ = Replacement; 00336 } 00337 
else 00338 { 00339 // Valid character 00340 00341 // Get number of bytes to write 00342 int BytesToWrite = 1; 00343 if (c < 0x80) BytesToWrite = 1; 00344 else if (c < 0x800) BytesToWrite = 2; 00345 else if (c < 0x10000) BytesToWrite = 3; 00346 else if (c <= 0x0010FFFF) BytesToWrite = 4; 00347 00348 // Extract bytes to write 00349 Uint8 Bytes[4]; 00350 switch (BytesToWrite) 00351 { 00352 case 4 : Bytes[3] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6; 00353 case 3 : Bytes[2] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6; 00354 case 2 : Bytes[1] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6; 00355 case 1 : Bytes[0] = static_cast<Uint8> (c | UTF8FirstBytes[BytesToWrite]); 00356 } 00357 00358 // Add them to the output 00359 const Uint8* CurByte = Bytes; 00360 switch (BytesToWrite) 00361 { 00362 case 4 : *Output++ = *CurByte++; 00363 case 3 : *Output++ = *CurByte++; 00364 case 2 : *Output++ = *CurByte++; 00365 case 1 : *Output++ = *CurByte++; 00366 } 00367 } 00368 } 00369 00370 return Output; 00371 } 00372 00373 00378 template <typename In, typename Out> 00379 inline Out Unicode::UTF32ToUTF16(In Begin, In End, Out Output, Uint16 Replacement) 00380 { 00381 while (Begin < End) 00382 { 00383 Uint32 c = *Begin++; 00384 if (c < 0xFFFF) 00385 { 00386 // Character can be converted directly to 16 bits, just need to check it's in the valid range 00387 if ((c >= 0xD800) && (c <= 0xDFFF)) 00388 { 00389 // Invalid character (this range is reserved) 00390 if (Replacement) 00391 *Output++ = Replacement; 00392 } 00393 else 00394 { 00395 // Valid character directly convertible to 16 bits 00396 *Output++ = static_cast<Uint16>(c); 00397 } 00398 } 00399 else if (c > 0x0010FFFF) 00400 { 00401 // Invalid character (greater than the maximum unicode value) 00402 if (Replacement) 00403 *Output++ = Replacement; 00404 } 00405 else 00406 { 00407 // Character will be converted to 2 UTF-16 elements 00408 c -= 0x0010000; 00409 *Output++ = static_cast<Uint16>((c >> 10) + 0xD800); 00410 
*Output++ = static_cast<Uint16>((c & 0x3FFUL) + 0xDC00); 00411 } 00412 } 00413 00414 return Output; 00415 } 00416 00417 00421 template <typename In> 00422 inline std::size_t Unicode::GetUTF8Length(In Begin, In End) 00423 { 00424 std::size_t Length = 0; 00425 while (Begin < End) 00426 { 00427 int NbBytes = UTF8TrailingBytes[static_cast<int>(*Begin)]; 00428 if (Begin + NbBytes < End) 00429 ++Length; 00430 00431 Begin += NbBytes + 1; 00432 } 00433 00434 return Length; 00435 } 00436 00437 00441 template <typename In> 00442 inline std::size_t Unicode::GetUTF16Length(In Begin, In End) 00443 { 00444 std::size_t Length = 0; 00445 while (Begin < End) 00446 { 00447 if ((*Begin >= 0xD800) && (*Begin <= 0xDBFF)) 00448 { 00449 ++Begin; 00450 if ((Begin < End) && ((*Begin >= 0xDC00) && (*Begin <= 0xDFFF))) 00451 { 00452 ++Length; 00453 } 00454 } 00455 else 00456 { 00457 ++Length; 00458 } 00459 00460 ++Begin; 00461 } 00462 00463 return Length; 00464 } 00465 00466 00470 template <typename In> 00471 inline std::size_t Unicode::GetUTF32Length(In Begin, In End) 00472 { 00473 return End - Begin; 00474 }  ::  Copyright © 2007-2008 Laurent Gomila, all rights reserved  ::  Documentation generated by doxygen 1.5.2  :: 

Wyszukiwarka

Podobne podstrony:
Resource 8inl source
ResourcePtr 8inl source
Rect 8inl source
Unicode 8hpp source
Selector 8inl source
Vector3 8inl source
Vector2 8inl source
Matrix3 8inl source
Unicode?pp source
source30
Matrix3?pp source
Thread?pp source
arm biquad ?scade ?1 ?st q31? source
arm conv ?2? source
arm mat mult q15? source
arm fir lattice init q31? source
arm fir ?cimate ?st q15? source

więcej podobnych podstron