SFML - Simple and Fast Multimedia Library
Main Page
Namespaces
Classes
Files
File List
Unicode.inl
00001
00002 //
00003 // SFML - Simple and Fast Multimedia Library
00004 // Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com)
00005 //
00006 // This software is provided 'as-is', without any express or implied warranty.
00007 // In no event will the authors be held liable for any damages arising from the use of this software.
00008 //
00009 // Permission is granted to anyone to use this software for any purpose,
00010 // including commercial applications, and to alter it and redistribute it freely,
00011 // subject to the following restrictions:
00012 //
00013 // 1. The origin of this software must not be misrepresented;
00014 // you must not claim that you wrote the original software.
00015 // If you use this software in a product, an acknowledgment
00016 // in the product documentation would be appreciated but is not required.
00017 //
00018 // 2. Altered source versions must be plainly marked as such,
00019 // and must not be misrepresented as being the original software.
00020 //
00021 // 3. This notice may not be removed or altered from any source distribution.
00022 //
00024
00025
00030 template <typename In, typename Out>
00031 inline Out Unicode::UTF32ToANSI(In Begin, In End, Out Output, char Replacement, const std::locale& Locale)
00032 {
00033 #ifdef __MINGW32__
00034
00035 // MinGW has a almost no support for unicode stuff
00036 // As a consequence, the MinGW version of this function can only use the default locale
00037 // and ignores the one passed as parameter
00038 while (Begin < End)
00039 {
00040 char Char = 0;
00041 if (wctomb(&Char, static_cast<wchar_t>(*Begin++)) >= 0)
00042 *Output++ = Char;
00043 else if (Replacement)
00044 *Output++ = Replacement;
00045 }
00046
00047 #else
00048
00049 // Get the facet of the locale which deals with character conversion
00050 const std::ctype<wchar_t>& Facet = std::use_facet< std::ctype<wchar_t> >(Locale);
00051
00052 // Use the facet to convert each character of the input string
00053 while (Begin < End)
00054 *Output++ = Facet.narrow(static_cast<wchar_t>(*Begin++), Replacement);
00055
00056 #endif
00057
00058 return Output;
00059 }
00060
00061
00066 template <typename In, typename Out>
00067 inline Out Unicode::ANSIToUTF32(In Begin, In End, Out Output, const std::locale& Locale)
00068 {
00069 #ifdef __MINGW32__
00070
00071 // MinGW has a almost no support for unicode stuff
00072 // As a consequence, the MinGW version of this function can only use the default locale
00073 // and ignores the one passed as parameter
00074 while (Begin < End)
00075 {
00076 wchar_t Char = 0;
00077 mbtowc(&Char, &*Begin, 1);
00078 Begin++;
00079 *Output++ = static_cast<Uint32>(Char);
00080 }
00081
00082 #else
00083
00084 // Get the facet of the locale which deals with character conversion
00085 const std::ctype<wchar_t>& Facet = std::use_facet< std::ctype<wchar_t> >(Locale);
00086
00087 // Use the facet to convert each character of the input string
00088 while (Begin < End)
00089 *Output++ = static_cast<Uint32>(Facet.widen(*Begin++));
00090
00091 #endif
00092
00093 return Output;
00094 }
00095
00096
00101 template <typename In, typename Out>
00102 inline Out Unicode::UTF8ToUTF16(In Begin, In End, Out Output, Uint16 Replacement)
00103 {
00104 while (Begin < End)
00105 {
00106 Uint32 c = 0;
00107 int TrailingBytes = UTF8TrailingBytes[static_cast<int>(*Begin)];
00108 if (Begin + TrailingBytes < End)
00109 {
00110 // First decode the UTF-8 character
00111 switch (TrailingBytes)
00112 {
00113 case 5 : c += *Begin++; c <<= 6;
00114 case 4 : c += *Begin++; c <<= 6;
00115 case 3 : c += *Begin++; c <<= 6;
00116 case 2 : c += *Begin++; c <<= 6;
00117 case 1 : c += *Begin++; c <<= 6;
00118 case 0 : c += *Begin++;
00119 }
00120 c -= UTF8Offsets[TrailingBytes];
00121
00122 // Then encode it in UTF-16
00123 if (c < 0xFFFF)
00124 {
00125 // Character can be converted directly to 16 bits, just need to check it's in the valid range
00126 if ((c >= 0xD800) && (c <= 0xDFFF))
00127 {
00128 // Invalid character (this range is reserved)
00129 if (Replacement)
00130 *Output++ = Replacement;
00131 }
00132 else
00133 {
00134 // Valid character directly convertible to 16 bits
00135 *Output++ = static_cast<Uint16>(c);
00136 }
00137 }
00138 else if (c > 0x0010FFFF)
00139 {
00140 // Invalid character (greater than the maximum unicode value)
00141 if (Replacement)
00142 *Output++ = Replacement;
00143 }
00144 else
00145 {
00146 // Character will be converted to 2 UTF-16 elements
00147 c -= 0x0010000;
00148 *Output++ = static_cast<Uint16>((c >> 10) + 0xD800);
00149 *Output++ = static_cast<Uint16>((c & 0x3FFUL) + 0xDC00);
00150 }
00151 }
00152 }
00153
00154 return Output;
00155 }
00156
00157
00162 template <typename In, typename Out>
00163 inline Out Unicode::UTF8ToUTF32(In Begin, In End, Out Output, Uint32 Replacement)
00164 {
00165 while (Begin < End)
00166 {
00167 Uint32 c = 0;
00168 int TrailingBytes = UTF8TrailingBytes[static_cast<int>(*Begin)];
00169 if (Begin + TrailingBytes < End)
00170 {
00171 // First decode the UTF-8 character
00172 switch (TrailingBytes)
00173 {
00174 case 5 : c += *Begin++; c <<= 6;
00175 case 4 : c += *Begin++; c <<= 6;
00176 case 3 : c += *Begin++; c <<= 6;
00177 case 2 : c += *Begin++; c <<= 6;
00178 case 1 : c += *Begin++; c <<= 6;
00179 case 0 : c += *Begin++;
00180 }
00181 c -= UTF8Offsets[TrailingBytes];
00182
00183 // Then write it if valid
00184 if ((c < 0xD800) || (c > 0xDFFF))
00185 {
00186 // Valid UTF-32 character
00187 *Output++ = c;
00188 }
00189 else
00190 {
00191 // Invalid UTF-32 character
00192 if (Replacement)
00193 *Output++ = Replacement;
00194 }
00195 }
00196 }
00197
00198 return Output;
00199 }
00200
00201
00206 template <typename In, typename Out>
00207 inline Out Unicode::UTF16ToUTF8(In Begin, In End, Out Output, Uint8 Replacement)
00208 {
00209 while (Begin < End)
00210 {
00211 Uint32 c = *Begin++;
00212
00213 // If it's a surrogate pair, first convert to a single UTF-32 character
00214 if ((c >= 0xD800) && (c <= 0xDBFF))
00215 {
00216 if (Begin < End)
00217 {
00218 // The second element is valid : convert the two elements to a UTF-32 character
00219 Uint32 d = *Begin++;
00220 if ((d >= 0xDC00) && (d <= 0xDFFF))
00221 c = static_cast<Uint32>(((c - 0xD800) << 10) + (d - 0xDC00) + 0x0010000);
00222 }
00223 else
00224 {
00225 // Invalid second element
00226 if (Replacement)
00227 *Output++ = Replacement;
00228 }
00229 }
00230
00231 // Then convert to UTF-8
00232 if (c > 0x0010FFFF)
00233 {
00234 // Invalid character (greater than the maximum unicode value)
00235 if (Replacement)
00236 *Output++ = Replacement;
00237 }
00238 else
00239 {
00240 // Valid character
00241
00242 // Get number of bytes to write
00243 int BytesToWrite = 1;
00244 if (c < 0x80) BytesToWrite = 1;
00245 else if (c < 0x800) BytesToWrite = 2;
00246 else if (c < 0x10000) BytesToWrite = 3;
00247 else if (c <= 0x0010FFFF) BytesToWrite = 4;
00248
00249 // Extract bytes to write
00250 Uint8 Bytes[4];
00251 switch (BytesToWrite)
00252 {
00253 case 4 : Bytes[3] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
00254 case 3 : Bytes[2] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
00255 case 2 : Bytes[1] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
00256 case 1 : Bytes[0] = static_cast<Uint8> (c | UTF8FirstBytes[BytesToWrite]);
00257 }
00258
00259 // Add them to the output
00260 const Uint8* CurByte = Bytes;
00261 switch (BytesToWrite)
00262 {
00263 case 4 : *Output++ = *CurByte++;
00264 case 3 : *Output++ = *CurByte++;
00265 case 2 : *Output++ = *CurByte++;
00266 case 1 : *Output++ = *CurByte++;
00267 }
00268 }
00269 }
00270
00271 return Output;
00272 }
00273
00274
00279 template <typename In, typename Out>
00280 inline Out Unicode::UTF16ToUTF32(In Begin, In End, Out Output, Uint32 Replacement)
00281 {
00282 while (Begin < End)
00283 {
00284 Uint16 c = *Begin++;
00285 if ((c >= 0xD800) && (c <= 0xDBFF))
00286 {
00287 // We have a surrogate pair, ie. a character composed of two elements
00288 if (Begin < End)
00289 {
00290 Uint16 d = *Begin++;
00291 if ((d >= 0xDC00) && (d <= 0xDFFF))
00292 {
00293 // The second element is valid : convert the two elements to a UTF-32 character
00294 *Output++ = static_cast<Uint32>(((c - 0xD800) << 10) + (d - 0xDC00) + 0x0010000);
00295 }
00296 else
00297 {
00298 // Invalid second element
00299 if (Replacement)
00300 *Output++ = Replacement;
00301 }
00302 }
00303 }
00304 else if ((c >= 0xDC00) && (c <= 0xDFFF))
00305 {
00306 // Invalid character
00307 if (Replacement)
00308 *Output++ = Replacement;
00309 }
00310 else
00311 {
00312 // Valid character directly convertible to UTF-32
00313 *Output++ = static_cast<Uint32>(c);
00314 }
00315 }
00316
00317 return Output;
00318 }
00319
00320
00325 template <typename In, typename Out>
00326 inline Out Unicode::UTF32ToUTF8(In Begin, In End, Out Output, Uint8 Replacement)
00327 {
00328 while (Begin < End)
00329 {
00330 Uint32 c = *Begin++;
00331 if (c > 0x0010FFFF)
00332 {
00333 // Invalid character (greater than the maximum unicode value)
00334 if (Replacement)
00335 *Output++ = Replacement;
00336 }
00337 else
00338 {
00339 // Valid character
00340
00341 // Get number of bytes to write
00342 int BytesToWrite = 1;
00343 if (c < 0x80) BytesToWrite = 1;
00344 else if (c < 0x800) BytesToWrite = 2;
00345 else if (c < 0x10000) BytesToWrite = 3;
00346 else if (c <= 0x0010FFFF) BytesToWrite = 4;
00347
00348 // Extract bytes to write
00349 Uint8 Bytes[4];
00350 switch (BytesToWrite)
00351 {
00352 case 4 : Bytes[3] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
00353 case 3 : Bytes[2] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
00354 case 2 : Bytes[1] = static_cast<Uint8>((c | 0x80) & 0xBF); c >>= 6;
00355 case 1 : Bytes[0] = static_cast<Uint8> (c | UTF8FirstBytes[BytesToWrite]);
00356 }
00357
00358 // Add them to the output
00359 const Uint8* CurByte = Bytes;
00360 switch (BytesToWrite)
00361 {
00362 case 4 : *Output++ = *CurByte++;
00363 case 3 : *Output++ = *CurByte++;
00364 case 2 : *Output++ = *CurByte++;
00365 case 1 : *Output++ = *CurByte++;
00366 }
00367 }
00368 }
00369
00370 return Output;
00371 }
00372
00373
00378 template <typename In, typename Out>
00379 inline Out Unicode::UTF32ToUTF16(In Begin, In End, Out Output, Uint16 Replacement)
00380 {
00381 while (Begin < End)
00382 {
00383 Uint32 c = *Begin++;
00384 if (c < 0xFFFF)
00385 {
00386 // Character can be converted directly to 16 bits, just need to check it's in the valid range
00387 if ((c >= 0xD800) && (c <= 0xDFFF))
00388 {
00389 // Invalid character (this range is reserved)
00390 if (Replacement)
00391 *Output++ = Replacement;
00392 }
00393 else
00394 {
00395 // Valid character directly convertible to 16 bits
00396 *Output++ = static_cast<Uint16>(c);
00397 }
00398 }
00399 else if (c > 0x0010FFFF)
00400 {
00401 // Invalid character (greater than the maximum unicode value)
00402 if (Replacement)
00403 *Output++ = Replacement;
00404 }
00405 else
00406 {
00407 // Character will be converted to 2 UTF-16 elements
00408 c -= 0x0010000;
00409 *Output++ = static_cast<Uint16>((c >> 10) + 0xD800);
00410 *Output++ = static_cast<Uint16>((c & 0x3FFUL) + 0xDC00);
00411 }
00412 }
00413
00414 return Output;
00415 }
00416
00417
00421 template <typename In>
00422 inline std::size_t Unicode::GetUTF8Length(In Begin, In End)
00423 {
00424 std::size_t Length = 0;
00425 while (Begin < End)
00426 {
00427 int NbBytes = UTF8TrailingBytes[static_cast<int>(*Begin)];
00428 if (Begin + NbBytes < End)
00429 ++Length;
00430
00431 Begin += NbBytes + 1;
00432 }
00433
00434 return Length;
00435 }
00436
00437
00441 template <typename In>
00442 inline std::size_t Unicode::GetUTF16Length(In Begin, In End)
00443 {
00444 std::size_t Length = 0;
00445 while (Begin < End)
00446 {
00447 if ((*Begin >= 0xD800) && (*Begin <= 0xDBFF))
00448 {
00449 ++Begin;
00450 if ((Begin < End) && ((*Begin >= 0xDC00) && (*Begin <= 0xDFFF)))
00451 {
00452 ++Length;
00453 }
00454 }
00455 else
00456 {
00457 ++Length;
00458 }
00459
00460 ++Begin;
00461 }
00462
00463 return Length;
00464 }
00465
00466
00470 template <typename In>
00471 inline std::size_t Unicode::GetUTF32Length(In Begin, In End)
00472 {
00473 return End - Begin;
00474 }
:: Copyright © 2007-2008 Laurent Gomila, all rights reserved ::
Documentation generated by doxygen 1.5.2 ::
Wyszukiwarka
Podobne podstrony:
Resource 8inl sourceResourcePtr 8inl sourceRect 8inl sourceUnicode 8hpp sourceSelector 8inl sourceVector3 8inl sourceVector2 8inl sourceMatrix3 8inl sourceUnicode?pp sourcesource30Matrix3?pp sourceThread?pp sourcearm biquad ?scade ?1 ?st q31? sourcearm conv ?2? sourcearm mat mult q15? sourcearm fir lattice init q31? sourcearm fir ?cimate ?st q15? sourcewięcej podobnych podstron