Logo Search packages:      
Sourcecode: dc-qt version File versions  Download package

Text.cpp

Go to the documentation of this file.
/* 
 * Copyright (C) 2001-2005 Jacek Sieka, arnetheduck on gmail point com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

#include "stdinc.h"
#include "DCPlusPlus.h"

#include "Text.h"

char Text::asciiLower[128];
wchar_t Text::lower[65536];

// When using GNU C library; setlocale should be called before Text::initialize 

void Text::initialize() {
      for(size_t i = 0; i < 65536; ++i) {
#ifdef _WIN32
            lower[i] = (wchar_t)CharLowerW((LPWSTR)i);
#else
            lower[i] = (char)towlower(i);
#endif
      }

      for(size_t i = 0; i < 128; ++i) {
            asciiLower[i] = (char)lower[i];
      }

}

int Text::utf8ToWc(const char* str, wchar_t& c) {
      u_int8_t c0 = (u_int8_t)str[0];
      if(c0 & 0x80) {                                                   // 1xxx xxxx
            if(c0 & 0x40) {                                             // 11xx xxxx
                  if(c0 & 0x20) {                                       // 111x xxxx
                        if(c0 & 0x10) {                                 // 1111 xxxx
                              int n = -4;
                              if(c0 & 0x08) {                           // 1111 1xxx
                                    n = -5;
                                    if(c0 & 0x04) {                     // 1111 11xx
                                          if(c0 & 0x02) {               // 1111 111x
                                                return -1;
                                          }
                                          n = -6;
                                    }
                              }
                              int i = -1;
                              while(i > n && (str[abs(i)] & 0x80) == 0x80)
                                    --i;
                              return i;
                        } else {          // 1110xxxx
                              u_int8_t c1 = (u_int8_t)str[1];
                              if((c1 & (0x80 | 0x40)) != 0x80)
                                    return -1;

                              u_int8_t c2 = (u_int8_t)str[2];
                              if((c2 & (0x80 | 0x40)) != 0x80)
                                    return -2;

                              // Ugly utf-16 surrogate catch
                              if((c0 & 0x0f) == 0x0d && (c1 & 0x3c) >= (0x08 << 2))
                                    return -3;

                              // Overlong encoding
                              if(c0 == (0x80 | 0x40 | 0x20) && (c1 & (0x80 | 0x40 | 0x20)) == 0x80)
                                    return -3;

                              c = (((wchar_t)c0 & 0x0f) << 12) |
                                    (((wchar_t)c1 & 0x3f) << 6) |
                                    ((wchar_t)c2 & 0x3f);

                              return 3;
                        }
                  } else {                      // 110xxxxx
                        u_int8_t c1 = (u_int8_t)str[1];
                        if((c1 & (0x80 | 0x40)) != 0x80)
                              return -1;

                        // Overlong encoding
                        if((c0 & ~1) == (0x80 | 0x40))
                              return -2;

                        c = (((wchar_t)c0 & 0x1f) << 6) |
                              ((wchar_t)c1 & 0x3f);
                        return 2;
                  }
            } else {                            // 10xxxxxx
                  return -1;
            }
      } else {                                  // 0xxxxxxx
            c = (unsigned char)str[0];
            return 1;
      }
      dcassert(0);
}

void Text::wcToUtf8(wchar_t c, string& str) {
      if(c >= 0x0800) {
            str += (char)(0x80 | 0x40 | 0x20  | (c >> 12));
            str += (char)(0x80 | ((c >> 6) & 0x3f));
            str += (char)(0x80 | (c & 0x3f));
      } else if(c >= 0x0080) {
            str += (char)(0x80 | 0x40 | (c >> 6));
            str += (char)(0x80 | (c & 0x3f));
      } else {
            str += (char)c;
      }
}

string& Text::acpToUtf8(const string& str, string& tmp) throw() {
      wstring wtmp;
      return wideToUtf8(acpToWide(str, wtmp), tmp);
}

wstring& Text::acpToWide(const string& str, wstring& tmp) throw() {
#ifdef _WIN32
      int n = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, str.c_str(), (int)str.length(), NULL, 0);
      if(n == 0) {
            return tmp;
      }

      tmp.resize(n);
      n = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, str.c_str(), (int)str.length(), &tmp[0], n);
      if(n == 0) {
            tmp.clear();
            return tmp;
      }
      return tmp;
#else
      //convert from current locale multibyte (equivalent to CP_ACP?) to wide char
      /*    const char* src = str.c_str();
      int n = mbsrtowcs(NULL, &src, 0, NULL);
      if (n < 1) {
            return tmp;
      }
      tmp.resize(n);
      n = mbsrtowcs(&tmp[0], &src, n, NULL);
      if (n < 1) {
            tmp.clear();
            return tmp;
            } */

      // Rikards CODE
      const char* src = str.c_str();
      int length = 0;
      while(*src) {length++;src++;}
      src = str.c_str();
      wchar_t *wdata = (wchar_t*)calloc(length+1,sizeof(wchar_t));
      wchar_t *p = wdata;
      while(*src) {
        *p = wchar_t( (unsigned char)(*src));
        src++;
        p++;
      }
      *p = 0;

      tmp.assign(wdata);
      free(wdata);
      // end Rikards CODE

      return tmp;
#endif
}

string& Text::wideToUtf8(const wstring& str, string& tgt) throw() {
      string::size_type n = str.length();
      for(string::size_type i = 0; i < n; ++i) {
            wcToUtf8(str[i], tgt);
      }
      return tgt;
}

string& Text::wideToAcp(const wstring& str, string& tmp) throw() {
#ifdef _WIN32
      int n = WideCharToMultiByte(CP_ACP, 0, str.c_str(), (int)str.length(), NULL, 0, NULL, NULL);
      if(n == 0) {
            return tmp;
      }

      tmp.resize(n);
      n = WideCharToMultiByte(CP_ACP, 0, str.c_str(), (int)str.length(), &tmp[0], n, NULL, NULL);
      if(n == 0) {
            tmp.clear();
            return tmp;
      }
      return tmp;
#else
      const wchar_t* src = str.c_str();
      //int n = wcsrtombs(NULL, &src, 0, NULL);
      //if(n < 1) {
      //    return tmp;
      //}
      //tmp.resize(n);
      //n = wcsrtombs(&tmp[0], &src, n, NULL);
      //if(n < 1) {
      //    tmp.clear();
      //    return tmp;
      //}
      // RIKARD CODE
      tmp.resize( str.size() );
      for(int i=0;i<str.size();i++) {
      tmp[i]=(src[i]>0xff) ? '?' :(unsigned char)src[i];
      }
      //END RIKARD CODE
      return tmp;
#endif
}

bool Text::validateUtf8(const string& str) throw() {
      string::size_type i = 0;
      while(i < str.length()) {
            wchar_t dummy = 0;
            int j = utf8ToWc(&str[i], dummy);
            if(j < 0)
                  return false;
            i += j;
      }
      return true;
}

string& Text::utf8ToAcp(const string& str, string& tmp) throw() {
      wstring wtmp;
      return wideToAcp(utf8ToWide(str, wtmp), tmp);
}

wstring& Text::utf8ToWide(const string& str, wstring& tgt) throw() {
      tgt.reserve(str.length());
      string::size_type n = str.length();
      for(string::size_type i = 0; i < n; ) {
            wchar_t c = 0;
            int x = utf8ToWc(str.c_str() + i, c);
            if(x < 0) {
                  tgt += '_';
                  i += abs(x);
            } else {
                  i += x;
                  tgt += c;
            }
      }
      return tgt;
}

wstring& Text::toLower(const wstring& str, wstring& tmp) throw() {
      tmp.reserve(str.length());
      wstring::const_iterator end = str.end();
      for(wstring::const_iterator i = str.begin(); i != end; ++i) {
            tmp += toLower(*i);
      }
      return tmp;
}

string& Text::toLower(const string& str, string& tmp) throw() {
      tmp.reserve(str.length());
      const char* end = &str[0] + str.length();
      for(const char* p = &str[0]; p < end;) {
            wchar_t c = 0;
            int n = utf8ToWc(p, c);
            if(n < 0) {
                  tmp += '_';
                  p += abs(n);
            } else {
                  p += n;
                  wcToUtf8(toLower(c), tmp);
            }
      }
      return tmp;
}

/**
 * @file
 * $Id: Text.cpp,v 1.4 2005/09/01 18:09:27 olof Exp $
 */

Generated by  Doxygen 1.6.0   Back to index