utf8-internal.h 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. // -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
  2. // Copyright (C) 2013 Henner Zeller <h.zeller@acm.org>
  3. //
  4. // This program is free software; you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation version 2.
  7. //
  8. // This program is distributed in the hope that it will be useful,
  9. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. // GNU General Public License for more details.
  12. //
  13. // You should have received a copy of the GNU General Public License
  14. // along with this program. If not, see <http://gnu.org/licenses/gpl-2.0.txt>
  15. #ifndef RPI_GRAPHICS_UTF8_H
  16. #define RPI_GRAPHICS_UTF8_H
  17. #include <stdint.h>
  18. // Utility function that reads UTF-8 encoded codepoints from byte iterator.
  19. // No error checking, we assume string is UTF-8 clean.
  20. template <typename byte_iterator>
  21. uint32_t utf8_next_codepoint(byte_iterator &it) {
  22. uint32_t cp = *it++;
  23. if (cp < 0x80) {
  24. return cp; // iterator already incremented.
  25. }
  26. else if ((cp & 0xE0) == 0xC0) {
  27. cp = ((cp & 0x1F) << 6) + (*it & 0x3F);
  28. }
  29. else if ((cp & 0xF0) == 0xE0) {
  30. cp = ((cp & 0x0F) << 12) + ((*it & 0x3F) << 6);
  31. cp += (*++it & 0x3F);
  32. }
  33. else if ((cp & 0xF8) == 0xF0) {
  34. cp = ((cp & 0x07) << 18) + ((*it & 0x3F) << 12);
  35. cp += (*++it & 0x3F) << 6;
  36. cp += (*++it & 0x3F);
  37. }
  38. else if ((cp & 0xFC) == 0xF8) {
  39. cp = ((cp & 0x03) << 24) + ((*it & 0x3F) << 18);
  40. cp += (*++it & 0x3F) << 12;
  41. cp += (*++it & 0x3F) << 6;
  42. cp += (*++it & 0x3F);
  43. }
  44. else if ((cp & 0xFE) == 0xFC) {
  45. cp = ((cp & 0x01) << 30) + ((*it & 0x3F) << 24);
  46. cp += (*++it & 0x3F) << 18;
  47. cp += (*++it & 0x3F) << 12;
  48. cp += (*++it & 0x3F) << 6;
  49. cp += (*++it & 0x3F);
  50. }
  51. ++it;
  52. return cp;
  53. }
  54. #endif // RPI_GRAPHICS_UTF8_H