/src/libsass/src/utf8_string.cpp
Line | Count | Source |
1 | | // sass.hpp must go before all system headers to get the |
2 | | // __EXTENSIONS__ fix on Solaris. |
3 | | #include "sass.hpp" |
4 | | |
5 | | #include <string> |
6 | | #include <vector> |
7 | | #include <cstdlib> |
8 | | #include <cmath> |
9 | | |
10 | | #include "utf8.h" |
11 | | |
12 | | namespace Sass { |
13 | | namespace UTF_8 { |
14 | | |
15 | | // naming conventions: |
16 | | // offset: raw byte offset (0 based) |
17 | | // position: code point offset (0 based) |
18 | | // index: code point offset (1 based or negative) |
19 | | |
20 | | // function that will count the number of code points (utf-8 characters) from the given beginning to the given end |
21 | 0 | size_t code_point_count(const sass::string& str, size_t start, size_t end) { |
22 | 0 | return utf8::distance(str.begin() + start, str.begin() + end); |
23 | 0 | } |
24 | | |
25 | 0 | size_t code_point_count(const sass::string& str) { |
26 | 0 | return utf8::distance(str.begin(), str.end()); |
27 | 0 | } |
28 | | |
29 | | // function that will return the byte offset at a code point position |
30 | 0 | size_t offset_at_position(const sass::string& str, size_t position) { |
31 | 0 | sass::string::const_iterator it = str.begin(); |
32 | 0 | utf8::advance(it, position, str.end()); |
33 | 0 | return std::distance(str.begin(), it); |
34 | 0 | } |
35 | | |
36 | | // function that returns number of bytes in a character at offset |
37 | 0 | size_t code_point_size_at_offset(const sass::string& str, size_t offset) { |
38 | | // get iterator from string and forward by offset |
39 | 0 | sass::string::const_iterator stop = str.begin() + offset; |
40 | | // check if beyond boundary |
41 | 0 | if (stop == str.end()) return 0; |
42 | | // advance by one code point |
43 | 0 | utf8::advance(stop, 1, str.end()); |
44 | | // calculate offset for code point |
45 | 0 | return stop - str.begin() - offset; |
46 | 0 | } |
47 | | |
// Normalize a user supplied index into a 0-based code point position.
//   index: code point offset, 1 based when positive; negative values count
//          backwards from the end (-1 is the last code point)
//   len:   length of the string in code points
// Returns:
//   index in [1, len]   -> index - 1
//   index > len         -> len (clamped to one past the last position)
//   index == 0          -> 0
//   index in [-len, -1] -> len + index
//   index < -len        -> 0 (clamped to the first position)
// All comparisons are done in the unsigned size_t domain so that lengths
// which do not fit into `long` (32 bit on 64-bit Windows) are handled
// correctly instead of wrapping to a negative value.
size_t normalize_index(int index, size_t len) {
  if (index == 0) {
    return 0;
  }
  if (index > 0) {
    const size_t one_based = static_cast<size_t>(index);
    // within the string: shift to 0-based; past the end: clamp to len
    return one_based <= len ? one_based - 1 : len;
  }
  // Negative index: widen before negating so that INT_MIN cannot overflow.
  const size_t from_end = static_cast<size_t>(-static_cast<long long>(index));
  // within the string: count back from the end; before the start: clamp to 0
  return from_end <= len ? len - from_end : 0;
}
73 | | |
74 | | #ifdef _WIN32 |
75 | | |
76 | | // utf16 functions |
77 | | using std::wstring; |
78 | | |
79 | | // convert from utf16/wide string to utf8 string |
80 | | sass::string convert_from_utf16(const wstring& utf16) |
81 | | { |
82 | | sass::string utf8; |
83 | | // pre-allocate expected memory |
84 | | utf8.reserve(sizeof(utf16)/2); |
85 | | utf8::utf16to8(utf16.begin(), utf16.end(), |
86 | | back_inserter(utf8)); |
87 | | return utf8; |
88 | | } |
89 | | |
90 | | // convert from utf8 string to utf16/wide string |
91 | | wstring convert_to_utf16(const sass::string& utf8) |
92 | | { |
93 | | wstring utf16; |
94 | | // pre-allocate expected memory |
95 | | utf16.reserve(code_point_count(utf8)*2); |
96 | | utf8::utf8to16(utf8.begin(), utf8.end(), |
97 | | back_inserter(utf16)); |
98 | | return utf16; |
99 | | } |
100 | | |
101 | | #endif |
102 | | |
103 | | } |
104 | | } |