/src/libsass/src/utf8_string.cpp

Source
// sass.hpp must go before all system headers to get the
// __EXTENSIONS__ fix on Solaris.
#include "sass.hpp"

#include <string>
#include <vector>
#include <cstdlib>
#include <cmath>

#include "utf8.h"

namespace Sass {
  namespace UTF_8 {

    // naming conventions:
    // offset: raw byte offset (0 based)
    // position: code point offset (0 based)
    // index: code point offset (1 based or negative)

    // Counts the code points (utf-8 characters) of `str` that lie between
    // the raw byte offsets `start` and `end`. Both offsets are applied to
    // `str.begin()` as given, without any range check.
    size_t code_point_count(const sass::string& str, size_t start, size_t end) {
      const sass::string::const_iterator first = str.begin() + start;
      const sass::string::const_iterator last = str.begin() + end;
      return utf8::distance(first, last);
    }

    // Counts the code points (utf-8 characters) of the whole string.
    size_t code_point_count(const sass::string& str) {
      const sass::string::const_iterator first = str.begin();
      const sass::string::const_iterator last = str.end();
      return utf8::distance(first, last);
    }

    // Translates a code point position (0 based) into the raw byte
    // offset (0 based) reached after stepping over that many code points.
    size_t offset_at_position(const sass::string& str, size_t position) {
      const sass::string::const_iterator first = str.begin();
      sass::string::const_iterator cursor = first;
      // step over `position` code points, bounded by the end of the string
      utf8::advance(cursor, position, str.end());
      // bytes walked from the start of the string
      return std::distance(first, cursor);
    }

    // Returns the number of bytes occupied by the code point that starts
    // at the given raw byte offset (0 based). Returns 0 when the offset
    // is at or beyond the end of the string.
    size_t code_point_size_at_offset(const sass::string& str, size_t offset) {
      // compare offsets first: forming an iterator past `str.end()` for an
      // out-of-range offset would already be undefined behavior
      if (offset >= str.size()) return 0;
      // iterator at the first byte of the code point
      const sass::string::const_iterator start = str.begin() + offset;
      sass::string::const_iterator stop = start;
      // advance by one code point
      utf8::advance(stop, 1, str.end());
      // byte length of the code point that was stepped over
      return static_cast<size_t>(stop - start);
    }

    // Converts a 1-based (or negative, counted from the end) code point
    // index into a 0-based position clamped to the range [0, len].
    //   index > 0:  index - 1, or len when the index is past the length
    //   index == 0: 0
    //   index < 0:  len + index, or 0 when the index reaches before the start
    // All comparisons are done in size_t, so `len` is never narrowed into
    // a signed type and no floating point conversion is involved.
    size_t normalize_index(int index, size_t len) {
      if (index > 0) {
        const size_t one_based = static_cast<size_t>(index);
        // positive: within string length, otherwise past string length
        return one_based <= len ? one_based - 1 : len;
      }
      if (index == 0) {
        return 0;
      }
      // magnitude of the negative index; -(index + 1) cannot overflow,
      // not even for the smallest representable int
      const size_t from_end = static_cast<size_t>(-(index + 1)) + 1;
      // negative: within string length, otherwise past string length
      return from_end <= len ? len - from_end : 0;
    }

    #ifdef _WIN32

    // utf16 functions
    using std::wstring;

    // convert from utf16/wide string to utf8 string
    sass::string convert_from_utf16(const wstring& utf16)
    {
      sass::string result;
      // pre-allocate a lower bound of the needed memory: every utf16
      // code unit ends up as at least one byte of utf8 output
      // (note: sizeof(utf16) would be the size of the string object
      // itself, not the number of code units it holds)
      result.reserve(utf16.size());
      utf8::utf16to8(utf16.begin(), utf16.end(),
                     back_inserter(result));
      return result;
    }

    // Builds the utf16/wide string equivalent of the given utf8 string.
    wstring convert_to_utf16(const sass::string& utf8)
    {
      wstring wide;
      // reserve two code units per code point up front
      const size_t reserved_units = code_point_count(utf8) * 2;
      wide.reserve(reserved_units);
      utf8::utf8to16(utf8.begin(), utf8.end(),
                     back_inserter(wide));
      return wide;
    }

    #endif

  }
}

Line	Count	Source
1		// sass.hpp must go before all system headers to get the
2		// __EXTENSIONS__ fix on Solaris.
3		#include "sass.hpp"
4
5		#include <string>
6		#include <vector>
7		#include <cstdlib>
8		#include <cmath>
9
10		#include "utf8.h"
11
12		namespace Sass {
13		namespace UTF_8 {
14
15		// naming conventions:
16		// offset: raw byte offset (0 based)
17		// position: code point offset (0 based)
18		// index: code point offset (1 based or negative)
19
20		// function that will count the number of code points (utf-8 characters) from the given beginning to the given end
21	0	size_t code_point_count(const sass::string& str, size_t start, size_t end) {
22	0	return utf8::distance(str.begin() + start, str.begin() + end);
23	0	}
24
25	0	size_t code_point_count(const sass::string& str) {
26	0	return utf8::distance(str.begin(), str.end());
27	0	}
28
29		// function that will return the byte offset at a code point position
30	0	size_t offset_at_position(const sass::string& str, size_t position) {
31	0	sass::string::const_iterator it = str.begin();
32	0	utf8::advance(it, position, str.end());
33	0	return std::distance(str.begin(), it);
34	0	}
35
36		// function that returns number of bytes in a character at offset
37	0	size_t code_point_size_at_offset(const sass::string& str, size_t offset) {
38		// get iterator from string and forward by offset
39	0	sass::string::const_iterator stop = str.begin() + offset;
40		// check if beyond boundary
41	0	if (stop == str.end()) return 0;
42		// advance by one code point
43	0	utf8::advance(stop, 1, str.end());
44		// calculate offset for code point
45	0	return stop - str.begin() - offset;
46	0	}
47
48		// function that will return a normalized index, given a crazy one
49	0	size_t normalize_index(int index, size_t len) {
50	0	long signed_len = static_cast<long>(len);
51		// assuming the index is 1-based
52		// we are returning a 0-based index
53	0	if (index > 0 && index <= signed_len) {
54		// positive and within string length
55	0	return index-1;
56	0	}
57	0	else if (index > signed_len) {
58		// positive and past string length
59	0	return len;
60	0	}
61	0	else if (index == 0) {
62	0	return 0;
63	0	}
64	0	else if (std::abs((double)index) <= signed_len) {
65		// negative and within string length
66	0	return index + signed_len;
67	0	}
68	0	else {
69		// negative and past string length
70	0	return 0;
71	0	}
72	0	}
73
74		#ifdef _WIN32
75
76		// utf16 functions
77		using std::wstring;
78
79		// convert from utf16/wide string to utf8 string
80		sass::string convert_from_utf16(const wstring& utf16)
81		{
82		sass::string utf8;
83		// pre-allocate expected memory
84		utf8.reserve(sizeof(utf16)/2);
85		utf8::utf16to8(utf16.begin(), utf16.end(),
86		back_inserter(utf8));
87		return utf8;
88		}
89
90		// convert from utf8 string to utf16/wide string
91		wstring convert_to_utf16(const sass::string& utf8)
92		{
93		wstring utf16;
94		// pre-allocate expected memory
95		utf16.reserve(code_point_count(utf8)*2);
96		utf8::utf8to16(utf8.begin(), utf8.end(),
97		back_inserter(utf16));
98		return utf16;
99		}
100
101		#endif
102
103		}
104		}

Coverage Report

Created: 2025-11-09 06:44