/src/mozilla-central/accessible/atk/DOMtoATK.h

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "AccessibleWrap.h"
#include "nsString.h"
#include "nsMai.h"

/**
 * ATK offsets are counted in unicode codepoints, while DOM offsets are counted
 * in UTF-16 code units.  That makes a difference for non-BMP characters,
 * which need two UTF-16 code units to be represented (a pair of surrogates),
 * while they are just one unicode character.
 *
 * To keep synchronization between ATK offsets (unicode codepoints) and DOM
 * offsets (UTF-16 code units), after translation from UTF-16 to UTF-8 we add a
 * BOM after each non-BMP character (which would otherwise use 2 UTF-16
 * code units for only 1 unicode codepoint).
 *
 * BOMs (Byte Order Marks, U+FEFF, also known as ZERO WIDTH NO-BREAK SPACE, but
 * that usage is deprecated) normally only appear at the beginning of Unicode
 * files, but their occurrence within text (notably after cut&paste) is not
 * uncommon, so they are treated as non-text.
 *
 * Since the selection requested through ATK may not contain both surrogates
 * at the ends of the selection, we need to fetch one more UTF-16 code unit
 * on both sides, and get rid of it before returning the string to ATK. The
 * ATKStringConverterHelper class takes care of this; NewATKString should be
 * used to call it properly.
 *
 * In the end,
 * - if the start is between the high and low surrogates, the UTF-8 result
 * includes a BOM from it but not the character
 * - if the end is between the high and low surrogates, the UTF-8 result
 * includes the character but *not* the BOM
 * - all non-BMP characters that are fully in the string are in the UTF-8 result
 * as character followed by BOM
 */
namespace mozilla {
namespace a11y {

namespace DOMtoATK
{

  /**
   * Converts a string of accessible text into ATK gchar* string (by adding
   * BOMs). This can be used when offsets do not need to be adjusted because
   * ends of the string can not fall between surrogates.
   *
   * aStr is the text to convert; it is taken by const reference and is not
   * modified.
   *
   * NOTE(review): ownership of the returned gchar* is not visible from this
   * header -- presumably the caller is expected to release it; confirm
   * against the definition.
   */
  gchar* Convert(const nsAString& aStr);

  /**
   * Add a BOM after each non-BMP character.
   *
   * aSource is the input (const, left untouched) and aDest is the string
   * that is written to.
   *
   * NOTE(review): whether aDest is overwritten or appended to, and whether
   * aSource is expected to already be UTF-8, cannot be determined from this
   * declaration -- confirm against the definition.
   */
  void AddBOMs(nsACString& aDest, const nsACString& aSource);

  /**
   * Replace all characters with asterisks (e.g. for password fields).
   *
   * aString is modified in place: it is passed by non-const reference and
   * nothing is returned.  NewATKString calls this when the
   * AtkStringConvertFlags::ConvertTextToAsterisks flag is set.
   */
  void ConvertTexttoAsterisks(nsAString& aString);

  /**
   * Parameterize conversion.
   *
   * Bit flags passed to NewATKString to select optional processing of the
   * fetched text before it is converted for ATK.
   */
  enum class AtkStringConvertFlags : uint32_t {
    // No optional processing.
    None                   = 0,
    // Pass the fetched text through ConvertTexttoAsterisks before conversion.
    ConvertTextToAsterisks = 1 << 0,
  };

  // Provides the bitwise operators for the scoped enum; NewATKString relies
  // on them for its `aFlags & AtkStringConvertFlags::...` test.
  MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(AtkStringConvertFlags)

  /**
   * Helper pairing an offset adjustment with the matching string conversion.
   *
   * Intended sequence (this is what NewATKString does): call AdjustOffsets on
   * the requested range, fetch the text for the adjusted range, then hand
   * that text to ConvertAdjusted.
   */
  class ATKStringConverterHelper {
  public:
    // All state flags start out false; see the member initializers below.
    ATKStringConverterHelper() = default;

    /**
     * Widen the requested range so that one more character is fetched on
     * each end.  This lets ConvertUTF16toUTF8 convert surrogate pairs
     * correctly even when the originally requested offsets fall between the
     * two surrogates of a non-BMP character.
     *
     * aStartOffset and aEndOffset are in/out parameters; count is the limit
     * the offsets are adjusted against (NewATKString passes the accessible's
     * character count).
     */
    void AdjustOffsets(gint* aStartOffset, gint* aEndOffset, gint count);

    /**
     * Turn accessible text that was fetched with adjusted offsets into an
     * ATK gchar* string (adding BOMs).  AdjustOffsets must have been called
     * before the text passed in here was fetched.
     */
    gchar* ConvertAdjusted(const nsAString& aStr);

  private:
    /**
     * Strip the additional characters that were requested on top of the
     * original range.
     * NOTE(review): the previous comment attributed the request to
     * "PrepareUTF16toUTF8", which is not declared in this header;
     * AdjustOffsets looks like the intended function -- confirm.
     */
    gchar* FinishUTF16toUTF8(nsCString& aStr);

#ifdef DEBUG
    // Debug-only flag; presumably records that AdjustOffsets has been
    // called -- confirm against the definition.
    bool mAdjusted = false;
#endif
    // Presumably set by AdjustOffsets when the start / end offset was
    // actually moved -- confirm against the definition.
    bool mStartShifted = false;
    bool mEndShifted = false;
  };

  /**
   * Fetch the text between aStartOffset and aEndOffset from aAccessible and
   * return it as an ATK gchar* string, going through
   * ATKStringConverterHelper so that the appropriate BOMs are introduced.
   *
   * AccessibleOrProxy must provide CharacterCount() and
   * TextSubstring(start, end, outString).  When aFlags contains
   * ConvertTextToAsterisks, the fetched text is replaced by asterisks before
   * it is converted.
   */
  template <class AccessibleOrProxy>
  gchar* NewATKString(AccessibleOrProxy* aAccessible,
                      gint aStartOffset, gint aEndOffset,
                      AtkStringConvertFlags aFlags)
  {
    ATKStringConverterHelper helper;

    // Work on copies: the helper rewrites the offsets in place.
    gint adjustedStart = aStartOffset;
    gint adjustedEnd = aEndOffset;
    const gint characterCount =
      static_cast<gint>(aAccessible->CharacterCount());
    helper.AdjustOffsets(&adjustedStart, &adjustedEnd, characterCount);

    // Fetch the text for the adjusted range.
    nsAutoString text;
    aAccessible->TextSubstring(adjustedStart, adjustedEnd, text);

    if (aFlags & AtkStringConvertFlags::ConvertTextToAsterisks) {
      ConvertTexttoAsterisks(text);
    }

    return helper.ConvertAdjusted(text);
  }

  /**
   * Return the character ATK should see at aOffset in aAccessible.
   *
   * - A trailing (low) surrogate at aOffset yields a BOM (U+FEFF).
   * - A heading (high) surrogate is combined with the unit at aOffset + 1
   *   into the full non-BMP character; if that next unit is not a trailing
   *   surrogate, U+FFFD is returned to flag the error.
   * - Anything else is returned unchanged.
   *
   * AccessibleCharAt must provide CharAt(offset).
   */
  template <class AccessibleCharAt>
  gunichar ATKCharacter(AccessibleCharAt* aAccessible, gint aOffset)
  {
    const gunichar kByteOrderMark = 0xFEFF;
    const gunichar kReplacementCharacter = 0xFFFD;

    // char16_t is unsigned short in Mozilla, gunichar is guint32 in glib.
    const gunichar unit = static_cast<gunichar>(aAccessible->CharAt(aOffset));

    // Second half of a surrogate pair: report a BOM in its place.
    if (NS_IS_LOW_SURROGATE(unit)) {
      return kByteOrderMark;
    }

    // Plain BMP character: nothing to combine.
    if (!NS_IS_HIGH_SURROGATE(unit)) {
      return unit;
    }

    // First half of a surrogate pair: fetch the second half and combine.
    const gunichar trailing =
      static_cast<gunichar>(aAccessible->CharAt(aOffset + 1));
    if (NS_IS_LOW_SURROGATE(trailing)) {
      return SURROGATE_TO_UCS4(unit, trailing);
    }

    // The pair is broken (no trailing surrogate follows); flag the error.
    return kReplacementCharacter;
  }

}

} // namespace a11y
} // namespace mozilla

Line	Count	Source (jump to first uncovered line)
1		/* -- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -- */
2		/* vim: set ts=2 et sw=2 tw=80: */
3		/* This Source Code Form is subject to the terms of the Mozilla Public
4		* License, v. 2.0. If a copy of the MPL was not distributed with this
5		* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7		#include "AccessibleWrap.h"
8		#include "nsString.h"
9		#include "nsMai.h"
10
11		/**
12		* ATK offsets are counted in unicode codepoints, while DOM offsets are counted
13		* in UTF-16 code units. That makes a difference for non-BMP characters,
14		* which need two UTF-16 code units to be represented (a pair of surrogates),
15		* while they are just one unicode character.
16		*
17		* To keep synchronization between ATK offsets (unicode codepoints) and DOM
18		* offsets (UTF-16 code units), after translation from UTF-16 to UTF-8 we add a
19		* BOM after each non-BMP character (which would otherwise use 2 UTF-16
20		* code units for only 1 unicode codepoint).
21		*
22		* BOMs (Byte Order Marks, U+FEFF, also known as ZERO WIDTH NO-BREAK SPACE, but
23		* that usage is deprecated) normally only appear at the beginning of unicode
24		* files, but their occurrence within text (notably after cut&paste) is not
25		* uncommon, and are thus considered as non-text.
26		*
27		* Since the selection requested through ATK may not contain both surrogates
28		* at the ends of the selection, we need to fetch one UTF-16 code point more
29		* on both side, and get rid of it before returning the string to ATK. The
30		* ATKStringConverterHelper class maintains this, NewATKString should be used
31		* to call it properly.
32		*
33		* In the end,
34		* - if the start is between the high and low surrogates, the UTF-8 result
35		* includes a BOM from it but not the character
36		* - if the end is between the high and low surrogates, the UTF-8 result
37		* includes the character but not the BOM
38		* - all non-BMP characters that are fully in the string are in the UTF-8 result
39		* as character followed by BOM
40		*/
41		namespace mozilla {
42		namespace a11y {
43
44		namespace DOMtoATK
45		{
46
47		/**
48		* Converts a string of accessible text into ATK gchar* string (by adding
49		* BOMs). This can be used when offsets do not need to be adjusted because
50		* ends of the string can not fall between surrogates.
51		*/
52		gchar* Convert(const nsAString& aStr);
53
54		/**
55		* Add a BOM after each non-BMP character.
56		*/
57		void AddBOMs(nsACString& aDest, const nsACString& aSource);
58
59		/**
60		* Replace all characters with asterisks (e.g. for password fields).
61		*/
62		void ConvertTexttoAsterisks(nsAString& aString);
63
64		/**
65		* Parameterize conversion.
66		*/
67		enum class AtkStringConvertFlags : uint32_t {
68		None = 0,
69		ConvertTextToAsterisks = 1 << 0,
70		};
71
72		MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(AtkStringConvertFlags)
73
74		class ATKStringConverterHelper {
75		public:
76		ATKStringConverterHelper(void) :
77		#ifdef DEBUG
78		mAdjusted (false),
79		#endif
80		mStartShifted (false),
81	0	mEndShifted (false) { }
82
83		/**
84		* In order to properly get non-BMP values, offsets need to be changed
85		* to get one character more on each end, so that ConvertUTF16toUTF8 can
86		* convert surrogates even if the originally requested offsets fall between
87		* them.
88		*/
89		void AdjustOffsets(gint* aStartOffset, gint* aEndOffset, gint count);
90
91		/**
92		* Converts a string of accessible text with adjusted offsets into ATK
93		* gchar* string (by adding BOMs). Note, AdjustOffsets has to be called
94		* before getting the text passed to this.
95		*/
96		gchar* ConvertAdjusted(const nsAString& aStr);
97
98		private:
99		/**
100		* Remove the additional characters requested by PrepareUTF16toUTF8.
101		*/
102		gchar* FinishUTF16toUTF8(nsCString& aStr);
103
104		#ifdef DEBUG
105		bool mAdjusted;
106		#endif
107		bool mStartShifted;
108		bool mEndShifted;
109		};
110
111		/**
112		* Get text from aAccessible, using ATKStringConverterHelper to properly
113		* introduce appropriate BOMs.
114		*/
115		template <class AccessibleOrProxy>
116		gchar* NewATKString(AccessibleOrProxy* aAccessible,
117		gint aStartOffset, gint aEndOffset,
118		AtkStringConvertFlags aFlags)
119	0	{
120	0	gint startOffset = aStartOffset, endOffset = aEndOffset;
121	0	ATKStringConverterHelper converter;
122	0	converter.AdjustOffsets(&startOffset, &endOffset,
123	0	gint(aAccessible->CharacterCount()));
124	0	nsAutoString str;
125	0	aAccessible->TextSubstring(startOffset, endOffset, str);
126	0	if (aFlags & AtkStringConvertFlags::ConvertTextToAsterisks)
127	0	ConvertTexttoAsterisks(str);
128	0	return converter.ConvertAdjusted(str);
129	0	} Unexecuted instantiation: char* mozilla::a11y::DOMtoATK::NewATKString<mozilla::a11y::HyperTextAccessible>(mozilla::a11y::HyperTextAccessible, int, int, mozilla::a11y::DOMtoATK::AtkStringConvertFlags) Unexecuted instantiation: char mozilla::a11y::DOMtoATK::NewATKString<mozilla::a11y::ProxyAccessible>(mozilla::a11y::ProxyAccessible*, int, int, mozilla::a11y::DOMtoATK::AtkStringConvertFlags)
130
131		/**
132		* Get a character from aAccessible, fetching more data as appropriate to
133		* properly get non-BMP characters or a BOM as appropriate.
134		*/
135		template <class AccessibleCharAt>
136		gunichar ATKCharacter(AccessibleCharAt* aAccessible, gint aOffset)
137	0	{
138	0	// char16_t is unsigned short in Mozilla, gnuichar is guint32 in glib.
139	0	gunichar character = static_cast<gunichar>(aAccessible->CharAt(aOffset));
140	0
141	0	if (NS_IS_LOW_SURROGATE(character)) {
142	0	// Trailing surrogate, return BOM instead.
143	0	return 0xFEFF;
144	0	}
145	0
146	0	if (NS_IS_HIGH_SURROGATE(character)) {
147	0	// Heading surrogate, get the trailing surrogate and combine them.
148	0	gunichar characterLow = static_cast<gunichar>(aAccessible->CharAt(aOffset + 1));
149	0
150	0	if (!NS_IS_LOW_SURROGATE(characterLow)) {
151	0	// It should have been a trailing surrogate... Flag the error.
152	0	return 0xFFFD;
153	0	}
154	0	return SURROGATE_TO_UCS4(character, characterLow);
155	0	}
156	0
157	0	return character;
158	0	} Unexecuted instantiation: unsigned int mozilla::a11y::DOMtoATK::ATKCharacter<mozilla::a11y::HyperTextAccessible>(mozilla::a11y::HyperTextAccessible, int) Unexecuted instantiation: unsigned int mozilla::a11y::DOMtoATK::ATKCharacter<mozilla::a11y::ProxyAccessible>(mozilla::a11y::ProxyAccessible, int)
159
160		}
161
162		} // namespace a11y
163		} // namespace mozilla

Coverage Report

Created: 2018-09-25 14:53