Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : #ifndef mozInlineSpellWordUtil_h
7 : #define mozInlineSpellWordUtil_h
8 :
9 : #include "nsCOMPtr.h"
10 : #include "nsIDOMDocument.h"
11 : #include "nsIDocument.h"
12 : #include "nsString.h"
13 : #include "nsTArray.h"
14 :
15 : //#define DEBUG_SPELLCHECK
16 :
17 : class nsRange;
18 : class nsINode;
19 :
20 : /**
21 : * This class extracts text from the DOM and builds it into a single string.
22 : * The string includes whitespace breaks wherever non-inline elements begin
23 : * and end. This string is broken into "real words", following somewhat
24 : * complex rules; for example substrings that look like URLs or
25 : * email addresses are treated as single words, but otherwise many kinds of
26 : * punctuation are treated as word separators. GetNextWord provides a way
27 : * to iterate over these "real words".
28 : *
29 : * The basic operation is:
30 : *
31 : * 1. Call Init with the weak pointer to the editor that you're using.
32 : * 2. Call SetEnd to set where you want to stop spellchecking. We'll stop
33 : * at the word boundary after that. If SetEnd is not called, we'll stop
34 : * at the end of the document's root element.
35 : * 3. Call SetPosition to initialize the current position inside the
36 : * previously given range.
37 : * 4. Call GetNextWord over and over until it returns an empty word and a null range.
38 : */
39 :
40 0 : class mozInlineSpellWordUtil
41 : {
42 : public:
43 : struct NodeOffset { // A DOM position: a node pointer plus an integer offset.
44 : nsINode* mNode;
45 : int32_t mOffset;
46 :
47 0 : NodeOffset(nsINode* aNode, int32_t aOffset) :
48 0 : mNode(aNode), mOffset(aOffset) {}
49 :
50 0 : bool operator==(const NodeOffset& aOther) const { // equal only if both node and offset match
51 0 : return mNode == aOther.mNode && mOffset == aOther.mOffset;
52 : }
53 :
54 0 : bool operator!=(const NodeOffset& aOther) const {
55 0 : return !(*this == aOther);
56 : }
57 : };
58 :
59 0 : mozInlineSpellWordUtil() // starts with no root node, null soft range, no current word, invalid soft text
60 0 : : mRootNode(nullptr),
61 : mSoftBegin(nullptr, 0), mSoftEnd(nullptr, 0),
62 0 : mNextWordIndex(-1), mSoftTextValid(false) {}
63 :
64 : nsresult Init(const nsWeakPtr& aWeakEditor);
65 :
66 : nsresult SetEnd(nsINode* aEndNode, int32_t aEndOffset);
67 :
68 : // sets the current position, this should be inside the range. If we are in
69 : // the middle of a word, we'll move to its start.
70 : nsresult SetPosition(nsINode* aNode, int32_t aOffset);
71 :
72 : // Given a point inside or immediately following a word, this returns the
73 : // DOM range that exactly encloses that word's characters. The current
74 : // position will be at the end of the word. This will find the previous
75 : // word if the current position is space, so if you care that the point is
76 : // inside the word, you should check the range.
77 : //
78 : // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
79 : // before you actually generate the range you are interested in and iterate
80 : // the words in it.
81 : nsresult GetRangeForWord(nsIDOMNode* aWordNode, int32_t aWordOffset,
82 : nsRange** aRange);
83 :
84 : // Moves to the next word in the range, and retrieves its text and range.
85 : // An empty word and a nullptr range are returned when we are done checking.
86 : // aSkipChecking will be set if the word is "special" and shouldn't be
87 : // checked (e.g., an email address).
88 : nsresult GetNextWord(nsAString& aText, nsRange** aRange,
89 : bool* aSkipChecking);
90 :
91 : // Call to normalize some punctuation. This function takes an autostring
92 : // so we can access characters directly.
93 : static void NormalizeWord(nsAString& aWord);
94 :
95 : nsIDOMDocument* GetDOMDocument() const { return mDOMDocument; }
96 : nsIDocument* GetDocument() const { return mDocument; }
97 0 : nsINode* GetRootNode() { return mRootNode; }
98 :
99 : private:
100 :
101 : // cached stuff for the editor, set by Init
102 : nsCOMPtr<nsIDOMDocument> mDOMDocument;
103 : nsCOMPtr<nsIDocument> mDocument;
104 :
105 : // range to check, see SetPosition and SetEnd
106 : nsINode* mRootNode;
107 : NodeOffset mSoftBegin;
108 : NodeOffset mSoftEnd;
109 :
110 : // DOM text covering the soft range, with newlines added at block boundaries
111 : nsString mSoftText;
112 : // A list of where we extracted text from, ordered by mSoftTextOffset. A given
113 : // DOM node appears at most once in this list.
114 : struct DOMTextMapping {
115 : NodeOffset mNodeOffset;
116 : int32_t mSoftTextOffset;
117 : int32_t mLength;
118 :
119 0 : DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset, int32_t aLength)
120 0 : : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset),
121 0 : mLength(aLength) {}
122 : };
123 : nsTArray<DOMTextMapping> mSoftTextDOMMapping;
124 :
125 : // A list of the "real words" in mSoftText, ordered by mSoftTextOffset
126 : struct RealWord {
127 : int32_t mSoftTextOffset;
128 : uint32_t mLength : 31; // 31-bit length; together with the 1-bit flag below this fills 32 bits
129 : uint32_t mCheckableWord : 1; // stores the aCheckable constructor argument
130 :
131 0 : RealWord(int32_t aOffset, uint32_t aLength, bool aCheckable)
132 0 : : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable)
133 : {
134 : static_assert(sizeof(RealWord) == 8, "RealWord should be limited to 8 bytes");
135 0 : MOZ_ASSERT(aLength < INT32_MAX, "Word length is too large to fit in the bitfield");
136 0 : }
137 :
138 0 : int32_t EndOffset() const { return mSoftTextOffset + mLength; } // start offset plus length
139 : };
140 : nsTArray<RealWord> mRealWords;
141 : int32_t mNextWordIndex;
142 :
143 : bool mSoftTextValid;
144 :
145 0 : void InvalidateWords() { mSoftTextValid = false; } // only clears the flag; presumably EnsureWords rebuilds later -- confirm in the .cpp
146 : nsresult EnsureWords();
147 :
148 : int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset); // DOM position -> mSoftText offset; failure value not visible here
149 : // Map an offset into mSoftText to a DOM position. Note that two DOM positions
150 : // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb
151 : // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So,
152 : // aHint controls which position we return ... if aHint is HINT_END
153 : // then the position indicates the END of a range so we return (A,4). Otherwise
154 : // the position indicates the START of a range so we return (B,0).
155 : enum DOMMapHint { HINT_BEGIN, HINT_END };
156 : NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset,
157 : DOMMapHint aHint);
158 : // Finds the index of the real word containing aSoftTextOffset, or -1 if none.
159 : // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
160 : // later word (favouring the assumption that it's the BEGINning of a word),
161 : // otherwise return the earlier word (assuming it's the END of a word).
162 : // If aSearchForward is true, then if we don't find a word at the given
163 : // position, search forward until we do find a word and return that (if found).
164 : int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint,
165 : bool aSearchForward);
166 :
167 : // build mSoftText and mSoftTextDOMMapping
168 : void BuildSoftText();
169 : // Build mRealWords array
170 : nsresult BuildRealWords();
171 :
172 : nsresult SplitDOMWord(int32_t aStart, int32_t aEnd);
173 :
174 : // Convenience functions, object must be initialized
175 : nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsRange** aRange);
176 : nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange);
177 : };
178 :
179 : #endif
|