source: node_modules/autolinker/dist/es2015/matcher/email-matcher.js

main
Last change on this file was d24f17c, checked in by Aleksandar Panovski <apano77@…>, 15 months ago

Initial commit

  • Property mode set to 100644
File size: 13.0 KB
RevLine 
[d24f17c]1import { __assign, __extends } from "tslib";
2import { Matcher } from './matcher';
3import { alphaNumericAndMarksCharsStr, domainNameCharRegex } from '../regex-lib';
4import { EmailMatch } from '../match/email-match';
5import { throwUnhandledCaseError } from '../utils';
6import { tldRegex } from './tld-regex';
7// For debugging: search for other "For debugging" lines
8// import CliTable from 'cli-table';
// Module-level RegExp objects shared by every EmailMatcher instance. They are
// built once here, rather than per instance, because `Autolinker.link()` may be
// called many times and each call instantiates a new EmailMatcher; rebuilding
// these RegExp objects every time is very expensive (see
// https://github.com/gregjacobs/Autolinker.js/issues/314).
// Each one is described where it is assigned onto the EmailMatcher instance.
var localPartCharRegex = new RegExp('[' + alphaNumericAndMarksCharsStr + "!#$%&'*+/=?^_`{|}~-]");
var strictTldRegex = new RegExp('^' + tldRegex.source + '$');
/**
 * @class Autolinker.matcher.Email
 * @extends Autolinker.matcher.Matcher
 *
 * Matcher that walks an input string one character at a time and collects the
 * email addresses found in it (with or without a leading 'mailto:' prefix).
 *
 * See this class's superclass ({@link Autolinker.matcher.Matcher}) for more details.
 */
var EmailMatcher = /** @class */ (function (_super) {
    __extends(EmailMatcher, _super);
    function EmailMatcher() {
        var self = (_super !== null && _super.apply(this, arguments)) || this;
        /**
         * Characters allowed in the "local" part of an email address,
         * i.e. the "name" part of "name@site.com"
         */
        self.localPartCharRegex = localPartCharRegex;
        /**
         * TLD regex anchored at both the beginning and the end, so that it
         * only accepts a string which is, in its entirety, a valid TLD
         */
        self.strictTldRegex = strictTldRegex;
        return self;
    }
    /**
     * @inheritdoc
     */
    EmailMatcher.prototype.parseMatches = function (text) {
        // Ids of the parser states. The numeric values double as indexes
        // into the `handlers` table below.
        var NON_EMAIL_MATCH = 0;
        var MAILTO = 1;
        var LOCAL_PART = 2;
        var LOCAL_PART_DOT = 3;
        var AT_SIGN = 4;
        var DOMAIN_CHAR = 5;
        var DOMAIN_HYPHEN = 6;
        var DOMAIN_DOT = 7;

        var tagBuilder = this.tagBuilder;
        var localCharRe = this.localPartCharRegex;
        var tldRe = this.strictTldRegex;
        var found = [];
        var textLength = text.length;
        // Shared placeholder used whenever no email address is being read
        var emptyMatch = new CurrentEmailMatch();

        // Maps each character of the 'mailto:' prefix to the character that
        // must follow it
        var nextMailtoChar = {
            m: 'a',
            a: 'i',
            i: 'l',
            l: 't',
            t: 'o',
            o: ':',
        };

        // One handler per state, positioned at that state's id. The handlers
        // are function declarations further down, so they are hoisted.
        var handlers = [
            onNonEmail,
            onMailto,
            onLocalPart,
            onLocalPartDot,
            onAtSign,
            onDomainChar,
            onDomainHyphen,
            onDomainDot,
        ];

        var state = NON_EMAIL_MATCH;
        var pending = emptyMatch;
        var pos;

        // For debugging: search for other "For debugging" lines
        // const table = new CliTable( {
        //     head: [ 'pos', 'char', 'state', 'pending.idx', 'pending.hasDomainDot' ]
        // } );
        for (pos = 0; pos < textLength; pos++) {
            var handle = handlers[state];
            if (handle === undefined) {
                throwUnhandledCaseError(state);
            }
            else {
                handle(text.charAt(pos));
            }
            // For debugging: search for other "For debugging" lines
            // table.push( [ pos, text.charAt( pos ), state, pending.idx, pending.hasDomainDot ] );
        }

        // The input may end in the middle of a valid address: capture it
        flushMatch();
        // For debugging: search for other "For debugging" lines
        // console.log( '\n' + table.toString() );
        return found;

        // State: not currently inside an email address
        function onNonEmail(ch) {
            if (ch === 'm') {
                // Possibly the start of a 'mailto:' prefix
                startMatch(MAILTO);
                return;
            }
            if (localCharRe.test(ch)) {
                startMatch(LOCAL_PART);
            }
            // Otherwise: not an email address character, keep scanning
        }

        // State: possibly reading a 'mailto:' prefix
        function onMailto(ch) {
            var prev = text.charAt(pos - 1);
            if (prev === ':') {
                // The whole 'mailto:' prefix has been read
                if (localCharRe.test(ch)) {
                    state = LOCAL_PART;
                    updatePending({ hasMailtoPrefix: true });
                }
                else {
                    // 'mailto:' was not followed by anything meaningful (for
                    // example a space, or an '@' which formed 'mailto:@')
                    abandonMatch();
                }
                return;
            }
            if (nextMailtoChar[prev] === ch) {
                // Still reading the 'mailto:' prefix: remain in this state
                return;
            }
            // We were reading a prefix of 'mailto:' but the current character
            // does not continue it, so treat what was read as a local part
            if (localCharRe.test(ch)) {
                state = LOCAL_PART;
            }
            else if (ch === '.') {
                state = LOCAL_PART_DOT;
            }
            else if (ch === '@') {
                state = AT_SIGN;
            }
            else {
                // Not an email address character
                abandonMatch();
            }
        }

        // State: inside the "local part" of an email address (as opposed to
        // the "domain part")
        function onLocalPart(ch) {
            if (ch === '.') {
                state = LOCAL_PART_DOT;
            }
            else if (ch === '@') {
                state = AT_SIGN;
            }
            else if (!localCharRe.test(ch)) {
                // Not an email address character
                abandonMatch();
            }
            // Otherwise: remain in the local part
        }

        // State: a '.' has just been read inside the local part
        function onLocalPartDot(ch) {
            // A second '.' in a row, or an '@' immediately after the '.',
            // does not form a valid local part; neither does any character
            // which is not a local part character
            var continuesLocalPart = ch !== '.' && ch !== '@' && localCharRe.test(ch);
            if (continuesLocalPart) {
                state = LOCAL_PART;
            }
            else {
                abandonMatch();
            }
        }

        // State: the '@' has just been read
        function onAtSign(ch) {
            if (domainNameCharRegex.test(ch)) {
                state = DOMAIN_CHAR;
            }
            else {
                // Anything else: not an email address
                abandonMatch();
            }
        }

        // State: inside the domain part, last character was a domain character
        function onDomainChar(ch) {
            if (ch === '.') {
                state = DOMAIN_DOT;
            }
            else if (ch === '-') {
                state = DOMAIN_HYPHEN;
            }
            else if (!domainNameCharRegex.test(ch)) {
                // The address ended here: it is a match if the criteria
                // have been met
                flushMatch();
            }
            // Otherwise: remain in the DomainChar state
        }

        // State: inside the domain part, last character was a '-'
        function onDomainHyphen(ch) {
            // Two hyphens ("--") or hyphen+dot ("-.") are not valid, nor is
            // any character which is not a domain character
            var continuesDomain = ch !== '-' && ch !== '.' && domainNameCharRegex.test(ch);
            if (continuesDomain) {
                state = DOMAIN_CHAR;
            }
            else {
                flushMatch();
            }
        }

        // State: inside the domain part, last character was a '.'
        function onDomainDot(ch) {
            // Two dots ("..") or dot+hyphen (".-") are not valid, nor is
            // any character which is not a domain character
            var continuesDomain = ch !== '.' && ch !== '-' && domainNameCharRegex.test(ch);
            if (!continuesDomain) {
                flushMatch();
                return;
            }
            state = DOMAIN_CHAR;
            // A '.' followed by a valid domain character means the domain
            // part is valid, so at least a partial email match exists (more
            // characters may still follow)
            updatePending({ hasDomainDot: true });
        }

        // Starts tracking a new candidate address at the current position
        function startMatch(initialState) {
            state = initialState;
            pending = new CurrentEmailMatch({ idx: pos });
        }

        // Replaces the candidate with a copy that has `changes` applied
        function updatePending(changes) {
            pending = new CurrentEmailMatch(__assign(__assign({}, pending), changes));
        }

        // Drops the candidate and goes back to scanning for a new address
        function abandonMatch() {
            state = NON_EMAIL_MATCH;
            pending = emptyMatch;
        }

        /**
         * Determines if the given email address has a valid TLD or not
         * @param {string} emailAddress - email address
         * @return {Boolean} - true if the email has a valid TLD, false otherwise
         */
        function hasValidTld(emailAddress) {
            // Everything after the last '.' (the whole string if there is none)
            var tld = emailAddress.slice(emailAddress.lastIndexOf('.') + 1);
            return tldRe.test(tld.toLowerCase());
        }

        /*
         * Records the candidate address as an EmailMatch if it is valid, then
         * resets the state so that another address can be read.
         */
        function flushMatch() {
            // At least one dot in the domain is required for a valid address
            if (pending.hasDomainDot) {
                var matchedText = text.slice(pending.idx, pos);
                // A trailing '.' or '-' is a valid domain name character, but
                // is only part of the address when something follows it, so
                // strip it off here
                var lastChar = matchedText.charAt(matchedText.length - 1);
                if (lastChar === '-' || lastChar === '.') {
                    matchedText = matchedText.slice(0, -1);
                }
                var emailAddress = matchedText;
                if (pending.hasMailtoPrefix) {
                    emailAddress = matchedText.slice('mailto:'.length);
                }
                // Only addresses with a valid TLD are reported
                if (hasValidTld(emailAddress)) {
                    found.push(new EmailMatch({
                        tagBuilder: tagBuilder,
                        matchedText: matchedText,
                        offset: pending.idx,
                        email: emailAddress,
                    }));
                }
            }
            abandonMatch();
        }
    };
    return EmailMatcher;
}(Matcher));
301export { EmailMatcher };
/**
 * Holds the state of the email address currently being read by the parser.
 *
 * Accepted `cfg` properties (all optional):
 *  - `idx`: position in the input where the candidate starts; -1 when omitted
 *  - `hasMailtoPrefix`: coerced to a boolean
 *  - `hasDomainDot`: coerced to a boolean
 */
var CurrentEmailMatch = /** @class */ (function () {
    function CurrentEmailMatch(cfg) {
        var options = cfg === undefined ? {} : cfg;
        var startIdx = options.idx;
        this.idx = startIdx === undefined ? -1 : startIdx;
        this.hasMailtoPrefix = Boolean(options.hasMailtoPrefix);
        this.hasDomainDot = Boolean(options.hasDomainDot);
    }
    return CurrentEmailMatch;
}());
311//# sourceMappingURL=email-matcher.js.map
Note: See TracBrowser for help on using the repository browser.