source: node_modules/autolinker/dist/es2015/matcher/url-match-validator.js@ d24f17c

main
Last change on this file since d24f17c was d24f17c, checked in by Aleksandar Panovski <apano77@…>, 15 months ago

Initial commit

  • Property mode set to 100644
File size: 7.8 KB
Line 
1import { alphaCharsStr } from '../regex-lib';
/**
 * @private
 * @class Autolinker.matcher.UrlMatchValidator
 * @singleton
 *
 * Used by Autolinker to filter out false URL positives from the
 * {@link Autolinker.matcher.Url UrlMatcher}.
 *
 * Due to the limitations of regular expressions (including the missing feature
 * of look-behinds in JS regular expressions), we cannot always determine the
 * validity of a given match. This class applies a bit of additional logic to
 * filter out any false positives that have been matched by the
 * {@link Autolinker.matcher.Url UrlMatcher}.
 *
 * All members are static and reference each other through `this`, so they
 * must be invoked on the class itself (e.g. `UrlMatchValidator.isValid(...)`).
 */
var UrlMatchValidator = /** @class */ (function () {
    function UrlMatchValidator() {
    }
    /**
     * Determines if a given URL match found by the {@link Autolinker.matcher.Url UrlMatcher}
     * is valid. Will return `false` for:
     *
     * 1) URL matches whose scheme is 'javascript:' or 'vbscript:'.
     * 2) URL matches which do not have at least one period ('.') in them
     *    (effectively skipping over matches like "abc:def").
     *    However, URL matches with a full protocol will be allowed
     *    (ex: 'http://localhost').
     * 3) Scheme matches without a full protocol ('://') which do not have at
     *    least one letter after the ':' (effectively skipping over matches
     *    like "git:1.0"), unless {@link #isValidIpAddress} accepts the match.
     * 4) URL matches whose host part contains two consecutive dots
     *    (ex: 'http://exa..mple.com').
     *
     * Otherwise, returns `true`.
     *
     * @param {String} urlMatch The matched URL, if there was one. Will be an
     *   empty string if the match is not a URL match.
     * @param {String} protocolUrlMatch The match URL string for a protocol
     *   match. Ex: 'http://yahoo.com'. This is used to match something like
     *   'http://localhost', where we won't double check that the domain name
     *   has at least one '.' in it.
     * @return {Boolean} `true` if the match given is valid and should be
     *   processed, or `false` if the match is invalid and/or should just not be
     *   processed.
     */
    UrlMatchValidator.isValid = function (urlMatch, protocolUrlMatch) {
        // Never autolink script-executing schemes.
        if (protocolUrlMatch && !this.isValidUriScheme(protocolUrlMatch)) {
            return false;
        }
        // At least one period ('.') must exist in the URL match for us to
        // consider it an actual URL, *unless* it was a full protocol match
        // (like 'http://localhost').
        if (this.urlMatchDoesNotHaveProtocolOrDot(urlMatch, protocolUrlMatch)) {
            return false;
        }
        // At least one letter character must exist after the scheme of a
        // non-full-protocol match (ex: skip over something like "git:1.0"),
        // except if it's an IP address.
        if (this.urlMatchDoesNotHaveAtLeastOneWordChar(urlMatch, protocolUrlMatch) &&
            !this.isValidIpAddress(urlMatch)) {
            return false;
        }
        return !this.containsMultipleDots(urlMatch);
    };
    /**
     * Determines if the given string is a full-protocol URL whose host is a
     * dotted-quad of 1-3 digit groups, optionally followed by a ':port' and a
     * single trailing slash, with nothing after it
     * (ex: 'http://192.168.0.1:8080/').
     *
     * Note that the digit groups are not range-checked, and a string without
     * a leading 'scheme://' (ex: '192.168.0.1') does not match.
     *
     * @private
     * @param {String} uriSchemeMatch The string to test. `isValid()` passes
     *   the full URL match here.
     * @return {Boolean} `true` if the string has the form described above,
     *   `false` otherwise.
     */
    UrlMatchValidator.isValidIpAddress = function (uriSchemeMatch) {
        return this.fullProtocolIpRegex.test(uriSchemeMatch);
    };
    /**
     * Determines if the host part of the URL match contains two consecutive
     * dots (ex: 'http://exa..mple.com').
     *
     * The host part is what remains after stripping a leading full protocol
     * ('scheme://'), up to the first '/', '?' or '#'. Stopping at '?' and '#'
     * (and not only at '/') keeps a query string or fragment which directly
     * follows the host, such as 'http://example.com?range=1..5', from being
     * mistaken for part of the domain name.
     *
     * @private
     * @param {String} urlMatch The matched URL.
     * @return {Boolean} `true` if the host part contains '..', `false`
     *   otherwise.
     */
    UrlMatchValidator.containsMultipleDots = function (urlMatch) {
        var withoutProtocol = urlMatch.replace(this.hasFullProtocolRegex, '');
        var hostPart = withoutProtocol.split(/[/?#]/)[0];
        return hostPart.includes('..');
    };
    /**
     * Determines if the URI scheme is a valid scheme to be autolinked. Returns
     * `false` if the scheme is 'javascript:' or 'vbscript:' (compared
     * case-insensitively). A string with no recognizable scheme is considered
     * valid.
     *
     * @private
     * @param {String} uriSchemeMatch The match URL string for a full URI scheme
     *   match. Ex: 'http://yahoo.com' or 'mailto:a@a.com'.
     * @return {Boolean} `true` if the scheme is a valid one, `false` otherwise.
     */
    UrlMatchValidator.isValidUriScheme = function (uriSchemeMatch) {
        var schemeMatch = uriSchemeMatch.match(this.uriSchemeRegex);
        var uriScheme = schemeMatch && schemeMatch[0].toLowerCase();
        return uriScheme !== 'javascript:' && uriScheme !== 'vbscript:';
    };
    /**
     * Determines if a URL match has neither:
     *
     * a) a full protocol (i.e. 'http://'), nor
     * b) at least one dot ('.') anywhere in the match.
     *
     * Such a match is considered an invalid URL (ex: 'git:d' does not have
     * either the '://' part, or at least one dot in the domain name. If the
     * match was 'git:abc.com', we would consider this valid.)
     *
     * @private
     * @param {String} urlMatch The matched URL, if there was one. Will be an
     *   empty string if the match is not a URL match.
     * @param {String} protocolUrlMatch The match URL string for a protocol
     *   match. Ex: 'http://yahoo.com'. This is used to match something like
     *   'http://localhost', where we won't double check that the domain name
     *   has at least one '.' in it.
     * @return {Boolean} `true` if `urlMatch` is non-empty, has no full
     *   protocol, and contains no dot ('.'); `false` otherwise.
     */
    UrlMatchValidator.urlMatchDoesNotHaveProtocolOrDot = function (urlMatch, protocolUrlMatch) {
        if (!urlMatch) {
            return false;
        }
        var hasFullProtocol = !!protocolUrlMatch && this.hasFullProtocolRegex.test(protocolUrlMatch);
        return !hasFullProtocol && !urlMatch.includes('.');
    };
    /**
     * Determines if a scheme-prefixed URL match has neither:
     *
     * a) a full protocol (i.e. 'http://'), nor
     * b) at least one letter character after the protocol separator (':')
     *
     * At least one letter character must exist in the domain name after a
     * protocol match. Ex: skip over something like "git:1.0"
     *
     * @private
     * @param {String} urlMatch The matched URL, if there was one. Will be an
     *   empty string if the match is not a URL match.
     * @param {String} protocolUrlMatch The match URL string for a protocol
     *   match. Ex: 'http://yahoo.com'. This is used to know whether or not we
     *   have a protocol in the URL string, in order to check for a word
     *   character after the protocol separator (':').
     * @return {Boolean} `true` if both arguments are non-empty, the protocol
     *   match has no full protocol, and no letter character follows the ':'
     *   in `urlMatch`; `false` otherwise (including when either argument is
     *   empty).
     */
    UrlMatchValidator.urlMatchDoesNotHaveAtLeastOneWordChar = function (urlMatch, protocolUrlMatch) {
        if (!urlMatch || !protocolUrlMatch) {
            return false;
        }
        return (!this.hasFullProtocolRegex.test(protocolUrlMatch) &&
            !this.hasWordCharAfterProtocolRegex.test(urlMatch));
    };
    /**
     * Regex to test for a full protocol, with the two trailing slashes. Ex: 'http://'
     *
     * @private
     * @property {RegExp} hasFullProtocolRegex
     */
    UrlMatchValidator.hasFullProtocolRegex = /^[A-Za-z][-.+A-Za-z0-9]*:\/\//;
    /**
     * Regex to find the URI scheme, such as 'mailto:'.
     *
     * This is used to filter out 'javascript:' and 'vbscript:' schemes.
     *
     * @private
     * @property {RegExp} uriSchemeRegex
     */
    UrlMatchValidator.uriSchemeRegex = /^[A-Za-z][-.+A-Za-z0-9]*:/;
    /**
     * Regex to determine if at least one word char exists after the protocol (i.e. after the ':')
     *
     * @private
     * @property {RegExp} hasWordCharAfterProtocolRegex
     */
    UrlMatchValidator.hasWordCharAfterProtocolRegex = new RegExp(':[^\\s]*?[' + alphaCharsStr + ']');
    /**
     * Regex to determine if the string ends in an IP-address-like dotted quad
     * (four groups of 1-3 digits), optionally followed by a ':port' and a
     * trailing slash.
     *
     * @private
     * @property {RegExp} ipRegex
     */
    UrlMatchValidator.ipRegex = /[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?(:[0-9]*)?\/?$/;
    /**
     * Regex matching a full protocol immediately followed by an IP address
     * (the concatenation of {@link #hasFullProtocolRegex} and {@link #ipRegex}).
     * Built once here so `isValidIpAddress()` does not recompile it per call.
     *
     * @private
     * @property {RegExp} fullProtocolIpRegex
     */
    UrlMatchValidator.fullProtocolIpRegex = new RegExp(UrlMatchValidator.hasFullProtocolRegex.source + UrlMatchValidator.ipRegex.source);
    return UrlMatchValidator;
}());
166export { UrlMatchValidator };
167//# sourceMappingURL=url-match-validator.js.map
Note: See TracBrowser for help on using the repository browser.