[d24f17c] | 1 | import { Matcher, MatcherConfig } from './matcher';
|
---|
| 2 | import { StripPrefixConfigObj } from '../autolinker';
|
---|
| 3 | import { Match } from '../match/match';
|
---|
/**
 * @class Autolinker.matcher.Url
 * @extends Autolinker.matcher.Matcher
 *
 * Matcher that locates URL matches within an input string.
 *
 * Refer to the superclass ({@link Autolinker.matcher.Matcher}) for further
 * details.
 */
export declare class UrlMatcher extends Matcher {
    /**
     * @cfg {Object} stripPrefix (required)
     *
     * Object form of the {@link Autolinker#cfg-stripPrefix} config.
     */
    protected stripPrefix: Required<StripPrefixConfigObj>;

    /**
     * @cfg {Boolean} stripTrailingSlash (required)
     * @inheritdoc Autolinker#stripTrailingSlash
     */
    protected stripTrailingSlash: boolean;

    /**
     * @cfg {Boolean} decodePercentEncoding (required)
     * @inheritdoc Autolinker#decodePercentEncoding
     */
    protected decodePercentEncoding: boolean;

    /**
     * @protected
     * @property {RegExp} matcherRegex
     *
     * Regular expression used to match URLs, each with an optional scheme,
     * port number, path, query string, and hash anchor.
     *
     * Examples of text it matches:
     *
     *     http://google.com
     *     www.google.com
     *     google.com/path/to/file?q1=1&q2=2#myAnchor
     *
     *
     * The expression exposes these capturing groups:
     *
     * 1.  A scheme-prefixed URL (i.e. 'http://google.com'). Used so that
     *     single-word scheme URLs such as 'http://localhost' can be matched
     *     without double checking that the domain name contains at least one
     *     dot ('.').
     * 2.  A 'www.'-prefixed URL. Only matched when the 'www.' text is not
     *     itself preceded by a scheme (i.e.: not prefixed by 'http://',
     *     'ftp:', etc.)
     * 3.  The protocol-relative ('//') portion of a 'www.'-prefixed URL, or an
     *     empty string when the match is not protocol-relative. The character
     *     before the '//' is needed to decide whether the match is valid, or
     *     whether the '//' sits inside a string that should not be
     *     auto-linked.
     * 4.  A known TLD (top level domain) match, used when neither a scheme nor
     *     a 'www.'-prefixed domain was matched.
     * 5.  The protocol-relative ('//') portion of a known-TLD URL, or an empty
     *     string when the match is not protocol-relative. See #3 for more
     *     info.
     */
    protected matcherRegex: RegExp;

    /**
     * Regular expression applied to the character that precedes a
     * protocol-relative URL match. A protocol-relative URL must not be matched
     * when it is part of another word.
     *
     * For instance, "Go to: //google.com" should produce a match, whereas
     * "abc//google.com" should not.
     *
     * The expression is tested against the character immediately before the
     * '//'.
     *
     * @protected
     * @property {RegExp} wordCharRegExp
     */
    protected wordCharRegExp: RegExp;

    /**
     * @method constructor
     * @param {Object} cfg The configuration properties for the matcher
     *   instance, specified in an Object (map).
     */
    constructor(cfg: UrlMatcherConfig);

    /**
     * @inheritdoc
     */
    parseMatches(text: string): Match[];

    /**
     * Checks whether a found match ends with an unmatched closing parenthesis,
     * square bracket or curly bracket. When it does, that symbol is removed
     * from the match itself and appended after the generated anchor tag.
     *
     * A trailing closing parenthesis can end up in a match because the
     * regular expression has to accept parentheses for URLs such as
     * "wikipedia.com/something_(disambiguation)", which should be auto-linked.
     *
     * An extra parenthesis *will* be captured, however, when the URL itself is
     * wrapped in parentheses, as in:
     *     "(wikipedia.com/something_(disambiguation))"
     * Here the final closing parenthesis should *not* belong to the URL, and
     * this method returns `true`.
     *
     * Square brackets in URLs (PHP arrays, for example) are treated the same
     * way as the parentheses discussed above:
     *     "[http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3]"
     * The closing square bracket should not belong to the URL, and this
     * method returns `true`.
     *
     * @protected
     * @param {String} matchStr The full match string from the {@link #matcherRegex}.
     * @return {Boolean} `true` if the `matchStr` ends with an unbalanced
     *   closing bracket character as described above, `false` otherwise.
     */
    protected matchHasUnbalancedClosingParen(matchStr: string): boolean;

    /**
     * Determines whether an invalid character follows the TLD in a URL. The
     * characters considered valid after a TLD are ':/?#'. Scheme-matched URLs
     * are excluded from this check.
     *
     * @protected
     * @param {String} urlMatch The matched URL, if there was one. Will be an
     *   empty string if the match is not a URL match.
     * @param {String} schemeUrlMatch The match URL string for a scheme
     *   match. Ex: 'http://yahoo.com'. This is used to match something like
     *   'http://localhost', where we won't double check that the domain name
     *   has at least one '.' in it.
     * @return {Number} The position at which the invalid character was found,
     *   or -1 if no such character was found.
     */
    protected matchHasInvalidCharAfterTld(urlMatch: string, schemeUrlMatch: string): number;
}
/**
 * Configuration object accepted by the {@link UrlMatcher} constructor. Adds
 * the URL-specific settings below to the base {@link MatcherConfig}.
 */
export interface UrlMatcherConfig extends MatcherConfig {
    /** Value for the `stripPrefix` cfg of {@link UrlMatcher}. */
    stripPrefix: Required<StripPrefixConfigObj>;

    /** Value for the `stripTrailingSlash` cfg of {@link UrlMatcher}. */
    stripTrailingSlash: boolean;

    /** Value for the `decodePercentEncoding` cfg of {@link UrlMatcher}. */
    decodePercentEncoding: boolean;
}