[d565449] | 1 | "use strict";
|
---|
| 2 | Object.defineProperty(exports, "__esModule", { value: true });
|
---|
| 3 | const boundaries_1 = require("./boundaries");
|
---|
// BreakTypes: result codes returned by GraphemerHelper.shouldBreak.
/**
 * Returned for the "×" rules: no boundary between the two classes.
 * @type {BreakType}
 */
const NotBreak = 0;
/**
 * Returned for the "÷" rules, including the GB999 fallback.
 * @type {BreakType}
 */
const BreakStart = 1;
/**
 * Returned when the classes between start and end contain a regional indicator.
 * @type {BreakType}
 */
const Break = 2;
/**
 * Returned by the regional-indicator lookahead when the run holds an odd
 * number of regional indicators.
 * @type {BreakType}
 */
const BreakLastRegional = 3;
/**
 * Returned by the regional-indicator lookahead when the run holds an even
 * number of regional indicators.
 * @type {BreakType}
 */
const BreakPenultimateRegional = 4;
|
---|
class GraphemerHelper {
    /**
     * Check whether a complete UTF-16 surrogate pair starts at position {pos}:
     * a high surrogate (0xD800-0xDBFF) at {pos} immediately followed by a low
     * surrogate (0xDC00-0xDFFF) at {pos + 1}.
     * @param str {string}
     * @param pos {number}
     * @returns {boolean}
     */
    static isSurrogate(str, pos) {
        // charCodeAt yields NaN past the end of the string, which fails every
        // range comparison below, so out-of-range positions report false.
        const lead = str.charCodeAt(pos);
        const trail = str.charCodeAt(pos + 1);
        return (0xd800 <= lead && lead <= 0xdbff &&
            0xdc00 <= trail && trail <= 0xdfff);
    }
    /**
     * The String.prototype.codePointAt polyfill
     * Private function, gets a Unicode code point from a JavaScript UTF-16 string
     * handling surrogate pairs appropriately. Unlike the native method, pointing
     * at the low half of a valid pair also yields the full code point.
     * @param str {string}
     * @param idx {number} index of the UTF-16 code unit to read (defaults to 0)
     * @returns {number} the code point, or the raw code unit for an unpaired surrogate
     */
    static codePointAt(str, idx = 0) {
        const code = str.charCodeAt(idx);
        // High surrogate with at least one code unit after it
        if (0xd800 <= code && code <= 0xdbff && idx < str.length - 1) {
            const low = str.charCodeAt(idx + 1);
            if (0xdc00 <= low && low <= 0xdfff) {
                return (code - 0xd800) * 0x400 + (low - 0xdc00) + 0x10000;
            }
            // Unpaired high surrogate
            return code;
        }
        // Low surrogate with at least one code unit before it
        if (0xdc00 <= code && code <= 0xdfff && idx >= 1) {
            const hi = str.charCodeAt(idx - 1);
            if (0xd800 <= hi && hi <= 0xdbff) {
                return (hi - 0xd800) * 0x400 + (code - 0xdc00) + 0x10000;
            }
            // Unpaired low surrogate
            return code;
        }
        // Single-unit code point, or a surrogate half sitting at the string edge
        return code;
    }
    /**
     * Private function, returns whether a break is allowed between the two given grapheme breaking classes
     * Implemented the UAX #29 3.1.1 Grapheme Cluster Boundary Rules on extended grapheme clusters
     * @param start {number} break class of the first code point
     * @param mid {Array<number>} break classes between start and end
     * @param end {number} break class of the code point being tested
     * @param startEmoji {number} emoji property paired with start
     * @param midEmoji {Array<number>} emoji properties paired with mid
     * @param endEmoji {number} emoji property paired with end
     * @returns {number} one of NotBreak, BreakStart, Break, BreakLastRegional
     * or BreakPenultimateRegional
     */
    static shouldBreak(start, mid, end, startEmoji, midEmoji, endEmoji) {
        const CB = boundaries_1.CLUSTER_BREAK;
        const all = [start].concat(mid, [end]);
        const allEmoji = [startEmoji].concat(midEmoji, [endEmoji]);
        const previous = all[all.length - 2];
        const next = end;
        const nextEmoji = endEmoji;
        const isRegional = (c) => c === CB.REGIONAL_INDICATOR;
        // Lookahead terminator for:
        // GB12. ^ (RI RI)* RI × RI
        // GB13. [^RI] (RI RI)* RI × RI
        // Fires once the run of regional indicators following `start` has
        // ended, i.e. the class just before `end` is neither RI nor Prepend.
        const lastRegionalIndex = all.lastIndexOf(CB.REGIONAL_INDICATOR);
        if (lastRegionalIndex > 0 &&
            all.slice(1, lastRegionalIndex).every(isRegional) &&
            previous !== CB.PREPEND &&
            previous !== CB.REGIONAL_INDICATOR) {
            const regionalCount = all.filter(isRegional).length;
            return regionalCount % 2 === 1
                ? BreakLastRegional
                : BreakPenultimateRegional;
        }
        // GB3. CR × LF
        if (previous === CB.CR && next === CB.LF) {
            return NotBreak;
        }
        // GB4. (Control|CR|LF) ÷
        if (previous === CB.CONTROL || previous === CB.CR || previous === CB.LF) {
            return BreakStart;
        }
        // GB5. ÷ (Control|CR|LF)
        if (next === CB.CONTROL || next === CB.CR || next === CB.LF) {
            return BreakStart;
        }
        // GB6. L × (L|V|LV|LVT)
        if (previous === CB.L &&
            (next === CB.L || next === CB.V || next === CB.LV || next === CB.LVT)) {
            return NotBreak;
        }
        // GB7. (LV|V) × (V|T)
        if ((previous === CB.LV || previous === CB.V) &&
            (next === CB.V || next === CB.T)) {
            return NotBreak;
        }
        // GB8. (LVT|T) × (T)
        if ((previous === CB.LVT || previous === CB.T) && next === CB.T) {
            return NotBreak;
        }
        // GB9. × (Extend|ZWJ)
        if (next === CB.EXTEND || next === CB.ZWJ) {
            return NotBreak;
        }
        // GB9a. × SpacingMark
        if (next === CB.SPACINGMARK) {
            return NotBreak;
        }
        // GB9b. Prepend ×
        if (previous === CB.PREPEND) {
            return NotBreak;
        }
        // GB11. \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
        // `all` and `allEmoji` are parallel arrays, so an index found in one
        // addresses the same code point in the other.
        const pictographic = boundaries_1.EXTENDED_PICTOGRAPHIC;
        const lastPictographicIndex = allEmoji
            .slice(0, -1)
            .lastIndexOf(pictographic);
        if (lastPictographicIndex !== -1 &&
            all.slice(lastPictographicIndex + 1, -2).every((c) => c === CB.EXTEND) &&
            previous === CB.ZWJ &&
            nextEmoji === pictographic) {
            return NotBreak;
        }
        // GB12. ^ (RI RI)* RI × RI
        // GB13. [^RI] (RI RI)* RI × RI
        if (mid.indexOf(CB.REGIONAL_INDICATOR) !== -1) {
            return Break;
        }
        if (previous === CB.REGIONAL_INDICATOR && next === CB.REGIONAL_INDICATOR) {
            return NotBreak;
        }
        // GB999. Any ÷ Any
        return BreakStart;
    }
}
|
---|
| 169 | exports.default = GraphemerHelper;
|
---|