[57e58a3] | 1 | import htmlDecodeTree from "./generated/decode-data-html.js";
|
---|
| 2 | import xmlDecodeTree from "./generated/decode-data-xml.js";
|
---|
| 3 | import decodeCodePoint from "./decode_codepoint.js";
|
---|
| 4 | export { htmlDecodeTree, xmlDecodeTree, decodeCodePoint };
|
---|
| 5 | export { replaceCodePoint, fromCodePoint } from "./decode_codepoint.js";
|
---|
| 6 | export declare enum BinTrieFlags { // NOTE(review): the three values are disjoint bit masks that together cover a 16-bit word (0xFFFF); presumably applied to `Uint16Array` decode-tree entries — confirm in the implementation.
|
---|
| 7 | VALUE_LENGTH = 49152, // 0xC000: bits 14-15
|
---|
| 8 | BRANCH_LENGTH = 16256, // 0x3F80: bits 7-13
|
---|
| 9 | JUMP_TABLE = 127 // 0x007F: bits 0-6
|
---|
| 10 | }
|
---|
| 11 | export declare enum DecodingMode { // Selects how entity termination is handled; accepted by `decodeHTML` and `EntityDecoder.startEntity`.
|
---|
| 12 | /** Legacy mode: for entities in text nodes, which can end with any character. */
|
---|
| 13 | Legacy = 0,
|
---|
| 14 | /** Strict mode: only entities terminated with a semicolon are allowed. */
|
---|
| 15 | Strict = 1,
|
---|
| 16 | /** Attribute mode: entities in attributes have limitations on which characters may end them. */
|
---|
| 17 | Attribute = 2
|
---|
| 18 | }
|
---|
| 19 | /**
|
---|
| 20 | * Producers for character reference errors as defined in the HTML spec. An implementation can be passed as the optional `errors` argument of the `EntityDecoder` constructor.
|
---|
| 21 | */
|
---|
| 22 | export interface EntityErrorProducer {
|
---|
| 23 | missingSemicolonAfterCharacterReference(): void; // NOTE(review): name mirrors the HTML spec parse error `missing-semicolon-after-character-reference` — confirm when it is invoked.
|
---|
| 24 | absenceOfDigitsInNumericCharacterReference(consumedCharacters: number): void; // NOTE(review): name mirrors `absence-of-digits-in-numeric-character-reference`; `consumedCharacters` is presumably the count consumed so far — confirm.
|
---|
| 25 | validateNumericCharacterReference(code: number): void; // NOTE(review): presumably receives the decoded numeric code point for validation — confirm against the implementation.
|
---|
| 26 | }
|
---|
| 27 | /**
|
---|
| 28 | * Token decoder with support for writing partial entities.
|
---|
| 29 | */
|
---|
| 30 | export declare class EntityDecoder {
|
---|
| 31 | /** The tree used to decode entities. */
|
---|
| 32 | private readonly decodeTree;
|
---|
| 33 | /**
|
---|
| 34 | * The function that is called when a codepoint is decoded.
|
---|
| 35 | *
|
---|
| 36 | * For multi-byte named entities, this will be called multiple times,
|
---|
| 37 | * with the second codepoint, and the same `consumed` value.
|
---|
| 38 | *
|
---|
| 39 | * @param codepoint The decoded codepoint.
|
---|
| 40 | * @param consumed The number of bytes consumed by the decoder.
|
---|
| 41 | */
|
---|
| 42 | private readonly emitCodePoint;
|
---|
| 43 | /** An object that is used to produce errors. */
|
---|
| 44 | private readonly errors?;
|
---|
| 45 | constructor(
|
---|
| 46 | /** The tree used to decode entities. */
|
---|
| 47 | decodeTree: Uint16Array,
|
---|
| 48 | /**
|
---|
| 49 | * The function that is called when a codepoint is decoded.
|
---|
| 50 | *
|
---|
| 51 | * For multi-byte named entities, this will be called multiple times,
|
---|
| 52 | * with the second codepoint, and the same `consumed` value.
|
---|
| 53 | *
|
---|
| 54 | * @param codepoint The decoded codepoint.
|
---|
| 55 | * @param consumed The number of bytes consumed by the decoder.
|
---|
| 56 | */
|
---|
| 57 | emitCodePoint: (cp: number, consumed: number) => void,
|
---|
| 58 | /** An object that is used to produce errors. Optional; may be omitted or `undefined`. */
|
---|
| 59 | errors?: EntityErrorProducer | undefined);
|
---|
| 60 | /** The current state of the decoder. */
|
---|
| 61 | private state;
|
---|
| 62 | /** Characters that were consumed while parsing an entity. */
|
---|
| 63 | private consumed;
|
---|
| 64 | /**
|
---|
| 65 | * The result of the entity.
|
---|
| 66 | *
|
---|
| 67 | * Either the result index of a named entity, or the codepoint of a
|
---|
| 68 | * numeric entity.
|
---|
| 69 | */
|
---|
| 70 | private result;
|
---|
| 71 | /** The current index in the decode tree. */
|
---|
| 72 | private treeIndex;
|
---|
| 73 | /** The number of characters that were consumed in excess. */
|
---|
| 74 | private excess;
|
---|
| 75 | /** The mode in which the decoder is operating. */
|
---|
| 76 | private decodeMode;
|
---|
| 77 | /** Resets the instance to make it reusable. */
|
---|
| 78 | startEntity(decodeMode: DecodingMode): void;
|
---|
| 79 | /**
|
---|
| 80 | * Write an entity to the decoder. This can be called multiple times with partial entities.
|
---|
| 81 | * If the entity is incomplete, the decoder will return -1.
|
---|
| 82 | *
|
---|
| 83 | * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
|
---|
| 84 | * entity is incomplete, and resume when the next string is written.
|
---|
| 85 | *
|
---|
| 86 | * @param str The string containing the entity (or a continuation of the entity).
|
---|
| 87 | * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
|
---|
| 88 | * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
---|
| 89 | */
|
---|
| 90 | write(str: string, offset: number): number;
|
---|
| 91 | /**
|
---|
| 92 | * Switches between the numeric decimal and hexadecimal states.
|
---|
| 93 | *
|
---|
| 94 | * Equivalent to the `Numeric character reference state` in the HTML spec.
|
---|
| 95 | *
|
---|
| 96 | * @param str The string containing the entity (or a continuation of the entity).
|
---|
| 97 | * @param offset The current offset.
|
---|
| 98 | * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
---|
| 99 | */
|
---|
| 100 | private stateNumericStart;
|
---|
| 101 | private addToNumericResult; // NOTE(review): undocumented; presumably folds parsed digits into `result` — confirm in the implementation.
|
---|
| 102 | /**
|
---|
| 103 | * Parses a hexadecimal numeric entity.
|
---|
| 104 | *
|
---|
| 105 | * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
|
---|
| 106 | *
|
---|
| 107 | * @param str The string containing the entity (or a continuation of the entity).
|
---|
| 108 | * @param offset The current offset.
|
---|
| 109 | * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
---|
| 110 | */
|
---|
| 111 | private stateNumericHex;
|
---|
| 112 | /**
|
---|
| 113 | * Parses a decimal numeric entity.
|
---|
| 114 | *
|
---|
| 115 | * Equivalent to the `Decimal character reference state` in the HTML spec.
|
---|
| 116 | *
|
---|
| 117 | * @param str The string containing the entity (or a continuation of the entity).
|
---|
| 118 | * @param offset The current offset.
|
---|
| 119 | * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
---|
| 120 | */
|
---|
| 121 | private stateNumericDecimal;
|
---|
| 122 | /**
|
---|
| 123 | * Validate and emit a numeric entity.
|
---|
| 124 | *
|
---|
| 125 | * Implements the logic from the `Hexadecimal character reference start
|
---|
| 126 | * state` and `Numeric character reference end state` in the HTML spec.
|
---|
| 127 | *
|
---|
| 128 | * @param lastCp The last code point of the entity. Used to see if the
|
---|
| 129 | * entity was terminated with a semicolon.
|
---|
| 130 | * @param expectedLength The minimum number of characters that should be
|
---|
| 131 | * consumed. Used to validate that at least one digit
|
---|
| 132 | * was consumed.
|
---|
| 133 | * @returns The number of characters that were consumed.
|
---|
| 134 | */
|
---|
| 135 | private emitNumericEntity;
|
---|
| 136 | /**
|
---|
| 137 | * Parses a named entity.
|
---|
| 138 | *
|
---|
| 139 | * Equivalent to the `Named character reference state` in the HTML spec.
|
---|
| 140 | *
|
---|
| 141 | * @param str The string containing the entity (or a continuation of the entity).
|
---|
| 142 | * @param offset The current offset.
|
---|
| 143 | * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
---|
| 144 | */
|
---|
| 145 | private stateNamedEntity;
|
---|
| 146 | /**
|
---|
| 147 | * Emit a named entity that was not terminated with a semicolon.
|
---|
| 148 | *
|
---|
| 149 | * @returns The number of characters consumed.
|
---|
| 150 | */
|
---|
| 151 | private emitNotTerminatedNamedEntity;
|
---|
| 152 | /**
|
---|
| 153 | * Emit a named entity.
|
---|
| 154 | *
|
---|
| 155 | * @param result The index of the entity in the decode tree.
|
---|
| 156 | * @param valueLength The number of bytes in the entity.
|
---|
| 157 | * @param consumed The number of characters consumed.
|
---|
| 158 | *
|
---|
| 159 | * @returns The number of characters consumed.
|
---|
| 160 | */
|
---|
| 161 | private emitNamedEntityData;
|
---|
| 162 | /**
|
---|
| 163 | * Signal to the parser that the end of the input was reached.
|
---|
| 164 | *
|
---|
| 165 | * Remaining data will be emitted and relevant errors will be produced.
|
---|
| 166 | *
|
---|
| 167 | * @returns The number of characters consumed.
|
---|
| 168 | */
|
---|
| 169 | end(): number;
|
---|
| 170 | }
|
---|
| 171 | /**
|
---|
| 172 | * Determines the branch of the current node that is taken given the current
|
---|
| 173 | * character. This function is used to traverse the trie.
|
---|
| 174 | *
|
---|
| 175 | * @param decodeTree The trie, stored as a `Uint16Array`.
|
---|
| 176 | * @param current The current node (a number; presumably the node's value read from the trie — confirm against callers).
|
---|
| 177 | * @param nodeIdx The index right after the current node and its value.
|
---|
| 178 | * @param char The current character, passed as a number (presumably its character code — confirm against callers).
|
---|
| 179 | * @returns The index of the next node, or -1 if no branch is taken.
|
---|
| 180 | */
|
---|
| 181 | export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIdx: number, char: number): number;
|
---|
| 182 | /**
|
---|
| 183 | * Decodes an HTML string.
|
---|
| 184 | *
|
---|
| 185 | * @param str The string to decode.
|
---|
| 186 | * @param mode The decoding mode. Optional; the default is not visible in this declaration (presumably `DecodingMode.Legacy` — confirm in the implementation).
|
---|
| 187 | * @returns The decoded string.
|
---|
| 188 | */
|
---|
| 189 | export declare function decodeHTML(str: string, mode?: DecodingMode): string;
|
---|
| 190 | /**
|
---|
| 191 | * Decodes an HTML string that appears in an attribute (presumably `decodeHTML` with `DecodingMode.Attribute` — confirm in the implementation).
|
---|
| 192 | *
|
---|
| 193 | * @param str The string to decode.
|
---|
| 194 | * @returns The decoded string.
|
---|
| 195 | */
|
---|
| 196 | export declare function decodeHTMLAttribute(str: string): string;
|
---|
| 197 | /**
|
---|
| 198 | * Decodes an HTML string, requiring all entities to be terminated by a semicolon (compare `DecodingMode.Strict`).
|
---|
| 199 | *
|
---|
| 200 | * @param str The string to decode.
|
---|
| 201 | * @returns The decoded string.
|
---|
| 202 | */
|
---|
| 203 | export declare function decodeHTMLStrict(str: string): string;
|
---|
| 204 | /**
|
---|
| 205 | * Decodes an XML string, requiring all entities to be terminated by a semicolon (presumably using the exported `xmlDecodeTree` — confirm in the implementation).
|
---|
| 206 | *
|
---|
| 207 | * @param str The string to decode.
|
---|
| 208 | * @returns The decoded string.
|
---|
| 209 | */
|
---|
| 210 | export declare function decodeXML(str: string): string;
|
---|
| 211 | //# sourceMappingURL=decode.d.ts.map |
---|