1 | import htmlDecodeTree from "./generated/decode-data-html.js";
|
---|
2 | import xmlDecodeTree from "./generated/decode-data-xml.js";
|
---|
3 | import decodeCodePoint from "./decode_codepoint.js";
|
---|
4 | export { htmlDecodeTree, xmlDecodeTree, decodeCodePoint };
|
---|
5 | export { replaceCodePoint, fromCodePoint } from "./decode_codepoint.js";
|
---|
/**
 * Bit masks over a 16-bit value. The three masks are disjoint and together
 * cover all 16 bits (`0xC000 | 0x3F80 | 0x007F === 0xFFFF`).
 *
 * NOTE(review): presumably these split a node of the `Uint16Array` decode
 * trie into three fields; the field meanings below are inferred from the
 * member names only. Confirm against the implementation.
 */
export declare enum BinTrieFlags {
    /** Mask `0xC000` (bits 14-15). Presumably the length of a node's value. */
    VALUE_LENGTH = 49152,
    /** Mask `0x3F80` (bits 7-13). Presumably the number of branches of a node. */
    BRANCH_LENGTH = 16256,
    /** Mask `0x007F` (bits 0-6). Presumably jump-table information for a node. */
    JUMP_TABLE = 127
}
/**
 * The mode in which entities are decoded. Accepted by
 * `EntityDecoder.startEntity` and, optionally, by `decodeHTML`.
 */
export declare enum DecodingMode {
    /** Entities in text nodes that can end with any character. */
    Legacy = 0,
    /** Only allow entities terminated with a semicolon. */
    Strict = 1,
    /** Entities in attributes have limitations on ending characters. */
    Attribute = 2
}
/**
 * Producers for character reference errors as defined in the HTML spec.
 *
 * An object of this shape can optionally be passed to the `EntityDecoder`
 * constructor as its `errors` argument.
 */
export interface EntityErrorProducer {
    /**
     * NOTE(review): presumably invoked when a character reference is not
     * terminated by a semicolon; confirm against the implementation.
     */
    missingSemicolonAfterCharacterReference(): void;
    /**
     * NOTE(review): presumably invoked when a numeric character reference
     * contains no digits; confirm against the implementation.
     *
     * @param consumedCharacters A character count supplied by the decoder
     *     (presumably the characters consumed before the error was detected).
     */
    absenceOfDigitsInNumericCharacterReference(consumedCharacters: number): void;
    /**
     * NOTE(review): presumably invoked with the value of a numeric character
     * reference so that it can be validated; confirm against the implementation.
     *
     * @param code The numeric value to validate.
     */
    validateNumericCharacterReference(code: number): void;
}
/**
 * Token decoder with support for writing partial entities.
 *
 * Public surface, as declared here: `startEntity` resets the instance for a
 * new entity, `write` feeds it (possibly partial) input and returns `-1`
 * while the entity is incomplete, and `end` signals the end of the input.
 */
export declare class EntityDecoder {
    /** The tree used to decode entities. */
    private readonly decodeTree;
    /**
     * The function that is called when a codepoint is decoded.
     *
     * For multi-byte named entities, this will be called multiple times,
     * with the second codepoint, and the same `consumed` value.
     *
     * @param codepoint The decoded codepoint.
     * @param consumed The number of bytes consumed by the decoder.
     */
    private readonly emitCodePoint;
    /** An object that is used to produce errors. */
    private readonly errors?;
    constructor(
    /** The tree used to decode entities. */
    decodeTree: Uint16Array, 
    /**
     * The function that is called when a codepoint is decoded.
     *
     * For multi-byte named entities, this will be called multiple times,
     * with the second codepoint, and the same `consumed` value.
     *
     * @param cp The decoded codepoint.
     * @param consumed The number of bytes consumed by the decoder.
     */
    emitCodePoint: (cp: number, consumed: number) => void, 
    /** An object that is used to produce errors. */
    errors?: EntityErrorProducer | undefined);
    /** The current state of the decoder. */
    private state;
    /** Characters that were consumed while parsing an entity. */
    private consumed;
    /**
     * The result of the entity.
     *
     * Either the result index of a named entity (its index in the decode
     * tree), or the codepoint of a numeric entity.
     */
    private result;
    /** The current index in the decode tree. */
    private treeIndex;
    /** The number of characters that were consumed in excess. */
    private excess;
    /** The mode in which the decoder is operating. */
    private decodeMode;
    /**
     * Resets the instance to make it reusable.
     *
     * @param decodeMode The mode in which the decoder is operating.
     */
    startEntity(decodeMode: DecodingMode): void;
    /**
     * Write an entity to the decoder. This can be called multiple times with partial entities.
     * If the entity is incomplete, the decoder will return -1.
     *
     * Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
     * entity is incomplete, and resume when the next string is written.
     *
     * @param str The string containing the entity (or a continuation of the entity).
     * @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    write(str: string, offset: number): number;
    /**
     * Switches between the numeric decimal and hexadecimal states.
     *
     * Equivalent to the `Numeric character reference state` in the HTML spec.
     *
     * @param str The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNumericStart;
    /**
     * NOTE(review): undocumented in this declaration; presumably folds parsed
     * digits into `result`. Confirm against the implementation.
     */
    private addToNumericResult;
    /**
     * Parses a hexadecimal numeric entity.
     *
     * Equivalent to the `Hexadecimal character reference state` in the HTML spec.
     *
     * @param str The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNumericHex;
    /**
     * Parses a decimal numeric entity.
     *
     * Equivalent to the `Decimal character reference state` in the HTML spec.
     *
     * @param str The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNumericDecimal;
    /**
     * Validate and emit a numeric entity.
     *
     * Implements the logic from the `Hexadecimal character reference start
     * state` and `Numeric character reference end state` in the HTML spec.
     *
     * @param lastCp The last code point of the entity. Used to see if the
     *               entity was terminated with a semicolon.
     * @param expectedLength The minimum number of characters that should be
     *                       consumed. Used to validate that at least one digit
     *                       was consumed.
     * @returns The number of characters that were consumed.
     */
    private emitNumericEntity;
    /**
     * Parses a named entity.
     *
     * Equivalent to the `Named character reference state` in the HTML spec.
     *
     * @param str The string containing the entity (or a continuation of the entity).
     * @param offset The current offset.
     * @returns The number of characters that were consumed, or -1 if the entity is incomplete.
     */
    private stateNamedEntity;
    /**
     * Emit a named entity that was not terminated with a semicolon.
     *
     * @returns The number of characters consumed.
     */
    private emitNotTerminatedNamedEntity;
    /**
     * Emit a named entity.
     *
     * @param result The index of the entity in the decode tree.
     * @param valueLength The number of bytes in the entity.
     * @param consumed The number of characters consumed.
     *
     * @returns The number of characters consumed.
     */
    private emitNamedEntityData;
    /**
     * Signal to the parser that the end of the input was reached.
     *
     * Remaining data will be emitted and relevant errors will be produced.
     *
     * @returns The number of characters consumed.
     */
    end(): number;
}
/**
 * Determines the branch of the current node that is taken given the current
 * character. This function is used to traverse the trie.
 *
 * @param decodeTree The trie.
 * @param current The current node.
 * @param nodeIdx The index right after the current node and its value.
 * @param char The current character.
 * @returns The index of the next node, or -1 if no branch is taken.
 */
export declare function determineBranch(decodeTree: Uint16Array, current: number, nodeIdx: number, char: number): number;
/**
 * Decodes an HTML string.
 *
 * @param str The string to decode.
 * @param mode The decoding mode. Optional; the default used when it is
 *     omitted is not visible in this declaration.
 * @returns The decoded string.
 */
export declare function decodeHTML(str: string, mode?: DecodingMode): string;
/**
 * Decodes an HTML string in an attribute.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
export declare function decodeHTMLAttribute(str: string): string;
/**
 * Decodes an HTML string, requiring all entities to be terminated by a semicolon.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
export declare function decodeHTMLStrict(str: string): string;
/**
 * Decodes an XML string, requiring all entities to be terminated by a semicolon.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
export declare function decodeXML(str: string): string;
211 | //# sourceMappingURL=decode.d.ts.map |
---|