/**
 * All the states the tokenizer can be in.
 *
 * This is an ambient `const enum`: TypeScript inlines the numeric values at
 * every use site, so the explicit values below must be kept stable.
 * The section comments only group members by their names; which input moves
 * the tokenizer between states is defined by the implementation, not here.
 */
declare const enum State {
    // Text content
    Text = 1,

    // Opening tags
    BeforeTagName = 2,
    InTagName = 3,
    InSelfClosingTag = 4,

    // Closing tags
    BeforeClosingTagName = 5,
    InClosingTagName = 6,
    AfterClosingTagName = 7,

    // Attributes (Dq / Sq / Nq match the class members named
    // DoubleQuotes / SingleQuotes / NoQuotes)
    BeforeAttributeName = 8,
    InAttributeName = 9,
    AfterAttributeName = 10,
    BeforeAttributeValue = 11,
    InAttributeValueDq = 12,
    InAttributeValueSq = 13,
    InAttributeValueNq = 14,

    // Declarations and processing instructions
    BeforeDeclaration = 15,
    InDeclaration = 16,
    InProcessingInstruction = 17,

    // Comments
    BeforeComment = 18,
    InComment = 19,
    InSpecialComment = 20,
    AfterComment1 = 21,
    AfterComment2 = 22,

    // CDATA sections
    // NOTE(review): the numbered states presumably step through the opening
    // and closing delimiters one character at a time; confirm in the implementation.
    BeforeCdata1 = 23,
    BeforeCdata2 = 24,
    BeforeCdata3 = 25,
    BeforeCdata4 = 26,
    BeforeCdata5 = 27,
    BeforeCdata6 = 28,
    InCdata = 29,
    AfterCdata1 = 30,
    AfterCdata2 = 31,

    // Special tags starting with "s": script and style
    BeforeSpecialS = 32,
    BeforeSpecialSEnd = 33,
    BeforeScript1 = 34,
    BeforeScript2 = 35,
    BeforeScript3 = 36,
    BeforeScript4 = 37,
    BeforeScript5 = 38,
    AfterScript1 = 39,
    AfterScript2 = 40,
    AfterScript3 = 41,
    AfterScript4 = 42,
    AfterScript5 = 43,
    BeforeStyle1 = 44,
    BeforeStyle2 = 45,
    BeforeStyle3 = 46,
    BeforeStyle4 = 47,
    AfterStyle1 = 48,
    AfterStyle2 = 49,
    AfterStyle3 = 50,
    AfterStyle4 = 51,

    // Special tags starting with "t": title
    BeforeSpecialT = 52,
    BeforeSpecialTEnd = 53,
    BeforeTitle1 = 54,
    BeforeTitle2 = 55,
    BeforeTitle3 = 56,
    BeforeTitle4 = 57,
    AfterTitle1 = 58,
    AfterTitle2 = 59,
    AfterTitle3 = 60,
    AfterTitle4 = 61,

    // Entities
    BeforeEntity = 62,
    BeforeNumericEntity = 63,
    InNamedEntity = 64,
    InNumericEntity = 65,
    InHexEntity = 66
}
/**
 * Handler methods supplied to the `Tokenizer` constructor as its `cbs` argument.
 *
 * Every member is required and returns `void`. The method names indicate the
 * kind of token being reported; exactly when each handler fires and how the
 * payload is sliced is decided by the tokenizer implementation, which this
 * declaration does not show.
 */
export interface Callbacks {
    /** Receives attribute value text. */
    onattribdata(value: string): void;
    /**
     * Signals the end of an attribute.
     *
     * @param quote - A string, `undefined`, or `null`.
     *   NOTE(review): presumably the quote character that delimited the value,
     *   with the nullish forms for unquoted or value-less attributes; confirm
     *   against the implementation.
     */
    onattribend(quote: string | undefined | null): void;
    /** Receives an attribute name. */
    onattribname(name: string): void;
    /** Receives the content of a CDATA section. */
    oncdata(data: string): void;
    /** Receives the name of a closing tag. */
    onclosetag(name: string): void;
    /** Receives the content of a comment. */
    oncomment(data: string): void;
    /** Receives the content of a declaration. */
    ondeclaration(content: string): void;
    /** Signals that tokenizing has ended. */
    onend(): void;
    /**
     * Reports an error.
     *
     * @param error - The error being reported.
     * @param state - Optional tokenizer state accompanying the error.
     */
    onerror(error: Error, state?: State): void;
    /** Signals the end of an opening tag. */
    onopentagend(): void;
    /** Receives the name of an opening tag. */
    onopentagname(name: string): void;
    /** Receives the content of a processing instruction. */
    onprocessinginstruction(instruction: string): void;
    /** Signals a self-closing tag. */
    onselfclosingtag(): void;
    /** Receives text content. */
    ontext(value: string): void;
}
/**
 * Declaration of the tokenizer class.
 *
 * Input is supplied as string chunks through `write` / `end`, and the instance
 * holds the `Callbacks` object passed to the constructor. Only the type surface
 * is declared here: private members appear without types, and their behaviour
 * lives in the implementation file.
 */
export default class Tokenizer {
    /** The current state the tokenizer is in. */
    _state: State;
    /** The read buffer. */
    private buffer;
    /** The beginning of the section that is currently being read. */
    sectionStart: number;
    /** The index within the buffer that we are currently looking at. */
    _index: number;
    /**
     * Data that has already been processed will be removed from the buffer occasionally.
     * `bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
     */
    private bufferOffset;
    /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    private baseState;
    /** For special parsing behavior inside of script and style tags. */
    private special;
    /**
     * Pause/run flag for the tokenizer.
     *
     * NOTE(review): the original comment read "has been paused", but the field is
     * named `running`; presumably it is `true` while not paused. Confirm against
     * the implementation.
     */
    private running;
    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
    private ended;
    /** The callbacks object received by the constructor. */
    private readonly cbs;
    /** Stored from the constructor's `xmlMode` option. */
    private readonly xmlMode;
    /** Stored from the constructor's `decodeEntities` option. */
    private readonly decodeEntities;
    /**
     * @param options - Optional settings (`xmlMode`, `decodeEntities`); `null` is accepted.
     * @param cbs - Handlers for the tokenizer's events.
     */
    constructor(options: {
        xmlMode?: boolean;
        decodeEntities?: boolean;
    } | null, cbs: Callbacks);
    /** Resets the tokenizer. */
    reset(): void;
    /** Supplies a chunk of input. */
    write(chunk: string): void;
    /** Ends the input, optionally supplying a final chunk. */
    end(chunk?: string): void;
    /** Pauses the tokenizer. */
    pause(): void;
    /** Resumes the tokenizer. */
    resume(): void;
    /**
     * The current index within all of the written data.
     */
    getAbsoluteIndex(): number;
    private stateText;
    /**
     * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
     *
     * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
     * We allow anything that wouldn't end the tag.
     */
    private isTagStartChar;
    private stateBeforeTagName;
    private stateInTagName;
    private stateBeforeClosingTagName;
    private stateInClosingTagName;
    private stateAfterClosingTagName;
    private stateBeforeAttributeName;
    private stateInSelfClosingTag;
    private stateInAttributeName;
    private stateAfterAttributeName;
    private stateBeforeAttributeValue;
    private handleInAttributeValue;
    private stateInAttributeValueDoubleQuotes;
    private stateInAttributeValueSingleQuotes;
    private stateInAttributeValueNoQuotes;
    private stateBeforeDeclaration;
    private stateInDeclaration;
    private stateInProcessingInstruction;
    private stateBeforeComment;
    private stateInComment;
    private stateInSpecialComment;
    private stateAfterComment1;
    private stateAfterComment2;
    private stateBeforeCdata6;
    private stateInCdata;
    private stateAfterCdata1;
    private stateAfterCdata2;
    private stateBeforeSpecialS;
    private stateBeforeSpecialSEnd;
    private stateBeforeSpecialLast;
    private stateAfterSpecialLast;
    private parseFixedEntity;
    private parseLegacyEntity;
    private stateInNamedEntity;
    private decodeNumericEntity;
    private stateInNumericEntity;
    private stateInHexEntity;
    private cleanup;
    /**
     * Iterates through the buffer, calling the function corresponding to the current state.
     *
     * States that are more likely to be hit are higher up, as a performance improvement.
     */
    private parse;
    private finish;
    private handleTrailingData;
    private getSection;
    private emitToken;
    private emitPartial;
}
// Empty export list: no additional names are exported by this statement.
export {};
//# sourceMappingURL=Tokenizer.d.ts.map