1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
use crate::common::{is_whitespace_char, XmlVersion};
use crate::reader::error::SyntaxError;
use crate::reader::events::XmlEvent;
use crate::reader::lexer::Token;
use crate::util::Encoding;

use super::{
    DeclarationSubstate, Encountered, PullParser, QualifiedNameTarget, Result, State,
    DEFAULT_VERSION,
};

impl PullParser {
    #[inline(never)]
    fn emit_start_document(&mut self) -> Option<Result> {
        debug_assert!(self.encountered == Encountered::None);
        self.encountered = Encountered::Declaration;

        let version = self.data.version;
        let encoding = self.data.take_encoding();
        let standalone = self.data.standalone;

        if let Some(new_encoding) = encoding.as_deref() {
            let new_encoding = match new_encoding.parse() {
                Ok(e) => e,
                Err(_) if self.config.ignore_invalid_encoding_declarations => Encoding::Latin1,
                Err(_) => return Some(self.error(SyntaxError::UnsupportedEncoding(new_encoding.into()))),
            };
            let current_encoding = self.lexer.encoding();
            if current_encoding != new_encoding {
                let set = match (current_encoding, new_encoding) {
                    (Encoding::Unknown | Encoding::Default, new) if new != Encoding::Utf16 => new,
                    (Encoding::Utf16Be | Encoding::Utf16Le, Encoding::Utf16) => current_encoding,
                    _ if self.config.ignore_invalid_encoding_declarations => current_encoding,
                    _ => return Some(self.error(SyntaxError::ConflictingEncoding(new_encoding, current_encoding))),
                };
                self.lexer.set_encoding(set);
            }
        }

        let current_encoding = self.lexer.encoding();
        self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument {
            version: version.unwrap_or(DEFAULT_VERSION),
            encoding: encoding.unwrap_or_else(move || current_encoding.to_string()),
            standalone
        }))
    }

    // TODO: remove redundancy via macros or extra methods
    pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> {

        match s {
            DeclarationSubstate::BeforeVersion => match t {
                Token::Character('v') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion)),
                Token::Character(c) if is_whitespace_char(c) => None,  // continue
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::InsideVersion => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
                match &*name.local_name {
                    "ersion" if name.namespace.is_none() =>
                        this.into_state_continue(State::InsideDeclaration(
                            if token == Token::EqualsSign {
                                DeclarationSubstate::InsideVersionValue
                            } else {
                                DeclarationSubstate::AfterVersion
                            }
                        )),
                    _ => Some(this.error(SyntaxError::UnexpectedNameInsideXml(name.to_string().into()))),
                }
            }),

            DeclarationSubstate::AfterVersion => match t {
                Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue)),
                Token::Character(c) if is_whitespace_char(c) => None,
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| {
                this.data.version = match &*value {
                    "1.0" => Some(XmlVersion::Version10),
                    "1.1" => Some(XmlVersion::Version11),
                    _     => None
                };
                if this.data.version.is_some() {
                    this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue))
                } else {
                    Some(this.error(SyntaxError::UnexpectedXmlVersion(value.into())))
                }
            }),

            DeclarationSubstate::AfterVersionValue => match t {
                Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeEncoding)),
                Token::ProcessingInstructionEnd => self.emit_start_document(),
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::BeforeEncoding => match t {
                Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)),
                Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
                Token::ProcessingInstructionEnd => self.emit_start_document(),
                Token::Character(c) if is_whitespace_char(c) => None,  // skip whitespace
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::InsideEncoding => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
                match &*name.local_name {
                    "ncoding" if name.namespace.is_none() =>
                        this.into_state_continue(State::InsideDeclaration(
                            if token == Token::EqualsSign { DeclarationSubstate::InsideEncodingValue } else { DeclarationSubstate::AfterEncoding }
                        )),
                    _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into())))
                }
            }),

            DeclarationSubstate::AfterEncoding => match t {
                Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue)),
                Token::Character(c) if is_whitespace_char(c) => None,
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| {
                this.data.encoding = Some(value);
                this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterEncodingValue))
            }),

            DeclarationSubstate::AfterEncodingValue => match t {
                Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)),
                Token::ProcessingInstructionEnd => self.emit_start_document(),
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::BeforeStandaloneDecl => match t {
                Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
                Token::ProcessingInstructionEnd => self.emit_start_document(),
                Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::InsideStandaloneDecl => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
                match &*name.local_name {
                    "tandalone" if name.namespace.is_none() =>
                        this.into_state_continue(State::InsideDeclaration(
                            if token == Token::EqualsSign {
                                DeclarationSubstate::InsideStandaloneDeclValue
                            } else {
                                DeclarationSubstate::AfterStandaloneDecl
                            }
                        )),
                    _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))),
                }
            }),

            DeclarationSubstate::AfterStandaloneDecl => match t {
                Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue)),
                Token::Character(c) if is_whitespace_char(c) => None,
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },

            DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| {
                let standalone = match &*value {
                    "yes" => Some(true),
                    "no"  => Some(false),
                    _     => None
                };
                if standalone.is_some() {
                    this.data.standalone = standalone;
                    this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue))
                } else {
                    Some(this.error(SyntaxError::InvalidStandaloneDeclaration(value.into())))
                }
            }),

            DeclarationSubstate::AfterStandaloneDeclValue => match t {
                Token::ProcessingInstructionEnd => self.emit_start_document(),
                Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
            },
        }
    }
}