// Pull-parser state handling for XML processing instructions (`<?name data?>`).
use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
use crate::reader::error::SyntaxError;
use crate::reader::events::XmlEvent;
use crate::reader::lexer::Token;
use super::{DeclarationSubstate, Encountered, ProcessingInstructionSubstate, PullParser, Result, State};
impl PullParser {
pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> {
match s {
ProcessingInstructionSubstate::PIInsideName => match t {
Token::Character(c) if self.buf.is_empty() && is_name_start_char(c) ||
self.buf_has_data() && is_name_char(c) => {
if self.buf.len() > self.config.max_name_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
self.buf.push(c);
None
},
Token::ProcessingInstructionEnd => {
// self.buf contains PI name
let name = self.take_buf();
// Don't need to check for declaration because it has mandatory attributes
// but there is none
match &*name {
// Name is empty, it is an error
"" => Some(self.error(SyntaxError::ProcessingInstructionWithoutName)),
// Found <?xml-like PI not at the beginning of a document,
// it is an error - see section 2.6 of XML 1.1 spec
n if "xml".eq_ignore_ascii_case(n) =>
Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))),
// All is ok, emitting event
_ => {
debug_assert!(self.next_event.is_none(), "{:?}", self.next_event);
// can't have a PI before `<?xml`
let event1 = self.set_encountered(Encountered::Declaration);
let event2 = Some(Ok(XmlEvent::ProcessingInstruction {
name,
data: None
}));
// emitting two events at once is cumbersome
let event1 = if event1.is_some() {
self.next_event = event2;
event1
} else {
event2
};
self.into_state(State::OutsideTag, event1)
},
}
},
Token::Character(c) if is_whitespace_char(c) => {
// self.buf contains PI name
let name = self.take_buf();
match &*name {
// We have not ever encountered an element and have not parsed XML declaration
"xml" if self.encountered == Encountered::None =>
self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion)),
// Found <?xml-like PI after the beginning of a document,
// it is an error - see section 2.6 of XML 1.1 spec
n if "xml".eq_ignore_ascii_case(n) =>
Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))),
// All is ok, starting parsing PI data
_ => {
self.data.name = name;
// can't have a PI before `<?xml`
let next_event = self.set_encountered(Encountered::Declaration);
self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData), next_event)
}
}
},
_ => {
let buf = self.take_buf();
Some(self.error(SyntaxError::UnexpectedProcessingInstruction(buf.into(), t)))
},
},
ProcessingInstructionSubstate::PIInsideData => match t {
Token::ProcessingInstructionEnd => {
let name = self.data.take_name();
let data = self.take_buf();
self.into_state_emit(
State::OutsideTag,
Ok(XmlEvent::ProcessingInstruction { name, data: Some(data) }),
)
},
Token::Character(c) if !self.is_valid_xml_char(c) => {
Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
},
// Any other token should be treated as plain characters
_ => {
if self.buf.len() > self.config.max_data_length {
return Some(self.error(SyntaxError::ExceededConfiguredLimit));
}
t.push_to_string(&mut self.buf);
None
},
},
}
}
}