1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
//! Contains `XmlEvent` datatype, instances of which are emitted by the parser.
use crate::attribute::OwnedAttribute;
use crate::common::XmlVersion;
use crate::name::OwnedName;
use crate::namespace::Namespace;
use std::fmt;
/// An element of an XML input stream.
///
/// Items of this enum are emitted by `reader::EventReader`. They correspond to different
/// elements of an XML document.
///
/// All payloads are owned (`String`, `OwnedName`, `OwnedAttribute`), so an event does not
/// borrow from the parser that produced it.
// `Debug` is implemented by hand further down in this file rather than derived.
#[derive(PartialEq, Clone)]
pub enum XmlEvent {
/// Corresponds to the XML document declaration.
///
/// This event is always emitted before any other event. It is emitted
/// even if the actual declaration is not present in the document.
StartDocument {
/// XML version.
///
/// If the XML declaration is not present, defaults to `Version10`.
version: XmlVersion,
/// XML document encoding.
///
/// If the XML declaration is not present or does not contain the `encoding` attribute,
/// defaults to `"UTF-8"`. This field is currently purely informational.
encoding: String,
/// XML standalone declaration.
///
/// If the XML declaration is not present or does not contain the `standalone` attribute,
/// defaults to `None`. This field is currently purely informational.
standalone: Option<bool>,
},
/// Denotes the end of the document stream.
///
/// This event is always emitted after any other event (except `Error`). After it
/// is emitted for the first time, it will always be emitted on next event pull attempts.
// NOTE(review): this enum has no `Error` variant; the sentence above presumably refers to
// the reader's error result — confirm against the reader API.
EndDocument,
/// Denotes an XML processing instruction.
///
/// This event contains a processing instruction target (`name`) and opaque `data`. It
/// is up to the application to process them.
ProcessingInstruction {
/// Processing instruction target.
name: String,
/// Processing instruction content, or `None` when the instruction carries no data.
data: Option<String>,
},
/// Denotes the beginning of an XML element.
///
/// This event is emitted after parsing opening tags or after parsing bodiless tags. In the
/// latter case an `EndElement` event immediately follows.
StartElement {
/// Qualified name of the element.
name: OwnedName,
/// A list of attributes associated with the element.
///
/// Currently attributes are not checked for duplicates (TODO)
// NOTE(review): the TODO above may be stale — verify against the parser before relying on it.
attributes: Vec<OwnedAttribute>,
/// Contents of the namespace mapping at this point of the document.
namespace: Namespace,
},
/// Denotes the end of an XML element.
///
/// This event is emitted after parsing closing tags or after parsing bodiless tags. In the
/// latter case it is emitted immediately after the corresponding `StartElement` event.
EndElement {
/// Qualified name of the element.
name: OwnedName,
},
/// Denotes CDATA content.
///
/// This event contains unparsed data. No unescaping will be performed.
///
/// It is possible to configure a parser to emit `Characters` event instead of `CData`. See
/// `pull::ParserConfiguration` structure for more information.
CData(String),
/// Denotes a comment.
///
/// It is possible to configure a parser to ignore comments, so this event will never be emitted.
/// See `pull::ParserConfiguration` structure for more information.
Comment(String),
/// Denotes character data outside of tags.
///
/// Contents of this event will always be unescaped, so no entities like `&lt;` or `&amp;` or
/// `&#123;` will appear in it.
///
/// It is possible to configure a parser to trim leading and trailing whitespace for this event.
/// See `pull::ParserConfiguration` structure for more information.
Characters(String),
/// Denotes a chunk of whitespace outside of tags.
///
/// It is possible to configure a parser to emit `Characters` event instead of `Whitespace`.
/// See `pull::ParserConfiguration` structure for more information. When combined with whitespace
/// trimming, it will eliminate standalone whitespace from the event stream completely.
Whitespace(String),
}
impl fmt::Debug for XmlEvent {
#[cold]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
XmlEvent::StartDocument { ref version, ref encoding, standalone } =>
write!(f, "StartDocument({}, {}, {:?})", version, *encoding, standalone),
XmlEvent::EndDocument =>
write!(f, "EndDocument"),
XmlEvent::ProcessingInstruction { ref name, ref data } =>
write!(f, "ProcessingInstruction({}{})", *name, match *data {
Some(ref data) => format!(", {data}"),
None => String::new()
}),
XmlEvent::StartElement { ref name, ref attributes, namespace: Namespace(ref namespace) } =>
write!(f, "StartElement({}, {:?}{})", name, namespace, if attributes.is_empty() {
String::new()
} else {
let attributes: Vec<String> = attributes.iter().map(
|a| format!("{} -> {}", a.name, a.value)
).collect();
format!(", [{}]", attributes.join(", "))
}),
XmlEvent::EndElement { ref name } =>
write!(f, "EndElement({name})"),
XmlEvent::Comment(ref data) =>
write!(f, "Comment({data})"),
XmlEvent::CData(ref data) =>
write!(f, "CData({data})"),
XmlEvent::Characters(ref data) =>
write!(f, "Characters({data})"),
XmlEvent::Whitespace(ref data) =>
write!(f, "Whitespace({data})")
}
}
}
impl XmlEvent {
    /// Converts this reader event into the corresponding writer event, if there is one.
    ///
    /// The returned event borrows its data from `self`. `Whitespace` is mapped to the
    /// writer's `Characters` event, and `EndDocument` has no writer counterpart, so `None`
    /// is returned for it.
    ///
    /// This is handy for streaming transformations where the output is also an XML
    /// document: handle the events you care about and forward everything else to the
    /// writer unchanged:
    ///
    /// ```rust
    /// use std::str;
    ///
    /// use xml::{EventReader, EventWriter};
    /// use xml::reader::XmlEvent as ReaderEvent;
    /// use xml::writer::XmlEvent as WriterEvent;
    ///
    /// let mut input: &[u8] = b"<hello>world</hello>";
    /// let mut output: Vec<u8> = Vec::new();
    ///
    /// {
    ///     let mut reader = EventReader::new(&mut input);
    ///     let mut writer = EventWriter::new(&mut output);
    ///
    ///     for e in reader {
    ///         match e.unwrap() {
    ///             ReaderEvent::Characters(s) =>
    ///                 writer.write(WriterEvent::characters(&s.to_uppercase())).unwrap(),
    ///             e => if let Some(e) = e.as_writer_event() {
    ///                 writer.write(e).unwrap()
    ///             }
    ///         }
    ///     }
    /// }
    ///
    /// assert_eq!(
    ///     str::from_utf8(&output).unwrap(),
    ///     r#"<?xml version="1.0" encoding="UTF-8"?><hello>WORLD</hello>"#
    /// );
    /// ```
    ///
    /// Note that this API may change or get additions in future to improve its ergonomics.
    #[must_use]
    pub fn as_writer_event(&self) -> Option<crate::writer::events::XmlEvent<'_>> {
        let converted = match self {
            // The only reader event without a writer-side equivalent.
            Self::EndDocument => return None,
            Self::StartDocument { version, encoding, standalone } => {
                crate::writer::events::XmlEvent::StartDocument {
                    version: *version,
                    encoding: Some(encoding),
                    standalone: *standalone,
                }
            }
            Self::ProcessingInstruction { name, data } => {
                crate::writer::events::XmlEvent::ProcessingInstruction {
                    name,
                    data: data.as_deref(),
                }
            }
            Self::StartElement { name, attributes, namespace } => {
                crate::writer::events::XmlEvent::StartElement {
                    name: name.borrow(),
                    attributes: attributes.iter().map(|attr| attr.borrow()).collect(),
                    namespace: namespace.borrow(),
                }
            }
            Self::EndElement { name } => {
                crate::writer::events::XmlEvent::EndElement { name: Some(name.borrow()) }
            }
            Self::Comment(text) => crate::writer::events::XmlEvent::Comment(text),
            Self::CData(text) => crate::writer::events::XmlEvent::CData(text),
            // Whitespace-only text is written out as ordinary character data.
            Self::Characters(text) | Self::Whitespace(text) => {
                crate::writer::events::XmlEvent::Characters(text)
            }
        };
        Some(converted)
    }
}