hello
world").at_css("div")
* div.first_element_child
* # => #(Element:0x3c { name = "span", children = [ #(Text "world")] })
*/
static VALUE
rb_xml_node_first_element_child(VALUE self)
{
  xmlNodePtr c_node;
  xmlNodePtr c_first;

  Noko_Node_Get_Struct(self, xmlNode, c_node);

  /* A NULL result from libxml2 is reported to Ruby as nil. */
  c_first = xmlFirstElementChild(c_node);

  return c_first ? noko_xml_node_wrap(Qnil, c_first) : Qnil;
}
/*
* :call-seq:
* last_element_child() → Node
*
* [Returns] The last child Node that is an element.
*
* *Example:*
*
* Note that the "skip" child, which is a Text node, is skipped and the <tt><span>yes</span></tt>
* element is returned.
*
* div = Nokogiri::HTML5("<div><span>no</span><span>yes</span>skip</div>").at_css("div")
* div.last_element_child
* # => #(Element:0x3c { name = "span", children = [ #(Text "yes")] })
*/
static VALUE
rb_xml_node_last_element_child(VALUE self)
{
  xmlNodePtr c_node;
  xmlNodePtr c_last;

  Noko_Node_Get_Struct(self, xmlNode, c_node);

  /* A NULL result from libxml2 is reported to Ruby as nil. */
  c_last = xmlLastElementChild(c_node);

  return c_last ? noko_xml_node_wrap(Qnil, c_last) : Qnil;
}
/*
 *  call-seq:
 *    key?(attribute)
 *
 *  Returns true if +attribute+ is set on this node, false otherwise.
 */
static VALUE
key_eh(VALUE self, VALUE attribute)
{
  xmlNodePtr c_node;
  const xmlChar *c_name;

  Noko_Node_Get_Struct(self, xmlNode, c_node);
  c_name = (xmlChar *)StringValueCStr(attribute);

  /* xmlHasProp() hands back the attribute node, or NULL when absent. */
  return xmlHasProp(c_node, c_name) ? Qtrue : Qfalse;
}
/*
 *  call-seq:
 *    namespaced_key?(attribute, namespace)
 *
 *  Returns true if +attribute+ is set with +namespace+. A nil +namespace+
 *  is passed to libxml2 as a NULL namespace name.
 */
static VALUE
namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
{
  xmlNodePtr c_node;
  const xmlChar *c_name;
  const xmlChar *c_href = NULL;

  Noko_Node_Get_Struct(self, xmlNode, c_node);

  c_name = (xmlChar *)StringValueCStr(attribute);
  if (!NIL_P(namespace)) {
    c_href = (xmlChar *)StringValueCStr(namespace);
  }

  return xmlHasNsProp(c_node, c_name, c_href) ? Qtrue : Qfalse;
}
/*
 *  call-seq:
 *    []=(property, value)
 *
 *  Set the +property+ to +value+.
 *
 *  Returns +value+, or nil (without changing anything) when this node is not
 *  an element.
 */
static VALUE
set(VALUE self, VALUE property, VALUE value)
{
  xmlNodePtr node, cur, next;
  xmlAttrPtr prop;

  Noko_Node_Get_Struct(self, xmlNode, node);

  /* Attributes can only be set on element nodes. */
  if (node->type != XML_ELEMENT_NODE) {
    return (Qnil);
  }

  /* If a matching attribute node already exists, then xmlSetProp will destroy
   * the existing node's children. However, if Nokogiri has a node object
   * pointing to one of those children, we are left with a broken reference.
   *
   * We can avoid this by unlinking these nodes first.
   */
  prop = xmlHasProp(node, (xmlChar *)StringValueCStr(property));
  if (prop && prop->children) {
    for (cur = prop->children; cur; cur = next) {
      /* xmlUnlinkNode() resets cur->next, so the sibling must be captured
       * before unlinking or the walk would stop after the first wrapped
       * child and leave later wrapped children to be freed by xmlSetProp. */
      next = cur->next;
      if (cur->_private) {
        noko_xml_document_pin_node(cur);
        xmlUnlinkNode(cur);
      }
    }
  }

  xmlSetProp(node, (xmlChar *)StringValueCStr(property),
             (xmlChar *)StringValueCStr(value));

  return value;
}
/*
 *  call-seq:
 *    get(attribute)
 *
 *  Get the value for +attribute+, or nil when +attribute+ is nil or no value
 *  is found. A name of the form "prefix:name" is looked up by namespace when
 *  the prefix resolves to a namespace in scope on this node.
 */
static VALUE
get(VALUE self, VALUE rattribute)
{
  xmlNodePtr c_node;
  xmlChar *c_qname;
  xmlChar *c_separator;
  xmlChar *c_value = NULL;
  VALUE rb_value;

  if (NIL_P(rattribute)) { return Qnil; }

  Noko_Node_Get_Struct(self, xmlNode, c_node);

  /* Work on a private copy so the name can be split in place. */
  c_qname = xmlCharStrdup(StringValueCStr(rattribute));
  c_separator = DISCARD_CONST_QUAL_XMLCHAR(xmlStrchr(c_qname, (const xmlChar)':'));

  if (!c_separator) {
    c_value = xmlGetNoNsProp(c_node, c_qname);
  } else {
    xmlNsPtr c_ns;

    /* Terminate the prefix at the colon; the local name follows it. */
    *c_separator = 0;

    c_ns = xmlSearchNs(c_node->doc, c_node, c_qname);
    if (c_ns) {
      c_value = xmlGetNsProp(c_node, c_separator + 1, c_ns->href);
    } else {
      /* Unknown prefix: fall back to a lookup with the untouched name. */
      c_value = xmlGetProp(c_node, (xmlChar *)StringValueCStr(rattribute));
    }
  }

  xmlFree((void *)c_qname);

  if (!c_value) { return Qnil; }

  rb_value = NOKOGIRI_STR_NEW2(c_value);
  xmlFree((void *)c_value);

  return rb_value;
}
/*
 *  call-seq:
 *    set_namespace(namespace)
 *
 *  Set the namespace to +namespace+. Passing nil sets a NULL namespace.
 *  Returns self.
 */
static VALUE
set_namespace(VALUE self, VALUE namespace)
{
  xmlNodePtr c_node;
  xmlNsPtr c_ns = NULL;

  Noko_Node_Get_Struct(self, xmlNode, c_node);

  if (NIL_P(namespace)) {
    xmlSetNs(c_node, NULL);
    return self;
  }

  Noko_Namespace_Get_Struct(namespace, xmlNs, c_ns);
  xmlSetNs(c_node, c_ns);

  return self;
}
/*
* :call-seq:
* namespace() → Namespace
*
* [Returns] The Namespace of the element or attribute node, or +nil+ if there is no namespace.
*
* *Example:*
*
* doc = Nokogiri::XML(<<~EOF)
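*     <root>
*       <first/>
*       <second xmlns="http://example.com/child"/>
*       <foo:third xmlns:foo="http://example.com/foo"/>
*     </root>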
* EOF
* doc.at_xpath("//first").namespace
* # => nil
* doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace
* # => #(Namespace:0x3c { href = "http://example.com/child" })
* doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace
* # => #(Namespace:0x50 { prefix = "foo", href = "http://example.com/foo" })
*/
static VALUE
rb_xml_node_namespace(VALUE rb_node)
{
  xmlNodePtr c_node;

  Noko_Node_Get_Struct(rb_node, xmlNode, c_node);

  /* No namespace attached to the node: report nil. */
  if (c_node->ns == NULL) {
    return Qnil;
  }

  return noko_xml_namespace_wrap(c_node->ns, c_node->doc);
}
/*
* :call-seq:
* namespace_definitions() → Array
*
* [Returns]
* Namespaces that are defined directly on this node, as an Array of Namespace objects. The array
* will be empty if no namespaces are defined on this node.
*
* *Example:*
*
* doc = Nokogiri::XML(<<~EOF)
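*     <root xmlns="http://example.com/root">
*       <first/>
*       <second xmlns="http://example.com/child" xmlns:unused="http://example.com/unused"/>
*       <foo:third xmlns:foo="http://example.com/foo"/>
*     </root>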
* EOF
* doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_definitions
* # => []
* doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace_definitions
* # => [#(Namespace:0x3c { href = "http://example.com/child" }),
* # #(Namespace:0x50 {
* # prefix = "unused",
* # href = "http://example.com/unused"
* # })]
* doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace_definitions
* # => [#(Namespace:0x64 { prefix = "foo", href = "http://example.com/foo" })]
*/
static VALUE
namespace_definitions(VALUE rb_node)
{
  xmlNodePtr c_node;
  xmlNsPtr c_ns;
  VALUE rb_definitions = rb_ary_new();

  Noko_Node_Get_Struct(rb_node, xmlNode, c_node);

  /* Walk the node's own nsDef list; an empty list leaves the array empty. */
  for (c_ns = c_node->nsDef; c_ns != NULL; c_ns = c_ns->next) {
    rb_ary_push(rb_definitions, noko_xml_namespace_wrap(c_ns, c_node->doc));
  }

  return rb_definitions;
}
/*
* :call-seq:
* namespace_scopes() → Array
*
* [Returns] Array of all the Namespaces on this node and its ancestors.
*
* See also #namespaces
*
* *Example:*
*
* doc = Nokogiri::XML(<<~EOF)
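*     <root xmlns="http://example.com/root" xmlns:bar="http://example.com/bar">
*       <first/>
*       <second xmlns="http://example.com/child"/>
*       <third xmlns:foo="http://example.com/foo"/>
*     </root>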
* EOF
* doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes
* # => [#(Namespace:0x3c { href = "http://example.com/root" }),
* # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
* doc.at_xpath("//child:second", "child" => "http://example.com/child").namespace_scopes
* # => [#(Namespace:0x64 { href = "http://example.com/child" }),
* # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
* doc.at_xpath("//root:third", "root" => "http://example.com/root").namespace_scopes
* # => [#(Namespace:0x78 { prefix = "foo", href = "http://example.com/foo" }),
* # #(Namespace:0x3c { href = "http://example.com/root" }),
* # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })]
*/
static VALUE
rb_xml_node_namespace_scopes(VALUE rb_node)
{
  xmlNodePtr c_node;
  xmlNsPtr *c_ns_list;
  VALUE rb_scopes = rb_ary_new();

  Noko_Node_Get_Struct(rb_node, xmlNode, c_node);

  c_ns_list = xmlGetNsList(c_node->doc, c_node);
  if (c_ns_list != NULL) {
    xmlNsPtr *cursor;

    /* The list is NULL-terminated. */
    for (cursor = c_ns_list; *cursor != NULL; ++cursor) {
      rb_ary_push(rb_scopes, noko_xml_namespace_wrap(*cursor, c_node->doc));
    }

    /* Only the array returned by xmlGetNsList() is released here. */
    xmlFree(c_ns_list);
  }

  return rb_scopes;
}
/*
 *  call-seq:
 *    node_type
 *
 *  Get the type for this Node, as the Integer value of the underlying
 *  node's +type+ field.
 */
static VALUE
node_type(VALUE self)
{
  xmlNodePtr c_node;
  int c_type;

  Noko_Node_Get_Struct(self, xmlNode, c_node);
  c_type = c_node->type;

  return INT2NUM(c_type);
}
/*
* call-seq:
* native_content=(input)
*
* Set the content of this node to +input+.
*
* [Parameters]
* - +input+ (String) The new content for this node.
*
* ⚠ This method behaves differently depending on the node type. For Text, CDATA, Comment, and
* ProcessingInstruction nodes, it treats the input as raw content, which means that the final DOM
* will contain the entity-escaped version of the input (see example below). For Element and Attr
* nodes, it treats the input as parsed content and expects it to be valid markup that is already
* entity-escaped.
*
* 💡 Use Node#content= for a more consistent API across node types.
*
* [Example]
* Note the behavior differences of this method between Text and Element nodes:
*
* doc = Nokogiri::HTML::Document.parse(<<~HTML)
*     <html>
*       <body>
*         <div id="first">asdf</div>
*         <div id="second">asdf</div>
* HTML
*
* text_node = doc.at_css("div#first").children.first
* div_node = doc.at_css("div#second")
*
* value = "You & Me"
*
* text_node.native_content = value
* div_node.native_content = value
*
* doc.css("div").to_html
* # => "<div id=\"first\">You &amp;amp; Me</div>
* #     <div id=\"second\">You &amp; Me</div>"
*
* See also: #content=
*/
static VALUE
set_native_content(VALUE self, VALUE content)
{
  xmlNodePtr c_node;
  xmlNodePtr c_child;
  xmlNodePtr c_next;

  Noko_Node_Get_Struct(self, xmlNode, c_node);

  /* Detach every existing child and pin it on the document before the
   * content is replaced. The sibling pointer is read before unlinking. */
  for (c_child = c_node->children; NULL != c_child; c_child = c_next) {
    c_next = c_child->next;
    xmlUnlinkNode(c_child);
    noko_xml_document_pin_node(c_child);
  }

  xmlNodeSetContent(c_node, (xmlChar *)StringValueCStr(content));

  return content;
}
/*
 *  call-seq:
 *    lang=
 *
 *  Set the language of a node, i.e. the values of the xml:lang attribute.
 *  The C function itself returns nil.
 */
static VALUE
set_lang(VALUE self_rb, VALUE lang_rb)
{
  xmlNodePtr c_node;

  Noko_Node_Get_Struct(self_rb, xmlNode, c_node);
  xmlNodeSetLang(c_node, (xmlChar *)StringValueCStr(lang_rb));

  return Qnil;
}
/*
 *  call-seq:
 *    lang
 *
 *  Searches the language of a node, i.e. the values of the xml:lang attribute or
 *  the one carried by the nearest ancestor. Returns nil when none is found.
 */
static VALUE
get_lang(VALUE self_rb)
{
  xmlNodePtr c_node;
  xmlChar *c_lang;
  VALUE rb_lang;

  Noko_Node_Get_Struct(self_rb, xmlNode, c_node);

  c_lang = xmlNodeGetLang(c_node);
  if (c_lang == NULL) {
    return Qnil;
  }

  /* Copy into a Ruby string, then release the libxml2 buffer. */
  rb_lang = NOKOGIRI_STR_NEW2(c_lang);
  xmlFree(c_lang);

  return rb_lang;
}
/* :nodoc: */
static VALUE
add_child(VALUE self, VALUE new_child)
{
  /* Delegates to the shared reparenting routine with xmlAddChild. */
  VALUE rb_result = reparent_node_with(self, new_child, xmlAddChild);

  return rb_result;
}
/*
 *  call-seq:
 *    parent
 *
 *  Get the parent Node for this Node, or nil when it has no parent.
 */
static VALUE
get_parent(VALUE self)
{
  xmlNodePtr c_node;
  xmlNodePtr c_parent;

  Noko_Node_Get_Struct(self, xmlNode, c_node);
  c_parent = c_node->parent;

  return c_parent ? noko_xml_node_wrap(Qnil, c_parent) : Qnil;
}
/*
 *  call-seq:
 *    name=(new_name)
 *
 *  Set the name for this Node. Returns +new_name+.
 */
static VALUE
set_name(VALUE self, VALUE new_name)
{
  xmlNodePtr c_node;
  const xmlChar *c_name;

  Noko_Node_Get_Struct(self, xmlNode, c_node);

  c_name = (xmlChar *)StringValueCStr(new_name);
  xmlNodeSetName(c_node, c_name);

  return new_name;
}
/*
 *  call-seq:
 *    name
 *
 *  Returns the name for this Node, or nil when the node has no name.
 */
static VALUE
get_name(VALUE self)
{
  xmlNodePtr c_node;

  Noko_Node_Get_Struct(self, xmlNode, c_node);

  if (c_node->name == NULL) {
    return Qnil;
  }

  return NOKOGIRI_STR_NEW2(c_node->name);
}
/*
 *  call-seq:
 *    path
 *
 *  Returns the path associated with this Node. When libxml2 cannot produce
 *  a path, the string "?" is returned instead.
 */
static VALUE
rb_xml_node_path(VALUE rb_node)
{
  xmlNodePtr c_node;
  xmlChar *c_path;
  VALUE rb_path;

  Noko_Node_Get_Struct(rb_node, xmlNode, c_node);

  c_path = xmlGetNodePath(c_node);
  if (c_path != NULL) {
    rb_path = NOKOGIRI_STR_NEW2(c_path);
    xmlFree(c_path);
    return rb_path;
  }

  // see https://github.com/sparklemotion/nokogiri/issues/2250
  // this behavior is clearly undesirable, but is what libxml <= 2.9.10 returned, and so we
  // do this for now to preserve the behavior across libxml2 versions.
  rb_path = NOKOGIRI_STR_NEW2("?");

  return rb_path;
}
/* :nodoc: */
static VALUE
add_next_sibling(VALUE self, VALUE new_sibling)
{
  /* Delegates to the shared reparenting routine with xmlAddNextSibling. */
  VALUE rb_result = reparent_node_with(self, new_sibling, xmlAddNextSibling);

  return rb_result;
}
/* :nodoc: */
static VALUE
add_previous_sibling(VALUE self, VALUE new_sibling)
{
  /* Delegates to the shared reparenting routine with xmlAddPrevSibling. */
  VALUE rb_result = reparent_node_with(self, new_sibling, xmlAddPrevSibling);

  return rb_result;
}
/*
 *  call-seq:
 *    native_write_to(io, encoding, indent_string, options)
 *
 *  Write this Node to +io+ with +encoding+, +indent_string+, and +options+.
 *
 *  +encoding+ may be nil/false, in which case no encoding name is passed to
 *  the save context. Returns +io+.
 */
static VALUE
native_write_to(
  VALUE self,
  VALUE io,
  VALUE encoding,
  VALUE indent_string,
  VALUE options
)
{
  xmlNodePtr node;
  const char *before_indent;
  const char *c_indent_string;
  const char *c_encoding;
  int c_options;
  xmlSaveCtxtPtr savectx;

  Noko_Node_Get_Struct(self, xmlNode, node);

  /* Perform every Ruby conversion that can raise BEFORE touching libxml2's
   * global indent settings; otherwise an exception would skip the restore
   * below and leave xmlTreeIndentString pointing into a Ruby string. */
  c_indent_string = StringValueCStr(indent_string);
  c_encoding = RTEST(encoding) ? StringValueCStr(encoding) : NULL;
  c_options = (int)NUM2INT(options);

  xmlIndentTreeOutput = 1;

  before_indent = xmlTreeIndentString;
  xmlTreeIndentString = c_indent_string;

  savectx = xmlSaveToIO(
              (xmlOutputWriteCallback)noko_io_write,
              (xmlOutputCloseCallback)noko_io_close,
              (void *)io,
              c_encoding,
              c_options
            );

  xmlSaveTree(savectx, node);
  xmlSaveClose(savectx);

  /* Put the previous indent string back once serialization is done. */
  xmlTreeIndentString = before_indent;

  return io;
}
/* Append the first +length+ bytes of +str+ to +out+ as UTF-8; a zero length is a no-op. */
static inline void
output_partial_string(VALUE out, char const *str, size_t length)
{
  if (length == 0) {
    return;
  }

  rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding());
}
/* Append the single byte +ch+ to +out+. */
static inline void
output_char(VALUE out, char ch)
{
  char const single[1] = { ch };

  output_partial_string(out, single, sizeof single);
}
/* Append the NUL-terminated string +str+ to +out+. */
static inline void
output_string(VALUE out, char const *str)
{
  size_t const length = strlen(str);

  output_partial_string(out, str, length);
}
/*
 * Append the tag name of +elem+ to +out+.
 *
 * Elements in the HTML, MathML, and SVG namespaces do not use a namespace
 * prefix in the HTML syntax. Any other element whose namespace has both an
 * href and a prefix is written as "prefix:localname".
 */
static inline void
output_tagname(VALUE out, xmlNodePtr elem)
{
  char const *tagname = (char const *)elem->name;
  xmlNsPtr c_ns = elem->ns;
  bool needs_prefix = false;

  if (c_ns && c_ns->href && c_ns->prefix) {
    char const *href = (char const *)c_ns->href;

    needs_prefix = strcmp(href, "http://www.w3.org/1999/xhtml") != 0
                   && strcmp(href, "http://www.w3.org/1998/Math/MathML") != 0
                   && strcmp(href, "http://www.w3.org/2000/svg") != 0;
  }

  if (needs_prefix) {
    char const *separator;

    output_string(out, (char const *)c_ns->prefix);
    output_char(out, ':');

    /* If the stored name already carries a prefix, keep only the part
     * after the first colon. */
    separator = strchr(tagname, ':');
    if (separator) {
      tagname = separator + 1;
    }
  }

  output_string(out, tagname);
}
/*
 * Append the serialized name of +attr+ to +out+.
 *
 * Attributes in the XML, XMLNS, and XLink namespaces get the fixed prefixes
 * "xml:", "xmlns:" (omitted for the attribute named "xmlns"), and "xlink:".
 * Other namespaced attributes use their namespace prefix when one is set.
 * In every remaining case the stored name is written unchanged.
 */
static inline void
output_attr_name(VALUE out, xmlAttrPtr attr)
{
  char const *qname = (char const *)attr->name;
  xmlNsPtr c_ns = attr->ns;
  char const *href;
  char const *colon;
  char const *local;

  if (!c_ns || !c_ns->href) {
    output_string(out, qname);
    return;
  }

  href = (char const *)c_ns->href;

  /* The local name is whatever follows the first colon, if there is one. */
  colon = strchr(qname, ':');
  local = colon ? colon + 1 : qname;

  if (strcmp(href, "http://www.w3.org/XML/1998/namespace") == 0) {
    output_string(out, "xml:");
  } else if (strcmp(href, "http://www.w3.org/2000/xmlns/") == 0) {
    // xmlns:xmlns -> xmlns
    // xmlns:foo -> xmlns:foo
    if (strcmp(local, "xmlns") != 0) {
      output_string(out, "xmlns:");
    }
  } else if (strcmp(href, "http://www.w3.org/1999/xlink") == 0) {
    output_string(out, "xlink:");
  } else if (c_ns->prefix) {
    output_string(out, (char const *)c_ns->prefix);
    output_char(out, ':');
  } else {
    /* Namespace without a prefix: emit the stored name as-is. */
    output_string(out, qname);
    return;
  }

  output_string(out, local);
}
/*
 * Append +start+ to +out+, replacing characters that must be escaped in
 * serialized HTML with character references:
 *
 *   - "&" becomes "&amp;"
 *   - U+00A0 NO-BREAK SPACE becomes "&nbsp;"
 *   - in attribute mode (+attr+ true), '"' becomes "&quot;"
 *   - otherwise, "<" becomes "&lt;" and ">" becomes "&gt;"
 *
 * Runs of unescaped bytes are copied in one piece. A NULL +start+ appends
 * nothing.
 */
static void
output_escaped_string(VALUE out, xmlChar const *start, bool attr)
{
  xmlChar const *next = start;
  int ch;

  /* Nothing to serialize; avoids dereferencing a NULL content pointer. */
  if (start == NULL) {
    return;
  }

  while ((ch = *next) != 0) {
    char const *replacement = NULL;
    size_t replaced_bytes = 1;

    if (ch == '&') {
      replacement = "&amp;";
    } else if (ch == 0xC2 && next[1] == 0xA0) {
      // U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0.
      replacement = "&nbsp;";
      replaced_bytes = 2;
    } else if (attr && ch == '"') {
      replacement = "&quot;";
    } else if (!attr && ch == '<') {
      replacement = "&lt;";
    } else if (!attr && ch == '>') {
      replacement = "&gt;";
    } else {
      ++next;
      continue;
    }

    /* Flush the pending unescaped run, then the replacement. */
    output_partial_string(out, (char const *)start, (size_t)(next - start));
    output_string(out, replacement);
    next += replaced_bytes;
    start = next;
  }

  /* Flush whatever unescaped tail remains. */
  output_partial_string(out, (char const *)start, (size_t)(next - start));
}
/*
 * True when +node+ is named "pre", "textarea", or "listing" and its first
 * child is a text node whose content starts with a newline.
 */
static bool
should_prepend_newline(xmlNodePtr node)
{
  char const *tag = (char const *)node->name;
  xmlNodePtr first = node->children;
  bool newline_sensitive;

  if (tag == NULL || first == NULL) {
    return false;
  }

  newline_sensitive = strcmp(tag, "pre") == 0
                      || strcmp(tag, "textarea") == 0
                      || strcmp(tag, "listing") == 0;
  if (!newline_sensitive) {
    return false;
  }

  if (first->type != XML_TEXT_NODE || first->content == NULL) {
    return false;
  }

  return first->content[0] == '\n';
}
/* Ruby-visible wrapper: reports should_prepend_newline() as true/false. */
static VALUE
rb_prepend_newline(VALUE self)
{
  xmlNodePtr c_node;

  Noko_Node_Get_Struct(self, xmlNode, c_node);

  if (should_prepend_newline(c_node)) {
    return Qtrue;
  }

  return Qfalse;
}
/*
 * True when +node+ has no namespace and its name equals one of the
 * +num_tagnames+ entries in +tagnames+.
 */
static bool
is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
{
  char const *name = (char const *)node->name;
  char const *const *cursor;
  char const *const *end = tagnames + num_tagnames;

  // fragments don't have a name
  if (name == NULL) {
    return false;
  }

  // if the node has a namespace, it's in a foreign context and is not one of the HTML tags we're
  // matching against.
  if (node->ns != NULL) {
    return false;
  }

  for (cursor = tagnames; cursor != end; ++cursor) {
    if (strcmp(name, *cursor) == 0) {
      return true;
    }
  }

  return false;
}
/*
 * Recursively serialize +node+ and its subtree onto +out+ using HTML syntax.
 *
 * +preserve_newline+ controls whether an extra leading newline is written
 * for elements where should_prepend_newline() is true.
 *
 * Raises RuntimeError for node types that are not handled below.
 */
static void
output_node(
  VALUE out,
  xmlNodePtr node,
  bool preserve_newline
)
{
  /* Elements serialized as a start tag only (no children, no end tag). */
  static char const *const VOID_ELEMENTS[] = {
    "area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr",
    "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
  };

  /* Elements whose text children are written verbatim, without escaping. */
  static char const *const UNESCAPED_TEXT_ELEMENTS[] = {
    "style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript",
  };

  switch (node->type) {
    case XML_ELEMENT_NODE:
      // Serialize the start tag.
      output_char(out, '<');
      output_tagname(out, node);

      // Add attributes.
      for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
        output_char(out, ' ');
        output_node(out, (xmlNodePtr)attr, preserve_newline);
      }
      output_char(out, '>');

      // Add children and end tag if element is not void.
      if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) {
        if (preserve_newline && should_prepend_newline(node)) {
          output_char(out, '\n');
        }
        for (xmlNodePtr child = node->children; child; child = child->next) {
          output_node(out, child, preserve_newline);
        }

        output_string(out, "</");
        output_tagname(out, node);
        output_char(out, '>');
      }
      break;

    case XML_ATTRIBUTE_NODE: {
      xmlAttrPtr attr = (xmlAttrPtr)node;
      output_attr_name(out, attr);
      if (attr->children) {
        output_string(out, "=\"");
        xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
        output_escaped_string(out, value, true);
        xmlFree(value);
        output_char(out, '"');
      } else {
        // Output name=""
        output_string(out, "=\"\"");
      }
    }
    break;

    case XML_TEXT_NODE:
      if (node->parent
          && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
                       sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) {
        output_string(out, (char const *)node->content);
      } else {
        output_escaped_string(out, node->content, false);
      }
      break;

    case XML_CDATA_SECTION_NODE:
      output_string(out, "<![CDATA[");
      output_string(out, (char const *)node->content);
      output_string(out, "]]>");
      break;

    case XML_COMMENT_NODE:
      output_string(out, "<!--");
      output_string(out, (char const *)node->content);
      output_string(out, "-->");
      break;

    case XML_PI_NODE:
      output_string(out, "<?");
      output_string(out, (char const *)node->content);
      output_char(out, '>');
      break;

    case XML_DOCUMENT_TYPE_NODE:
    case XML_DTD_NODE:
      output_string(out, "<!DOCTYPE ");
      output_string(out, (char const *)node->name);
      output_string(out, ">");
      break;

    case XML_DOCUMENT_NODE:
    case XML_DOCUMENT_FRAG_NODE:
    case XML_HTML_DOCUMENT_NODE:
      /* Container nodes contribute no markup of their own. */
      for (xmlNodePtr child = node->children; child; child = child->next) {
        output_node(out, child, preserve_newline);
      }
      break;

    default:
      rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type);
      break;
  }
}
/*
 * Serialize this node with output_node() into a new Ruby string and return
 * it. +preserve_newline+ is interpreted by Ruby truthiness.
 */
static VALUE
html_standard_serialize(
  VALUE self,
  VALUE preserve_newline
)
{
  xmlNodePtr c_node;
  VALUE rb_buffer;
  bool keep_newline;

  Noko_Node_Get_Struct(self, xmlNode, c_node);

  /* Start with a 4096-byte capacity hint for the output buffer. */
  rb_buffer = rb_str_buf_new(4096);
  keep_newline = RTEST(preserve_newline);

  output_node(rb_buffer, c_node, keep_newline);

  return rb_buffer;
}
/*
* :call-seq:
* line() → Integer
*
* [Returns] The line number of this Node.
*
* ---
*
* ⚠ The CRuby and JRuby implementations differ in important ways!
*
* Semantic differences:
* - The CRuby method reflects the node's line number in the parsed string
* - The JRuby method reflects the node's line number in the final DOM structure after
* corrections have been applied
*
* Performance differences:
* - The CRuby method is {O(1)}[https://en.wikipedia.org/wiki/Time_complexity#Constant_time]
* (constant time)
* - The JRuby method is {O(n)}[https://en.wikipedia.org/wiki/Time_complexity#Linear_time] (linear
* time, where n is the number of nodes before/above the element in the DOM)
*
* If you'd like to help improve the JRuby implementation, please review these issues and reach out
* to the maintainers:
* - https://github.com/sparklemotion/nokogiri/issues/1223
* - https://github.com/sparklemotion/nokogiri/pull/2177
* - https://github.com/sparklemotion/nokogiri/issues/2380
*/
static VALUE
rb_xml_node_line(VALUE rb_node)
{
  xmlNodePtr c_node;
  long c_line;

  Noko_Node_Get_Struct(rb_node, xmlNode, c_node);

  /* Delegates entirely to libxml2's line-number lookup. */
  c_line = xmlGetLineNo(c_node);

  return LONG2NUM(c_line);
}
/*
 *  call-seq:
 *    line=(num)
 *
 *  Sets the line for this Node. Values below 65535 are stored directly in
 *  the node's line field; larger values store 65535 there and, for Text
 *  nodes only, additionally keep the full value in the node's psvi field.
 */
static VALUE
rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
{
  xmlNodePtr c_node;
  int c_line = NUM2INT(rb_line_number);

  Noko_Node_Get_Struct(rb_node, xmlNode, c_node);

  // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
  // search for "psvi" in SAX2.c and tree.c to learn more.
  if (c_line >= 65535) {
    c_node->line = 65535;
    if (c_node->type == XML_TEXT_NODE) {
      c_node->psvi = (void *)(ptrdiff_t)c_line;
    }
    return rb_line_number;
  }

  c_node->line = (short unsigned)c_line;

  return rb_line_number;
}
/* :nodoc: documented in lib/nokogiri/xml/node.rb */
static VALUE
rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
{
  VALUE rb_name;
  VALUE rb_document_node;
  VALUE rb_rest;
  VALUE rb_wrapper_class;
  VALUE rb_node;
  xmlNodePtr c_owner;
  xmlNodePtr c_new;

  /* Two required arguments (name, document); extras are collected and
   * handed on to #initialize through argc/argv below. */
  rb_scan_args(argc, argv, "2*", &rb_name, &rb_document_node, &rb_rest);

  if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlNode)) {
    rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
  }
  if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
    NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
  }
  Noko_Node_Get_Struct(rb_document_node, xmlNode, c_owner);

  /* Create the node, attach it to the owner's document, and pin it. */
  c_new = xmlNewNode(NULL, (xmlChar *)StringValueCStr(rb_name));
  c_new->doc = c_owner->doc;
  noko_xml_document_pin_node(c_new);

  /* For the base Node class, let the wrapper choose the Ruby class. */
  rb_wrapper_class = (klass == cNokogiriXmlNode) ? (VALUE)NULL : klass;
  rb_node = noko_xml_node_wrap(rb_wrapper_class, c_new);

  rb_obj_call_init(rb_node, argc, argv);

  if (rb_block_given_p()) { rb_yield(rb_node); }

  return rb_node;
}
/*
 *  call-seq:
 *    dump_html
 *
 *  Returns the Node as html, produced by libxml2's htmlNodeDump().
 */
static VALUE
dump_html(VALUE self)
{
  xmlNodePtr c_node;
  xmlBufferPtr c_buffer;
  VALUE rb_html;

  Noko_Node_Get_Struct(self, xmlNode, c_node);

  c_buffer = xmlBufferCreate();
  htmlNodeDump(c_buffer, c_node->doc, c_node);

  /* Copy the buffer into a Ruby string before the buffer is freed. */
  rb_html = NOKOGIRI_STR_NEW2(xmlBufferContent(c_buffer));
  xmlBufferFree(c_buffer);

  return rb_html;
}
/*
 *  call-seq:
 *    compare(other)
 *
 *  Compare this Node to +other+ with respect to their Document
 */
static VALUE
compare(VALUE self, VALUE _other)
{
  xmlNodePtr c_self;
  xmlNodePtr c_other;
  int ordering;

  Noko_Node_Get_Struct(self, xmlNode, c_self);
  Noko_Node_Get_Struct(_other, xmlNode, c_other);

  /* Note the argument order: +other+ is passed first, this node second. */
  ordering = xmlXPathCmpNodes(c_other, c_self);

  return INT2NUM(ordering);
}
/*
 *  call-seq:
 *    process_xincludes(flags)
 *
 *  Loads and substitutes all xinclude elements below the node. The
 *  parser context will be initialized with +flags+.
 *
 *  Returns the node. On failure, raises the exception built by
 *  SyntaxError.aggregate from the collected errors, or a RuntimeError when
 *  that yields nothing.
 */
static VALUE
noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
{
  xmlNodePtr c_node;
  int c_status;
  VALUE rb_errors = rb_ary_new();
  libxmlStructuredErrorHandlerState handler_state;

  Noko_Node_Get_Struct(rb_node, xmlNode, c_node);

  /* Install the error handler with rb_errors as its context for the
   * duration of the xinclude pass, then restore the saved handler. */
  noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
  c_status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(rb_flags));
  noko__structured_error_func_restore(&handler_state);

  if (c_status < 0) {
    VALUE rb_exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);

    if (!RB_TEST(rb_exception)) {
      rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
    }
    rb_exc_raise(rb_exception);
  }

  return rb_node;
}
/*
 * Parse the string +_str+ in the context of this node using
 * xmlParseInNodeContext(), with +_options+ as the parser options, and
 * return the resulting nodes wrapped as a node set belonging to this node's
 * document.
 *
 * While parsing, libxml2 structured errors are routed to
 * noko__error_array_pusher with the document's @errors value as context.
 *
 * Raises RuntimeError when the parser reports XML_ERR_INTERNAL_ERROR or
 * XML_ERR_NO_MEMORY; other error codes do not raise here.
 */
static VALUE
in_context(VALUE self, VALUE _str, VALUE _options)
{
xmlNodePtr node, list = 0, tmp, child_iter, node_children, doc_children;
xmlNodeSetPtr set;
xmlParserErrors error;
VALUE doc, err;
int doc_is_empty;
Noko_Node_Get_Struct(self, xmlNode, node);
doc = DOC_RUBY_OBJECT(node->doc);
err = rb_iv_get(doc, "@errors");
/* Snapshot the pre-parse state so it can be restored if parsing fails. */
doc_is_empty = (node->doc->children == NULL) ? 1 : 0;
node_children = node->children;
doc_children = node->doc->children;
xmlSetStructuredErrorFunc((void *)err, noko__error_array_pusher);
/* This function adds a fake node to the child of +node+. If the parser
* does not exit cleanly with XML_ERR_OK, the list is freed. This can
* leave the child pointers in a bad state if they were originally empty.
*
* http://git.gnome.org/browse/libxml2/tree/parser.c#n13177
* */
error = xmlParseInNodeContext(node, StringValuePtr(_str),
(int)RSTRING_LEN(_str),
(int)NUM2INT(_options), &list);
/* xmlParseInNodeContext should not mutate the original document or node,
* so reassigning these pointers should be OK. The reason we're reassigning
* is because if there were errors, it's possible for the child pointers
* to be manipulated. */
if (error != XML_ERR_OK) {
node->doc->children = doc_children;
node->children = node_children;
}
/* make sure parent/child pointers are coherent so an unlink will work
* properly (#331)
*/
child_iter = node->doc->children ;
while (child_iter) {
child_iter->parent = (xmlNodePtr)node->doc;
child_iter = child_iter->next;
}
/* Parsing is finished: clear the structured error handler. */
xmlSetStructuredErrorFunc(NULL, NULL);
/*
* Workaround for a libxml2 bug where a parsing error may leave a broken
* node reference in node->doc->children.
*
* https://bugzilla.gnome.org/show_bug.cgi?id=668155
*
* This workaround is limited to when a parse error occurs, the document
* went from having no children to having children, and the context node is
* part of a document fragment.
*
* TODO: This was fixed in libxml 2.8.0 by 71a243d
*/
if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
/* Climb to the topmost ancestor of the context node. */
child_iter = node;
while (child_iter->parent) {
child_iter = child_iter->parent;
}
if (child_iter->type == XML_DOCUMENT_FRAG_NODE) {
node->doc->children = NULL;
}
}
/* FIXME: This probably needs to handle more constants... */
switch (error) {
case XML_ERR_INTERNAL_ERROR:
case XML_ERR_NO_MEMORY:
rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
break;
default:
break;
}
/* Move each parsed node out of the sibling list into a node set,
* detaching its next pointer and pinning it on the document. */
set = xmlXPathNodeSetCreate(NULL);
while (list) {
tmp = list->next;
list->next = NULL;
xmlXPathNodeSetAddUnique(set, list);
noko_xml_document_pin_node(list);
list = tmp;
}
return noko_xml_node_set_wrap(set, doc);
}
/* :nodoc: */
VALUE
rb_xml_node_data_ptr_eh(VALUE self)
{
  xmlNodePtr c_node;

  Noko_Node_Get_Struct(self, xmlNode, c_node);

  /* True exactly when the wrapped native pointer is non-NULL. */
  if (c_node == NULL) {
    return Qfalse;
  }

  return Qtrue;
}
/*
 * Return the Ruby object for the native node +c_node+.
 *
 * - Document nodes return the document's existing Ruby object.
 * - A node that already has a Ruby object in +_private+ (and whose document
 *   has a Ruby object) returns that object.
 * - Otherwise a new wrapper is allocated. Its class is +rb_class+ when that
 *   is truthy, else one chosen from the node type. When the document has a
 *   Ruby object, the wrapper is pushed onto the document's node cache and
 *   passed to the document's +decorate+ method.
 *
 * +c_node+ must not be NULL (asserted).
 */
VALUE
noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
{
  VALUE rb_document;
  VALUE rb_node_cache;
  VALUE rb_node;
  nokogiriTuplePtr node_has_a_document;
  xmlDocPtr c_doc;

  assert(c_node);

  if (c_node->type == XML_DOCUMENT_NODE || c_node->type == XML_HTML_DOCUMENT_NODE) {
    return DOC_RUBY_OBJECT(c_node->doc);
  }

  c_doc = c_node->doc;

  // Nodes yielded from XML::Reader don't have a fully-realized Document
  node_has_a_document = DOC_RUBY_OBJECT_TEST(c_doc);

  /* Reuse the existing wrapper when there is one. */
  if (c_node->_private && node_has_a_document) {
    return (VALUE)c_node->_private;
  }

  /* No class supplied by the caller: pick one from the node type. */
  if (!RTEST(rb_class)) {
    switch (c_node->type) {
      case XML_ELEMENT_NODE:
        rb_class = cNokogiriXmlElement;
        break;
      case XML_ATTRIBUTE_NODE:
        rb_class = cNokogiriXmlAttr;
        break;
      case XML_TEXT_NODE:
        rb_class = cNokogiriXmlText;
        break;
      case XML_CDATA_SECTION_NODE:
        rb_class = cNokogiriXmlCData;
        break;
      case XML_ENTITY_REF_NODE:
        rb_class = cNokogiriXmlEntityReference;
        break;
      case XML_PI_NODE:
        rb_class = cNokogiriXmlProcessingInstruction;
        break;
      case XML_COMMENT_NODE:
        rb_class = cNokogiriXmlComment;
        break;
      case XML_DOCUMENT_FRAG_NODE:
        rb_class = cNokogiriXmlDocumentFragment;
        break;
      case XML_DTD_NODE:
        rb_class = cNokogiriXmlDtd;
        break;
      case XML_ELEMENT_DECL:
        rb_class = cNokogiriXmlElementDecl;
        break;
      case XML_ATTRIBUTE_DECL:
        rb_class = cNokogiriXmlAttributeDecl;
        break;
      case XML_ENTITY_DECL:
        rb_class = cNokogiriXmlEntityDecl;
        break;
      default:
        rb_class = cNokogiriXmlNode;
    }
  }

  rb_node = _xml_node_alloc(rb_class);
  _xml_node_data_ptr_set(rb_node, c_node);

  if (!node_has_a_document) {
    return rb_node;
  }

  /* Register the wrapper with its document and let the document decorate it. */
  rb_document = DOC_RUBY_OBJECT(c_doc);
  rb_node_cache = DOC_NODE_CACHE(c_doc);
  rb_ary_push(rb_node_cache, rb_node);
  rb_funcall(rb_document, id_decorate, 1, rb_node);

  return rb_node;
}
/*
 *  Build a Ruby Array holding one wrapped object per attribute of +c_node+,
 *  in the order of the node's properties list.
 */
VALUE
noko_xml_node_attrs(xmlNodePtr c_node)
{
  VALUE rb_attrs = rb_ary_new();
  xmlAttrPtr c_attr;

  for (c_attr = c_node->properties; c_attr != NULL; c_attr = c_attr->next) {
    rb_ary_push(rb_attrs, noko_xml_node_wrap(Qnil, (xmlNodePtr)c_attr));
  }

  return rb_attrs;
}
/*
 * Define the Node class under mNokogiriXml (superclass Object), install its
 * allocator, register the C-implemented methods below, and intern the method
 * ids this file calls by name.
 *
 * The final integer of each registration is the Ruby-visible argument count
 * (-1 means the C function receives argc/argv).
 */
void
noko_init_xml_node(void)
{
cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
rb_define_alloc_func(cNokogiriXmlNode, _xml_node_alloc);
/* Constructor. */
rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
/* Public instance methods. */
rb_define_method(cNokogiriXmlNode, "add_namespace_definition", rb_xml_node_add_namespace_definition, 2);
rb_define_method(cNokogiriXmlNode, "attribute", rb_xml_node_attribute, 1);
rb_define_method(cNokogiriXmlNode, "attribute_nodes", rb_xml_node_attribute_nodes, 0);
rb_define_method(cNokogiriXmlNode, "attribute_with_ns", rb_xml_node_attribute_with_ns, 2);
rb_define_method(cNokogiriXmlNode, "blank?", rb_xml_node_blank_eh, 0);
rb_define_method(cNokogiriXmlNode, "child", rb_xml_node_child, 0);
rb_define_method(cNokogiriXmlNode, "children", rb_xml_node_children, 0);
rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
rb_define_method(cNokogiriXmlNode, "data_ptr?", rb_xml_node_data_ptr_eh, 0);
rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
rb_define_method(cNokogiriXmlNode, "first_element_child", rb_xml_node_first_element_child, 0);
rb_define_method(cNokogiriXmlNode, "internal_subset", internal_subset, 0);
rb_define_method(cNokogiriXmlNode, "key?", key_eh, 1);
rb_define_method(cNokogiriXmlNode, "lang", get_lang, 0);
rb_define_method(cNokogiriXmlNode, "lang=", set_lang, 1);
rb_define_method(cNokogiriXmlNode, "last_element_child", rb_xml_node_last_element_child, 0);
rb_define_method(cNokogiriXmlNode, "line", rb_xml_node_line, 0);
rb_define_method(cNokogiriXmlNode, "line=", rb_xml_node_line_set, 1);
rb_define_method(cNokogiriXmlNode, "namespace", rb_xml_node_namespace, 0);
rb_define_method(cNokogiriXmlNode, "namespace_definitions", namespace_definitions, 0);
rb_define_method(cNokogiriXmlNode, "namespace_scopes", rb_xml_node_namespace_scopes, 0);
rb_define_method(cNokogiriXmlNode, "namespaced_key?", namespaced_key_eh, 2);
rb_define_method(cNokogiriXmlNode, "native_content=", set_native_content, 1);
rb_define_method(cNokogiriXmlNode, "next_element", next_element, 0);
rb_define_method(cNokogiriXmlNode, "next_sibling", next_sibling, 0);
rb_define_method(cNokogiriXmlNode, "node_name", get_name, 0);
rb_define_method(cNokogiriXmlNode, "node_name=", set_name, 1);
rb_define_method(cNokogiriXmlNode, "node_type", node_type, 0);
rb_define_method(cNokogiriXmlNode, "parent", get_parent, 0);
rb_define_method(cNokogiriXmlNode, "path", rb_xml_node_path, 0);
rb_define_method(cNokogiriXmlNode, "pointer_id", rb_xml_node_pointer_id, 0);
rb_define_method(cNokogiriXmlNode, "previous_element", previous_element, 0);
rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
/* Protected instance methods. */
rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
/* Private instance methods. */
rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
rb_define_private_method(cNokogiriXmlNode, "compare", compare, 1);
rb_define_private_method(cNokogiriXmlNode, "dump_html", dump_html, 0);
rb_define_private_method(cNokogiriXmlNode, "get", get, 1);
rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2);
rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
rb_define_private_method(cNokogiriXmlNode, "process_xincludes", noko_xml_node__process_xincludes, 1);
rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
/* Cache the method ids so later calls avoid repeated rb_intern lookups. */
id_decorate = rb_intern("decorate");
id_decorate_bang = rb_intern("decorate!");
}