Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

Parser.cpp

Go to the documentation of this file.
00001 
00005 /* --------------------------------------------------------------
00006 Copyright (C) 2001 LifeLine Networks BV <soap2corba@lifeline.nl>
00007 
00008 This program is free software; you can redistribute it and/or
00009 modify it under the terms of the GNU General Public License
00010 as published by the Free Software Foundation; either
00011 version 2 of the License, or (at your option) any later
00012 version.
00013 
00014 This program is distributed in the hope that it will be useful,
00015 but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 GNU General Public License for more details.
00018 
00019 You should have received a copy of the GNU General Public License
00020 along with this program; if not, write to the Free Software
00021 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00022 -------------------------------------------------------------- */
00023 
00024 #include "Parser.h"
00025 
/// True until XMLPlatformUtils::Initialize() has completed successfully in
/// this translation unit; the functions below test it to initialise Xerces
/// lazily and clear it once initialisation succeeded.
static bool noInitXerces = true;
00028 
00030 void releaseDOM (DOM_H hdl)
00031 {
00032     delete (DOM_Node *) hdl;
00033 }
00034 
00036 DOM_H parseDOM (const unsigned char *pBuf, const unsigned long size)
00037 {
00038     if (noInitXerces) {
00039         // Initialize the XML4C2 system
00040         try
00041         {
00042             XMLPlatformUtils::Initialize();
00043             noInitXerces = false;
00044         }
00045 
00046         catch(const XMLException& toCatch)
00047         {
00048             (void) toCatch;
00049             cerr << "Error during Xerces-c Initialization." << endl;
00050         }
00051     }
00052     DOMParser::ValSchemes gValScheme = DOMParser::Val_Never;
00053     bool gDoNamespaces = true;
00054     bool gDoCreate = true;
00055 
00056     DOMParser parser;
00057     parser.setValidationScheme(gValScheme);
00058     parser.setDoNamespaces(gDoNamespaces);
00059     DOMTreeErrorReporter errReporter;
00060     parser.setErrorHandler(&errReporter);
00061     parser.setCreateEntityReferenceNodes(gDoCreate);
00062     parser.setToCreateXMLDeclTypeNode(true);
00063 
00064     MemBufInputSource inpSrc (pBuf, size, "XmlParser", false);
00065     //
00066     //  Parse the XML file, catching any XML exceptions that might propogate
00067     //  out of it.
00068     //
00069     bool errorsOccured = false;
00070     try
00071     {
00072         parser.parse(inpSrc);
00073     }
00074 
00075     catch (const XMLException& e)
00076     {
00077         char *msg = DOMString(e.getMessage()).transcode ();
00078         CString Msg = "A XML exception occurred during parsing\r\nMessage: ";
00079         Msg += msg;
00080         delete msg;
00081         cerr << (const char *) Msg << endl;
00082         errorsOccured = true;
00083     }
00084 
00085 
00086     catch (const DOM_DOMException& e)
00087     {
00088         char *msg = DOMString(e.msg).transcode ();
00089         CString Msg = "A DOM exception occurred during parsing\r\nMessage: ";
00090         Msg += msg;
00091         delete msg;
00092         cerr << (const char *) Msg << endl;
00093         errorsOccured = true;
00094     }
00095 
00096     catch (...)
00097     {
00098         cerr << "An error occurred during parsing" << endl;
00099         errorsOccured = true;
00100     }
00101 
00102 
00103     if (!errorsOccured) {
00104         DOM_Node *pNode = new DOM_Node (parser.getDocument ());
00105         return pNode;
00106     }
00107     return NULL;
00108 }
00109 
00111 static const XMLCh  gEndElement[] = { chOpenAngle, chForwardSlash, chNull };
00113 static const XMLCh  gEndPI[] = { chQuestion, chCloseAngle, chNull};
00115 static const XMLCh  gStartPI[] = { chOpenAngle, chQuestion, chNull };
00117 static const XMLCh  gXMLDecl1[] =
00118 {
00119         chOpenAngle, chQuestion, chLatin_x, chLatin_m, chLatin_l
00120     ,   chSpace, chLatin_v, chLatin_e, chLatin_r, chLatin_s, chLatin_i
00121     ,   chLatin_o, chLatin_n, chEqual, chDoubleQuote, chNull
00122 };
00124 static const XMLCh  gXMLDecl2[] =
00125 {
00126         chDoubleQuote, chSpace, chLatin_e, chLatin_n, chLatin_c
00127     ,   chLatin_o, chLatin_d, chLatin_i, chLatin_n, chLatin_g, chEqual
00128     ,   chDoubleQuote, chNull
00129 };
00131 static const XMLCh  gXMLDecl3[] =
00132 {
00133         chDoubleQuote, chSpace, chLatin_s, chLatin_t, chLatin_a
00134     ,   chLatin_n, chLatin_d, chLatin_a, chLatin_l, chLatin_o
00135     ,   chLatin_n, chLatin_e, chEqual, chDoubleQuote, chNull
00136 };
00138 static const XMLCh  gXMLDecl4[] =
00139 {
00140         chDoubleQuote, chQuestion, chCloseAngle
00141     ,   chCR, chLF, chNull
00142 };
00144 static const XMLCh  gStartCDATA[] =
00145 { 
00146         chOpenAngle, chBang, chOpenSquare, chLatin_C, chLatin_D,
00147         chLatin_A, chLatin_T, chLatin_A, chOpenSquare, chNull
00148 };
00150 static const XMLCh  gEndCDATA[] =
00151 {
00152     chCloseSquare, chCloseSquare, chCloseAngle, chNull
00153 };
00155 static const XMLCh  gStartComment[] =
00156 { 
00157     chOpenAngle, chBang, chDash, chDash, chNull
00158 };
00160 static const XMLCh  gEndComment[] =
00161 {
00162     chDash, chDash, chCloseAngle, chNull
00163 };
00165 static const XMLCh  gStartDoctype[] =
00166 { 
00167     chOpenAngle, chBang, chLatin_D, chLatin_O, chLatin_C, chLatin_T,
00168     chLatin_Y, chLatin_P, chLatin_E, chSpace, chNull
00169 };
00171 static const XMLCh  gPublic[] =
00172 { 
00173     chLatin_P, chLatin_U, chLatin_B, chLatin_L, chLatin_I,
00174     chLatin_C, chSpace, chDoubleQuote, chNull
00175 };
00177 static const XMLCh  gSystem[] =
00178 { 
00179     chLatin_S, chLatin_Y, chLatin_S, chLatin_T, chLatin_E,
00180     chLatin_M, chSpace, chDoubleQuote, chNull
00181 };
00183 static const XMLCh  gStartEntity[] =
00184 { 
00185     chOpenAngle, chBang, chLatin_E, chLatin_N, chLatin_T, chLatin_I,
00186     chLatin_T, chLatin_Y, chSpace, chNull
00187 };
00189 static const XMLCh  gNotation[] =
00190 { 
00191     chLatin_N, chLatin_D, chLatin_A, chLatin_T, chLatin_A,
00192     chSpace, chDoubleQuote, chNull
00193 };
00194 
00196 XMLFormatter& operator<< (XMLFormatter& strm, const DOMString& s)
00197 {
00198     unsigned int lent = s.length();
00199     
00200     if (lent <= 0)
00201         return strm;
00202     
00203     XMLCh*  buf = new XMLCh[lent + 1];
00204     XMLString::copyNString(buf, s.rawBuffer(), lent);
00205     buf[lent] = 0;
00206     strm << buf;
00207     delete [] buf;
00208     return strm;
00209 }
00210 
00212 static void WalkXML(XMLFormatter *gFormatter, DOM_Node &toWrite)
00213 {
00214     if (toWrite == NULL) return;
00215     // Get the name and value out for convenience
00216     DOMString   nodeName = toWrite.getNodeName();
00217     DOMString   nodeValue = toWrite.getNodeValue();
00218     unsigned long lent = nodeValue.length();
00219 
00220     switch (toWrite.getNodeType())
00221     {
00222         case DOM_Node::TEXT_NODE:
00223         {
00224             gFormatter->formatBuf(nodeValue.rawBuffer(), 
00225                                   lent, XMLFormatter::CharEscapes);
00226             break;
00227         }
00228 
00229 
00230         case DOM_Node::PROCESSING_INSTRUCTION_NODE :
00231         {
00232             *gFormatter << XMLFormatter::NoEscapes << gStartPI  << nodeName;
00233             if (lent > 0)
00234             {
00235                 *gFormatter << chSpace << nodeValue;
00236             }
00237             *gFormatter << XMLFormatter::NoEscapes << gEndPI;
00238             break;
00239         }
00240 
00241 
00242         case DOM_Node::DOCUMENT_NODE :
00243         {
00244             DOMString utf8 ("UTF-8");
00245             if (!utf8.equals (gFormatter->getEncodingName ())) {
00246                 DOM_Document doc = (DOM_Document &) toWrite;
00247                 DOM_Node decl = doc.createXMLDecl ("1.0", gFormatter->getEncodingName (), 0);
00248                 WalkXML (gFormatter, decl);
00249             }
00250             DOM_Node child = toWrite.getFirstChild();
00251             while( child != 0)
00252             {
00253                 WalkXML (gFormatter, child);
00254                 child = child.getNextSibling();
00255             }
00256             break;
00257         }
00258 
00259 
00260         case DOM_Node::ELEMENT_NODE :
00261         {
00262             // The name has to be representable without any escapes
00263             *gFormatter  << XMLFormatter::NoEscapes
00264                          << chOpenAngle << nodeName;
00265 
00266             // Output the element start tag.
00267 
00268             // Output any attributes on this element
00269             DOM_NamedNodeMap attributes = toWrite.getAttributes();
00270             int attrCount = attributes.getLength();
00271             for (int i = 0; i < attrCount; i++)
00272             {
00273                 DOM_Node  attribute = attributes.item(i);
00274 
00275                 //
00276                 //  Again the name has to be completely representable. But the
00277                 //  attribute can have refs and requires the attribute style
00278                 //  escaping.
00279                 //
00280                 *gFormatter  << XMLFormatter::NoEscapes
00281                              << chSpace << attribute.getNodeName()
00282                              << chEqual << chDoubleQuote
00283                              << XMLFormatter::AttrEscapes
00284                              << attribute.getNodeValue()
00285                              << XMLFormatter::NoEscapes
00286                              << chDoubleQuote;
00287             }
00288 
00289             //
00290             //  Test for the presence of children, which includes both
00291             //  text content and nested elements.
00292             //
00293             DOM_Node child = toWrite.getFirstChild();
00294             if (child != 0)
00295             {
00296                 // There are children. Close start-tag, and output children.
00297                 // No escapes are legal here
00298                 *gFormatter << XMLFormatter::NoEscapes << chCloseAngle;
00299 
00300                 while( child != 0)
00301                 {
00302                     WalkXML (gFormatter, child);
00303                     child = child.getNextSibling();
00304                 }
00305 
00306                 //
00307                 // Done with children.  Output the end tag.
00308                 //
00309                 *gFormatter << XMLFormatter::NoEscapes << gEndElement
00310                             << nodeName << chCloseAngle;
00311             }
00312             else
00313             {
00314                 //
00315                 //  There were no children. Output the short form close of
00316                 //  the element start tag, making it an empty-element tag.
00317                 //
00318                 *gFormatter << XMLFormatter::NoEscapes << chForwardSlash << chCloseAngle;
00319             }
00320             break;
00321         }
00322         
00323         
00324         case DOM_Node::ENTITY_REFERENCE_NODE:
00325             {
00326                 DOM_Node child;
00327 #if 0
00328                 for (child = toWrite.getFirstChild();
00329                 child != 0;
00330                 child = child.getNextSibling())
00331                 {
00332                     target << child;
00333                 }
00334 #else
00335                 //
00336                 // Instead of printing the refernece tree 
00337                 // we'd output the actual text as it appeared in the xml file.
00338                 // This would be the case when -e option was chosen
00339                 //
00340                     *gFormatter << XMLFormatter::NoEscapes << chAmpersand
00341                         << nodeName << chSemiColon;
00342 #endif
00343                 break;
00344             }
00345             
00346             
00347         case DOM_Node::CDATA_SECTION_NODE:
00348             {
00349             *gFormatter << XMLFormatter::NoEscapes << gStartCDATA
00350                         << nodeValue << gEndCDATA;
00351             break;
00352         }
00353 
00354         
00355         case DOM_Node::COMMENT_NODE:
00356         {
00357             *gFormatter << XMLFormatter::NoEscapes << gStartComment
00358                         << nodeValue << gEndComment;
00359             break;
00360         }
00361 
00362         
00363         case DOM_Node::DOCUMENT_TYPE_NODE:
00364         {
00365             DOM_DocumentType doctype = (DOM_DocumentType &)toWrite;;
00366 
00367             *gFormatter << XMLFormatter::NoEscapes  << gStartDoctype
00368                         << nodeName;
00369  
00370             DOMString id = doctype.getPublicId();
00371             if (id != 0)
00372             {
00373                 *gFormatter << XMLFormatter::NoEscapes << chSpace << gPublic
00374                     << id << chDoubleQuote;
00375                 id = doctype.getSystemId();
00376                 if (id != 0)
00377                 {
00378                     *gFormatter << XMLFormatter::NoEscapes << chSpace 
00379                        << chDoubleQuote << id << chDoubleQuote;
00380                 }
00381             }
00382             else
00383             {
00384                 id = doctype.getSystemId();
00385                 if (id != 0)
00386                 {
00387                     *gFormatter << XMLFormatter::NoEscapes << chSpace << gSystem
00388                         << id << chDoubleQuote;
00389                 }
00390             }
00391             
00392             id = doctype.getInternalSubset(); 
00393             if (id !=0)
00394                 *gFormatter << XMLFormatter::NoEscapes << chOpenSquare
00395                             << id << chCloseSquare;
00396 
00397             *gFormatter << XMLFormatter::NoEscapes << chCloseAngle;
00398             break;
00399         }
00400         
00401         
00402         case DOM_Node::ENTITY_NODE:
00403         {
00404             *gFormatter << XMLFormatter::NoEscapes << gStartEntity
00405                         << nodeName;
00406 
00407             DOMString id = ((DOM_Entity &)toWrite).getPublicId();
00408             if (id != 0)
00409                 *gFormatter << XMLFormatter::NoEscapes << gPublic
00410                             << id << chDoubleQuote;
00411 
00412             id = ((DOM_Entity &)toWrite).getSystemId();
00413             if (id != 0)
00414                 *gFormatter << XMLFormatter::NoEscapes << gSystem
00415                             << id << chDoubleQuote;
00416             
00417             id = ((DOM_Entity &)toWrite).getNotationName();
00418             if (id != 0)
00419                 *gFormatter << XMLFormatter::NoEscapes << gNotation
00420                             << id << chDoubleQuote;
00421 
00422             *gFormatter << XMLFormatter::NoEscapes << chCloseAngle << chCR << chLF;
00423 
00424             break;
00425         }
00426         
00427         
00428         case DOM_Node::XML_DECL_NODE:
00429         {
00430             DOMString  str;
00431 
00432             *gFormatter << gXMLDecl1 << ((DOM_XMLDecl &)toWrite).getVersion();
00433 
00434             *gFormatter << gXMLDecl2 << gFormatter->getEncodingName ();
00435             
00436             str = ((DOM_XMLDecl &)toWrite).getStandalone();
00437             if (str != 0)
00438                 *gFormatter << gXMLDecl3 << str;
00439             
00440             *gFormatter << gXMLDecl4;
00441 
00442             break;
00443         }
00444         
00445         
00446         default:
00447             cerr <<  "Unrecognized node type in GetXML()" << endl;
00448             break;
00449     }
00450 }
00451 
00461 void GetXML(unsigned char * &buf, unsigned long &siz, DOM_H toWrite, const char *encoding)
00462 {
00463     DOMPrintFormatTarget* formatTarget = new DOMPrintFormatTarget();
00464     XMLFormatter *gFormatter = new XMLFormatter(encoding, formatTarget, XMLFormatter::NoEscapes, 
00465         XMLFormatter::UnRep_CharRef);
00466     WalkXML (gFormatter, *((DOM_Node *) toWrite));
00467     buf = formatTarget->outBuf;
00468     siz = formatTarget->outPtr;
00469     delete formatTarget;
00470     delete gFormatter;
00471 }
00472 
00480 void addComment (DOM_Document &doc, DOM_Node &node, const char *rem)
00481 {
00482     DOM_Comment Rem = doc.createComment (rem);
00483     node.appendChild (Rem);
00484 }
00485 
00493 void addTextI (DOM_Document &doc, DOM_Node &node, int num)
00494 {
00495     char st [20];
00496     sprintf (st, "%ld", num);
00497     DOM_Text Text = doc.createTextNode (st);
00498     node.appendChild (Text);
00499 }
00500 
00508 void addTextU (DOM_Document &doc, DOM_Node &node, unsigned int num)
00509 {
00510     char st [20];
00511     sprintf (st, "%lu", num);
00512     DOM_Text Text = doc.createTextNode (st);
00513     node.appendChild (Text);
00514 }
00515 
00523 void addTextD (DOM_Document &doc, DOM_Node &node, double num)
00524 {
00525     char st [20];
00526     sprintf (st, "%lg", num);
00527     DOM_Text Text = doc.createTextNode (st);
00528     node.appendChild (Text);
00529 }
00530 
00539 void addTextC (DOM_Document &doc, DOM_Node &node, XMLCh bt)
00540 {
00541     DOMString st (&bt, 1);
00542     DOM_Text Text = doc.createTextNode (st);
00543     node.appendChild (Text);
00544 }
00545 
00553 void addText (DOM_Document &doc, DOM_Node &node, const char *st)
00554 {
00555     DOM_Text Text = doc.createTextNode (st);
00556     node.appendChild (Text);
00557 }
00558 
00567 void addText (DOM_Document &doc, DOM_Node &node, unsigned short *st)
00568 {
00569     DOM_Text Text = doc.createTextNode (st);
00570     node.appendChild (Text);
00571 }
00572 
00580 DOM_Element addElement (DOM_Document &doc, DOM_Node &node, const char *st)
00581 {
00582     if (noInitXerces) {
00583         // Initialize the XML4C2 system
00584         try
00585         {
00586             XMLPlatformUtils::Initialize();
00587             noInitXerces = false;
00588         }
00589 
00590         catch(const XMLException& toCatch)
00591         {
00592             (void) toCatch;
00593             cerr << "Error during Xerces-c Initialization." << endl;
00594         }
00595     }
00596     DOM_Element Elm = doc.createElement (st);
00597     node.appendChild (Elm);
00598     return Elm;
00599 }
00600 
00608 void createEmptySoap (DOM_Document &doc, DOM_Element &soapEnvelope, DOM_Element &soapBody)
00609 {
00610     doc = DOM_Document::createDocument ();
00611     soapEnvelope = addElement (doc, doc, "SOAP-ENV:Envelope");
00612     soapEnvelope.setAttribute ("xmlns:xsi", "http://www.w3.org/1999/XMLSchema-instance");
00613     soapEnvelope.setAttribute ("xmlns:xsd", "http://www.w3.org/1999/XMLSchema");
00614     soapEnvelope.setAttribute ("xmlns:SOAP-ENV", "http://schemas.xmlsoap.org/soap/envelope/");
00615     soapBody = addElement (doc, soapEnvelope, "SOAP-ENV:Body");
00616 }
00617 
00625 void addHeader (DOM_Document &doc, DOM_Element &envel, DOM_Element &soapHeader,
00626                 const char *objRef, const char *ctxKey, const char *soapAction)
00627 {
00628     soapHeader = addElement (doc, envel, "SOAP-ENV:Header");
00629     if (objRef || ctxKey || soapAction) {
00630         soapHeader.setAttribute ("xmlns:ll", "http://www.lifeline.nl/soapbridge/");
00631         if (objRef) {
00632             DOM_Element elm = addElement (doc, soapHeader, "ll:ObjRef");
00633             addText (doc, elm, objRef);
00634         }
00635         if (ctxKey) {
00636             DOM_Element elm = addElement (doc, soapHeader, "ll:Context");
00637             addText (doc, elm, ctxKey);
00638         }
00639         if (soapAction) {
00640             DOM_Element elm = addElement (doc, soapHeader, "ll:Action");
00641             addText (doc, elm, soapAction);
00642         }
00643     }
00644 }
00645 
00653 void importHeader (DOM_Document &doc, DOM_Element &envel, DOM_Element &soapHeader)
00654 {
00655     if (soapHeader != NULL) {
00656         DOM_Node tmp = doc.importNode (soapHeader, true);
00657         envel.appendChild (tmp);
00658     }
00659 }
00660 
00662 CString DOM2CS (DOMString &ds)
00663 {
00664     char *del = ds.transcode ();
00665     CString cs = del;
00666     delete del;
00667     return cs;
00668 }
00669 
00671 CString DOM2CS (DOM_Element &el)
00672 {
00673     DOM_Node tn = el.getFirstChild ();
00674     CString cs;
00675     // loop in case the element has more than one textnode with or without other nodes intermixed.
00676     while (tn != NULL) {
00677         if (tn.getNodeType () == DOM_Node::TEXT_NODE) {
00678             DOMString ds (tn.getNodeValue ());
00679             cs += DOM2CS (ds);
00680         }
00681         tn = tn.getNextSibling ();
00682     }
00683     return cs;
00684 }
00685 
00686 

This documentation is part of the "SOAP to CORBA bridge" project
Copyright © 2000 by Lifeline Networks bv.
All rights are reserved.