top button
Flag Notify
    Connect to us
      Site Registration

Site Registration

Parse through xml file in Python?

+2 votes
576 views

I know the basics of Python, and I have an XML file, created from a CSV file, which has three attributes: "category", "definition" and "definition description". I want to parse the XML file and identify actors, constraints, and principals from the text.

However, I am not sure what the best approach is. Any suggestions?

posted Apr 6, 2015 by anonymous

Share this question
Facebook Share Button Twitter Share Button LinkedIn Share Button

1 Answer

0 votes

import bs4
b=bs4.BeautifulSoup("xml content")
b.your_tag_name.get_text()
u'content'

answer Apr 27, 2015 by Krishnan Goskan
Similar Questions
+3 votes

I am new to XML and want to know which functions I need to know before writing C code to parse an .xml file.

+2 votes

This is the XML file. I need to parse it and store the data in a database using Java, but some nodes are missing during parsing.

<Products>
<Product>
<ProductURLs>
<ProductURL>http://www.partner.viator.com/en/13689/tours/Rome/Skip-the-Line-Vatican-Museums-Walking-Tour-including-Sistine-Chapel-Raphael-s-Rooms-and-St-Peter-s/d511-3731VATICAN</ProductURL>
</ProductURLs>
<ProductStarRating>
<AvgRating>4.5</AvgRating>
<AvgRatingStarURL>http://www.partner.viator.com/images/stars/red/17-4_5.gif</AvgRatingStarURL>
</ProductStarRating>
<IATACode>Rome</IATACode>
<BookingType>FreesaleOnRequest</BookingType>
<VoucherOption>VOUCHER_E</VoucherOption>
<ProductStarRating>
<AvgRating>4.5</AvgRating>
<AvgRatingStarURL>http://www.partner.viator.com/images/stars/red/17-4_5.gif</AvgRatingStarURL>
</ProductStarRating>
</Product>
<Product>
<ProductURLs>
<ProductURL>http://www.partner.viator.com/en/13689/tours/Rome/Skip-the-Line-Vatican-Museums-Walking-Tour-including-Sistine-Chapel-Raphael-s-Rooms-and-St-Peter-s/d511-3731VATICAN</ProductURL>
</ProductURLs>
<ProductStarRating>
<AvgRating>4.5</AvgRating>
<AvgRatingStarURL>http://www.partner.viator.com/images/stars/red/17-4_5.gif</AvgRatingStarURL>
</ProductStarRating>
<IATACode>Rome</IATACode>
<BookingType>FreesaleOnRequest</BookingType>
<VoucherOption>VOUCHER_E</VoucherOption>
</Product>
<Product>
<ProductURLs>
<ProductURL>http://www.partner.viator.com/en/13689/tours/Rome/Skip-the-Line-Vatican-Museums-Walking-Tour-including-Sistine-Chapel-Raphael-s-Rooms-and-St-Peter-s/d511-3731VATICAN</ProductURL>
</ProductURLs>
<ProductStarRating>
<AvgRating>4.5</AvgRating>
<AvgRatingStarURL>http://www.partner.viator.com/images/stars/red/17-4_5.gif</AvgRatingStarURL>
</ProductStarRating>
<BookingType>FreesaleOnRequest</BookingType>
<VoucherOption>VOUCHER_E</VoucherOption>
</Product>
</Products>

this is my java code

package test;

import javax.print.attribute.standard.Destination;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import org.w3c.dom.Text;

import com.mysql.jdbc.PreparedStatement;
import java.io.File;
import java.io.StringWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;


/**
 * Reads product data from an XML file and stores one row per
 * {@code <Product>} element in the {@code VIATOR_PRODUCTS} table.
 *
 * <p>For every product the text of the first {@code IATACode} descendant and
 * the first {@code AvgRating} descendant is written to the
 * {@code IATA_CODE} and {@code AVG_RATING} columns. A product that lacks one
 * of these elements gets an empty string in the corresponding column, so a
 * missing element never causes the other value to be dropped.
 */
public class Test3 {

    /** Single insert used for every product; both columns are always supplied. */
    private static final String INSERT_SQL =
            "INSERT INTO VIATOR_PRODUCTS(IATA_CODE,AVG_RATING) VALUES (?,?)";

    /**
     * Parses the XML file and inserts one row per product.
     *
     * <p>Any failure (driver not found, connection error, malformed XML,
     * SQL error) is reported by printing the stack trace.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {

        try {
            Class.forName("com.mysql.jdbc.Driver");

            // try-with-resources closes the statement and the connection even
            // when parsing or an insert fails part-way through.
            try (Connection con = DriverManager.getConnection("jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8", "root", "passwurd");
                 java.sql.PreparedStatement insert = con.prepareStatement(INSERT_SQL)) {

                File fXmlFile = new File("E:/xml/xml/test.xml");
                DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
                DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
                Document doc = dBuilder.parse(fXmlFile);

                //optional, but recommended
                //read this - http://stackoverflow.com/questions/13786607/normalization-in-dom-parsing-with-java-how-does-it-work
                doc.getDocumentElement().normalize();

                System.out.println("Root element :" + doc.getDocumentElement().getNodeName());

                NodeList products = doc.getElementsByTagName("Product");

                System.out.println("----------------------------");

                for (int i = 0; i < products.getLength(); i++) {
                    // getElementsByTagName only returns element nodes, so the cast is safe.
                    Element product = (Element) products.item(i);

                    // Each value is looked up independently: the absence of one
                    // element must not prevent the other from being stored.
                    insert.setString(1, firstTextOrEmpty(product, "IATACode"));
                    insert.setString(2, firstTextOrEmpty(product, "AvgRating"));
                    insert.executeUpdate();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the text content of the first descendant element with the given
     * tag name, or an empty string when the parent has no such descendant.
     *
     * @param parent  element whose descendants are searched
     * @param tagName tag name to look for (case-sensitive)
     * @return text of the first match, or {@code ""} if there is none
     */
    private static String firstTextOrEmpty(Element parent, String tagName) {
        NodeList matches = parent.getElementsByTagName(tagName);
        if (matches.getLength() == 0) {
            return "";
        }
        return matches.item(0).getTextContent();
    }
}
...