How to strip html?

manil

New member
Joined
Aug 16, 2008
Messages
1
Programming Experience
Beginner
hey guys! i'm new here...

I'm trying to work out how to strip data from an HTML file. In my design I have a combobox that contains categories, which are stored in an ArrayList. So far I have been able to strip the categories out of a file. Now I want to retrieve the link for each category so that I can display that category's data in listboxes. Each time a user changes the category in the combobox, they have to click the refresh button I created to load the data for the newly selected category. I created 3 listboxes: lstProduct, lstPrice and lstSeller.

this is how i stripped the category from the site http://pricespy.com.au/sitemap.php

VB.NET:
Public Function g_sShortText(ByVal sTextA As String, ByVal sStartTagA As String, ByVal sEndTagA As String) As String
        '------------------------------------------------------------
        ' Subroutine  : g_sShortText 
        ' Author      : Manil Estabillo                      Date   : 14/08/2008
        ' Aim         : extract relevant information from text
        '------------------------------------------------------------
        ' Incoming Parameters:
        '                           sTextA - text to be examined
        '                           sStartTagA - tag that begins relevant text
        '                           sEndTagA - tag that ends relevant text
        '
        ' Return data :
        '                           every "start tag ... end tag" section found in
        '                           sTextA (both tags included), concatenated in the
        '                           order they occur; "" when nothing matches or when
        '                           any argument is Nothing/empty
        '------------------------------------------------------------

        ' Nothing to scan, or an empty tag (which would match at every position
        ' and never let the loop advance).
        If String.IsNullOrEmpty(sTextA) OrElse String.IsNullOrEmpty(sStartTagA) _
           OrElse String.IsNullOrEmpty(sEndTagA) Then
            Return ""
        End If

        Dim sbNewText As New System.Text.StringBuilder()
        Dim iStartTagPos As Integer        ' Position of the current start tag
        Dim iEndTagPos As Integer          ' Position just past the matching end tag

        iStartTagPos = sTextA.IndexOf(sStartTagA, 0, StringComparison.Ordinal)
        'Start of do-loop 
        Do While iStartTagPos <> -1
            ' Look for the end tag only AFTER the start tag, so an end tag that
            ' appears earlier in the text can never produce a negative length.
            iEndTagPos = sTextA.IndexOf(sEndTagA, iStartTagPos + sStartTagA.Length, StringComparison.Ordinal)
            If iEndTagPos = -1 Then
                ' Unterminated section: there is nothing complete left to extract.
                Exit Do
            End If
            iEndTagPos = iEndTagPos + sEndTagA.Length

            sbNewText.Append(sTextA, iStartTagPos, iEndTagPos - iStartTagPos)

            ' Resume directly after the end tag. Skipping an extra character would
            ' miss a start tag that immediately follows, and would push the start
            ' index past the end of the text when the text ends on an end tag.
            iStartTagPos = sTextA.IndexOf(sStartTagA, iEndTagPos, StringComparison.Ordinal)
        Loop 'End of do loop

        Return sbNewText.ToString()
    End Function


Public Function g_alCategoriesSites(ByVal sXMLTextA As String, ByVal sStartTagA As String, ByVal sEndTagA As String) As ArrayList
        '------------------------------------------------------------
        ' Aim : Put the categories' page links into array list
        ' Author : Manil Estabillo
        ' Date : 15/08/2008
        '------------------------------------------------------------
        ' Incoming Parameters :
        '                       sXMLTextA - text to be examined
        '                       sStartTagA - tag that begins relevant text
        '                       sEndTagA - tag that ends relevant text   
        '
        ' Return data :
        '                       whatever g_alShortText2 returns for the reduced
        '                       text (defined elsewhere; presumably one entry per
        '                       page link - confirm against that function)
        '------------------------------------------------------------

        ' Step 1: keep only the sections delimited by the caller's tags.
        Dim sSections As String = g_sShortText(sXMLTextA, sStartTagA, sEndTagA)

        ' Step 2: within those sections, pick out what lies between the opening
        ' of an anchor's href attribute and the ".html" extension.
        Dim sLinkStart As String = "<a href="""
        Dim sLinkEnd As String = ".html"

        Return g_alShortText2(sSections, sLinkStart, sLinkEnd)
    End Function


Private Sub frmMain_Load(ByVal sender As Object, ByVal e As System.EventArgs) Handles Me.Load
        '------------------------------------------------------------
        ' Aim : Load the product categories into arrays and combo box
        ' Author : Manil Estabillo
        ' Date : 14/08/2008
        '------------------------------------------------------------
        ' Incoming Parameters : -
        ' Outgoing Parameters : -
        ' Return data : -
        '------------------------------------------------------------

        ' The page is read from a local copy sitting next to the executable.
        Dim sSitemapLocation As String = "file://" & Application.StartupPath & "/sitemap.php"
        Dim sPageText As String = g_sDownload(sSitemapLocation)

        ' Categories are looked up inside the <h2> headings.
        Dim sHeadingOpen As String = "<h2>"
        Dim sHeadingClose As String = "</h2>"

        ' The links list is built here but not used any further in this handler.
        Dim alstCategoriesSites As ArrayList = g_alCategoriesSites(sPageText, sHeadingOpen, sHeadingClose)
        Dim alstCategories As ArrayList = g_alCategories(sPageText, sHeadingOpen, sHeadingClose)

        ' Fill the combo box in list order.
        For Each oCategory As Object In alstCategories
            cboCategory.Items.Add(oCategory)
        Next

        ' Preselect "Mobile phone"; IndexOf yields -1 (no selection) if it is absent.
        cboCategory.SelectedIndex = cboCategory.Items.IndexOf("Mobile phone")

    End Sub

I am currently working with an offline page: I saved a copy of sitemap.php locally (in My Documents), and the code builds the file path from Application.StartupPath. How do I make it access the online page instead? I'm using VB 2008 EE.
 
Back
Top