Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Get text of webpage
#34
This version can parse HTML deeper. EXAMPLE 2 is for you.

Code:
Copy      Help
;/
function $HTML VARIANT'tableNameOrIndex [ARRAY(str)&a] [ARRAY(MSHTML.IHTMLElement)&a2] [flags] [str&tableText] ;;flags: 1 get HTML, 2 get all TD elements, including of inner tables

;Gets cells of a HTML table into an array.

;HTML - all HTML (page source).
;tableNameOrIndex - table name or 0-based index in the html. The function throws error if the specified table does not exist.
;a - array variable for results. The function creates 1-dimension array where each element is cell text. Can be 0 if not needed.
;a2 - array variable for results. The function creates 1-dimension array where each element is cell object that can be used to get html elements within the cell. Can be 0 if not needed.
;flags - combination of:
;   1 - store HTML (innerHTML) instead of text (innerText). Applies to elements of a and to tableText.
;   2 - get all TD elements of the table, including those of inner tables. Without this flag, uses the cells collection of the table.
;tableText - str variable that receives whole text or html of the table. Can be 0 if not needed.


;EXAMPLE
;out
;str s
;IntGetFile "http://www.weather.com/weather/tenday/48183" s
;
;ARRAY(str) a
;HtmlTableToArray s 12 a
;
;;display text in first cell of each row
;int i ncolumns=2
;for i 0 a.len ncolumns

,;out a[i]
,;;out a[i+1] ;;second cell, and so on
,;out "---------"

;;EXAMPLE2
;out
;str s
;IntGetFile "http://www.weather.com/weather/tenday/48183" s
;
;ARRAY(str) a
;HtmlTableToArray s 12 a 0 2 ;;uses flag 2 to get all TD elements, including of inner tables
;
;;display text in some cells of each row, including of cells of inner tables
;int i nTDinRow=8
;for i 0 a.len nTDinRow

,;out a[i+1]
,;out "---"
,;out a[i+2]
,;out "---"
,;out a[i+5]
,;out "----------------"

;;EXAMPLE3
;out
;str s
;IntGetFile "http://www.weather.com/weather/tenday/48183" s
;
;ARRAY(MSHTML.IHTMLElement) a
;HtmlTableToArray s 12 0 a
;
;;display tag and text of all inner html elements of each cell
;int i ncolumns=2
;for i 0 a.len ncolumns

,;MSHTML.IHTMLElement el
,;foreach el a[i].all
,,;str tag=el.tagName
,,;str txt=el.innerText
,,;out "<%s> %s[]" tag txt
,;out "----------------"


;Parse the page source into a document object.
MSHTML.IHTMLDocument2 d; MSHTML.IHTMLDocument3 d3
HtmlParse HTML d d3

;Find the table by name or index among all TABLE elements.
MSHTML.IHTMLElement t=d3.getElementsByTagName("TABLE").item(tableNameOrIndex); err end "the specified table does not exist"
;item() may succeed but give no object when nothing matches; report that with the same error instead of failing later on a null object.
if(!t) end "the specified table does not exist"

if(&tableText) if(flags&1) tableText=t.innerHTML; else tableText=t.innerText

;Choose the set of cells: all TD descendants (flag 2) or the table's own cells collection.
MSHTML.IHTMLElementCollection cells
if(flags&2)
,MSHTML.IHTMLElement2 t2=+t
,cells=t2.getElementsByTagName("TD")
else
,MSHTML.IHTMLTable2 table=+t
,cells=table.cells

;Create the requested output arrays, one element per cell.
int i nc=cells.length
if(&a) a.create(nc)
if(&a2) a2.create(nc)

for i 0 nc
,MSHTML.IHTMLElement el=cells.item(i)
,if(&a) if(flags&1) a[i]=el.innerHTML; else a[i]=el.innerText
,if(&a2) a2[i]=el
,el=0 ;;without this may be exception in destructors, don't know why


Messages In This Thread

Forum Jump:


Users browsing this thread: 1 Guest(s)