' Html Check Error Url: http://validator.w3.org/
Private Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
Global strUrlCompanyInfo As String
Global strUrlCompanyDividend As String
Global strUrlCompanyProfit As String
Global strIdCompany As String
Global strXpathCompanyName As String
Global strXpathCompanyIndustry As String
Global strXpathDividendHeader1 As String
Global strXpathDividendHeader As String
Global strXpathDividendData As String
Global strXpathProfitHeader1 As String
Global strXpathProfitHeader As String
Global strXpathProfitData As String
Global strNewData As String
Global strSection As String
Global bIsString As Boolean
Global bIsScript As Boolean
Global strPrefix As String
Global strLastPreFix As String
Global strLastLabel As String
Global nTabCount As Integer
Global nTotalTabCount As Integer
' 初始化全域變數
Sub Global_Variable_Init()
strNewData = ""
strSection = ""
bIsString = False
bIsScript = False
strPrefix = "/"
strLastPreFix = strPrefix
strLastLabel = ""
nTabCount = 0
nTotalTabCount = 0
' 公司名稱與產品別
strUrlCompanyInfo = "https://goodinfo.tw/StockInfo/StockDetail.asp?STOCK_ID="
strXpathCompanyName = "/html/body/table[2]/tbody/tr/td[3]/table/tbody/tr[2]/td[3]/table[2]/tbody/tr[1]/td[2]"
strXpathCompanyIndustry = "/html/body/table[2]/tbody/tr/td[3]/table/tbody/tr[2]/td[3]/table[2]/tbody/tr[2]/td[2]"
' 股利政策
strUrlCompanyDividend = "https://goodinfo.tw/StockInfo/StockDividendPolicy.asp?STOCK_ID="
strXpathDividendHeader1 = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr[1]/td"
strXpathDividendHeader = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr"
strXpathDividendData = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/tbody[1]/tr"
' 獲利狀況
strUrlCompanyProfit = "https://goodinfo.tw/StockInfo/StockBzPerformance.asp?STOCK_ID="
strXpathProfitHeader1 = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr[1]/td"
strXpathProfitHeader = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr"
strXpathProfitData = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/tbody[1]/tr"
End Sub
' 判斷是否為標籤結尾,如 <html>或 <html+空白
Function Is_End_Of_Label(ByVal ch As String)
If ch = " " Or ch = ">" Then
Is_End_Of_Label = True
Else
Is_End_Of_Label = False
End If
End Function
' 遇到標籤結尾將階層ID回推一層
Sub Prefix_Finish(ByVal label As String)
Dim nEnd As Integer
If Is_End_Of_Label(Mid(strSection, Len(label) + 1, 1)) = True Then ' 確認是'>'或' '結尾
nEnd = Len(strPrefix)
Do While True
nEnd = nEnd - 1
If Mid(strPrefix, nEnd, 1) = "/" Then ' 將id除/html/body/table,縮短為/html/body/,因為已經找到</table>
nEnd = nEnd + 1 ' Keep "/"
Exit Do
End If
Loop
strLastPreFix = strPrefix ' 保留這次id,用來計算索引
strPrefix = Mid(strPrefix, 1, nEnd - 1) ' 縮短的id
nTotalTabCount = nTotalTabCount - 1
End If
End Sub
' 遇到標籤起頭將階層ID增加一層,並決定陣列索引
Sub Prefix_Start(ByVal label As String)
Dim strTmp As String
Dim nIdStart As Integer, nIdEnd As Integer, nLength As Integer
strTmp = "<" + label
If Is_End_Of_Label(Mid(strSection, Len(strTmp) + 1, 1)) = True Then ' 確認是'>'或' '結尾
' 設定編號,如table[1]、table[2]
strPrefix = strPrefix + label + "["
If InStr(1, strLastPreFix, strPrefix) = 1 Then ' 如果前一次的標籤跟這次一樣,則索引值+1
strTmp = Replace(strLastPreFix, strPrefix, "")
strPrefix = strPrefix + Trim(Str(Int(Mid(strTmp, 1, Len(strTmp) - 2) + 1)))
ElseIf InStr(1, strNewData, strPrefix + "1") > 0 Then ' 如果語法為<table></table><div></div><table></table>,陣列值會不對,因此多加判斷
strPrefix = strPrefix + "2"
Else
strPrefix = strPrefix + "1" ' 都沒有找到,從1開始
End If
strPrefix = strPrefix + "]/"
nIdStart = InStr(1, strSection, "id=") - 1
If nIdStart <> -1 Then ' 判斷原語句裡面是否就有id設定
nIdEnd = InStr(nIdStart + 1, strSection, " ")
If nIdEnd = 0 Then ' 如果找不到空白,表示id已經是最後一個屬性,就直接將id放到最後面
strSection = Mid(strSection, 1, nIdStart) + " id='" + strPrefix + "'>"
Else
nLength = Len(strSection) - nIdEnd ' 找到空白,保留除id外的全部屬性,並且將id屬性放到最後面
strSection = Mid(strSection, 1, nIdStart) + Mid(strSection, nIdEnd, nLength) + " id='" + strPrefix + "'>"
End If
Else ' 沒有id設定,直接插入
strSection = Mid(strSection, 1, Len(strSection) - 1) + " id='" + strPrefix + "'>"
End If
nTabCount = 1
End If
End Sub
' 解析html,並添加階層ID
Sub Parse_Html(ByVal strData As String)
Dim i As Long, j As Long, k As Long
Dim ch As String
Dim last_ch As String
strData = Replace(strData, Chr(9), "") ' replace \t
For i = 1 To Len(strData)
ch = Mid(strData, i, 1)
If " " = ch And Len(strSection) = 0 Then ' 空白字省略
ch = ""
End If
strSection = strSection + ch
' 因為html裡面有單引號不對稱狀況,所以做個例外處理
' Cover Html Error: Line 42, Column 693: an attribute specification must start with a name or name token
If last_ch = "'" And ch = ">" And bIsString Then
bIsString = False
End If
last_ch = ch
If ch = """" Or ch = "'" Then ' 字串不需要做判斷
If bIsString = True Then
bIsString = False
Else
bIsString = True
End If
ElseIf bIsString = False Then ' 非字串處理
If ch = ">" Or (ch = "<" And Len(strSection) > 1) Then ' 完整標籤,拿來做解析
If ch = "<" Then ' 我們只要<前面的字串
strSection = Mid(strSection, 1, Len(strSection) - 1)
End If
strSection = Trim(strSection)
nTabCount = 0
' script內容無須理會
If InStr(1, strSection, "</script") = 1 Then
bIsScript = False
ElseIf InStr(1, strSection, "<script") = 1 Then
bIsScript = True
End If
If bIsScript = False Then
' 針對下面標籤內容串接成Xpath樣式
If InStr(1, strSection, "</html") = 1 Then
Prefix_Finish "</html"
ElseIf InStr(1, strSection, "<html") = 1 Then
Prefix_Start "html"
ElseIf InStr(1, strSection, "</body") = 1 Then
Prefix_Finish "</body"
ElseIf InStr(1, strSection, "<body") = 1 Then
Prefix_Start "body"
ElseIf InStr(1, strSection, "</div") = 1 Then
Prefix_Finish "</div"
ElseIf InStr(1, strSection, "<div") = 1 Then
Prefix_Start "div"
ElseIf InStr(1, strSection, "</table") = 1 Then
Prefix_Finish "</table"
ElseIf InStr(1, strSection, "<table") = 1 Then
Prefix_Start "table"
ElseIf InStr(1, strSection, "</thead") = 1 Then
Prefix_Finish "</thead"
ElseIf InStr(1, strSection, "<thead") = 1 Then
Prefix_Start "thead"
ElseIf InStr(1, strSection, "</tr") = 1 Then
Prefix_Finish "</tr"
ElseIf InStr(1, strSection, "<tr") = 1 Then
j = Len(strPrefix)
Do
j = j - 1
If Mid(strPrefix, j, 1) = "/" Then
j = j + 1
Exit Do
End If
Loop
For k = (j + 1) To Len(strPrefix)
If Mid(strPrefix, k, 1) = "[" Then
Exit For
End If
Next k
' 網頁裡面有<tr>沒有</tr>作結束就接<tr>的錯誤,做個例外處理
' Cover Html Error: Line 304, Column 975: document type does not allow element "tr" here
If Mid(strPrefix, j, k - j) = "tr" Then ' 先結束前面一個</tr>,再接一個新的<tr>
Dim strTmp As String
strTmp = strSection
strSection = "</tr>"
Prefix_Finish "</tr"
strSection = strTmp
End If
Prefix_Start "tr"
ElseIf InStr(1, strSection, "</td") = 1 Then
Prefix_Finish "</td"
ElseIf InStr(1, strSection, "<td") = 1 Then
Prefix_Start "td"
End If
'If nTotalTabCount > 0 Then
' For j = 0 To nTotalTabCount - 1
' strNewData = strNewData + "\t"
' Next j
'End If
strNewData = strNewData + strSection ' + "\n"
nTotalTabCount = nTotalTabCount + nTabCount
End If
If ch = "<" Then
strSection = "<"
Else
strSection = ""
End If
End If
End If
Next i
End Sub
' 將Xpath節點名稱,加工為需要的ID
Function Get_Html_ID(ByVal strHtmlID As String)
Dim i As Integer
Dim strTmp As String, strChar As String, strLastChar As String
Dim ArrStr() As String
i = 0
strTmp = ""
strLastChar = ""
'Replace(strHtmlID, "tbody/", "")
ArrStr = Split(strHtmlID, "/tbody")
strHtmlID = ""
For i = 0 To UBound(ArrStr)
If Mid(ArrStr(i), 1, 1) <> "[" Then ' 如果tbody不是陣列型式,直接加起來
strHtmlID = strHtmlID + ArrStr(i)
Else ' tbody是陣列型式,只要/之後的資料
strHtmlID = strHtmlID + Mid(ArrStr(i), InStr(2, ArrStr(i), "/"))
End If
Next i
For i = 1 To Len(strHtmlID)
strChar = Mid(strHtmlID, i, 1)
If strChar = "/" And strLastChar <> "" And strLastChar <> "]" Then ' 將/html/改為/html[1]/
strTmp = strTmp + "[1]"
End If
strTmp = strTmp + strChar
strLastChar = strChar
Next i
If strChar <> "/" Then ' 結尾不是/,則補上
strTmp = strTmp + "/"
End If
Get_Html_ID = strTmp
End Function
' 取得公司名稱與產業別
Sub Get_Company_Info(ByVal strID As String)
Dim url As String, strCompanyName As String, strCompanyIndustry As String
Set objHttp = CreateObject("MSXML2.XMLHTTP")
Set objHtml = CreateObject("HTMLFile")
url = strUrlCompanyInfo + strID
objHttp.Open "GET", url, False ' 網頁連線
objHttp.send
While objHttp.ReadyState <> 4 ' 等待完成
Sleep (1)
Wend
If objHttp.Status = 200 Then ' 確認狀態
Parse_Html objHttp.responseText ' 解析字串
objHtml.body.innerHTML = strNewData ' 將新網頁內容設定給html元件
' 公司名稱
strCompanyName = objHtml.getElementById(Get_Html_ID(strXpathCompanyName)).innerText
' 產業別
strCompanyIndustry = objHtml.getElementById(Get_Html_ID(strXpathCompanyIndustry)).innerText
End If
End Sub
' 若屬性中有包含行列,則回傳該值,否則回傳1
Function Get_Span_Value(ByVal obj As Object, ByVal span As String)
If IsNull(obj.getAttribute(span)) = False Then
Get_Span_Value = Int(obj.getAttribute(span))
Else
Get_Span_Value = 1
End If
End Function
' 抓取股利政策
Sub Get_Company_Dividend(ByVal strID As String)
Dim url As String, strTmp As String, ArrHeader2D() As String
Dim i As Integer, j As Integer, k As Integer, r As Integer
Dim row As Integer, rows As Integer, c As Integer, col As Integer, cols As Integer, totalCols As Integer
Set objHttp = CreateObject("MSXML2.XMLHTTP")
Set objHtml = CreateObject("HTMLFile")
strTmp = ""
url = strUrlCompanyDividend + strID
objHttp.Open "GET", url, False ' 網頁連線
objHttp.send
While objHttp.ReadyState <> 4 ' 等待完成
Sleep (1)
Wend
If objHttp.Status = 200 Then ' 確認狀態
Parse_Html objHttp.responseText
objHtml.body.innerHTML = strNewData
' 統計股利政策欄位數量
strTmp = Get_Html_ID(strXpathDividendHeader1)
strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
cols = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
cols = cols + Get_Span_Value(obj, "colspan")
Loop
totalCols = cols
ReDim ArrHeader2D(4 - 1, totalCols - 1)
' 計算表格Columns
i = 0
j = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
rows = Get_Span_Value(obj, "rowspan")
cols = Get_Span_Value(obj, "colspan")
For col = 0 To (cols - 1)
For row = 0 To (rows - 1)
ArrHeader2D(row, col + j) = Replace(Replace(Replace(Replace(obj.innerText, Chr(-24256), ""), " ", ""), Chr(13), ""), Chr(10), "")
Next row
Next col
j = j + cols
Loop
' 補齊Header內容
strTmp = Get_Html_ID(strXpathDividendHeader)
strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
i = 0
r = 0
c = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
If r <> 0 Then
For Each td In obj.childNodes
rows = Get_Span_Value(td, "rowspan")
cols = Get_Span_Value(td, "colspan")
For col = 0 To (cols - 1)
For row = 0 To (rows - 1)
Do While True
If ArrHeader2D(row + r, col + c) = "" Then
Exit Do
End If
c = c + 1
Loop
ArrHeader2D(row + r, col + c) = Replace(Replace(Replace(Replace(td.innerText, Chr(-24256), ""), " ", ""), Chr(13), ""), Chr(10), "")
Next row
Next col
Next
End If
r = r + 1
c = 0
Loop
' 設定想要的欄位內容
Dim liDividendHeader(14) As String
liDividendHeader(0) = "股利發放年度"
' 股利
liDividendHeader(1) = "合計"
liDividendHeader(2) = "合計"
liDividendHeader(3) = "股利合計"
' 股價
liDividendHeader(4) = "最高"
liDividendHeader(5) = "最低"
liDividendHeader(6) = "年均"
' 殖利率
liDividendHeader(7) = "現金"
liDividendHeader(8) = "股票"
liDividendHeader(9) = "合計"
liDividendHeader(10) = "EPS(元)"
' 發放率
liDividendHeader(11) = "配息"
liDividendHeader(12) = "配股"
liDividendHeader(13) = "合計"
' 找出欄位索引值
Dim colData() As Integer
ReDim colData(UBound(liDividendHeader) - 1)
For i = 0 To UBound(liDividendHeader) - 1
If i = 0 Then
j = 0
Else
j = colData(i - 1) + 1
End If
Do While True
If j >= totalCols Then
Exit Do
ElseIf ArrHeader2D(3, j) = liDividendHeader(i) Then
colData(i) = j
Exit Do
End If
j = j + 1
Loop
Next i
' 取得表格內容
Dim liDividend() As String
ReDim liDividend(10000, UBound(liDividendHeader) - 1)
strTmp = Get_Html_ID(strXpathDividendData)
strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
i = 0
j = 0
k = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
For j = 0 To (UBound(liDividendHeader) - 1)
liDividend(k, j) = obj.childNodes(colData(j)).innerText
Next j
k = k + 1
Loop
End If
End Sub
' 抓取獲利狀況
Sub Get_Company_Profit(ByVal strID As String)
Dim url As String, strTmp As String, ArrHeader2D() As String
Dim i As Integer, j As Integer, k As Integer, r As Integer
Dim row As Integer, rows As Integer, c As Integer, col As Integer, cols As Integer, totalCols As Integer
Set objHttp = CreateObject("MSXML2.XMLHTTP")
Set objHtml = CreateObject("HTMLFile")
strTmp = ""
url = strUrlCompanyProfit + strID
objHttp.Open "GET", url, False
objHttp.send
While objHttp.ReadyState <> 4
Sleep (1)
Wend
If objHttp.Status = 200 Then
Parse_Html objHttp.responseText
objHtml.body.innerHTML = strNewData
' 統計獲利狀況欄位數量
strTmp = Get_Html_ID(strXpathProfitHeader1)
strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
cols = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
cols = cols + Get_Span_Value(obj, "colspan")
Loop
totalCols = cols
ReDim ArrHeader2D(4 - 1, totalCols - 1)
' 計算表格Columns
i = 0
j = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
rows = Get_Span_Value(obj, "rowspan")
cols = Get_Span_Value(obj, "colspan")
For col = 0 To (cols - 1)
For row = 0 To (rows - 1)
ArrHeader2D(row, col + j) = Replace(Replace(Replace(Replace(obj.innerText, Chr(-24256), ""), " ", ""), Chr(13), ""), Chr(10), "")
Next row
Next col
j = j + cols
Loop
' 補齊Header內容
strTmp = Get_Html_ID(strXpathProfitHeader)
strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
i = 0
r = 0
c = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
If r <> 0 Then
For Each td In obj.childNodes
rows = Get_Span_Value(td, "rowspan")
cols = Get_Span_Value(td, "colspan")
For col = 0 To (cols - 1)
For row = 0 To (rows - 1)
Do While True
If ArrHeader2D(row + r, col + c) = "" Then
Exit Do
End If
c = c + 1
Loop
ArrHeader2D(row + r, col + c) = Replace(Replace(Replace(Replace(td.innerText, Chr(-24256), ""), " ", ""), Chr(13), ""), Chr(10), "")
Next row
Next col
Next
End If
r = r + 1
c = 0
Loop
' 設定想要的欄位內容
Dim liProfitHeader(14) As String
liProfitHeader(0) = "年度"
liProfitHeader(1) = "財報評分"
' 股價
liProfitHeader(2) = "收盤"
liProfitHeader(3) = "平均"
liProfitHeader(4) = "漲跌"
liProfitHeader(5) = "漲跌(%)"
' 獲利金額
liProfitHeader(6) = "營業收入"
liProfitHeader(7) = "營業毛利"
liProfitHeader(8) = "稅後淨利"
' 獲利率
liProfitHeader(9) = "營業毛利"
liProfitHeader(10) = "稅後淨利"
liProfitHeader(11) = "ROE(%)"
liProfitHeader(12) = "稅後EPS"
liProfitHeader(13) = "年增(元)"
' 找出欄位索引值
Dim colData() As Integer
ReDim colData(UBound(liProfitHeader) - 1)
For i = 0 To UBound(liProfitHeader) - 1
If i = 0 Then
j = 0
Else
j = colData(i - 1) + 1
End If
Do While True
If j >= totalCols Then
Exit Do
ElseIf ArrHeader2D(1, j) = liProfitHeader(i) Then
colData(i) = j
Exit Do
End If
j = j + 1
Loop
Next i
' 取得表格內容
Dim liProfit() As String
ReDim liProfit(10000, UBound(liProfitHeader) - 1)
strTmp = Get_Html_ID(strXpathProfitData)
strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
i = 0
j = 0
k = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
For j = 0 To (UBound(liProfitHeader) - 1)
liProfit(k, j) = obj.childNodes(colData(j)).innerText
Next j
k = k + 1
Loop
End If
End Sub
' 將網頁原始碼存為檔案後,利用該檔案內容進行測試
Sub Local_Test()
Dim strTest As String
Dim i As Integer, j As Integer
strTest = ""
i = 0
j = 0
Global_Variable_Init
strIdCompany = "3008"
'Open "info.txt" For Input As #1
Open "dividend.txt" For Input As #1
Do While Not EOF(1)
Line Input #1, strTmp
strTest = strTest + strTmp
Loop
Close #1
Parse_Html strTest ' 解析字串
objHtml.body.innerHTML = strNewData ' 將新網頁內容設定給html元件
End Sub
Public Sub Main()
Global_Variable_Init
strIdCompany = "3008"
Get_Company_Info strIdCompany
'Get_Company_Dividend strIdCompany
'Get_Company_Profit strIdCompany
End Sub
Private Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
Global strUrlCompanyInfo As String
Global strUrlCompanyDividend As String
Global strUrlCompanyProfit As String
Global strIdCompany As String
Global strXpathCompanyName As String
Global strXpathCompanyIndustry As String
Global strXpathDividendHeader1 As String
Global strXpathDividendHeader As String
Global strXpathDividendData As String
Global strXpathProfitHeader1 As String
Global strXpathProfitHeader As String
Global strXpathProfitData As String
Global strNewData As String
Global strSection As String
Global bIsString As Boolean
Global bIsScript As Boolean
Global strPrefix As String
Global strLastPreFix As String
Global strLastLabel As String
Global nTabCount As Integer
Global nTotalTabCount As Integer
' 初始化全域變數
Sub Global_Variable_Init()
strNewData = ""
strSection = ""
bIsString = False
bIsScript = False
strPrefix = "/"
strLastPreFix = strPrefix
strLastLabel = ""
nTabCount = 0
nTotalTabCount = 0
' 公司名稱與產品別
strUrlCompanyInfo = "https://goodinfo.tw/StockInfo/StockDetail.asp?STOCK_ID="
strXpathCompanyName = "/html/body/table[2]/tbody/tr/td[3]/table/tbody/tr[2]/td[3]/table[2]/tbody/tr[1]/td[2]"
strXpathCompanyIndustry = "/html/body/table[2]/tbody/tr/td[3]/table/tbody/tr[2]/td[3]/table[2]/tbody/tr[2]/td[2]"
' 股利政策
strUrlCompanyDividend = "https://goodinfo.tw/StockInfo/StockDividendPolicy.asp?STOCK_ID="
strXpathDividendHeader1 = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr[1]/td"
strXpathDividendHeader = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr"
strXpathDividendData = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/tbody[1]/tr"
' 獲利狀況
strUrlCompanyProfit = "https://goodinfo.tw/StockInfo/StockBzPerformance.asp?STOCK_ID="
strXpathProfitHeader1 = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr[1]/td"
strXpathProfitHeader = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr"
strXpathProfitData = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/tbody[1]/tr"
End Sub
' 判斷是否為標籤結尾,如 <html>或 <html+空白
Function Is_End_Of_Label(ByVal ch As String)
If ch = " " Or ch = ">" Then
Is_End_Of_Label = True
Else
Is_End_Of_Label = False
End If
End Function
' 遇到標籤結尾將階層ID回推一層
Sub Prefix_Finish(ByVal label As String)
Dim nEnd As Integer
If Is_End_Of_Label(Mid(strSection, Len(label) + 1, 1)) = True Then ' 確認是'>'或' '結尾
nEnd = Len(strPrefix)
Do While True
nEnd = nEnd - 1
If Mid(strPrefix, nEnd, 1) = "/" Then ' 將id除/html/body/table,縮短為/html/body/,因為已經找到</table>
nEnd = nEnd + 1 ' Keep "/"
Exit Do
End If
Loop
strLastPreFix = strPrefix ' 保留這次id,用來計算索引
strPrefix = Mid(strPrefix, 1, nEnd - 1) ' 縮短的id
nTotalTabCount = nTotalTabCount - 1
End If
End Sub
' 遇到標籤起頭將階層ID增加一層,並決定陣列索引
Sub Prefix_Start(ByVal label As String)
Dim strTmp As String
Dim nIdStart As Integer, nIdEnd As Integer, nLength As Integer
strTmp = "<" + label
If Is_End_Of_Label(Mid(strSection, Len(strTmp) + 1, 1)) = True Then ' 確認是'>'或' '結尾
' 設定編號,如table[1]、table[2]
strPrefix = strPrefix + label + "["
If InStr(1, strLastPreFix, strPrefix) = 1 Then ' 如果前一次的標籤跟這次一樣,則索引值+1
strTmp = Replace(strLastPreFix, strPrefix, "")
strPrefix = strPrefix + Trim(Str(Int(Mid(strTmp, 1, Len(strTmp) - 2) + 1)))
ElseIf InStr(1, strNewData, strPrefix + "1") > 0 Then ' 如果語法為<table></table><div></div><table></table>,陣列值會不對,因此多加判斷
strPrefix = strPrefix + "2"
Else
strPrefix = strPrefix + "1" ' 都沒有找到,從1開始
End If
strPrefix = strPrefix + "]/"
nIdStart = InStr(1, strSection, "id=") - 1
If nIdStart <> -1 Then ' 判斷原語句裡面是否就有id設定
nIdEnd = InStr(nIdStart + 1, strSection, " ")
If nIdEnd = 0 Then ' 如果找不到空白,表示id已經是最後一個屬性,就直接將id放到最後面
strSection = Mid(strSection, 1, nIdStart) + " id='" + strPrefix + "'>"
Else
nLength = Len(strSection) - nIdEnd ' 找到空白,保留除id外的全部屬性,並且將id屬性放到最後面
strSection = Mid(strSection, 1, nIdStart) + Mid(strSection, nIdEnd, nLength) + " id='" + strPrefix + "'>"
End If
Else ' 沒有id設定,直接插入
strSection = Mid(strSection, 1, Len(strSection) - 1) + " id='" + strPrefix + "'>"
End If
nTabCount = 1
End If
End Sub
' 解析html,並添加階層ID
Sub Parse_Html(ByVal strData As String)
Dim i As Long, j As Long, k As Long
Dim ch As String
Dim last_ch As String
strData = Replace(strData, Chr(9), "") ' replace \t
For i = 1 To Len(strData)
ch = Mid(strData, i, 1)
If " " = ch And Len(strSection) = 0 Then ' 空白字省略
ch = ""
End If
strSection = strSection + ch
' 因為html裡面有單引號不對稱狀況,所以做個例外處理
' Cover Html Error: Line 42, Column 693: an attribute specification must start with a name or name token
If last_ch = "'" And ch = ">" And bIsString Then
bIsString = False
End If
last_ch = ch
If ch = """" Or ch = "'" Then ' 字串不需要做判斷
If bIsString = True Then
bIsString = False
Else
bIsString = True
End If
ElseIf bIsString = False Then ' 非字串處理
If ch = ">" Or (ch = "<" And Len(strSection) > 1) Then ' 完整標籤,拿來做解析
If ch = "<" Then ' 我們只要<前面的字串
strSection = Mid(strSection, 1, Len(strSection) - 1)
End If
strSection = Trim(strSection)
nTabCount = 0
' script內容無須理會
If InStr(1, strSection, "</script") = 1 Then
bIsScript = False
ElseIf InStr(1, strSection, "<script") = 1 Then
bIsScript = True
End If
If bIsScript = False Then
' 針對下面標籤內容串接成Xpath樣式
If InStr(1, strSection, "</html") = 1 Then
Prefix_Finish "</html"
ElseIf InStr(1, strSection, "<html") = 1 Then
Prefix_Start "html"
ElseIf InStr(1, strSection, "</body") = 1 Then
Prefix_Finish "</body"
ElseIf InStr(1, strSection, "<body") = 1 Then
Prefix_Start "body"
ElseIf InStr(1, strSection, "</div") = 1 Then
Prefix_Finish "</div"
ElseIf InStr(1, strSection, "<div") = 1 Then
Prefix_Start "div"
ElseIf InStr(1, strSection, "</table") = 1 Then
Prefix_Finish "</table"
ElseIf InStr(1, strSection, "<table") = 1 Then
Prefix_Start "table"
ElseIf InStr(1, strSection, "</thead") = 1 Then
Prefix_Finish "</thead"
ElseIf InStr(1, strSection, "<thead") = 1 Then
Prefix_Start "thead"
ElseIf InStr(1, strSection, "</tr") = 1 Then
Prefix_Finish "</tr"
ElseIf InStr(1, strSection, "<tr") = 1 Then
j = Len(strPrefix)
Do
j = j - 1
If Mid(strPrefix, j, 1) = "/" Then
j = j + 1
Exit Do
End If
Loop
For k = (j + 1) To Len(strPrefix)
If Mid(strPrefix, k, 1) = "[" Then
Exit For
End If
Next k
' 網頁裡面有<tr>沒有</tr>作結束就接<tr>的錯誤,做個例外處理
' Cover Html Error: Line 304, Column 975: document type does not allow element "tr" here
If Mid(strPrefix, j, k - j) = "tr" Then ' 先結束前面一個</tr>,再接一個新的<tr>
Dim strTmp As String
strTmp = strSection
strSection = "</tr>"
Prefix_Finish "</tr"
strSection = strTmp
End If
Prefix_Start "tr"
ElseIf InStr(1, strSection, "</td") = 1 Then
Prefix_Finish "</td"
ElseIf InStr(1, strSection, "<td") = 1 Then
Prefix_Start "td"
End If
'If nTotalTabCount > 0 Then
' For j = 0 To nTotalTabCount - 1
' strNewData = strNewData + "\t"
' Next j
'End If
strNewData = strNewData + strSection ' + "\n"
nTotalTabCount = nTotalTabCount + nTabCount
End If
If ch = "<" Then
strSection = "<"
Else
strSection = ""
End If
End If
End If
Next i
End Sub
' 將Xpath節點名稱,加工為需要的ID
Function Get_Html_ID(ByVal strHtmlID As String)
Dim i As Integer
Dim strTmp As String, strChar As String, strLastChar As String
Dim ArrStr() As String
i = 0
strTmp = ""
strLastChar = ""
'Replace(strHtmlID, "tbody/", "")
ArrStr = Split(strHtmlID, "/tbody")
strHtmlID = ""
For i = 0 To UBound(ArrStr)
If Mid(ArrStr(i), 1, 1) <> "[" Then ' 如果tbody不是陣列型式,直接加起來
strHtmlID = strHtmlID + ArrStr(i)
Else ' tbody是陣列型式,只要/之後的資料
strHtmlID = strHtmlID + Mid(ArrStr(i), InStr(2, ArrStr(i), "/"))
End If
Next i
For i = 1 To Len(strHtmlID)
strChar = Mid(strHtmlID, i, 1)
If strChar = "/" And strLastChar <> "" And strLastChar <> "]" Then ' 將/html/改為/html[1]/
strTmp = strTmp + "[1]"
End If
strTmp = strTmp + strChar
strLastChar = strChar
Next i
If strChar <> "/" Then ' 結尾不是/,則補上
strTmp = strTmp + "/"
End If
Get_Html_ID = strTmp
End Function
' 取得公司名稱與產業別
Sub Get_Company_Info(ByVal strID As String)
Dim url As String, strCompanyName As String, strCompanyIndustry As String
Set objHttp = CreateObject("MSXML2.XMLHTTP")
Set objHtml = CreateObject("HTMLFile")
url = strUrlCompanyInfo + strID
objHttp.Open "GET", url, False ' 網頁連線
objHttp.send
While objHttp.ReadyState <> 4 ' 等待完成
Sleep (1)
Wend
If objHttp.Status = 200 Then ' 確認狀態
Parse_Html objHttp.responseText ' 解析字串
objHtml.body.innerHTML = strNewData ' 將新網頁內容設定給html元件
' 公司名稱
strCompanyName = objHtml.getElementById(Get_Html_ID(strXpathCompanyName)).innerText
' 產業別
strCompanyIndustry = objHtml.getElementById(Get_Html_ID(strXpathCompanyIndustry)).innerText
End If
End Sub
' 若屬性中有包含行列,則回傳該值,否則回傳1
Function Get_Span_Value(ByVal obj As Object, ByVal span As String)
If IsNull(obj.getAttribute(span)) = False Then
Get_Span_Value = Int(obj.getAttribute(span))
Else
Get_Span_Value = 1
End If
End Function
' 抓取股利政策
Sub Get_Company_Dividend(ByVal strID As String)
Dim url As String, strTmp As String, ArrHeader2D() As String
Dim i As Integer, j As Integer, k As Integer, r As Integer
Dim row As Integer, rows As Integer, c As Integer, col As Integer, cols As Integer, totalCols As Integer
Set objHttp = CreateObject("MSXML2.XMLHTTP")
Set objHtml = CreateObject("HTMLFile")
strTmp = ""
url = strUrlCompanyDividend + strID
objHttp.Open "GET", url, False ' 網頁連線
objHttp.send
While objHttp.ReadyState <> 4 ' 等待完成
Sleep (1)
Wend
If objHttp.Status = 200 Then ' 確認狀態
Parse_Html objHttp.responseText
objHtml.body.innerHTML = strNewData
' 統計股利政策欄位數量
strTmp = Get_Html_ID(strXpathDividendHeader1)
strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
cols = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
cols = cols + Get_Span_Value(obj, "colspan")
Loop
totalCols = cols
ReDim ArrHeader2D(4 - 1, totalCols - 1)
' 計算表格Columns
i = 0
j = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
rows = Get_Span_Value(obj, "rowspan")
cols = Get_Span_Value(obj, "colspan")
For col = 0 To (cols - 1)
For row = 0 To (rows - 1)
ArrHeader2D(row, col + j) = Replace(Replace(Replace(Replace(obj.innerText, Chr(-24256), ""), " ", ""), Chr(13), ""), Chr(10), "")
Next row
Next col
j = j + cols
Loop
' 補齊Header內容
strTmp = Get_Html_ID(strXpathDividendHeader)
strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
i = 0
r = 0
c = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
If r <> 0 Then
For Each td In obj.childNodes
rows = Get_Span_Value(td, "rowspan")
cols = Get_Span_Value(td, "colspan")
For col = 0 To (cols - 1)
For row = 0 To (rows - 1)
Do While True
If ArrHeader2D(row + r, col + c) = "" Then
Exit Do
End If
c = c + 1
Loop
ArrHeader2D(row + r, col + c) = Replace(Replace(Replace(Replace(td.innerText, Chr(-24256), ""), " ", ""), Chr(13), ""), Chr(10), "")
Next row
Next col
Next
End If
r = r + 1
c = 0
Loop
' 設定想要的欄位內容
Dim liDividendHeader(14) As String
liDividendHeader(0) = "股利發放年度"
' 股利
liDividendHeader(1) = "合計"
liDividendHeader(2) = "合計"
liDividendHeader(3) = "股利合計"
' 股價
liDividendHeader(4) = "最高"
liDividendHeader(5) = "最低"
liDividendHeader(6) = "年均"
' 殖利率
liDividendHeader(7) = "現金"
liDividendHeader(8) = "股票"
liDividendHeader(9) = "合計"
liDividendHeader(10) = "EPS(元)"
' 發放率
liDividendHeader(11) = "配息"
liDividendHeader(12) = "配股"
liDividendHeader(13) = "合計"
' 找出欄位索引值
Dim colData() As Integer
ReDim colData(UBound(liDividendHeader) - 1)
For i = 0 To UBound(liDividendHeader) - 1
If i = 0 Then
j = 0
Else
j = colData(i - 1) + 1
End If
Do While True
If j >= totalCols Then
Exit Do
ElseIf ArrHeader2D(3, j) = liDividendHeader(i) Then
colData(i) = j
Exit Do
End If
j = j + 1
Loop
Next i
' 取得表格內容
Dim liDividend() As String
ReDim liDividend(10000, UBound(liDividendHeader) - 1)
strTmp = Get_Html_ID(strXpathDividendData)
strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
i = 0
j = 0
k = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
For j = 0 To (UBound(liDividendHeader) - 1)
liDividend(k, j) = obj.childNodes(colData(j)).innerText
Next j
k = k + 1
Loop
End If
End Sub
' 抓取獲利狀況
Sub Get_Company_Profit(ByVal strID As String)
Dim url As String, strTmp As String, ArrHeader2D() As String
Dim i As Integer, j As Integer, k As Integer, r As Integer
Dim row As Integer, rows As Integer, c As Integer, col As Integer, cols As Integer, totalCols As Integer
Set objHttp = CreateObject("MSXML2.XMLHTTP")
Set objHtml = CreateObject("HTMLFile")
strTmp = ""
url = strUrlCompanyProfit + strID
objHttp.Open "GET", url, False
objHttp.send
While objHttp.ReadyState <> 4
Sleep (1)
Wend
If objHttp.Status = 200 Then
Parse_Html objHttp.responseText
objHtml.body.innerHTML = strNewData
' 統計獲利狀況欄位數量
strTmp = Get_Html_ID(strXpathProfitHeader1)
strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
cols = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
cols = cols + Get_Span_Value(obj, "colspan")
Loop
totalCols = cols
ReDim ArrHeader2D(4 - 1, totalCols - 1)
' 計算表格Columns
i = 0
j = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
rows = Get_Span_Value(obj, "rowspan")
cols = Get_Span_Value(obj, "colspan")
For col = 0 To (cols - 1)
For row = 0 To (rows - 1)
ArrHeader2D(row, col + j) = Replace(Replace(Replace(Replace(obj.innerText, Chr(-24256), ""), " ", ""), Chr(13), ""), Chr(10), "")
Next row
Next col
j = j + cols
Loop
' 補齊Header內容
strTmp = Get_Html_ID(strXpathProfitHeader)
strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
i = 0
r = 0
c = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
If r <> 0 Then
For Each td In obj.childNodes
rows = Get_Span_Value(td, "rowspan")
cols = Get_Span_Value(td, "colspan")
For col = 0 To (cols - 1)
For row = 0 To (rows - 1)
Do While True
If ArrHeader2D(row + r, col + c) = "" Then
Exit Do
End If
c = c + 1
Loop
ArrHeader2D(row + r, col + c) = Replace(Replace(Replace(Replace(td.innerText, Chr(-24256), ""), " ", ""), Chr(13), ""), Chr(10), "")
Next row
Next col
Next
End If
r = r + 1
c = 0
Loop
' 設定想要的欄位內容
Dim liProfitHeader(14) As String
liProfitHeader(0) = "年度"
liProfitHeader(1) = "財報評分"
' 股價
liProfitHeader(2) = "收盤"
liProfitHeader(3) = "平均"
liProfitHeader(4) = "漲跌"
liProfitHeader(5) = "漲跌(%)"
' 獲利金額
liProfitHeader(6) = "營業收入"
liProfitHeader(7) = "營業毛利"
liProfitHeader(8) = "稅後淨利"
' 獲利率
liProfitHeader(9) = "營業毛利"
liProfitHeader(10) = "稅後淨利"
liProfitHeader(11) = "ROE(%)"
liProfitHeader(12) = "稅後EPS"
liProfitHeader(13) = "年增(元)"
' 找出欄位索引值
Dim colData() As Integer
ReDim colData(UBound(liProfitHeader) - 1)
For i = 0 To UBound(liProfitHeader) - 1
If i = 0 Then
j = 0
Else
j = colData(i - 1) + 1
End If
Do While True
If j >= totalCols Then
Exit Do
ElseIf ArrHeader2D(1, j) = liProfitHeader(i) Then
colData(i) = j
Exit Do
End If
j = j + 1
Loop
Next i
' 取得表格內容
Dim liProfit() As String
ReDim liProfit(10000, UBound(liProfitHeader) - 1)
strTmp = Get_Html_ID(strXpathProfitData)
strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
i = 0
j = 0
k = 0
Do While True
i = i + 1
Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
If obj Is Nothing Then
Exit Do
End If
For j = 0 To (UBound(liProfitHeader) - 1)
liProfit(k, j) = obj.childNodes(colData(j)).innerText
Next j
k = k + 1
Loop
End If
End Sub
' 將網頁原始碼存為檔案後,利用該檔案內容進行測試
Sub Local_Test()
Dim strTest As String
Dim i As Integer, j As Integer
strTest = ""
i = 0
j = 0
Global_Variable_Init
strIdCompany = "3008"
'Open "info.txt" For Input As #1
Open "dividend.txt" For Input As #1
Do While Not EOF(1)
Line Input #1, strTmp
strTest = strTest + strTmp
Loop
Close #1
Parse_Html strTest ' 解析字串
objHtml.body.innerHTML = strNewData ' 將新網頁內容設定給html元件
End Sub
Public Sub Main()
Global_Variable_Init
strIdCompany = "3008"
Get_Company_Info strIdCompany
'Get_Company_Dividend strIdCompany
'Get_Company_Profit strIdCompany
End Sub
0 意見:
張貼留言