2021年3月30日 星期二

使用C#與Python實作遊戲修改大師

 首先我們編寫一支測試程式

按鈕每按一次將全域變數(Value)加1,並且將數值顯示在右邊標籤

右邊文本框在視窗初始化時,顯示全域變數的位址

程式流程如下

首先使用GetCurrentProcess取得修改程式控制碼,接著使用相關Windows API將權限提升

使用FindWindow找到視窗控制碼,再使用GetWindowsThreadProcessID得到ProcessID,最後再利用這個ID取得程序控制碼


搜尋目標數值前,我們先使用GetSystemInfor取得應用程式的記憶體上下限

再利用VirtualQueryEx確定記憶體區塊屬性為可讀可寫


如果條件都成立,使用ReadProcessMemory將整個記憶體區塊內容讀出,並比對數值是否為我們要的,如果是就記錄該記憶體位址

測試程式修改記憶體後,檢查記錄地址的記憶體內容數值是否依然匹配,若不匹配則移除該地址,直到只剩下一個地址

最後我們使用WriteProcessMemoroy去修改測試程式的記憶體數值

C#程式碼

[程式碼] C#修改程式記憶體

 using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Runtime.InteropServices;  // DllImport
using System.Diagnostics;  // Process
namespace RwMem
{
    // https://www.pinvoke.net/default.aspx
    public class AdjPriv  // 提升權限
    {
        [StructLayout(LayoutKind.Sequential, Pack = 1)]
        struct TokPriv1Luid
        {
            public int Count;
            public long Luid;
            public int Attr;
        }
        [DllImport("advapi32.dll", ExactSpelling = true, SetLastError = true)]
        static extern bool AdjustTokenPrivileges(IntPtr htok, bool disall, 
            ref TokPriv1Luid newst, int len, IntPtr prev, IntPtr relen);
        [DllImport("kernel32.dll", ExactSpelling = true)]
        static extern IntPtr GetCurrentProcess();
        [DllImport("advapi32.dll", ExactSpelling = true, SetLastError = true)]
        static extern bool OpenProcessToken(IntPtr h, int acc, ref IntPtr phtok);
        [DllImport("advapi32.dll", SetLastError = true)]
        static extern bool LookupPrivilegeValue(string host, string name, ref long pluid);
        const int SE_PRIVILEGE_ENABLED = 0x00000002;
        const int TOKEN_QUERY = 0x00000008;
        const int TOKEN_ADJUST_PRIVILEGES = 0x00000020;
        const string SE_TIME_ZONE_NAMETEXT = "SeTimeZonePrivilege"; //http://msdn.microsoft.com/en-us/library/bb530716(VS.85).aspx
        public bool SetPriv()
        {
            try
            {
                bool retVal;
                TokPriv1Luid tp;
                IntPtr hproc = GetCurrentProcess();
                IntPtr htok = IntPtr.Zero;
                retVal = OpenProcessToken(hproc, TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, ref htok);
                tp.Count = 1;
                tp.Luid = 0;
                tp.Attr = SE_PRIVILEGE_ENABLED;
                retVal = LookupPrivilegeValue(null, SE_TIME_ZONE_NAMETEXT, ref tp.Luid);
                retVal = AdjustTokenPrivileges(htok, false, ref tp, 0, IntPtr.Zero, IntPtr.Zero);
                return retVal;
            }
            catch (Exception ex)
            {
                throw;
                return false;
            }
        }
    }
    class Program
    {
        static List<IntPtr> liAddr = new List<IntPtr>();  // 記錄查找到的記憶體位址
        [StructLayout(LayoutKind.Sequential)]
        internal struct SYSTEM_INFO
        {
            internal ushort wProcessorArchitecture;
            internal ushort wReserved;
            internal uint dwPageSize;
            internal IntPtr lpMinimumApplicationAddress;
            internal IntPtr lpMaximumApplicationAddress;
            internal IntPtr dwActiveProcessorMask;
            internal uint dwNumberOfProcessors;
            internal uint dwProcessorType;
            internal uint dwAllocationGranularity;
            internal ushort wProcessorLevel;
            internal ushort wProcessorRevision;
        }
        [DllImport("kernel32.dll", SetLastError = true)]
        static extern void GetSystemInfo(ref SYSTEM_INFO Info);  // 取得系統資訊
        [DllImport("user32.dll", EntryPoint = "FindWindow", SetLastError = true)]
        static extern IntPtr FindWindowByCaption(IntPtr ZeroOnly, string lpWindowName);  // 尋找視窗標題
        [DllImport("user32.dll", SetLastError = true)]
        static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint lpdwProcessId);  // 取得程序ID
        private enum ProcessAccessTypes
        {
            PROCESS_TERMINATE = 0x00000001,
            PROCESS_CREATE_THREAD = 0x00000002,
            PROCESS_SET_SESSIONID = 0x00000004,
            PROCESS_VM_OPERATION = 0x00000008,
            PROCESS_VM_READ = 0x00000010,
            PROCESS_VM_WRITE = 0x00000020,
            PROCESS_DUP_HANDLE = 0x00000040,
            PROCESS_CREATE_PROCESS = 0x00000080,
            PROCESS_SET_QUOTA = 0x00000100,
            PROCESS_SET_INFORMATION = 0x00000200,
            PROCESS_QUERY_INFORMATION = 0x00000400,
            STANDARD_RIGHTS_REQUIRED = 0x000F0000,
            SYNCHRONIZE = 0x00100000,
            PROCESS_ALL_ACCESS = PROCESS_TERMINATE | PROCESS_CREATE_THREAD | PROCESS_SET_SESSIONID | PROCESS_VM_OPERATION |
              PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_DUP_HANDLE | PROCESS_CREATE_PROCESS | PROCESS_SET_QUOTA |
              PROCESS_SET_INFORMATION | PROCESS_QUERY_INFORMATION | STANDARD_RIGHTS_REQUIRED | SYNCHRONIZE
        }
        [DllImport("kernel32.dll", SetLastError = true)]
        public static extern IntPtr OpenProcess(uint processAccess, bool bInheritHandle, int processId);
        [DllImport("kernel32.dll")]
        static extern bool ReadProcessMemory(IntPtr hProcess, IntPtr lpBaseAddress, [Out] byte[] lpBuffer,
            int dwSize, out IntPtr lpNumberOfBytesRead);
        [DllImport("kernel32.dll", SetLastError = true)]
        public static extern bool WriteProcessMemory(IntPtr hProcess, IntPtr lpBaseAddress, byte[] lpBuffer,
            Int32 nSize, out IntPtr lpNumberOfBytesWritten);
        public enum AllocationProtectEnum : uint
        {
            PAGE_EXECUTE = 0x00000010,
            PAGE_EXECUTE_READ = 0x00000020,
            PAGE_EXECUTE_READWRITE = 0x00000040,
            PAGE_EXECUTE_WRITECOPY = 0x00000080,
            PAGE_NOACCESS = 0x00000001,
            PAGE_READONLY = 0x00000002,
            PAGE_READWRITE = 0x00000004,
            PAGE_WRITECOPY = 0x00000008,
            PAGE_GUARD = 0x00000100,
            PAGE_NOCACHE = 0x00000200,
            PAGE_WRITECOMBINE = 0x00000400
        }
        public enum StateEnum : uint
        {
            MEM_COMMIT = 0x1000,
            MEM_FREE = 0x10000,
            MEM_RESERVE = 0x2000
        }
        [StructLayout(LayoutKind.Sequential)]
        public struct MEMORY_BASIC_INFORMATION
        {
            public IntPtr BaseAddress;
            public IntPtr AllocationBase;
            public uint AllocationProtect;
            public IntPtr RegionSize;
            public uint State;
            public uint Protect;
            public uint Type;
        }
        [DllImport("kernel32.dll")]
        static extern int VirtualQueryEx(IntPtr hProcess, IntPtr lpAddress,  // 查詢記憶體資訊
            out MEMORY_BASIC_INFORMATION lpBuffer, uint dwLength);
        static void ScanMemStep0(IntPtr hProcess, int nSize, int nValue)  // 從頭開始尋找記憶體並記錄匹配位址
        {
            SYSTEM_INFO stSysInfo = new SYSTEM_INFO();
            GetSystemInfo(ref stSysInfo);
            MEMORY_BASIC_INFORMATION stMemBasicInfo = new MEMORY_BASIC_INFORMATION();
            UInt64 BaseAddr = (UInt64)stSysInfo.lpMinimumApplicationAddress;
            UInt64 MaxAddr = (UInt64)stSysInfo.lpMaximumApplicationAddress;
            liAddr.Clear();
            while (BaseAddr < MaxAddr)
            {
                VirtualQueryEx(hProcess, (IntPtr)BaseAddr, out stMemBasicInfo, (uint)Marshal.SizeOf(stMemBasicInfo));
                if (stMemBasicInfo.Protect == (uint)AllocationProtectEnum.PAGE_READWRITE &&  // 可讀可寫
                    stMemBasicInfo.State == (uint)StateEnum.MEM_COMMIT)
                {
                    IntPtr bytesRead = IntPtr.Zero;
                    byte[] buffer = new byte[(UInt64)stMemBasicInfo.RegionSize];
                    ReadProcessMemory(hProcess, stMemBasicInfo.BaseAddress, buffer, (int)stMemBasicInfo.RegionSize, out bytesRead);
                    if(nSize == 4)
                    {
                        for (int i = 0; i < (int)stMemBasicInfo.RegionSize; i+=4)
                            if (BitConverter.ToInt32(buffer, i) == nValue)
                                liAddr.Add(stMemBasicInfo.BaseAddress + i);
                    }
                }
                BaseAddr += (UInt64)stMemBasicInfo.RegionSize;
            }
        }

        static void ScanMemStep1(IntPtr hProcess, int nSize, int nValue)  // 接續尋找變更資料後的記憶體
        {
            IntPtr bytesRead = IntPtr.Zero;
            byte[] buffer = new byte[nSize];
            for (int i = liAddr.Count-1; i >= 0; i--)
            {
                ReadProcessMemory(hProcess, liAddr[i], buffer, nSize, out bytesRead);
                if (nSize == 4)
                {
                    if (BitConverter.ToInt32(buffer, 0) != nValue)
                        liAddr.RemoveAt(i);
                }
            }
        }

        static void WriteMem(IntPtr hProcess, int nSize, int nValue)  // 修改記憶體內容
        {
            IntPtr bytesRead = IntPtr.Zero;
            byte[] buffer = BitConverter.GetBytes(nValue);
            if (1 != liAddr.Count)
                return;
            WriteProcessMemory(hProcess, liAddr[0], buffer, nSize, out bytesRead);
        }

        static void Main(string[] args)
        {
            AdjPriv ajdPriv = new AdjPriv();
            ajdPriv.SetPriv();
            IntPtr hWnd = FindWindowByCaption(IntPtr.Zero, "維京碼農");
            if(hWnd != IntPtr.Zero)
            {
                uint dwThreadID = 0;
                GetWindowThreadProcessId(hWnd, out dwThreadID);
                IntPtr hProcess = OpenProcess((uint)(ProcessAccessTypes.PROCESS_QUERY_INFORMATION |
                    ProcessAccessTypes.PROCESS_VM_READ | 
                    ProcessAccessTypes.PROCESS_VM_WRITE),
                    false,
                    (int)dwThreadID);
                while(true)
                {
                    Console.Write("Input Scan Step、Size And Value: ");
                    string str = Console.ReadLine();
                    string[] arrStr = str.Split(' ');
                    if(arrStr[0] == "0")
                    {
                        ScanMemStep0(hProcess, Convert.ToInt32(arrStr[1]), Convert.ToInt32(arrStr[2]));
                        Console.WriteLine("Count Of liAddr = {0}\n", liAddr.Count);
                    }
                    else if(arrStr[0] == "1")
                    {
                        ScanMemStep1(hProcess, Convert.ToInt32(arrStr[1]), Convert.ToInt32(arrStr[2]));
                        Console.WriteLine("Count Of liAddr = {0}\n", liAddr.Count);
                        if(liAddr.Count == 1)
                            Console.WriteLine("Addr Of Value = {0:X}\n", (int)liAddr[0]);
                    }
                    else if(arrStr[0] == "w")
                    {
                        WriteMem(hProcess, Convert.ToInt32(arrStr[1]), Convert.ToInt32(arrStr[2]));
                    }
                    else if (arrStr[0] == "q")
                    {
                        Console.WriteLine("Bye Bye\n", (int)liAddr[0]);
                    }
                }
            }
            else
            {
                Console.WriteLine("App Not Found");
            }
            Console.WriteLine("Press Any Key...");
            Console.ReadKey(true); //Pause
        }
    }
}

[程式碼] Python修改程式記憶體

 #-*-coding:utf-8 -*-
import io, sys
try:
  sys.stdout=io.TextIOWrapper(sys.stdout.buffer,encoding='utf8')
except:
  pass
import win32api, win32gui, win32con, win32process, win32security
from ctypes import *
from ctypes import wintypes
liAddr = []

# https://www.programcreek.com/python/example/114361/win32security.AdjustTokenPrivileges
def AcquirePrivilege(privilege):
    process = win32process.GetCurrentProcess()
    token = win32security.OpenProcessToken(
        process,
        win32security.TOKEN_ADJUST_PRIVILEGES | win32security.TOKEN_QUERY)
    priv_luid = win32security.LookupPrivilegeValue(None, privilege)
    privilege_enable = [(priv_luid, win32security.SE_PRIVILEGE_ENABLED)]
    #privilege_disable = [(priv_luid, win32security.SE_PRIVILEGE_REMOVED)]
    win32security.AdjustTokenPrivileges(token, False, privilege_enable)

#https://yiyibooks.cn/__trs__/meikunyuan6/pywin32/pywin32/PyWin32/win32api__GetSystemInfo_meth.html
'''
wProcessorArchitecture
dwPageSize
lpMinimumApplicationAddress
lpMaximumApplicationAddress
dwActiveProcessorMask
dwNumberOfProcessors
dwProcessorType
dwAllocationGranularity
'''
# http://www.rohitab.com/discuss/topic/39525-process-memory-scannerpy/
# https://forums.codeguru.com/showthread.php?560337-Windows-Python-Memory-Scanner
# https://mpxd.net/code/jan/mem_edit/commit/5c75da31d5a7ec1e43f9ab542c1f8b4eea01f44a?lang=ja-JP
class MEMORY_BASIC_INFORMATION32(Structure):
    _fields_ = [
            ('BaseAddress', wintypes.DWORD),
            ('AllocationBase', wintypes.DWORD),
            ('AllocationProtect', wintypes.DWORD),
            ('RegionSize', wintypes.DWORD),
            ('State', wintypes.DWORD),
            ('Protect', wintypes.DWORD),
            ('Type', wintypes.DWORD),
            ]
class MEMORY_BASIC_INFORMATION64(Structure):
    _fields_ = [
            ('BaseAddress', c_ulonglong),
            ('AllocationBase', c_ulonglong),
            ('AllocationProtect', wintypes.DWORD),
            ('RegionSize', c_ulonglong),
            ('State', wintypes.DWORD),
            ('Protect', wintypes.DWORD),
            ('Type', wintypes.DWORD),
            ]

def ScanMemStep0(hProcess, nSize, nValue):  # 從頭開始尋找記憶體並記錄匹配位址
    li = win32api.GetSystemInfo()
    BaseAddr = li[2]
    MaxAddr = li[3]
    global liAddr
    liAddr = []
    windll.kernel32.VirtualQueryEx.argtypes = [wintypes.HANDLE, 
        wintypes.LPCVOID,
        c_void_p,
        c_size_t]
    while BaseAddr < MaxAddr:
        PTR_SIZE = sizeof(c_void_p)
        if PTR_SIZE == 8:       # 64-bit python
            MEMORY_BASIC_INFORMATION = MEMORY_BASIC_INFORMATION64
        elif PTR_SIZE == 4:     # 32-bit python
            MEMORY_BASIC_INFORMATION = MEMORY_BASIC_INFORMATION32
        MBI = MEMORY_BASIC_INFORMATION()
        MBI_pointer = byref (MBI)
        size = sizeof (MBI)
        windll.kernel32.VirtualQueryEx(
        hProcess,
        BaseAddr,
        MBI_pointer,
        size)
        if MBI.Protect == win32con.PAGE_READWRITE and \
            MBI.State == win32con.MEM_COMMIT:
            for i in range(0, MBI.RegionSize, nSize):
                data = win32process.ReadProcessMemory(hProcess, MBI.BaseAddress+i, nSize)
                if int.from_bytes(data, byteorder='little') == nValue:
                    liAddr.append(MBI.BaseAddress+i)
        BaseAddr += MBI.RegionSize

def ScanMemStep1(hProcess, nSize, nValue):  # 接續尋找變更資料後的記憶體
    global liAddr
    for i in range(len(liAddr)-1, -1, -1):
        data = win32process.ReadProcessMemory(hProcess, liAddr[i], nSize)
        if int.from_bytes(data, byteorder='little', signed=True) != nValue:
            liAddr.pop(i)

def WriteMem(hProcess, nSize, nValue):  # 修改記憶體內容
    global liAddr
    if len(liAddr) != 1:
        return
    # 使用此方式異常
    # buffer = nValue.to_bytes(nSize, byteorder="little", signed=True)
    # win32process.WriteProcessMemory(hProcess, liAddr[0], buffer)
    windll.kernel32.WriteProcessMemory.argtypes = [c_void_p, c_void_p, c_void_p, c_int, c_void_p]
    lpNumberOfBytesWritten = c_size_t(0)
    windll.kernel32.WriteProcessMemory(hProcess, 
        c_char_p(liAddr[0]), # lpBaseAddress
        addressof(c_longlong(nValue)),  # lpBuffer
        nSize,
        byref(lpNumberOfBytesWritten))

if __name__ == "__main__":
    AcquirePrivilege("SeTimeZonePrivilege")
    hWnd = win32gui.FindWindow(None, "維京碼農")
    if hWnd != 0:
        tid, pid = win32process.GetWindowThreadProcessId(hWnd)
        '''
        hProcess = win32api.OpenProcess(win32con.PROCESS_QUERY_INFORMATION |
            win32con.PROCESS_VM_READ | 
            win32con.PROCESS_VM_WRITE,
            False,
            pid)
        '''
        hProcess = windll.kernel32.OpenProcess(win32con.PROCESS_QUERY_INFORMATION |
            win32con.PROCESS_VM_READ | 
            win32con.PROCESS_VM_WRITE,
            False,
            pid)
        while True:
            str = input("Input Scan Step、Size And Value: ")
            liStr = str.split(" ")
            if liStr[0] == "0":
                ScanMemStep0(hProcess, int(liStr[1]), int(liStr[2]))
                print("Count Of liAddr = %d\n" % len(liAddr))
            elif liStr[0] == "1":
                ScanMemStep1(hProcess, int(liStr[1]), int(liStr[2]))
                print("Count Of liAddr = %d\n" % len(liAddr))
                if len(liAddr) == 1:
                    print("Addr Of Value = %x\n" % liAddr[0])
            elif liStr[0] == "w":
                WriteMem(hProcess, int(liStr[1]), int(liStr[2]))
            elif liStr[0] == "q":
                print("Bye Bye")
    else:
        print("App Not Found")
    input("Press Any Key...")

2021年3月28日 星期日

[程式碼] 蝦咪! VBA網頁爬蟲也能用Xpath抓資料!!?

' Html Check Error Url: http://validator.w3.org/
Private Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
Global strUrlCompanyInfo As String
Global strUrlCompanyDividend As String
Global strUrlCompanyProfit As String
Global strIdCompany As String
Global strXpathCompanyName As String
Global strXpathCompanyIndustry As String
Global strXpathDividendHeader1 As String
Global strXpathDividendHeader As String
Global strXpathDividendData As String
Global strXpathProfitHeader1 As String
Global strXpathProfitHeader As String
Global strXpathProfitData As String
Global strNewData As String
Global strSection As String
Global bIsString As Boolean
Global bIsScript As Boolean
Global strPrefix As String
Global strLastPreFix As String
Global strLastLabel As String
Global nTabCount As Integer
Global nTotalTabCount As Integer
' 初始化全域變數
Sub Global_Variable_Init()
    strNewData = ""
    strSection = ""
    bIsString = False
    bIsScript = False
    strPrefix = "/"
    strLastPreFix = strPrefix
    strLastLabel = ""
    nTabCount = 0
    nTotalTabCount = 0
    
    ' 公司名稱與產品別
    strUrlCompanyInfo = "https://goodinfo.tw/StockInfo/StockDetail.asp?STOCK_ID="
    strXpathCompanyName = "/html/body/table[2]/tbody/tr/td[3]/table/tbody/tr[2]/td[3]/table[2]/tbody/tr[1]/td[2]"
    strXpathCompanyIndustry = "/html/body/table[2]/tbody/tr/td[3]/table/tbody/tr[2]/td[3]/table[2]/tbody/tr[2]/td[2]"
    
    ' 股利政策
    strUrlCompanyDividend = "https://goodinfo.tw/StockInfo/StockDividendPolicy.asp?STOCK_ID="
    strXpathDividendHeader1 = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr[1]/td"
    strXpathDividendHeader = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr"
    strXpathDividendData = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/tbody[1]/tr"
    
    ' 獲利狀況
    strUrlCompanyProfit = "https://goodinfo.tw/StockInfo/StockBzPerformance.asp?STOCK_ID="
    strXpathProfitHeader1 = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr[1]/td"
    strXpathProfitHeader = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr"
    strXpathProfitData = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/tbody[1]/tr"
End Sub

' 判斷是否為標籤結尾,如 <html>或 <html+空白
Function Is_End_Of_Label(ByVal ch As String)
    If ch = " " Or ch = ">" Then
        Is_End_Of_Label = True
    Else
        Is_End_Of_Label = False
    End If
End Function

' 遇到標籤結尾將階層ID回推一層
Sub Prefix_Finish(ByVal label As String)
    Dim nEnd As Integer
    
    If Is_End_Of_Label(Mid(strSection, Len(label) + 1, 1)) = True Then  ' 確認是'>'或' '結尾
        nEnd = Len(strPrefix)
        Do While True
            nEnd = nEnd - 1
            If Mid(strPrefix, nEnd, 1) = "/" Then  ' 將id除/html/body/table,縮短為/html/body/,因為已經找到</table>
                nEnd = nEnd + 1  ' Keep "/"
                Exit Do
            End If
        Loop
        
        strLastPreFix = strPrefix  ' 保留這次id,用來計算索引
        strPrefix = Mid(strPrefix, 1, nEnd - 1)  ' 縮短的id
        nTotalTabCount = nTotalTabCount - 1
    End If
End Sub

' 遇到標籤起頭將階層ID增加一層,並決定陣列索引
Sub Prefix_Start(ByVal label As String)
    Dim strTmp As String
    Dim nIdStart As Integer, nIdEnd As Integer, nLength As Integer
    
    strTmp = "<" + label
    
    If Is_End_Of_Label(Mid(strSection, Len(strTmp) + 1, 1)) = True Then  ' 確認是'>'或' '結尾
        ' 設定編號,如table[1]、table[2]
        strPrefix = strPrefix + label + "["
        If InStr(1, strLastPreFix, strPrefix) = 1 Then  ' 如果前一次的標籤跟這次一樣,則索引值+1
            strTmp = Replace(strLastPreFix, strPrefix, "")
            strPrefix = strPrefix + Trim(Str(Int(Mid(strTmp, 1, Len(strTmp) - 2) + 1)))
        ElseIf InStr(1, strNewData, strPrefix + "1") > 0 Then  ' 如果語法為<table></table><div></div><table></table>,陣列值會不對,因此多加判斷
            strPrefix = strPrefix + "2"
        Else
            strPrefix = strPrefix + "1"  ' 都沒有找到,從1開始
        End If
        
        strPrefix = strPrefix + "]/"
        
        nIdStart = InStr(1, strSection, "id=") - 1
        If nIdStart <> -1 Then  ' 判斷原語句裡面是否就有id設定
            nIdEnd = InStr(nIdStart + 1, strSection, " ")
            
            If nIdEnd = 0 Then  ' 如果找不到空白,表示id已經是最後一個屬性,就直接將id放到最後面
                strSection = Mid(strSection, 1, nIdStart) + " id='" + strPrefix + "'>"
            Else
                nLength = Len(strSection) - nIdEnd  ' 找到空白,保留除id外的全部屬性,並且將id屬性放到最後面
                strSection = Mid(strSection, 1, nIdStart) + Mid(strSection, nIdEnd, nLength) + " id='" + strPrefix + "'>"
            End If
        Else  ' 沒有id設定,直接插入
            strSection = Mid(strSection, 1, Len(strSection) - 1) + " id='" + strPrefix + "'>"
        End If
        
        nTabCount = 1
    End If
End Sub

' 解析html,並添加階層ID
Sub Parse_Html(ByVal strData As String)
    Dim i As Long, j As Long, k As Long
    Dim ch As String
    Dim last_ch As String
    
    strData = Replace(strData, Chr(9), "")  ' replace \t
    
    For i = 1 To Len(strData)
        ch = Mid(strData, i, 1)
        If " " = ch And Len(strSection) = 0 Then  ' 空白字省略
            ch = ""
        End If
            
        strSection = strSection + ch
        
        ' 因為html裡面有單引號不對稱狀況,所以做個例外處理
        ' Cover Html Error: Line 42, Column 693: an attribute specification must start with a name or name token
        If last_ch = "'" And ch = ">" And bIsString Then
            bIsString = False
        End If
        
        last_ch = ch
        
        If ch = """" Or ch = "'" Then  ' 字串不需要做判斷
            If bIsString = True Then
                bIsString = False
            Else
                bIsString = True
            End If
        ElseIf bIsString = False Then  ' 非字串處理
            If ch = ">" Or (ch = "<" And Len(strSection) > 1) Then  ' 完整標籤,拿來做解析
                If ch = "<" Then   ' 我們只要<前面的字串
                    strSection = Mid(strSection, 1, Len(strSection) - 1)
                End If
                
                strSection = Trim(strSection)
                nTabCount = 0
                
                ' script內容無須理會
                If InStr(1, strSection, "</script") = 1 Then
                    bIsScript = False
                ElseIf InStr(1, strSection, "<script") = 1 Then
                    bIsScript = True
                End If
                
                If bIsScript = False Then
                    ' 針對下面標籤內容串接成Xpath樣式
                    If InStr(1, strSection, "</html") = 1 Then
                        Prefix_Finish "</html"
                    ElseIf InStr(1, strSection, "<html") = 1 Then
                        Prefix_Start "html"
                        
                    ElseIf InStr(1, strSection, "</body") = 1 Then
                        Prefix_Finish "</body"
                    ElseIf InStr(1, strSection, "<body") = 1 Then
                        Prefix_Start "body"
                        
                    ElseIf InStr(1, strSection, "</div") = 1 Then
                        Prefix_Finish "</div"
                    ElseIf InStr(1, strSection, "<div") = 1 Then
                        Prefix_Start "div"
                        
                    ElseIf InStr(1, strSection, "</table") = 1 Then
                        Prefix_Finish "</table"
                    ElseIf InStr(1, strSection, "<table") = 1 Then
                        Prefix_Start "table"
                        
                    ElseIf InStr(1, strSection, "</thead") = 1 Then
                        Prefix_Finish "</thead"
                    ElseIf InStr(1, strSection, "<thead") = 1 Then
                        Prefix_Start "thead"
                        
                    ElseIf InStr(1, strSection, "</tr") = 1 Then
                        Prefix_Finish "</tr"
                    ElseIf InStr(1, strSection, "<tr") = 1 Then
                        j = Len(strPrefix)
                        Do
                            j = j - 1
                            If Mid(strPrefix, j, 1) = "/" Then
                                j = j + 1
                                Exit Do
                            End If
                        Loop
                        
                        For k = (j + 1) To Len(strPrefix)
                            If Mid(strPrefix, k, 1) = "[" Then
                                Exit For
                            End If
                        Next k
                        
                        ' 網頁裡面有<tr>沒有</tr>作結束就接<tr>的錯誤,做個例外處理
                        ' Cover Html Error: Line 304, Column 975: document type does not allow element "tr" here
                        If Mid(strPrefix, j, k - j) = "tr" Then  ' 先結束前面一個</tr>,再接一個新的<tr>
                            Dim strTmp As String
                            strTmp = strSection
                            strSection = "</tr>"
                            Prefix_Finish "</tr"
                            strSection = strTmp
                        End If
                        
                        Prefix_Start "tr"
                        
                    ElseIf InStr(1, strSection, "</td") = 1 Then
                        Prefix_Finish "</td"
                    ElseIf InStr(1, strSection, "<td") = 1 Then
                        Prefix_Start "td"
                    End If
                    
                    'If nTotalTabCount > 0 Then
                    '    For j = 0 To nTotalTabCount - 1
                    '        strNewData = strNewData + "\t"
                    '    Next j
                    'End If
                    
                    strNewData = strNewData + strSection ' + "\n"
                    nTotalTabCount = nTotalTabCount + nTabCount
                End If
                
                If ch = "<" Then
                    strSection = "<"
                Else
                    strSection = ""
                End If
            End If
        End If
    Next i
End Sub

' 將Xpath節點名稱,加工為需要的ID
Function Get_Html_ID(ByVal strHtmlID As String)
    Dim i As Integer
    Dim strTmp As String, strChar As String, strLastChar As String
    Dim ArrStr() As String
    
    i = 0
    strTmp = ""
    strLastChar = ""
    'Replace(strHtmlID, "tbody/", "")
    
    ArrStr = Split(strHtmlID, "/tbody")
    strHtmlID = ""
    For i = 0 To UBound(ArrStr)
        If Mid(ArrStr(i), 1, 1) <> "[" Then  ' 如果tbody不是陣列型式,直接加起來
            strHtmlID = strHtmlID + ArrStr(i)
        Else  ' tbody是陣列型式,只要/之後的資料
            strHtmlID = strHtmlID + Mid(ArrStr(i), InStr(2, ArrStr(i), "/"))
        End If
    Next i
    
    For i = 1 To Len(strHtmlID)
        strChar = Mid(strHtmlID, i, 1)
        If strChar = "/" And strLastChar <> "" And strLastChar <> "]" Then  ' 將/html/改為/html[1]/
            strTmp = strTmp + "[1]"
        End If
        strTmp = strTmp + strChar
        strLastChar = strChar
    Next i
    
    If strChar <> "/" Then  ' 結尾不是/,則補上
      strTmp = strTmp + "/"
    End If
    
    Get_Html_ID = strTmp
End Function

' 取得公司名稱與產業別
Sub Get_Company_Info(ByVal strID As String)
    Dim url  As String, strCompanyName As String, strCompanyIndustry As String
    Set objHttp = CreateObject("MSXML2.XMLHTTP")
    Set objHtml = CreateObject("HTMLFile")
    
    url = strUrlCompanyInfo + strID
    objHttp.Open "GET", url, False  ' 網頁連線
    objHttp.send
    While objHttp.ReadyState <> 4  ' 等待完成
        Sleep (1)
    Wend
    
    If objHttp.Status = 200 Then  ' 確認狀態
        Parse_Html objHttp.responseText  ' 解析字串
        objHtml.body.innerHTML = strNewData  ' 將新網頁內容設定給html元件
        
        ' 公司名稱
        strCompanyName = objHtml.getElementById(Get_Html_ID(strXpathCompanyName)).innerText
        
        ' 產業別
        strCompanyIndustry = objHtml.getElementById(Get_Html_ID(strXpathCompanyIndustry)).innerText
    End If
End Sub

' 若屬性中有包含行列,則回傳該值,否則回傳1
Function Get_Span_Value(ByVal obj As Object, ByVal span As String)
    If IsNull(obj.getAttribute(span)) = False Then
        Get_Span_Value = Int(obj.getAttribute(span))
    Else
        Get_Span_Value = 1
    End If
End Function

' 抓取股利政策
Sub Get_Company_Dividend(ByVal strID As String)
    Dim url  As String, strTmp As String, ArrHeader2D() As String
    Dim i As Integer, j As Integer, k As Integer, r As Integer
    Dim row As Integer, rows As Integer, c As Integer, col As Integer, cols As Integer, totalCols As Integer
    Set objHttp = CreateObject("MSXML2.XMLHTTP")
    Set objHtml = CreateObject("HTMLFile")
    
    strTmp = ""
    url = strUrlCompanyDividend + strID
    objHttp.Open "GET", url, False  ' 網頁連線
    objHttp.send
    While objHttp.ReadyState <> 4  ' 等待完成
        Sleep (1)
    Wend
    
    If objHttp.Status = 200 Then  ' 確認狀態
        Parse_Html objHttp.responseText
        objHtml.body.innerHTML = strNewData
        
        ' 統計股利政策欄位數量
        strTmp = Get_Html_ID(strXpathDividendHeader1)
        strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
        
        cols = 0
        Do While True
            i = i + 1
            Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
            If obj Is Nothing Then
                Exit Do
            End If
            cols = cols + Get_Span_Value(obj, "colspan")
        Loop
        
        totalCols = cols
        ReDim ArrHeader2D(4 - 1, totalCols - 1)
    
        ' 計算表格Columns
        i = 0
        j = 0
        Do While True
            i = i + 1
            Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
            If obj Is Nothing Then
                Exit Do
            End If
            rows = Get_Span_Value(obj, "rowspan")
            cols = Get_Span_Value(obj, "colspan")
            
            For col = 0 To (cols - 1)
                For row = 0 To (rows - 1)
                    ArrHeader2D(row, col + j) = Replace(Replace(Replace(Replace(obj.innerText, Chr(-24256), ""), " ", ""), Chr(13), ""), Chr(10), "")
                Next row
            Next col
            j = j + cols
        Loop
        
        ' 補齊Header內容
        strTmp = Get_Html_ID(strXpathDividendHeader)
        strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
        i = 0
        r = 0
        c = 0
        Do While True
            i = i + 1
            Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
            If obj Is Nothing Then
                Exit Do
            End If
            
            If r <> 0 Then
                For Each td In obj.childNodes
                    rows = Get_Span_Value(td, "rowspan")
                    cols = Get_Span_Value(td, "colspan")
                        
                    For col = 0 To (cols - 1)
                        For row = 0 To (rows - 1)
                            Do While True
                                If ArrHeader2D(row + r, col + c) = "" Then
                                    Exit Do
                                End If
                                c = c + 1
                            Loop
                                
                            ArrHeader2D(row + r, col + c) = Replace(Replace(Replace(Replace(td.innerText, Chr(-24256), ""), " ", ""), Chr(13), ""), Chr(10), "")
                        Next row
                    Next col
                Next
            End If
            r = r + 1
            c = 0
        Loop
        
        ' 設定想要的欄位內容
        Dim liDividendHeader(14) As String
        liDividendHeader(0) = "股利發放年度"
        ' 股利
        liDividendHeader(1) = "合計"
        liDividendHeader(2) = "合計"
        liDividendHeader(3) = "股利合計"
        ' 股價
        liDividendHeader(4) = "最高"
        liDividendHeader(5) = "最低"
        liDividendHeader(6) = "年均"
        ' 殖利率
        liDividendHeader(7) = "現金"
        liDividendHeader(8) = "股票"
        liDividendHeader(9) = "合計"
        liDividendHeader(10) = "EPS(元)"
        ' 發放率
        liDividendHeader(11) = "配息"
        liDividendHeader(12) = "配股"
        liDividendHeader(13) = "合計"
    
        ' 找出欄位索引值
        Dim colData() As Integer
        ReDim colData(UBound(liDividendHeader) - 1)
        For i = 0 To UBound(liDividendHeader) - 1
            If i = 0 Then
                j = 0
            Else
                j = colData(i - 1) + 1
            End If
            
            Do While True
                If j >= totalCols Then
                    Exit Do
                ElseIf ArrHeader2D(3, j) = liDividendHeader(i) Then
                    colData(i) = j
                    Exit Do
                End If
                j = j + 1
            Loop
        Next i
    
        ' 取得表格內容
        Dim liDividend() As String
        ReDim liDividend(10000, UBound(liDividendHeader) - 1)
        strTmp = Get_Html_ID(strXpathDividendData)
        strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
        i = 0
        j = 0
        k = 0
        Do While True
            i = i + 1
            Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
            If obj Is Nothing Then
                Exit Do
            End If
            
            For j = 0 To (UBound(liDividendHeader) - 1)
                liDividend(k, j) = obj.childNodes(colData(j)).innerText
            Next j
            
            k = k + 1
        Loop
    End If
End Sub

' 抓取獲利狀況
Sub Get_Company_Profit(ByVal strID As String)
    Dim url  As String, strTmp As String, ArrHeader2D() As String
    Dim i As Integer, j As Integer, k As Integer, r As Integer
    Dim row As Integer, rows As Integer, c As Integer, col As Integer, cols As Integer, totalCols As Integer
    Set objHttp = CreateObject("MSXML2.XMLHTTP")
    Set objHtml = CreateObject("HTMLFile")
    
    strTmp = ""
    url = strUrlCompanyProfit + strID
    objHttp.Open "GET", url, False
    objHttp.send
    While objHttp.ReadyState <> 4
        Sleep (1)
    Wend
    
    If objHttp.Status = 200 Then
        Parse_Html objHttp.responseText
        objHtml.body.innerHTML = strNewData
        
        ' 統計獲利狀況欄位數量
        strTmp = Get_Html_ID(strXpathProfitHeader1)
        strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
        
        cols = 0
        Do While True
            i = i + 1
            Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
            If obj Is Nothing Then
                Exit Do
            End If
            cols = cols + Get_Span_Value(obj, "colspan")
        Loop
        
        totalCols = cols
        ReDim ArrHeader2D(4 - 1, totalCols - 1)
    
        ' 計算表格Columns
        i = 0
        j = 0
        Do While True
            i = i + 1
            Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
            If obj Is Nothing Then
                Exit Do
            End If
            rows = Get_Span_Value(obj, "rowspan")
            cols = Get_Span_Value(obj, "colspan")
            
            For col = 0 To (cols - 1)
                For row = 0 To (rows - 1)
                    ArrHeader2D(row, col + j) = Replace(Replace(Replace(Replace(obj.innerText, Chr(-24256), ""), " ", ""), Chr(13), ""), Chr(10), "")
                Next row
            Next col
            j = j + cols
        Loop
        
        ' 補齊Header內容
        strTmp = Get_Html_ID(strXpathProfitHeader)
        strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
        i = 0
        r = 0
        c = 0
        Do While True
            i = i + 1
            Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
            If obj Is Nothing Then
                Exit Do
            End If
            
            If r <> 0 Then
                For Each td In obj.childNodes
                    rows = Get_Span_Value(td, "rowspan")
                    cols = Get_Span_Value(td, "colspan")
                        
                    For col = 0 To (cols - 1)
                        For row = 0 To (rows - 1)
                            Do While True
                                If ArrHeader2D(row + r, col + c) = "" Then
                                    Exit Do
                                End If
                                c = c + 1
                            Loop
                                
                            ArrHeader2D(row + r, col + c) = Replace(Replace(Replace(Replace(td.innerText, Chr(-24256), ""), " ", ""), Chr(13), ""), Chr(10), "")
                        Next row
                    Next col
                Next
            End If
            r = r + 1
            c = 0
        Loop
        
        ' 設定想要的欄位內容
        Dim liProfitHeader(14) As String
        liProfitHeader(0) = "年度"
        liProfitHeader(1) = "財報評分"
        ' 股價
        liProfitHeader(2) = "收盤"
        liProfitHeader(3) = "平均"
        liProfitHeader(4) = "漲跌"
        liProfitHeader(5) = "漲跌(%)"
        ' 獲利金額
        liProfitHeader(6) = "營業收入"
        liProfitHeader(7) = "營業毛利"
        liProfitHeader(8) = "稅後淨利"
        ' 獲利率
        liProfitHeader(9) = "營業毛利"
        liProfitHeader(10) = "稅後淨利"
        liProfitHeader(11) = "ROE(%)"
        liProfitHeader(12) = "稅後EPS"
        liProfitHeader(13) = "年增(元)"
    
        ' 找出欄位索引值
        Dim colData() As Integer
        ReDim colData(UBound(liProfitHeader) - 1)
        For i = 0 To UBound(liProfitHeader) - 1
            If i = 0 Then
                j = 0
            Else
                j = colData(i - 1) + 1
            End If
            
            Do While True
                If j >= totalCols Then
                    Exit Do
                ElseIf ArrHeader2D(1, j) = liProfitHeader(i) Then
                    colData(i) = j
                    Exit Do
                End If
                j = j + 1
            Loop
        Next i
    
        ' 取得表格內容
        Dim liProfit() As String
        ReDim liProfit(10000, UBound(liProfitHeader) - 1)
        strTmp = Get_Html_ID(strXpathProfitData)
        strTmp = Mid(strTmp, 1, Len(strTmp) - 1)
        i = 0
        j = 0
        k = 0
        Do While True
            i = i + 1
            Set obj = objHtml.getElementById(strTmp + "[" + Trim(Str(i)) + "]/")
            If obj Is Nothing Then
                Exit Do
            End If
            
            For j = 0 To (UBound(liProfitHeader) - 1)
                liProfit(k, j) = obj.childNodes(colData(j)).innerText
            Next j
            
            k = k + 1
        Loop
    End If
End Sub

' 將網頁原始碼存為檔案後,利用該檔案內容進行測試
Sub Local_Test()
    Dim strTest As String
    Dim i As Integer, j As Integer
    
    strTest = ""
    
    i = 0
    j = 0
    
    Global_Variable_Init
    strIdCompany = "3008"
    'Open "info.txt" For Input As #1
    Open "dividend.txt" For Input As #1
        Do While Not EOF(1)
            Line Input #1, strTmp
            strTest = strTest + strTmp
        Loop
    Close #1
    
    Parse_Html strTest  ' 解析字串
    objHtml.body.innerHTML = strNewData  ' 將新網頁內容設定給html元件
End Sub

Public Sub Main()
    Global_Variable_Init
    strIdCompany = "3008"
    Get_Company_Info strIdCompany
    'Get_Company_Dividend strIdCompany
    'Get_Company_Profit strIdCompany
End Sub

 
Design by Free WordPress Themes | Bloggerized by Lasantha - Premium Blogger Themes | Blogger Templates