using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
namespace StockCSharp
{
    /// <summary>
    /// Console program that downloads pages from goodinfo.tw for one stock ID and
    /// prints selected values: company name/industry, the dividend-policy table,
    /// or the business-performance (profit) table.
    /// </summary>
    class Program
    {
        static readonly string UrlCompanyInfo = "https://goodinfo.tw/StockInfo/StockDetail.asp?STOCK_ID=";
        static readonly string UrlCompanyDividend = "https://goodinfo.tw/StockInfo/StockDividendPolicy.asp?STOCK_ID=";
        static readonly string UrlCompanyProfit = "https://goodinfo.tw/StockInfo/StockBzPerformance.asp?STOCK_ID=";
        static readonly string IdCompany = "3008";

        static readonly string XpathCompanyName = "/html/body/table[2]/tbody/tr/td[3]/table/tbody/tr[2]/td[3]/table[2]/tbody/tr[1]/td[2]";
        static readonly string XpathCompanyIndustry = "/html/body/table[2]/tbody/tr/td[3]/table/tbody/tr[2]/td[3]/table[2]/tbody/tr[2]/td[2]";
        static readonly string XpathDividendHeader1 = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr[1]/td";
        static readonly string XpathDividendHeader = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr";
        static readonly string XpathDividendData = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/tbody[1]/tr";
        static readonly string XpathProfitHeader1 = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr[1]/td";
        static readonly string XpathProfitHeader = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr";
        static readonly string XpathProfitData = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/tbody[1]/tr";

        // User-Agent header sent with every page request.
        const string BrowserUserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko";

        // Number of header rows kept in the header grid. The dividend lookup reads
        // row index 3 and the profit lookup reads row index 1.
        const int HeaderRowCount = 4;

        // Matches a "/tbody" XPath step, optionally followed by a single-digit index
        // such as "[1]". Every XPath above is passed through this before querying.
        // NOTE(review): presumably the XPaths were copied from a browser, which adds
        // tbody elements that are absent from the raw HTML - confirm.
        static readonly Regex TbodyStep = new Regex("/tbody([[]\\d[]])?");

        /// <summary>Downloads <paramref name="url"/> and parses it as UTF-8 HTML.</summary>
        static HtmlDocument LoadPage(string url)
        {
            HtmlWeb web = new HtmlWeb();
            web.UserAgent = BrowserUserAgent;
            web.OverrideEncoding = Encoding.UTF8; // same encoding as code page 65001
            return web.Load(url);
        }

        /// <summary>
        /// Runs <paramref name="xpath"/> (with its tbody steps removed) against the
        /// document. Returns null when nothing matches, as SelectNodes does.
        /// </summary>
        static HtmlNodeCollection SelectWithoutTbody(HtmlDocument doc, string xpath)
        {
            return doc.DocumentNode.SelectNodes(TbodyStep.Replace(xpath, ""));
        }

        /// <summary>
        /// Like <see cref="SelectWithoutTbody"/>, but a missing match is reported with a
        /// descriptive exception instead of a later NullReferenceException.
        /// </summary>
        /// <exception cref="InvalidOperationException">No node matches the XPath.</exception>
        static HtmlNodeCollection SelectRequired(HtmlDocument doc, string xpath, string description)
        {
            HtmlNodeCollection found = SelectWithoutTbody(doc, xpath);
            if (found == null || found.Count == 0)
            {
                throw new InvalidOperationException(String.Format(
                    "Could not locate {0} in the downloaded page (XPath: {1}).", description, xpath));
            }
            return found;
        }

        /// <summary>Returns the text of the first child of the first node matching the XPath, or "" if it has no children.</summary>
        static string FirstChildText(HtmlDocument doc, string xpath, string description)
        {
            HtmlNode first = SelectRequired(doc, xpath, description)[0].FirstChild;
            return first == null ? "" : first.InnerText;
        }

        /// <summary>
        /// Fetches the company name and industry for the given stock ID and writes
        /// each on its own console line.
        /// </summary>
        /// <param name="strID">Stock ID appended to the company-info URL.</param>
        /// <exception cref="InvalidOperationException">An expected cell is missing from the page.</exception>
        static void Get_Company_Info(string strID)
        {
            HtmlDocument doc = LoadPage(UrlCompanyInfo + strID);

            // Read both values before printing so a failure prints nothing.
            string companyName = FirstChildText(doc, XpathCompanyName, "the company name cell");
            string companyIndustry = FirstChildText(doc, XpathCompanyIndustry, "the company industry cell");

            Console.WriteLine(companyName);
            Console.WriteLine(companyIndustry);
        }

        /// <summary>
        /// Reads a rowspan/colspan style attribute as an integer.
        /// </summary>
        /// <param name="attr">Attributes of a table cell.</param>
        /// <param name="span">Attribute name, e.g. "rowspan" or "colspan".</param>
        /// <returns>
        /// The attribute value when it is an integer of at least 1; otherwise 1
        /// (attribute absent, not numeric, or less than 1).
        /// </returns>
        static int Get_Span_Value(HtmlAttributeCollection attr, string span)
        {
            if (attr == null || !attr.Contains(span))
                return 1;

            int value;
            bool parsed = int.TryParse(
                attr[span].Value,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out value);

            // A span below 1 would make the cell occupy no grid slot at all.
            return (parsed && value >= 1) ? value : 1;
        }

        /// <summary>Strips ideographic spaces, non-breaking spaces, newlines and plain spaces from header text.</summary>
        static string CleanHeaderText(string raw)
        {
            return raw.Replace("\u3000", "").Replace("\u00a0", "").Replace("\n", "").Replace(" ", "");
        }

        /// <summary>Returns the direct children of <paramref name="row"/> whose element name is "td".</summary>
        static List<HtmlNode> TdChildren(HtmlNode row)
        {
            List<HtmlNode> cells = new List<HtmlNode>();
            foreach (HtmlNode child in row.ChildNodes)
            {
                if (child.Name == "td")
                    cells.Add(child);
            }
            return cells;
        }

        /// <summary>
        /// Writes one header row into <paramref name="grid"/>. Each cell's cleaned text is
        /// copied into every slot covered by its rowspan/colspan; slots already holding
        /// text (filled by a rowspan from an earlier row) are skipped. Writes that would
        /// fall outside the grid are dropped.
        /// </summary>
        static void PlaceHeaderRow(string[,] grid, int rowIndex, IEnumerable<HtmlNode> cells)
        {
            int gridRows = grid.GetLength(0);
            int gridCols = grid.GetLength(1);
            int column = 0;

            foreach (HtmlNode cell in cells)
            {
                int rowSpan = Get_Span_Value(cell.Attributes, "rowspan");
                int colSpan = Get_Span_Value(cell.Attributes, "colspan");
                string text = CleanHeaderText(cell.InnerText);

                // Move past slots that already hold text; null or empty counts as free.
                while (column < gridCols && !String.IsNullOrEmpty(grid[rowIndex, column]))
                    column++;

                for (int dr = 0; dr < rowSpan && rowIndex + dr < gridRows; dr++)
                {
                    for (int dc = 0; dc < colSpan && column + dc < gridCols; dc++)
                        grid[rowIndex + dr, column + dc] = text;
                }
                column += colSpan;
            }
        }

        /// <summary>
        /// Builds a HeaderRowCount x N grid of header labels, where N is the sum of the
        /// colspans in the first header row.
        /// </summary>
        /// <exception cref="InvalidOperationException">The header cannot be found in the page.</exception>
        static string[,] BuildHeaderGrid(HtmlDocument doc, string firstRowXPath, string allRowsXPath)
        {
            HtmlNodeCollection firstRowCells = SelectRequired(doc, firstRowXPath, "the first header row");

            int totalCols = 0;
            foreach (HtmlNode cell in firstRowCells)
                totalCols += Get_Span_Value(cell.Attributes, "colspan");

            string[,] grid = new string[HeaderRowCount, totalCols];
            PlaceHeaderRow(grid, 0, firstRowCells);

            // The remaining header rows fill whatever the first row left open.
            HtmlNodeCollection headerRows = SelectRequired(doc, allRowsXPath, "the header rows");
            for (int r = 1; r < headerRows.Count && r < HeaderRowCount; r++)
                PlaceHeaderRow(grid, r, TdChildren(headerRows[r]));

            return grid;
        }

        /// <summary>
        /// Finds the column index of each wanted label in one row of the header grid.
        /// Labels are matched left to right, each search starting one column after the
        /// previous match so that repeated labels resolve to successive columns.
        /// </summary>
        /// <returns>One index per wanted label; -1 where the label was not found.</returns>
        static int[] ResolveColumns(string[,] grid, int headerRow, string[] wanted)
        {
            int totalCols = grid.GetLength(1);
            int[] indices = new int[wanted.Length];
            int searchFrom = 0;

            for (int i = 0; i < wanted.Length; i++)
            {
                indices[i] = -1;
                for (int j = searchFrom; j < totalCols; j++)
                {
                    if (grid[headerRow, j] == wanted[i])
                    {
                        indices[i] = j;
                        searchFrom = j + 1;
                        break;
                    }
                }
            }
            return indices;
        }

        /// <summary>
        /// Downloads a page and extracts the wanted columns of its table.
        /// </summary>
        /// <returns>
        /// A list whose first element is <paramref name="wanted"/> itself, followed by one
        /// array per data row. A cell whose text is "-" is returned as "", as is a cell
        /// whose column was not found or is missing from that row.
        /// </returns>
        static List<string[]> ScrapeTable(string url, string firstRowXPath, string allRowsXPath,
                                          string dataRowsXPath, int headerRow, string[] wanted)
        {
            HtmlDocument doc = LoadPage(url);
            string[,] headerGrid = BuildHeaderGrid(doc, firstRowXPath, allRowsXPath);
            int[] columns = ResolveColumns(headerGrid, headerRow, wanted);

            List<string[]> table = new List<string[]>();
            table.Add(wanted);

            HtmlNodeCollection dataRows = SelectWithoutTbody(doc, dataRowsXPath);
            if (dataRows == null)
                return table; // no data rows matched: header only

            foreach (HtmlNode row in dataRows)
            {
                List<HtmlNode> cells = TdChildren(row); // only td children carry data
                string[] values = new string[wanted.Length];
                for (int i = 0; i < values.Length; i++)
                {
                    int index = columns[i];
                    string text = (index >= 0 && index < cells.Count) ? cells[index].InnerText : "";
                    values[i] = (text == "-") ? "" : text;
                }
                table.Add(values);
            }
            return table;
        }

        /// <summary>
        /// Fetches the dividend-policy table for the given stock ID.
        /// </summary>
        /// <param name="strID">Stock ID appended to the dividend-policy URL.</param>
        /// <returns>The wanted column labels followed by one string array per data row.</returns>
        /// <exception cref="InvalidOperationException">The table header cannot be found in the page.</exception>
        static List<string[]> Get_Company_Dividend(string strID)
        {
            // Wanted columns, matched against header row index 3.
            string[] wanted = new string[]{
                "股利發放年度",
                "合計", "合計", "股利合計",          // dividends
                "最高", "最低", "年均",              // share price
                "現金", "股票", "合計", "EPS(元)",   // yield
                "配息", "配股", "合計",              // payout ratio
            };
            return ScrapeTable(UrlCompanyDividend + strID,
                               XpathDividendHeader1, XpathDividendHeader, XpathDividendData,
                               3, wanted);
        }

        /// <summary>
        /// Fetches the business-performance (profit) table for the given stock ID.
        /// </summary>
        /// <param name="strID">Stock ID appended to the business-performance URL.</param>
        /// <returns>The wanted column labels followed by one string array per data row.</returns>
        /// <exception cref="InvalidOperationException">The table header cannot be found in the page.</exception>
        static List<string[]> Get_Company_Profit(string strID)
        {
            // Wanted columns, matched against header row index 1.
            string[] wanted = new string[]{
                "年度", "財報評分",
                "收盤", "平均", "漲跌", "漲跌(%)",   // share price
                "營業收入", "營業毛利", "稅後淨利",   // profit amounts
                "營業毛利", "稅後淨利",              // profit margins
                "ROE(%)", "稅後EPS", "年增(元)",
            };
            return ScrapeTable(UrlCompanyProfit + strID,
                               XpathProfitHeader1, XpathProfitHeader, XpathProfitData,
                               1, wanted);
        }

        /// <summary>Writes each row on one console line, every value followed by ", ".</summary>
        static void PrintTable(List<string[]> table)
        {
            foreach (string[] rowValues in table)
            {
                foreach (string value in rowValues)
                    Console.Write("{0}, ", value);
                Console.WriteLine("");
            }
        }

        /// <summary>
        /// Entry point: prints the profit table for <c>IdCompany</c>, then waits for a key press.
        /// </summary>
        static void Main(string[] args)
        {
            // The other two scrapers are available but not run by default:
            //   Get_Company_Info(IdCompany);
            //   PrintTable(Get_Company_Dividend(IdCompany));
            PrintTable(Get_Company_Profit(IdCompany));

            Console.WriteLine("Press Any Key...");
            Console.ReadKey(true); // pause before the console window closes
        }
    }
}
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
namespace StockCSharp
{
    /// <summary>
    /// Console program that downloads pages from goodinfo.tw for one stock ID and
    /// prints selected values: company name/industry, the dividend-policy table,
    /// or the business-performance (profit) table.
    /// </summary>
    class Program
    {
        static readonly string UrlCompanyInfo = "https://goodinfo.tw/StockInfo/StockDetail.asp?STOCK_ID=";
        static readonly string UrlCompanyDividend = "https://goodinfo.tw/StockInfo/StockDividendPolicy.asp?STOCK_ID=";
        static readonly string UrlCompanyProfit = "https://goodinfo.tw/StockInfo/StockBzPerformance.asp?STOCK_ID=";
        static readonly string IdCompany = "3008";

        static readonly string XpathCompanyName = "/html/body/table[2]/tbody/tr/td[3]/table/tbody/tr[2]/td[3]/table[2]/tbody/tr[1]/td[2]";
        static readonly string XpathCompanyIndustry = "/html/body/table[2]/tbody/tr/td[3]/table/tbody/tr[2]/td[3]/table[2]/tbody/tr[2]/td[2]";
        static readonly string XpathDividendHeader1 = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr[1]/td";
        static readonly string XpathDividendHeader = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr";
        static readonly string XpathDividendData = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/tbody[1]/tr";
        static readonly string XpathProfitHeader1 = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr[1]/td";
        static readonly string XpathProfitHeader = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/thead[1]/tr";
        static readonly string XpathProfitData = "/html/body/table[2]/tbody/tr/td[3]/div[2]/div/div/table/tbody[1]/tr";

        // User-Agent header sent with every page request.
        const string BrowserUserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko";

        // Number of header rows kept in the header grid. The dividend lookup reads
        // row index 3 and the profit lookup reads row index 1.
        const int HeaderRowCount = 4;

        // Matches a "/tbody" XPath step, optionally followed by a single-digit index
        // such as "[1]". Every XPath above is passed through this before querying.
        // NOTE(review): presumably the XPaths were copied from a browser, which adds
        // tbody elements that are absent from the raw HTML - confirm.
        static readonly Regex TbodyStep = new Regex("/tbody([[]\\d[]])?");

        /// <summary>Downloads <paramref name="url"/> and parses it as UTF-8 HTML.</summary>
        static HtmlDocument LoadPage(string url)
        {
            HtmlWeb web = new HtmlWeb();
            web.UserAgent = BrowserUserAgent;
            web.OverrideEncoding = Encoding.UTF8; // same encoding as code page 65001
            return web.Load(url);
        }

        /// <summary>
        /// Runs <paramref name="xpath"/> (with its tbody steps removed) against the
        /// document. Returns null when nothing matches, as SelectNodes does.
        /// </summary>
        static HtmlNodeCollection SelectWithoutTbody(HtmlDocument doc, string xpath)
        {
            return doc.DocumentNode.SelectNodes(TbodyStep.Replace(xpath, ""));
        }

        /// <summary>
        /// Like <see cref="SelectWithoutTbody"/>, but a missing match is reported with a
        /// descriptive exception instead of a later NullReferenceException.
        /// </summary>
        /// <exception cref="InvalidOperationException">No node matches the XPath.</exception>
        static HtmlNodeCollection SelectRequired(HtmlDocument doc, string xpath, string description)
        {
            HtmlNodeCollection found = SelectWithoutTbody(doc, xpath);
            if (found == null || found.Count == 0)
            {
                throw new InvalidOperationException(String.Format(
                    "Could not locate {0} in the downloaded page (XPath: {1}).", description, xpath));
            }
            return found;
        }

        /// <summary>Returns the text of the first child of the first node matching the XPath, or "" if it has no children.</summary>
        static string FirstChildText(HtmlDocument doc, string xpath, string description)
        {
            HtmlNode first = SelectRequired(doc, xpath, description)[0].FirstChild;
            return first == null ? "" : first.InnerText;
        }

        /// <summary>
        /// Fetches the company name and industry for the given stock ID and writes
        /// each on its own console line.
        /// </summary>
        /// <param name="strID">Stock ID appended to the company-info URL.</param>
        /// <exception cref="InvalidOperationException">An expected cell is missing from the page.</exception>
        static void Get_Company_Info(string strID)
        {
            HtmlDocument doc = LoadPage(UrlCompanyInfo + strID);

            // Read both values before printing so a failure prints nothing.
            string companyName = FirstChildText(doc, XpathCompanyName, "the company name cell");
            string companyIndustry = FirstChildText(doc, XpathCompanyIndustry, "the company industry cell");

            Console.WriteLine(companyName);
            Console.WriteLine(companyIndustry);
        }

        /// <summary>
        /// Reads a rowspan/colspan style attribute as an integer.
        /// </summary>
        /// <param name="attr">Attributes of a table cell.</param>
        /// <param name="span">Attribute name, e.g. "rowspan" or "colspan".</param>
        /// <returns>
        /// The attribute value when it is an integer of at least 1; otherwise 1
        /// (attribute absent, not numeric, or less than 1).
        /// </returns>
        static int Get_Span_Value(HtmlAttributeCollection attr, string span)
        {
            if (attr == null || !attr.Contains(span))
                return 1;

            int value;
            bool parsed = int.TryParse(
                attr[span].Value,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out value);

            // A span below 1 would make the cell occupy no grid slot at all.
            return (parsed && value >= 1) ? value : 1;
        }

        /// <summary>Strips ideographic spaces, non-breaking spaces, newlines and plain spaces from header text.</summary>
        static string CleanHeaderText(string raw)
        {
            return raw.Replace("\u3000", "").Replace("\u00a0", "").Replace("\n", "").Replace(" ", "");
        }

        /// <summary>Returns the direct children of <paramref name="row"/> whose element name is "td".</summary>
        static List<HtmlNode> TdChildren(HtmlNode row)
        {
            List<HtmlNode> cells = new List<HtmlNode>();
            foreach (HtmlNode child in row.ChildNodes)
            {
                if (child.Name == "td")
                    cells.Add(child);
            }
            return cells;
        }

        /// <summary>
        /// Writes one header row into <paramref name="grid"/>. Each cell's cleaned text is
        /// copied into every slot covered by its rowspan/colspan; slots already holding
        /// text (filled by a rowspan from an earlier row) are skipped. Writes that would
        /// fall outside the grid are dropped.
        /// </summary>
        static void PlaceHeaderRow(string[,] grid, int rowIndex, IEnumerable<HtmlNode> cells)
        {
            int gridRows = grid.GetLength(0);
            int gridCols = grid.GetLength(1);
            int column = 0;

            foreach (HtmlNode cell in cells)
            {
                int rowSpan = Get_Span_Value(cell.Attributes, "rowspan");
                int colSpan = Get_Span_Value(cell.Attributes, "colspan");
                string text = CleanHeaderText(cell.InnerText);

                // Move past slots that already hold text; null or empty counts as free.
                while (column < gridCols && !String.IsNullOrEmpty(grid[rowIndex, column]))
                    column++;

                for (int dr = 0; dr < rowSpan && rowIndex + dr < gridRows; dr++)
                {
                    for (int dc = 0; dc < colSpan && column + dc < gridCols; dc++)
                        grid[rowIndex + dr, column + dc] = text;
                }
                column += colSpan;
            }
        }

        /// <summary>
        /// Builds a HeaderRowCount x N grid of header labels, where N is the sum of the
        /// colspans in the first header row.
        /// </summary>
        /// <exception cref="InvalidOperationException">The header cannot be found in the page.</exception>
        static string[,] BuildHeaderGrid(HtmlDocument doc, string firstRowXPath, string allRowsXPath)
        {
            HtmlNodeCollection firstRowCells = SelectRequired(doc, firstRowXPath, "the first header row");

            int totalCols = 0;
            foreach (HtmlNode cell in firstRowCells)
                totalCols += Get_Span_Value(cell.Attributes, "colspan");

            string[,] grid = new string[HeaderRowCount, totalCols];
            PlaceHeaderRow(grid, 0, firstRowCells);

            // The remaining header rows fill whatever the first row left open.
            HtmlNodeCollection headerRows = SelectRequired(doc, allRowsXPath, "the header rows");
            for (int r = 1; r < headerRows.Count && r < HeaderRowCount; r++)
                PlaceHeaderRow(grid, r, TdChildren(headerRows[r]));

            return grid;
        }

        /// <summary>
        /// Finds the column index of each wanted label in one row of the header grid.
        /// Labels are matched left to right, each search starting one column after the
        /// previous match so that repeated labels resolve to successive columns.
        /// </summary>
        /// <returns>One index per wanted label; -1 where the label was not found.</returns>
        static int[] ResolveColumns(string[,] grid, int headerRow, string[] wanted)
        {
            int totalCols = grid.GetLength(1);
            int[] indices = new int[wanted.Length];
            int searchFrom = 0;

            for (int i = 0; i < wanted.Length; i++)
            {
                indices[i] = -1;
                for (int j = searchFrom; j < totalCols; j++)
                {
                    if (grid[headerRow, j] == wanted[i])
                    {
                        indices[i] = j;
                        searchFrom = j + 1;
                        break;
                    }
                }
            }
            return indices;
        }

        /// <summary>
        /// Downloads a page and extracts the wanted columns of its table.
        /// </summary>
        /// <returns>
        /// A list whose first element is <paramref name="wanted"/> itself, followed by one
        /// array per data row. A cell whose text is "-" is returned as "", as is a cell
        /// whose column was not found or is missing from that row.
        /// </returns>
        static List<string[]> ScrapeTable(string url, string firstRowXPath, string allRowsXPath,
                                          string dataRowsXPath, int headerRow, string[] wanted)
        {
            HtmlDocument doc = LoadPage(url);
            string[,] headerGrid = BuildHeaderGrid(doc, firstRowXPath, allRowsXPath);
            int[] columns = ResolveColumns(headerGrid, headerRow, wanted);

            List<string[]> table = new List<string[]>();
            table.Add(wanted);

            HtmlNodeCollection dataRows = SelectWithoutTbody(doc, dataRowsXPath);
            if (dataRows == null)
                return table; // no data rows matched: header only

            foreach (HtmlNode row in dataRows)
            {
                List<HtmlNode> cells = TdChildren(row); // only td children carry data
                string[] values = new string[wanted.Length];
                for (int i = 0; i < values.Length; i++)
                {
                    int index = columns[i];
                    string text = (index >= 0 && index < cells.Count) ? cells[index].InnerText : "";
                    values[i] = (text == "-") ? "" : text;
                }
                table.Add(values);
            }
            return table;
        }

        /// <summary>
        /// Fetches the dividend-policy table for the given stock ID.
        /// </summary>
        /// <param name="strID">Stock ID appended to the dividend-policy URL.</param>
        /// <returns>The wanted column labels followed by one string array per data row.</returns>
        /// <exception cref="InvalidOperationException">The table header cannot be found in the page.</exception>
        static List<string[]> Get_Company_Dividend(string strID)
        {
            // Wanted columns, matched against header row index 3.
            string[] wanted = new string[]{
                "股利發放年度",
                "合計", "合計", "股利合計",          // dividends
                "最高", "最低", "年均",              // share price
                "現金", "股票", "合計", "EPS(元)",   // yield
                "配息", "配股", "合計",              // payout ratio
            };
            return ScrapeTable(UrlCompanyDividend + strID,
                               XpathDividendHeader1, XpathDividendHeader, XpathDividendData,
                               3, wanted);
        }

        /// <summary>
        /// Fetches the business-performance (profit) table for the given stock ID.
        /// </summary>
        /// <param name="strID">Stock ID appended to the business-performance URL.</param>
        /// <returns>The wanted column labels followed by one string array per data row.</returns>
        /// <exception cref="InvalidOperationException">The table header cannot be found in the page.</exception>
        static List<string[]> Get_Company_Profit(string strID)
        {
            // Wanted columns, matched against header row index 1.
            string[] wanted = new string[]{
                "年度", "財報評分",
                "收盤", "平均", "漲跌", "漲跌(%)",   // share price
                "營業收入", "營業毛利", "稅後淨利",   // profit amounts
                "營業毛利", "稅後淨利",              // profit margins
                "ROE(%)", "稅後EPS", "年增(元)",
            };
            return ScrapeTable(UrlCompanyProfit + strID,
                               XpathProfitHeader1, XpathProfitHeader, XpathProfitData,
                               1, wanted);
        }

        /// <summary>Writes each row on one console line, every value followed by ", ".</summary>
        static void PrintTable(List<string[]> table)
        {
            foreach (string[] rowValues in table)
            {
                foreach (string value in rowValues)
                    Console.Write("{0}, ", value);
                Console.WriteLine("");
            }
        }

        /// <summary>
        /// Entry point: prints the profit table for <c>IdCompany</c>, then waits for a key press.
        /// </summary>
        static void Main(string[] args)
        {
            // The other two scrapers are available but not run by default:
            //   Get_Company_Info(IdCompany);
            //   PrintTable(Get_Company_Dividend(IdCompany));
            PrintTable(Get_Company_Profit(IdCompany));

            Console.WriteLine("Press Any Key...");
            Console.ReadKey(true); // pause before the console window closes
        }
    }
}
// Blog-page footer captured with the listing (not part of the program):
// "0 意見:" / "張貼留言"