我想从本地网站抓取数据。该代码收集该页面中的表,但我还想收集从表“VO/NGO 名称”字段超链接的数据。
这是主表。我想要的其他字段来自单击“VO/NGO 名称”时出现的页面。
我阅读了在线资料,但无法更正代码。
第一个输出应该是这样的,所以每个非政府组织的列表应该是这样的:
Option Explicit
Public Sub GetInfo()
Const URL As String = "https://ngodarpan.gov.in/index.php/home/statewise_ngo/76/35/1"
Dim html As HTMLDocument, hTable As HTMLTable, ws As Worksheet, headers()
headers = Array("Sr No.", "Name of VO/NGO", "Address", "City","State","Telephone","Mobile No.","Website","Email")
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.Send
html.body.innerHTML = .responseText
End With
Set hTable = html.querySelector("table.dvdtbl")
Dim td As Object, tr As Object, r As Long, c As Long
r = 1
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For Each tr In hTable.getElementsByTagName("tr")
r = r + 1: c = 1
If r > 3 Then
For Each td In tr.getElementsByTagName("td")
.Cells(r - 2, c) = IIf(c = 2, "'" & td.innerText, td.innerText)
c = c + 1
Next
End If
Next
End With
End Sub
要实现您所追求的结果,需要做几件事。
最后,您必须使用任何 json 转换器或脚本控件从该 json 响应中挖掘出各个字段。
我的以下尝试可以为您获取 json 响应。您现在需要做的就是解析 json 以满足您的要求:
Sub FetchTabularInfo()
Dim Http As New XMLHTTP60, Html As New HTMLDocument
Dim col As Variant, icol As New Collection
Dim csrf As Variant, I&
With Http
.Open "GET", "https://ngodarpan.gov.in/index.php/home/statewise_ngo/76/35/1", False
.send
Html.body.innerHTML = .responseText
End With
With Html.querySelectorAll(".table tr a[onclick^='show_ngo_info']")
For I = 0 To .Length - 1
icol.Add Split(Split(.item(I).getAttribute("onclick"), "(""")(1), """)")(0)
Next I
End With
For Each col In icol
With Http
.Open "GET", "https://ngodarpan.gov.in/index.php/ajaxcontroller/get_csrf", False
.send
csrf = .responseText
End With
csrf = Split(Replace(Split(csrf, ":")(1), """", ""), "}")(0)
With Http
.Open "POST", "https://ngodarpan.gov.in/index.php/ajaxcontroller/show_ngo_info", False
.setRequestHeader "X-Requested-With", "XMLHttpRequest"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
.send "id=" & col & "&csrf_test_name=" & csrf
End With
Debug.Print Http.responseText
Next col
End Sub
第一个引线的输出:
{"status":1,"infor":{"0":{"UniqueID":"AN\/2017\/0161456","Mobile":"9476076176","Email":"[email protected]","ngo_url":"http:\/\/www.adityanatyaacademy.com","ngo_name":"AdityaNatyaAcademy","pan_updDocId":"220156","reg_updDocId":"221361","Off_phone1":null,"Major_Activities1":".Drama\nJatrapala\nStreetplays\nAwareness Programe"},"issues_working_db":"","operational_states_db":"ANDAMAN & NICOBAR ISLANDS, ","operational_district_db":"ANDAMAN & NICOBAR ISLANDS->South Andaman, "},"member_info":[{"SalCode":null,"FName":"ASHUTOSH KARMAKAR","MName":null,"LName":null,"DesigName":"President","EmailId":"[email protected]","MobileNo":"9434262953","pan_updDocId":"223392","aadhaar_updDocId":"223393"},{"SalCode":null,"FName":"KAVERI DEBSHARMA","MName":null,"LName":null,"DesigName":"Member","EmailId":"[email protected]","MobileNo":"9474299901","pan_updDocId":"223400","aadhaar_updDocId":"223401"},{"SalCode":null,"FName":"SATYAJIT BAIN","MName":null,"LName":null,"DesigName":"Asisstant Secretary","EmailId"
:"[email protected]","MobileNo":"9434271746","pan_updDocId":"223408","aadhaar_updDocId":"223409"}],"registeration_info":[{"nr_orgName":"AdityaNatyaAcademy","nr_add":"31 M.G. Road,\nOpp. Sun Sea Resort,\nMiddle Point.","nr_city":"Port Blair","StateName":"ANDAMAN & NICOBAR ISLANDS","reg_name":"Registrar of Companies","TypeDescription":"Registered Societies (Non-Government)","nr_regNo":"888","nr_updDocId":"0","nr_actName":"Society Registration Act 1860","nr_isFcra":"N","fcrano":"","ngo_reg_date":"05-12-1995"}],"source_info":[{"sourcefund":"S","deptt_name":"Directorate of Art and Culture","purpose":"To Promote Art and Culture in Andaman and Nicobar Islands.","datefrom":"2013-04-01","dateto":"2014-03-31","amount_sanctioned":"25000"},{"sourcefund":"S","deptt_name":"Directorate of Art and Culture","purpose":"To promote Art and Culture","datefrom":"2014-04-01","dateto":"2015-03-31","amount_sanctioned":"25000"},{"sourcefund":"S","deptt_name":"Directorate of Art and Culture","purpose":"To promote Art and Cult
ure","datefrom":"2015-04-01","dateto":"2016-03-31","amount_sanctioned":"35000"},{"sourcefund":"S","deptt_name":"Directorate of Art and Culture","purpose":"To promote Art and Culture","datefrom":"2016-04-01","dateto":"2017-03-31","amount_sanctioned":"25000"}]}
添加执行上述脚本的参考:
Microsoft Html Object Library
Microsoft xml, v6.0
本文收集自互联网,转载请注明来源。
如有侵权,请联系[email protected] 删除。
我来说两句