2009-03-11 10 views



Birkaç yıl önce yazdığım bir sınıf. Tam anlamıyla eksiksiz değil ve her şeyin nasıl çalıştığını tam olarak anlamadan önce yazıldı (örneğin, karmaşık POST verilerini düzgün şekilde kodlamıyor), ancak tüm kusurlarına rağmen oldukça iyi çalışıyor ve çerezlerinizi nasıl saklayabileceğinizi gösteriyor. Ayrıca VB.Net ile yazılmış, ancak gerekirse onu ayrı bir derlemeye (assembly) derleyebilir veya bir dil çeviricisinden geçirebilirsiniz:

CookieContainer, tıpkı bir tarayıcının çerez deposu gibi çalışacak şekilde tasarlanmıştır.
Imports System.Net 
Imports System.Collections.Generic 

''' <summary>
''' Small HTTP helper that sends GET/POST requests through <see cref="HttpWebRequest"/>
''' while sharing one <see cref="CookieContainer"/>, time-out and user-agent string
''' across all requests made by the same instance.
''' </summary>
Public Class WebScraper
    ''' <summary>Creates a scraper whose user agent is initially the IE6 SP1 string.</summary>
    Public Sub New()
        SetUserAgent(UserAgent.IE6SP1) ''//default agent
    End Sub

#Region "Cookies"
    ''//Attached to every request in SendRequest so cookies persist between calls
    Private Cookies As New CookieContainer()

    ''' <summary>Builds a cookie from its parts and stores it in the session's cookie container.</summary>
    ''' <param name="Name">Cookie name.</param>
    ''' <param name="data">Cookie value.</param>
    ''' <param name="path">Cookie path.</param>
    ''' <param name="domain">Cookie domain.</param>
    ''' <exception cref="ArgumentException">
    ''' Raised by CookieContainer.Add when the domain is empty, which includes the default value.
    ''' </exception>
    Public Sub AddCookie(ByVal Name As String, ByVal data As String, Optional ByVal path As String = "", Optional ByVal domain As String = "")
        ''//The cookie used to be created and then dropped; it has to be stored to have any effect
        Cookies.Add(New Cookie(Name, data, path, domain))
    End Sub

    ''' <summary>Stores an existing cookie in the session's cookie container.</summary>
    ''' <param name="cookie">Cookie to store; its Domain must be set.</param>
    Public Sub AddCookie(ByRef cookie As Cookie)
        Cookies.Add(cookie)
    End Sub

    ''' <summary>Discards every stored cookie by replacing the container with a new, empty one.</summary>
    Public Sub ResetSession()
        Cookies = New CookieContainer()
        ''//TODO: Add other session reset code here
    End Sub

    ''' <summary>Returns the stored cookies that apply to the given URI.</summary>
    Public Function GetCookies(ByVal uri As System.Uri) As System.Net.CookieCollection
        Return Cookies.GetCookies(uri)
    End Function

    ''' <summary>Returns the stored cookies that apply to the given absolute URL.</summary>
    ''' <returns>The matching cookies, or Nothing when <paramref name="url"/> is not an absolute URI.</returns>
    Public Function GetCookies(ByVal url As String) As Net.CookieCollection
        Dim url2 As Uri = Nothing
        If Uri.TryCreate(url, UriKind.Absolute, url2) Then
            Return Cookies.GetCookies(url2)
        Else
            Return Nothing
        End If
    End Function
#End Region

    ''' <summary>Request time-out in milliseconds, applied to every request.</summary>
    Public Property TimeOut() As UInteger
        Get
            Return _TimeOut
        End Get
        Set(ByVal value As UInteger)
            _TimeOut = value
        End Set
    End Property
    Private _TimeOut As UInteger = 100000 ''//100000 matches default used by httprequest if none is specified

    ''' <summary>Encoding used by GetPage to decode the response body; UTF-8 unless changed.</summary>
    Public Property PageEncoding() As System.Text.Encoding
        Get
            Return _PageEncoding
        End Get
        Set(ByVal value As System.Text.Encoding)
            _PageEncoding = value
        End Set
    End Property
    Private _PageEncoding As System.Text.Encoding = System.Text.Encoding.UTF8

#Region "UserAgents"
    ''// TODO: Update this for FF3, add GoogleBot
    ''// TODO: Move to separate class with distinct sub-types (eg: UserAgents.IE.6XP or UserAgents.FF.2XP, classes that overload .ToString())
    ''' <summary>Predefined browser identities accepted by SetUserAgent.</summary>
    Public Enum UserAgent
        ''//NOTE(review): members rebuilt from the Case labels in SetUserAgent; confirm the order if numeric values matter
        IE6SP1
        IE7_XP
        IE7_Vista
        FF2_XP
        FF2_Vista
        FF2_Mac
        FF2_Linux
        Safari
    End Enum

    ''' <summary>Selects one of the predefined user-agent strings for subsequent requests.</summary>
    ''' <param name="UserAgent">Browser identity; a value without a matching Case leaves the current agent unchanged.</param>
    Public Sub SetUserAgent(ByVal UserAgent As UserAgent)
        ''//NOTE(review): the Firefox strings were truncated (unbalanced parenthesis, no version);
        ''//the rv/Firefox numbers below are the Firefox 2.0.0.5 values that go with Gecko/20070713 - confirm
        Select Case UserAgent
            Case WebScraper.UserAgent.FF2_Linux
                Agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.5) Gecko/20070713 Firefox/2.0.0.5"
            Case WebScraper.UserAgent.FF2_Mac
                Agent = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1) Gecko/20070713 Firefox/2.0.0.5"
            Case WebScraper.UserAgent.FF2_Vista
                Agent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.8.1.5) Gecko/20070713 Firefox/2.0.0.5"
            Case WebScraper.UserAgent.FF2_XP
                Agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.5) Gecko/20070713 Firefox/2.0.0.5"
            Case WebScraper.UserAgent.IE6SP1
                Agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)"
            Case WebScraper.UserAgent.IE7_Vista
                Agent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)"
            Case WebScraper.UserAgent.IE7_XP
                Agent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)"
            Case WebScraper.UserAgent.Safari
                Agent = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en) AppleWebKit/522.12.1 (KHTML, like Gecko) Safari/522.12.1"
        End Select
    End Sub

    ''' <summary>Uses an arbitrary user-agent string for subsequent requests.</summary>
    Public Sub SetUserAgent(ByVal UserAgent As String)
        Agent = UserAgent
    End Sub

    ''//defaults to IE6 SP1
    ''// TODO: Choose a better default
    Private Agent As String
#End Region

#Region "Get Page"
    ''' <summary>Requests a URI and returns the response body decoded with PageEncoding.</summary>
    ''' <param name="URL">Address to request.</param>
    ''' <param name="PostData">Already-encoded form body; a non-empty value makes the request a POST.</param>
    ''' <returns>The response text, or an empty string when the request or the read fails.</returns>
    Public Function GetPage(ByVal URL As Uri, Optional ByVal PostData As String = "") As String
        Try
            ''//Using blocks release the connection and the reader even when reading throws
            Using response As HttpWebResponse = SendRequest(URL, PostData)
                Using reader As New System.IO.StreamReader(response.GetResponseStream(), PageEncoding)
                    Return reader.ReadToEnd()
                End Using
            End Using
        Catch ex As Exception
            ''//Best effort by design: any failure is reported as an empty page
            Return ""
        End Try
    End Function

    ''' <summary>String-URL overload of GetPage.</summary>
    ''' <returns>The response text, or an empty string when the URL is not absolute or the request fails.</returns>
    Public Function GetPage(ByVal URL As String, Optional ByVal PostData As String = "") As String
        Dim URL2 As Uri = Nothing
        If Uri.TryCreate(URL, UriKind.Absolute, URL2) Then
            Return GetPage(URL2, PostData)
        Else
            Return ""
        End If
    End Function

    ''' <summary>Posts the given key/value pairs as a form body and returns the response text.</summary>
    Public Function GetPage(ByVal URL As String, ByRef PostData As IEnumerable(Of KeyValuePair(Of String, String))) As String
        Return GetPage(URL, PrepPostData(PostData))
    End Function

    ''' <summary>Posts the given key/value pairs as a form body and returns the response text.</summary>
    Public Function GetPage(ByVal URL As Uri, ByRef PostData As IEnumerable(Of KeyValuePair(Of String, String))) As String
        Return GetPage(URL, PrepPostData(PostData))
    End Function
#End Region

#Region "Get Response"
    ''' <summary>Requests a URI and converts the response according to its Content-Type header.</summary>
    ''' <param name="URL">Address to request.</param>
    ''' <param name="Postdata">Already-encoded form body; a non-empty value makes the request a POST.</param>
    ''' <returns>
    ''' A String for content types containing "text" (empty when reading fails), a System.Drawing.Image
    ''' for content types containing "image" (Nothing when decoding fails), otherwise the raw response
    ''' stream, which the caller must close.
    ''' </returns>
    Public Function GetResponse(ByVal URL As Uri, Optional ByVal Postdata As String = "") As Object
        Dim response As HttpWebResponse = SendRequest(URL, Postdata)
        Dim contentType As String = response.ContentType
        If contentType Is Nothing Then contentType = ""

        If contentType.Contains("text") Then
            Try
                Using reader As New IO.StreamReader(response.GetResponseStream(), System.Text.Encoding.UTF8) ''// TODO: figure out how to detect actual encoding
                    Return reader.ReadToEnd()
                End Using
            Catch ex As Exception
                Return ""
            Finally
                response.Close()
            End Try
        ElseIf contentType.Contains("image") Then
            Try
                ''//Image.FromStream needs its stream open for the image's lifetime, so the response stays open on success
                Return System.Drawing.Image.FromStream(response.GetResponseStream())
            Catch ex As Exception
                response.Close()
                Return Nothing
            End Try
        Else
            Return response.GetResponseStream()
        End If
    End Function

    ''' <summary>Posts the given key/value pairs and converts the response by content type.</summary>
    Public Function GetResponse(ByVal URL As Uri, ByRef PostData As IEnumerable(Of KeyValuePair(Of String, String))) As Object
        Return GetResponse(URL, PrepPostData(PostData))
    End Function

    ''' <summary>Posts the given key/value pairs and converts the response by content type.</summary>
    Public Function GetResponse(ByVal URL As String, ByRef PostData As IEnumerable(Of KeyValuePair(Of String, String))) As Object
        Return GetResponse(URL, PrepPostData(PostData))
    End Function

    ''' <summary>String-URL overload of GetResponse.</summary>
    ''' <returns>The converted response, or Nothing when the URL is not an absolute URI.</returns>
    Public Function GetResponse(ByVal URL As String, Optional ByVal PostData As String = "") As Object
        Dim URL2 As Uri = Nothing
        If Uri.TryCreate(URL, UriKind.Absolute, URL2) Then
            Return GetResponse(URL2, PostData)
        Else
            Return Nothing
        End If
    End Function
#End Region

#Region "SaveResponseToFile"
    ''' <summary>Requests a URI and writes the raw response body to a file, overwriting any existing file.</summary>
    ''' <param name="FullFileName">Path of the file to create.</param>
    ''' <param name="URL">Address to request.</param>
    ''' <param name="PostData">Already-encoded form body; a non-empty value makes the request a POST.</param>
    ''' <returns>True when the whole body was written; False when the request, the read or the write failed.</returns>
    Function SaveResponseToFile(ByVal FullFileName As String, ByVal URL As Uri, Optional ByVal PostData As String = "") As Boolean
        Try
            Using response As HttpWebResponse = SendRequest(URL, PostData)
                Using source As IO.Stream = response.GetResponseStream()
                    Using target As New IO.FileStream(FullFileName, IO.FileMode.Create)
                        ''//Copy in chunks until Read reports end of stream (0 bytes)
                        Dim buffer(8191) As Byte
                        Dim count As Integer = source.Read(buffer, 0, buffer.Length)
                        While count > 0
                            target.Write(buffer, 0, count)
                            count = source.Read(buffer, 0, buffer.Length)
                        End While
                    End Using
                End Using
            End Using
        Catch ex As Exception
            Return False
        End Try
        Return True
    End Function

    ''' <summary>String-URL overload of SaveResponseToFile; returns False when the URL is not absolute.</summary>
    Function SaveResponseToFile(ByVal FullFileName As String, ByVal URL As String, Optional ByVal PostData As String = "") As Boolean
        Dim URL2 As Uri = Nothing
        If Uri.TryCreate(URL, UriKind.Absolute, URL2) Then
            Return SaveResponseToFile(FullFileName, URL2, PostData)
        Else
            Return False
        End If
    End Function

    ''' <summary>Posts the given key/value pairs and saves the response body to a file.</summary>
    Function SaveResponseToFile(ByVal FullFileName As String, ByVal URL As String, ByRef PostData As IEnumerable(Of KeyValuePair(Of String, String))) As Boolean
        Return SaveResponseToFile(FullFileName, URL, PrepPostData(PostData))
    End Function

    ''' <summary>Posts the given key/value pairs and saves the response body to a file.</summary>
    Function SaveResponseToFile(ByVal FullFileName As String, ByVal URL As Uri, ByRef PostData As IEnumerable(Of KeyValuePair(Of String, String))) As Boolean
        Return SaveResponseToFile(FullFileName, URL, PrepPostData(PostData))
    End Function
#End Region

#Region "Get Image"
    ''' <summary>Downloads an image from an absolute URL.</summary>
    ''' <returns>The decoded image, or Nothing when the URL is not absolute or the download/decoding fails.</returns>
    Public Function GetImage(ByVal URL As String) As System.Drawing.Image
        Dim URL2 As Uri = Nothing
        If Not Uri.TryCreate(URL, UriKind.Absolute, URL2) Then
            Return Nothing
        End If
        Return GetImage(URL2)
    End Function

    ''' <summary>Downloads an image from a URI.</summary>
    ''' <returns>The decoded image, or Nothing when the download or decoding fails.</returns>
    Public Function GetImage(ByVal URL As Uri) As System.Drawing.Image
        Try
            ''//The response stream is intentionally left open: Image.FromStream requires it for the image's lifetime
            Return System.Drawing.Image.FromStream(SendRequest(URL).GetResponseStream())
        Catch ex As Exception
            Return Nothing
        End Try
    End Function
#End Region

#Region "PostToURL"
    ''' <summary>Sends a request and discards the response; does nothing when the URL is not absolute.</summary>
    Public Sub PostToURL(ByVal URL As String, Optional ByVal PostData As String = "")
        Dim response As HttpWebResponse = SendRequest(URL, PostData)
        ''//Close the response so the underlying connection is released
        If response IsNot Nothing Then response.Close()
    End Sub

    ''' <summary>Sends a request and discards the response.</summary>
    Public Sub PostToURL(ByVal URL As Uri, Optional ByVal PostData As String = "")
        Dim response As HttpWebResponse = SendRequest(URL, PostData)
        If response IsNot Nothing Then response.Close()
    End Sub

    ''' <summary>Posts the given key/value pairs as a form body and discards the response.</summary>
    Public Sub PostToURL(ByVal URL As String, ByRef PostData As Dictionary(Of String, String))
        PostToURL(URL, PrepPostData(PostData))
    End Sub

    ''' <summary>Posts the given key/value pairs as a form body and discards the response.</summary>
    Public Sub PostToURL(ByVal URL As Uri, ByRef PostData As Dictionary(Of String, String))
        PostToURL(URL, PrepPostData(PostData))
    End Sub
#End Region

#Region "Private Methods"
    ''' <summary>Joins key/value pairs into a "k1=v1&amp;k2=v2" form body with each key and value percent-encoded.</summary>
    ''' <returns>The encoded body, or an empty string when there are no pairs.</returns>
    Private Function PrepPostData(ByVal PostData As IEnumerable(Of KeyValuePair(Of String, String))) As String
        If PostData Is Nothing Then
            Return ""
        End If

        Dim body As New System.Text.StringBuilder()
        For Each pair As KeyValuePair(Of String, String) In PostData
            ''//Separator goes between pairs, so an empty collection yields "" instead of failing on a trailing "&"
            If body.Length > 0 Then body.Append("&")
            body.Append(EscapeFormValue(pair.Key))
            body.Append("=")
            body.Append(EscapeFormValue(pair.Value))
        Next pair
        Return body.ToString()
    End Function

    ''' <summary>Percent-encodes one form key or value; Nothing is treated as an empty string.</summary>
    Private Shared Function EscapeFormValue(ByVal value As String) As String
        If String.IsNullOrEmpty(value) Then
            Return ""
        End If
        Return Uri.EscapeDataString(value)
    End Function

    ''' <summary>String-URL overload of SendRequest.</summary>
    ''' <returns>The response, or Nothing when the URL is not an absolute URI.</returns>
    Private Function SendRequest(ByVal URL As String, Optional ByVal PostData As String = "") As HttpWebResponse
        Dim URL2 As Uri = Nothing
        If Uri.TryCreate(URL, UriKind.Absolute, URL2) Then
            Return SendRequest(URL2, PostData)
        Else
            Return Nothing
        End If
    End Function

    ''' <summary>
    ''' Builds and sends the request with the shared cookies, time-out and user agent.
    ''' The request is a form-encoded POST when PostData is non-empty; otherwise the default method is used.
    ''' </summary>
    ''' <returns>The server response; the caller is responsible for closing it.</returns>
    Private Function SendRequest(ByVal URL As Uri, Optional ByVal PostData As String = "") As HttpWebResponse
        Dim Request As HttpWebRequest = DirectCast(WebRequest.Create(URL), HttpWebRequest)

        Request.CookieContainer = Cookies
        ''//Timeout is an Integer property; clamp so a large UInteger cannot overflow the conversion
        Request.Timeout = CInt(Math.Min(TimeOut, CUInt(Integer.MaxValue)))
        Request.UserAgent = Agent

        If Not String.IsNullOrEmpty(PostData) Then
            Request.Method = "POST" ''// TODO: allow explicitly setting METHOD and Content-type for request via properties
            Request.ContentType = "application/x-www-form-urlencoded"
            ''//The body must be written and the stream closed before GetResponse is called
            Using sw As New IO.StreamWriter(Request.GetRequestStream())
                sw.Write(PostData)
            End Using
        End If

        Return DirectCast(Request.GetResponse(), HttpWebResponse)
    End Function
#End Region
End Class

Bu nedenle herhangi bir siteye ait çerezleri içerebilir; alan adı ve yolun yanı sıra sona erme süreleri de dikkate alınır. Aynı kapsayıcıda herhangi bir alan adına ait çerezler bulunabilir, dolayısıyla tüm web isteklerinin aynı kapsayıcıyı paylaşması da mümkündür.

CookieContainer'ın .Add(Cookie) ve .GetCookies(uri) yöntemlerinde bir hata olduğunu bilmelisiniz.

Ayrıntılar ve düzeltme için buraya bakın:
