分享

让我的C++程序直接阅读网页(4) HTTP访问

 accesine 2005-10-13
让我的C++程序直接阅读网页(4) HTTP访问

作者: xyun 时间: 2005-9-19 12:24:48 浏览: 263
来自:
关键词: 让我的C++程序直接阅读网页(4) HTTP访问
 破解站  金山毒霸免费杀毒   WPS Office 2005

4. HTTP访问

Web简单看就是HTML + HTTP。前面提供的HTML读取函数所需要的原始HTML文本,得通过HTTP协议从各个web site上读取到。用socket可以实现HTTP访问,但是如果想比较全面地支持HTTP协议,使用现成的HTTP服务显得更有效率。

微软提供了Windows平台上访问HTTP的两组API:WinINet 和 WinHTTP。

 

这里给出二个读取Web网页的函数,

* 使用WinINet:readHttpFile

* 使用WinHTTP:getHttpFile

 

void

getHttpFile( const HINTERNET h_site,

       string & rd,

       const string & site,

       const string & path,

       UInt32 flags = 0 )

{

       if ( path.empty() )

              return;

 

       HINTERNET h_file = NULL;

       wchar_t w_str[WSTR_LENGTH];

       ::memset( w_str, 0, WSTR_LENGTH * sizeof(wchar_t) );

       try

       {

              UInt32 l_0 = 0;

              UInt32 l_1 = 0;

              char *p_buf = NULL;

              ::mbstowcs( w_str, path.c_str(), path.size() );

              h_file = ::WinHttpOpenRequest( h_site,

                            L"GET",

                            w_str,

                            NULL,

                            WINHTTP_NO_REFERER,

                            WINHTTP_DEFAULT_ACCEPT_TYPES,

                            flags );

              if ( h_file == NULL )

                     throw ::GetLastError();

 

              BOOL b_res = ::WinHttpSendRequest( h_file,

                            WINHTTP_NO_ADDITIONAL_HEADERS,

                            0,

                            WINHTTP_NO_REQUEST_DATA,

                            0,

                            0,

                            0 );

              if ( ! b_res )

                     throw ::GetLastError();

 

              b_res = ::WinHttpReceiveResponse( h_file, NULL );

              if ( ! b_res )

                     throw ::GetLastError();

 

              // 如果需要可以在这里或稍后读入http cookies

 

              do

              {

                     l_0 = 0;

                     b_res = ::WinHttpQueryDataAvailable( h_file, &l_0 );

                     p_buf = new char[l_0 + 1];

                     ::ZeroMemory( p_buf, l_0 + 1 );

                     if ( b_res )

                     {

                            b_res = ::WinHttpReadData( h_file,

                                          p_buf,

                        l_0,

                                          &l_1 );

                            if ( b_res )

                            {

                                   if ( l_1 > 0 )

                                          rd.append( p_buf, l_1 );

                            }

                     }

                     delete [] p_buf;

              } while ( l_0 > 0 );

             

              if ( ! b_res )

                     throw ::GetLastError();

 

              canonHTML( rd );

              ::WinHttpCloseHandle( h_file );

       }

       catch ( ... )

       {

              if ( h_file != NULL )

                     ::WinHttpCloseHandle( h_file );

 

              throw;

       }

}

void

readHttpFile( string &rd,

       const CHttpConnection &server,

       const string & src_page )

{

       if ( src_page.empty() )

              return;

 

       DWORD dw_ret;

       CHttpFile *p_file = NULL;

       CHttpConnection *p_svr = const_cast<CHttpConnection *>(&server);

       char *rd_buf = NULL;

       try

       {

              p_file = p_svr->OpenRequest( CHttpConnection::HTTP_VERB_GET,

                     src_page.c_str(),

                     NULL,

                     1,

                     NULL,

                     NULL,

                     INTERNET_FLAG_EXISTING_CONNECT

                     | INTERNET_FLAG_RELOAD );

 

              p_file->SendRequest( "\r\n", 2 );

              p_file->QueryInfoStatusCode( dw_ret );

              if ( dw_ret != HTTP_STATUS_OK )

                     throw std::exception( "failed" );

 

              rd_buf = new char[BUF_SIZE];

              if ( rd_buf == NULL )

                     throw std::exception( "insufficientMemory" );

 

              rd.erase();

              memset( rd_buf, 0, BUF_SIZE );

              int l = p_file->Read( rd_buf, BUF_SIZE );

              while ( l > 0 )

              {

                     rd.append( rd_buf, l );

                     l = p_file->Read( rd_buf, BUF_SIZE );

              }

             

              if ( rd.empty() )

                     throw std::exception( "noContent" );

             

              canonHTML( rd );

              p_file->Close();

              delete [] rd_buf;

       }

       catch ( CInternetException *p_ex )

       {

              p_file->Close();

              if ( rd_buf ) delete [] rd_buf;

              TCHAR sz_err[255];

              p_ex->GetErrorMessage( sz_err, 255 );

              throw std::exception( sz_err );

       }

       catch ( ... )

       {

              p_file->Close();

              if ( rd_buf ) delete [] rd_buf;

              throw;

       }

}

 

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多