22 封装HttpClient

时间:2023-1-20    作者:老大夫    分类: 传智JAVA爬虫学习笔记


HttpUtils

老教程中的写法已经无法直接爬取京东了,因为不带浏览器请求头的请求会被重定向到京东登录页。
因此我们需要伪装请求头。

1. 在京东搜索页面按 F12 打开开发者工具 → Network → 选中搜索请求,右键选择 Copy as cURL,复制请求信息

2. 打开 https://curlconverter.com/java/ ,粘贴上一步复制的内容,直接转换成设置 HttpGet 请求头的代码

package cn.itcast.jd.util;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;
import org.springframework.stereotype.Component;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.UUID;

@Component
public class HttpUtils {
    //连接池管理器
    private PoolingHttpClientConnectionManager cm;
    //连接池管理器构造方法
    public HttpUtils() {
        this.cm = new PoolingHttpClientConnectionManager();

        //设置最大连接数
        this.cm.setMaxTotal(100);

        //设置每个主机的最大连接数
        this.cm.setDefaultMaxPerRoute(10);
    }

    /**
     * 根据请求地址下载页面数据
     * @param url
     * @return 页面数据
     */
    public String doGetHtml(String url){
        //获取httpClient对象
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

        //设置HttpGet请求对象,设置url地址
        HttpGet httpGet= new HttpGet(url);

        //设置请求信息
        httpGet.setConfig(this.getConfig());

        //伪装请求头 ,这里信息太多我就不贴了
        httpGet.setHeader("a123123d.com");
        httpGet.setHeader("123123123");
        httpGet.setHeader("1231231230.7,213123126");
        httpGet.setHeader("1232113123");
        httpGet.setHeader("c123123D4Q");
        httpGet.setHeader("123123123/");
        httpGet.setHeader("123123123213"");
        httpGet.setHeader("123123123");
        httpGet.setHeader("12321312"");
        httpGet.setHeader("123123123");
        httpGet.setHeader("12312312");
        httpGet.setHeader("123123213");
        httpGet.setHeader("12312312");
        httpGet.setHeader("12312312");
        httpGet.setHeader("123123123");

        CloseableHttpResponse response=null;
        try {
            //使用httpClient发起请求,获取响应
            response = httpClient.execute(httpGet);

            //解析响应返回结果
            if(response.getStatusLine().getStatusCode() == 200){
                //判断响应体是否不为空,如果不为空就可以使用EntityUtils
                if(response.getEntity() != null){
                    String content = EntityUtils.toString(response.getEntity(), "utf8");
                    return content;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if(response != null){
                try {
                    response.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        //返回空串
        return "";
    }

    /**
     * 下载图片
      * @param url
     * @return 图片名称
     */
    public String doGetImage(String url){
        //获取httpClient对象
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

        //设置HttpGet请求对象,设置url地址
        HttpGet httpGet= new HttpGet(url);

        //设置请求信息
        httpGet.setConfig(this.getConfig());

         //伪装请求头 ,这里信息太多我就不贴了
        httpGet.setHeader("a123123d.com");
        httpGet.setHeader("123123123");
        httpGet.setHeader("1231231230.7,213123126");
        httpGet.setHeader("1232113123");
        httpGet.setHeader("c123123D4Q");
        httpGet.setHeader("123123123/");
        httpGet.setHeader("123123123213"");
        httpGet.setHeader("123123123");
        httpGet.setHeader("12321312"");
        httpGet.setHeader("123123123");
        httpGet.setHeader("12312312");
        httpGet.setHeader("123123213");
        httpGet.setHeader("12312312");
        httpGet.setHeader("12312312");
        httpGet.setHeader("123123123");

        CloseableHttpResponse response=null;
        try {
            //使用httpClient发起请求,获取响应
            response = httpClient.execute(httpGet);

            //解析响应返回结果
            if(response.getStatusLine().getStatusCode() == 200){
                //判断响应体是否不为空,如果不为空就可以使用EntityUtils
                if(response.getEntity() != null){
                    //下载图片

                    //获取图片的后缀
                    String extName = url.substring(url.lastIndexOf("."));

                    //创建一个图片名,重命名图片
                    String picName = UUID.randomUUID().toString()+extName;

                    //下载图片
                    //声明outputStream
                    OutputStream outputStream =new FileOutputStream(new File("C:\\Users\\16259\\Desktop\\images\\"+picName));
                    response.getEntity().writeTo(outputStream);

                    //返回图片的名称
                    return picName;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if(response != null){
                try {
                    response.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        //如果下载失败,返回空串
        return "";
    }

    //设置请求信息
    private RequestConfig getConfig() {
        RequestConfig config = RequestConfig.custom()
                .setConnectTimeout(1000)    //创建连接的最长时间
                .setConnectionRequestTimeout(500)   //获取连接的最长时间
                .setSocketTimeout(10 * 1000)    //数据传输的最长时间
                .build();
        return config;
    }

}


扫描二维码,在手机上阅读

推荐阅读: