基于(HttpClient5.1 / HttpClient 4 / WebMagic / Socket)
解决方案:
基于HttpClient5.1:
关键代码,重写DefaultRoutePlanner.determineLocalAddress 方法,加入需绑定的本地IP。
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.config.RequestConfig;
import org.apache.hc.client5.http.impl.DefaultSchemePortResolver;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
import org.apache.hc.client5.http.impl.routing.DefaultRoutePlanner;
import org.apache.hc.core5.http.HttpHost;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.protocol.HttpContext;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.net.InetAddress;
public class HttpClient5Test {
@Test
void httpClient5Connect() {
String localAddress = "192.168.2.103";
String[] ipStr = localAddress.split("\\.");
byte[] localAddressByte = new byte[4];
for (int i = 0; i < 4; i++) {
localAddressByte[i] = (byte) (Integer.parseInt(ipStr[i]) & 0xff);
}
RequestConfig config = RequestConfig.custom().build();
HttpGet httpGet = new HttpGet("http://192.168.2.1/");
httpGet.setConfig(config);
HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
// 关键代码,重写DefaultRoutePlanner.determineLocalAddress 方法,加入需绑定的本地IP
httpClientBuilder.setRoutePlanner(new DefaultRoutePlanner(DefaultSchemePortResolver.INSTANCE) {
@SneakyThrows
@Override
protected InetAddress determineLocalAddress(final HttpHost firstHop, final HttpContext context) {
return InetAddress.getByAddress(localAddressByte);
}
});
try (CloseableHttpClient httpClient = httpClientBuilder.build()) {
CloseableHttpResponse response = httpClient.execute(httpGet);
String body = EntityUtils.toString(response.getEntity(), "UTF-8");
log.info(body);
// TODO 获取页面内容实现
} catch (IOException | ParseException e) {
e.printStackTrace();
}
}
}
基于WebMagic:
首先,继承HttpUriRequestConverter类并重写其convert方法,增加绑定本地IP地址功能。代码如下:
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthState;
import org.apache.http.auth.ChallengeState;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CookieStore;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.cookie.BasicClientCookie;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.downloader.HttpClientRequestContext;
import us.codecraft.webmagic.downloader.HttpUriRequestConverter;
import us.codecraft.webmagic.proxy.Proxy;
import us.codecraft.webmagic.utils.HttpConstant;
import us.codecraft.webmagic.utils.UrlUtils;
import java.net.InetAddress;
import java.util.Map;
public class ProxyHttpUriRequestConverter extends HttpUriRequestConverter {
/**
* 使用本地指定IP访问
*/
private InetAddress localAddress;
public ProxyHttpUriRequestConverter() {
this(null);
}
public ProxyHttpUriRequestConverter(InetAddress localAddress) {
this.localAddress = localAddress;
}
@Override
public HttpClientRequestContext convert(Request request, Site site, Proxy proxy) {
HttpClientRequestContext httpClientRequestContext = new HttpClientRequestContext();
httpClientRequestContext.setHttpUriRequest(convertHttpUriRequest(request, site, proxy));
httpClientRequestContext.setHttpClientContext(convertHttpClientContext(request, site, proxy));
return httpClientRequestContext;
}
private HttpClientContext convertHttpClientContext(Request request, Site site, Proxy proxy) {
HttpClientContext httpContext = new HttpClientContext();
if (proxy != null && proxy.getUsername() != null) {
AuthState authState = new AuthState();
authState.update(new BasicScheme(ChallengeState.PROXY), new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword()));
httpContext.setAttribute(HttpClientContext.PROXY_AUTH_STATE, authState);
}
if (request.getCookies() != null && !request.getCookies().isEmpty()) {
CookieStore cookieStore = new BasicCookieStore();
for (Map.Entry<String, String> cookieEntry : request.getCookies().entrySet()) {
BasicClientCookie cookie1 = new BasicClientCookie(cookieEntry.getKey(), cookieEntry.getValue());
cookie1.setDomain(UrlUtils.removePort(UrlUtils.getDomain(request.getUrl())));
cookieStore.addCookie(cookie1);
}
httpContext.setCookieStore(cookieStore);
}
return httpContext;
}
private HttpUriRequest convertHttpUriRequest(Request request, Site site, Proxy proxy) {
RequestBuilder requestBuilder = selectRequestMethod(request).setUri(UrlUtils.fixIllegalCharacterInUrl(request.getUrl()));
if (site.getHeaders() != null) {
for (Map.Entry<String, String> headerEntry : site.getHeaders().entrySet()) {
requestBuilder.addHeader(headerEntry.getKey(), headerEntry.getValue());
}
}
RequestConfig.Builder requestConfigBuilder = RequestConfig.custom();
if (site != null) {
requestConfigBuilder.setConnectionRequestTimeout(site.getTimeOut())
.setSocketTimeout(site.getTimeOut())
.setConnectTimeout(site.getTimeOut())
.setCookieSpec(CookieSpecs.STANDARD);
}
if (null != localAddress) {
// 关键代码,通过本地IP地址绑定本地网卡
requestConfigBuilder.setLocalAddress(localAddress);
}
if (proxy != null) {
requestConfigBuilder.setProxy(new HttpHost(proxy.getHost(), proxy.getPort(), proxy.getScheme()));
}
requestBuilder.setConfig(requestConfigBuilder.build());
HttpUriRequest httpUriRequest = requestBuilder.build();
if (request.getHeaders() != null && !request.getHeaders().isEmpty()) {
for (Map.Entry<String, String> header : request.getHeaders().entrySet()) {
httpUriRequest.addHeader(header.getKey(), header.getValue());
}
}
return httpUriRequest;
private RequestBuilder selectRequestMethod(Request request) {
String method = request.getMethod();
if (method == null || method.equalsIgnoreCase(HttpConstant.Method.GET)) {
//default get
return RequestBuilder.get();
} else if (method.equalsIgnoreCase(HttpConstant.Method.POST)) {
return addFormParams(RequestBuilder.post(), request);
} else if (method.equalsIgnoreCase(HttpConstant.Method.HEAD)) {
return RequestBuilder.head();
} else if (method.equalsIgnoreCase(HttpConstant.Method.PUT)) {
return addFormParams(RequestBuilder.put(), request);
} else if (method.equalsIgnoreCase(HttpConstant.Method.DELETE)) {
return RequestBuilder.delete();
} else if (method.equalsIgnoreCase(HttpConstant.Method.TRACE)) {
return RequestBuilder.trace();
}
throw new IllegalArgumentException("Illegal HTTP Method " + method);
}
private RequestBuilder addFormParams(RequestBuilder requestBuilder, Request request) {
if (request.getRequestBody() != null) {
ByteArrayEntity entity = new ByteArrayEntity(request.getRequestBody().getBody());
entity.setContentType(request.getRequestBody().getContentType());
requestBuilder.setEntity(entity);
}
return requestBuilder;
}
}
import lombok.extern.slf4j.Slf4j;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.downloader.HttpClientDownloader;
import us.codecraft.webmagic.downloader.HttpUriRequestConverter;
import us.codecraft.webmagic.processor.PageProcessor;
import java.net.InetAddress;
import java.net.UnknownHostException;
/**
 * Minimal WebMagic page processor that logs each page's URL and HTML, plus a
 * {@code main} that crawls a single URL through a downloader bound to a fixed
 * local IP address.
 */
@Slf4j
public class RouterProccessor implements PageProcessor {

    /** Logs the URL and the HTML of the downloaded page. */
    @Override
    public void process(Page page) {
        String pageUrl = page.getUrl().get();
        log.info("URL : {}", pageUrl);
        String pageHtml = page.getHtml().get();
        log.info("HTML : {}", pageHtml);
    }

    /** Returns a default {@link Site} configuration. */
    @Override
    public Site getSite() {
        return Site.me();
    }

    /**
     * Starts an asynchronous single-threaded spider for {@code http://192.168.2.1/}
     * whose downloader binds to the local address {@code 192.168.2.103}.
     *
     * @param args unused
     * @throws UnknownHostException if the address bytes are rejected by
     *                              {@link InetAddress#getByAddress(byte[])}
     */
    public static void main(String[] args) throws UnknownHostException {
        final String bindIp = "192.168.2.103";
        final String[] octets = bindIp.split("\\.");
        final byte[] rawAddress = new byte[4];
        int idx = 0;
        while (idx < rawAddress.length) {
            rawAddress[idx] = (byte) (Integer.parseInt(octets[idx]) & 0xff);
            idx++;
        }

        HttpClientDownloader downloader = new HttpClientDownloader();
        // Key step: hand the downloader a converter that binds to the chosen local NIC address.
        InetAddress bindAddress = InetAddress.getByAddress(rawAddress);
        HttpUriRequestConverter converter = new ProxyHttpUriRequestConverter(bindAddress);
        downloader.setHttpUriRequestConverter(converter);

        Spider spider = Spider.create(new RouterProccessor())
                .addUrl("http://192.168.2.1/")
                .thread(1)
                .setDownloader(downloader);
        spider.runAsync();
    }
}
基于HttpClient 4.5.13:
引用httpclient包
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.13</version>
</dependency>
/**
 * Sends a GET request to {@code http://<ip>} using HttpClient 4.x with the local
 * side of the connection bound to the given IPv4 address.
 *
 * @param local dotted-decimal IPv4 address of the local interface to bind to
 * @param ip    host (and optional port/path) appended to {@code "http://"}
 * @throws UnknownHostException if the parsed address bytes are rejected by
 *                              {@link InetAddress#getByAddress(byte[])}
 */
public static void httpClientConnect(String local, String ip) throws UnknownHostException {
    String[] ipStr = local.split("\\.");
    byte[] localAddressByte = new byte[4];
    for (int i = 0; i < 4; i++) {
        localAddressByte[i] = (byte) (Integer.parseInt(ipStr[i]) & 0xff);
    }
    // Key step: the local address is carried by the per-request RequestConfig.
    RequestConfig config = RequestConfig.custom()
            .setLocalAddress(InetAddress.getByAddress(localAddressByte))
            .build();
    HttpGet httpGet = new HttpGet("http://" + ip);
    httpGet.setConfig(config);
    // The response is closed together with the client so the connection is
    // released even when reading the entity fails.
    try (CloseableHttpClient httpClient = HttpClients.createDefault();
         CloseableHttpResponse response = httpClient.execute(httpGet)) {
        String body = EntityUtils.toString(response.getEntity(), "UTF-8");
        // log.info(body);
        // TODO implement page content extraction
    } catch (IOException e) {
        // No logger is visible in this snippet; keep the original reporting.
        e.printStackTrace();
    }
}
基于Socket:
/**
 * Opens a plain TCP connection to {@code 192.168.1.1:80} with the socket bound
 * to the local address {@code 192.168.1.2} (local port chosen by the system).
 *
 * @throws IOException if binding or connecting fails
 */
@Test
void socketConnect() throws IOException {
    String localAddress = "192.168.1.2";
    String[] ipStr = localAddress.split("\\.");
    byte[] localAddressByte = new byte[4];
    for (int i = 0; i < 4; i++) {
        localAddressByte[i] = (byte) (Integer.parseInt(ipStr[i]) & 0xff);
    }
    String address = "192.168.1.1";
    int port = 80;
    // try-with-resources closes the socket even if bind/connect throws.
    try (Socket soc = new Socket()) {
        // Port 0 lets the system pick the local port.
        soc.bind(new InetSocketAddress(InetAddress.getByAddress(localAddressByte), 0));
        soc.connect(new InetSocketAddress(address, port));
        // TODO implement page content extraction
    }
}
沙发🛋️