jsoup: Suchabfrage an Google senden

In diesem Beispiel erfahren Sie, wie Sie mit jsoup eine Suchanfrage an Google senden.
Document doc = Jsoup
.connect("https://www.google.com/search?q=mario");
.userAgent("Mozilla/5.0")
.timeout(5000).get();
Unusual traffic from your computer network
Verwenden Sie dieses Beispiel nicht, um Google zu spammen; andernfalls erhalten Sie die obige Nachricht von Google. Lesen Sie dazu diese Antwort von Google.
1. jsoup Beispiel
Beispiel zum Senden einer "Mario"-Suchanfrage an Google, Analysieren des Suchergebnisses und Herausfiltern der Domainnamen.
FunnyCrawler.java
package com.example;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Sends a search query to Google, scans the links of the result page and
 * collects the domain names they point to.
 */
public class FunnyCrawler {

    /**
     * Host-name expression: one or more dot-terminated labels (alphanumeric,
     * hyphens allowed inside, at most 63 characters) followed by a 2-6 letter
     * top-level domain.
     */
    private static final String DOMAIN_NAME_PATTERN
            = "([a-zA-Z0-9]([a-zA-Z0-9\\-]{0,61}[a-zA-Z0-9])?\\.)+[a-zA-Z]{2,6}";

    /** Compiled once; Pattern instances are immutable and safe to share. */
    private static final Pattern DOMAIN_NAME = Pattern.compile(DOMAIN_NAME_PATTERN);

    /**
     * Runs a sample search for "mario" and prints every domain found,
     * followed by the number of distinct domains.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        FunnyCrawler obj = new FunnyCrawler();
        Set<String> result = obj.getDataFromGoogle("mario");
        for (String temp : result) {
            System.out.println(temp);
        }
        System.out.println(result.size());
    }

    /**
     * Extracts the first domain name that occurs in the given text.
     *
     * @param url text to scan, typically a link target; may be {@code null}
     * @return the first match, lower-cased and trimmed, or an empty string
     *         when {@code url} is {@code null} or contains no domain name
     */
    public String getDomainName(String url) {
        if (url == null) {
            return "";
        }
        // A local matcher keeps this method free of shared mutable state.
        Matcher matcher = DOMAIN_NAME.matcher(url);
        if (!matcher.find()) {
            return "";
        }
        // Locale.ROOT: domain names must not be lower-cased with
        // locale-specific rules (e.g. the Turkish dotless i).
        return matcher.group(0).toLowerCase(Locale.ROOT).trim();
    }

    /**
     * Requests the Google result page for {@code query} and gathers the
     * domain names of all links whose target starts with {@code /url?q=}.
     * A failed request is reported on standard error and yields whatever
     * was collected so far (normally an empty set).
     *
     * @param query search terms; URL-encoded before being sent
     * @return the distinct domain names found, never {@code null}
     */
    private Set<String> getDataFromGoogle(String query) {
        Set<String> result = new HashSet<String>();
        try {
            // Encode the query so spaces, '&', '#', ... cannot break the URL.
            String request = "https://www.google.com/search?q="
                    + URLEncoder.encode(query, "UTF-8") + "&num=20";
            System.out.println("Sending request..." + request);

            // need http protocol, set this as a Google bot agent :)
            Document doc = Jsoup
                    .connect(request)
                    .userAgent(
                            "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)")
                    .timeout(5000).get();

            // get all links
            Elements links = doc.select("a[href]");
            for (Element link : links) {
                String temp = link.attr("href");
                if (temp.startsWith("/url?q=")) {
                    // use regex to get domain name
                    String domainName = getDomainName(temp);
                    // Skip links without a recognisable domain instead of
                    // polluting the result with an empty entry.
                    if (!domainName.isEmpty()) {
                        result.add(domainName);
                    }
                }
            }
        } catch (IOException e) {
            // Best effort: report the failure and return what we have.
            System.err.println("Google request for \"" + query + "\" failed: " + e);
        }
        return result;
    }
}
Ausgabe
Sending request...https://www.google.com/search?q=mario&num=20
www.imdb.com
www.mariobatali.com
www.freemario.org
www.mariogames.be
mario.wikia.com
stabyourself.net
webcache.googleusercontent.com
www.youtube.com
www.huffingtonpost.com
www.mariowiki.com
mario.lancashire.gov.uk
amirulhafiz.deviantart.com
www.mariohugo.com
mariofoods.com
mario.nintendo.com
www.mario2u.com
www.botta.ch
en.wikipedia.org
www.mariotestino.com
www.hubmario.com
www.mariolemieux.org
pouetpu.pbworks.com
23