jsoup: Suchabfrage an Google senden

jsoup: Suchabfrage an Google senden

google-search

In diesem Beispiel erfahren Sie, wie Sie mit jsoup eine Suchanfrage an Google senden.

    Document doc = Jsoup
        .connect("https://www.google.com/search?q=mario");
        .userAgent("Mozilla/5.0")
        .timeout(5000).get();

Unusual traffic from your computer network
Verwenden Sie dieses Beispiel nicht, um Google zu spammen; andernfalls erhalten Sie die obige Nachricht von Google. Lesen Sie dazu diese Antwort von Google (Google answer).

1. jsoup Beispiel

Beispiel zum Senden einer "Mario"-Suchabfrage an Google, Analysieren des Suchergebnisses und Herausfiltern der Domainnamen.

FunnyCrawler.java

package com.example;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Sends a search query to Google, collects the result links from the returned
 * page and reduces each of them to its domain name.
 */
public class FunnyCrawler {

  /** Regular expression used to locate a domain name inside a link target. */
  private static final String DOMAIN_NAME_PATTERN
    = "([a-zA-Z0-9]([a-zA-Z0-9\\-]{0,61}[a-zA-Z0-9])?\\.)+[a-zA-Z]{2,6}";

  /** Compiled once and shared; {@link Pattern} instances are immutable. */
  private static final Pattern patternDomainName = Pattern.compile(DOMAIN_NAME_PATTERN);

  /**
   * Runs the search for "mario", prints every distinct domain name that was
   * found and finally the number of distinct entries.
   *
   * @param args ignored
   */
  public static void main(String[] args) {

    FunnyCrawler obj = new FunnyCrawler();
    Set<String> result = obj.getDataFromGoogle("mario");
    for (String temp : result) {
      System.out.println(temp);
    }
    System.out.println(result.size());
  }

  /**
   * Extracts the first domain name contained in the given text.
   *
   * @param url text to search, typically the {@code href} of a result link
   * @return the first match of {@link #DOMAIN_NAME_PATTERN}, lower-cased and
   *         trimmed, or an empty string if {@code url} is {@code null} or
   *         contains no match
   */
  public String getDomainName(String url) {

    if (url == null) {
      return "";
    }

    // The matcher is kept local so that no state is shared between calls.
    Matcher matcher = patternDomainName.matcher(url);
    if (!matcher.find()) {
      return "";
    }

    // Locale.ROOT keeps the lower-casing independent of the default locale.
    return matcher.group(0).toLowerCase(java.util.Locale.ROOT).trim();
  }

  /**
   * Requests the Google result page for {@code query} and returns the domain
   * names of all links whose {@code href} starts with {@code /url?q=}.
   *
   * <p>If the request fails with an {@link IOException}, the stack trace is
   * printed and whatever has been collected so far (normally nothing) is
   * returned.
   *
   * @param query search term; it is URL-encoded before being sent
   * @return the distinct domain names found; a link without a recognisable
   *         domain contributes the empty string, exactly as
   *         {@link #getDomainName(String)} returns it
   */
  private Set<String> getDataFromGoogle(String query) {

    Set<String> result = new HashSet<String>();
    String request = "https://www.google.com/search?q=" + encodeQuery(query) + "&num=20";
    System.out.println("Sending request..." + request);

    try {

      // The request identifies itself with a Googlebot user agent string.
      Document doc = Jsoup
          .connect(request)
          .userAgent(
            "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)")
          .timeout(5000).get();

      // Walk over every anchor that carries an href attribute.
      Elements links = doc.select("a[href]");
      for (Element link : links) {

        String href = link.attr("href");
        if (href.startsWith("/url?q=")) {
          // Reduce the link target to its domain name via the regex.
          result.add(getDomainName(href));
        }

      }

    } catch (IOException e) {
      // Best effort: report the failure and fall through to the return below.
      e.printStackTrace();
    }

    return result;
  }

  /**
   * URL-encodes a search term so that characters such as spaces, {@code &}
   * or {@code #} cannot corrupt the request URL.
   *
   * @param query raw search term
   * @return the term encoded as {@code application/x-www-form-urlencoded} using UTF-8
   */
  private static String encodeQuery(String query) {
    try {
      return java.net.URLEncoder.encode(query, "UTF-8");
    } catch (java.io.UnsupportedEncodingException e) {
      // Every Java platform is required to support UTF-8.
      throw new IllegalStateException("UTF-8 is not supported", e);
    }
  }

}

Ausgabe

Sending request...https://www.google.com/search?q=mario&num=20

www.imdb.com
www.mariobatali.com
www.freemario.org
www.mariogames.be
mario.wikia.com
stabyourself.net
webcache.googleusercontent.com
www.youtube.com
www.huffingtonpost.com
www.mariowiki.com
mario.lancashire.gov.uk
amirulhafiz.deviantart.com
www.mariohugo.com
mariofoods.com
mario.nintendo.com
www.mario2u.com
www.botta.ch
en.wikipedia.org
www.mariotestino.com
www.hubmario.com
www.mariolemieux.org
pouetpu.pbworks.com
23