2016-03-27 17 views
-3

Bunu eşzamanlı bir yürütücü (executor) kullanarak veya daha iyi bir yöntemle nasıl yeniden yazabilirim? Yani iş parçacığı havuzu yürütücüsü (thread pool executor) ile. Temel olarak tarayıcının (crawler) belirtilen URL'leri taramasını ve ardından bu sayfalarda bulunan, başka web sitelerine giden URL'leri de takip etmesini istiyorum. Bunu iş parçacığı havuzu yürütücüsü kullanarak nasıl yeniden uygulayabilirim?

package Mainpackge; 

import java.io.IOException; 

import java.util.ArrayList; 
import java.util.List; 

import org.jsoup.Jsoup; 
import org.jsoup.nodes.Document; 
import org.jsoup.nodes.Element; 
import org.jsoup.select.Elements; 

/**
 * Entry point of the sample crawler: starts one {@link Worker} thread per seed
 * URL and then prints the absolute target of every link each worker collected.
 */
public class main {

    /**
     * Starts one worker thread per seed URL, then waits for each worker in
     * submission order and prints its links (or an error line if it failed).
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // List of URLs to collect data from
        String[] urls = new String[]{
            "http://www.answers.com/",
            "http://www.britannica.com/",
            "https://ie.yahoo.com/?p=us",
            "https://en.wikipedia.org/wiki/Main_Page",
            // Fixed: the original literal contained a space ("ww w"), which is not a valid URL.
            "http://www.worldbook.com/",
            "http://www.computerlanguage.com/",
            "http://www.howstuffworks.com/",
            "http://www.dmoz.org/Computers/Computer_Science/"
        };

        // Create and start workers
        List<Worker> workers = new ArrayList<>(urls.length);
        for (String url : urls) {
            Worker w = new Worker(url);
            workers.add(w);
            new Thread(w).start();
        }

        // Retrieve results; waitForResults() blocks until the worker has published them
        for (Worker w : workers) {
            Elements results = w.waitForResults();
            if (results != null) {
                for (Element result : results) {
                    // absUrl("href") resolves the link target against the page's base URI
                    System.out.println(w.getName() + ": " + result.absUrl("href"));
                }
            } else {
                System.err.println(w.getName() + " had some error!");
            }
        }
    }
}

/**
 * Fetches a single page and collects all of its {@code <a>} elements.
 *
 * <p>The result is handed over to the waiting thread through a private lock:
 * {@link #run()} publishes the outcome and {@link #waitForResults()} blocks
 * until it is available. Completion is tracked with a separate {@code done}
 * flag so that a failed fetch wakes the waiter instead of blocking it forever.
 */
class Worker implements Runnable {

    private final String url;
    private final String name;
    // Incremented without synchronization; the visible caller constructs workers from one thread.
    private static int number = 0;

    private final Object lock = new Object();
    // Both fields below are only read and written while holding 'lock'.
    private Elements results;
    private boolean done;

    /**
     * @param url address of the page this worker will fetch
     */
    public Worker(String url) {
        this.url = url;
        this.name = "Worker-" + (number++);
    }

    /**
     * @return the generated name of this worker ("Worker-N")
     */
    public String getName() {
        return name;
    }

    /**
     * Downloads the page, selects every {@code a} element and publishes the
     * outcome. On failure the published result is {@code null}.
     */
    @Override
    public void run() {
        Elements links = null;
        try {
            Document doc = Jsoup.connect(this.url).get();
            links = doc.select("a");
        } catch (IOException e) {
            // You should implement a better error handling code..
            System.err.println("Error while parsing: " + this.url);
            e.printStackTrace();
        } finally {
            // Always signal completion, even when the fetch failed or an unchecked
            // exception is propagating; otherwise waitForResults() never returns.
            synchronized (lock) {
                this.results = links;
                this.done = true;
                lock.notifyAll();
            }
        }
    }

    /**
     * Blocks until {@link #run()} has finished.
     *
     * @return the collected links, or {@code null} if the fetch failed or the
     *         waiting thread was interrupted
     */
    public Elements waitForResults() {
        synchronized (lock) {
            try {
                // Loop guards against spurious wake-ups.
                while (!this.done) {
                    lock.wait();
                }
                return this.results;
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers can still observe the interruption.
                Thread.currentThread().interrupt();
                e.printStackTrace();
                return null;
            }
        }
    }
}

cevap

0

İş parçacıkları için ExecutorService ve Callable kullanan tam bir örnek:

import java.util.ArrayList; 
import java.util.Arrays; 
import java.util.List; 
import java.util.concurrent.Callable; 
import java.util.concurrent.ExecutionException; 
import java.util.concurrent.ExecutorService; 
import java.util.concurrent.Executors; 
import java.util.concurrent.Future; 
import java.util.concurrent.TimeUnit; 

/**
 * Skeleton of a two-level crawler built on a fixed-size thread pool: every seed
 * URL is crawled by a {@link Crawler} task, and every URL a task reports is
 * submitted to the same pool as a new task.
 */
public class ThreadPoolExample {

    /**
     * Submits one crawl task per seed URL, then one follow-up task per URL found,
     * shuts the pool down and waits briefly for the remaining tasks.
     *
     * @param args command-line arguments (unused)
     * @throws InterruptedException if the thread is interrupted while waiting
     * @throws ExecutionException   if a first-level crawl task threw an exception
     */
    public static void main(String[] args) throws InterruptedException, ExecutionException {
        List<String> urls = Arrays.asList(
            "http://www.answers.com/",
            "http://www.britannica.com/",
            "https://ie.yahoo.com/?p=us",
            "https://en.wikipedia.org/wiki/Main_Page",
            // Fixed: the original literal contained a space ("ww w"), which is not a valid URL.
            "http://www.worldbook.com/",
            "http://www.computerlanguage.com/",
            "http://www.howstuffworks.com/",
            "http://www.dmoz.org/Computers/Computer_Science/"
        );

        ExecutorService ex = Executors.newFixedThreadPool(10);
        try {
            List<Future<Element>> results = new ArrayList<>();
            for (String seed : urls) {
                results.add(ex.submit(new Crawler(seed)));
            }

            for (Future<Element> future : results) {
                // get() blocks until the corresponding task has finished
                for (String url : future.get().urls) {
                    // Queue a follow-up crawl for each URL that was found
                    ex.submit(new Crawler(url));
                }
            }
        } finally {
            // Shut down even when get() throws, so the pool threads cannot keep the JVM alive.
            ex.shutdown();
        }

        // awaitTermination only makes sense after shutdown(): give the already
        // submitted follow-up tasks a short grace period to complete.
        if (!ex.awaitTermination(2, TimeUnit.SECONDS)) {
            System.err.println("Crawl tasks still running after 2 seconds");
        }
    }

    /** Task that crawls one URL and reports the URLs discovered on it. */
    public static class Crawler implements Callable<Element> {
        String url;

        /**
         * @param url address this task is responsible for
         */
        public Crawler(String url) {
            this.url = url;
        }

        /**
         * Placeholder crawl: returns a fixed pair of strings instead of real links.
         *
         * @return the URLs discovered by this task
         */
        @Override
        public Element call() throws Exception {
            // Implement your crawling logic and return your elements
            return new Element(Arrays.asList("all new urls", "that you found while crwaling"));
        }

    }

    /** Result holder for the URLs discovered by one {@link Crawler}. */
    public static class Element {
        List<String> urls;

        /**
         * @param urls URLs discovered by a crawl task
         */
        public Element(List<String> urls) {
            this.urls = urls;
        }

        @Override
        public String toString() {
            return "Elements found : " + urls.size();
        }
    }
}
+0

Yürütücü hizmetini (executor service) oluşturduktan hemen sonra neden `awaitTermination` çağırıyorsunuz? İş parçacığı havuzu boyutu neden 10 olarak ayarlandı? Geleneksel olarak iş parçacığı havuzu boyutu, kullanılabilir işlemci sayısına ayarlanır (`Runtime.getRuntime().availableProcessors()`). Bkz.: http://stackoverflow.com/a/1980858/363573 – Stephan