In this post, I will explain a convenient way to use Selenium WebDriver to collect all the links on a web page and then verify, in several different ways, whether each linked page is a "404" / "Page not found" page.
The following ways can be used to identify 404 links:
By using page title
By using page source
By using response code of page URL
The code below demonstrates all three approaches.
Sample Code:
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
/**
 * TestNG test that collects the hyperlinks found on the start page and reports
 * those that are unreachable or that lead to a "404" page.
 *
 * <p>A link is reported as a 404 link when any of the following holds for its
 * target: the page title contains "404", the page source contains
 * "404 page not found", or a plain HTTP GET returns status code 404.
 */
public class GetAllLinks {
    /** Browser session created in {@link #setup()} and used by the test. */
    public WebDriver driver;

    /** Absolute http/https link targets collected from the start page. */
    ArrayList<String> al;

    /** Starts Firefox, opens the start page and maximizes the window. */
    @BeforeClass
    public void setup() {
        driver = new FirefoxDriver();
        driver.get("https://www.google.com/");
        driver.manage().window().maximize();
    }

    /**
     * Collects every http/https link on the current page, then checks each one
     * and prints the links that are broken or that resolve to a 404 page.
     *
     * @throws MalformedURLException kept for signature compatibility; malformed
     *         link targets are reported as broken links instead of propagating
     * @throws IOException kept for signature compatibility; per-link I/O
     *         failures are reported as broken links instead of propagating
     */
    @Test
    public void identifyBrokenAnd404Links() throws MalformedURLException, IOException {
        al = new ArrayList<String>();

        // Total number of anchor elements on the page.
        List<WebElement> links = driver.findElements(By.tagName("a"));
        System.out.println(links.size());

        // Read all href values up front: navigating away later would make the
        // WebElement references stale.
        for (WebElement link : links) {
            String href = link.getAttribute("href");
            // Anchors without an href yield null; skip them together with
            // non-HTTP schemes such as mailto: or javascript:.
            if (href != null && (href.startsWith("http://") || href.startsWith("https://"))) {
                System.out.println(href);
                al.add(href);
            }
        }

        for (String target : al) {
            // Probe the link first. A failure here (unknown host, refused
            // connection, malformed URL) means the link itself is broken and
            // must not abort the check of the remaining links.
            int statusCode;
            try {
                statusCode = ResponseCode(target);
            } catch (IOException e) {
                System.out.println("Broken link is " + target + " (" + e + ")");
                continue;
            }

            driver.get(target);
            String title = driver.getTitle();

            // Same three checks as before, evaluated in the same order:
            // page title, page source, then HTTP status code.
            boolean titleSays404 = title != null && title.contains("404");
            if (titleSays404
                    || driver.getPageSource().contains("404 page not found")
                    || statusCode == 404) {
                System.out.println("404 link is " + target);
            }
        }
    }

    /**
     * Issues an HTTP GET request to the given address and returns the response
     * status code.
     *
     * @param URL absolute URL to request
     * @return the HTTP status code of the response
     * @throws MalformedURLException if {@code URL} cannot be parsed as a URL
     * @throws IOException if connecting or reading the response fails
     */
    public static int ResponseCode(String URL) throws MalformedURLException, IOException {
        URL url = new URL(URL);
        HttpURLConnection huc = (HttpURLConnection) url.openConnection();
        try {
            huc.setRequestMethod("GET");
            huc.connect();
            return huc.getResponseCode();
        } finally {
            // Release the connection even when the request fails.
            huc.disconnect();
        }
    }
}