from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

linkstosearch = [
    'https://www.euronews.com/tag/solar-energy',
    'https://www.pv-magazine.com/category/opinion/',
    'https://www.theguardian.com/environment/solarpower'
]
sndround = []      # tag/pagination pages queued for a second crawl round
keyword = "solar"
reslinks = []      # final list of collected article links


def findElsOnPage(link):
    ldomain = link[:16]  # first characters of the parent URL; used to keep only same-site links and avoid social media
    linklist = []        # links found on this page that belong to the parent site
    driver.get(link)
    driver.implicitly_wait(0.5)  # allow up to 0.5 s for elements to appear during lookups
    articles = driver.find_elements(By.TAG_NAME, "a")
    for links in articles:
        try:
            link_href = links.get_attribute("href")
            if ((link_href is not None) and (ldomain in link_href) and (keyword in link_href)
                    and ("#" not in link_href) and (link_href not in linklist)
                    and (link_href not in reslinks)):
                # print(link_href)
                if ("?" in link_href) and (("page" in link_href) or ("p=" in link_href)):
                    if "7" in link_href:  # stop once pagination reaches page 7
                        break
                    if link_href not in sndround:
                        print(link_href)
                        sndround.append(link_href)
                elif "tag" in link_href:
                    continue
                else:
                    linklist.append(link_href)
        except Exception:  # e.g. stale element references; skip and keep going
            continue
    return linklist


for link in linkstosearch:
    reslinks.extend(findElsOnPage(link))
for link in sndround:
    reslinks.extend(findElsOnPage(link))
driver.quit()

print("Mission complete! \nCheck souplinks.txt")
with open('souplinks.txt', 'w') as file:
    # Write each element to a new line in the file
    for link in reslinks:
        file.write(link + '\n')
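
# --- Optional refinement (a sketch, not used by the script above) -------------
# The prefix check `ldomain in link_href` (first 16 characters of the parent URL)
# is a rough way to keep links on the same site. A stricter alternative would be
# to compare hostnames with urllib.parse; the helper below (same_site is a
# hypothetical name) is a possible drop-in for that check and is not called here.
from urllib.parse import urlparse

def same_site(parent_url, candidate_url):
    # True when both URLs share the same host, e.g. "www.euronews.com"
    return urlparse(candidate_url).netloc == urlparse(parent_url).netloc

# Inside findElsOnPage, `ldomain in link_href` could then be replaced with
# `same_site(link, link_href)`.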