Source code for search_engine_parser.core.engines.yahoo

"""@desc
		Parser for Yahoo search results
"""
import re
from urllib.parse import unquote

from search_engine_parser.core.base import BaseSearch, ReturnType, SearchItem


class Search(BaseSearch):
    """
    Searches Yahoo for string
    """
    name = "Yahoo"
    search_url = "https://search.yahoo.com/search?"
    summary = "\tYahoo is one the most popular email providers and holds the fourth place in "\
        "search with 3.90% market share.\n\tFrom October 2011 to October 2015, Yahoo search "\
        "was powered exclusively by Bing. \n\tSince October 2015 Yahoo agreed with Google to "\
        "provide search-related services and since then the results of Yahoo are powered both "\
        "by Google and Bing. \n\tYahoo is also the default search engine for Firefox browsers "\
        "in the United States (since 2014)."

    def get_params(self, query=None, page=None, offset=None, **kwargs):
        """
        Builds the query-string parameters for a Yahoo search request

        :param query: search string, sent as the ``p`` parameter
        :param page: not used by this engine
        :param offset: value sent as the ``b`` parameter
        :param kwargs: ignored
        :return: mapping with the keys ``p`` and ``b``
        :rtype: dict
        """
        return {"p": query, "b": offset}

    def parse_soup(self, soup):
        """
        Parses Yahoo for a search query

        :param soup: parsed page; must provide a ``find_all`` method
            (presumably a ``bs4.BeautifulSoup`` object)
        :return: every ``<div>`` carrying the CSS class ``Sr``
        """
        return soup.find_all('div', class_='Sr')

    def parse_single_result(self, single_result, return_type=ReturnType.FULL, **kwargs):
        """
        Parses the source code to return

        :param single_result: single result found in <div class="Sr">
        :type single_result: presumably `bs4.element.Tag` (only ``find`` is used)
        :param return_type: which fields to extract; ``ReturnType.FULL``
            extracts title, link and description
        :return: parsed title, link and description of single result, stored
            under the keys ``titles``, ``links`` and ``descriptions``
        :rtype: SearchItem
        """
        rdict = SearchItem()
        h3_tag = single_result.find('h3', class_='title')

        # Compare against the ReturnType class itself rather than reaching for
        # members through the ``return_type`` value.
        if return_type in (ReturnType.FULL, ReturnType.TITLE):
            rdict["titles"] = h3_tag.text

        if return_type in (ReturnType.FULL, ReturnType.LINK):
            raw_link = h3_tag.find('a').get('href')
            # The href is expected to embed the percent-encoded target URL
            # between "/RU=" and "/RK"; when that wrapper is absent, keep the
            # href unchanged instead of failing on an empty match list.
            match = re.search(r"/RU=(.+)/RK", raw_link)
            # unquote() decodes every percent-escape, not only %3a and %2f.
            rdict["links"] = unquote(match.group(1)) if match else raw_link

        if return_type in (ReturnType.FULL, ReturnType.DESCRIPTION):
            desc = single_result.find('p', class_='fz-ms')
            rdict["descriptions"] = desc.text

        return rdict