Source code for search_engine_parser.core.engines.coursera

"""@desc
		Parser for coursera search results
"""

from search_engine_parser.core.base import BaseSearch, ReturnType, SearchItem
from urllib.parse import urljoin


[docs]class Search(BaseSearch):
    """
    Searches Coursera for string
    """
    name = "Coursera"
    search_url = "https://www.coursera.org/search?"
    summary = "\tCoursera is an American online learning platform founded by Stanford professors Andrew Ng and " \
              "Daphne Koller that offers massive open online courses, specializations, and degrees."

[docs]    def get_params(self, query=None, page=None, offset=None, **kwargs):
        params = {}
        params["query"] =query
        params["page"] = page
        return params

[docs]    def parse_soup(self, soup):
        """
        Parses Coursera Search Soup for results
        """
        # find all class_='gs_r gs_or gs_scl' => each result
        return soup.find_all('li', class_='ais-InfiniteHits-item')

[docs]    def parse_single_result(self, single_result, return_type=ReturnType.FULL, **kwargs):
        """
        Parses the source code to return

        :param single_result: single result found in <div class="gs_r gs_or gs_scl">
        :type single_result: `bs4.element.ResultSet`
        :return: parsed title, link, description, file link, result type of single result
        :rtype: dict
        """
        rdict = SearchItem()

        if return_type in (ReturnType.FULL, return_type.LINK):
            link = single_result.find('a', class_='rc-DesktopSearchCard anchor-wrapper').get('href')

            rdict["links"] = urljoin('https://www.coursera.org', link)

        if return_type in (ReturnType.FULL, return_type.TITLE):
            title = single_result.find('h2', class_="card-title").text
            rdict["titles"] = title

        if return_type in (ReturnType.FULL,):
            partner_elem = single_result.find('span', class_='partner-name')
            partner = ''
            if partner_elem:
                partner = partner_elem.text

            rating_avg_elem = single_result.find('span', class_='ratings-text')
            rating_avg = None
            if rating_avg_elem:
                rating_avg = float(rating_avg_elem.text)

            enrollment_elem = single_result.find('span', class_='enrollment-number')
            enrolment_number = None

            if enrollment_elem:
                enr_cl_txt = enrollment_elem.text.lower().replace(',', '').replace('.', '')\
                        .replace('m', '0' * 6).replace('k', '0' * 3)
                if enr_cl_txt.isdigit():
                    enrolment_number = int(enr_cl_txt)

            difficulty_elem = single_result.find('span', class_='difficulty')
            difficulty = ''
            if difficulty_elem:
                difficulty = difficulty_elem.text

            rating_count_elem = single_result.find('span', class_='ratings-count')
            rating_count = None
            if rating_count_elem:
                rating_count_elem = rating_count_elem.find('span')
                rating_count_cl = rating_count_elem.text.replace(',', '')
                if rating_count_cl.isdigit():
                    rating_count = int(rating_count_cl)

            rdict.update({
                "partners": partner,
                "ratings_avg": rating_avg,
                "ratings_count": rating_count,
                "enrolments_numbers": enrolment_number,
                "difficulties": difficulty,
            })
        return rdict