"""Parsing a Wikipedia page about a programming language.

 We extract the 'Influenced by' and 'Influenced' relationships to other
 languages.
 """


 from collections import defaultdict

 import bs4
 from bs4 import BeautifulSoup


 def _find_info_table(html):
     """Locate the info box table of a language page.

     The info box sits on the right hand side of the page and lists both
     the languages that influenced the target language and the languages
     it influenced in turn.

     Returns whatever `html.find` yields for a ``table`` element whose
     ``class`` attribute is ``infobox vevent``.
     """
     infobox_attrs = {'class': 'infobox vevent'}
     return html.find('table', attrs=infobox_attrs)


 def _find_lang_row(info_table, target_header):
     """Find the row in the table that contains `target_header`.

     Where `target_header` is either 'Influenced by' or 'Influenced'.

     Each item obtained by iterating `info_table` is inspected. Items
     that expose no ``th`` attribute, or whose header has no contents,
     are skipped. The first item whose header's first content equals
     `target_header` is returned.

     Returns the matching row, or None when no row matches.
     """
     # NOTE(review): only the items yielded by iterating `info_table` are
     # scanned; rows nested one level deeper (e.g. inside a <tbody>) would
     # not be seen -- confirm against the parser output in use.
     for row in info_table:
         # Items lacking a `th` attribute (such as text nodes) yield None.
         header = getattr(row, 'th', None)
         if header is None:
             continue
         # An empty header cell has no first child to compare; indexing
         # `contents[0]` on it would raise IndexError.
         contents = header.contents
         if contents and contents[0] == target_header:
             return row
     return None


 def _make_lang_map(lang_row):
     """Map the url to the language.

     We use a set to hold the language name just in case there are
     several spellings for a language, i.e. an entry with the same url.

     Only the tags found directly inside the row's first ``td`` are
     considered, and only those carrying an ``href`` attribute. The
     name recorded is the string form of the tag's first content.

     Returns a ``defaultdict(set)`` keyed by href. It is empty when the
     row has no ``td`` or no usable links.
     """
     res = defaultdict(set)
     cell = lang_row.find('td')
     if cell is None:
         # A row without a data cell has nothing to map; iterating None
         # would raise TypeError.
         return res
     for entry in cell:
         # Skip anything that is not a tag (e.g. text between the links).
         if not isinstance(entry, bs4.element.Tag):
             continue
         href = entry.get('href')
         # A tag without contents has no name to record; indexing
         # `contents[0]` on it would raise IndexError.
         if not href or not entry.contents:
             continue
         name = str(entry.contents[0])
         if name:
             res[href].add(name)
     return res


 def parse(html_text):
     """Parse the given HTML of a language page.

     Returns a dict. It is empty when no info table is found. Otherwise
     it has the keys 'Influenced by' and 'Influenced', each holding the
     url -> set-of-names mapping built from the matching row, or an
     empty dict when the table has no such row.
     """
     # Name the parser explicitly: without it BeautifulSoup picks whichever
     # parser happens to be installed (and warns), so the resulting tree
     # could differ between environments.
     html = BeautifulSoup(html_text, 'html.parser')
     info_table = _find_info_table(html)
     if info_table is None:
         return {}
     res = {}
     for target_header in ('Influenced by', 'Influenced'):
         lang_row = _find_lang_row(info_table, target_header)
         if lang_row is None:
             res[target_header] = {}
         else:
             res[target_header] = _make_lang_map(lang_row)
     return res