Vangelis Katsikaros

Question 3

I would like to isolate one table from the webpage, how do I do this?

import requests
from bs4 import BeautifulSoup

url = 'https://vkatsikaros.github.io/dataharvest24-www.github.io/'
response = requests.get(url)  # download the page over HTTP
# Parse the page only when the server answered with status 200; otherwise report the status code.
if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')
    # NOTE(review): 'target-table' is presumably a placeholder id -- confirm it against the page's real HTML.
    table = soup.find('table', id='target-table')  # Use id or class as necessary
    # NOTE(review): find() gives None when no <table> matches; table is not checked, so the next loop raises AttributeError in that case.
    headers = []
    for th in table.find('thead').find_all('th'):
        headers.append(th.text.strip())  # column title with surrounding whitespace removed

    rows = []
    for tr in table.find('tbody').find_all('tr'):
        cells = [td.text.strip() for td in tr.find_all('td')]  # stripped text of every <td> in this row
        rows.append(cells)

    print("Headers:", headers)
    for row in rows:
        print("Row:", row)
else:
    print('Failed to retrieve the webpage. Status code:', response.status_code)

The diff:

 if response.status_code == 200:
     soup = BeautifulSoup(response.content, 'html.parser')
-    titles = soup.find_all('h2')
     
-    for title in titles:
-        print(title.get_text())
+    table = soup.find('table', id='target-table')  # Use id or class as necessary
+
+    headers = []
+    for th in table.find('thead').find_all('th'):
+        headers.append(th.text.strip())
+
+    rows = []
+    for tr in table.find('tbody').find_all('tr'):
+        cells = [td.text.strip() for td in tr.find_all('td')]
+        rows.append(cells)
+
+    print("Headers:", headers)
+    for row in rows:
+        print("Row:", row)
 else:
     print('Failed to retrieve the webpage. Status code:', response.status_code)

Output

---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-25-4a4a60cc96be> in <cell line: 7>()
     11 
     12     headers = []
---> 13     for th in table.find('thead').find_all('th'):
     14         headers.append(th.text.strip())
     15 

AttributeError: 'NoneType' object has no attribute 'find'
⇦ question 2a Index question 4 ⇨