Vangelis Katsikaros

Question 5

I noticed the table does not have an identifier. However, the table I want is in a div that has the identifier id="_idItemTableForP". Would this help?

import requests
from bs4 import BeautifulSoup

url = 'https://vkatsikaros.github.io/dataharvest24-www.github.io/'  # page whose table is scraped
response = requests.get(url)  # plain GET; no timeout argument is passed

if response.status_code == 200:  # parse only when the request succeeded
    soup = BeautifulSoup(response.content, 'html.parser')
    div = soup.find('div', id='_idItemTableForP')  # the table has no id of its own, so locate its wrapping div

    if div is not None:
        table = div.find('table')  # first <table> found inside that div

        if table is not None:
            headers = []
            for th in table.find('thead').find_all('th'):  # NOTE: find('thead') is None if the table lacks a <thead> -> the AttributeError shown under "Output"
                headers.append(th.text.strip())

            rows = []
            for tr in table.find('tbody').find_all('tr'):  # NOTE(review): same None risk if the table has no <tbody>
                cells = [td.text.strip() for td in tr.find_all('td')]  # stripped text of every <td> in this row
                rows.append(cells)

            print("Headers:", headers)
            for row in rows:
                print("Row:", row)
        else:
            print("Table not found within the div. Check the structure.")
    else:
        print("Div with id '_idItemTableForP' not found. Check the id.")
else:
    print('Failed to retrieve the webpage. Status code:', response.status_code)

The diff:

 if response.status_code == 200:
     soup = BeautifulSoup(response.content, 'html.parser')
-    table = soup.find('table', id='target-table')  # Adjust this line based on your table's actual identifier
+    div = soup.find('div', id='_idItemTableForP')
+    
+    if div is not None:
+        table = div.find('table')
 
         if table is not None:
             headers = []
@@ -22,6 +25,8 @@ if response.status_code == 200:
             for row in rows:
                 print("Row:", row)
         else:
-        print("Table not found. Check the id or class name.")
+            print("Table not found within the div. Check the structure.")
+    else:
+        print("Div with id '_idItemTableForP' not found. Check the id.")
 else:
     print('Failed to retrieve the webpage. Status code:', response.status_code)

Output

---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-3-46cc628e2efb> in <cell line: 7>()
     14         if table is not None:
     15             headers = []
---> 16             for th in table.find('thead').find_all('th'):
     17                 headers.append(th.text.strip())
     18 

AttributeError: 'NoneType' object has no attribute 'find_all'
⇦ question 4 Index question 6 ⇨