Vangelis Katsikaros

Question 6

Hm I now get the error AttributeError: 'NoneType' object has no attribute 'find_all'

import requests
from bs4 import BeautifulSoup

# Download the page and print the header cells and body rows of the table that
# sits inside the div with id '_idItemTableForP'. Every lookup result is checked
# against None before it is used, so a missing element produces a message
# instead of "AttributeError: 'NoneType' object has no attribute 'find_all'".
url = 'https://vkatsikaros.github.io/dataharvest24-www.github.io/'
# NOTE(review): no timeout is passed, so this call may wait indefinitely on an
# unresponsive server — consider requests.get(url, timeout=...).
response = requests.get(url)

# Only parse the body when the server answered 200; otherwise report the code.
if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')
    # find() gives back None when nothing matches, hence the explicit checks.
    div = soup.find('div', id='_idItemTableForP')
    if div is not None:
        # First <table> found anywhere inside the div.
        table = div.find('table')

        if table is not None:
            # Header texts come from the <th> cells of <thead>; the list stays
            # empty when the table has no <thead>.
            headers = []
            thead = table.find('thead')
            if thead is not None:
                for th in thead.find_all('th'):
                    headers.append(th.text.strip())
            else:
                print("Thead not found in the table.")

            # One list of stripped <td> texts per <tr> found under <tbody>.
            # A <tr> that contains no <td> still contributes an empty list.
            rows = []
            tbody = table.find('tbody')
            if tbody is not None:
                for tr in tbody.find_all('tr'):
                    # Only the text of each cell is kept; attributes such as
                    # link targets are not collected here.
                    cells = [td.text.strip() for td in tr.find_all('td')]
                    rows.append(cells)
            else:
                print("Tbody not found in the table.")

            # Report what was collected, or say explicitly that it was empty.
            if headers:
                print("Headers:", headers)
            else:
                print("No headers found.")
                
            if rows:
                for row in rows:
                    print("Row:", row)
            else:
                print("No rows found.")
        else:
            print("Table not found within the div. Check the structure.")
    else:
        print("Div with id '_idItemTableForP' not found. Check the id.")
else:
    print('Failed to retrieve the webpage. Status code:', response.status_code)

The diff:

 if response.status_code == 200:
     soup = BeautifulSoup(response.content, 'html.parser')
     div = soup.find('div', id='_idItemTableForP')
-
     if div is not None:
         table = div.find('table')
 
         if table is not None:
             headers = []
-            for th in table.find('thead').find_all('th'):
+            thead = table.find('thead')
+            if thead is not None:
+                for th in thead.find_all('th'):
                     headers.append(th.text.strip())
+            else:
+                print("Thead not found in the table.")
 
             rows = []
-            for tr in table.find('tbody').find_all('tr'):
+            tbody = table.find('tbody')
+            if tbody is not None:
+                for tr in tbody.find_all('tr'):
                     cells = [td.text.strip() for td in tr.find_all('td')]
                     rows.append(cells)
+            else:
+                print("Tbody not found in the table.")
 
+            if headers:
                 print("Headers:", headers)
+            else:
+                print("No headers found.")
+                
+            if rows:
                 for row in rows:
                     print("Row:", row)
+            else:
+                print("No rows found.")
         else:
             print("Table not found within the div. Check the structure.")
     else:

Output

Thead not found in the table.
No headers found.
Row: []
Row: ['', '', 'Brick 1 x 1Brick : 3005', 'New', '15,730,337', '6,663', 'EUR 0.0001+']
Row: ['Used', '4,921,039', '87,937', 'EUR 0.0013+']
Row: ['', '', 'Brick 1 x 2Brick : 3004', 'New', '16,800,431', '6,779', 'EUR 0.0001+']
Row: ['Used', '7,076,722', '101,667', 'EUR 0.0009+']
Row: ['', '', 'Brick 1 x 3Brick : 3622', 'New', '3,867,766', '5,893', 'EUR 0.0001+']
Row: ['Used', '1,311,312', '47,376', 'EUR 0.001+']
Row: ['', '', 'Brick 1 x 4Brick : 3010', 'New', '5,709,796', '6,332', 'EUR 0.0001+']
Row: ['Used', '1,851,694', '68,725', 'EUR 0.0027+']
Row: ['', '', 'Brick 1 x 6Brick : 3009', 'New', '2,288,682', '5,834', 'EUR 0.0001+']
Row: ['Used', '829,247', '45,917', 'EUR 0.0013+']
Row: ['', '', 'Brick 1 x 8Brick : 3008', 'New', '1,101,907', '5,121', 'EUR 0.0001+']
Row: ['Used', '350,629', '27,114', 'EUR 0.005+']
Row: ['', '', 'Brick 2 x 2Brick : 3003', 'New', '6,762,462', '6,463', 'EUR 0.0001+']
Row: ['Used', '2,931,055', '78,657', 'EUR 0.0043+']
Row: ['', '', 'Brick 2 x 3Brick : 3002', 'New', '1,051,070', '5,635', 'EUR 0.0055+']
Row: ['Used', '799,535', '39,107', 'EUR 0.0046+']
Row: ['', '', 'Brick 2 x 4Brick : 3001', 'New', '5,920,355', '6,239', 'EUR 0.0001+']
...

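Progress! We see data! But no URL yet: the script only reads the text of each cell (`td.text`), so any links inside the cells are not captured. Note also that the table has no `<thead>`, so no headers were found, and the first row came back empty.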

⇦ question 5 Index question 7 ⇨