Question 5
I noticed the table does not have an identifier. However, the table I want is in a div that has the identifier id="_idItemTableForP".
Would this help?
# Fetch the page and print the header cells and body rows of the table that
# sits inside the <div id="_idItemTableForP"> container. The table itself has
# no identifier, so the enclosing div is used as the anchor.
import requests
from bs4 import BeautifulSoup

url = 'https://vkatsikaros.github.io/dataharvest24-www.github.io/'
response = requests.get(url)

if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')
    # Locate the wrapper div first; find() returns None when nothing matches.
    div = soup.find('div', id='_idItemTableForP')

    if div is not None:
        # First <table> found inside the div.
        table = div.find('table')

        if table is not None:
            headers = []
            # NOTE: find('thead') returns None when the table has no <thead>
            # element, and the chained .find_all() then raises the
            # AttributeError recorded in the Output section after this listing.
            for th in table.find('thead').find_all('th'):
                headers.append(th.text.strip())

            rows = []
            # NOTE(review): the <tbody> lookup is chained the same way, so it
            # would presumably fail identically on a table without <tbody>.
            for tr in table.find('tbody').find_all('tr'):
                # One list of stripped cell texts per table row.
                cells = [td.text.strip() for td in tr.find_all('td')]
                rows.append(cells)

            print("Headers:", headers)
            for row in rows:
                print("Row:", row)
        else:
            print("Table not found within the div. Check the structure.")
    else:
        print("Div with id '_idItemTableForP' not found. Check the id.")
else:
    print('Failed to retrieve the webpage. Status code:', response.status_code)
The diff:
if response.status_code == 200:
soup = BeautifulSoup(response.content, 'html.parser')
- table = soup.find('table', id='target-table') # Adjust this line based on your table's actual identifier
+ div = soup.find('div', id='_idItemTableForP')
+
+ if div is not None:
+ table = div.find('table')
if table is not None:
headers = []
@@ -22,6 +25,8 @@ if response.status_code == 200:
for row in rows:
print("Row:", row)
else:
- print("Table not found. Check the id or class name.")
+ print("Table not found within the div. Check the structure.")
+ else:
+ print("Div with id '_idItemTableForP' not found. Check the id.")
else:
print('Failed to retrieve the webpage. Status code:', response.status_code)
Output
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-3-46cc628e2efb> in <cell line: 7>()
14 if table is not None:
15 headers = []
---> 16 for th in table.find('thead').find_all('th'):
17 headers.append(th.text.strip())
18
AttributeError: 'NoneType' object has no attribute 'find_all'
⇦ question 4 | Index | question 6 ⇨ |