H
HAIRY
Парсю одну и ту же страницу двумя способами.
В первом варианте (через Selenium) в match_info['liga'] у всех матчей сохраняется последнее значение.
Парсю вот так
всё работает, лиги сохраняются как надо. Расскажите, в чём разница? Одни и те же элементы, одна и та же страница, одна и та же логика.
Python:
# Collect matches whose two score cells are both 0, keyed by the match title.
# Each entry records the league name and the match link.
g_matchs = {}
for liga in ligs:
    liga_name = liga.find_element_by_class_name('c-events__liga').text
    # The leading "." anchors the XPath at this league element. An expression
    # starting with a bare "//" is evaluated from the document root even when
    # it is called on an element, so every league would return every match on
    # the page and each later league would overwrite 'liga' for all of them,
    # leaving only the last league name.
    matchs = liga.find_elements_by_xpath(
        ".//div[@class='c-events__item c-events__item_col']"
    )
    for match in matchs:
        title_match = match.find_element_by_class_name('c-events__teams').get_attribute('title')
        link_match = match.find_element_by_class_name('c-events__name').get_attribute('href')
        score_cells = match.find_elements_by_class_name("c-events-scoreboard__cell--all")
        score_1 = int(score_cells[0].text)
        score_2 = int(score_cells[1].text)
        # NOTE(review): the pasted snippet lost its indentation; storing the
        # entry only for 0:0 matches is the assumed original nesting.
        if score_1 == 0 and score_2 == 0:
            match_info = {'liga': liga_name, 'link': link_match}
            g_matchs[title_match] = match_info
Парсю вот так
Python:
soup = BeautifulSoup(driver.page_source)
driver.close()
blocks = soup.find_all('div', {'data-name': 'dashboard-champ-content'})
matchs = {}
for block in blocks:
liga = block.find('a', class_='c-events__liga').get('title')
teams = block.find_all('div', class_='c-events__item c-events__item_col')
for team_info in teams:
matchs_info = {}
time = team_info.find('div', class_='c-events__time')
timer = time.find('span').text
totals = team_info.find('div', class_="c-events-scoreboard__lines")
total = totals.find_all('span', class_='c-events-scoreboard__cell c-events-scoreboard__cell--all')
total_team_one = int(total[0].text)
total_team_two = int(total[1].text)
if (total_team_one > 10 and total_team_two > 10):
continue
link = str(team_info.find('a', class_='c-events__name').get('href'))
team = str(team_info.find('span', class_='c-events__teams').get('title').strip())
matchs_info['liga'], matchs_info['total_team_one'],\
matchs_info['total_team_two'], matchs_info['link']= (liga, total_team_one, total_team_two, link)
matchs[team]= matchs_info
return matchs