I am confident that you can use your Firefox browser for this task, although I have not personally tested it. I am a Chrome user myself; if you choose to use chromedriver, make sure its version matches your browser's version and add it to your system path. The only drawback of this method is that it keeps a browser window open until the page has fully loaded (it has to wait for JavaScript to generate the match data). Feel free to reach out if you need any further assistance. Best of luck!
https://chromedriver.chromium.org/downloads
Known issues: Occasionally, an "index out of range" error may occur when retrieving match data. This appears to happen because the XPath to the data can differ from one link to another.
from selenium import webdriver
from lxml import html
from lxml.html import HtmlElement
def test():
    """Print the scores returned by get_last_5 for each sample match URL."""
    # Sample match pages used to try the scraper out
    sample_urls = ['https://www.mismarcadores.com/partido/noIPZ3Lj/#h2h;overall']
    for match_url in sample_urls:
        label = "Scores after this match {u}".format(u=match_url)
        print(label, get_last_5(match_url))
def get_last_5(url):
    """Return the scores of up to five table rows listed after the given match.

    Opens ``url`` in a Selenium-driven Chrome window, parses the rendered page
    body with lxml, locates the row of the ``tab-h2h-overall`` table that
    corresponds to the match shown on the page (same teams and date, as
    decided by ``is_match``), and collects the score text of the rows that
    follow it.

    Args:
        url: Address of the match page to scrape.

    Returns:
        A list of score strings, one per row following the match row; at most
        five, fewer when the table ends sooner.

    Raises:
        IndexError: If an expected element is missing from the page.
        ValueError: If no row of the table corresponds to the match.
    """
    print("processing {u}, please wait...".format(u=url))
    browser = webdriver.Chrome()
    try:
        browser.get(url)
        inner_html = browser.execute_script("return document.body.innerHTML")
    finally:
        # Everything below works on the captured HTML string, so the window
        # can be closed right away -- and must be closed even if loading fails.
        browser.quit()

    tree: HtmlElement = html.fromstring(inner_html)
    first_team = tree.xpath('//*[@id="flashscore"]/div[1]/div[1]/div[2]/div/div/a')[0].text
    second_team = tree.xpath('//*[@id="flashscore"]/div[1]/div[3]/div[2]/div/div/a')[0].text
    # Only the first eight characters of the timestamp are compared with the
    # date shown in each table row.
    match_date = tree.xpath('//*[@id="utime"]')[0].text[0:8]
    # The final child of the table body is skipped (presumably it is not a
    # match row -- TODO confirm against the page markup).
    rows = tree.xpath('//*[@id="tab-h2h-overall"]/div[1]/table/tbody')[0].getchildren()[:-1]

    # Index of the first row *after* the match row, or None when not found.
    match_position = None
    for position, row in enumerate(rows, start=1):
        if is_match(first_team, second_team, match_date, row):
            match_position = position
            break
    if match_position is None:
        raise ValueError(
            "could not find the match {t1} - {t2} ({d}) in the table at {u}".format(
                t1=first_team, t2=second_team, d=match_date, u=url
            )
        )

    scores = []
    # Slicing clamps at the end of the list, so this yields at most five rows.
    for row in rows[match_position:match_position + 5]:
        data = row.getchildren()[4].getchildren()[0].text_content()
        # A five-character text is taken as the score itself; for longer text
        # the five characters preceding the last one are used.
        scores.append(data if len(data) == 5 else data[-6:-1])
    print("finished processing {u}.".format(u=url))
    return scores
def is_match(t1, t2, match_date, row):
    """Tell whether a table row describes the match with these teams and date.

    Args:
        t1: Name of the first team.
        t2: Name of the second team.
        match_date: Date text to compare with the row's date cell.
        row: Table-row element; its children at index 0, 2 and 3 hold the
            date, the first team and the second team respectively.

    Returns:
        True when the row's date and both team names equal the given values,
        otherwise False.
    """
    cells = row.getchildren()
    row_date = cells[0].getchildren()[0].text

    def _team_name(cell):
        # The name sits either directly in the cell's first child or, when
        # that child has children of its own, in the first of those.
        holder = cell.getchildren()[0]
        nested = holder.getchildren()
        return nested[0].text if nested else holder.text

    row_team1 = _team_name(cells[2])
    row_team2 = _team_name(cells[3])
    return match_date == row_date and t1 == row_team1 and t2 == row_team2