Inteligenta artificiala pariaza pe Brazilia

 

 

In [1]:
# in primul rand, importam modulele necesare
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
import matplotlib.ticker as plticker
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
In [2]:
# citim cele doua dataset-uri de pe Kaggle
world_cup = pd.read_csv('datasets/World Cup 2018 Dataset.csv')
results = pd.read_csv('datasets/results.csv')
In [3]:
# ... si vedem cu ochii noștri ce conțin
world_cup.head()
Out[3]:
  Team Group Previous appearances Previous titles Previous finals Previous semifinals Current FIFA rank First match against Match index history with first opponent W-L history with first opponent goals Second match against Match index.1 history with second opponent W-L history with second opponent goals Third match against Match index.2 history with third opponent W-L history with third opponent goals Unnamed: 19
0 Russia A 10.0 0.0 0.0 1.0 65.0 Saudi Arabia 1.0 -1.0 -2.0 Egypt 17.0 NaN NaN Uruguay 33.0 0.0 0.0 NaN
1 Saudi Arabia A 4.0 0.0 0.0 0.0 63.0 Russia 1.0 1.0 2.0 Uruguay 18.0 1.0 1.0 Egypt 34.0 -5.0 -5.0 NaN
2 Egypt A 2.0 0.0 0.0 0.0 31.0 Uruguay 2.0 -1.0 -2.0 Russia 17.0 NaN NaN Saudi Arabia 34.0 5.0 5.0 NaN
3 Uruguay A 12.0 2.0 2.0 5.0 21.0 Egypt 2.0 1.0 2.0 Saudi Arabia 18.0 -1.0 -1.0 Russia 33.0 0.0 0.0 NaN
4 Porugal B 6.0 0.0 0.0 2.0 3.0 Spain 3.0 -12.0 -31.0 Morocco 19.0 -1.0 -2.0 Iran 35.0 2.0 5.0 NaN
In [4]:
results.head()
Out[4]:
  date home_team away_team home_score away_score tournament city country
0 1872-11-30 Scotland England 0 0 Friendly Glasgow Scotland
1 1873-03-08 England Scotland 4 2 Friendly London England
2 1874-03-07 Scotland England 2 1 Friendly Glasgow Scotland
3 1875-03-06 England Scotland 2 2 Friendly London England
4 1876-03-04 Scotland England 3 0 Friendly Glasgow Scotland
In [5]:
# adaugam diferența de goluri și stabilim cine este câștigătorul

winner = []
for i in range (len(results['home_team'])):
    if results ['home_score'][i] > results['away_score'][i]:
        winner.append(results['home_team'][i])
    elif results['home_score'][i] < results ['away_score'][i]:
        winner.append(results['away_team'][i])
    else:
        winner.append('Draw')
results['winning_team'] = winner

# diferența de goluri o transformăm în coloană separată în dataset-ul results

results['goal_difference'] = np.absolute(results['home_score'] - results['away_score'])

results.head()
Out[5]:
  date home_team away_team home_score away_score tournament city country winning_team goal_difference
0 1872-11-30 Scotland England 0 0 Friendly Glasgow Scotland Draw 0
1 1873-03-08 England Scotland 4 2 Friendly London England England 2
2 1874-03-07 Scotland England 2 1 Friendly Glasgow Scotland Scotland 1
3 1875-03-06 England Scotland 2 2 Friendly London England Draw 0
4 1876-03-04 Scotland England 3 0 Friendly Glasgow Scotland Scotland 3
In [6]:
# hai sa ne jucam cu un subset care include meciurile jucate de România în dataframe-ul Romania

df = results[(results['home_team'] == 'Romania') | (results['away_team'] == 'Romania')]
romania = df.iloc[:]
romania.head()
Out[6]:
  date home_team away_team home_score away_score tournament city country winning_team goal_difference
623 1922-06-08 Yugoslavia Romania 1 2 Friendly Belgrade Yugoslavia Romania 1
639 1922-09-03 Romania Poland 1 1 Friendly Cernăuţi Romania Draw 0
702 1923-06-10 Romania Yugoslavia 1 2 Friendly Bucharest Romania Yugoslavia 1
713 1923-07-01 Romania Czechoslovakia 0 6 Friendly Cluj Romania Czechoslovakia 6
720 1923-09-02 Poland Romania 1 1 Friendly Lwów Poland Draw 0
In [7]:
year = []
for row in romania['date']:
    year.append(int(row[:4]))
romania ['match_year']= year
romania_1930 = romania[romania.match_year >= 1930]
romania_1930.count()
Out[7]:
date               655
home_team          655
away_team          655
home_score         655
away_score         655
tournament         655
city               655
country            655
winning_team       655
goal_difference    655
match_year         655
dtype: int64
In [8]:
# hai sa vedem si grafic cum stam...
wins = []
for row in romania_1930['winning_team']:
    if row != 'Romania' and row != 'Draw':
        wins.append('Loss')
    else:
        wins.append(row)
winsdf= pd.DataFrame(wins, columns=[ 'Romania_Results'])

# ... plot
fig, ax = plt.subplots(1)
fig.set_size_inches(10.7, 6.27)
sns.set(style='darkgrid')
sns.countplot(x='Romania_Results', data=winsdf)
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x10de5dcc0>
 
In [9]:
# sa uitam de Romania, ca ma apuca plansu', si sa vedem echipele calificate la world cup 2018:
worldcup_teams = ['Australia', ' Iran', 'Japan', 'Korea Republic', 
            'Saudi Arabia', 'Egypt', 'Morocco', 'Nigeria', 
            'Senegal', 'Tunisia', 'Costa Rica', 'Mexico', 
            'Panama', 'Argentina', 'Brazil', 'Colombia', 
            'Peru', 'Uruguay', 'Belgium', 'Croatia', 
            'Denmark', 'England', 'France', 'Germany', 
            'Iceland', 'Poland', 'Portugal', 'Russia', 
            'Serbia', 'Spain', 'Sweden', 'Switzerland']
df_teams_home = results[results['home_team'].isin(worldcup_teams)]
df_teams_away = results[results['away_team'].isin(worldcup_teams)]
df_teams = pd.concat((df_teams_home, df_teams_away))
df_teams.drop_duplicates()
df_teams.count()
Out[9]:
date               20565
home_team          20565
away_team          20565
home_score         20565
away_score         20565
tournament         20565
city               20565
country            20565
winning_team       20565
goal_difference    20565
dtype: int64
In [10]:
df_teams.head()
Out[10]:
  date home_team away_team home_score away_score tournament city country winning_team goal_difference
1 1873-03-08 England Scotland 4 2 Friendly London England England 2
3 1875-03-06 England Scotland 2 2 Friendly London England Draw 0
6 1877-03-03 England Scotland 1 3 Friendly London England Scotland 2
10 1879-01-18 England Wales 2 1 Friendly London England England 1
11 1879-04-05 England Scotland 5 4 Friendly London England England 1
In [11]:
# acum sa facem cam ce-am facut cu Romania mai sus, ca sa facem predictiile folosind victoriile drept metrics
year = []
for row in df_teams['date']:
    year.append(int(row[:4]))
df_teams['match_year'] = year
df_teams_1930 = df_teams[df_teams.match_year >= 1930]
df_teams_1930.head()
Out[11]:
  date home_team away_team home_score away_score tournament city country winning_team goal_difference match_year
1230 1930-01-01 Spain Czechoslovakia 1 0 Friendly Barcelona Spain Spain 1 1930
1231 1930-01-12 Portugal Czechoslovakia 1 0 Friendly Lisbon Portugal Portugal 1 1930
1237 1930-02-23 Portugal France 2 0 Friendly Porto Portugal Portugal 2 1930
1238 1930-03-02 Germany Italy 0 2 Friendly Frankfurt am Main Germany Italy 2 1930
1240 1930-03-23 France Switzerland 3 3 Friendly Colombes France Draw 0 1930
In [12]:
# scapam de coloanele care in mod evident nu vor influenta rezultatul
df_teams_1930 = df_teams.drop(['date', 'home_score', 'away_score', 'tournament', 'city', 'country', 'goal_difference', 'match_year'], axis=1)
df_teams_1930.head()
Out[12]:
  home_team away_team winning_team
1 England Scotland England
3 England Scotland Draw
6 England Scotland Scotland
10 England Wales England
11 England Scotland England
In [13]:
# construim modelul. 
# labels = '2' pentru victoria echipei pe teren propriu, '1' pentru egalitate si '0' pentru victoria echipei in deplasare

df_teams_1930 = df_teams_1930.reset_index(drop=True)
df_teams_1930.loc[df_teams_1930.winning_team == df_teams_1930.home_team,'winning_team']=2
df_teams_1930.loc[df_teams_1930.winning_team == 'Draw', 'winning_team']=1
df_teams_1930.loc[df_teams_1930.winning_team == df_teams_1930.away_team, 'winning_team']=0

df_teams_1930.head()
Out[13]:
  home_team away_team winning_team
0 England Scotland 2
1 England Scotland 1
2 England Scotland 0
3 England Wales 2
4 England Scotland 2
In [14]:
# convertim echipa gazda si echipa oaspete din variabile categorice in inputuri continue
# folosim o variabila 'dummy'
final = pd.get_dummies(df_teams_1930, prefix=['home_team', 'away_team'], columns=['home_team', 'away_team'])

# Separam X si y sets
X = final.drop(['winning_team'], axis=1)
y = final["winning_team"]
y = y.astype('int')

# Separam train si test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
In [15]:
final.head()
Out[15]:
  winning_team home_team_Afghanistan home_team_Albania home_team_Algeria home_team_Andorra home_team_Angola home_team_Argentina home_team_Armenia home_team_Aruba home_team_Australia away_team_Venezuela away_team_Vietnam away_team_Vietnam Republic away_team_Wales away_team_Western Australia away_team_Yemen away_team_Yemen DPR away_team_Yugoslavia away_team_Zambia away_team_Zimbabwe
0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 2 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
4 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

5 rows × 417 columns

In [16]:
# folosim logistic regression ca algoritm pentru predictie
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
score = logreg.score(X_train, y_train)
score2 = logreg.score(X_test, y_test)

print("Training set accuracy: ", '%.3f'%(score))
print("Test set accuracy: ", '%.3f'%(score2))
 
Training set accuracy:  0.573
Test set accuracy:  0.551
 
Asta e un rezultat cam “la ghici”, asa ca bagam in seama si rankingurile FIFA, in sensul ca echipa cu un rank mai mare o consideram favorita si o pozitionam in coloana “home”, desi nu exista asa ceva la WC2018Rusia
In [17]:
ranking = pd.read_csv('datasets/fifa_rankings.csv') 
fixtures = pd.read_csv('datasets/fixtures.csv')

# Lista pentru stocarea meciuril
pred_set = []
In [18]:
# facem coloane noi cu rank-ul fiecarei echipe
fixtures.insert(1, 'first_position', fixtures['Home Team'].map(ranking.set_index('Team')['Position']))
fixtures.insert(2, 'second_position', fixtures['Away Team'].map(ranking.set_index('Team')['Position']))
fixtures = fixtures.iloc[:48, :]
fixtures.tail()
Out[18]:
  Round Number first_position second_position Date Location Home Team Away Team Group Result
43 3 6.0 25.0 27/06/2018 21:00 Nizhny Novgorod Stadium Switzerland Costa Rica Group E NaN
44 3 60.0 10.0 28/06/2018 17:00 Volgograd Stadium Japan Poland Group H NaN
45 3 28.0 16.0 28/06/2018 17:00 Samara Stadium Senegal Colombia Group H NaN
46 3 55.0 14.0 28/06/2018 21:00 Saransk Stadium Panama Tunisia Group G NaN
47 3 13.0 3.0 28/06/2018 21:00 Kaliningrad Stadium England Belgium Group G NaN
In [19]:
# Loop pentru a adauga echipe la noul dataset bazat pe rankul fiecarei echipe
for index, row in fixtures.iterrows():
    if row['first_position'] < row['second_position']:
        pred_set.append({'home_team': row['Home Team'], 'away_team': row['Away Team'], 'winning_team': None})
    else:
        pred_set.append({'home_team': row['Away Team'], 'away_team': row['Home Team'], 'winning_team': None})
        
pred_set = pd.DataFrame(pred_set)
backup_pred_set = pred_set

pred_set.head()
Out[19]:
  away_team home_team winning_team
0 Saudi Arabia Russia None
1 Egypt Uruguay None
2 Morocco Iran None
3 Spain Portugal None
4 Australia France None
In [20]:
pred_set = pd.get_dummies(pred_set, prefix=['home_team', 'away_team'], columns=['home_team', 'away_team'])

missing_cols = set(final.columns) - set(pred_set.columns)
for c in missing_cols:
    pred_set[c] = 0
pred_set = pred_set[final.columns]

pred_set = pred_set.drop(['winning_team'], axis=1)

pred_set.head()
Out[20]:
  home_team_Afghanistan home_team_Albania home_team_Algeria home_team_Andorra home_team_Angola home_team_Argentina home_team_Armenia home_team_Aruba home_team_Australia home_team_Austria away_team_Venezuela away_team_Vietnam away_team_Vietnam Republic away_team_Wales away_team_Western Australia away_team_Yemen away_team_Yemen DPR away_team_Yugoslavia away_team_Zambia away_team_Zimbabwe
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

5 rows × 416 columns

In [21]:
# arata bine. let's go on...
predictions = logreg.predict(pred_set)
for i in range(fixtures.shape[0]):
    print(backup_pred_set.iloc[i, 1] + " and " + backup_pred_set.iloc[i, 0])
    if predictions[i] == 2:
        print("Winner: " + backup_pred_set.iloc[i, 1])
    elif predictions[i] == 1:
        print("Draw")
    elif predictions[i] == 0:
        print("Winner: " + backup_pred_set.iloc[i, 0])
    print('Probability of ' + backup_pred_set.iloc[i, 1] + ' winning: ', '%.3f'%(logreg.predict_proba(pred_set)[i][2]))
    print('Probability of Draw: ', '%.3f'%(logreg.predict_proba(pred_set)[i][1]))
    print('Probability of ' + backup_pred_set.iloc[i, 0] + ' winning: ', '%.3f'%(logreg.predict_proba(pred_set)[i][0]))
    print("")
 
Russia and Saudi Arabia
Winner: Russia
Probability of Russia winning:  0.667
Probability of Draw:  0.223
Probability of Saudi Arabia winning:  0.111

Uruguay and Egypt
Winner: Uruguay
Probability of Uruguay winning:  0.583
Probability of Draw:  0.352
Probability of Egypt winning:  0.065

Iran and Morocco
Draw
Probability of Iran winning:  0.217
Probability of Draw:  0.407
Probability of Morocco winning:  0.376

Portugal and Spain
Winner: Spain
Probability of Portugal winning:  0.302
Probability of Draw:  0.344
Probability of Spain winning:  0.354

France and Australia
Winner: France
Probability of France winning:  0.628
Probability of Draw:  0.227
Probability of Australia winning:  0.145

Argentina and Iceland
Winner: Argentina
Probability of Argentina winning:  0.803
Probability of Draw:  0.161
Probability of Iceland winning:  0.036

Peru and Denmark
Winner: Peru
Probability of Peru winning:  0.439
Probability of Draw:  0.171
Probability of Denmark winning:  0.391

Croatia and Nigeria
Winner: Croatia
Probability of Croatia winning:  0.590
Probability of Draw:  0.258
Probability of Nigeria winning:  0.152

Costa Rica and Serbia
Winner: Serbia
Probability of Costa Rica winning:  0.315
Probability of Draw:  0.324
Probability of Serbia winning:  0.361

Germany and Mexico
Winner: Germany
Probability of Germany winning:  0.567
Probability of Draw:  0.282
Probability of Mexico winning:  0.150

Brazil and Switzerland
Winner: Brazil
Probability of Brazil winning:  0.775
Probability of Draw:  0.138
Probability of Switzerland winning:  0.087

Sweden and Korea Republic
Winner: Sweden
Probability of Sweden winning:  0.503
Probability of Draw:  0.329
Probability of Korea Republic winning:  0.168

Belgium and Panama
Winner: Belgium
Probability of Belgium winning:  0.765
Probability of Draw:  0.145
Probability of Panama winning:  0.090

England and Tunisia
Winner: England
Probability of England winning:  0.649
Probability of Draw:  0.292
Probability of Tunisia winning:  0.059

Colombia and Japan
Winner: Colombia
Probability of Colombia winning:  0.511
Probability of Draw:  0.210
Probability of Japan winning:  0.280

Poland and Senegal
Winner: Poland
Probability of Poland winning:  0.612
Probability of Draw:  0.223
Probability of Senegal winning:  0.165

Egypt and Russia
Winner: Russia
Probability of Egypt winning:  0.225
Probability of Draw:  0.297
Probability of Russia winning:  0.478

Portugal and Morocco
Winner: Portugal
Probability of Portugal winning:  0.486
Probability of Draw:  0.377
Probability of Morocco winning:  0.138

Uruguay and Saudi Arabia
Winner: Uruguay
Probability of Uruguay winning:  0.668
Probability of Draw:  0.259
Probability of Saudi Arabia winning:  0.073

Spain and Iran
Winner: Spain
Probability of Spain winning:  0.695
Probability of Draw:  0.247
Probability of Iran winning:  0.058

Denmark and Australia
Winner: Denmark
Probability of Denmark winning:  0.551
Probability of Draw:  0.241
Probability of Australia winning:  0.207

France and Peru
Winner: France
Probability of France winning:  0.635
Probability of Draw:  0.215
Probability of Peru winning:  0.150

Argentina and Croatia
Winner: Argentina
Probability of Argentina winning:  0.599
Probability of Draw:  0.255
Probability of Croatia winning:  0.146

Brazil and Costa Rica
Winner: Brazil
Probability of Brazil winning:  0.800
Probability of Draw:  0.147
Probability of Costa Rica winning:  0.053

Iceland and Nigeria
Winner: Nigeria
Probability of Iceland winning:  0.278
Probability of Draw:  0.248
Probability of Nigeria winning:  0.474

Switzerland and Serbia
Winner: Switzerland
Probability of Switzerland winning:  0.402
Probability of Draw:  0.228
Probability of Serbia winning:  0.370

Belgium and Tunisia
Winner: Belgium
Probability of Belgium winning:  0.619
Probability of Draw:  0.253
Probability of Tunisia winning:  0.128

Mexico and Korea Republic
Winner: Mexico
Probability of Mexico winning:  0.504
Probability of Draw:  0.327
Probability of Korea Republic winning:  0.169

Germany and Sweden
Winner: Germany
Probability of Germany winning:  0.571
Probability of Draw:  0.228
Probability of Sweden winning:  0.201

England and Panama
Winner: England
Probability of England winning:  0.781
Probability of Draw:  0.178
Probability of Panama winning:  0.041

Senegal and Japan
Winner: Senegal
Probability of Senegal winning:  0.397
Probability of Draw:  0.278
Probability of Japan winning:  0.325

Poland and Colombia
Draw
Probability of Poland winning:  0.379
Probability of Draw:  0.391
Probability of Colombia winning:  0.230

Uruguay and Russia
Winner: Uruguay
Probability of Uruguay winning:  0.403
Probability of Draw:  0.388
Probability of Russia winning:  0.209

Egypt and Saudi Arabia
Winner: Egypt
Probability of Egypt winning:  0.544
Probability of Draw:  0.216
Probability of Saudi Arabia winning:  0.240

Portugal and Iran
Winner: Portugal
Probability of Portugal winning:  0.548
Probability of Draw:  0.353
Probability of Iran winning:  0.099

Spain and Morocco
Winner: Spain
Probability of Spain winning:  0.650
Probability of Draw:  0.267
Probability of Morocco winning:  0.083

France and Denmark
Winner: France
Probability of France winning:  0.621
Probability of Draw:  0.159
Probability of Denmark winning:  0.220

Peru and Australia
Winner: Peru
Probability of Peru winning:  0.463
Probability of Draw:  0.250
Probability of Australia winning:  0.288

Argentina and Nigeria
Winner: Argentina
Probability of Argentina winning:  0.708
Probability of Draw:  0.222
Probability of Nigeria winning:  0.070

Croatia and Iceland
Winner: Croatia
Probability of Croatia winning:  0.734
Probability of Draw:  0.185
Probability of Iceland winning:  0.080

Mexico and Sweden
Winner: Mexico
Probability of Mexico winning:  0.465
Probability of Draw:  0.264
Probability of Sweden winning:  0.271

Germany and Korea Republic
Winner: Germany
Probability of Germany winning:  0.598
Probability of Draw:  0.282
Probability of Korea Republic winning:  0.120

Brazil and Serbia
Winner: Brazil
Probability of Brazil winning:  0.714
Probability of Draw:  0.165
Probability of Serbia winning:  0.120

Switzerland and Costa Rica
Winner: Switzerland
Probability of Switzerland winning:  0.587
Probability of Draw:  0.213
Probability of Costa Rica winning:  0.200

Poland and Japan
Winner: Poland
Probability of Poland winning:  0.551
Probability of Draw:  0.242
Probability of Japan winning:  0.206

Colombia and Senegal
Winner: Colombia
Probability of Colombia winning:  0.577
Probability of Draw:  0.194
Probability of Senegal winning:  0.229

Tunisia and Panama
Winner: Tunisia
Probability of Tunisia winning:  0.631
Probability of Draw:  0.257
Probability of Panama winning:  0.113

Belgium and England
Winner: England
Probability of Belgium winning:  0.273
Probability of Draw:  0.235
Probability of England winning:  0.492

In [22]:
# 16-zecimi
group_16 = [('Uruguay', 'Portugal'),
            ('France', 'Croatia'),
            ('Brazil', 'Mexico'),
            ('England', 'Colombia'),
            ('Spain', 'Russia'),
            ('Argentina', 'Peru'),
            ('Germany', 'Switzerland'),
            ('Poland', 'Belgium')]
In [23]:
def clean_and_predict(matches, ranking, final, logreg):
    positions = []
    for match in matches:
        positions.append(ranking.loc[ranking['Team'] == match[0],'Position'].iloc[0])
        positions.append(ranking.loc[ranking['Team'] == match[1],'Position'].iloc[0])
    
    pred_set = []

    i = 0
    j = 0

    while i < len(positions):
        dict1 = {}
        if positions[i] < positions[i + 1]:
            dict1.update({'home_team': matches[j][0], 'away_team': matches[j][1]})
        else:
            dict1.update({'home_team': matches[j][1], 'away_team': matches[j][0]})
        pred_set.append(dict1)
        i += 2
        j += 1

    pred_set = pd.DataFrame(pred_set)
    backup_pred_set = pred_set

    pred_set = pd.get_dummies(pred_set, prefix=['home_team', 'away_team'], columns=['home_team', 'away_team'])

    missing_cols2 = set(final.columns) - set(pred_set.columns)
    for c in missing_cols2:
        pred_set[c] = 0
    pred_set = pred_set[final.columns]

    pred_set = pred_set.drop(['winning_team'], axis=1)

    predictions = logreg.predict(pred_set)
    for i in range(len(pred_set)):
        print(backup_pred_set.iloc[i, 1] + " and " + backup_pred_set.iloc[i, 0])
        if predictions[i] == 2:
            print("Winner: " + backup_pred_set.iloc[i, 1])
        elif predictions[i] == 1:
            print("Draw")
        elif predictions[i] == 0:
            print("Winner: " + backup_pred_set.iloc[i, 0])
        print('Probability of ' + backup_pred_set.iloc[i, 1] + ' winning: ' , '%.3f'%(logreg.predict_proba(pred_set)[i][2]))
        print('Probability of Draw: ', '%.3f'%(logreg.predict_proba(pred_set)[i][1])) 
        print('Probability of ' + backup_pred_set.iloc[i, 0] + ' winning: ', '%.3f'%(logreg.predict_proba(pred_set)[i][0]))
        print("")
In [24]:
clean_and_predict(group_16, ranking, final, logreg)
 
Portugal and Uruguay
Winner: Portugal
Probability of Portugal winning:  0.428
Probability of Draw:  0.285
Probability of Uruguay winning:  0.287

France and Croatia
Winner: France
Probability of France winning:  0.481
Probability of Draw:  0.252
Probability of Croatia winning:  0.267

Brazil and Mexico
Winner: Brazil
Probability of Brazil winning:  0.695
Probability of Draw:  0.209
Probability of Mexico winning:  0.096

England and Colombia
Winner: England
Probability of England winning:  0.516
Probability of Draw:  0.368
Probability of Colombia winning:  0.116

Spain and Russia
Winner: Spain
Probability of Spain winning:  0.529
Probability of Draw:  0.280
Probability of Russia winning:  0.191

Argentina and Peru
Winner: Argentina
Probability of Argentina winning:  0.713
Probability of Draw:  0.212
Probability of Peru winning:  0.075

Germany and Switzerland
Winner: Germany
Probability of Germany winning:  0.672
Probability of Draw:  0.192
Probability of Switzerland winning:  0.137

Belgium and Poland
Winner: Belgium
Probability of Belgium winning:  0.513
Probability of Draw:  0.202
Probability of Poland winning:  0.285

In [25]:
# sa vedem sferturile
quarters = [('Portugal', 'France'),
            ('Spain', 'Argentina'),
            ('Brazil', 'England'),
            ('Germany', 'Belgium')]
In [26]:
clean_and_predict(quarters, ranking, final, logreg)
 
Portugal and France
Winner: Portugal
Probability of Portugal winning:  0.437
Probability of Draw:  0.256
Probability of France winning:  0.307

Argentina and Spain
Winner: Argentina
Probability of Argentina winning:  0.518
Probability of Draw:  0.262
Probability of Spain winning:  0.220

Brazil and England
Winner: Brazil
Probability of Brazil winning:  0.525
Probability of Draw:  0.216
Probability of England winning:  0.260

Germany and Belgium
Winner: Germany
Probability of Germany winning:  0.563
Probability of Draw:  0.269
Probability of Belgium winning:  0.167

In [27]:
# semifinalele
semi = [('Portugal', 'Brazil'),
        ('Argentina', 'Germany')]
In [28]:
clean_and_predict(semi, ranking, final, logreg)
 
Brazil and Portugal
Winner: Brazil
Probability of Brazil winning:  0.705
Probability of Draw:  0.152
Probability of Portugal winning:  0.143

Germany and Argentina
Winner: Germany
Probability of Germany winning:  0.441
Probability of Draw:  0.264
Probability of Argentina winning:  0.295

In [29]:
# ... FINALA...
finals = [('Brazil', 'Germany')]
In [30]:
clean_and_predict(finals, ranking, final, logreg)
 
Germany and Brazil
Winner: Brazil
Probability of Germany winning:  0.359
Probability of Draw:  0.220
Probability of Brazil winning:  0.421

 

qed, #haisapariempeBrazilia

 

http://musetoiu.ro/wp-content/uploads/2018/06/brasil-1024x576.jpghttp://musetoiu.ro/wp-content/uploads/2018/06/brasil-150x150.jpgdan musetoiuMLRevista preseibrazilia,python,rusia world cup 2018
Inteligenta artificiala pariaza pe Brazilia /*!** Twitter Bootstrap**//*! * Bootstrap v3.3.7 (http://getbootstrap.com) * Copyright 2011-2016 Twitter, Inc. * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE) *//*! normalize.css v3.0.3 | MIT License | github.com/necolas/normalize.css */html { font-family: sans-serif; -ms-text-size-adjust: 100%; -webkit-text-size-adjust: 100%;}body { ...