Set up files and notebook

In [ ]:
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
#from mpl_toolkits.basemap import Basemap
In [ ]:
# Authenticate google drive access and mount it to /content/drive path.
from google.colab import drive
drive.mount('/content/drive')
In [ ]:
# Check the contents of the Data/ directory.
import glob
DATA_PATH_PREFIX = '/content/drive/My Drive/Colab Notebooks/Data'
def get_data_file(file_name):
  return DATA_PATH_PREFIX+'/'+file_name 
glob.glob(DATA_PATH_PREFIX + '/*')
In [ ]:
prefix = '/content/drive/My Drive/Colab Notebooks/Data/XContest'
def get_file(file_name):
  return prefix + '/' + file_name
glob.glob(prefix + '/*')

Definitions

In [5]:
def read_data(file):
  xc = pd.read_json(get_file(file), orient='records')
  return xc

def cleanup(data):
  data = data[['pilot', 'pointStart', 'stats', 'takeoff', 'glider']]
  return data

def extract_dict(data, column):
  data_extracted = data[column].apply(pd.Series)
  return data_extracted

def clean_pilot(data):
  data = data[['id', 'username', 'countryIso', 'isMale']]
  data = data.rename(columns={'id':'idPilot', 'countryIso':'fromCountry'})
  return data

def clean_takeoff(data):
  data = data[['name', 'countryIso']]
  data = data.rename(columns={'name':'takeoffName', 'countryIso':'takeoffCountry'})
  return data

def clean_glider(data):
  data = data[['name', 'nameProducer', 'subclass']]
  data = data.rename(columns={'name':'gliderName', 'nameProducer':'gliderProducer', 'subclass':'gliderClass'})
  return data

def clean_pointStart(data):
  data = data.rename(columns={'time':'takeoffTime'})
  return data

def clean_stats(data):
  data = data.rename(columns={'duration':'flightDuration'})
  return data

def combine_frames(data1, data2, original_column):
  final_data = pd.concat([data1, data2], axis=1).drop(original_column, axis=1)
  return final_data

def delete_NO_glider(data):
  data = data[data.gliderProducer.map(len) > 1]
  return data

def order_descending(data, series):
  data = data.sort_values(series, ascending=False)
  return data

def give_month(takeoff):
  s = takeoff[5:7]
  month_to_name = {'01': 1, '02':2, '03': 3, '04':4, 
                   '05':5, '06':6, '07':7, '08':8, '09':9,
                   '10':10, '11':11, '12':12}
  return month_to_name[s]
  
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August',
          'September', 'October', 'November', 'December']

def get_month_index(month):
  return month-1

def get_country_index(country):
  return country_idx[country]

def clean_time(time):
  time = time[2:4] + ":" + time[5:7] + ":" + time[8:10]
  return time

def date_datetime(column):
  column = pd.to_datetime(column)       #.dt.time
  return column

def find_mistake_time(time):
  if time[6] == "-":
    return False
  else:
    return True

def define_time_categories(time):
  if time[0] != '0' and time[3] in ['0', '1', '2']:
    t = int(time[0:2]) + 0.5
  elif time[0] == '0' and time[3] in ['0', '1', '2']:
    t = int(time[1]) + 0.5
  elif time[0] == '0' and time[3] in ['3', '4', '5']:
    t = int(time[1]) + 0.9
  elif time[0] != '0' and time[3] in ['3', '4', '5']:
    t = int(time[0:2]) + 0.9
  return t

time_categories = ['0:30', '1:00', '1:30', '2:00', '2:30', '3:00', '3:30', '4:00', '4:30', '5:00', '5:30', '6:00', 
                   '6:30', '7:00', '7:30', '8:00', '8:30', '9:00', '9:30', '10:00', '10:30', '11:00']

def make_delta_time(time):
  h, m, s = time.split(':')
  return datetime.timedelta(hours=int(h), minutes=int(m), seconds=int(s))

def round_to_minutes(time):
  return time.round(freq='T')

def return_to_time(number):
  number = pd.TimedeltaIndex(number, unit='m')
  return number

def myplot(graph):
  graph = graph[graph.index < pd.Timedelta(seconds=5*3600)]
  ax = graph.plot(figsize=(25,15)) 
  vals = ax.get_yticks()
  ax.set_yticklabels(['{}%'.format(x) for x in vals])

def get_percents(amount):
  amount = amount / sum(amount)*100
  return amount

Read and clean the original data from xc 2019

In [45]:
xc_2019 = read_data('merged-2019.json.xz')
print(xc_2019.shape)
xc_2019 = cleanup(xc_2019)
(286864, 18)

Extract and clean the data

In [ ]:
xc_2019_pilot_extracted = extract_dict(xc_2019, 'pilot')
xc_2019_takeoff_extracted = extract_dict(xc_2019, 'takeoff')
xc_2019_glider_extracted = extract_dict(xc_2019, 'glider')
xc_2019_pointStart_extracted = extract_dict(xc_2019, 'pointStart')
xc_2019_stats_extracted = extract_dict(xc_2019, 'stats')
In [ ]:
xc_2019_pilot_cleaned = clean_pilot(xc_2019_pilot_extracted)
xc_2019_takeoff_cleaned = clean_takeoff(xc_2019_takeoff_extracted)
xc_2019_glider_cleaned = clean_glider(xc_2019_glider_extracted)
xc_2019_pointStart_cleaned = clean_pointStart(xc_2019_pointStart_extracted)
xc_2019_stats_cleaned = clean_stats(xc_2019_stats_extracted)
In [ ]:
xc_2019_cleaned = combine_frames(xc_2019, xc_2019_pilot_cleaned, 'pilot')
xc_2019_cleaned = combine_frames(xc_2019_cleaned, xc_2019_takeoff_cleaned, 'takeoff')
xc_2019_cleaned = combine_frames(xc_2019_cleaned, xc_2019_glider_cleaned, 'glider')
xc_2019_cleaned = combine_frames(xc_2019_cleaned, xc_2019_pointStart_cleaned, 'pointStart')
xc_2019_cleaned = combine_frames(xc_2019_cleaned, xc_2019_stats_cleaned, 'stats')
xc_2019_cleaned = xc_2019_cleaned[xc_2019_cleaned.takeoffTime.notnull()]
xc_2019_cleaned = xc_2019_cleaned.reset_index(drop=True)
xc_2019_cleaned["takeoffMonth"] = xc_2019_cleaned.takeoffTime.map(give_month)
xc_2019_cleaned = xc_2019_cleaned.drop([222421, 265162, 278725])
xc_2019_cleaned.head()

Save cleaned data back to drive

In [ ]:
xc_2019_cleaned.to_json("/content/drive/My Drive/Colab Notebooks/Data/XContest/xc_2019_cleaned.json")

Read the cleaned data from drive

In [ ]:
xc_2019_cleaned = read_data('xc_2019_cleaned.json')
xc_2019_cleaned = xc_2019_cleaned.drop([222421, 265162, 278725, 233031])
#xc_2019_cleaned['Right_time'] = xc_2019_cleaned.flightDuration.map(find_mistake_time) #used to find wrong notation of time, after not needed anymore
#time_wrong = xc_2019_cleaned.loc[xc_2019_cleaned['Right_time'] == False]               #used to find wrong notation of time, after not needed anymore
xc_2019_cleaned.flightDuration = xc_2019_cleaned.flightDuration.map(clean_time)
xc_2019_cleaned.flightDuration = xc_2019_cleaned.flightDuration.map(make_delta_time)
xc_2019_cleaned['TimeCategory'] = xc_2019_cleaned.flightDuration.map(round_to_minutes)

Create separate df for men and women - all flights

Dataframe - women

In [20]:
xc_2019_women = xc_2019_cleaned.loc[xc_2019_cleaned['isMale'] == False]
xc_2019_women = xc_2019_women.reset_index(drop=True)
xc_2019_women["takeoffMonth"] = xc_2019_women.takeoffTime.map(give_month)
airtime_sum_women = xc_2019_women.flightDuration.sum()
flights_women = xc_2019_women.shape[0]
airtime_per_flight_women = airtime_sum_women / flights_women
print("Average duration of the woman's flight in 2019:", airtime_per_flight_women)
Average duration of the woman's flight in 2019: 0 days 01:10:05.125034

Dataframe - men

In [21]:
xc_2019_men = xc_2019_cleaned.loc[xc_2019_cleaned['isMale']]
xc_2019_men.head()
xc_2019_men = xc_2019_men[xc_2019_men.takeoffTime.notnull()]
xc_2019_men = xc_2019_men.reset_index(drop=True)
xc_2019_men["takeoffMonth"] = xc_2019_men.takeoffTime.map(give_month)
airtime_sum_men = xc_2019_men.flightDuration.sum()
flights_men = xc_2019_men.shape[0]
airtime_per_flight_men = airtime_sum_men / flights_men
print("Average duration of the man's flight in 2019:", airtime_per_flight_men)
Average duration of the man's flight in 2019: 0 days 01:20:10.098556

Flights per country per month

In [23]:
xc_2019_byMonthCountry = xc_2019_cleaned.groupby(by=['takeoffMonth', 'takeoffCountry'])
xc_2019_heatData = xc_2019_byMonthCountry.size().reset_index(name='count')
index = xc_2019_heatData.takeoffCountry.unique()
cols = xc_2019_heatData.takeoffMonth.unique()
data = np.zeros((index.size, cols.size))
country_idx = dict()
for idx, country in enumerate(index):
  country_idx[country] = idx

for _, row in xc_2019_heatData.iterrows():
  month_index = get_month_index(row['takeoffMonth'])
  country_index = get_country_index(row['takeoffCountry'])
  data[country_index, month_index] = row['count']

xc_2019_heatMatrix = pd.DataFrame(data, index=index, columns=cols, )
pd.set_option('display.max_rows', 200)
xc_2019_heatMatrix.style.background_gradient(cmap='Blues')
Out[23]:
1 2 3 4 5 6 7 8 9 10 11 12
AE 2.000000 1.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
AR 53.000000 44.000000 35.000000 51.000000 16.000000 25.000000 21.000000 53.000000 76.000000 63.000000 32.000000 32.000000
AT 244.000000 1127.000000 2474.000000 2479.000000 1609.000000 4487.000000 3137.000000 3024.000000 2059.000000 1011.000000 549.000000 279.000000
AU 963.000000 444.000000 316.000000 455.000000 130.000000 90.000000 110.000000 152.000000 148.000000 325.000000 327.000000 540.000000
BE 3.000000 14.000000 52.000000 16.000000 52.000000 21.000000 28.000000 32.000000 32.000000 7.000000 12.000000 0.000000
BG 8.000000 5.000000 32.000000 26.000000 76.000000 133.000000 325.000000 488.000000 278.000000 22.000000 28.000000 2.000000
BO 7.000000 3.000000 27.000000 24.000000 0.000000 13.000000 5.000000 1.000000 3.000000 12.000000 17.000000 6.000000
BR 368.000000 243.000000 459.000000 287.000000 191.000000 356.000000 283.000000 242.000000 470.000000 490.000000 416.000000 309.000000
CA 2.000000 9.000000 82.000000 82.000000 160.000000 161.000000 424.000000 214.000000 65.000000 26.000000 11.000000 4.000000
CH 566.000000 1968.000000 3585.000000 2200.000000 2969.000000 5073.000000 4478.000000 5397.000000 2110.000000 1561.000000 582.000000 571.000000
CL 301.000000 355.000000 145.000000 161.000000 110.000000 82.000000 60.000000 118.000000 165.000000 144.000000 336.000000 145.000000
CN 46.000000 12.000000 25.000000 31.000000 53.000000 241.000000 81.000000 83.000000 102.000000 74.000000 61.000000 30.000000
CO 5645.000000 4506.000000 3450.000000 2385.000000 1770.000000 2317.000000 2556.000000 2259.000000 1849.000000 2076.000000 1854.000000 4235.000000
CR 14.000000 23.000000 38.000000 1.000000 0.000000 5.000000 4.000000 5.000000 0.000000 0.000000 0.000000 3.000000
CT 180.000000 446.000000 588.000000 246.000000 437.000000 645.000000 561.000000 881.000000 830.000000 272.000000 199.000000 232.000000
CU 17.000000 26.000000 102.000000 131.000000 65.000000 118.000000 255.000000 42.000000 79.000000 29.000000 40.000000 14.000000
CY 1.000000 1.000000 9.000000 5.000000 5.000000 3.000000 5.000000 3.000000 3.000000 12.000000 1.000000 1.000000
CZ 206.000000 436.000000 1094.000000 1529.000000 1564.000000 2337.000000 2064.000000 2291.000000 1579.000000 1069.000000 519.000000 173.000000
DE 46.000000 231.000000 469.000000 803.000000 875.000000 868.000000 707.000000 706.000000 430.000000 298.000000 99.000000 50.000000
DO 5.000000 10.000000 15.000000 2.000000 1.000000 3.000000 3.000000 7.000000 5.000000 7.000000 13.000000 4.000000
EC 4.000000 1.000000 4.000000 6.000000 22.000000 2.000000 12.000000 35.000000 26.000000 16.000000 41.000000 18.000000
EE 3.000000 17.000000 8.000000 14.000000 17.000000 4.000000 35.000000 23.000000 2.000000 4.000000 11.000000 6.000000
EG 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
ES 1330.000000 1237.000000 1232.000000 905.000000 1222.000000 1351.000000 1451.000000 1466.000000 1148.000000 673.000000 809.000000 1025.000000
FI 40.000000 16.000000 74.000000 384.000000 192.000000 245.000000 292.000000 149.000000 60.000000 36.000000 5.000000 18.000000
FR 127.000000 307.000000 718.000000 721.000000 1195.000000 1858.000000 2333.000000 1995.000000 1484.000000 374.000000 131.000000 76.000000
GB 37.000000 55.000000 176.000000 270.000000 330.000000 137.000000 163.000000 142.000000 153.000000 67.000000 37.000000 11.000000
GE 9.000000 33.000000 8.000000 16.000000 17.000000 11.000000 25.000000 113.000000 53.000000 20.000000 1.000000 0.000000
GR 18.000000 33.000000 86.000000 78.000000 159.000000 117.000000 170.000000 118.000000 319.000000 249.000000 50.000000 42.000000
GS 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
HK 1.000000 0.000000 1.000000 1.000000 3.000000 2.000000 0.000000 0.000000 4.000000 11.000000 15.000000 3.000000
HU 67.000000 307.000000 882.000000 725.000000 727.000000 995.000000 1005.000000 1018.000000 755.000000 446.000000 295.000000 97.000000
ID 3.000000 7.000000 11.000000 2.000000 15.000000 19.000000 26.000000 53.000000 43.000000 12.000000 4.000000 17.000000
IE 8.000000 1.000000 12.000000 10.000000 34.000000 9.000000 36.000000 10.000000 8.000000 9.000000 3.000000 0.000000
IL 1.000000 0.000000 4.000000 9.000000 3.000000 12.000000 24.000000 15.000000 16.000000 9.000000 5.000000 3.000000
IN 111.000000 175.000000 947.000000 1475.000000 1267.000000 2894.000000 13.000000 23.000000 226.000000 1727.000000 370.000000 45.000000
IR 377.000000 371.000000 529.000000 489.000000 901.000000 1078.000000 619.000000 845.000000 720.000000 469.000000 498.000000 325.000000
IT 1873.000000 2870.000000 7292.000000 4198.000000 2832.000000 6133.000000 3870.000000 6089.000000 5819.000000 3138.000000 1323.000000 1957.000000
JO 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000
JP 72.000000 59.000000 62.000000 118.000000 173.000000 101.000000 68.000000 175.000000 100.000000 65.000000 85.000000 58.000000
KE 99.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 0.000000 0.000000
KR 4.000000 3.000000 4.000000 11.000000 5.000000 17.000000 3.000000 17.000000 23.000000 20.000000 4.000000 3.000000
LK 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000
LT 2.000000 14.000000 16.000000 169.000000 212.000000 257.000000 126.000000 196.000000 20.000000 8.000000 0.000000 1.000000
LV 107.000000 17.000000 56.000000 130.000000 162.000000 261.000000 195.000000 278.000000 61.000000 45.000000 29.000000 8.000000
MA 134.000000 303.000000 472.000000 26.000000 43.000000 5.000000 0.000000 3.000000 16.000000 36.000000 268.000000 195.000000
MC 72.000000 32.000000 48.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 5.000000 7.000000 47.000000
ME 3.000000 78.000000 14.000000 1.000000 4.000000 8.000000 3.000000 8.000000 7.000000 0.000000 9.000000 0.000000
MQ 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
MU 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000
MX 781.000000 82.000000 23.000000 34.000000 7.000000 14.000000 16.000000 13.000000 16.000000 4.000000 54.000000 435.000000
MY 5.000000 0.000000 3.000000 1.000000 1.000000 2.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000
MZ 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
NA 7.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
NL 12.000000 51.000000 108.000000 142.000000 203.000000 193.000000 79.000000 149.000000 146.000000 133.000000 22.000000 7.000000
NO 3.000000 6.000000 15.000000 76.000000 34.000000 56.000000 87.000000 75.000000 41.000000 14.000000 4.000000 2.000000
NP 105.000000 339.000000 369.000000 24.000000 26.000000 0.000000 0.000000 0.000000 1.000000 42.000000 210.000000 74.000000
NR 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
NZ 141.000000 328.000000 180.000000 120.000000 28.000000 25.000000 18.000000 43.000000 80.000000 111.000000 158.000000 184.000000
PE 37.000000 21.000000 63.000000 7.000000 12.000000 50.000000 41.000000 28.000000 15.000000 14.000000 35.000000 28.000000
PL 36.000000 80.000000 176.000000 341.000000 314.000000 673.000000 560.000000 617.000000 385.000000 202.000000 86.000000 25.000000
PS 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000
PT 68.000000 182.000000 200.000000 102.000000 124.000000 379.000000 335.000000 284.000000 139.000000 55.000000 59.000000 125.000000
RE 7.000000 7.000000 18.000000 38.000000 14.000000 28.000000 14.000000 31.000000 56.000000 1.000000 22.000000 23.000000
RO 136.000000 236.000000 625.000000 386.000000 519.000000 622.000000 779.000000 1065.000000 750.000000 632.000000 371.000000 117.000000
RS 1.000000 5.000000 6.000000 6.000000 4.000000 12.000000 59.000000 72.000000 52.000000 14.000000 0.000000 3.000000
RU 4.000000 7.000000 15.000000 138.000000 166.000000 167.000000 110.000000 237.000000 209.000000 39.000000 14.000000 4.000000
SA 15.000000 5.000000 8.000000 10.000000 8.000000 3.000000 6.000000 40.000000 26.000000 5.000000 1.000000 0.000000
SI 43.000000 112.000000 446.000000 539.000000 189.000000 1261.000000 737.000000 1533.000000 429.000000 111.000000 19.000000 62.000000
SK 93.000000 180.000000 508.000000 435.000000 505.000000 852.000000 627.000000 901.000000 1001.000000 531.000000 252.000000 54.000000
TH 66.000000 107.000000 74.000000 273.000000 9.000000 2.000000 2.000000 0.000000 3.000000 0.000000 8.000000 31.000000
TR 9.000000 44.000000 71.000000 185.000000 289.000000 237.000000 376.000000 341.000000 398.000000 385.000000 50.000000 34.000000
TW 8.000000 16.000000 10.000000 1.000000 2.000000 8.000000 5.000000 5.000000 16.000000 17.000000 7.000000 26.000000
UA 5.000000 11.000000 34.000000 72.000000 56.000000 90.000000 110.000000 66.000000 57.000000 51.000000 33.000000 2.000000
US 86.000000 87.000000 348.000000 439.000000 431.000000 702.000000 620.000000 455.000000 420.000000 124.000000 123.000000 96.000000
UY 25.000000 23.000000 12.000000 23.000000 0.000000 0.000000 0.000000 2.000000 7.000000 22.000000 24.000000 26.000000
VN 147.000000 322.000000 286.000000 214.000000 206.000000 383.000000 357.000000 521.000000 849.000000 663.000000 482.000000 306.000000
ZA 362.000000 277.000000 115.000000 17.000000 18.000000 42.000000 16.000000 41.000000 30.000000 87.000000 135.000000 391.000000
AL 0.000000 1.000000 1.000000 3.000000 19.000000 49.000000 25.000000 5.000000 39.000000 2.000000 6.000000 4.000000
BA 0.000000 6.000000 10.000000 10.000000 16.000000 9.000000 73.000000 61.000000 25.000000 11.000000 4.000000 0.000000
GT 0.000000 13.000000 1.000000 12.000000 8.000000 8.000000 4.000000 3.000000 2.000000 0.000000 0.000000 0.000000
HR 0.000000 5.000000 18.000000 53.000000 49.000000 50.000000 35.000000 54.000000 38.000000 10.000000 5.000000 0.000000
LI 0.000000 14.000000 14.000000 8.000000 23.000000 18.000000 4.000000 17.000000 5.000000 11.000000 7.000000 8.000000
MK 0.000000 1.000000 4.000000 2.000000 37.000000 82.000000 421.000000 1274.000000 91.000000 18.000000 0.000000 0.000000
PF 0.000000 2.000000 9.000000 9.000000 3.000000 5.000000 1.000000 0.000000 1.000000 1.000000 4.000000 4.000000
PH 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 4.000000 0.000000 0.000000
PY 0.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
ZW 0.000000 2.000000 1.000000 0.000000 0.000000 1.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000
CI 0.000000 0.000000 2.000000 2.000000 5.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
EH 0.000000 0.000000 9.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
IQ 0.000000 0.000000 2.000000 0.000000 0.000000 3.000000 10.000000 0.000000 0.000000 0.000000 0.000000 0.000000
MT 0.000000 0.000000 1.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
NC 0.000000 0.000000 7.000000 4.000000 5.000000 6.000000 24.000000 14.000000 12.000000 17.000000 26.000000 24.000000
SE 0.000000 0.000000 4.000000 29.000000 17.000000 18.000000 33.000000 17.000000 7.000000 4.000000 1.000000 0.000000
BY 0.000000 0.000000 0.000000 3.000000 0.000000 0.000000 9.000000 6.000000 11.000000 0.000000 0.000000 0.000000
DK 0.000000 0.000000 0.000000 3.000000 4.000000 18.000000 40.000000 24.000000 8.000000 0.000000 0.000000 1.000000
KZ 0.000000 0.000000 0.000000 3.000000 3.000000 25.000000 24.000000 158.000000 50.000000 8.000000 0.000000 0.000000
LU 0.000000 0.000000 0.000000 3.000000 0.000000 0.000000 0.000000 2.000000 2.000000 0.000000 0.000000 0.000000
FO 0.000000 0.000000 0.000000 0.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
IS 0.000000 0.000000 0.000000 0.000000 2.000000 9.000000 1.000000 3.000000 0.000000 0.000000 3.000000 0.000000
LB 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 2.000000 0.000000 0.000000 10.000000 0.000000
TN 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
MN 0.000000 0.000000 0.000000 0.000000 0.000000 4.000000 1.000000 2.000000 1.000000 0.000000 0.000000 0.000000
PK 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 23.000000 7.000000 54.000000 0.000000 0.000000 0.000000
TJ 0.000000 0.000000 0.000000 0.000000 0.000000 4.000000 21.000000 5.000000 0.000000 0.000000 0.000000 0.000000
XK 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 7.000000 2.000000 5.000000 5.000000 0.000000 1.000000
GL 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 11.000000 0.000000 0.000000 0.000000 0.000000
KG 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 35.000000 15.000000 0.000000 0.000000 0.000000
PG 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 1.000000 1.000000 0.000000 0.000000 0.000000
SJ 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 4.000000 0.000000 0.000000 0.000000 0.000000 0.000000
SZ 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000
TG 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 7.000000 0.000000 0.000000 0.000000 0.000000 0.000000
UZ 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 17.000000 0.000000 2.000000 3.000000 0.000000 0.000000
AZ 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 6.000000 0.000000 0.000000 0.000000 0.000000
SM 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 0.000000 0.000000 0.000000
AM 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 28.000000 0.000000 0.000000 0.000000
TZ 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 4.000000 1.000000 0.000000 0.000000
VE 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 10.000000 4.000000 10.000000
AO 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000
GP 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 0.000000 0.000000
MD 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 0.000000 0.000000
CV 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 17.000000 0.000000
MG 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000
AF 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000
QA 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000

The lenght of flights

In [25]:
women = xc_2019_women.groupby('TimeCategory').TimeCategory.count()
women = get_percents(women)
men = xc_2019_men.groupby('TimeCategory').TimeCategory.count()
men = get_percents(men)
myplot(pd.concat({'men': men, 'women': women}, axis=1))
In [26]:
myplot(pd.concat({'men': men.cumsum(), 'women': women.cumsum()}, axis=1))
In [27]:
myplot(pd.concat({'delta': (men-women).rolling(window=7, center=True).mean()}, axis=1))

Countries with most take offs

In [29]:
women = xc_2019_women.groupby('takeoffCountry').takeoffCountry.count()
women = get_percents(women)
women = women[women > 0.1]
men = xc_2019_men.groupby('takeoffCountry').takeoffCountry.count()
men = get_percents(men)
men = men[men > 0.1]
graph = pd.concat({'men': men, 'women': women}, axis=1).sort_values('women', ascending=False)
ax = graph.plot.bar(title="Countries with most take offs in 2019", color=['dodgerblue', 'hotpink'], figsize=(15,15))
vals = ax.get_yticks()
ax.set_yticklabels(['{}%'.format(x) for x in vals])
ax
Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4d73653358>

Flights based on time of the year

In [12]:
women = xc_2019_women.groupby('takeoffMonth').takeoffMonth.count()
women = get_percents(women)
men = xc_2019_men.groupby('takeoffMonth').takeoffMonth.count()
men = get_percents(men)
graph = pd.concat({'men': men, 'women': women}, axis=1)
ax = graph.plot.bar(title="Flights based on time of the year", color=['dodgerblue', 'hotpink'], figsize=(15,15))
vals = ax.get_yticks()
x_ticks = ax.get_xticks()
ax.set_yticklabels(['{}%'.format(x) for x in vals])
ax.set_xticklabels([months[x] for x in x_ticks])
ax
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4d734dddd8>

Create df of unique pilots - men and women

Creat df of unique pilots based on their longest flight

In [44]:
women_2019 = order_descending(xc_2019_women, 'flightDuration')
women_2019 = women_2019.drop_duplicates(subset=['idPilot'])
pilots_women = women_2019.shape[0]
men_2019 = order_descending(xc_2019_men, 'flightDuration')
men_2019 = men_2019.drop_duplicates(subset=['idPilot'])
pilots_men = men_2019.shape[0]

Amount of men and women flying in 2019

In [42]:
print("There are", pilots_women, "women pilots in 2019.")
print("There are", pilots_men, "men pilots in 2019.", '\n')
pilots = [pilots_women, pilots_men]
labels = [str(pilots_women) + ' Women', str(pilots_men) + ' Men']
plt.pie(pilots, labels=labels, colors=['hotpink', 'dodgerblue'], autopct='%1.1f%%', radius=2.0)
plt.title('Pilots in 2019')
plt.axis('equal')
plt.show
There are 1019 women pilots in 2019.
There are 13951 men pilots in 2019. 

Out[42]:
<function matplotlib.pyplot.show>

Average airtime per pilot in a year

In [14]:
airtime_sum_women = xc_2019_women.flightDuration.sum()
airtime_per_woman = pd.Timedelta(seconds=airtime_sum_women.total_seconds() / pilots_women)
airtime_sum_men = xc_2019_men.flightDuration.sum()
airtime_per_man = pd.Timedelta(seconds=airtime_sum_men.total_seconds() / pilots_men)

airtime = {'female pilot': airtime_per_woman, 'male pilot': airtime_per_man}
average_flying = pd.DataFrame({'average pilots':['average female pilot', 'average male pilot'],
                               'sum airtime':[airtime_per_woman.total_seconds()/3600,
                                              airtime_per_man.total_seconds()/3600]})

plt.title('Flight hours per year')
plt.ylabel('hours')
plt.bar(x=average_flying['average pilots'], height=average_flying['sum airtime'])
Out[14]:
<BarContainer object of 2 artists>

Glider classes

In [16]:
women = women_2019.groupby('gliderClass').gliderClass.count()
women = get_percents(women)
men = men_2019.groupby('gliderClass').gliderClass.count()
men = get_percents(men)
graph = pd.concat({'men': men, 'women': women}, axis=1)
graph = graph.drop(['T', 'Z'])
ax = graph.plot.bar(title="Glider classes", color=['dodgerblue', 'hotpink'], figsize=(10,10))
vals = ax.get_yticks()
ax.set_yticklabels(['{}%'.format(x) for x in vals])
ax
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4d7350d898>

Brands of gliders

In [17]:
women = delete_NO_glider(women_2019)
women = women.groupby('gliderProducer').gliderProducer.count()
women = get_percents(women)
women = women[women > 0.1]
men = delete_NO_glider(men_2019)
men = men.groupby('gliderProducer').gliderProducer.count()
men = get_percents(men)
men = men[men > 0.1]
graph = pd.concat({'men': men, 'women': women}, axis=1).sort_values('women', ascending=False)
graph = graph[:20]
ax = graph.plot.bar(title="The brands of gliders pilots fly", color=['dodgerblue', 'hotpink'], figsize=(15,15))
vals = ax.get_yticks()
ax.set_yticklabels(['{}%'.format(x) for x in vals])
ax
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4d73577f28>

Where are the pilots from

In [18]:
women = women_2019.groupby('fromCountry').fromCountry.count()
women = get_percents(women)
women = women[women > 0.1]
men = men_2019.groupby('fromCountry').fromCountry.count()
men = get_percents(men)
men = men[men > 0.1]
graph = pd.concat({'men': men, 'women': women}, axis=1)
graph = graph.sort_values('women', ascending=False)
ax = graph.plot.bar(title="Where are the pilots from?", color=['dodgerblue', 'hotpink'], figsize=(15,15))
vals = ax.get_yticks()
ax.set_yticklabels(['{}%'.format(x) for x in vals])
ax
Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f4d7350e6a0>