{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "f9f65a02", "metadata": { "id": "f9f65a02" }, "outputs": [], "source": [ "import tensorflow as tf\n", "import pandas as pd\n", "import numpy as np\n", "import glob\n", "import os\n", "import keras\n", "from sklearn.preprocessing import LabelBinarizer\n", "from sklearn.preprocessing import MinMaxScaler\n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import BatchNormalization\n", "from tensorflow.keras.layers import Conv2D\n", "from tensorflow.keras.layers import MaxPooling2D\n", "from tensorflow.keras.layers import Activation\n", "from tensorflow.keras.layers import Dropout\n", "from tensorflow.keras.layers import Dense\n", "from tensorflow.keras.layers import Flatten\n", "from tensorflow.keras.layers import Input\n", "from tensorflow.keras.models import Model\n", "from tensorflow import keras\n", "from keras.models import Sequential\n", "from keras.layers import Dense\n", "from keras.wrappers.scikit_learn import KerasClassifier\n", "from keras.wrappers.scikit_learn import KerasRegressor\n", "from sklearn.model_selection import cross_val_score\n", "from sklearn.preprocessing import LabelEncoder\n", "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import OneHotEncoder\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.model_selection import train_test_split, GridSearchCV\n", "from sklearn.model_selection import KFold, StratifiedKFold\n", "import matplotlib.pyplot as mp\n", "from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score\n", "import seaborn as sb\n", "from sklearn.model_selection import cross_validate\n", "from tensorflow.keras.callbacks import TensorBoard\n", "from imblearn.over_sampling import SMOTE\n", "from imblearn.pipeline import Pipeline, make_pipeline\n", 
"from imblearn.under_sampling import RandomUnderSampler\n", "from sklearn.metrics import roc_auc_score\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.metrics import roc_curve, auc\n", "import matplotlib.pyplot as plt\n", "from sklearn.svm import SVC\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import RocCurveDisplay\n", "from sklearn.datasets import load_wine\n", "from sklearn.model_selection import train_test_split\n", "from matplotlib import pyplot\n", "from sklearn.metrics import mean_absolute_error\n", "import xgboost as xgb" ] }, { "cell_type": "code", "execution_count": null, "id": "d0318dcb", "metadata": { "id": "d0318dcb" }, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": null, "id": "3a659dea", "metadata": { "id": "3a659dea" }, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import mean_absolute_error as mae\n", "from sklearn.metrics import mean_squared_error as mse" ] }, { "cell_type": "code", "execution_count": null, "id": "a550fbd3", "metadata": { "id": "a550fbd3" }, "outputs": [], "source": [ "import statistics\n", "from matplotlib import pyplot\n", "\n", "# Seed every global RNG in play, not just TensorFlow's: scikit-learn and imbalanced-learn\n", "# estimators/splitters draw from NumPy's global generator whenever random_state is left unset.\n", "SEED = 123\n", "np.random.seed(SEED)\n", "tf.random.set_seed(SEED)" ] }, { "cell_type": "code", "execution_count": null, "id": "63f425f7", "metadata": { "id": "63f425f7" }, "outputs": [], "source": [ "# Read the training CSV, then build `df` without its first column (selected by position;\n", "# presumably the row index that was written out with the file -- TODO confirm).\n", "# The untouched read is kept as `train_raw` so re-deriving `df` never needs another disk read.\n", "train_raw = pd.read_csv('case_training.csv')\n", "df = train_raw.drop(columns=train_raw.columns[0])" ] }, { "cell_type": "code", "execution_count": null, "id": "26bafe22", "metadata": { "id": "26bafe22", "outputId": "794d039a-94fe-42d5-f553-11798bc2860f" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeweightMaleSchedSkinMinsscheduled_case_durationABRAMS, REID ALLENHENTZEN, ERIC RICHARDLEEK, BRYAN TERRYMEUNIER, MATTHEW JOHNRECHNIC, MARK...withoutwoodworkworkstationworseworstwouldwristbert_textactual_case_duration
0422220.809011500010...00000002EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMUM...69
1383035.20909000010...00000002EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMUM...110
2613556.81659500100...00000003Narrative & Impression EXAM DESCRIPTION: X-RAY...103
3572681.60606000010...00000002EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMUM...70
4552625.6012012000010...00000103EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMUM...90
..................................................................
140403488.01609000010...00000003EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMU...76
141423244.8112012010000...00000000EXAM DESCRIPTION: X-RAY ELBOW 2 VIEWS - LEFT ...162
142413577.61858510000...00000003EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMUM...111
143642281.6012012000010...00000002EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMU...102
144512368.0015017510000...10000003EXAM DESCRIPTION: CT RT UPPER EXTREMITY CLINI...233
\n", "

145 rows × 1351 columns

\n", "
" ], "text/plain": [ " Age weight Male SchedSkinMins scheduled_case_duration \\\n", "0 42 2220.8 0 90 115 \n", "1 38 3035.2 0 90 90 \n", "2 61 3556.8 1 65 95 \n", "3 57 2681.6 0 60 60 \n", "4 55 2625.6 0 120 120 \n", ".. ... ... ... ... ... \n", "140 40 3488.0 1 60 90 \n", "141 42 3244.8 1 120 120 \n", "142 41 3577.6 1 85 85 \n", "143 64 2281.6 0 120 120 \n", "144 51 2368.0 0 150 175 \n", "\n", " ABRAMS, REID ALLEN HENTZEN, ERIC RICHARD LEEK, BRYAN TERRY \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 1 \n", "3 0 0 0 \n", "4 0 0 0 \n", ".. ... ... ... \n", "140 0 0 0 \n", "141 1 0 0 \n", "142 1 0 0 \n", "143 0 0 0 \n", "144 1 0 0 \n", "\n", " MEUNIER, MATTHEW JOHN RECHNIC, MARK ... without wood work \\\n", "0 1 0 ... 0 0 0 \n", "1 1 0 ... 0 0 0 \n", "2 0 0 ... 0 0 0 \n", "3 1 0 ... 0 0 0 \n", "4 1 0 ... 0 0 0 \n", ".. ... ... ... ... ... ... \n", "140 1 0 ... 0 0 0 \n", "141 0 0 ... 0 0 0 \n", "142 0 0 ... 0 0 0 \n", "143 1 0 ... 0 0 0 \n", "144 0 0 ... 1 0 0 \n", "\n", " workstation worse worst would wrist \\\n", "0 0 0 0 0 2 \n", "1 0 0 0 0 2 \n", "2 0 0 0 0 3 \n", "3 0 0 0 0 2 \n", "4 0 0 1 0 3 \n", ".. ... ... ... ... ... \n", "140 0 0 0 0 3 \n", "141 0 0 0 0 0 \n", "142 0 0 0 0 3 \n", "143 0 0 0 0 2 \n", "144 0 0 0 0 3 \n", "\n", " bert_text actual_case_duration \n", "0 EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMUM... 69 \n", "1 EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMUM... 110 \n", "2 Narrative & Impression EXAM DESCRIPTION: X-RAY... 103 \n", "3 EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMUM... 70 \n", "4 EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMUM... 90 \n", ".. ... ... \n", "140 EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMU... 76 \n", "141 EXAM DESCRIPTION: X-RAY ELBOW 2 VIEWS - LEFT ... 162 \n", "142 EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMUM... 111 \n", "143 EXAM DESCRIPTION: X-RAY WRIST COMPLETE MINIMU... 102 \n", "144 EXAM DESCRIPTION: CT RT UPPER EXTREMITY CLINI... 
233 \n", "\n", "[145 rows x 1351 columns]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": null, "id": "6196d9c1", "metadata": { "id": "6196d9c1", "outputId": "068a1228-2f59-469d-9251-50f357876214" }, "outputs": [ { "data": { "text/plain": [ "['Age',\n", " 'weight',\n", " 'Male',\n", " 'SchedSkinMins',\n", " 'scheduled_case_duration',\n", " 'ABRAMS, REID ALLEN',\n", " 'HENTZEN, ERIC RICHARD',\n", " 'LEEK, BRYAN TERRY',\n", " 'MEUNIER, MATTHEW JOHN',\n", " 'RECHNIC, MARK',\n", " 'Healthy',\n", " 'Mild Systemic Disease',\n", " 'Severe Systemic Disease',\n", " 'Choice Per Patient on Day of Surgery',\n", " 'General',\n", " 'Monitored Anesthesia Care (MAC) ',\n", " 'Regional',\n", " 'height',\n", " '00',\n", " '01',\n", " '02',\n", " '03',\n", " '04',\n", " '05',\n", " '06',\n", " '07',\n", " '08',\n", " '09',\n", " '10',\n", " '104',\n", " '105',\n", " '108',\n", " '11',\n", " '116',\n", " '12',\n", " '1204',\n", " '121',\n", " '13',\n", " '1354',\n", " '138',\n", " '14',\n", " '140',\n", " '15',\n", " '16',\n", " '160',\n", " '1640',\n", " '17',\n", " '174',\n", " '18',\n", " '1814',\n", " '188',\n", " '19',\n", " '191',\n", " '1930',\n", " '196',\n", " '1st',\n", " '20',\n", " '2001',\n", " '2015',\n", " '2017',\n", " '2018',\n", " '2019',\n", " '2020',\n", " '21',\n", " '211',\n", " '2139',\n", " '22',\n", " '223',\n", " '2236',\n", " '228',\n", " '23',\n", " '2306',\n", " '24',\n", " '242',\n", " '244',\n", " '249',\n", " '25',\n", " '26',\n", " '262',\n", " '27',\n", " '270',\n", " '28',\n", " '284',\n", " '29',\n", " '297',\n", " '2nd',\n", " '30',\n", " '306',\n", " '307',\n", " '31',\n", " '32',\n", " '33',\n", " '330',\n", " '332',\n", " '34',\n", " '346',\n", " '35',\n", " '36',\n", " '360',\n", " '363',\n", " '37',\n", " '38',\n", " '383',\n", " '39',\n", " '391',\n", " '3d',\n", " '3rd',\n", " '40',\n", " '41',\n", " '416',\n", " '42',\n", " '43',\n", " 
'44',\n", " '45',\n", " '46',\n", " '467',\n", " '47',\n", " '48',\n", " '49',\n", " '50',\n", " '51',\n", " '52',\n", " '53',\n", " '54',\n", " '55',\n", " '56',\n", " '57',\n", " '574',\n", " '58',\n", " '59',\n", " '5th',\n", " '60',\n", " '61',\n", " '62',\n", " '64',\n", " '72',\n", " '74',\n", " '76',\n", " '80',\n", " '82',\n", " '84',\n", " '841',\n", " '90',\n", " '961',\n", " 'abnormalities',\n", " 'abnormality',\n", " 'abut',\n", " 'abutment',\n", " 'accession',\n", " 'accident',\n", " 'accompanying',\n", " 'accounting',\n", " 'accounts',\n", " 'acquired',\n", " 'acromioclavicular',\n", " 'across',\n", " 'acute',\n", " 'adam',\n", " 'addition',\n", " 'additional',\n", " 'additionally',\n", " 'adjacent',\n", " 'administered',\n", " 'administration',\n", " 'advanced',\n", " 'age',\n", " 'ago',\n", " 'agree',\n", " 'ailable',\n", " 'alex',\n", " 'algorithm',\n", " 'algorithms',\n", " 'aligned',\n", " 'alignment',\n", " 'allowing',\n", " 'almost',\n", " 'along',\n", " 'already',\n", " 'also',\n", " 'although',\n", " 'amilcare',\n", " 'amount',\n", " 'anatomic',\n", " 'anchor',\n", " 'anchors',\n", " 'angle',\n", " 'angular',\n", " 'angulated',\n", " 'angulation',\n", " 'ankle',\n", " 'antecubital',\n", " 'anterior',\n", " 'anteriorly',\n", " 'anterolateral',\n", " 'anterolaterally',\n", " 'anteromedial',\n", " 'anteromedially',\n", " 'ap',\n", " 'apex',\n", " 'apparent',\n", " 'appear',\n", " 'appearance',\n", " 'appears',\n", " 'appreciated',\n", " 'approaches',\n", " 'appropriate',\n", " 'approximate',\n", " 'approximately',\n", " 'arbelo',\n", " 'arcs',\n", " 'area',\n", " 'areas',\n", " 'arising',\n", " 'around',\n", " 'art',\n", " 'arthrodesis',\n", " 'arthroplasty',\n", " 'arthrosis',\n", " 'articular',\n", " 'articulate',\n", " 'articulated',\n", " 'articulating',\n", " 'articulation',\n", " 'articulations',\n", " 'artifact',\n", " 'artifactual',\n", " 'aspect',\n", " 'aspects',\n", " 'aspiration',\n", " 'assess',\n", " 'assessed',\n", " 
'assessing',\n", " 'assessment',\n", " 'associated',\n", " 'assure',\n", " 'attachment',\n", " 'attempted',\n", " 'attention',\n", " 'automatic',\n", " 'av',\n", " 'available',\n", " 'avascular',\n", " 'avulsion',\n", " 'axial',\n", " 'axis',\n", " 'baldassarre',\n", " 'bandage',\n", " 'basal',\n", " 'base',\n", " 'basis',\n", " 'bed',\n", " 'bending',\n", " 'benefit',\n", " 'best',\n", " 'better',\n", " 'bike',\n", " 'bilateral',\n", " 'bilaterally',\n", " 'bivalve',\n", " 'blood',\n", " 'bodies',\n", " 'body',\n", " 'bold',\n", " 'bone',\n", " 'bones',\n", " 'bony',\n", " 'borderline',\n", " 'boss',\n", " 'bradley',\n", " 'brady',\n", " 'break',\n", " 'bridging',\n", " 'brogan',\n", " 'bubbles',\n", " 'bultman',\n", " 'c7',\n", " 'calcific',\n", " 'calcifications',\n", " 'calcium',\n", " 'callus',\n", " 'cannot',\n", " 'capitate',\n", " 'capitellar',\n", " 'capitellum',\n", " 'capitolunate',\n", " 'capsule',\n", " 'care',\n", " 'carpal',\n", " 'carpi',\n", " 'carpometacarpal',\n", " 'carpus',\n", " 'cast',\n", " 'casting',\n", " 'centered',\n", " 'central',\n", " 'centrally',\n", " 'change',\n", " 'changed',\n", " 'changes',\n", " 'channel',\n", " 'characterized',\n", " 'chen',\n", " 'cheng',\n", " 'chip',\n", " 'chondrocalcinosis',\n", " 'christine',\n", " 'christopher',\n", " 'chronic',\n", " 'chung',\n", " 'circumferential',\n", " 'clavicle',\n", " 'clearly',\n", " 'clinical',\n", " 'close',\n", " 'closed',\n", " 'closely',\n", " 'cm',\n", " 'cmc',\n", " 'cmcj',\n", " 'coalition',\n", " 'collapse',\n", " 'collateral',\n", " 'collection',\n", " 'collections',\n", " 'colles',\n", " 'collision',\n", " 'columnar',\n", " 'combined',\n", " 'comminuted',\n", " 'comminution',\n", " 'common',\n", " 'communicated',\n", " 'compare',\n", " 'compared',\n", " 'comparison',\n", " 'compartment',\n", " 'compartments',\n", " 'compatible',\n", " 'complete',\n", " 'complication',\n", " 'complications',\n", " 'component',\n", " 'concentric',\n", " 'concurrent',\n", " 
'configuration',\n", " 'confirm',\n", " 'congruent',\n", " 'consequently',\n", " 'consider',\n", " 'considerable',\n", " 'considerably',\n", " 'consideration',\n", " 'consistent',\n", " 'conspicuity',\n", " 'conspicuous',\n", " 'contemplated',\n", " 'contralateral',\n", " 'contrast',\n", " 'control',\n", " 'conventional',\n", " 'coracoclavicular',\n", " 'coronal',\n", " 'coronally',\n", " 'coronoid',\n", " 'corpus',\n", " 'corrected',\n", " 'correction',\n", " 'correlate',\n", " 'corresponding',\n", " 'cortex',\n", " 'cortical',\n", " 'corticated',\n", " 'cotterill',\n", " 'cottrell',\n", " 'created',\n", " 'critical',\n", " 'cross',\n", " 'cruz',\n", " 'ct',\n", " 'ctdi',\n", " 'ctdivol',\n", " 'ctdlvol',\n", " 'ctrm',\n", " 'current',\n", " 'currently',\n", " 'cyst',\n", " 'cystic',\n", " 'cysts',\n", " 'dark',\n", " 'data',\n", " 'date',\n", " 'dated',\n", " 'david',\n", " 'day',\n", " 'days',\n", " 'decrease',\n", " 'decreased',\n", " 'dedicated',\n", " 'deep',\n", " 'definite',\n", " 'definitely',\n", " 'definitive',\n", " 'deformity',\n", " 'degenerative',\n", " 'degree',\n", " 'degrees',\n", " 'delasotta',\n", " 'delayed',\n", " 'delineated',\n", " 'demineralization',\n", " 'demineralized',\n", " 'demonstrate',\n", " 'demonstrated',\n", " 'demonstrates',\n", " 'demonstrating',\n", " 'demonstration',\n", " 'densities',\n", " 'density',\n", " 'deposition',\n", " 'depressed',\n", " 'depression',\n", " 'described',\n", " 'description',\n", " 'detail',\n", " 'detailed',\n", " 'details',\n", " 'diameter',\n", " 'diaphyseal',\n", " 'diaphysis',\n", " 'diastasis',\n", " 'diego',\n", " 'differences',\n", " 'different',\n", " 'difficult',\n", " 'diffuse',\n", " 'diffusely',\n", " 'digitorum',\n", " 'dimension',\n", " 'directed',\n", " 'discontinuity',\n", " 'disease',\n", " 'disi',\n", " 'dislocated',\n", " 'dislocation',\n", " 'dispersed',\n", " 'displaced',\n", " 'displacement',\n", " 'distal',\n", " 'distally',\n", " 'distance',\n", " 'distances',\n", " 
'distracted',\n", " 'distraction',\n", " 'disuse',\n", " 'dlp',\n", " 'doi',\n", " 'dominant',\n", " 'donated',\n", " 'done',\n", " 'dorsal',\n", " 'dorsally',\n", " 'dorsolateral',\n", " 'dorsum',\n", " 'dose',\n", " 'drift',\n", " 'due',\n", " 'dynamic',\n", " 'earlier',\n", " 'early',\n", " 'ecchymosis',\n", " 'edema',\n", " 'edward',\n", " 'effect',\n", " 'effusion',\n", " 'either',\n", " 'elbow',\n", " 'elsewhere',\n", " 'employ',\n", " 'employed',\n", " 'encounter',\n", " 'end',\n", " 'enhancement',\n", " 'enter',\n", " 'entering',\n", " 'enthesopathic',\n", " 'enthesopathy',\n", " 'entire',\n", " 'entrapment',\n", " 'epic',\n", " 'epicondyle',\n", " 'epiphysis',\n", " 'equipment',\n", " 'eric',\n", " 'especially',\n", " 'essentially',\n", " 'establish',\n", " 'evaluate',\n", " 'evaluated',\n", " 'evaluation',\n", " 'evelyn',\n", " 'evidence',\n", " 'evident',\n", " 'exam',\n", " 'examination',\n", " 'examinations',\n", " 'exams',\n", " 'exhibiting',\n", " 'exposes',\n", " 'exposure',\n", " 'extend',\n", " 'extended',\n", " 'extending',\n", " 'extends',\n", " 'extension',\n", " 'extensive',\n", " 'extensor',\n", " 'external',\n", " 'extra',\n", " 'extremity',\n", " 'facet',\n", " 'failure',\n", " 'fall',\n", " 'fat',\n", " 'fdp',\n", " 'features',\n", " 'fell',\n", " 'fellow',\n", " 'femoral',\n", " 'fiberglass',\n", " 'fibrocartilage',\n", " 'fifth',\n", " 'films',\n", " 'finding',\n", " 'findings',\n", " 'fine',\n", " 'finger',\n", " 'fingers',\n", " 'first',\n", " 'five',\n", " 'fixating',\n", " 'fixation',\n", " 'fixator',\n", " 'flake',\n", " 'flexed',\n", " 'flexion',\n", " 'flexor',\n", " 'fliszar',\n", " 'fluid',\n", " 'fluoroscopy',\n", " 'focal',\n", " 'foci',\n", " 'follow',\n", " 'following',\n", " 'followup',\n", " 'forearm',\n", " 'foreign',\n", " 'foreshortening',\n", " 'formation',\n", " 'fossa',\n", " 'fossae',\n", " 'four',\n", " 'fourth',\n", " 'fpl',\n", " 'fracture',\n", " 'fractured',\n", " 'fractures',\n", " 'fragment',\n", " 
'fragments',\n", " 'friend',\n", " 'frontal',\n", " 'fully',\n", " 'fusion',\n", " 'fx',\n", " 'galleazi',\n", " 'gap',\n", " 'gapping',\n", " 'gaps',\n", " 'gas',\n", " 'generate',\n", " 'generated',\n", " 'gentili',\n", " 'given',\n", " 'globules',\n", " 'grade',\n", " 'greater',\n", " 'greatest',\n", " 'greenstick',\n", " 'gross',\n", " 'grossly',\n", " 'half',\n", " 'hamate',\n", " 'hand',\n", " 'hannah',\n", " 'hardware',\n", " 'harris',\n", " 'head',\n", " 'healed',\n", " 'healing',\n", " 'health',\n", " 'heather',\n", " 'heavily',\n", " 'helical',\n", " 'helling',\n", " 'helpful',\n", " 'hemarthrosis',\n", " 'hematoma',\n", " 'hemorrhage',\n", " 'high',\n", " 'highly',\n", " 'history',\n", " 'hours',\n", " 'however',\n", " 'hr',\n", " 'huang',\n", " 'hughes',\n", " 'humeral',\n", " 'humerus',\n", " 'humpback',\n", " 'hydroxyapatite',\n", " 'identified',\n", " 'image',\n", " 'imaged',\n", " 'images',\n", " 'imaging',\n", " 'immediate',\n", " 'impacted',\n", " 'impaction',\n", " 'impax',\n", " 'impinge',\n", " 'implies',\n", " 'impression',\n", " 'improve',\n", " 'improved',\n", " 'improvement',\n", " 'incidental',\n", " 'inclination',\n", " 'included',\n", " 'includes',\n", " 'including',\n", " 'incomplete',\n", " 'incompletely',\n", " 'incongruent',\n", " 'incongruity',\n", " 'increase',\n", " 'increased',\n", " 'ind',\n", " 'independent',\n", " 'index',\n", " 'indicating',\n", " 'indication',\n", " 'inferior',\n", " 'inferiorly',\n", " 'initial',\n", " 'injuries',\n", " 'injury',\n", " 'instability',\n", " 'institution',\n", " 'insufficiency',\n", " 'intact',\n", " 'intercarpal',\n", " 'interfragmentary',\n", " 'internal',\n", " 'interosseous',\n", " 'interphalangeal',\n", " 'interposition',\n", " 'interpretation',\n", " 'interpreting',\n", " 'interspaces',\n", " 'interval',\n", " 'intra',\n", " 'intraarticular',\n", " 'intramedullary',\n", " 'intraosseous',\n", " 'intravenous',\n", " 'involve',\n", " 'involvement',\n", " 'involves',\n", " 'involving',\n", 
" 'irregularity',\n", " 'island',\n", " 'isolated',\n", " 'iterative',\n", " 'iv',\n", " 'jazbeh',\n", " 'joint',\n", " 'joints',\n", " 'junction',\n", " 'karen',\n", " 'known',\n", " 'l1',\n", " 'laceration',\n", " 'large',\n", " 'larger',\n", " 'lateral',\n", " 'lateralization',\n", " 'lauren',\n", " 'lawrence',\n", " 'laxity',\n", " 'least',\n", " 'left',\n", " 'length',\n", " 'lesion',\n", " 'less',\n", " 'level',\n", " 'levels',\n", " 'ligament',\n", " 'ligamentous',\n", " 'ligaments',\n", " 'like',\n", " 'likely',\n", " 'likewise',\n", " 'limit',\n", " 'limited',\n", " 'limiting',\n", " 'limits',\n", " 'lin',\n", " 'line',\n", " 'linear',\n", " 'lines',\n", " 'lip',\n", " 'lister',\n", " 'location',\n", " 'long',\n", " 'longstanding',\n", " 'longus',\n", " 'loss',\n", " 'low',\n", " 'lower',\n", " 'lt',\n", " 'lucency',\n", " 'lunate',\n", " 'lunotriquetral',\n", " 'made',\n", " 'madelung',\n", " 'main',\n", " 'maintained',\n", " 'maintenance',\n", " 'major',\n", " 'makes',\n", " 'makeup',\n", " 'malalignment',\n", " 'malunion',\n", " 'malunited',\n", " 'margin',\n", " 'marginal',\n", " 'margins',\n", " 'marked',\n", " 'markedly',\n", " 'mass',\n", " 'material',\n", " 'maximal',\n", " 'maxwell',\n", " 'may',\n", " 'mcglone',\n", " 'mcp',\n", " 'measurable',\n", " 'measured',\n", " 'measures',\n", " 'measuring',\n", " 'media',\n", " 'medial',\n", " 'medially',\n", " 'mediolateral',\n", " 'medullary',\n", " 'metacarpal',\n", " 'metacarpals',\n", " 'metacarpophalangeal',\n", " 'metadiaphysis',\n", " 'metallic',\n", " 'metaphyseal',\n", " 'metaphyses',\n", " 'metaphysis',\n", " 'mexico',\n", " 'mgy',\n", " 'mid',\n", " 'midcarpal',\n", " 'middle',\n", " 'migrated',\n", " 'mild',\n", " 'mildly',\n", " 'min',\n", " 'mineralization',\n", " 'mini',\n", " 'minimal',\n", " 'minimally',\n", " 'minimum',\n", " 'minor',\n", " 'mm',\n", " 'moderate',\n", " 'moderately',\n", " 'modern',\n", " 'morphology',\n", " 'motor',\n", " 'msk',\n", " 'multiplanar',\n", " 
'multiple',\n", " 'muscle',\n", " 'muscles',\n", " 'musculotendinous',\n", " 'mvc',\n", " 'narrative',\n", " 'narrowing',\n", " 'near',\n", " 'necessary',\n", " 'neck',\n", " 'necrosis',\n", " 'need',\n", " 'negative',\n", " 'neurovascular',\n", " 'neutral',\n", " 'new',\n", " 'newly',\n", " 'noncontrast',\n", " 'nondisplaced',\n", " 'none',\n", " 'nonspecific',\n", " 'nonunion',\n", " 'nonunited',\n", " 'normal',\n", " 'normally',\n", " 'norman',\n", " 'notch',\n", " 'note',\n", " 'noted',\n", " 'number',\n", " 'numerous',\n", " 'oblique',\n", " 'obscure',\n", " 'obscured',\n", " 'obscures',\n", " 'obscuring',\n", " 'obtained',\n", " 'obvious',\n", " 'october',\n", " 'offset',\n", " 'old',\n", " 'olecranon',\n", " 'ongoing',\n", " 'open',\n", " 'operative',\n", " 'optimized',\n", " 'order',\n", " 'ordered',\n", " 'oriented',\n", " 'orif',\n", " 'origin',\n", " 'original',\n", " 'orthogonal',\n", " 'osborne',\n", " 'osseous',\n", " 'ossicle',\n", " 'ossicles',\n", " 'ossific',\n", " 'osteoarthritic',\n", " 'osteoarthrosis',\n", " 'osteonecrosis',\n", " 'osteopenia',\n", " 'osteopenic',\n", " 'osteophyte',\n", " 'osteophytes',\n", " 'osteophytosis',\n", " 'osteotomy',\n", " 'otherwise',\n", " 'outs',\n", " 'outside',\n", " 'overall',\n", " 'overlap',\n", " 'overlapping',\n", " 'overlying',\n", " 'override',\n", " 'overriding',\n", " 'pa',\n", " 'pacs',\n", " 'pads',\n", " 'pain',\n", " 'palmar',\n", " 'paris',\n", " 'part',\n", " 'partial',\n", " 'partially',\n", " 'particularly',\n", " 'partly',\n", " 'pass',\n", " 'passenger',\n", " 'passes',\n", " 'pathology',\n", " 'pathria',\n", " 'patient',\n", " 'pattern',\n", " 'peer',\n", " 'penticuff',\n", " 'per',\n", " 'percutaneous',\n", " 'performed',\n", " 'perihardware',\n", " 'perilunate',\n", " 'periosteal',\n", " 'persistent',\n", " 'petechial',\n", " 'phalangeal',\n", " 'phalanges',\n", " 'phalanx',\n", " 'physician',\n", " 'physiologic',\n", " 'physis',\n", " 'pieces',\n", " 'pin',\n", " 'pip',\n", " 
'pisiform',\n", " 'place',\n", " 'placed',\n", " 'placement',\n", " 'plain',\n", " 'plane',\n", " 'planes',\n", " 'planning',\n", " 'plaster',\n", " 'plate',\n", " 'plating',\n", " 'please',\n", " 'plus',\n", " 'point',\n", " 'pole',\n", " 'polyarticular',\n", " 'poorly',\n", " 'portion',\n", " 'portions',\n", " 'positive',\n", " 'possibility',\n", " 'possible',\n", " 'possibly',\n", " 'post',\n", " 'posterior',\n", " 'posteriorly',\n", " 'posterolaterally',\n", " 'postreduction',\n", " 'postsurgical',\n", " 'posttraumatic',\n", " 'potential',\n", " 'practice',\n", " 'pre',\n", " 'predominant',\n", " 'predominantly',\n", " 'predominately',\n", " 'preferential',\n", " 'preliminary',\n", " 'preoperative',\n", " 'presence',\n", " 'present',\n", " 'preserved',\n", " 'presumably',\n", " 'presumed',\n", " 'previous',\n", " 'previously',\n", " 'principal',\n", " 'pringle',\n", " 'prior',\n", " 'probable',\n", " 'probably',\n", " 'process',\n", " 'processed',\n", " 'processes',\n", " 'processing',\n", " 'products',\n", " 'profundus',\n", " 'progressive',\n", " 'projecting',\n", " 'projection',\n", " 'projections',\n", " 'projects',\n", " 'prominent',\n", " 'pronator',\n", " 'pronounced',\n", " 'protocol',\n", " 'provided',\n", " 'provider',\n", " 'proximal',\n", " 'proximally',\n", " 'punctate',\n", " 'quadratus',\n", " 'question',\n", " 'questionable',\n", " 'quite',\n", " 'radial',\n", " 'radialis',\n", " 'radially',\n", " 'radiation',\n", " 'radii',\n", " 'radiocapitellar',\n", " 'radiocarpal',\n", " 'radiograph',\n", " 'radiographs',\n", " 'radiologist',\n", " 'radiopaque',\n", " 'radioscaphoid',\n", " 'radioulnar',\n", " 'radius',\n", " 'raising',\n", " 'randall',\n", " 'range',\n", " ...]" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(df.columns)" ] }, { "cell_type": "code", "execution_count": null, "id": "feaab124", "metadata": { "id": "feaab124" }, "outputs": [], "source": [ "# Feature matrix: every column of `df` except the free-text `bert_text` column and\n", "# `actual_case_duration` (the column later assigned to Y).\n", "# Dropped by name rather than by trailing position, so a reordered CSV cannot silently\n", "# leak the raw text or the target into the features.\n", "X = df.drop(columns=['bert_text', 'actual_case_duration'])" ] }, { 
"cell_type": "code", "execution_count": null, "id": "c5673c64", "metadata": { "id": "c5673c64", "outputId": "72ae7aaf-1201-444f-8ac0-ac9e8ee8ca08" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeweightMaleSchedSkinMinsscheduled_case_durationABRAMS, REID ALLENHENTZEN, ERIC RICHARDLEEK, BRYAN TERRYMEUNIER, MATTHEW JOHNRECHNIC, MARK...with1withinwithoutwoodworkworkstationworseworstwouldwrist
0422220.809011500010...0100000002
1383035.20909000010...0000000002
2613556.81659500100...0000000003
3572681.60606000010...0000000002
4552625.6012012000010...0100000103
..................................................................
140403488.01609000010...0000000003
141423244.8112012010000...0000000000
142413577.61858510000...0000000003
143642281.6012012000010...0000000002
144512368.0015017510000...0210000003
\n", "

145 rows × 1349 columns

\n", "
" ], "text/plain": [ " Age weight Male SchedSkinMins scheduled_case_duration \\\n", "0 42 2220.8 0 90 115 \n", "1 38 3035.2 0 90 90 \n", "2 61 3556.8 1 65 95 \n", "3 57 2681.6 0 60 60 \n", "4 55 2625.6 0 120 120 \n", ".. ... ... ... ... ... \n", "140 40 3488.0 1 60 90 \n", "141 42 3244.8 1 120 120 \n", "142 41 3577.6 1 85 85 \n", "143 64 2281.6 0 120 120 \n", "144 51 2368.0 0 150 175 \n", "\n", " ABRAMS, REID ALLEN HENTZEN, ERIC RICHARD LEEK, BRYAN TERRY \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 1 \n", "3 0 0 0 \n", "4 0 0 0 \n", ".. ... ... ... \n", "140 0 0 0 \n", "141 1 0 0 \n", "142 1 0 0 \n", "143 0 0 0 \n", "144 1 0 0 \n", "\n", " MEUNIER, MATTHEW JOHN RECHNIC, MARK ... with1 within without wood \\\n", "0 1 0 ... 0 1 0 0 \n", "1 1 0 ... 0 0 0 0 \n", "2 0 0 ... 0 0 0 0 \n", "3 1 0 ... 0 0 0 0 \n", "4 1 0 ... 0 1 0 0 \n", ".. ... ... ... ... ... ... ... \n", "140 1 0 ... 0 0 0 0 \n", "141 0 0 ... 0 0 0 0 \n", "142 0 0 ... 0 0 0 0 \n", "143 1 0 ... 0 0 0 0 \n", "144 0 0 ... 0 2 1 0 \n", "\n", " work workstation worse worst would wrist \n", "0 0 0 0 0 0 2 \n", "1 0 0 0 0 0 2 \n", "2 0 0 0 0 0 3 \n", "3 0 0 0 0 0 2 \n", "4 0 0 0 1 0 3 \n", ".. ... ... ... ... ... ... \n", "140 0 0 0 0 0 3 \n", "141 0 0 0 0 0 0 \n", "142 0 0 0 0 0 3 \n", "143 0 0 0 0 0 2 \n", "144 0 0 0 0 0 3 \n", "\n", "[145 rows x 1349 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X" ] }, { "cell_type": "code", "execution_count": null, "id": "cfa7675b", "metadata": { "id": "cfa7675b" }, "outputs": [], "source": [ "# Y is the `actual_case_duration` column of the training frame, picked out by label.\n", "Y = df.loc[:, 'actual_case_duration']" ] }, { "cell_type": "code", "execution_count": null, "id": "8e1e612a", "metadata": { "id": "8e1e612a", "outputId": "e09009f6-a4f0-4806-d353-d64fb4d5e926" }, "outputs": [ { "data": { "text/plain": [ "0 69\n", "1 110\n", "2 103\n", "3 70\n", "4 90\n", " ... 
\n", "140 76\n", "141 162\n", "142 111\n", "143 102\n", "144 233\n", "Name: actual_case_duration, Length: 145, dtype: int64" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Y" ] }, { "cell_type": "code", "execution_count": null, "id": "21b5b085", "metadata": { "id": "21b5b085" }, "outputs": [], "source": [ "# Second CSV, named as the testing split. Its first column is NOT dropped here (unlike `df`),\n", "# so positional slices of `df_test` are offset by one relative to `df`.\n", "df_test = pd.read_csv('case_testing.csv')" ] }, { "cell_type": "code", "execution_count": null, "id": "12e85310", "metadata": { "scrolled": true, "id": "12e85310" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "f4b9bdfb", "metadata": { "id": "f4b9bdfb" }, "outputs": [], "source": [ "# Test features: skip the first column and the last two, mirroring how X is cut from the\n", "# training frame (first column removed at load time, last two columns excluded from X).\n", "# NOTE(review): assumes case_testing.csv has the same column layout as case_training.csv -- confirm.\n", "X_test = df_test.iloc[:, 1:-2]" ] }, { "cell_type": "code", "execution_count": null, "id": "6642140e", "metadata": { "scrolled": true, "id": "6642140e", "outputId": "069ab5a9-6c55-4885-ee66-7dfc0780b8bf" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeweightMaleSchedSkinMinsscheduled_case_durationABRAMS, REID ALLENHENTZEN, ERIC RICHARDLEEK, BRYAN TERRYMEUNIER, MATTHEW JOHNRECHNIC, MARK...with1withinwithoutwoodworkworkstationworseworstwouldwrist
0242542.401959510000...0010000002
1383062.401909010000...0100000004
2583041.601759510000...0000000004
3292244.800606000010...0000000003
4653800.000606000010...0010000004
5573384.001609000010...0010000002
6782560.000858510000...0260000003
7232000.000558000010...0000000004
8253433.60012014510000...0010000002
9602619.200757510000...0000000002
10472198.400609000010...0000000002
12592644.800606000010...0000000003
13602048.0008010510000...0200000003
14402652.80111511501000...0010000002
15603619.070909010000...0000000003
16712400.000858510000...0000000002
17494160.001757510000...0000000002
18292000.0008511000010...0000000005
19542067.200858510000...0000000003
20283680.0019011500010...0000000003
21771888.00012014010000...0000000004
22291900.800909010000...0000000002
23572344.000606000010...0000000003
24672240.0009011010000...0000000001
25672560.00010510510000...0000000003
26533020.801858510000...0000000001
27503537.600609000010...0000000002
28192598.401606000010...0000000003
29563233.600606000010...0000000003
30583052.800656500010...0000000002
32202702.401959500010...0000000003
33642766.40010010000010...0000000003
34623062.400606000010...0000000003
35213672.001858510000...0010010001
36593062.401858501000...0010000002
37392721.60011511500010...0110000000
39592350.400959510000...0010000003
40652336.000757510000...0010000002
41382630.401555500010...0000000002
42612848.000858500100...0000000003
43451916.800606000010...0000000000
44503025.600909010000...0000000003
45252080.001606000010...1000000000
47732033.600609000010...0000000001
48632395.20010010010000...0010020001
50533024.00010010001000...0200000025
51642548.800909010000...0000000002
52272284.800555500010...0110010000
53332144.000909010000...0050000002
55612262.400656500010...0000000002
56241859.200606000010...0000000002
57252640.001656500010...0010000002
58292865.601909010000...0010030002
59792590.400151510000...0000000003
61672422.400757510000...0000000003
62672640.000609000010...0000000002
\n", "

56 rows × 1349 columns

\n", "
" ], "text/plain": [ " Age weight Male SchedSkinMins scheduled_case_duration \\\n", "0 24 2542.40 1 95 95 \n", "1 38 3062.40 1 90 90 \n", "2 58 3041.60 1 75 95 \n", "3 29 2244.80 0 60 60 \n", "4 65 3800.00 0 60 60 \n", "5 57 3384.00 1 60 90 \n", "6 78 2560.00 0 85 85 \n", "7 23 2000.00 0 55 80 \n", "8 25 3433.60 0 120 145 \n", "9 60 2619.20 0 75 75 \n", "10 47 2198.40 0 60 90 \n", "12 59 2644.80 0 60 60 \n", "13 60 2048.00 0 80 105 \n", "14 40 2652.80 1 115 115 \n", "15 60 3619.07 0 90 90 \n", "16 71 2400.00 0 85 85 \n", "17 49 4160.00 1 75 75 \n", "18 29 2000.00 0 85 110 \n", "19 54 2067.20 0 85 85 \n", "20 28 3680.00 1 90 115 \n", "21 77 1888.00 0 120 140 \n", "22 29 1900.80 0 90 90 \n", "23 57 2344.00 0 60 60 \n", "24 67 2240.00 0 90 110 \n", "25 67 2560.00 0 105 105 \n", "26 53 3020.80 1 85 85 \n", "27 50 3537.60 0 60 90 \n", "28 19 2598.40 1 60 60 \n", "29 56 3233.60 0 60 60 \n", "30 58 3052.80 0 65 65 \n", "32 20 2702.40 1 95 95 \n", "33 64 2766.40 0 100 100 \n", "34 62 3062.40 0 60 60 \n", "35 21 3672.00 1 85 85 \n", "36 59 3062.40 1 85 85 \n", "37 39 2721.60 0 115 115 \n", "39 59 2350.40 0 95 95 \n", "40 65 2336.00 0 75 75 \n", "41 38 2630.40 1 55 55 \n", "42 61 2848.00 0 85 85 \n", "43 45 1916.80 0 60 60 \n", "44 50 3025.60 0 90 90 \n", "45 25 2080.00 1 60 60 \n", "47 73 2033.60 0 60 90 \n", "48 63 2395.20 0 100 100 \n", "50 53 3024.00 0 100 100 \n", "51 64 2548.80 0 90 90 \n", "52 27 2284.80 0 55 55 \n", "53 33 2144.00 0 90 90 \n", "55 61 2262.40 0 65 65 \n", "56 24 1859.20 0 60 60 \n", "57 25 2640.00 1 65 65 \n", "58 29 2865.60 1 90 90 \n", "59 79 2590.40 0 15 15 \n", "61 67 2422.40 0 75 75 \n", "62 67 2640.00 0 60 90 \n", "\n", " ABRAMS, REID ALLEN HENTZEN, ERIC RICHARD LEEK, BRYAN TERRY \\\n", "0 1 0 0 \n", "1 1 0 0 \n", "2 1 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "5 0 0 0 \n", "6 1 0 0 \n", "7 0 0 0 \n", "8 1 0 0 \n", "9 1 0 0 \n", "10 0 0 0 \n", "12 0 0 0 \n", "13 1 0 0 \n", "14 0 1 0 \n", "15 1 0 0 \n", "16 1 0 0 \n", "17 1 0 0 \n", "18 0 0 0 \n", "19 
1 0 0 \n", "20 0 0 0 \n", "21 1 0 0 \n", "22 1 0 0 \n", "23 0 0 0 \n", "24 1 0 0 \n", "25 1 0 0 \n", "26 1 0 0 \n", "27 0 0 0 \n", "28 0 0 0 \n", "29 0 0 0 \n", "30 0 0 0 \n", "32 0 0 0 \n", "33 0 0 0 \n", "34 0 0 0 \n", "35 1 0 0 \n", "36 0 1 0 \n", "37 0 0 0 \n", "39 1 0 0 \n", "40 1 0 0 \n", "41 0 0 0 \n", "42 0 0 1 \n", "43 0 0 0 \n", "44 1 0 0 \n", "45 0 0 0 \n", "47 0 0 0 \n", "48 1 0 0 \n", "50 0 1 0 \n", "51 1 0 0 \n", "52 0 0 0 \n", "53 1 0 0 \n", "55 0 0 0 \n", "56 0 0 0 \n", "57 0 0 0 \n", "58 1 0 0 \n", "59 1 0 0 \n", "61 1 0 0 \n", "62 0 0 0 \n", "\n", " MEUNIER, MATTHEW JOHN RECHNIC, MARK ... with1 within without wood \\\n", "0 0 0 ... 0 0 1 0 \n", "1 0 0 ... 0 1 0 0 \n", "2 0 0 ... 0 0 0 0 \n", "3 1 0 ... 0 0 0 0 \n", "4 1 0 ... 0 0 1 0 \n", "5 1 0 ... 0 0 1 0 \n", "6 0 0 ... 0 2 6 0 \n", "7 1 0 ... 0 0 0 0 \n", "8 0 0 ... 0 0 1 0 \n", "9 0 0 ... 0 0 0 0 \n", "10 1 0 ... 0 0 0 0 \n", "12 1 0 ... 0 0 0 0 \n", "13 0 0 ... 0 2 0 0 \n", "14 0 0 ... 0 0 1 0 \n", "15 0 0 ... 0 0 0 0 \n", "16 0 0 ... 0 0 0 0 \n", "17 0 0 ... 0 0 0 0 \n", "18 1 0 ... 0 0 0 0 \n", "19 0 0 ... 0 0 0 0 \n", "20 1 0 ... 0 0 0 0 \n", "21 0 0 ... 0 0 0 0 \n", "22 0 0 ... 0 0 0 0 \n", "23 1 0 ... 0 0 0 0 \n", "24 0 0 ... 0 0 0 0 \n", "25 0 0 ... 0 0 0 0 \n", "26 0 0 ... 0 0 0 0 \n", "27 1 0 ... 0 0 0 0 \n", "28 1 0 ... 0 0 0 0 \n", "29 1 0 ... 0 0 0 0 \n", "30 1 0 ... 0 0 0 0 \n", "32 1 0 ... 0 0 0 0 \n", "33 1 0 ... 0 0 0 0 \n", "34 1 0 ... 0 0 0 0 \n", "35 0 0 ... 0 0 1 0 \n", "36 0 0 ... 0 0 1 0 \n", "37 1 0 ... 0 1 1 0 \n", "39 0 0 ... 0 0 1 0 \n", "40 0 0 ... 0 0 1 0 \n", "41 1 0 ... 0 0 0 0 \n", "42 0 0 ... 0 0 0 0 \n", "43 1 0 ... 0 0 0 0 \n", "44 0 0 ... 0 0 0 0 \n", "45 1 0 ... 1 0 0 0 \n", "47 1 0 ... 0 0 0 0 \n", "48 0 0 ... 0 0 1 0 \n", "50 0 0 ... 0 2 0 0 \n", "51 0 0 ... 0 0 0 0 \n", "52 1 0 ... 0 1 1 0 \n", "53 0 0 ... 0 0 5 0 \n", "55 1 0 ... 0 0 0 0 \n", "56 1 0 ... 0 0 0 0 \n", "57 1 0 ... 0 0 1 0 \n", "58 0 0 ... 0 0 1 0 \n", "59 0 0 ... 0 0 0 0 \n", "61 0 0 ... 
0 0 0 0 \n", "62 1 0 ... 0 0 0 0 \n", "\n", " work workstation worse worst would wrist \n", "0 0 0 0 0 0 2 \n", "1 0 0 0 0 0 4 \n", "2 0 0 0 0 0 4 \n", "3 0 0 0 0 0 3 \n", "4 0 0 0 0 0 4 \n", "5 0 0 0 0 0 2 \n", "6 0 0 0 0 0 3 \n", "7 0 0 0 0 0 4 \n", "8 0 0 0 0 0 2 \n", "9 0 0 0 0 0 2 \n", "10 0 0 0 0 0 2 \n", "12 0 0 0 0 0 3 \n", "13 0 0 0 0 0 3 \n", "14 0 0 0 0 0 2 \n", "15 0 0 0 0 0 3 \n", "16 0 0 0 0 0 2 \n", "17 0 0 0 0 0 2 \n", "18 0 0 0 0 0 5 \n", "19 0 0 0 0 0 3 \n", "20 0 0 0 0 0 3 \n", "21 0 0 0 0 0 4 \n", "22 0 0 0 0 0 2 \n", "23 0 0 0 0 0 3 \n", "24 0 0 0 0 0 1 \n", "25 0 0 0 0 0 3 \n", "26 0 0 0 0 0 1 \n", "27 0 0 0 0 0 2 \n", "28 0 0 0 0 0 3 \n", "29 0 0 0 0 0 3 \n", "30 0 0 0 0 0 2 \n", "32 0 0 0 0 0 3 \n", "33 0 0 0 0 0 3 \n", "34 0 0 0 0 0 3 \n", "35 0 1 0 0 0 1 \n", "36 0 0 0 0 0 2 \n", "37 0 0 0 0 0 0 \n", "39 0 0 0 0 0 3 \n", "40 0 0 0 0 0 2 \n", "41 0 0 0 0 0 2 \n", "42 0 0 0 0 0 3 \n", "43 0 0 0 0 0 0 \n", "44 0 0 0 0 0 3 \n", "45 0 0 0 0 0 0 \n", "47 0 0 0 0 0 1 \n", "48 0 2 0 0 0 1 \n", "50 0 0 0 0 2 5 \n", "51 0 0 0 0 0 2 \n", "52 0 1 0 0 0 0 \n", "53 0 0 0 0 0 2 \n", "55 0 0 0 0 0 2 \n", "56 0 0 0 0 0 2 \n", "57 0 0 0 0 0 2 \n", "58 0 3 0 0 0 2 \n", "59 0 0 0 0 0 3 \n", "61 0 0 0 0 0 3 \n", "62 0 0 0 0 0 2 \n", "\n", "[56 rows x 1349 columns]" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test" ] }, { "cell_type": "code", "execution_count": null, "id": "4386654d", "metadata": { "id": "4386654d" }, "outputs": [], "source": [ "Y_test = df_test['actual_case_duration']" ] }, { "cell_type": "code", "execution_count": null, "id": "dc14609f", "metadata": { "scrolled": true, "id": "dc14609f", "outputId": "2457337a-0902-45a0-9bde-d882b4b4605d" }, "outputs": [ { "data": { "text/plain": [ "0 147\n", "1 92\n", "2 88\n", "3 107\n", "4 74\n", "5 104\n", "6 132\n", "7 119\n", "8 66\n", "9 96\n", "10 77\n", "12 81\n", "13 97\n", "14 140\n", "15 108\n", "16 123\n", "17 114\n", "18 83\n", "19 72\n", "20 
import scipy.stats


def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and its two-sided Student-t confidence interval.

    Args:
        data: One-dimensional sequence of numbers (e.g. per-fold scores).
        confidence: Coverage of the interval, strictly between 0 and 1.

    Returns:
        A ``(mean, lower, upper)`` tuple of floats. ``lower`` and ``upper``
        are NaN when fewer than two values are given, because the standard
        error is undefined; all three are NaN for empty input.

    Raises:
        ValueError: If ``confidence`` is not in (0, 1), or if ``data`` has
            more than one dimension.

    Adapted from
    https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
    """
    # A value such as 95 (percent) would otherwise make t.ppf return NaN
    # without any error, so reject it up front.
    if not 0.0 < confidence < 1.0:
        raise ValueError(f"confidence must be in (0, 1), got {confidence!r}")

    values = np.atleast_1d(np.asarray(data, dtype=float))
    if values.ndim != 1:
        raise ValueError("data must be one-dimensional")

    n = values.size
    nan = float("nan")
    if n == 0:
        return nan, nan, nan

    mean = float(values.mean())
    if n == 1:
        # The standard error needs at least two observations (ddof=1).
        return mean, nan, nan

    std_err = scipy.stats.sem(values)
    # Half-width of the interval: standard error times the t critical value
    # with n - 1 degrees of freedom.
    half_width = float(std_err * scipy.stats.t.ppf((1 + confidence) / 2.0, n - 1))
    return mean, mean - half_width, mean + half_width
# %%
# 10-fold cross-validation of LogisticRegression on (X, Y), collecting the
# per-fold MAE / MSE / RMSE in mae_scores / mse_scores / rmse_scores.
#
# NOTE(review): LogisticRegression is a classifier, while Y
# (actual_case_duration) is an integer duration. Fitted this way, every
# distinct duration is treated as a class label and predictions can only be
# durations that occur in the training fold. Confirm this baseline is
# intended; a regressor (e.g. the already-imported LinearRegression) would
# match the regression metrics reported below.
#
# `mae` and `mse` are defined outside this part of the notebook -- presumably
# sklearn's mean_absolute_error / mean_squared_error, given the former
# `squared=False` keyword; confirm.
kfold = KFold(n_splits=10)
rmse_scores = []
mse_scores = []
mae_scores = []

for fold, (train_idx, test_idx) in enumerate(kfold.split(X, Y), start=1):
    print(fold)
    x_train_f, y_train_f = X.iloc[train_idx], Y.iloc[train_idx]
    x_test_f, y_test_f = X.iloc[test_idx], Y.iloc[test_idx]

    clf = LogisticRegression(max_iter=10000)
    clf.fit(x_train_f, y_train_f)
    preds = clf.predict(x_test_f)

    # Compute each metric once per fold. RMSE is taken as sqrt(MSE) instead
    # of mse(..., squared=False): that keyword is deprecated and later
    # removed in newer scikit-learn, and for a single target the value is
    # the same.
    fold_mae = mae(y_test_f, preds)
    fold_mse = mse(y_test_f, preds)
    fold_rmse = np.sqrt(fold_mse)

    print("mae:", fold_mae)
    print("mse:", fold_mse)
    print("rmse:", fold_rmse)
    mae_scores.append(fold_mae)
    mse_scores.append(fold_mse)
    rmse_scores.append(fold_rmse)

# %%
# Mean of each metric over the 10 folds.
print("mae_scores:", statistics.mean(mae_scores))
print("mse_scores:", statistics.mean(mse_scores))
print("rmse_scores:", statistics.mean(rmse_scores))
# %%
# 95% confidence intervals for the fold scores currently held in
# mae_scores / mse_scores / rmse_scores (filled by the preceding
# cross-validation cell).
print("mae_scores:", mean_confidence_interval(mae_scores))
print("mse_scores:", mean_confidence_interval(mse_scores))
print("rmse_scores:", mean_confidence_interval(rmse_scores))

# %% [markdown]
# # Random Forest

# %%
from sklearn.ensemble import RandomForestRegressor

# %%
# 10-fold cross-validation of a 150-tree random forest on (X, Y), collecting
# the per-fold MAE / MSE / RMSE in mae_scores / mse_scores / rmse_scores.
#
# `mae` and `mse` are defined outside this part of the notebook -- presumably
# sklearn's mean_absolute_error / mean_squared_error; confirm.
SEED = 42  # fixed so the forest, and therefore the scores, are reproducible

kfold = KFold(n_splits=10)
rmse_scores = []
mse_scores = []
mae_scores = []

for fold, (train_idx, test_idx) in enumerate(kfold.split(X, Y), start=1):
    print(fold)
    x_train_f, y_train_f = X.iloc[train_idx], Y.iloc[train_idx]
    x_test_f, y_test_f = X.iloc[test_idx], Y.iloc[test_idx]

    # Without random_state every run bootstraps different trees, so the
    # reported numbers could not be regenerated.
    clf = RandomForestRegressor(n_estimators=150, random_state=SEED)
    clf.fit(x_train_f, y_train_f)
    preds = clf.predict(x_test_f)

    # Compute each metric once per fold. RMSE is taken as sqrt(MSE) instead
    # of mse(..., squared=False): that keyword is deprecated and later
    # removed in newer scikit-learn, and for a single target the value is
    # the same.
    fold_mae = mae(y_test_f, preds)
    fold_mse = mse(y_test_f, preds)
    fold_rmse = np.sqrt(fold_mse)

    print("mae:", fold_mae)
    print("mse:", fold_mse)
    print("rmse:", fold_rmse)
    mae_scores.append(fold_mae)
    mse_scores.append(fold_mse)
    rmse_scores.append(fold_rmse)

# Mean of each metric over the 10 folds.
print("mae_scores:", statistics.mean(mae_scores))
print("mse_scores:", statistics.mean(mse_scores))
print("rmse_scores:", statistics.mean(rmse_scores))

# %%
# 95% confidence intervals of the random-forest fold scores.
print("mae_scores:", mean_confidence_interval(mae_scores))
print("mse_scores:", mean_confidence_interval(mse_scores))
print("rmse_scores:", mean_confidence_interval(rmse_scores))
26.578528340657552\n", "mse: 1126.9674725655816\n", "rmse: 33.570336199769905\n", "2\n", "1/1 [==============================] - 0s 65ms/step\n", "mae: 26.368604532877605\n", "mse: 1981.2446006137955\n", "rmse: 44.51117388492237\n", "3\n", "1/1 [==============================] - 0s 59ms/step\n", "mae: 19.81238301595052\n", "mse: 561.9554283723857\n", "rmse: 23.705599093302528\n", "4\n", "1/1 [==============================] - 0s 71ms/step\n", "mae: 18.95923614501953\n", "mse: 781.6167605288676\n", "rmse: 27.957409760721177\n", "5\n", "WARNING:tensorflow:5 out of the last 5 calls to .predict_function at 0x000002B612135630> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n", "1/1 [==============================] - 0s 65ms/step\n", "mae: 20.896653747558595\n", "mse: 912.183359353062\n", "rmse: 30.202373405960365\n", "6\n", "WARNING:tensorflow:6 out of the last 6 calls to .predict_function at 0x000002B612137250> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. 
# %%
# 10-fold cross-validation of a one-hidden-layer network (128 ReLU units,
# linear output) trained on mean absolute error, collecting the per-fold
# MAE / MSE / RMSE in mae_scores / mse_scores / rmse_scores.
#
# `mae` and `mse` are defined outside this part of the notebook -- presumably
# sklearn's mean_absolute_error / mean_squared_error; confirm.
SEED = 42  # fixed so weight initialisation and batch order are reproducible
tf.keras.utils.set_random_seed(SEED)

kfold = KFold(n_splits=10)
rmse_scores = []
mse_scores = []
mae_scores = []

for fold, (train_idx, test_idx) in enumerate(kfold.split(X, Y), start=1):
    print(fold)
    x_train_f, y_train_f = X.iloc[train_idx], Y.iloc[train_idx]
    x_test_f, y_test_f = X.iloc[test_idx], Y.iloc[test_idx]

    # A fresh model is built for every fold; drop the Keras global state left
    # by the previous one so it does not accumulate across the loop.
    tf.keras.backend.clear_session()

    model = Sequential()
    model.add(Dense(128, activation='relu'))
    model.add(Dense(1, activation='linear'))

    # The optimizer object was previously created but compile() was given the
    # string 'adam', so learning_rate=0.01 never took effect. Pass the
    # configured instance.
    # NOTE(review): this changes the training learning rate from Keras'
    # default to 0.01; re-check the reported scores after re-running.
    opt = tf.keras.optimizers.Adam(learning_rate=0.01)
    model.compile(loss='mean_absolute_error', optimizer=opt)
    model.fit(x_train_f, y_train_f, epochs=100, batch_size=64, verbose=0)

    # Dense(1) yields one column per sample; flatten it to line up with the
    # 1-D target.
    preds = model.predict(x_test_f).ravel()

    # Compute each metric once per fold. RMSE is taken as sqrt(MSE) instead
    # of mse(..., squared=False): that keyword is deprecated and later
    # removed in newer scikit-learn, and for a single target the value is
    # the same.
    fold_mae = mae(y_test_f, preds)
    fold_mse = mse(y_test_f, preds)
    fold_rmse = np.sqrt(fold_mse)

    print("mae:", fold_mae)
    print("mse:", fold_mse)
    print("rmse:", fold_rmse)
    mae_scores.append(fold_mae)
    mse_scores.append(fold_mse)
    rmse_scores.append(fold_rmse)

# Mean of each metric over the 10 folds.
print("mae_scores:", statistics.mean(mae_scores))
print("mse_scores:", statistics.mean(mse_scores))
print("rmse_scores:", statistics.mean(rmse_scores))

# %%
# 95% confidence intervals of the neural-network fold scores.
print("mae_scores:", mean_confidence_interval(mae_scores))
print("mse_scores:", mean_confidence_interval(mse_scores))
print("rmse_scores:", mean_confidence_interval(rmse_scores))