{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Gender Recognition by Voice Using Numerical Algorithms" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Importing Data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np\n", "\n", "sns.set(font_scale=1.5)\n", "\n", "data = pd.read_csv(\"data/cleaned/voice_data.csv\")\n", "X_train = pd.read_csv(\"data/cleaned/X_train.csv\")\n", "y_train = pd.read_csv(\"data/cleaned/y_train.csv\")\n", "X_test = pd.read_csv(\"data/cleaned/X_test.csv\")\n", "y_test = pd.read_csv(\"data/cleaned/y_test.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meanfreqmedianQ25Q75IQRsdlog_skewlog_kurtsp.entsfmmodemeanfunlog_minfunexp_maxfunmodindxmeandommindommaxdomdfrangelabel
0-4.049248-4.224901-2.576102-5.693607-0.2147780.4273552.9258923.177486-0.0390830.471575-2.141210-1.812038-1.3844860.636793-1.454772-1.564205-0.708404-1.431422-1.419137male
1-3.841053-3.999293-2.486885-5.588987-0.2584850.6116694.0327214.022523-0.0652360.594431-2.141210-1.079594-1.369352-0.524133-1.014103-1.561916-0.708404-1.418107-1.405818male
2-3.463066-4.095851-2.706986-3.9286990.9093261.6038484.6750894.506253-1.0837300.398261-2.141210-1.365368-1.3901310.404082-1.065344-1.563866-0.708404-1.429203-1.416917male
3-0.992157-0.759454-0.901418-0.7112050.6326900.899998-0.927599-0.8377091.5163831.797340-1.054576-1.666966-1.143909-0.5241330.614286-1.195367-0.708404-1.273867-1.261532male
4-1.530640-1.676948-1.268395-0.7920291.0055881.322561-1.055855-0.8076351.7083362.114740-0.790514-1.127233-1.2397250.1892380.289046-0.221660-0.7084040.1241540.136933male
\n", "
" ], "text/plain": [ " meanfreq median Q25 Q75 IQR sd log_skew \\\n", "0 -4.049248 -4.224901 -2.576102 -5.693607 -0.214778 0.427355 2.925892 \n", "1 -3.841053 -3.999293 -2.486885 -5.588987 -0.258485 0.611669 4.032721 \n", "2 -3.463066 -4.095851 -2.706986 -3.928699 0.909326 1.603848 4.675089 \n", "3 -0.992157 -0.759454 -0.901418 -0.711205 0.632690 0.899998 -0.927599 \n", "4 -1.530640 -1.676948 -1.268395 -0.792029 1.005588 1.322561 -1.055855 \n", "\n", " log_kurt sp.ent sfm mode meanfun log_minfun exp_maxfun \\\n", "0 3.177486 -0.039083 0.471575 -2.141210 -1.812038 -1.384486 0.636793 \n", "1 4.022523 -0.065236 0.594431 -2.141210 -1.079594 -1.369352 -0.524133 \n", "2 4.506253 -1.083730 0.398261 -2.141210 -1.365368 -1.390131 0.404082 \n", "3 -0.837709 1.516383 1.797340 -1.054576 -1.666966 -1.143909 -0.524133 \n", "4 -0.807635 1.708336 2.114740 -0.790514 -1.127233 -1.239725 0.189238 \n", "\n", " modindx meandom mindom maxdom dfrange label \n", "0 -1.454772 -1.564205 -0.708404 -1.431422 -1.419137 male \n", "1 -1.014103 -1.561916 -0.708404 -1.418107 -1.405818 male \n", "2 -1.065344 -1.563866 -0.708404 -1.429203 -1.416917 male \n", "3 0.614286 -1.195367 -0.708404 -1.273867 -1.261532 male \n", "4 0.289046 -0.221660 -0.708404 0.124154 0.136933 male " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 3168 entries, 0 to 3167\n", "Data columns (total 20 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 meanfreq 3168 non-null float64\n", " 1 median 3168 non-null float64\n", " 2 Q25 3168 non-null float64\n", " 3 Q75 3168 non-null float64\n", " 4 IQR 3168 non-null float64\n", " 5 sd 3168 non-null float64\n", " 6 log_skew 3168 non-null float64\n", " 7 log_kurt 3168 non-null float64\n", " 8 sp.ent 3168 non-null float64\n", " 9 sfm 3168 
non-null float64\n", " 10 mode 3168 non-null float64\n", " 11 meanfun 3168 non-null float64\n", " 12 log_minfun 3168 non-null float64\n", " 13 exp_maxfun 3168 non-null float64\n", " 14 modindx 3168 non-null float64\n", " 15 meandom 3168 non-null float64\n", " 16 mindom 3168 non-null float64\n", " 17 maxdom 3168 non-null float64\n", " 18 dfrange 3168 non-null float64\n", " 19 label 3168 non-null object \n", "dtypes: float64(19), object(1)\n", "memory usage: 495.1+ KB\n" ] } ], "source": [ "data.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The meanings of the features are as follows:\n", "\n", "* `meanfreq`: mean frequency (in kHz)\n", "* `median`: median frequency (in kHz)\n", "* `Q25`: first quartile (in kHz)\n", "* `Q75`: third quartile (in kHz)\n", "* `IQR`: interquartile range (in kHz)\n", "* `sd`: standard deviation of frequency\n", "* `log_skew`: skewness after logarithmic transformation\n", "* `log_kurt`: kurtosis after logarithmic transformation\n", "* `sp.ent`: spectral entropy\n", "* `sfm`: spectral flatness\n", "* `mode`: mode frequency\n", "* `meanfun`: average of fundamental frequency measured across the acoustic signal\n", "* `log_minfun`: minimum fundamental frequency measured across the acoustic signal after logarithmic transformation\n", "* `exp_maxfun`: maximum fundamental frequency measured across the acoustic signal after exponential transformation\n", "* `modindx`: modulation index\n", "* `meandom`: average of dominant frequency measured across the acoustic signal\n", "* `mindom`: minimum of dominant frequency measured across the acoustic signal\n", "* `maxdom`: maximum of dominant frequency measured across the acoustic signal\n", "* `dfrange`: range of dominant frequency measured across the acoustic signal\n", "* `label`: male or female" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Ensemble Vote Model\n", "\n", "The Ensemble Vote Model is a technique we developed to combine multiple models in order to increase overall 
accuracy. We have integrated the outputs of three high-performing models (Random Forest, Support Vector Machine, and Multilayer Perceptron) and selected the majority vote as the final prediction. " ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from sklearn.neural_network import MLPClassifier\n", "from sklearn.svm import SVC\n", "from sklearn.ensemble import RandomForestClassifier\n", "\n", "\n", "class CombinedMethod(object):\n", " def __init__(self, X_train, y_train):\n", " self.X_train = X_train\n", " self.y_train = y_train\n", " self.rfClassifier = RandomForestClassifier(random_state=87).fit(\n", " X_train, y_train.squeeze()\n", " )\n", " self.svmClassifier = SVC(random_state=87, kernel=\"rbf\", gamma=0.01, C=10).fit(\n", " X_train, y_train.squeeze()\n", " )\n", " self.mlpClassifier = MLPClassifier(\n", " random_state=87, max_iter=1000, hidden_layer_sizes=(100, 100, 100, 100)\n", " ).fit(X_train, y_train.squeeze())\n", "\n", " def get_all_predictions(self, X):\n", " rf_predictions = self.rfClassifier.predict(X)\n", " svm_predictions = self.svmClassifier.predict(X)\n", " mlp_predictions = self.mlpClassifier.predict(X)\n", " return rf_predictions, svm_predictions, mlp_predictions\n", "\n", " def predict(self, X):\n", " rf_predictions = self.rfClassifier.predict(X)\n", " svm_predictions = self.svmClassifier.predict(X)\n", " mlp_predictions = self.mlpClassifier.predict(X)\n", " predictions = []\n", " for i in range(len(X)):\n", " predictions.append(\n", " np.argmax(\n", " np.bincount(\n", " [rf_predictions[i], svm_predictions[i], mlp_predictions[i]]\n", " )\n", " )\n", " )\n", " return predictions\n", "\n", " def score(self, X, y):\n", " predictions = self.predict(X)\n", " return np.sum(predictions == y.squeeze()) / len(y)\n", "\n", " def score_all(self, X, y):\n", " rf_score = self.rfClassifier.score(X, y.squeeze())\n", " svm_score = self.svmClassifier.score(X, y.squeeze())\n", " mlp_score = 
self.mlpClassifier.score(X, y.squeeze())\n", " combined_score = self.score(X, y)\n", " return rf_score, svm_score, mlp_score, combined_score" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Training the ensemble and scoring it on the training and test sets gives the following accuracies:" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train Set Scores: 1.0\n", "Test Set Scores: 0.9800664451827242\n" ] } ], "source": [ "y_train_clean = y_train == \"male\"\n", "y_test_clean = y_test == \"male\"\n", "combinedMethod = CombinedMethod(X_train, y_train_clean)\n", "print(\"Train Set Scores:\", combinedMethod.score(X_train, y_train_clean))\n", "print(\"Test Set Scores:\", combinedMethod.score(X_test, y_test_clean))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The test-set accuracies of the three individual models (Random Forest, Support Vector Machine, Multi-Layer Perceptron), followed by that of the ensemble, are:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.9800664451827242, 0.9651162790697675, 0.973421926910299, 0.9767441860465116)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combinedMethod.score_all(X_test, y_test_clean)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We found that the accuracy of the Ensemble Vote model was not as high as we had hoped: the majority vote did not outperform the best individual models. This experience taught us the importance of carefully selecting and combining models based on their individual strengths and weaknesses, and considering the underlying assumptions and limitations of each model." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Conclusion\n", "\n", "The training and testing accuracies of all models are summarized below:\n", "\n", "| Model | Training Accuracy | Testing Accuracy |\n", "| --- | --- | --- |\n", "| Classification Tree | 1.0000 | 0.9751 |\n", "| Random Forest | 1.0000 | 0.9801 |\n", "| Logistic Regression | 0.9763 | 0.9734 |\n", "| K-Nearest Neighbors | 1.0000 | 0.9817 |\n", "| Support Vector Machine | 0.9896 | 0.9834 |\n", "| Multi-Layer Perceptron | 1.0000 | 0.9734 |\n", "| Ensemble Vote | 1.0000 | 0.9801 |" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "\n", "plt.figure(figsize=(15, 10))\n", "\n", "# Make the font bigger\n", "sns.set(font_scale=2)\n", "\n", "# Define the data\n", "data = {\n", " \"Model\": [\"CART\", \"RF\", \"LR\", \"KNN\", \"SVM\", \"MLP\", \"Ensemble\"],\n", " \"Training Accuracy\": [1.0000, 1.0000, 0.9763, 1.0000, 0.9896, 1.0000, 1.0000],\n", " \"Testing Accuracy\": [0.9751, 0.9801, 0.9734, 0.9817, 0.9834, 0.9734, 0.9800],\n", "}\n", "\n", "# Convert the data to a pandas DataFrame\n", "results = pd.DataFrame(data)\n", "\n", "# Set the style\n", "sns.set_style(\"whitegrid\")\n", "\n", "# Create the barplot\n", "ax = sns.barplot(x=\"Model\", y=\"Testing Accuracy\", data=results)\n", "\n", "# Add labels to the bars\n", "for i in range(len(results)):\n", " ax.annotate(\n", " f\"{results['Testing Accuracy'][i]:.4f}\",\n", " (i, results[\"Testing Accuracy\"][i]),\n", " ha=\"center\",\n", " va=\"bottom\",\n", " )\n", "\n", "# Add a title\n", "plt.title(\"Model Performance\")\n", "\n", "# Rotate the x-axis labels to avoid overlapping\n", "# ax.set_xticklabels(data['Model'], rotation=45)\n", "\n", "# Show the plot\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 1 }