Skip to content

Commit 2952eb6

Browse files
committed
Add Cleaning and Tidying DateTime Data notes to case study
1 parent 1b8b5ba commit 2952eb6

File tree

1 file changed

+204
-0
lines changed

1 file changed

+204
-0
lines changed

pandas/pandas_Foundations/case_study_sunlight_in_Austin.ipynb

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,210 @@
684684
"## Cleaning and Tidying DateTime Data"
685685
]
686686
},
687+
{
688+
"cell_type": "code",
689+
"execution_count": 9,
690+
"metadata": {},
691+
"outputs": [
692+
{
693+
"data": {
694+
"text/html": [
695+
"<div>\n",
696+
"<style scoped>\n",
697+
" .dataframe tbody tr th:only-of-type {\n",
698+
" vertical-align: middle;\n",
699+
" }\n",
700+
"\n",
701+
" .dataframe tbody tr th {\n",
702+
" vertical-align: top;\n",
703+
" }\n",
704+
"\n",
705+
" .dataframe thead th {\n",
706+
" text-align: right;\n",
707+
" }\n",
708+
"</style>\n",
709+
"<table border=\"1\" class=\"dataframe\">\n",
710+
" <thead>\n",
711+
" <tr style=\"text-align: right;\">\n",
712+
" <th></th>\n",
713+
" <th>Wban</th>\n",
714+
" <th>date</th>\n",
715+
" <th>Time</th>\n",
716+
" <th>StationType</th>\n",
717+
" <th>sky_condition</th>\n",
718+
" <th>visibility</th>\n",
719+
" <th>dry_bulb_faren</th>\n",
720+
" <th>dry_bulb_cel</th>\n",
721+
" <th>wet_bulb_faren</th>\n",
722+
" <th>wet_bulb_cel</th>\n",
723+
" <th>dew_point_faren</th>\n",
724+
" <th>dew_point_cel</th>\n",
725+
" <th>relative_humidity</th>\n",
726+
" <th>wind_speed</th>\n",
727+
" <th>wind_direction</th>\n",
728+
" <th>station_pressure</th>\n",
729+
" <th>sea_level_pressure</th>\n",
730+
" </tr>\n",
731+
" </thead>\n",
732+
" <tbody>\n",
733+
" <tr>\n",
734+
" <th>0</th>\n",
735+
" <td>13904</td>\n",
736+
" <td>20110101</td>\n",
737+
" <td>0053</td>\n",
738+
" <td>12</td>\n",
739+
" <td>OVC045</td>\n",
740+
" <td>10.00</td>\n",
741+
" <td>51</td>\n",
742+
" <td>10.6</td>\n",
743+
" <td>38</td>\n",
744+
" <td>3.1</td>\n",
745+
" <td>15</td>\n",
746+
" <td>-9.4</td>\n",
747+
" <td>24</td>\n",
748+
" <td>15</td>\n",
749+
" <td>360</td>\n",
750+
" <td>29.42</td>\n",
751+
" <td>29.95</td>\n",
752+
" </tr>\n",
753+
" <tr>\n",
754+
" <th>1</th>\n",
755+
" <td>13904</td>\n",
756+
" <td>20110101</td>\n",
757+
" <td>0153</td>\n",
758+
" <td>12</td>\n",
759+
" <td>OVC049</td>\n",
760+
" <td>10.00</td>\n",
761+
" <td>51</td>\n",
762+
" <td>10.6</td>\n",
763+
" <td>37</td>\n",
764+
" <td>3.0</td>\n",
765+
" <td>14</td>\n",
766+
" <td>-10.0</td>\n",
767+
" <td>23</td>\n",
768+
" <td>10</td>\n",
769+
" <td>340</td>\n",
770+
" <td>29.49</td>\n",
771+
" <td>30.01</td>\n",
772+
" </tr>\n",
773+
" <tr>\n",
774+
" <th>2</th>\n",
775+
" <td>13904</td>\n",
776+
" <td>20110101</td>\n",
777+
" <td>0253</td>\n",
778+
" <td>12</td>\n",
779+
" <td>OVC060</td>\n",
780+
" <td>10.00</td>\n",
781+
" <td>51</td>\n",
782+
" <td>10.6</td>\n",
783+
" <td>37</td>\n",
784+
" <td>2.9</td>\n",
785+
" <td>13</td>\n",
786+
" <td>-10.6</td>\n",
787+
" <td>22</td>\n",
788+
" <td>15</td>\n",
789+
" <td>010</td>\n",
790+
" <td>29.49</td>\n",
791+
" <td>30.01</td>\n",
792+
" </tr>\n",
793+
" <tr>\n",
794+
" <th>3</th>\n",
795+
" <td>13904</td>\n",
796+
" <td>20110101</td>\n",
797+
" <td>0353</td>\n",
798+
" <td>12</td>\n",
799+
" <td>OVC065</td>\n",
800+
" <td>10.00</td>\n",
801+
" <td>50</td>\n",
802+
" <td>10.0</td>\n",
803+
" <td>38</td>\n",
804+
" <td>3.1</td>\n",
805+
" <td>17</td>\n",
806+
" <td>-8.3</td>\n",
807+
" <td>27</td>\n",
808+
" <td>7</td>\n",
809+
" <td>350</td>\n",
810+
" <td>29.51</td>\n",
811+
" <td>30.03</td>\n",
812+
" </tr>\n",
813+
" <tr>\n",
814+
" <th>4</th>\n",
815+
" <td>13904</td>\n",
816+
" <td>20110101</td>\n",
817+
" <td>0453</td>\n",
818+
" <td>12</td>\n",
819+
" <td>BKN070</td>\n",
820+
" <td>10.00</td>\n",
821+
" <td>50</td>\n",
822+
" <td>10.0</td>\n",
823+
" <td>37</td>\n",
824+
" <td>2.8</td>\n",
825+
" <td>15</td>\n",
826+
" <td>-9.4</td>\n",
827+
" <td>25</td>\n",
828+
" <td>11</td>\n",
829+
" <td>020</td>\n",
830+
" <td>29.51</td>\n",
831+
" <td>30.04</td>\n",
832+
" </tr>\n",
833+
" </tbody>\n",
834+
"</table>\n",
835+
"</div>"
836+
],
837+
"text/plain": [
838+
" Wban date Time StationType sky_condition visibility dry_bulb_faren \\\n",
839+
"0 13904 20110101 0053 12 OVC045 10.00 51 \n",
840+
"1 13904 20110101 0153 12 OVC049 10.00 51 \n",
841+
"2 13904 20110101 0253 12 OVC060 10.00 51 \n",
842+
"3 13904 20110101 0353 12 OVC065 10.00 50 \n",
843+
"4 13904 20110101 0453 12 BKN070 10.00 50 \n",
844+
"\n",
845+
" dry_bulb_cel wet_bulb_faren wet_bulb_cel dew_point_faren dew_point_cel \\\n",
846+
"0 10.6 38 3.1 15 -9.4 \n",
847+
"1 10.6 37 3.0 14 -10.0 \n",
848+
"2 10.6 37 2.9 13 -10.6 \n",
849+
"3 10.0 38 3.1 17 -8.3 \n",
850+
"4 10.0 37 2.8 15 -9.4 \n",
851+
"\n",
852+
" relative_humidity wind_speed wind_direction station_pressure \\\n",
853+
"0 24 15 360 29.42 \n",
854+
"1 23 10 340 29.49 \n",
855+
"2 22 15 010 29.49 \n",
856+
"3 27 7 350 29.51 \n",
857+
"4 25 11 020 29.51 \n",
858+
"\n",
859+
" sea_level_pressure \n",
860+
"0 29.95 \n",
861+
"1 30.01 \n",
862+
"2 30.01 \n",
863+
"3 30.03 \n",
864+
"4 30.04 "
865+
]
866+
},
867+
"execution_count": 9,
868+
"metadata": {},
869+
"output_type": "execute_result"
870+
}
871+
],
872+
"source": [
873+
"# Convert the date column to string\n",
874+
"df_dropped['date'] = df_dropped['date'].astype(str)\n",
875+
"\n",
876+
"# Add leading zeros to the 'Time' column\n",
877+
"df_dropped['Time'] = df_dropped['Time'].apply(lambda x: '{:0>4}'.format(x))\n",
878+
"\n",
879+
"# Concatenate the new date and Time columns\n",
880+
"date_string = df_dropped['date'] + df_dropped['Time']\n",
881+
"\n",
882+
"# Convert the date_string Series to datetime\n",
883+
"date_times = pd.to_datetime(date_string, format='%Y%m%d%H%M')\n",
884+
"\n",
885+
"# Set the index to be the new date_times\n",
886+
"df_clean = df_dropped.set_index(date_times)\n",
887+
"\n",
888+
"df_dropped.head()"
889+
]
890+
},
687891
{
688892
"cell_type": "code",
689893
"execution_count": null,

0 commit comments

Comments
 (0)